//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 AVX512 instruction set, defining the
// instructions, and properties of the instructions which are needed for code
// generation, machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//

// Group template arguments that can be derived from the vector type (NumElts x
// EltVT). These are things like the register class for the writemask, etc.
// The idea is to pass one of these as the template argument rather than the
// individual arguments.
// The template is also used for scalar types, in which case NumElts is 1.
class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
                      string suffix = ""> {
  RegisterClass RC = rc;
  ValueType EltVT = eltvt;
  int NumElts = numelts;

  // Corresponding mask register class.
  RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);

  // Corresponding mask register pair class.
  RegisterOperand KRPC = !if (!gt(NumElts, 16), ?,
                              !cast<RegisterOperand>("VK" # NumElts # "Pair"));

  // Corresponding write-mask register class.
  RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");

  // The mask VT.
  ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");

  // Suffix used in the instruction mnemonic.
  string Suffix = suffix;

  // VTName is a string name for the vector VT. For vector types it is
  // v # NumElts # EltVT, so for a vector of 8 elements of i32 it is v8i32.
  // It is slightly more involved for scalar types, where NumElts = 1: in that
  // case we build the name of the smallest 128-bit vector with the same
  // element type, i.e. v8f16, v4f32, or v2f64.
  string VTName = "v" # !if (!eq (NumElts, 1),
                        !if (!eq (EltVT.Size, 16), 8,
                        !if (!eq (EltVT.Size, 32), 4,
                        !if (!eq (EltVT.Size, 64), 2, NumElts))), NumElts) # EltVT;

  // The vector VT.
  ValueType VT = !cast<ValueType>(VTName);

  string EltTypeName = !cast<string>(EltVT);
  // Size of the element type in bits, e.g. 32 for v16i32.
  string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
  int EltSize = EltVT.Size;

  // "i" for integer types and "f" for floating-point types.
  string TypeVariantName = !subst(EltSizeName, "", EltTypeName);

  // Size of RC in bits, e.g. 512 for VR512.
  int Size = VT.Size;

  // The corresponding memory operand, e.g. i512mem for VR512.
  X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
  X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
  // FP scalar memory operand for intrinsics - shmem/ssmem/sdmem.
  Operand IntScalarMemOp = !if (!eq (EltTypeName, "f16"), !cast<Operand>("shmem"),
                           !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
                           !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?)));

  // Load patterns
  PatFrag LdFrag = !cast<PatFrag>("load" # VTName);

  PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" # VTName);

  PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
  PatFrag BroadcastLdFrag = !cast<PatFrag>("X86VBroadcastld" # EltSizeName);

  PatFrags ScalarIntMemFrags = !if (!eq (EltTypeName, "f16"), !cast<PatFrags>("sse_load_f16"),
                               !if (!eq (EltTypeName, "f32"), !cast<PatFrags>("sse_load_f32"),
                               !if (!eq (EltTypeName, "f64"), !cast<PatFrags>("sse_load_f64"), ?)));

  // The string to specify embedded broadcast in assembly.
  string BroadcastStr = "{1to" # NumElts # "}";

  // 8-bit compressed displacement tuple/subvector format. This is only
  // defined for NumElts <= 8.
  CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
                               !cast<CD8VForm>("CD8VT" # NumElts), ?);

  SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
                          !if (!eq (Size, 256), sub_ymm, ?));

  Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
                     !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
                     !if (!eq (EltTypeName, "f16"), SSEPackedSingle, // FIXME?
                     SSEPackedInt)));

  RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X,
                      !if (!eq (EltTypeName, "f16"), FR16X,
                      FR64X));

  dag ImmAllZerosV = (VT immAllZerosV);

  string ZSuffix = !if (!eq (Size, 128), "Z128",
                   !if (!eq (Size, 256), "Z256", "Z"));
}

def v64i8_info  : X86VectorVTInfo<64,  i8, VR512, "b">;
def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
def v8i64_info  : X86VectorVTInfo<8,  i64, VR512, "q">;
def v32f16_info : X86VectorVTInfo<32, f16, VR512, "ph">;
def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
def v8f64_info  : X86VectorVTInfo<8,  f64, VR512, "pd">;

// "x" in v32i8x_info means RC = VR256X
def v32i8x_info  : X86VectorVTInfo<32,  i8, VR256X, "b">;
def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
def v8i32x_info  : X86VectorVTInfo<8,  i32, VR256X, "d">;
def v4i64x_info  : X86VectorVTInfo<4,  i64, VR256X, "q">;
def v16f16x_info : X86VectorVTInfo<16, f16, VR256X, "ph">;
def v8f32x_info  : X86VectorVTInfo<8,  f32, VR256X, "ps">;
def v4f64x_info  : X86VectorVTInfo<4,  f64, VR256X, "pd">;

def v16i8x_info  : X86VectorVTInfo<16,  i8, VR128X, "b">;
def v8i16x_info  : X86VectorVTInfo<8,  i16, VR128X, "w">;
def v4i32x_info  : X86VectorVTInfo<4,  i32, VR128X, "d">;
def v2i64x_info  : X86VectorVTInfo<2,  i64, VR128X, "q">;
def v8f16x_info  : X86VectorVTInfo<8,  f16, VR128X, "ph">;
def v4f32x_info  : X86VectorVTInfo<4,  f32, VR128X, "ps">;
def v2f64x_info  : X86VectorVTInfo<2,  f64, VR128X, "pd">;

// We map scalar types to the smallest (128-bit) vector type
// with the appropriate element type. This allows us to use the same masking
// logic.
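// For illustration: with the class above, v16i32_info derives KRC = VK16,
// KVT = v16i1, MemOp = i512mem and BroadcastStr = "{1to16}", while the scalar
// f32x_info below derives VTName = "v4f32" (NumElts = 1 with a 32-bit element
// selects 4), so VT = v4f32 and FRC = FR32X.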
def i32x_info : X86VectorVTInfo<1, i32, GR32, "si">;
def i64x_info : X86VectorVTInfo<1, i64, GR64, "sq">;
def f16x_info : X86VectorVTInfo<1, f16, VR128X, "sh">;
def f32x_info : X86VectorVTInfo<1, f32, VR128X, "ss">;
def f64x_info : X86VectorVTInfo<1, f64, VR128X, "sd">;

class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
                           X86VectorVTInfo i128> {
  X86VectorVTInfo info512 = i512;
  X86VectorVTInfo info256 = i256;
  X86VectorVTInfo info128 = i128;
}

def avx512vl_i8_info  : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
                                             v16i8x_info>;
def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
                                             v8i16x_info>;
def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
                                             v4i32x_info>;
def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
                                             v2i64x_info>;
def avx512vl_f16_info : AVX512VLVectorVTInfo<v32f16_info, v16f16x_info,
                                             v8f16x_info>;
def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
                                             v4f32x_info>;
def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
                                             v2f64x_info>;

class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
                       ValueType _vt> {
  RegisterClass KRC = _krc;
  RegisterClass KRCWM = _krcwm;
  ValueType KVT = _vt;
}

def v1i1_info  : X86KVectorVTInfo<VK1,  VK1WM,  v1i1>;
def v2i1_info  : X86KVectorVTInfo<VK2,  VK2WM,  v2i1>;
def v4i1_info  : X86KVectorVTInfo<VK4,  VK4WM,  v4i1>;
def v8i1_info  : X86KVectorVTInfo<VK8,  VK8WM,  v8i1>;
def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;

// Used for matching masked operations. Ensures the operation part only has a
// single use.
def vselect_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2),
                           (vselect node:$mask, node:$src1, node:$src2), [{
  return isProfitableToFormMaskedOp(N);
}]>;

def X86selects_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2),
                              (X86selects node:$mask, node:$src1, node:$src2), [{
  return isProfitableToFormMaskedOp(N);
}]>;

// This multiclass generates the masking variants from the non-masking
// variant. It only provides the assembly pieces for the masking variants.
// It assumes custom ISel patterns for masking which can be provided as
// template arguments.
multiclass AVX512_maskable_custom<bits<8> O, Format F,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  list<dag> MaskingPattern,
                                  list<dag> ZeroMaskingPattern,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable,
                                  string ClobberConstraint = ""> {
  let isCommutable = IsCommutable, Constraints = ClobberConstraint in
  def NAME: AVX512<O, F, Outs, Ins,
                   OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                             "$dst, "#IntelSrcAsm#"}",
                   Pattern>;

  // Prefer over VMOV*rrk Pat<>
  let isCommutable = IsKCommutable in
  def NAME#k: AVX512<O, F, Outs, MaskingIns,
                     OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                               "$dst {${mask}}, "#IntelSrcAsm#"}",
                     MaskingPattern>,
              EVEX_K {
    // In case of the 3src subclass this is overridden with a let.
    string Constraints = !if(!eq(ClobberConstraint, ""), MaskingConstraint,
                         !if(!eq(MaskingConstraint, ""), ClobberConstraint,
                         !strconcat(ClobberConstraint, ", ", MaskingConstraint)));
  }

  // Zero masking does not add any restrictions to the operand-commuting
  // transformation, so it is OK to use IsCommutable instead of IsKCommutable.
  let isCommutable = IsKZCommutable, // Prefer over VMOV*rrkz Pat<>
      Constraints = ClobberConstraint in
  def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
                      OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
                                "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
                      ZeroMaskingPattern>,
               EVEX_KZ;
}


// Common base class of AVX512_maskable and AVX512_maskable_3src.
multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS,
                                  SDPatternOperator Select = vselect_mask,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable,
                                  string ClobberConstraint = ""> :
  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst, MaskingRHS)],
                         [(set _.RC:$dst,
                               (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
                         MaskingConstraint, IsCommutable,
                         IsKCommutable, IsKZCommutable, ClobberConstraint>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
// This version uses a separate dag for non-masking and masking.
multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
                                 dag Outs, dag Ins, string OpcodeStr,
                                 string AttSrcAsm, string IntelSrcAsm,
                                 dag RHS, dag MaskRHS,
                                 string ClobberConstraint = "",
                                 bit IsCommutable = 0, bit IsKCommutable = 0,
                                 bit IsKZCommutable = IsCommutable> :
  AVX512_maskable_custom<O, F, Outs, Ins,
                         !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                         !con((ins _.KRCWM:$mask), Ins),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst,
                               (vselect_mask _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
                         [(set _.RC:$dst,
                               (vselect_mask _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
                         "$src0 = $dst", IsCommutable, IsKCommutable,
                         IsKZCommutable, ClobberConstraint>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
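// As an illustration, a hypothetical "defm VFOO : AVX512_maskable<...>"
// instantiation produces three instructions:
//   VFOO    vfoo %zmm1, %zmm0              - unmasked
//   VFOOk   vfoo %zmm1, %zmm0 {%k1}        - merge-masking, $src0 tied to $dst
//   VFOOkz  vfoo %zmm1, %zmm0 {%k1} {z}    - zero-masking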
multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS,
                           bit IsCommutable = 0, bit IsKCommutable = 0,
                           bit IsKZCommutable = IsCommutable,
                           SDPatternOperator Select = vselect_mask,
                           string ClobberConstraint = ""> :
  AVX512_maskable_common<O, F, _, Outs, Ins,
                         !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                         !con((ins _.KRCWM:$mask), Ins),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                         (Select _.KRCWM:$mask, RHS, _.RC:$src0),
                         Select, "$src0 = $dst", IsCommutable, IsKCommutable,
                         IsKZCommutable, ClobberConstraint>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the scalar instruction.
multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins, string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS> :
  AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
                  RHS, 0, 0, 0, X86selects_mask>;

// Similar to AVX512_maskable but in this case one of the source operands
// ($src1) is already tied to $dst so we just use that for the preserved
// vector elements. NOTE that the NonTiedIns (the ins dag) should exclude
// $src1.
multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
                                dag Outs, dag NonTiedIns, string OpcodeStr,
                                string AttSrcAsm, string IntelSrcAsm,
                                dag RHS,
                                bit IsCommutable = 0,
                                bit IsKCommutable = 0,
                                SDPatternOperator Select = vselect_mask,
                                bit MaskOnly = 0> :
  AVX512_maskable_common<O, F, _, Outs,
                         !con((ins _.RC:$src1), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm,
                         !if(MaskOnly, (null_frag), RHS),
                         (Select _.KRCWM:$mask, RHS, _.RC:$src1),
                         Select, "", IsCommutable, IsKCommutable>;

// Similar to AVX512_maskable_3src but in this case the input VT for the tied
// operand differs from the output VT. This requires a bitconvert on
// the preserved vector going into the vselect.
// NOTE: The unmasked pattern is disabled.
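// (This is the case for, e.g., the FP variants of VPERMI2, where the tied
// index operand $src1 is an integer vector while the result is a
// floating-point vector of the same width.)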
multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
                                     X86VectorVTInfo InVT,
                                     dag Outs, dag NonTiedIns, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS, bit IsCommutable = 0> :
  AVX512_maskable_common<O, F, OutVT, Outs,
                         !con((ins InVT.RC:$src1), NonTiedIns),
                         !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                         !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
                         (vselect_mask InVT.KRCWM:$mask, RHS,
                                       (bitconvert InVT.RC:$src1)),
                         vselect_mask, "", IsCommutable>;

multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                       dag Outs, dag NonTiedIns, string OpcodeStr,
                                       string AttSrcAsm, string IntelSrcAsm,
                                       dag RHS,
                                       bit IsCommutable = 0,
                                       bit IsKCommutable = 0,
                                       bit MaskOnly = 0> :
  AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
                       IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
                       X86selects_mask, MaskOnly>;

multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern> :
  AVX512_maskable_custom<O, F, Outs, Ins,
                         !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                         !con((ins _.KRCWM:$mask), Ins),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                         "$src0 = $dst">;

multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                       dag Outs, dag NonTiedIns,
                                       string OpcodeStr,
                                       string AttSrcAsm, string IntelSrcAsm,
                                       list<dag> Pattern> :
  AVX512_maskable_custom<O, F, Outs,
                         !con((ins _.RC:$src1), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                         "">;

// Instructions with a mask that put the result in a mask register, like
// "compare" and "vptest".
multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
                                      dag Outs,
                                      dag Ins, dag MaskingIns,
                                      string OpcodeStr,
                                      string AttSrcAsm, string IntelSrcAsm,
                                      list<dag> Pattern,
                                      list<dag> MaskingPattern,
                                      bit IsCommutable = 0> {
  let isCommutable = IsCommutable in {
    def NAME: AVX512<O, F, Outs, Ins,
                     OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                               "$dst, "#IntelSrcAsm#"}",
                     Pattern>;

    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                 "$dst {${mask}}, "#IntelSrcAsm#"}",
                       MaskingPattern>, EVEX_K;
  }
}

multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                                      dag Outs,
                                      dag Ins, dag MaskingIns,
                                      string OpcodeStr,
                                      string AttSrcAsm, string IntelSrcAsm,
                                      dag RHS, dag MaskingRHS,
                                      bit IsCommutable = 0> :
  AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
                             AttSrcAsm, IntelSrcAsm,
                             [(set _.KRC:$dst, RHS)],
                             [(set _.KRC:$dst, MaskingRHS)], IsCommutable>;

multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                               dag Outs, dag Ins, string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, dag RHS_su, bit IsCommutable = 0> :
  AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
                             !con((ins _.KRCWM:$mask), Ins),
                             OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                             (and _.KRCWM:$mask, RHS_su), IsCommutable>;

// Used by conversion instructions.
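// The masked and zero-masked dags are supplied explicitly instead of being
// synthesized with vselect_mask, since a conversion's masked pattern often
// cannot be expressed as a plain select over the unmasked result (e.g. when
// the source and destination element counts differ).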
multiclass AVX512_maskable_cvt<bits<8> O, Format F, X86VectorVTInfo _,
                               dag Outs,
                               dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                               string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, dag MaskingRHS, dag ZeroMaskingRHS> :
  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst, MaskingRHS)],
                         [(set _.RC:$dst, ZeroMaskingRHS)],
                         "$src0 = $dst">;

multiclass AVX512_maskable_fma<bits<8> O, Format F, X86VectorVTInfo _,
                               dag Outs, dag NonTiedIns, string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, dag MaskingRHS, bit IsCommutable,
                               bit IsKCommutable> :
  AVX512_maskable_custom<O, F, Outs,
                         !con((ins _.RC:$src1), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst,
                               (vselect_mask _.KRCWM:$mask, MaskingRHS, _.RC:$src1))],
                         [(set _.RC:$dst,
                               (vselect_mask _.KRCWM:$mask, MaskingRHS, _.ImmAllZerosV))],
                         "", IsCommutable, IsKCommutable>;

// Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
// swizzled by ExecutionDomainFix to pxor.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
                        [(set VR512:$dst, (v16i32 immAllZerosV))]>;
def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
                              [(set VR512:$dst, (v16i32 immAllOnesV))]>;
}

let Predicates = [HasAVX512] in {
def : Pat<(v64i8 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v32i16 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v32f16 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
}

// Alias instructions that allow VPTERNLOG to be used with a mask to create
// a mix of all ones and all zeros elements. This is done this way to force
// the same register to be used as input for all three sources.
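// (Roughly: the pseudos below are later rewritten to a zero-masked
// VPTERNLOGD/Q with an all-ones immediate, so lanes whose mask bit is set
// become all ones and the remaining lanes become all zeros.)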
let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK16WM:$mask), "",
                                [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
                                                           (v16i32 immAllOnesV),
                                                           (v16i32 immAllZerosV)))]>;
def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK8WM:$mask), "",
                                [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
                                                           (v8i64 immAllOnesV),
                                                           (v8i64 immAllZerosV)))]>;
}

let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
                        [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
                        [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
}

let Predicates = [HasAVX512] in {
def : Pat<(v8i16 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v16i8 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v2i64 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v8f16 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v4f32 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v2f64 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v32i8 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v16i16 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v4i64 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v16f16 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v8f32 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v4f64 immAllZerosV), (AVX512_256_SET0)>;
}

// Alias instructions that map fld0 to xorps for SSE or vxorps for AVX.
// These are expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
  def AVX512_FsFLD0SH : I<0, Pseudo, (outs FR16X:$dst), (ins), "",
                          [(set FR16X:$dst, fp16imm0)]>;
  def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
                          [(set FR32X:$dst, fp32imm0)]>;
  def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
                          [(set FR64X:$dst, fp64imm0)]>;
  def AVX512_FsFLD0F128 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
                            [(set VR128X:$dst, fp128imm0)]>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - VECTOR INSERT
//

// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one.
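// For example, the 128-bit-into-512-bit f32 instantiation below yields
// instructions of the form
//   vinsertf32x4 $1, %xmm2, %zmm1, %zmm0 {%k1}
// in register (rr) and memory (rm) variants.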
multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
                                  X86VectorVTInfo To,
                                  SDPatternOperator vinsert_insert,
                                  SDPatternOperator vinsert_for_mask,
                                  X86FoldableSchedWrite sched> {
  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                                         (From.VT From.RC:$src2),
                                         (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                                           (From.VT From.RC:$src2),
                                           (iPTR imm))>,
                   AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
    let mayLoad = 1 in
    defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                                         (From.VT (From.LdFrag addr:$src2)),
                                         (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                                           (From.VT (From.LdFrag addr:$src2)),
                                           (iPTR imm))>, AVX512AIi8Base, EVEX_4V,
                   EVEX_CD8<From.EltSize, From.CD8TupleForm>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Passes the same pattern operator for masked and unmasked ops.
multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
                            X86VectorVTInfo To,
                            SDPatternOperator vinsert_insert,
                            X86FoldableSchedWrite sched> :
  vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>;

multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                                     X86VectorVTInfo To, PatFrag vinsert_insert,
                                     SDNodeXForm INSERT_get_vinsert_imm, list<Predicate> p> {
  let Predicates = p in {
    def : Pat<(vinsert_insert:$ins
                   (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                     To.RC:$src1, From.RC:$src2,
                     (INSERT_get_vinsert_imm To.RC:$ins)))>;

    def : Pat<(vinsert_insert:$ins
                   (To.VT To.RC:$src1),
                   (From.VT (From.LdFrag addr:$src2)),
                   (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rm")
                     To.RC:$src1, addr:$src2,
                     (INSERT_get_vinsert_imm To.RC:$ins)))>;
  }
}

multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
                            ValueType EltVT64, int Opcode256,
                            X86FoldableSchedWrite sched> {

  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 vinsert128_insert, sched>, EVEX_V256;

  defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 vinsert128_insert, sched>, EVEX_V512;

  defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 vinsert256_insert, sched>, VEX_W, EVEX_V512;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 null_frag, vinsert128_insert, sched>,
                                 VEX_W1X, EVEX_V256;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 null_frag, vinsert128_insert, sched>,
                                 VEX_W, EVEX_V512;

    defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 null_frag, vinsert256_insert, sched>,
                                 EVEX_V512;
  }
}

// FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>;
defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>;

// Codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;

defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;

defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;

// Codegen patterns with the alternative types: insert VEC128 into VEC256.
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v8f16x_info, v16f16x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
// Codegen patterns with the alternative types: insert VEC128 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v8f16x_info, v32f16_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
// Codegen patterns with the alternative types: insert VEC256 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v16f16x_info, v32f16_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;


multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                 X86VectorVTInfo To, X86VectorVTInfo Cast,
                                 PatFrag vinsert_insert,
                                 SDNodeXForm INSERT_get_vinsert_imm,
                                 list<Predicate> p> {
let Predicates = p in {
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT
                                                  To.RC:$src1),
                                                 (From.VT From.RC:$src2),
                                                 (iPTR imm))),
                           Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rrk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT
                                                  (bitconvert
                                                   (From.LdFrag addr:$src2))),
                                                 (iPTR imm))),
                           Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rmk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;

  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT From.RC:$src2),
                                                 (iPTR imm))),
                           Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rrkz")
             Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT (From.LdFrag addr:$src2)),
                                                 (iPTR imm))),
                           Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rmkz")
             Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
}
}

defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
                             v8f32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
                             v4f64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4i32x_info, v8i32x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v8i16x_info, v16i16x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v16i8x_info, v32i8x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
                             v16f32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
                             v8f64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z",
                             v8i16x_info, v32i16_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
                             v16f32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
                             v8f64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

// vinsertps - insert f32 into XMM
let ExeDomain = SSEPackedSingle in {
let isCommutable = 1 in
def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
      (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, timm:$src3))]>,
      EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
      (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1,
                         (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                         timm:$src3))]>,
      EVEX_4V, EVEX_CD8<32, CD8VT1>,
      Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
}

//===----------------------------------------------------------------------===//
// AVX-512 VECTOR EXTRACT
//---

// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one.
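// For example, the 128-bit-from-512-bit f32 instantiation below yields
// instructions of the form
//   vextractf32x4 $1, %zmm1, %xmm0 {%k1} {z}
// in register (rr), store (mr) and masked-store (mrk) variants.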
multiclass vextract_for_size_split<int Opcode,
                                   X86VectorVTInfo From, X86VectorVTInfo To,
                                   SDPatternOperator vextract_extract,
                                   SDPatternOperator vextract_for_mask,
                                   SchedWrite SchedRR, SchedWrite SchedMR> {

  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
                (ins From.RC:$src1, u8imm:$idx),
                "vextract" # To.EltTypeName # "x" # To.NumElts,
                "$idx, $src1", "$src1, $idx",
                (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
                (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
                AVX512AIi8Base, EVEX, Sched<[SchedRR]>;

    def mr  : AVX512AIi8<Opcode, MRMDestMem, (outs),
                    (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
                    "vextract" # To.EltTypeName # "x" # To.NumElts #
                        "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
                    [(store (To.VT (vextract_extract:$idx
                                    (From.VT From.RC:$src1), (iPTR imm))),
                            addr:$dst)]>, EVEX,
                    Sched<[SchedMR]>;

    let mayStore = 1, hasSideEffects = 0 in
    def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
                    (ins To.MemOp:$dst, To.KRCWM:$mask,
                         From.RC:$src1, u8imm:$idx),
                    "vextract" # To.EltTypeName # "x" # To.NumElts #
                        "\t{$idx, $src1, $dst {${mask}}|"
                        "$dst {${mask}}, $src1, $idx}", []>,
                    EVEX_K, EVEX, Sched<[SchedMR]>, NotMemoryFoldable;
  }
}

// Passes the same pattern operator for masked and unmasked ops.
multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
                             X86VectorVTInfo To,
                             SDPatternOperator vextract_extract,
                             SchedWrite SchedRR, SchedWrite SchedMR> :
  vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract, SchedRR, SchedMR>;

// Codegen patterns for the alternative types.
multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                                      X86VectorVTInfo To, PatFrag vextract_extract,
                                      SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
  let Predicates = p in {
    def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                        From.RC:$src1,
                        (EXTRACT_get_vextract_imm To.RC:$ext)))>;
    def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
                            (iPTR imm))), addr:$dst),
              (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1,
                (EXTRACT_get_vextract_imm To.RC:$ext))>;
  }
}

multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
                             ValueType EltVT64, int Opcode256,
                             SchedWrite SchedRR, SchedWrite SchedMR> {
  let Predicates = [HasAVX512] in {
    defm NAME # "32x4Z" : vextract_for_size<Opcode128,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 vextract128_extract, SchedRR, SchedMR>,
                                 EVEX_V512, EVEX_CD8<32, CD8VT4>;
    defm NAME # "64x4Z" : vextract_for_size<Opcode256,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 vextract256_extract, SchedRR, SchedMR>,
                                 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
  }
  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 vextract128_extract, SchedRR, SchedMR>,
                                 EVEX_V256, EVEX_CD8<32, CD8VT4>;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
                                 VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vextract_for_size_split<Opcode128,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
                                 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
    defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 null_frag, vextract256_extract, SchedRR, SchedMR>,
                                 EVEX_V512, EVEX_CD8<32, CD8VT8>;
  }
}

// TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, WriteFShuffle256, WriteFStore>;
defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, WriteShuffle256, WriteVecStore>;

// extract_subvector codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen patterns with the alternative types: extract VEC128 from VEC256.
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v16f16x_info, v8f16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen patterns with the alternative types: extract VEC128 from VEC512.
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v32f16_info, v8f16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
// Codegen patterns with the alternative types: extract VEC256 from VEC512.
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z",
          v64i8_info, v32i8x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v32f16_info, v16f16x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;


// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
let Predicates = [NoVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI128rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF128rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI128rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF128rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI128rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))),
          (v8f16 (VEXTRACTF128rr
                  (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI128rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}

// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
let Predicates = [HasVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI32x4Z256rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF32x4Z256rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI32x4Z256rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF32x4Z256rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI32x4Z256rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))),
          (v8f16 (VEXTRACTF32x4Z256rr
                  (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI32x4Z256rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}


// Additional patterns for handling a bitcast between the vselect and the
// extract_subvector.
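// For example, a masked 128-bit extract whose result is used as v4f32 may
// wrap a v2f64 extract_subvector in a bitcast; these patterns look through
// the bitcast so the masked VEXTRACT* forms can still be selected.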
multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                  X86VectorVTInfo To, X86VectorVTInfo Cast,
                                  PatFrag vextract_extract,
                                  SDNodeXForm EXTRACT_get_vextract_imm,
                                  list<Predicate> p> {
let Predicates = p in {
  def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
                                   (bitconvert
                                    (To.VT (vextract_extract:$ext
                                            (From.VT From.RC:$src), (iPTR imm)))),
                                   To.RC:$src0)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
                      Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;

  def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
                                   (bitconvert
                                    (To.VT (vextract_extract:$ext
                                            (From.VT From.RC:$src), (iPTR imm)))),
                                   Cast.ImmAllZerosV)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
                      Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
}
}

defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
                              v8f32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
                              v4f64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;

// vextractps - extract 32 bits from XMM
def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32orGR64:$dst),
                      (ins VR128X:$src1, u8imm:$src2),
                      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set GR32orGR64:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
                      EVEX, VEX_WIG, Sched<[WriteVecExtract]>;

def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
                      (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
                      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
                              addr:$dst)]>,
                      EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;

//===---------------------------------------------------------------------===//
// AVX-512 BROADCAST
//---
// broadcast with a scalar argument.
multiclass avx512_broadcast_scalar<string Name, X86VectorVTInfo DestInfo,
                                   X86VectorVTInfo SrcInfo> {
  def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rr)
             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
                                       (X86VBroadcast SrcInfo.FRC:$src),
                                       DestInfo.RC:$src0)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rrk)
             DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
                                       (X86VBroadcast SrcInfo.FRC:$src),
                                       DestInfo.ImmAllZerosV)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rrkz)
             DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
}

// Split version to allow mask and broadcast node to be different types. This
// helps support the 32x2 broadcasts.
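// (For the 32x2 broadcasts the broadcast element is a 64-bit pair of i32
// values, so the broadcast result type and the write-mask granularity differ
// and are joined by a bitconvert.)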
multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
                                     SchedWrite SchedRR, SchedWrite SchedRM,
                                     X86VectorVTInfo MaskInfo,
                                     X86VectorVTInfo DestInfo,
                                     X86VectorVTInfo SrcInfo,
                                     bit IsConvertibleToThreeAddress,
                                     SDPatternOperator UnmaskedOp = X86VBroadcast,
                                     SDPatternOperator UnmaskedBcastOp = SrcInfo.BroadcastLdFrag> {
  let hasSideEffects = 0 in
  def rr : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), (ins SrcInfo.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set MaskInfo.RC:$dst,
                      (MaskInfo.VT
                       (bitconvert
                        (DestInfo.VT
                         (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))],
                    DestInfo.ExeDomain>, T8PD, EVEX, Sched<[SchedRR]>;
  def rrkz : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
                      (ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                                 "${dst} {${mask}} {z}, $src}"),
                      [(set MaskInfo.RC:$dst,
                        (vselect_mask MaskInfo.KRCWM:$mask,
                         (MaskInfo.VT
                          (bitconvert
                           (DestInfo.VT
                            (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
                         MaskInfo.ImmAllZerosV))],
                      DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ, Sched<[SchedRR]>;
  let Constraints = "$src0 = $dst" in
  def rrk : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
                     (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
                          SrcInfo.RC:$src),
                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
                                "${dst} {${mask}}, $src}"),
                     [(set MaskInfo.RC:$dst,
                       (vselect_mask MaskInfo.KRCWM:$mask,
                        (MaskInfo.VT
                         (bitconvert
                          (DestInfo.VT
                           (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
                        MaskInfo.RC:$src0))],
                     DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K, Sched<[SchedRR]>;

  let hasSideEffects = 0, mayLoad = 1 in
  def rm : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
                    (ins SrcInfo.ScalarMemOp:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set MaskInfo.RC:$dst,
                      (MaskInfo.VT
                       (bitconvert
                        (DestInfo.VT
                         (UnmaskedBcastOp addr:$src)))))],
                    DestInfo.ExeDomain>, T8PD, EVEX,
                    EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;

  def rmkz : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
                      (ins MaskInfo.KRCWM:$mask, SrcInfo.ScalarMemOp:$src),
                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                                 "${dst} {${mask}} {z}, $src}"),
                      [(set MaskInfo.RC:$dst,
                        (vselect_mask MaskInfo.KRCWM:$mask,
                         (MaskInfo.VT
                          (bitconvert
                           (DestInfo.VT
                            (SrcInfo.BroadcastLdFrag addr:$src)))),
                         MaskInfo.ImmAllZerosV))],
                      DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ,
                      EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;

  let Constraints = "$src0 = $dst",
      isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
  def rmk : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
                     (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
                          SrcInfo.ScalarMemOp:$src),
                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
                                "${dst} {${mask}}, $src}"),
                     [(set MaskInfo.RC:$dst,
                       (vselect_mask MaskInfo.KRCWM:$mask,
                        (MaskInfo.VT
                         (bitconvert
                          (DestInfo.VT
                           (SrcInfo.BroadcastLdFrag addr:$src)))),
                        MaskInfo.RC:$src0))],
                     DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K,
                     EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
}

// Helper class to force the mask and broadcast result to the same type.
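// For example, the VBROADCASTSS instantiation below expands to
// VBROADCASTSSZrr/Zrrk/Zrrkz and VBROADCASTSSZrm/Zrmk/Zrmkz for the 512-bit
// form, plus Z256/Z128 variants under HasVLX.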
multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
                               SchedWrite SchedRR, SchedWrite SchedRM,
                               X86VectorVTInfo DestInfo,
                               X86VectorVTInfo SrcInfo,
                               bit IsConvertibleToThreeAddress> :
  avx512_broadcast_rm_split<opc, OpcodeStr, SchedRR, SchedRM,
                            DestInfo, DestInfo, SrcInfo,
                            IsConvertibleToThreeAddress>;

multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                 WriteFShuffle256Ld, _.info512, _.info128, 1>,
             avx512_broadcast_scalar<NAME, _.info512, _.info128>,
             EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                    WriteFShuffle256Ld, _.info256, _.info128, 1>,
                avx512_broadcast_scalar<NAME, _.info256, _.info128>,
                EVEX_V256;
  }
}

multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                 WriteFShuffle256Ld, _.info512, _.info128, 1>,
             avx512_broadcast_scalar<NAME, _.info512, _.info128>,
             EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                    WriteFShuffle256Ld, _.info256, _.info128, 1>,
                avx512_broadcast_scalar<NAME, _.info256, _.info128>,
                EVEX_V256;
    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                    WriteFShuffle256Ld, _.info128, _.info128, 1>,
                avx512_broadcast_scalar<NAME, _.info128, _.info128>,
                EVEX_V128;
  }
}
defm VBROADCASTSS : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
                                           avx512vl_f32_info>;
defm VBROADCASTSD : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
                                           avx512vl_f64_info>, VEX_W1X;

multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
                                    X86VectorVTInfo _, SDPatternOperator OpNode,
                                    RegisterClass SrcRC> {
  // Fold with a mask even if it has multiple uses since it is cheap.
  let ExeDomain = _.ExeDomain in
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins SrcRC:$src),
                            "vpbroadcast"#_.Suffix, "$src", "$src",
                            (_.VT (OpNode SrcRC:$src)), /*IsCommutable*/0,
                            /*IsKCommutable*/0, /*IsKZCommutable*/0, vselect>,
            T8PD, EVEX, Sched<[SchedRR]>;
}

multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
                                      X86VectorVTInfo _, SDPatternOperator OpNode,
                                      RegisterClass SrcRC, SubRegIndex Subreg> {
  let hasSideEffects = 0, ExeDomain = _.ExeDomain in
  defm rr : AVX512_maskable_custom<opc, MRMSrcReg,
                                   (outs _.RC:$dst), (ins GR32:$src),
                                   !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
                                   !con((ins _.KRCWM:$mask), (ins GR32:$src)),
                                   "vpbroadcast"#_.Suffix, "$src", "$src", [], [], [],
                                   "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>;

  def : Pat <(_.VT (OpNode SrcRC:$src)),
             (!cast<Instruction>(Name#rr)
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  // Fold with a mask even if it has multiple uses since it is cheap.
  def : Pat<(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
            (!cast<Instruction>(Name#rrk) _.RC:$src0, _.KRCWM:$mask,
             (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  def : Pat<(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
            (!cast<Instruction>(Name#rrkz) _.KRCWM:$mask,
             (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
}

multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
                                         AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
                                         RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
                                      OpNode, SrcRC, Subreg>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
                                           _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
    defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
                                           _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
  }
}

multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
                                       SDPatternOperator OpNode,
                                       RegisterClass SrcRC, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
                                    SrcRC>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
                                         SrcRC>, EVEX_V256;
    defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
                                         SrcRC>, EVEX_V128;
  }
}

defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
                       avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
                       avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
                       HasBWI>;
defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
                                                 X86VBroadcast, GR32, HasAVX512>;
defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
                                                 X86VBroadcast, GR64, HasAVX512>, VEX_W;

multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
                                      AVX512VLVectorVTInfo _, Predicate prd,
                                      bit IsConvertibleToThreeAddress> {
  let Predicates = [prd] in {
    defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
                                 WriteShuffle256Ld, _.info512, _.info128,
                                 IsConvertibleToThreeAddress>,
             EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
                                    WriteShuffle256Ld, _.info256, _.info128,
                                    IsConvertibleToThreeAddress>,
                EVEX_V256;
    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle,
                                    WriteShuffleXLd, _.info128, _.info128,
                                    IsConvertibleToThreeAddress>,
                EVEX_V128;
  }
}

defm VPBROADCASTB : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
                                               avx512vl_i8_info, HasBWI, 0>;
defm VPBROADCASTW : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
                                               avx512vl_i16_info, HasBWI, 0>;
defm VPBROADCASTD : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
                                               avx512vl_i32_info, HasAVX512, 1>;
defm VPBROADCASTQ : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
                                               avx512vl_i64_info, HasAVX512, 1>, VEX_W1X;

multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
                                      SDPatternOperator OpNode,
                                      X86VectorVTInfo _Dst,
                                      X86VectorVTInfo _Src> {
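  // The memory form broadcasts a whole 128- or 256-bit subvector; e.g.
  // (illustrative) vbroadcasti32x4 zmm0, xmmword ptr [rax] repeats the
  // 16-byte load into every 128-bit chunk of zmm0.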
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                            (_Dst.VT (OpNode addr:$src))>,
            Sched<[SchedWriteShuffle.YMM.Folded]>,
            AVX5128IBase, EVEX;
}

// This should be used for the AVX512DQ broadcast instructions. It disables
// the unmasked patterns so that we only use the DQ instructions when masking
// is requested.
multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
                                         SDPatternOperator OpNode,
                                         X86VectorVTInfo _Dst,
                                         X86VectorVTInfo _Src> {
  let hasSideEffects = 0, mayLoad = 1 in
  defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                                  (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                                  (null_frag),
                                  (_Dst.VT (OpNode addr:$src))>,
            Sched<[SchedWriteShuffle.YMM.Folded]>,
            AVX5128IBase, EVEX;
}
let Predicates = [HasBWI] in {
  def : Pat<(v32f16 (X86VBroadcastld16 addr:$src)),
            (VPBROADCASTWZrm addr:$src)>;

  def : Pat<(v32f16 (X86VBroadcast (v8f16 VR128X:$src))),
            (VPBROADCASTWZrr VR128X:$src)>;
  def : Pat<(v32f16 (X86VBroadcast (f16 FR16X:$src))),
            (VPBROADCASTWZrr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
}
let Predicates = [HasVLX, HasBWI] in {
  def : Pat<(v8f16 (X86VBroadcastld16 addr:$src)),
            (VPBROADCASTWZ128rm addr:$src)>;
  def : Pat<(v16f16 (X86VBroadcastld16 addr:$src)),
            (VPBROADCASTWZ256rm addr:$src)>;

  def : Pat<(v8f16 (X86VBroadcast (v8f16 VR128X:$src))),
            (VPBROADCASTWZ128rr VR128X:$src)>;
  def : Pat<(v16f16 (X86VBroadcast (v8f16 VR128X:$src))),
            (VPBROADCASTWZ256rr VR128X:$src)>;

  def : Pat<(v8f16 (X86VBroadcast (f16 FR16X:$src))),
            (VPBROADCASTWZ128rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
  def : Pat<(v16f16 (X86VBroadcast (f16 FR16X:$src))),
            (VPBROADCASTWZ256rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
}

//===----------------------------------------------------------------------===//
// AVX-512 BROADCAST SUBVECTORS
//

defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                         X86SubVBroadcastld128, v16i32_info, v4i32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                         X86SubVBroadcastld128, v16f32_info, v4f32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
                         X86SubVBroadcastld256, v8i64_info, v4i64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
                         X86SubVBroadcastld256, v8f64_info, v4f64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT4>;

let Predicates = [HasAVX512] in {
def : Pat<(v8f64 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTF64X4rm addr:$src)>;
def : Pat<(v16f32 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTF64X4rm addr:$src)>;
def : Pat<(v32f16 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTF64X4rm addr:$src)>;
def : Pat<(v8i64 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v16i32 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTI64X4rm addr:$src)>;

def : Pat<(v8f64 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4rm addr:$src)>;
def : Pat<(v16f32 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4rm addr:$src)>;
def : Pat<(v32f16 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4rm addr:$src)>;
def : Pat<(v8i64 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v16i32 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4rm addr:$src)>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
                        (v16f32 immAllZerosV)),
          (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
                        VR512:$src0),
          (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
                        (v16i32 immAllZerosV)),
          (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
                        VR512:$src0),
          (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;

def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
                        (v8f64 immAllZerosV)),
          (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
                        VR512:$src0),
          (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
                        (v8i64 immAllZerosV)),
          (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
                        VR512:$src0),
          (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}

let Predicates = [HasVLX] in {
defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                             X86SubVBroadcastld128, v8i32x_info, v4i32x_info>,
                           EVEX_V256, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                             X86SubVBroadcastld128, v8f32x_info, v4f32x_info>,
                           EVEX_V256, EVEX_CD8<32, CD8VT4>;

def : Pat<(v4f64 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4Z256rm addr:$src)>;
def : Pat<(v8f32 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4Z256rm addr:$src)>;
def : Pat<(v16f16 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4Z256rm addr:$src)>;
def : Pat<(v4i64 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v8i32 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v16i16 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4Z256rm addr:$src)>;

// Patterns for selects of bitcasted operations.
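// (As with the 512-bit patterns above: the DAG may have typed the subvector
// broadcast with 64-bit elements, but a masked select over its bitcast can
// still use the 32x4 forms because those mask per 32-bit element.)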
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
                        (v8f32 immAllZerosV)),
          (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
                        VR256X:$src0),
          (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
                        (v8i32 immAllZerosV)),
          (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
                        VR256X:$src0),
          (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
}

let Predicates = [HasVLX, HasDQI] in {
defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                             X86SubVBroadcastld128, v4i64x_info, v2i64x_info>, VEX_W1X,
                           EVEX_V256, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                             X86SubVBroadcastld128, v4f64x_info, v2f64x_info>, VEX_W1X,
                           EVEX_V256, EVEX_CD8<64, CD8VT2>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect_mask VK4WM:$mask,
                        (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
                        (v4f64 immAllZerosV)),
          (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK4WM:$mask,
                        (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
                        VR256X:$src0),
          (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK4WM:$mask,
                        (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
                        (v4i64 immAllZerosV)),
          (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK4WM:$mask,
                        (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
                        VR256X:$src0),
          (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
}

let Predicates = [HasDQI] in {
defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                         X86SubVBroadcastld128, v8i64_info, v2i64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
                         X86SubVBroadcastld256, v16i32_info, v8i32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                         X86SubVBroadcastld128, v8f64_info, v2f64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
                         X86SubVBroadcastld256, v16f32_info, v8f32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT8>;

// Patterns for selects of bitcasted operations.
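// (Same idea for the 512-bit DQ forms: the 32x8 instructions serve selects
// masked per 32-bit element and the 64x2 instructions those masked per
// 64-bit element, whichever way the DAG typed the broadcast.)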
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
                        (v16f32 immAllZerosV)),
          (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
                        VR512:$src0),
          (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
                        (v16i32 immAllZerosV)),
          (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
                        VR512:$src0),
          (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;

def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
                        (v8f64 immAllZerosV)),
          (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
                        VR512:$src0),
          (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
                        (v8i64 immAllZerosV)),
          (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
                        VR512:$src0),
          (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}

multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
                                        AVX512VLVectorVTInfo _Dst,
                                        AVX512VLVectorVTInfo _Src> {
  let Predicates = [HasDQI] in
  defm Z : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
                                     WriteShuffle256Ld, _Dst.info512,
                                     _Src.info512, _Src.info128, 0, null_frag, null_frag>,
           EVEX_V512;
  let Predicates = [HasDQI, HasVLX] in
  defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
                                        WriteShuffle256Ld, _Dst.info256,
                                        _Src.info256, _Src.info128, 0, null_frag, null_frag>,
              EVEX_V256;
}

multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
                                         AVX512VLVectorVTInfo _Dst,
                                         AVX512VLVectorVTInfo _Src> :
  avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {

  let Predicates = [HasDQI, HasVLX] in
  defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle,
                                        WriteShuffleXLd, _Dst.info128,
                                        _Src.info128, _Src.info128, 0, null_frag, null_frag>,
              EVEX_V128;
}

defm VBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
                         avx512vl_i32_info, avx512vl_i64_info>;
defm VBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
                         avx512vl_f32_info, avx512vl_f64_info>;

//===----------------------------------------------------------------------===//
// AVX-512 BROADCAST MASK TO VECTOR REGISTER
//---
multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
                                  X86VectorVTInfo _, RegisterClass KRC> {
  def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                      [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>,
           EVEX, Sched<[WriteShuffle]>;
}

multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
  let Predicates = [HasCDI] in
  defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
  let Predicates = [HasCDI, HasVLX] in {
    defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
    defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
  }
}

defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
                                             avx512vl_i32_info, VK16>;
defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
                                             avx512vl_i64_info, VK8>, VEX_W;

//===----------------------------------------------------------------------===//
// -- VPERMI2 - 3 source operands form --
multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched,
                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
    hasSideEffects = 0 in {
  defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
           (ins _.RC:$src2, _.RC:$src3),
           OpcodeStr, "$src3, $src2", "$src2, $src3",
           (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
           EVEX_4V, AVX5128IBase, Sched<[sched]>;

  let mayLoad = 1 in
  defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
           (ins _.RC:$src2, _.MemOp:$src3),
           OpcodeStr, "$src3, $src2", "$src2, $src3",
           (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
                  (_.VT (_.LdFrag addr:$src3)))), 1>,
           EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
      hasSideEffects = 0, mayLoad = 1 in
  defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
            (ins _.RC:$src2, _.ScalarMemOp:$src3),
            OpcodeStr, !strconcat("${src3}", _.BroadcastStr, ", $src2"),
            !strconcat("$src2, ${src3}", _.BroadcastStr),
            (_.VT (X86VPermt2 _.RC:$src2,
                   IdxVT.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
            AVX5128IBase, EVEX_4V, EVEX_B,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               AVX512VLVectorVTInfo ShuffleMask> {
  defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
                           ShuffleMask.info512>,
             avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512,
                              ShuffleMask.info512>, EVEX_V512;
  let Predicates = [HasVLX] in {
    defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
                                 ShuffleMask.info128>,
                   avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128,
                                    ShuffleMask.info128>, EVEX_V128;
    defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
                                 ShuffleMask.info256>,
                   avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256,
                                    ShuffleMask.info256>, EVEX_V256;
  }
}

multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched,
                                  AVX512VLVectorVTInfo VTInfo,
                                  AVX512VLVectorVTInfo Idx,
                                  Predicate Prd> {
  let Predicates = [Prd] in
  defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
                           Idx.info512>, EVEX_V512;
  let Predicates = [Prd, HasVLX] in {
    defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
                                 Idx.info128>, EVEX_V128;
    defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
                                 Idx.info256>, EVEX_V256;
  }
}

defm VPERMI2D : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256,
                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2Q : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
                  avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMI2W : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256,
                  avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
                VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMI2B : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256,
                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
                EVEX_CD8<8, CD8VF>;
defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256,
                   avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
                   avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;

// Extra patterns to deal with extra bitcasts due to passthru and index being
// different types on the fp versions.
multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
                                  X86VectorVTInfo IdxVT,
                                  X86VectorVTInfo CastVT> {
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                                (X86VPermt2 (_.VT _.RC:$src2),
                                            (IdxVT.VT (bitconvert
                                                       (CastVT.VT _.RC:$src1))),
                                            _.RC:$src3),
                                (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3)>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                                (X86VPermt2 _.RC:$src2,
                                            (IdxVT.VT (bitconvert
                                                       (CastVT.VT _.RC:$src1))),
                                            (_.LdFrag addr:$src3)),
                                (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3)>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                                (X86VPermt2 _.RC:$src2,
                                            (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
                                            (_.BroadcastLdFrag addr:$src3)),
                                (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3)>;
}

// TODO: Should we add more casts? The vXi64 case is common due to ABI.
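// (These lowerings fire when the merge passthru is the index operand itself;
// for VPERMI2* the index is the tied $src1, and the fp variants need the
// extra bitcasts because the index has an integer VT.)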
defm : avx512_perm_i_lowering<"VPERMI2PS", v16f32_info, v16i32_info, v8i64_info>;
defm : avx512_perm_i_lowering<"VPERMI2PS256", v8f32x_info, v8i32x_info, v4i64x_info>;
defm : avx512_perm_i_lowering<"VPERMI2PS128", v4f32x_info, v4i32x_info, v2i64x_info>;

// VPERMT2
multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched,
                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
           (ins IdxVT.RC:$src2, _.RC:$src3),
           OpcodeStr, "$src3, $src2", "$src2, $src3",
           (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
           EVEX_4V, AVX5128IBase, Sched<[sched]>;

  defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
           (ins IdxVT.RC:$src2, _.MemOp:$src3),
           OpcodeStr, "$src3, $src2", "$src2, $src3",
           (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
                  (_.LdFrag addr:$src3))), 1>,
           EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
  defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
            (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
            OpcodeStr, !strconcat("${src3}", _.BroadcastStr, ", $src2"),
            !strconcat("$src2, ${src3}", _.BroadcastStr),
            (_.VT (X86VPermt2 _.RC:$src1,
                   IdxVT.RC:$src2, (_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
            AVX5128IBase, EVEX_4V, EVEX_B,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               AVX512VLVectorVTInfo ShuffleMask> {
  defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
                           ShuffleMask.info512>,
             avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512,
                              ShuffleMask.info512>, EVEX_V512;
  let Predicates = [HasVLX] in {
    defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
                                 ShuffleMask.info128>,
                   avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128,
                                    ShuffleMask.info128>, EVEX_V128;
    defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
                                 ShuffleMask.info256>,
                   avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256,
                                    ShuffleMask.info256>, EVEX_V256;
  }
}

multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched,
                                  AVX512VLVectorVTInfo VTInfo,
                                  AVX512VLVectorVTInfo Idx, Predicate Prd> {
  let Predicates = [Prd] in
  defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
                           Idx.info512>, EVEX_V512;
  let Predicates = [Prd, HasVLX] in {
    defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
                                 Idx.info128>, EVEX_V128;
    defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
                                 Idx.info256>, EVEX_V256;
  }
}

defm VPERMT2D : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256,
                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMT2Q : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
                  avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMT2W : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256,
                  avx512vl_i16_info,
                  avx512vl_i16_info, HasBWI>,
                VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMT2B : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256,
                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
                EVEX_CD8<8, CD8VF>;
defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256,
                   avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
                   avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;

//===----------------------------------------------------------------------===//
// AVX-512 - BLEND using mask
//

multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.RC:$src2),
                    !strconcat(OpcodeStr,
                               "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>,
           EVEX_4V, Sched<[sched]>;
  def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
                     (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
                     !strconcat(OpcodeStr,
                                "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
                     []>, EVEX_4V, EVEX_K, Sched<[sched]>;
  def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
                      (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
                      !strconcat(OpcodeStr,
                                 "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
                      []>, EVEX_4V, EVEX_KZ, Sched<[sched]>, NotMemoryFoldable;
  let mayLoad = 1 in {
  def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.MemOp:$src2),
                    !strconcat(OpcodeStr,
                               "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
                    []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
                     (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
                     !strconcat(OpcodeStr,
                                "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
                     []>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
                     Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
                      (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
                      !strconcat(OpcodeStr,
                                 "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
                      []>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
  }
  }
}

multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in {
  def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
                      (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
                      !strconcat(OpcodeStr,
                                 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                                 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
                      EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;

  def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
                       (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
                       !strconcat(OpcodeStr,
                                  "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
                                  "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
                       EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
                       Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
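  // Unmasked broadcast form; like the other blend forms here it carries no
  // ISel pattern.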
  def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
                     (ins _.RC:$src1, _.ScalarMemOp:$src2),
                     !strconcat(OpcodeStr,
                                "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
                                "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
                     EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
                     Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
                        AVX512VLVectorVTInfo VTInfo> {
  defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
           WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                EVEX_V128;
  }
}

multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
                        AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasBWI] in
  defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [HasBWI, HasVLX] in {
    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                EVEX_V128;
  }
}

defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend,
                              avx512vl_f32_info>;
defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
                              avx512vl_f64_info>, VEX_W;
defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend,
                              avx512vl_i32_info>;
defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
                              avx512vl_i64_info>, VEX_W;
defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
                              avx512vl_i8_info>;
defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
                              avx512vl_i16_info>, VEX_W;

//===----------------------------------------------------------------------===//
// Compare Instructions
//===----------------------------------------------------------------------===//

// avx512_cmp_scalar - AVX512 CMPSS and CMPSD

multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
                             PatFrag OpNode_su, PatFrag OpNodeSAE_su,
                             X86FoldableSchedWrite sched> {
  defm rr_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                      (outs _.KRC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                      "vcmp"#_.Suffix,
                      "$cc, $src2, $src1", "$src1, $src2, $cc",
                      (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
                      (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                 timm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
  let mayLoad = 1 in
  defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                      (outs _.KRC:$dst),
                      (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc),
                      "vcmp"#_.Suffix,
                      "$cc, $src2, $src1", "$src1, $src2, $cc",
                      (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
                              timm:$cc),
                      (OpNode_su (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
                                 timm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
                Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;

  let Uses = [MXCSR] in
  defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                       (outs _.KRC:$dst),
                       (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                       "vcmp"#_.Suffix,
                       "$cc, {sae}, $src2, $src1", "$src1, $src2, {sae}, $cc",
                       (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                  timm:$cc),
                       (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                     timm:$cc)>,
                 EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;

  let isCodeGenOnly = 1 in {
    let isCommutable = 1 in
    def rr : AVX512Ii8<0xC2, MRMSrcReg,
                       (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, u8imm:$cc),
                       !strconcat("vcmp", _.Suffix,
                                  "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
                       [(set _.KRC:$dst, (OpNode _.FRC:$src1,
                                                 _.FRC:$src2,
                                                 timm:$cc))]>,
             EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
    def rm : AVX512Ii8<0xC2, MRMSrcMem,
                       (outs _.KRC:$dst),
                       (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
                       !strconcat("vcmp", _.Suffix,
                                  "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
                       [(set _.KRC:$dst, (OpNode _.FRC:$src1,
                                                 (_.ScalarLdFrag addr:$src2),
                                                 timm:$cc))]>,
             EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
             Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  }
}

def X86cmpms_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                          (X86cmpms node:$src1, node:$src2, node:$cc), [{
  return N->hasOneUse();
}]>;
def X86cmpmsSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                             (X86cmpmsSAE node:$src1, node:$src2, node:$cc), [{
  return N->hasOneUse();
}]>;

let Predicates = [HasAVX512] in {
  let ExeDomain = SSEPackedSingle in
  defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsSAE,
                                   X86cmpms_su, X86cmpmsSAE_su,
                                   SchedWriteFCmp.Scl>, AVX512XSIi8Base;
  let ExeDomain = SSEPackedDouble in
  defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsSAE,
                                   X86cmpms_su, X86cmpmsSAE_su,
                                   SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W;
}
let Predicates = [HasFP16], ExeDomain = SSEPackedSingle in
  defm VCMPSHZ : avx512_cmp_scalar<f16x_info, X86cmpms, X86cmpmsSAE,
                                   X86cmpms_su, X86cmpmsSAE_su,
                                   SchedWriteFCmp.Scl>, AVX512XSIi8Base, TA;

multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr,
                              X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, bit IsCommutable> {
  let isCommutable = IsCommutable, hasSideEffects = 0 in
  def rr : AVX512BI<opc, MRMSrcReg,
                    (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
                    !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    []>, EVEX_4V, Sched<[sched]>;
  let mayLoad = 1, hasSideEffects = 0 in
  def rm : AVX512BI<opc, MRMSrcMem,
                    (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
                    !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    []>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  let isCommutable = IsCommutable, hasSideEffects = 0 in
  def rrk : AVX512BI<opc, MRMSrcReg,
                     (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
                     !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
                                "$dst {${mask}}, $src1, $src2}"),
                     []>, EVEX_4V, EVEX_K, Sched<[sched]>;
  let mayLoad = 1, hasSideEffects = 0 in
  def rmk : AVX512BI<opc, MRMSrcMem,
                     (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
                     !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
                                "$dst {${mask}}, $src1, $src2}"),
                     []>, EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_icmp_packed_rmb<bits<8> opc,
                                  string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  bit IsCommutable> :
  avx512_icmp_packed<opc, OpcodeStr, sched, _, IsCommutable> {
  let mayLoad = 1, hasSideEffects = 0 in {
  def rmb : AVX512BI<opc, MRMSrcMem,
                     (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
                     !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
                                "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
                     []>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmbk : AVX512BI<opc, MRMSrcMem,
                      (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
                                              _.ScalarMemOp:$src2),
                      !strconcat(OpcodeStr,
                                 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                                 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
                      []>, EVEX_4V, EVEX_K, EVEX_B,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr,
                                 X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo VTInfo, Predicate prd,
                                 bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_packed<opc, OpcodeStr, sched.ZMM,
                              VTInfo.info512, IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed<opc, OpcodeStr, sched.YMM,
                                   VTInfo.info256, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_icmp_packed<opc, OpcodeStr, sched.XMM,
                                   VTInfo.info128, IsCommutable>, EVEX_V128;
  }
}

multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
                                     X86SchedWriteWidths sched,
                                     AVX512VLVectorVTInfo VTInfo,
                                     Predicate prd, bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.ZMM,
                                  VTInfo.info512, IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.YMM,
                                       VTInfo.info256, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.XMM,
                                       VTInfo.info128, IsCommutable>, EVEX_V128;
  }
}

// This fragment treats X86cmpm as commutable to help match loads in both
// operands for PCMPEQ.
def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>;
def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
                         (setcc node:$src1, node:$src2, SETGT)>;

// AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
// increase the pattern complexity the way an immediate would.
let AddedComplexity = 2 in {
// FIXME: Is there a better scheduler class for VPCMP?
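// Illustrative semantics of the masked compares defined below:
//   vpcmpeqd k1 {k2}, zmm0, zmm1
// sets k1[i] = k2[i] && (zmm0.dword[i] == zmm1.dword[i]); the result is a
// mask register rather than a vector of all-ones/all-zeros elements.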
defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb",
                  SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
                EVEX_CD8<8, CD8VF>, VEX_WIG;

defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw",
                  SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
                EVEX_CD8<16, CD8VF>, VEX_WIG;

defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd",
                  SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
                EVEX_CD8<32, CD8VF>;

defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq",
                  SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb",
                  SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
                EVEX_CD8<8, CD8VF>, VEX_WIG;

defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw",
                  SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
                EVEX_CD8<16, CD8VF>, VEX_WIG;

defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd",
                  SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
                EVEX_CD8<32, CD8VF>;

defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq",
                  SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
}

def X86pcmpm_imm : SDNodeXForm<setcc, [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  uint8_t SSECC = X86::getVPCMPImmForCond(CC);
  return getI8Imm(SSECC, SDLoc(N));
}]>;

// Swapped operand version of the above.
def X86pcmpm_imm_commute : SDNodeXForm<setcc, [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  uint8_t SSECC = X86::getVPCMPImmForCond(CC);
  SSECC = X86::getSwappedVPCMPImm(SSECC);
  return getI8Imm(SSECC, SDLoc(N));
}]>;

multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
                          PatFrag Frag_su,
                          X86FoldableSchedWrite sched,
                          X86VectorVTInfo _, string Name> {
  let isCommutable = 1 in
  def rri : AVX512AIi8<opc, MRMSrcReg,
              (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
              !strconcat("vpcmp", Suffix,
                         "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
              [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
                                                 (_.VT _.RC:$src2),
                                                 cond)))]>,
            EVEX_4V, Sched<[sched]>;
  def rmi : AVX512AIi8<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
              !strconcat("vpcmp", Suffix,
                         "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
              [(set _.KRC:$dst, (_.KVT
                                 (Frag:$cc
                                  (_.VT _.RC:$src1),
                                  (_.VT (_.LdFrag addr:$src2)),
                                  cond)))]>,
            EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  let isCommutable = 1 in
  def rrik : AVX512AIi8<opc, MRMSrcReg,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
                                       u8imm:$cc),
               !strconcat("vpcmp", Suffix,
                          "\t{$cc, $src2, $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, $src2, $cc}"),
               [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                      (_.KVT (Frag_su:$cc (_.VT _.RC:$src1),
                                                          (_.VT _.RC:$src2),
                                                          cond))))]>,
             EVEX_4V, EVEX_K, Sched<[sched]>;
  def rmik : AVX512AIi8<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
                                       u8imm:$cc),
               !strconcat("vpcmp", Suffix,
                          "\t{$cc, $src2, $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, $src2, $cc}"),
               [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                      (_.KVT
                                       (Frag_su:$cc
                                        (_.VT _.RC:$src1),
                                        (_.VT (_.LdFrag addr:$src2)),
                                        cond))))]>,
             EVEX_4V, EVEX_K,
             Sched<[sched.Folded, sched.ReadAfterFold]>;

  def : Pat<(_.KVT (Frag:$cc (_.LdFrag addr:$src2),
                             (_.VT _.RC:$src1), cond)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmi")
             _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>;

  def : Pat<(and _.KRCWM:$mask,
                 (_.KVT (Frag_su:$cc (_.LdFrag addr:$src2),
                                     (_.VT _.RC:$src1), cond))),
            (!cast<Instruction>(Name#_.ZSuffix#"rmik")
             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
             (X86pcmpm_imm_commute $cc))>;
}

multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
                              PatFrag Frag_su, X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, string Name> :
  avx512_icmp_cc<opc, Suffix, Frag, Frag_su, sched, _, Name> {
  def rmib : AVX512AIi8<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
                                       u8imm:$cc),
               !strconcat("vpcmp", Suffix,
                          "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
                          "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
               [(set _.KRC:$dst, (_.KVT (Frag:$cc
                                         (_.VT _.RC:$src1),
                                         (_.BroadcastLdFrag addr:$src2),
                                         cond)))]>,
             EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmibk : AVX512AIi8<opc, MRMSrcMem,
                (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
                                        _.ScalarMemOp:$src2, u8imm:$cc),
                !strconcat("vpcmp", Suffix,
                           "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                           "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
                [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                       (_.KVT (Frag_su:$cc
                                               (_.VT _.RC:$src1),
                                               (_.BroadcastLdFrag addr:$src2),
                                               cond))))]>,
              EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;

  def : Pat<(_.KVT (Frag:$cc (_.BroadcastLdFrag addr:$src2),
                             (_.VT _.RC:$src1), cond)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmib")
             _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>;

  def : Pat<(and _.KRCWM:$mask,
                 (_.KVT (Frag_su:$cc (_.BroadcastLdFrag addr:$src2),
                                     (_.VT _.RC:$src1), cond))),
            (!cast<Instruction>(Name#_.ZSuffix#"rmibk")
             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
             (X86pcmpm_imm_commute $cc))>;
}

multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
                             PatFrag Frag_su, X86SchedWriteWidths sched,
                             AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
                          sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
                               sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
                               sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
  }
}

multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
                                 PatFrag Frag_su, X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
                              sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
                                   sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
                                   sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
  }
}

def X86pcmpm : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                       (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return !ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;

def X86pcmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                          (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;

def X86pcmpum : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                        (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;

def X86pcmpum_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                           (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;

// FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_su,
                                SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
              EVEX_CD8<8, CD8VF>;
defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_su,
                                 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
               EVEX_CD8<8, CD8VF>;

defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_su,
                                SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
              VEX_W, EVEX_CD8<16, CD8VF>;
defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_su,
                                 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
               VEX_W, EVEX_CD8<16, CD8VF>;

defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_su,
                                    SchedWriteVecALU, avx512vl_i32_info,
                                    HasAVX512>, EVEX_CD8<32, CD8VF>;
defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_su,
                                     SchedWriteVecALU, avx512vl_i32_info,
                                     HasAVX512>, EVEX_CD8<32, CD8VF>;

defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_su,
                                    SchedWriteVecALU, avx512vl_i64_info,
                                    HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_su,
                                     SchedWriteVecALU, avx512vl_i64_info,
                                     HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;

def X86cmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                         (X86cmpm node:$src1, node:$src2, node:$cc), [{
  return N->hasOneUse();
}]>;

def X86cmpm_imm_commute : SDNodeXForm<timm, [{
  uint8_t Imm = X86::getSwappedVCMPImm(N->getZExtValue() & 0x1f);
  return getI8Imm(Imm, SDLoc(N));
}]>;

multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
                              string Name> {
let Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                 "vcmp"#_.Suffix,
                 "$cc, $src2, $src1", "$src1, $src2, $cc",
                 (X86any_cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
                 (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
                 1>, Sched<[sched]>;

  defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
                 "vcmp"#_.Suffix,
                 "$cc, $src2, $src1", "$src1, $src2, $cc",
                 (X86any_cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
                              timm:$cc),
                 (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
                             timm:$cc)>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm rmbi :
              AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                 (outs _.KRC:$dst),
                 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
                 "vcmp"#_.Suffix,
                 "$cc, ${src2}"#_.BroadcastStr#", $src1",
                 "$src1, ${src2}"#_.BroadcastStr#", $cc",
                 (X86any_cmpm (_.VT _.RC:$src1),
                              (_.VT (_.BroadcastLdFrag addr:$src2)),
                              timm:$cc),
                 (X86cmpm_su (_.VT _.RC:$src1),
                             (_.VT (_.BroadcastLdFrag addr:$src2)),
                             timm:$cc)>,
              EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }

  // Patterns for selecting with loads in other operand.
  def : Pat<(X86any_cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
                         timm:$cc),
            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
             (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2),
                                            (_.VT _.RC:$src1),
                                            timm:$cc)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
             _.RC:$src1, addr:$src2,
             (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(X86any_cmpm (_.BroadcastLdFrag addr:$src2),
                         (_.VT _.RC:$src1), timm:$cc),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
             (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.BroadcastLdFrag addr:$src2),
                                            (_.VT _.RC:$src1),
                                            timm:$cc)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
             _.RC:$src1, addr:$src2,
             (X86cmpm_imm_commute timm:$cc))>;

  // Patterns for mask intrinsics.
  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc,
                      (_.KVT immAllOnesV)),
            (!cast<Instruction>(Name#_.ZSuffix#"rri") _.RC:$src1, _.RC:$src2, timm:$cc)>;

  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#"rrik") _.KRCWM:$mask, _.RC:$src1,
             _.RC:$src2, timm:$cc)>;

  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
                      (_.KVT immAllOnesV)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, timm:$cc)>;

  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
                      _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask, _.RC:$src1,
             addr:$src2, timm:$cc)>;

  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
                      (_.KVT immAllOnesV)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, timm:$cc)>;

  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
                      _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, _.RC:$src1,
             addr:$src2, timm:$cc)>;

  // Patterns for mask intrinsics with loads in other operand.
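  // (The immediate is commuted along with the operands: X86cmpm_imm_commute
  // rewrites it via X86::getSwappedVCMPImm, so e.g. an LT compare whose load
  // was on the LHS becomes a GT compare with the load folded on the RHS.)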
  def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
                      (_.KVT immAllOnesV)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
             (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
                      _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
             _.RC:$src1, addr:$src2,
             (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
                      (_.KVT immAllOnesV)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
             (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
                      _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
             _.RC:$src1, addr:$src2,
             (X86cmpm_imm_commute timm:$cc))>;
}

multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  // Comparison-code form (VCMP[EQ/LT/LE/...]).
  let Uses = [MXCSR] in
  defm rrib : AVX512_maskable_custom_cmp<0xC2, MRMSrcReg, (outs _.KRC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                  (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2, u8imm:$cc),
                  "vcmp"#_.Suffix,
                  "$cc, {sae}, $src2, $src1",
                  "$src1, $src2, {sae}, $cc",
                  [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
                                     (_.VT _.RC:$src2), timm:$cc, (_.KVT immAllOnesV)))],
                  [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
                                     (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask))]>,
              EVEX_B, Sched<[sched]>;
}

multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
                       Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in {
    defm Z : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
             avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512;
  }
  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
    defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
  }
}

defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
              AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
              AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VCMPPH : avx512_vcmp<SchedWriteFCmp, avx512vl_f16_info, HasFP16>,
              AVX512PSIi8Base, EVEX_4V, EVEX_CD8<16, CD8VF>, TA;

// Patterns to select fp compares with load as first operand.
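// (Scalar analogue of the packed case above: VCMPSS/VCMPSD/VCMPSH can only
// fold a load into $src2, so a compare whose load is on the LHS is commuted
// and its immediate swapped.)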
let Predicates = [HasAVX512] in {
  def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1, timm:$cc)),
            (VCMPSDZrm FR64X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1, timm:$cc)),
            (VCMPSSZrm FR32X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
}

let Predicates = [HasFP16] in {
  def : Pat<(v1i1 (X86cmpms (loadf16 addr:$src2), FR16X:$src1, timm:$cc)),
            (VCMPSHZrm FR16X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
}

// ----------------------------------------------------------------
// FPClass

def X86Vfpclasss_su : PatFrag<(ops node:$src1, node:$src2),
                              (X86Vfpclasss node:$src1, node:$src2), [{
  return N->hasOneUse();
}]>;

def X86Vfpclass_su : PatFrag<(ops node:$src1, node:$src2),
                             (X86Vfpclass node:$src1, node:$src2), [{
  return N->hasOneUse();
}]>;

// Handle the scalar fpclass instruction: mask = op(reg_scalar, imm)
//                                        mask = op(mem_scalar, imm)
multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                 Predicate prd> {
  let Predicates = [prd], ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
    def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                    (ins _.RC:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst, (X86Vfpclasss (_.VT _.RC:$src1),
                                       (i32 timm:$src2)))]>,
             Sched<[sched]>;
    def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
                     OpcodeStr#_.Suffix#
                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                        (X86Vfpclasss_su (_.VT _.RC:$src1),
                                         (i32 timm:$src2))))]>,
              EVEX_K, Sched<[sched]>;
    def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix#
                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,
                      (X86Vfpclasss (_.ScalarIntMemFrags addr:$src1),
                       (i32 timm:$src2)))]>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
    def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
                     OpcodeStr#_.Suffix#
                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                        (X86Vfpclasss_su (_.ScalarIntMemFrags addr:$src1),
                                         (i32 timm:$src2))))]>,
              EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Handle the vector fpclass instruction: mask = fpclass(reg_vec, imm)
//                                        mask = fpclass(mem_vec, imm)
//                                        mask = fpclass(broadcast(eltVt), imm)
multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                 string mem> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
    def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                    (ins _.RC:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst, (X86Vfpclass (_.VT _.RC:$src1),
                                       (i32 timm:$src2)))]>,
             Sched<[sched]>;
    def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
                     OpcodeStr#_.Suffix#
                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                        (X86Vfpclass_su (_.VT _.RC:$src1),
                                         (i32 timm:$src2))))]>,
// Handle fpclass instruction:  mask = fpclass(reg_vec, reg_vec, imm)
//                              mask = fpclass(reg_vec, mem_vec, imm)
//                              mask = fpclass(reg_vec, broadcast(eltVt), imm)
multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                 string mem> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
    def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                    (ins _.RC:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst, (X86Vfpclass (_.VT _.RC:$src1),
                                                   (i32 timm:$src2)))]>,
             Sched<[sched]>;
    def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
                     OpcodeStr#_.Suffix#
                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                        (X86Vfpclass_su (_.VT _.RC:$src1),
                                                        (i32 timm:$src2))))]>,
              EVEX_K, Sched<[sched]>;
    def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.MemOp:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix#"{"#mem#"}"#
                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst, (X86Vfpclass
                                       (_.VT (_.LdFrag addr:$src1)),
                                       (i32 timm:$src2)))]>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
    def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
                     OpcodeStr#_.Suffix#"{"#mem#"}"#
                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su
                                        (_.VT (_.LdFrag addr:$src1)),
                                        (i32 timm:$src2))))]>,
              EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
    def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                     (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                     OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
                     _.BroadcastStr#", $dst|$dst, ${src1}"
                     #_.BroadcastStr#", $src2}",
                     [(set _.KRC:$dst, (X86Vfpclass
                                        (_.VT (_.BroadcastLdFrag addr:$src1)),
                                        (i32 timm:$src2)))]>,
              EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
    def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                      (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
                      OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
                      _.BroadcastStr#", $dst {${mask}}|$dst {${mask}}, ${src1}"#
                      _.BroadcastStr#", $src2}",
                      [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su
                                         (_.VT (_.BroadcastLdFrag addr:$src1)),
                                         (i32 timm:$src2))))]>,
               EVEX_B, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }

  // Allow registers or broadcast with the x, y, z suffix we use to disambiguate
  // the memory form.
  def : InstAlias<OpcodeStr#_.Suffix#mem#
                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  (!cast<Instruction>(NAME#"rr")
                   _.KRC:$dst, _.RC:$src1, i32u8imm:$src2), 0, "att">;
  def : InstAlias<OpcodeStr#_.Suffix#mem#
                  "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                  (!cast<Instruction>(NAME#"rrk")
                   _.KRC:$dst, _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 0, "att">;
  def : InstAlias<OpcodeStr#_.Suffix#mem#
                  "\t{$src2, ${src1}"#_.BroadcastStr#", $dst|$dst, ${src1}"#
                  _.BroadcastStr#", $src2}",
                  (!cast<Instruction>(NAME#"rmb")
                   _.KRC:$dst, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
  def : InstAlias<OpcodeStr#_.Suffix#mem#
                  "\t{$src2, ${src1}"#_.BroadcastStr#", $dst {${mask}}|"
                  "$dst {${mask}}, ${src1}"#_.BroadcastStr#", $src2}",
                  (!cast<Instruction>(NAME#"rmbk")
                   _.KRC:$dst, _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
}

multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
                                     bits<8> opc, X86SchedWriteWidths sched,
                                     Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_vector_fpclass<opc, OpcodeStr, sched.ZMM,
                                   _.info512, "z">, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, sched.XMM,
                                      _.info128, "x">, EVEX_V128;
    defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, sched.YMM,
                                      _.info256, "y">, EVEX_V256;
  }
}
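// In AT&T syntax a memory operand does not imply a vector width, so the
// assembler requires the suffixed mnemonics (e.g. vfpclasspsx/vfpclasspsy/
// vfpclasspsz) for the memory forms; the aliases defined above additionally
// accept the suffix on register and broadcast operands.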
multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
                                 bits<8> opcScalar, X86SchedWriteWidths sched> {
  defm PH : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f16_info, opcVec,
                                      sched, HasFP16>,
            EVEX_CD8<16, CD8VF>, AVX512PSIi8Base, TA;
  defm SHZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
                                   sched.Scl, f16x_info, HasFP16>,
             EVEX_CD8<16, CD8VT1>, AVX512PSIi8Base, TA;
  defm PS : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f32_info, opcVec,
                                      sched, HasDQI>,
            EVEX_CD8<32, CD8VF>, AVX512AIi8Base;
  defm PD : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f64_info, opcVec,
                                      sched, HasDQI>,
            EVEX_CD8<64, CD8VF>, AVX512AIi8Base, VEX_W;
  defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
                                   sched.Scl, f32x_info, HasDQI>, VEX_LIG,
             EVEX_CD8<32, CD8VT1>, AVX512AIi8Base;
  defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
                                   sched.Scl, f64x_info, HasDQI>, VEX_LIG,
             EVEX_CD8<64, CD8VT1>, AVX512AIi8Base, VEX_W;
}

defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp>, EVEX;

//-----------------------------------------------------------------
// Mask register copy, including
// - copy between mask registers
// - load/store mask registers
// - copy from GPR to mask register and vice versa
//
multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
                           string OpcodeStr, RegisterClass KRC,
                           ValueType vvt, X86MemOperand x86memop> {
  let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
  def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
           Sched<[WriteMove]>;
  def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
             [(set KRC:$dst, (vvt (load addr:$src)))]>,
           Sched<[WriteLoad]>;
  def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
             [(store KRC:$src, addr:$dst)]>,
           Sched<[WriteStore]>;
}

multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
                               string OpcodeStr,
                               RegisterClass KRC, RegisterClass GRC> {
  let hasSideEffects = 0 in {
    def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
             Sched<[WriteMove]>;
    def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
             Sched<[WriteMove]>;
  }
}

let Predicates = [HasDQI] in
  defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
               VEX, PD;

let Predicates = [HasAVX512] in
  defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
               VEX, PS;

let Predicates = [HasBWI] in {
  defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1, i32mem>,
               VEX, PD, VEX_W;
  defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
               VEX, XD;
  defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
               VEX, PS, VEX_W;
  defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
               VEX, XD, VEX_W;
}
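// Encoding summary for the defms above: the k<->k and load forms share opcode
// 0x90 (MRMSrcReg vs. MRMSrcMem), stores use 0x91, and the GPR forms use 0x92
// (k = gpr) and 0x93 (gpr = k). Every GPR form goes through a 32-bit register
// except kmovq, which needs GR64; this is why the bitconvert patterns below
// route i8/i16 masks through GR32 with sub-register operations.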
// GR from/to mask register
def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;
def : Pat<(i8 (trunc (i16 (bitconvert (v16i1 VK16:$src))))),
          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_8bit)>;

def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;

def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
          (KMOVWrk VK16:$src)>;
def : Pat<(i64 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
          (SUBREG_TO_REG (i64 0), (KMOVWrk VK16:$src), sub_32bit)>;
def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
          (COPY_TO_REGCLASS VK16:$src, GR32)>;
def : Pat<(i64 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
          (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK16:$src, GR32), sub_32bit)>;

def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
          (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
def : Pat<(i64 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
          (SUBREG_TO_REG (i64 0), (KMOVBrk VK8:$src), sub_32bit)>, Requires<[HasDQI]>;
def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
          (COPY_TO_REGCLASS VK8:$src, GR32)>;
def : Pat<(i64 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
          (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK8:$src, GR32), sub_32bit)>;

def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
          (COPY_TO_REGCLASS GR32:$src, VK32)>;
def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
          (COPY_TO_REGCLASS VK32:$src, GR32)>;
def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
          (COPY_TO_REGCLASS GR64:$src, VK64)>;
def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
          (COPY_TO_REGCLASS VK64:$src, GR64)>;

// Load/store kreg
let Predicates = [HasDQI] in {
  def : Pat<(v1i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
  def : Pat<(v2i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
  def : Pat<(v4i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
}

let Predicates = [HasAVX512] in {
  def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
            (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
  def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))),
            (KMOVWkm addr:$src)>;
}

def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
                         SDTypeProfile<1, 2, [SDTCisVT<0, i8>,
                                              SDTCVecEltisVT<1, i1>,
                                              SDTCisPtrTy<2>]>>;

let Predicates = [HasAVX512] in {
  multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
    def : Pat<(maskVT (scalar_to_vector GR32:$src)),
              (COPY_TO_REGCLASS GR32:$src, maskRC)>;

    def : Pat<(maskVT (scalar_to_vector GR8:$src)),
              (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;

    def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))),
              (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>;

    def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))),
              (i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>;
  }

  defm : operation_gpr_mask_copy_lowering<VK1,  v1i1>;
  defm : operation_gpr_mask_copy_lowering<VK2,  v2i1>;
  defm : operation_gpr_mask_copy_lowering<VK4,  v4i1>;
  defm : operation_gpr_mask_copy_lowering<VK8,  v8i1>;
  defm : operation_gpr_mask_copy_lowering<VK16, v16i1>;
  defm : operation_gpr_mask_copy_lowering<VK32, v32i1>;
  defm : operation_gpr_mask_copy_lowering<VK64, v64i1>;
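  // The pattern below builds a v16i1 value whose bit 0 comes from an i8 and
  // whose upper bits are zero: the AND with 1 clears bits 31:1 of the GPR
  // before KMOVW transfers the low 16 bits into the mask register.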
  def : Pat<(insert_subvector (v16i1 immAllZerosV),
                              (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
            (KMOVWkr (AND32ri8
                      (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
                      (i32 1)))>;
}

// Mask unary operation
// - KNOT
multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
                            RegisterClass KRC, SDPatternOperator OpNode,
                            X86FoldableSchedWrite sched, Predicate prd> {
  let Predicates = [prd] in
  def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
             [(set KRC:$dst, (OpNode KRC:$src))]>,
           Sched<[sched]>;
}

multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNode,
                                X86FoldableSchedWrite sched> {
  defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                            sched, HasDQI>, VEX, PD;
  defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                            sched, HasAVX512>, VEX, PS;
  defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                            sched, HasBWI>, VEX, PD, VEX_W;
  defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                            sched, HasBWI>, VEX, PS, VEX_W;
}

// TODO - do we need a X86SchedWriteWidths::KMASK type?
defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;

// KNL does not support KMOVB, 8-bit mask is promoted to 16-bit
let Predicates = [HasAVX512, NoDQI] in
def : Pat<(vnot VK8:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;

def : Pat<(vnot VK4:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
def : Pat<(vnot VK2:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
def : Pat<(vnot VK1:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK1:$src, VK16)), VK1)>;

// Mask binary operation
// - KAND, KANDN, KOR, KXNOR, KXOR
multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
                             RegisterClass KRC, SDPatternOperator OpNode,
                             X86FoldableSchedWrite sched, Predicate prd,
                             bit IsCommutable> {
  let Predicates = [prd], isCommutable = IsCommutable in
  def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
             !strconcat(OpcodeStr,
                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>,
           Sched<[sched]>;
}

multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
                                 SDPatternOperator OpNode,
                                 X86FoldableSchedWrite sched, bit IsCommutable,
                                 Predicate prdW = HasAVX512> {
  defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                             sched, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
  defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                             sched, prdW, IsCommutable>, VEX_4V, VEX_L, PS;
  defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                             sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
  defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                             sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
}

// These nodes use 'vnot' instead of 'not' to support vectors.
def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;
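// KANDN computes ~$src1 & $src2 (the vandn fragment above) and is therefore
// the only non-commutable operation in the group below; KXNOR matches the
// vxnor fragment. Note that kaddw requires DQI, unlike the other word ops.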
// TODO - do we need a X86SchedWriteWidths::KMASK type?
defm KAND  : avx512_mask_binop_all<0x41, "kand",  and,     SchedWriteVecLogic.XMM, 1>;
defm KOR   : avx512_mask_binop_all<0x45, "kor",   or,      SchedWriteVecLogic.XMM, 1>;
defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor,   SchedWriteVecLogic.XMM, 1>;
defm KXOR  : avx512_mask_binop_all<0x47, "kxor",  xor,     SchedWriteVecLogic.XMM, 1>;
defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn,   SchedWriteVecLogic.XMM, 0>;
defm KADD  : avx512_mask_binop_all<0x4A, "kadd",  X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;

multiclass avx512_binop_pat<SDPatternOperator VOpNode,
                            Instruction Inst> {
  // With AVX512F only, an 8-bit mask is promoted to a 16-bit mask; with the
  // DQI extension the type is legal and the KxxxB instructions are used.
  let Predicates = [NoDQI] in
  def : Pat<(VOpNode VK8:$src1, VK8:$src2),
            (COPY_TO_REGCLASS
             (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
                   (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;

  // All types smaller than 8 bits require conversion anyway.
  def : Pat<(VOpNode VK1:$src1, VK1:$src2),
            (COPY_TO_REGCLASS (Inst
                               (COPY_TO_REGCLASS VK1:$src1, VK16),
                               (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
  def : Pat<(VOpNode VK2:$src1, VK2:$src2),
            (COPY_TO_REGCLASS (Inst
                               (COPY_TO_REGCLASS VK2:$src1, VK16),
                               (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
  def : Pat<(VOpNode VK4:$src1, VK4:$src2),
            (COPY_TO_REGCLASS (Inst
                               (COPY_TO_REGCLASS VK4:$src1, VK16),
                               (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
}

defm : avx512_binop_pat<and,   KANDWrr>;
defm : avx512_binop_pat<vandn, KANDNWrr>;
defm : avx512_binop_pat<or,    KORWrr>;
defm : avx512_binop_pat<vxnor, KXNORWrr>;
defm : avx512_binop_pat<xor,   KXORWrr>;

// Mask unpacking
multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst,
                             X86KVectorVTInfo Src, X86FoldableSchedWrite sched,
                             Predicate prd> {
  let Predicates = [prd] in {
    let hasSideEffects = 0 in
    def rr : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst),
               (ins Src.KRC:$src1, Src.KRC:$src2),
               "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
             VEX_4V, VEX_L, Sched<[sched]>;

    def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)),
              (!cast<Instruction>(NAME#rr) Src.KRC:$src2, Src.KRC:$src1)>;
  }
}

defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info,  WriteShuffle, HasAVX512>, PD;
defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, PS;
defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, PS, VEX_W;
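// Note the operand swap in the concat_vectors pattern above: concat_vectors
// places $src1 in the low elements, while kunpck writes its second source
// operand to the low half of the destination.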
// Mask bit testing
multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                              SDNode OpNode, X86FoldableSchedWrite sched,
                              Predicate prd> {
  let Predicates = [prd], Defs = [EFLAGS] in
  def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
             [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>,
           Sched<[sched]>;
}

multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched,
                                Predicate prdW = HasAVX512> {
  defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>,
           VEX, PD;
  defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>,
           VEX, PS;
  defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>,
           VEX, PS, VEX_W;
  defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>,
           VEX, PD, VEX_W;
}

// TODO - do we need a X86SchedWriteWidths::KMASK type?
defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>;
defm KTEST   : avx512_mask_testop_w<0x99, "ktest",   X86ktest,   SchedWriteVecLogic.XMM, HasDQI>;

// Mask shift
multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                               SDNode OpNode, X86FoldableSchedWrite sched> {
  let Predicates = [HasAVX512] in
  def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
               !strconcat(OpcodeStr,
                          "\t{$imm, $src, $dst|$dst, $src, $imm}"),
               [(set KRC:$dst, (OpNode KRC:$src, (i8 timm:$imm)))]>,
            Sched<[sched]>;
}

multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
                                 SDNode OpNode, X86FoldableSchedWrite sched> {
  defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                               sched>, VEX, TAPD, VEX_W;
  let Predicates = [HasDQI] in
  defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                               sched>, VEX, TAPD;
  let Predicates = [HasBWI] in {
    defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                                 sched>, VEX, TAPD, VEX_W;
    defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                                 sched>, VEX, TAPD;
  }
}

defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>;
defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;

// Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
                                                 string InstStr,
                                                 X86VectorVTInfo Narrow,
                                                 X86VectorVTInfo Wide> {
def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
                                (Narrow.VT Narrow.RC:$src2), cond)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrri")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
            (X86pcmpm_imm $cc)), Narrow.KRC)>;

def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
                                                    (Narrow.VT Narrow.RC:$src2),
                                                    cond)))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
                             (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
                             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
                             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
                             (X86pcmpm_imm $cc)), Narrow.KRC)>;
}
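// Widening via INSERT_SUBREG into IMPLICIT_DEF leaves garbage in the upper
// elements, which is harmless here: the compare produces garbage only in the
// corresponding upper mask bits, and after the COPY_TO_REGCLASS to the narrow
// KRC only the low Narrow.NumElts bits are consumed.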
multiclass axv512_icmp_packed_cc_rmb_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
                                                     string InstStr,
                                                     X86VectorVTInfo Narrow,
                                                     X86VectorVTInfo Wide> {
// Broadcast load.
def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
                                (Narrow.BroadcastLdFrag addr:$src2), cond)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrmib")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>;

def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (Narrow.KVT
                            (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
                                         (Narrow.BroadcastLdFrag addr:$src2),
                                         cond)))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
                             (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
                             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
                             addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>;

// Commuted with broadcast load.
def : Pat<(Narrow.KVT (Frag:$cc (Narrow.BroadcastLdFrag addr:$src2),
                                (Narrow.VT Narrow.RC:$src1),
                                cond)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrmib")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>;

def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (Narrow.KVT
                            (Frag_su:$cc (Narrow.BroadcastLdFrag addr:$src2),
                                         (Narrow.VT Narrow.RC:$src1),
                                         cond)))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
                             (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
                             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
                             addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>;
}

// Same as above, but for fp types which don't use PatFrags.
multiclass axv512_cmp_packed_cc_no_vlx_lowering<string InstStr,
                                                X86VectorVTInfo Narrow,
                                                X86VectorVTInfo Wide> {
def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
                               (Narrow.VT Narrow.RC:$src2), timm:$cc)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrri")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
            timm:$cc), Narrow.KRC)>;

def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
                                       (Narrow.VT Narrow.RC:$src2), timm:$cc))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
                             (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
                             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
                             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
                             timm:$cc), Narrow.KRC)>;

// Broadcast load.
def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
                               (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrmbi")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            addr:$src2, timm:$cc), Narrow.KRC)>;

def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
                                       (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
                             (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
                             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
                             addr:$src2, timm:$cc), Narrow.KRC)>;
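// Only the second source of VPCMP/VCMP can be a (broadcast) memory operand,
// so when the broadcast appears on the left the patterns keep the register
// operand in place and rewrite the condition code via X86pcmpm_imm_commute /
// X86cmpm_imm_commute instead of swapping operands.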
// Commuted with broadcast load.
def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
                               (Narrow.VT Narrow.RC:$src1), timm:$cc)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrmbi")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;

def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (X86cmpm_su (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
                                       (Narrow.VT Narrow.RC:$src1), timm:$cc))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
                             (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
                             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
                             addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
}

let Predicates = [HasAVX512, NoVLX] in {
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm,  X86pcmpm_su,  "VPCMPD",  v8i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm,  X86pcmpm_su,  "VPCMPD",  v4i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm,  X86pcmpm_su,  "VPCMPQ",  v4i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm,  X86pcmpm_su,  "VPCMPQ",  v2i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;

  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm,  X86pcmpm_su,  "VPCMPD",  v8i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm,  X86pcmpm_su,  "VPCMPD",  v4i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm,  X86pcmpm_su,  "VPCMPQ",  v4i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;

  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm,  X86pcmpm_su,  "VPCMPQ",  v2i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;

  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v8f32x_info, v16f32_info>;
  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v4f32x_info, v16f32_info>;
  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v4f64x_info, v8f64_info>;
  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>;
}
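// For example, with AVX512F but no VLX, a v8i32 signed compare-to-mask is
// matched by widening both operands to v16i32, emitting VPCMPDZrri, and
// using only the low eight bits of the resulting mask register.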
let Predicates = [HasBWI, NoVLX] in {
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm,  X86pcmpm_su,  "VPCMPB",  v32i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm,  X86pcmpm_su,  "VPCMPB",  v16i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v16i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm,  X86pcmpm_su,  "VPCMPW",  v16i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v16i16x_info, v32i16_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm,  X86pcmpm_su,  "VPCMPW",  v8i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v8i16x_info, v32i16_info>;
}

// Mask setting all 0s or 1s
multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, SDPatternOperator Val> {
  let Predicates = [HasAVX512] in
  let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
      SchedRW = [WriteZero] in
  def NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
                [(set KRC:$dst, (VT Val))]>;
}

multiclass avx512_mask_setop_w<SDPatternOperator Val> {
  defm W : avx512_mask_setop<VK16, v16i1, Val>;
  defm D : avx512_mask_setop<VK32, v32i1, Val>;
  defm Q : avx512_mask_setop<VK64, v64i1, Val>;
}

defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
defm KSET1 : avx512_mask_setop_w<immAllOnesV>;

// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
let Predicates = [HasAVX512] in {
  def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
  def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
  def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
  def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
  def : Pat<(v8i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK8)>;
  def : Pat<(v4i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK4)>;
  def : Pat<(v2i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK2)>;
  def : Pat<(v1i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK1)>;
}
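// KSET0*/KSET1* are pseudos expanded after register allocation (see
// X86InstrInfo::expandPostRAPseudo) into kxor/kxnor of a register with
// itself, which is why they can be rematerializable and as cheap as a move.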
// Patterns for kmask insert_subvector/extract_subvector to/from index=0
multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
                                             RegisterClass RC, ValueType VT> {
  def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
            (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;

  def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
            (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
}

defm : operation_subvector_mask_lowering<VK1, v1i1, VK2,  v2i1>;
defm : operation_subvector_mask_lowering<VK1, v1i1, VK4,  v4i1>;
defm : operation_subvector_mask_lowering<VK1, v1i1, VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK1, v1i1, VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK1, v1i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK1, v1i1, VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK2, v2i1, VK4,  v4i1>;
defm : operation_subvector_mask_lowering<VK2, v2i1, VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK2, v2i1, VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK2, v2i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK2, v2i1, VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK4, v4i1, VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK4, v4i1, VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK4, v4i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK4, v4i1, VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK8, v8i1, VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK8, v8i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK8, v8i1, VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;

//===----------------------------------------------------------------------===//
// AVX-512 - Aligned and unaligned load and store
//

multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
                       X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
                       X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
                       bit NoRMPattern = 0,
                       SDPatternOperator SelectOprr = vselect> {
  let hasSideEffects = 0 in {
    let isMoveReg = 1 in
    def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
                      _.ExeDomain>, EVEX, Sched<[Sched.RR]>,
             EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
    def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
                        (ins _.KRCWM:$mask, _.RC:$src),
                        !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                                   "${dst} {${mask}} {z}, $src}"),
                        [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
                                                (_.VT _.RC:$src),
                                                _.ImmAllZerosV)))], _.ExeDomain>,
               EVEX, EVEX_KZ, Sched<[Sched.RR]>;

    let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
    def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                      !if(NoRMPattern, [],
                          [(set _.RC:$dst,
                            (_.VT (ld_frag addr:$src)))]),
                      _.ExeDomain>, EVEX, Sched<[Sched.RM]>,
             EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;

    let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
      def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
                         (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
                         !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
                                    "${dst} {${mask}}, $src1}"),
                         [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
                                                 (_.VT _.RC:$src1),
                                                 (_.VT _.RC:$src0))))], _.ExeDomain>,
                EVEX, EVEX_K, Sched<[Sched.RR]>;
      def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
                         (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
                         !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
                                    "${dst} {${mask}}, $src1}"),
                         [(set _.RC:$dst, (_.VT
                                           (vselect_mask _.KRCWM:$mask,
                                            (_.VT (ld_frag addr:$src1)),
                                            (_.VT _.RC:$src0))))], _.ExeDomain>,
                EVEX, EVEX_K, Sched<[Sched.RM]>;
    }
    def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
                        (ins _.KRCWM:$mask, _.MemOp:$src),
                        OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
                        "${dst} {${mask}} {z}, $src}",
                        [(set _.RC:$dst, (_.VT (vselect_mask _.KRCWM:$mask,
                                          (_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))],
                        _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
  }
  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
            (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;

  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;

  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
            (!cast<Instruction>(Name#_.ZSuffix#rmk) _.RC:$src0,
             _.KRCWM:$mask, addr:$ptr)>;
}
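// Each avx512_load instantiation produces the usual six variants; e.g. the
// 512-bit vmovaps defm yields VMOVAPSZrr, VMOVAPSZrrk, VMOVAPSZrrkz,
// VMOVAPSZrm, VMOVAPSZrmk and VMOVAPSZrmkz.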
multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo _, Predicate prd,
                                 X86SchedWriteMoveLSWidths Sched,
                                 string EVEX2VEXOvrd, bit NoRMPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
                       _.info512.AlignedLdFrag, masked_load_aligned,
                       Sched.ZMM, "", NoRMPattern>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
                            _.info256.AlignedLdFrag, masked_load_aligned,
                            Sched.YMM, EVEX2VEXOvrd#"Y", NoRMPattern>, EVEX_V256;
    defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
                            _.info128.AlignedLdFrag, masked_load_aligned,
                            Sched.XMM, EVEX2VEXOvrd, NoRMPattern>, EVEX_V128;
  }
}

multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
                          AVX512VLVectorVTInfo _, Predicate prd,
                          X86SchedWriteMoveLSWidths Sched,
                          string EVEX2VEXOvrd, bit NoRMPattern = 0,
                          SDPatternOperator SelectOprr = vselect> {
  let Predicates = [prd] in
  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag,
                       masked_load, Sched.ZMM, "",
                       NoRMPattern, SelectOprr>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag,
                            masked_load, Sched.YMM, EVEX2VEXOvrd#"Y",
                            NoRMPattern, SelectOprr>, EVEX_V256;
    defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag,
                            masked_load, Sched.XMM, EVEX2VEXOvrd,
                            NoRMPattern, SelectOprr>, EVEX_V128;
  }
}

multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
                        X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
                        X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
                        bit NoMRPattern = 0> {
  let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
    let isMoveReg = 1 in
    def rr_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
                          OpcodeStr # "\t{$src, $dst|$dst, $src}",
                          [], _.ExeDomain>, EVEX,
                 FoldGenData<BaseName#_.ZSuffix#rr>, Sched<[Sched.RR]>,
                 EVEX2VEXOverride<EVEX2VEXOvrd#"rr_REV">;
    def rrk_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
                           (ins _.KRCWM:$mask, _.RC:$src),
                           OpcodeStr # "\t{$src, ${dst} {${mask}}|"#
                           "${dst} {${mask}}, $src}",
                           [], _.ExeDomain>, EVEX, EVEX_K,
                  FoldGenData<BaseName#_.ZSuffix#rrk>,
                  Sched<[Sched.RR]>;
    def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
                            (ins _.KRCWM:$mask, _.RC:$src),
                            OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
                            "${dst} {${mask}} {z}, $src}",
                            [], _.ExeDomain>, EVEX, EVEX_KZ,
                   FoldGenData<BaseName#_.ZSuffix#rrkz>,
                   Sched<[Sched.RR]>;
  }

  let hasSideEffects = 0, mayStore = 1 in
  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    !if(NoMRPattern, [],
                        [(st_frag (_.VT _.RC:$src), addr:$dst)]),
                    _.ExeDomain>, EVEX, Sched<[Sched.MR]>,
           EVEX2VEXOverride<EVEX2VEXOvrd#"mr">;
  def mrk : AVX512PI<opc, MRMDestMem, (outs),
                     (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
                     OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
                     [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>,
            NotMemoryFoldable;

  def : Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask),
            (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
             _.KRCWM:$mask, _.RC:$src)>;

  def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV")
                   _.RC:$dst, _.RC:$src), 0>;
  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV")
                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV")
                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
}
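// The "<mnemonic>.s" aliases above force the MRMDestReg (store-style)
// encoding of a register-to-register move, e.g. "vmovaps.s %zmm0, %zmm1"
// assembles to the rr_REV form; FoldGenData links each _REV variant back to
// its load-style twin.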
multiclass avx512_store_vl<bits<8> opc, string OpcodeStr,
                           AVX512VLVectorVTInfo _, Predicate prd,
                           X86SchedWriteMoveLSWidths Sched,
                           string EVEX2VEXOvrd, bit NoMRPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store,
                        masked_store, Sched.ZMM, "",
                        NoMRPattern>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store,
                             masked_store, Sched.YMM,
                             EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store,
                             masked_store, Sched.XMM, EVEX2VEXOvrd,
                             NoMRPattern>, EVEX_V128;
  }
}

multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _, Predicate prd,
                                  X86SchedWriteMoveLSWidths Sched,
                                  string EVEX2VEXOvrd, bit NoMRPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore,
                        masked_store_aligned, Sched.ZMM, "",
                        NoMRPattern>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore,
                             masked_store_aligned, Sched.YMM,
                             EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore,
                             masked_store_aligned, Sched.XMM, EVEX2VEXOvrd,
                             NoMRPattern>, EVEX_V128;
  }
}

defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
                                     HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
               avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
               PS, EVEX_CD8<32, CD8VF>;

defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
                                     HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
               avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
               PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
                              SchedWriteFMoveLS, "VMOVUPS", 0, null_frag>,
               avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
                               SchedWriteFMoveLS, "VMOVUPS">,
               PS, EVEX_CD8<32, CD8VF>;

defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
                              SchedWriteFMoveLS, "VMOVUPD", 0, null_frag>,
               avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
                               SchedWriteFMoveLS, "VMOVUPD">,
               PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
                                       HasAVX512, SchedWriteVecMoveLS,
                                       "VMOVDQA", 1>,
                 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
                                        HasAVX512, SchedWriteVecMoveLS,
                                        "VMOVDQA", 1>,
                 PD, EVEX_CD8<32, CD8VF>;

defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
                                       HasAVX512, SchedWriteVecMoveLS,
                                       "VMOVDQA">,
                 avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
                                        HasAVX512, SchedWriteVecMoveLS,
                                        "VMOVDQA">,
                 PD, VEX_W, EVEX_CD8<64, CD8VF>;
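// The generic load/store patterns further down select the 64-bit element
// forms (VMOVDQA64/VMOVDQU64) for all integer vector types, so the 8/16/32-bit
// element variants suppress their plain rm/mr patterns (the trailing "1"
// arguments) and matter mainly for masked moves of the matching granularity.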
"vmovdqu8", avx512vl_i8_info, HasBWI, 3652 SchedWriteVecMoveLS, "VMOVDQU", 1>, 3653 XD, EVEX_CD8<8, CD8VF>; 3654 3655defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI, 3656 SchedWriteVecMoveLS, "VMOVDQU", 1>, 3657 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI, 3658 SchedWriteVecMoveLS, "VMOVDQU", 1>, 3659 XD, VEX_W, EVEX_CD8<16, CD8VF>; 3660 3661defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512, 3662 SchedWriteVecMoveLS, "VMOVDQU", 1, null_frag>, 3663 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512, 3664 SchedWriteVecMoveLS, "VMOVDQU", 1>, 3665 XS, EVEX_CD8<32, CD8VF>; 3666 3667defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512, 3668 SchedWriteVecMoveLS, "VMOVDQU", 0, null_frag>, 3669 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512, 3670 SchedWriteVecMoveLS, "VMOVDQU">, 3671 XS, VEX_W, EVEX_CD8<64, CD8VF>; 3672 3673// Special instructions to help with spilling when we don't have VLX. We need 3674// to load or store from a ZMM register instead. These are converted in 3675// expandPostRAPseudos. 3676let isReMaterializable = 1, canFoldAsLoad = 1, 3677 isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in { 3678def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src), 3679 "", []>, Sched<[WriteFLoadX]>; 3680def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src), 3681 "", []>, Sched<[WriteFLoadY]>; 3682def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src), 3683 "", []>, Sched<[WriteFLoadX]>; 3684def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src), 3685 "", []>, Sched<[WriteFLoadY]>; 3686} 3687 3688let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in { 3689def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src), 3690 "", []>, Sched<[WriteFStoreX]>; 3691def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src), 3692 "", []>, Sched<[WriteFStoreY]>; 3693def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src), 3694 "", []>, Sched<[WriteFStoreX]>; 3695def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src), 3696 "", []>, Sched<[WriteFStoreY]>; 3697} 3698 3699def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV), 3700 (v8i64 VR512:$src))), 3701 (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)), 3702 VK8), VR512:$src)>; 3703 3704def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV), 3705 (v16i32 VR512:$src))), 3706 (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>; 3707 3708// These patterns exist to prevent the above patterns from introducing a second 3709// mask inversion when one already exists. 
// These patterns exist to prevent the above patterns from introducing a second
// mask inversion when one already exists.
def : Pat<(v8i64 (vselect (v8i1 (vnot VK8:$mask)),
                          (v8i64 immAllZerosV),
                          (v8i64 VR512:$src))),
          (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
def : Pat<(v16i32 (vselect (v16i1 (vnot VK16:$mask)),
                           (v16i32 immAllZerosV),
                           (v16i32 VR512:$src))),
          (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;

multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
                              X86VectorVTInfo Wide> {
  def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
                                Narrow.RC:$src1, Narrow.RC:$src0)),
            (EXTRACT_SUBREG
             (Wide.VT
              (!cast<Instruction>(InstrStr#"rrk")
               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
               (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
             Narrow.SubRegIdx)>;

  def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
                                Narrow.RC:$src1, Narrow.ImmAllZerosV)),
            (EXTRACT_SUBREG
             (Wide.VT
              (!cast<Instruction>(InstrStr#"rrkz")
               (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
             Narrow.SubRegIdx)>;
}

// Patterns for handling v8i1 selects of 256-bit vectors when VLX isn't
// available. Use a 512-bit operation and extract.
let Predicates = [HasAVX512, NoVLX] in {
  defm : mask_move_lowering<"VMOVAPSZ",   v4f32x_info, v16f32_info>;
  defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
  defm : mask_move_lowering<"VMOVAPSZ",   v8f32x_info, v16f32_info>;
  defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;

  defm : mask_move_lowering<"VMOVAPDZ",   v2f64x_info, v8f64_info>;
  defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
  defm : mask_move_lowering<"VMOVAPDZ",   v4f64x_info, v8f64_info>;
  defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
}

let Predicates = [HasBWI, NoVLX] in {
  defm : mask_move_lowering<"VMOVDQU8Z",  v16i8x_info,  v64i8_info>;
  defm : mask_move_lowering<"VMOVDQU8Z",  v32i8x_info,  v64i8_info>;

  defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info,  v32i16_info>;
  defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;

  defm : mask_move_lowering<"VMOVDQU16Z", v8f16x_info,  v32f16_info>;
  defm : mask_move_lowering<"VMOVDQU16Z", v16f16x_info, v32f16_info>;
}

let Predicates = [HasAVX512] in {
  // 512-bit load.
  def : Pat<(alignedloadv16i32 addr:$src),
            (VMOVDQA64Zrm addr:$src)>;
  def : Pat<(alignedloadv32i16 addr:$src),
            (VMOVDQA64Zrm addr:$src)>;
  def : Pat<(alignedloadv32f16 addr:$src),
            (VMOVAPSZrm addr:$src)>;
  def : Pat<(alignedloadv64i8 addr:$src),
            (VMOVDQA64Zrm addr:$src)>;
  def : Pat<(loadv16i32 addr:$src),
            (VMOVDQU64Zrm addr:$src)>;
  def : Pat<(loadv32i16 addr:$src),
            (VMOVDQU64Zrm addr:$src)>;
  def : Pat<(loadv32f16 addr:$src),
            (VMOVUPSZrm addr:$src)>;
  def : Pat<(loadv64i8 addr:$src),
            (VMOVDQU64Zrm addr:$src)>;
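  // There is no dedicated f16 full-vector move, so whole-register f16 loads
  // and stores reuse the single-precision opcodes (vmovaps/vmovups).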
  // 512-bit store.
  def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(alignedstore (v32f16 VR512:$src), addr:$dst),
            (VMOVAPSZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v16i32 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v32i16 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v32f16 VR512:$src), addr:$dst),
            (VMOVUPSZmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v64i8 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
}

let Predicates = [HasVLX] in {
  // 128-bit load.
  def : Pat<(alignedloadv4i32 addr:$src),
            (VMOVDQA64Z128rm addr:$src)>;
  def : Pat<(alignedloadv8i16 addr:$src),
            (VMOVDQA64Z128rm addr:$src)>;
  def : Pat<(alignedloadv8f16 addr:$src),
            (VMOVAPSZ128rm addr:$src)>;
  def : Pat<(alignedloadv16i8 addr:$src),
            (VMOVDQA64Z128rm addr:$src)>;
  def : Pat<(loadv4i32 addr:$src),
            (VMOVDQU64Z128rm addr:$src)>;
  def : Pat<(loadv8i16 addr:$src),
            (VMOVDQU64Z128rm addr:$src)>;
  def : Pat<(loadv8f16 addr:$src),
            (VMOVUPSZ128rm addr:$src)>;
  def : Pat<(loadv16i8 addr:$src),
            (VMOVDQU64Z128rm addr:$src)>;

  // 128-bit store.
  def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignedstore (v8f16 VR128X:$src), addr:$dst),
            (VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v8f16 VR128X:$src), addr:$dst),
            (VMOVUPSZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;

  // 256-bit load.
  def : Pat<(alignedloadv8i32 addr:$src),
            (VMOVDQA64Z256rm addr:$src)>;
  def : Pat<(alignedloadv16i16 addr:$src),
            (VMOVDQA64Z256rm addr:$src)>;
  def : Pat<(alignedloadv16f16 addr:$src),
            (VMOVAPSZ256rm addr:$src)>;
  def : Pat<(alignedloadv32i8 addr:$src),
            (VMOVDQA64Z256rm addr:$src)>;
  def : Pat<(loadv8i32 addr:$src),
            (VMOVDQU64Z256rm addr:$src)>;
  def : Pat<(loadv16i16 addr:$src),
            (VMOVDQU64Z256rm addr:$src)>;
  def : Pat<(loadv16f16 addr:$src),
            (VMOVUPSZ256rm addr:$src)>;
  def : Pat<(loadv32i8 addr:$src),
            (VMOVDQU64Z256rm addr:$src)>;
  // 256-bit store.
  def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v16f16 VR256X:$src), addr:$dst),
            (VMOVAPSZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v16f16 VR256X:$src), addr:$dst),
            (VMOVUPSZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
}

let Predicates = [HasBWI] in {
  def : Pat<(v32f16 (vselect VK32WM:$mask, (v32f16 VR512:$src1), (v32f16 VR512:$src0))),
            (VMOVDQU16Zrrk VR512:$src0, VK32WM:$mask, VR512:$src1)>;
  def : Pat<(v32f16 (vselect VK32WM:$mask, (v32f16 VR512:$src1), v32f16_info.ImmAllZerosV)),
            (VMOVDQU16Zrrkz VK32WM:$mask, VR512:$src1)>;
  def : Pat<(v32f16 (vselect VK32WM:$mask,
                     (v32f16 (alignedloadv32f16 addr:$src)), (v32f16 VR512:$src0))),
            (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
  def : Pat<(v32f16 (vselect VK32WM:$mask,
                     (v32f16 (alignedloadv32f16 addr:$src)), v32f16_info.ImmAllZerosV)),
            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
  def : Pat<(v32f16 (vselect VK32WM:$mask,
                     (v32f16 (loadv32f16 addr:$src)), (v32f16 VR512:$src0))),
            (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
  def : Pat<(v32f16 (vselect VK32WM:$mask,
                     (v32f16 (loadv32f16 addr:$src)), v32f16_info.ImmAllZerosV)),
            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
  def : Pat<(v32f16 (masked_load addr:$src, VK32WM:$mask, (v32f16 VR512:$src0))),
            (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
  def : Pat<(v32f16 (masked_load addr:$src, VK32WM:$mask, undef)),
            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
  def : Pat<(v32f16 (masked_load addr:$src, VK32WM:$mask, v32f16_info.ImmAllZerosV)),
            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;

  def : Pat<(masked_store (v32f16 VR512:$src), addr:$dst, VK32WM:$mask),
            (VMOVDQU16Zmrk addr:$dst, VK32WM:$mask, VR512:$src)>;
}
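// The f16 masked-move patterns above and below go through vmovdqu16 because
// its masking is per 16-bit element; vmovaps/vmovups would mask at 32-bit
// granularity.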
let Predicates = [HasBWI, HasVLX] in {
  def : Pat<(v16f16 (vselect VK16WM:$mask, (v16f16 VR256X:$src1), (v16f16 VR256X:$src0))),
            (VMOVDQU16Z256rrk VR256X:$src0, VK16WM:$mask, VR256X:$src1)>;
  def : Pat<(v16f16 (vselect VK16WM:$mask, (v16f16 VR256X:$src1), v16f16x_info.ImmAllZerosV)),
            (VMOVDQU16Z256rrkz VK16WM:$mask, VR256X:$src1)>;
  def : Pat<(v16f16 (vselect VK16WM:$mask,
                     (v16f16 (alignedloadv16f16 addr:$src)), (v16f16 VR256X:$src0))),
            (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
  def : Pat<(v16f16 (vselect VK16WM:$mask,
                     (v16f16 (alignedloadv16f16 addr:$src)), v16f16x_info.ImmAllZerosV)),
            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
  def : Pat<(v16f16 (vselect VK16WM:$mask,
                     (v16f16 (loadv16f16 addr:$src)), (v16f16 VR256X:$src0))),
            (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
  def : Pat<(v16f16 (vselect VK16WM:$mask,
                     (v16f16 (loadv16f16 addr:$src)), v16f16x_info.ImmAllZerosV)),
            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
  def : Pat<(v16f16 (masked_load addr:$src, VK16WM:$mask, (v16f16 VR256X:$src0))),
            (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
  def : Pat<(v16f16 (masked_load addr:$src, VK16WM:$mask, undef)),
            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
  def : Pat<(v16f16 (masked_load addr:$src, VK16WM:$mask, v16f16x_info.ImmAllZerosV)),
            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;

  def : Pat<(masked_store (v16f16 VR256X:$src), addr:$dst, VK16WM:$mask),
            (VMOVDQU16Z256mrk addr:$dst, VK16WM:$mask, VR256X:$src)>;

  def : Pat<(v8f16 (vselect VK8WM:$mask, (v8f16 VR128X:$src1), (v8f16 VR128X:$src0))),
            (VMOVDQU16Z128rrk VR128X:$src0, VK8WM:$mask, VR128X:$src1)>;
  def : Pat<(v8f16 (vselect VK8WM:$mask, (v8f16 VR128X:$src1), v8f16x_info.ImmAllZerosV)),
            (VMOVDQU16Z128rrkz VK8WM:$mask, VR128X:$src1)>;
  def : Pat<(v8f16 (vselect VK8WM:$mask,
                    (v8f16 (alignedloadv8f16 addr:$src)), (v8f16 VR128X:$src0))),
            (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
  def : Pat<(v8f16 (vselect VK8WM:$mask,
                    (v8f16 (alignedloadv8f16 addr:$src)), v8f16x_info.ImmAllZerosV)),
            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
  def : Pat<(v8f16 (vselect VK8WM:$mask,
                    (v8f16 (loadv8f16 addr:$src)), (v8f16 VR128X:$src0))),
            (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
  def : Pat<(v8f16 (vselect VK8WM:$mask,
                    (v8f16 (loadv8f16 addr:$src)), v8f16x_info.ImmAllZerosV)),
            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
  def : Pat<(v8f16 (masked_load addr:$src, VK8WM:$mask, (v8f16 VR128X:$src0))),
            (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
  def : Pat<(v8f16 (masked_load addr:$src, VK8WM:$mask, undef)),
            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
  def : Pat<(v8f16 (masked_load addr:$src, VK8WM:$mask, v8f16x_info.ImmAllZerosV)),
            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;

  def : Pat<(masked_store (v8f16 VR128X:$src), addr:$dst, VK8WM:$mask),
            (VMOVDQU16Z128mrk addr:$dst, VK8WM:$mask, VR128X:$src)>;
}

// Move Int Doubleword to Packed Double Int
//
let ExeDomain = SSEPackedInt in {
def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
                             "vmovd\t{$src, $dst|$dst, $src}",
                             [(set VR128X:$dst,
                               (v4i32 (scalar_to_vector GR32:$src)))]>,
                    EVEX, Sched<[WriteVecMoveFromGpr]>;
def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
                             "vmovd\t{$src, $dst|$dst, $src}",
                             [(set VR128X:$dst,
                               (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
                    EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
                              "vmovq\t{$src, $dst|$dst, $src}",
                              [(set VR128X:$dst,
                                (v2i64 (scalar_to_vector GR64:$src)))]>,
                     EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
                              (ins i64mem:$src),
                              "vmovq\t{$src, $dst|$dst, $src}", []>,
                     EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
let isCodeGenOnly = 1 in {
def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
                             "vmovq\t{$src, $dst|$dst, $src}",
                             [(set FR64X:$dst, (bitconvert GR64:$src))]>,
                    EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
                             "vmovq\t{$src, $dst|$dst, $src}",
                             [(set GR64:$dst, (bitconvert FR64X:$src))]>,
                    EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>;
}
} // ExeDomain = SSEPackedInt
                      EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
}
} // ExeDomain = SSEPackedInt

// Move Int Doubleword to Single Scalar
//
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set FR32X:$dst, (bitconvert GR32:$src))]>,
                      EVEX, Sched<[WriteVecMoveFromGpr]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1

// Move doubleword from xmm register to r/m32
//
let ExeDomain = SSEPackedInt in {
def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
                                        (iPTR 0)))]>,
                      EVEX, Sched<[WriteVecMoveToGpr]>;
def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
                      (ins i32mem:$dst, VR128X:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(store (i32 (extractelt (v4i32 VR128X:$src),
                                    (iPTR 0))), addr:$dst)]>,
                      EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
} // ExeDomain = SSEPackedInt

// Move quadword from xmm1 register to r/m64
//
let ExeDomain = SSEPackedInt in {
def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
                       "vmovq\t{$src, $dst|$dst, $src}",
                       [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
                                         (iPTR 0)))]>,
                       PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>,
                       Requires<[HasAVX512]>;

let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
                       "vmovq\t{$src, $dst|$dst, $src}", []>, PD,
                       EVEX, VEX_W, Sched<[WriteVecStore]>,
                       Requires<[HasAVX512, In64BitMode]>;

def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
                      (ins i64mem:$dst, VR128X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
                              addr:$dst)]>,
                      EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>,
                      Sched<[WriteVecStore]>, Requires<[HasAVX512]>;

let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
                             (ins VR128X:$src),
                             "vmovq\t{$src, $dst|$dst, $src}", []>,
                             EVEX, VEX_W, Sched<[SchedWriteVecLogic.XMM]>;
} // ExeDomain = SSEPackedInt

def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
                (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;

let Predicates = [HasAVX512] in {
  def : Pat<(X86vextractstore64 (v2i64 VR128X:$src), addr:$dst),
            (VMOVPQI2QIZmr addr:$dst, VR128X:$src)>;
}

// Move Scalar Single to Double Int
//
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
                            (ins FR32X:$src),
                            "vmovd\t{$src, $dst|$dst, $src}",
                            [(set GR32:$dst, (bitconvert FR32X:$src))]>,
                            EVEX, Sched<[WriteVecMoveToGpr]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1

// Move Quadword Int to Packed Quadword Int
//
let ExeDomain = SSEPackedInt in {
def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
                              (ins i64mem:$src),
                              "vmovq\t{$src, $dst|$dst, $src}",
                              [(set VR128X:$dst,
                                (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
                              EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
} // ExeDomain = SSEPackedInt

// Allow "vmovd" but print "vmovq".
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>;
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>;

// Conversions between masks and scalar fp.
def : Pat<(v32i1 (bitconvert FR32X:$src)),
          (KMOVDkr (VMOVSS2DIZrr FR32X:$src))>;
def : Pat<(f32 (bitconvert VK32:$src)),
          (VMOVDI2SSZrr (KMOVDrk VK32:$src))>;

def : Pat<(v64i1 (bitconvert FR64X:$src)),
          (KMOVQkr (VMOVSDto64Zrr FR64X:$src))>;
def : Pat<(f64 (bitconvert VK64:$src)),
          (VMOV64toSDZrr (KMOVQrk VK64:$src))>;
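
// A sketch of the selected sequence for these bitconverts (Intel syntax):
// a v32i1 <-> f32 conversion round-trips through a GPR, e.g.
//   vmovd eax, xmm0   ; VMOVSS2DIZrr
//   kmovd k0, eax     ; KMOVDkr
// since there is no direct move between mask and fp registers.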

//===----------------------------------------------------------------------===//
// AVX-512 MOVSH, MOVSS, MOVSD
//===----------------------------------------------------------------------===//

multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
                              X86VectorVTInfo _, Predicate prd = HasAVX512> {
  let Predicates = !if (!eq (prd, HasFP16), [HasFP16], [prd, OptForSize]) in
  def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
             (ins _.RC:$src1, _.RC:$src2),
             !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
             _.ExeDomain>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
  let Predicates = [prd] in {
  def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
              !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
                         "$dst {${mask}} {z}, $src1, $src2}"),
              [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
                                      (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                                      _.ImmAllZerosV)))],
              _.ExeDomain>, EVEX_4V, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>;
  let Constraints = "$src0 = $dst" in
  def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
             (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
             !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
                        "$dst {${mask}}, $src1, $src2}"),
             [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
                                     (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                                     (_.VT _.RC:$src0))))],
             _.ExeDomain>, EVEX_4V, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
  let canFoldAsLoad = 1, isReMaterializable = 1 in {
  def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
             [(set _.RC:$dst, (_.VT (vzload_frag addr:$src)))],
             _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
  // _alt version uses FR32/FR64 register class.
  let isCodeGenOnly = 1 in
  def rm_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
                 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
                 [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
                 _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
  }
  let mayLoad = 1, hasSideEffects = 0 in {
  let Constraints = "$src0 = $dst" in
  def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
              (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
              !strconcat(asm, "\t{$src, $dst {${mask}}|",
                         "$dst {${mask}}, $src}"),
              [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
  def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
               (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
               !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
                          "$dst {${mask}} {z}, $src}"),
               [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
  }
  def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
            !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
            [(store _.FRC:$src, addr:$dst)], _.ExeDomain>,
            EVEX, Sched<[WriteFStore]>;
  let mayStore = 1, hasSideEffects = 0 in
  def mrk: AVX512PI<0x11, MRMDestMem, (outs),
             (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.RC:$src),
             !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
             [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>,
             NotMemoryFoldable;
  }
}
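
// For orientation (a sketch, not the full expansion): instantiating this
// multiclass as VMOVSSZ below yields masked scalar moves such as
//   vmovss xmm1 {k1}, xmm2, xmm3            ; rrk, merge into $src0
//   vmovss xmm1 {k1} {z}, dword ptr [rax]   ; rmkz, zero-masked load
// with the {k}/{k}{z} forms coming from the rrk/rrkz/rmk/rmkz variants.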

defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>,
                                  VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;

defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>,
                                  VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VMOVSHZ : avx512_move_scalar<"vmovsh", X86Movsh, X86vzload16, f16x_info,
                                  HasFP16>,
                                  VEX_LIG, T_MAP5XS, EVEX_CD8<16, CD8VT1>;

multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
                                       PatLeaf ZeroFP, X86VectorVTInfo _> {

def : Pat<(_.VT (OpNode _.RC:$src0,
                        (_.VT (scalar_to_vector
                                  (_.EltVT (X86selects VK1WM:$mask,
                                                       (_.EltVT _.FRC:$src1),
                                                       (_.EltVT _.FRC:$src2))))))),
          (!cast<Instruction>(InstrStr#rrk)
                        (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)),
                        VK1WM:$mask,
                        (_.VT _.RC:$src0),
                        (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;

def : Pat<(_.VT (OpNode _.RC:$src0,
                        (_.VT (scalar_to_vector
                                  (_.EltVT (X86selects VK1WM:$mask,
                                                       (_.EltVT _.FRC:$src1),
                                                       (_.EltVT ZeroFP))))))),
          (!cast<Instruction>(InstrStr#rrkz)
                        VK1WM:$mask,
                        (_.VT _.RC:$src0),
                        (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
}

multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
                                        dag Mask, RegisterClass MaskRC> {

def : Pat<(masked_store
             (_.info512.VT (insert_subvector undef,
                               (_.info128.VT _.info128.RC:$src),
                               (iPTR 0))), addr:$dst, Mask),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                      _.info128.RC:$src)>;

}

multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
                                               AVX512VLVectorVTInfo _,
                                               dag Mask, RegisterClass MaskRC,
                                               SubRegIndex subreg> {

def : Pat<(masked_store
             (_.info512.VT (insert_subvector undef,
                               (_.info128.VT _.info128.RC:$src),
                               (iPTR 0))), addr:$dst, Mask),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      _.info128.RC:$src)>;

}

// This matches the more recent codegen from clang that avoids emitting a 512
// bit masked store directly. Codegen will widen 128-bit masked store to 512
// bits on AVX512F only targets.
multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
                                                AVX512VLVectorVTInfo _,
                                                dag Mask512, dag Mask128,
                                                RegisterClass MaskRC,
                                                SubRegIndex subreg> {

// AVX512F pattern.
def : Pat<(masked_store
             (_.info512.VT (insert_subvector undef,
                               (_.info128.VT _.info128.RC:$src),
                               (iPTR 0))), addr:$dst, Mask512),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      _.info128.RC:$src)>;

// AVX512VL pattern.
def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      _.info128.RC:$src)>;
}

multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
                                       dag Mask, RegisterClass MaskRC> {

def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
                                        _.info512.ImmAllZerosV)),
                           (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                      addr:$srcAddr)>;

def : Pat<(_.info128.VT (extract_subvector
                (_.info512.VT (masked_load addr:$srcAddr, Mask,
                      (_.info512.VT (insert_subvector undef,
                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                            (iPTR 0))))),
                (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                      addr:$srcAddr)>;

}

multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
                                              AVX512VLVectorVTInfo _,
                                              dag Mask, RegisterClass MaskRC,
                                              SubRegIndex subreg> {

def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
                                        _.info512.ImmAllZerosV)),
                           (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

def : Pat<(_.info128.VT (extract_subvector
                (_.info512.VT (masked_load addr:$srcAddr, Mask,
                      (_.info512.VT (insert_subvector undef,
                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                            (iPTR 0))))),
                (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

}

// This matches the more recent codegen from clang that avoids emitting a 512
// bit masked load directly. Codegen will widen 128-bit masked load to 512
// bits on AVX512F only targets.
multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
                                               AVX512VLVectorVTInfo _,
                                               dag Mask512, dag Mask128,
                                               RegisterClass MaskRC,
                                               SubRegIndex subreg> {
// AVX512F patterns.
def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask512,
                                        _.info512.ImmAllZerosV)),
                           (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

def : Pat<(_.info128.VT (extract_subvector
                (_.info512.VT (masked_load addr:$srcAddr, Mask512,
                      (_.info512.VT (insert_subvector undef,
                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                            (iPTR 0))))),
                (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

// AVX512VL patterns.
def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
                         _.info128.ImmAllZerosV)),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
                         (_.info128.VT (X86vzmovl _.info128.RC:$src)))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;
}

defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;

defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;

let Predicates = [HasFP16] in {
defm : avx512_move_scalar_lowering<"VMOVSHZ", X86Movsh, fp16imm0, v8f16x_info>;
defm : avx512_store_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
defm : avx512_store_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
                   (v32i1 (insert_subvector
                           (v32i1 immAllZerosV),
                           (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                           (iPTR 0))),
                   (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                   GR8, sub_8bit>;

defm : avx512_load_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
defm : avx512_load_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
                   (v32i1 (insert_subvector
                           (v32i1 immAllZerosV),
                           (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                           (iPTR 0))),
                   (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                   GR8, sub_8bit>;

def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), (f16 FR16X:$src2))),
          (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrk
                                    (v8f16 (COPY_TO_REGCLASS FR16X:$src2, VR128X)),
                                    VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
                                    (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;

def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), fp16imm0)),
          (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrkz VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
                                    (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;
}

defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (insert_subvector
                           (v16i1 immAllZerosV),
                           (v4i1 (extract_subvector
                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                                  (iPTR 0))),
                           (iPTR 0))),
                   (v4i1 (extract_subvector
                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                          (iPTR 0))), GR8, sub_8bit>;
defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1
                    (extract_subvector
                     (v16i1
                      (insert_subvector
                       (v16i1 immAllZerosV),
                       (v2i1 (extract_subvector
                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                              (iPTR 0))),
                       (iPTR 0))),
                     (iPTR 0))),
                   (v2i1 (extract_subvector
                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                          (iPTR 0))), GR8, sub_8bit>;

defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;

defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (insert_subvector
                           (v16i1 immAllZerosV),
                           (v4i1 (extract_subvector
                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                                  (iPTR 0))),
                           (iPTR 0))),
                   (v4i1 (extract_subvector
                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                          (iPTR 0))), GR8, sub_8bit>;
defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1
                    (extract_subvector
                     (v16i1
                      (insert_subvector
                       (v16i1 immAllZerosV),
                       (v2i1 (extract_subvector
                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                              (iPTR 0))),
                       (iPTR 0))),
                     (iPTR 0))),
                   (v2i1 (extract_subvector
                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                          (iPTR 0))), GR8, sub_8bit>;

def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk
                                    (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
                                    VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
                                    (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;

def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
                                    (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;

def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), (f32 FR32X:$src0))),
          (COPY_TO_REGCLASS
           (v4f32 (VMOVSSZrmk (v4f32 (COPY_TO_REGCLASS FR32X:$src0, VR128X)),
                              VK1WM:$mask, addr:$src)),
           FR32X)>;
def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), fp32imm0)),
          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrmkz VK1WM:$mask, addr:$src)), FR32X)>;

def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk
                                    (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
                                    VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
                                    (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;

def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fp64imm0)),
          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
                                    (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;

def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0))),
          (COPY_TO_REGCLASS
           (v2f64 (VMOVSDZrmk (v2f64 (COPY_TO_REGCLASS FR64X:$src0, VR128X)),
                              VK1WM:$mask, addr:$src)),
           FR64X)>;
def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)),
          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrmkz VK1WM:$mask, addr:$src)), FR64X)>;


def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 VR128X:$src2))),
          (VMOVSSZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 VR128X:$src2))),
          (VMOVSDZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;

def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 immAllZerosV))),
          (VMOVSSZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 immAllZerosV))),
          (VMOVSDZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;

let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
  let Predicates = [HasFP16] in {
    def VMOVSHZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                              (ins VR128X:$src1, VR128X:$src2),
                              "vmovsh\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                              []>, T_MAP5XS, EVEX_4V, VEX_LIG,
                              FoldGenData<"VMOVSHZrr">,
                              Sched<[SchedWriteFShuffle.XMM]>;

    let Constraints = "$src0 = $dst" in
    def VMOVSHZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                               (ins f16x_info.RC:$src0, f16x_info.KRCWM:$mask,
                                    VR128X:$src1, VR128X:$src2),
                               "vmovsh\t{$src2, $src1, $dst {${mask}}|"#
                                 "$dst {${mask}}, $src1, $src2}",
                               []>, T_MAP5XS, EVEX_K, EVEX_4V, VEX_LIG,
                               FoldGenData<"VMOVSHZrrk">,
                               Sched<[SchedWriteFShuffle.XMM]>;

    def VMOVSHZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                                (ins f16x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
                                "vmovsh\t{$src2, $src1, $dst {${mask}} {z}|"#
                                  "$dst {${mask}} {z}, $src1, $src2}",
                                []>, EVEX_KZ, T_MAP5XS, EVEX_4V, VEX_LIG,
                                FoldGenData<"VMOVSHZrrkz">,
                                Sched<[SchedWriteFShuffle.XMM]>;
  }
  def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            []>, XS, EVEX_4V, VEX_LIG,
                            FoldGenData<"VMOVSSZrr">,
                            Sched<[SchedWriteFShuffle.XMM]>;

  let Constraints = "$src0 = $dst" in
  def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                             (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
                                  VR128X:$src1, VR128X:$src2),
                             "vmovss\t{$src2, $src1, $dst {${mask}}|"#
                               "$dst {${mask}}, $src1, $src2}",
                             []>, EVEX_K, XS, EVEX_4V, VEX_LIG,
                             FoldGenData<"VMOVSSZrrk">,
                             Sched<[SchedWriteFShuffle.XMM]>;

  def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                              (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
                              "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
                                "$dst {${mask}} {z}, $src1, $src2}",
                              []>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
                              FoldGenData<"VMOVSSZrrkz">,
                              Sched<[SchedWriteFShuffle.XMM]>;

  def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            []>, XD, EVEX_4V, VEX_LIG, VEX_W,
                            FoldGenData<"VMOVSDZrr">,
                            Sched<[SchedWriteFShuffle.XMM]>;

  let Constraints = "$src0 = $dst" in
  def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                             (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
                                  VR128X:$src1, VR128X:$src2),
                             "vmovsd\t{$src2, $src1, $dst {${mask}}|"#
                               "$dst {${mask}}, $src1, $src2}",
                             []>, EVEX_K, XD, EVEX_4V, VEX_LIG,
                             VEX_W, FoldGenData<"VMOVSDZrrk">,
                             Sched<[SchedWriteFShuffle.XMM]>;

  def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                              (ins f64x_info.KRCWM:$mask, VR128X:$src1,
                                   VR128X:$src2),
                              "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
                                "$dst {${mask}} {z}, $src1, $src2}",
                              []>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
                              VEX_W, FoldGenData<"VMOVSDZrrkz">,
                              Sched<[SchedWriteFShuffle.XMM]>;
}

def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VMOVSHZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}}|"#
                  "$dst {${mask}}, $src1, $src2}",
                (VMOVSHZrrk_REV VR128X:$dst, VK1WM:$mask,
                                VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}} {z}|"#
                  "$dst {${mask}} {z}, $src1, $src2}",
                (VMOVSHZrrkz_REV VR128X:$dst, VK1WM:$mask,
                                 VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
                  "$dst {${mask}}, $src1, $src2}",
                (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask,
                                VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
                  "$dst {${mask}} {z}, $src1, $src2}",
                (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask,
                                 VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
                  "$dst {${mask}}, $src1, $src2}",
                (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask,
                                VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
                  "$dst {${mask}} {z}, $src1, $src2}",
                (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask,
                                 VR128X:$src1, VR128X:$src2), 0>;

let Predicates = [HasAVX512, OptForSize] in {
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
            (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
            (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;

  // Move low f32 and clear high bits.
  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
              (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
              (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;

  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
              (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
              (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;
}

// Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
// VMOVSS/SD. Unfortunately, loses the ability to use XMM16-31.
let Predicates = [HasAVX512, OptForSpeed] in {
  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
                    (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)),
                    (i8 1))), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
                    (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
                    (i8 3))), sub_xmm)>;
}
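
// Illustrative comparison of the two alternatives above (a sketch; V_SET0 and
// AVX512_128_SET0 are zeroing pseudos):
//   OptForSize:  vmovss   xmm0, xmm0, xmm1     ; smaller encoding
//   OptForSpeed: vblendps xmm0, xmm0, xmm1, 1  ; better throughput, but the
//                                              ; VEX encoding limits us to XMM0-15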

let Predicates = [HasAVX512] in {
  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
            (VMOVSSZrm addr:$src)>;
  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
            (VMOVSDZrm addr:$src)>;

  // Represent the same patterns above but in the form they appear for
  // 256-bit types
  def : Pat<(v8f32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v4f64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;

  // Represent the same patterns above but in the form they appear for
  // 512-bit types
  def : Pat<(v16f32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v8f64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
}
let Predicates = [HasFP16] in {
  def : Pat<(v8f16 (X86vzmovl (v8f16 VR128X:$src))),
            (VMOVSHZrr (v8f16 (AVX512_128_SET0)), VR128X:$src)>;

  // FIXME we need better canonicalization in dag combine
  def : Pat<(v16f16 (X86vzmovl (v16f16 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
              (v8f16 (EXTRACT_SUBREG (v16f16 VR256X:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v32f16 (X86vzmovl (v32f16 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
              (v8f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_xmm)))), sub_xmm)>;

  def : Pat<(v8f16 (X86vzload16 addr:$src)),
            (VMOVSHZrm addr:$src)>;

  def : Pat<(v16f16 (X86vzload16 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;

  def : Pat<(v32f16 (X86vzload16 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;
}

let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
                                  (ins VR128X:$src),
                                  "vmovq\t{$src, $dst|$dst, $src}",
                                  [(set VR128X:$dst, (v2i64 (X86vzmovl
                                                     (v2i64 VR128X:$src))))]>,
                                  EVEX, VEX_W;
}

let Predicates = [HasAVX512] in {
  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
            (VMOVDI2PDIZrr GR32:$src)>;

  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
            (VMOV64toPQIZrr GR64:$src)>;

  // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
  def : Pat<(v4i32 (X86vzload32 addr:$src)),
            (VMOVDI2PDIZrm addr:$src)>;
  def : Pat<(v8i32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
            (VMOVZPQILo2PQIZrr VR128X:$src)>;
  def : Pat<(v2i64 (X86vzload64 addr:$src)),
            (VMOVQI2PQIZrm addr:$src)>;
  def : Pat<(v4i64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;

  // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
  def : Pat<(v16i32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  def : Pat<(v8i64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;

  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2f64 (VMOVZPQILo2PQIZrr
                     (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))),
             sub_xmm)>;
  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2i64 (VMOVZPQILo2PQIZrr
                     (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))),
             sub_xmm)>;

  def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2f64 (VMOVZPQILo2PQIZrr
                     (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))),
             sub_xmm)>;
  def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2i64 (VMOVZPQILo2PQIZrr
                     (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))),
             sub_xmm)>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - Non-temporals
//===----------------------------------------------------------------------===//

def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
                            (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
                            [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>,
                            EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;

let Predicates = [HasVLX] in {
  def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
                                 (ins i256mem:$src),
                                 "vmovntdqa\t{$src, $dst|$dst, $src}",
                                 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>,
                                 EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;

  def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
                                 (ins i128mem:$src),
                                 "vmovntdqa\t{$src, $dst|$dst, $src}",
                                 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>,
                                 EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
}
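
// For reference (a sketch): the non-temporal forms are ordinary full-width
// moves with a streaming hint, e.g.
//   vmovntdqa zmm0, zmmword ptr [rax]   ; NT load, requires alignment
//   vmovntdq  zmmword ptr [rax], zmm0   ; NT store, requires alignment
// which is why the patterns below only match the aligned NT load/store nodes.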

multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                        X86SchedWriteMoveLS Sched,
                        PatFrag st_frag = alignednontemporalstore> {
  let SchedRW = [Sched.MR], AddedComplexity = 400 in
  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(st_frag (_.VT _.RC:$src), addr:$dst)],
                    _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
}

multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
                           AVX512VLVectorVTInfo VTInfo,
                           X86SchedWriteMoveLSWidths Sched> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256;
    defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128;
  }
}

defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
                                SchedWriteVecMoveLSNT>, PD;
defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
                                SchedWriteFMoveLSNT>, PD, VEX_W;
defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
                                SchedWriteFMoveLSNT>, PS;

let Predicates = [HasAVX512], AddedComplexity = 400 in {
  def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;

  def : Pat<(v8f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v16f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v8i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v16i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v32i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v64i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
}

let Predicates = [HasVLX], AddedComplexity = 400 in {
  def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;

  def : Pat<(v4f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v8f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v4i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v8i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v16i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v32i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;

  def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;

  def : Pat<(v2f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v4f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v2i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v4i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v8i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v16i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - Integer arithmetic
//
multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86VectorVTInfo _, X86FoldableSchedWrite sched,
                           bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                    "$src2, $src1", "$src1, $src2",
                    (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                    IsCommutable, IsCommutable>, AVX512BIBase, EVEX_4V,
                    Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2)))>,
                  AVX512BIBase, EVEX_4V,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
                            bit IsCommutable = 0> :
           avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                  "${src2}"#_.BroadcastStr#", $src1",
                  "$src1, ${src2}"#_.BroadcastStr,
                  (_.VT (OpNode _.RC:$src1,
                                (_.BroadcastLdFrag addr:$src2)))>,
                  AVX512BIBase, EVEX_4V, EVEX_B,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
}
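
// Illustrative asm for the three forms defined above (a sketch, using vpaddd
// as the example operation):
//   vpaddd zmm0 {k1}, zmm1, zmm2                   ; rr
//   vpaddd zmm0 {k1}, zmm1, zmmword ptr [rax]      ; rm
//   vpaddd zmm0 {k1}, zmm1, dword ptr [rax]{1to16} ; rmb (embedded broadcast)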

multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              AVX512VLVectorVTInfo VTInfo,
                              X86SchedWriteWidths sched, Predicate prd,
                              bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
                           IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
                                sched.YMM, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
                                sched.XMM, IsCommutable>, EVEX_V128;
  }
}

multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               AVX512VLVectorVTInfo VTInfo,
                               X86SchedWriteWidths sched, Predicate prd,
                               bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
                            IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
                                 sched.YMM, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
                                 sched.XMM, IsCommutable>, EVEX_V128;
  }
}

multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
                                  sched, prd, IsCommutable>,
                                  VEX_W, EVEX_CD8<64, CD8VF>;
}

multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
                                  sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
}

multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
                                 sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
                                 VEX_WIG;
}

multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
                                 sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
                                 VEX_WIG;
}

multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd, bit IsCommutable = 0> {
  defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
                                IsCommutable>;

  defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
                                IsCommutable>;
}

multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd, bit IsCommutable = 0> {
  defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
                                IsCommutable>;

  defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
                                IsCommutable>;
}

multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched,
                                  bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
                                    sched, HasAVX512, IsCommutable>,
              avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
                                    sched, HasBWI, IsCommutable>;
}
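
// A sketch of the naming this scheme produces: "defm VPADD :
// avx512_binop_rm_vl_all<...>" below expands over the B/W/D/Q element sizes
// and the Z128/Z256/Z widths, giving instructions such as VPADDBZ128rr,
// VPADDWZ256rm or VPADDDZrmb (not an exhaustive list).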

multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            SDNode OpNode,X86VectorVTInfo _Src,
                            X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
                            bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
                            "$src2, $src1","$src1, $src2",
                            (_Dst.VT (OpNode
                                         (_Src.VT _Src.RC:$src1),
                                         (_Src.VT _Src.RC:$src2))),
                            IsCommutable>,
                            AVX512BIBase, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
                        "$src2, $src1", "$src1, $src2",
                        (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                         (_Src.LdFrag addr:$src2)))>,
                        AVX512BIBase, EVEX_4V,
                        Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                    (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
                    OpcodeStr,
                    "${src2}"#_Brdct.BroadcastStr#", $src1",
                    "$src1, ${src2}"#_Brdct.BroadcastStr,
                    (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
                                 (_Brdct.VT (_Brdct.BroadcastLdFrag addr:$src2)))))>,
                    AVX512BIBase, EVEX_4V, EVEX_B,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
}

defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
                                    SchedWriteVecALU, 1>;
defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
                                    SchedWriteVecALU, 0>;
defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat,
                                    SchedWriteVecALU, HasBWI, 0>;
defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
                                     SchedWriteVecALU, HasBWI, 1>;
defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat,
                                     SchedWriteVecALU, HasBWI, 0>;
defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
                                    SchedWritePMULLD, HasAVX512, 1>, T8PD;
defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
                                    SchedWriteVecIMul, HasBWI, 1>;
defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
                                    SchedWriteVecIMul, HasDQI, 1>, T8PD,
                                    NotEVEX2VEXConvertible;
defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
                                    HasBWI, 1>;
defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
                                     HasBWI, 1>;
defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
                                      SchedWriteVecIMul, HasBWI, 1>, T8PD;
defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", avgceilu,
                                   SchedWriteVecALU, HasBWI, 1>;
defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
                                    SchedWriteVecIMul, HasAVX512, 1>, T8PD;
defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
                                     SchedWriteVecIMul, HasAVX512, 1>;

multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
                            X86SchedWriteWidths sched,
                            AVX512VLVectorVTInfo _SrcVTInfo,
                            AVX512VLVectorVTInfo _DstVTInfo,
                            SDNode OpNode, Predicate prd, bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
                                 _SrcVTInfo.info512, _DstVTInfo.info512,
                                 v8i64_info, IsCommutable>,
                                 EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
  let Predicates = [HasVLX, prd] in {
    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
                                      _SrcVTInfo.info256, _DstVTInfo.info256,
                                      v4i64x_info, IsCommutable>,
                                      EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
                                      _SrcVTInfo.info128, _DstVTInfo.info128,
                                      v2i64x_info, IsCommutable>,
                                      EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
  }
}

defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
                                       avx512vl_i8_info, avx512vl_i8_info,
                                       X86multishift, HasVBMI, 0>, T8PD;

multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
                            X86FoldableSchedWrite sched> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                    (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
                    OpcodeStr,
                    "${src2}"#_Src.BroadcastStr#", $src1",
                    "$src1, ${src2}"#_Src.BroadcastStr,
                    (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
                                 (_Src.VT (_Src.BroadcastLdFrag addr:$src2)))))>,
                    EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
                           SDNode OpNode,X86VectorVTInfo _Src,
                           X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
                           bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
                            "$src2, $src1","$src1, $src2",
                            (_Dst.VT (OpNode
                                         (_Src.VT _Src.RC:$src1),
                                         (_Src.VT _Src.RC:$src2))),
                            IsCommutable, IsCommutable>,
                            EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
                        "$src2, $src1", "$src1, $src2",
                        (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                         (_Src.LdFrag addr:$src2)))>,
                        EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
                        Sched<[sched.Folded, sched.ReadAfterFold]>;
}
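
// For orientation (a sketch): the pack operations instantiated below narrow
// two sources with saturation, e.g.
//   vpackssdw zmm0, zmm1, zmm2   ; v16i32 x 2 -> v32i16, signed saturation
// with the usual x86 behavior of packing within each 128-bit lane.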
"$src1, $src2", 5107 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), 5108 (_Src.LdFrag addr:$src2)))>, 5109 EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>, 5110 Sched<[sched.Folded, sched.ReadAfterFold]>; 5111} 5112 5113multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr, 5114 SDNode OpNode> { 5115 let Predicates = [HasBWI] in 5116 defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info, 5117 v32i16_info, SchedWriteShuffle.ZMM>, 5118 avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info, 5119 v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512; 5120 let Predicates = [HasBWI, HasVLX] in { 5121 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info, 5122 v16i16x_info, SchedWriteShuffle.YMM>, 5123 avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info, 5124 v16i16x_info, SchedWriteShuffle.YMM>, 5125 EVEX_V256; 5126 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info, 5127 v8i16x_info, SchedWriteShuffle.XMM>, 5128 avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info, 5129 v8i16x_info, SchedWriteShuffle.XMM>, 5130 EVEX_V128; 5131 } 5132} 5133multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr, 5134 SDNode OpNode> { 5135 let Predicates = [HasBWI] in 5136 defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info, 5137 SchedWriteShuffle.ZMM>, EVEX_V512, VEX_WIG; 5138 let Predicates = [HasBWI, HasVLX] in { 5139 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info, 5140 v32i8x_info, SchedWriteShuffle.YMM>, 5141 EVEX_V256, VEX_WIG; 5142 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info, 5143 v16i8x_info, SchedWriteShuffle.XMM>, 5144 EVEX_V128, VEX_WIG; 5145 } 5146} 5147 5148multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr, 5149 SDNode OpNode, AVX512VLVectorVTInfo _Src, 5150 AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> { 5151 let Predicates = [HasBWI] in 5152 defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512, 5153 _Dst.info512, SchedWriteVecIMul.ZMM, 5154 IsCommutable>, EVEX_V512; 5155 let Predicates = [HasBWI, HasVLX] in { 5156 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256, 5157 _Dst.info256, SchedWriteVecIMul.YMM, 5158 IsCommutable>, EVEX_V256; 5159 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128, 5160 _Dst.info128, SchedWriteVecIMul.XMM, 5161 IsCommutable>, EVEX_V128; 5162 } 5163} 5164 5165defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase; 5166defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase; 5167defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase; 5168defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase; 5169 5170defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw, 5171 avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, VEX_WIG; 5172defm VPMADDWD : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd, 5173 avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, VEX_WIG; 5174 5175defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax, 5176 SchedWriteVecALU, HasBWI, 1>, T8PD; 5177defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax, 5178 SchedWriteVecALU, HasBWI, 1>; 5179defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax, 5180 SchedWriteVecALU, HasAVX512, 1>, T8PD; 5181defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax, 5182 SchedWriteVecALU, HasAVX512, 1>, T8PD, 5183 NotEVEX2VEXConvertible; 5184 5185defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", 

// PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX.
let Predicates = [HasDQI, NoVLX] in {
  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG
                (VPMULLQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;
  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (VPMULLQZrmb
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    addr:$src2),
             sub_ymm)>;

  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG
                (VPMULLQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (VPMULLQZrmb
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    addr:$src2),
             sub_xmm)>;
}

multiclass avx512_min_max_lowering<string Instr, SDNode OpNode> {
  def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rr")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;
  def : Pat<(v4i64 (OpNode (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rmb")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    addr:$src2),
             sub_ymm)>;

  def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rr")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
  def : Pat<(v2i64 (OpNode (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rmb")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    addr:$src2),
             sub_xmm)>;
}
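
// The lowering above, in sketch form: without VLX only the 512-bit i64
// min/max exists, so a 128/256-bit operation is widened into a zmm (upper
// lanes undef), executed once, and the low xmm/ymm extracted, e.g.
//   vpmaxsq zmm0, zmm1, zmm2   ; only the low lanes are used afterwards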

let Predicates = [HasAVX512, NoVLX] in {
  defm : avx512_min_max_lowering<"VPMAXUQZ", umax>;
  defm : avx512_min_max_lowering<"VPMINUQZ", umin>;
  defm : avx512_min_max_lowering<"VPMAXSQZ", smax>;
  defm : avx512_min_max_lowering<"VPMINSQZ", smin>;
}

//===----------------------------------------------------------------------===//
// AVX-512 Logical Instructions
//===----------------------------------------------------------------------===//

defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
                                   SchedWriteVecLogic, HasAVX512, 1>;
defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
                                  SchedWriteVecLogic, HasAVX512, 1>;
defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
                                   SchedWriteVecLogic, HasAVX512, 1>;
defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
                                    SchedWriteVecLogic, HasAVX512>;

let Predicates = [HasVLX] in {
  def : Pat<(v16i8 (and VR128X:$src1, VR128X:$src2)),
            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (and VR128X:$src1, VR128X:$src2)),
            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v16i8 (or VR128X:$src1, VR128X:$src2)),
            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (or VR128X:$src1, VR128X:$src2)),
            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v16i8 (xor VR128X:$src1, VR128X:$src2)),
            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (xor VR128X:$src1, VR128X:$src2)),
            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v16i8 (X86andnp VR128X:$src1, VR128X:$src2)),
            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (X86andnp VR128X:$src1, VR128X:$src2)),
            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(and VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(and VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(or VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPORQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(or VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPORQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(xor VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(xor VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)),
            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)),
            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(v32i8 (or VR256X:$src1, VR256X:$src2)),
            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (or VR256X:$src1, VR256X:$src2)),
            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(v32i8 (xor VR256X:$src1, VR256X:$src2)),
            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (xor VR256X:$src1, VR256X:$src2)),
            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(v32i8 (X86andnp VR256X:$src1, VR256X:$src2)),
            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (X86andnp VR256X:$src1, VR256X:$src2)),
VR256X:$src1, VR256X:$src2)>; 5352 5353 def : Pat<(and VR256X:$src1, (loadv32i8 addr:$src2)), 5354 (VPANDQZ256rm VR256X:$src1, addr:$src2)>; 5355 def : Pat<(and VR256X:$src1, (loadv16i16 addr:$src2)), 5356 (VPANDQZ256rm VR256X:$src1, addr:$src2)>; 5357 5358 def : Pat<(or VR256X:$src1, (loadv32i8 addr:$src2)), 5359 (VPORQZ256rm VR256X:$src1, addr:$src2)>; 5360 def : Pat<(or VR256X:$src1, (loadv16i16 addr:$src2)), 5361 (VPORQZ256rm VR256X:$src1, addr:$src2)>; 5362 5363 def : Pat<(xor VR256X:$src1, (loadv32i8 addr:$src2)), 5364 (VPXORQZ256rm VR256X:$src1, addr:$src2)>; 5365 def : Pat<(xor VR256X:$src1, (loadv16i16 addr:$src2)), 5366 (VPXORQZ256rm VR256X:$src1, addr:$src2)>; 5367 5368 def : Pat<(X86andnp VR256X:$src1, (loadv32i8 addr:$src2)), 5369 (VPANDNQZ256rm VR256X:$src1, addr:$src2)>; 5370 def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)), 5371 (VPANDNQZ256rm VR256X:$src1, addr:$src2)>; 5372} 5373 5374let Predicates = [HasAVX512] in { 5375 def : Pat<(v64i8 (and VR512:$src1, VR512:$src2)), 5376 (VPANDQZrr VR512:$src1, VR512:$src2)>; 5377 def : Pat<(v32i16 (and VR512:$src1, VR512:$src2)), 5378 (VPANDQZrr VR512:$src1, VR512:$src2)>; 5379 5380 def : Pat<(v64i8 (or VR512:$src1, VR512:$src2)), 5381 (VPORQZrr VR512:$src1, VR512:$src2)>; 5382 def : Pat<(v32i16 (or VR512:$src1, VR512:$src2)), 5383 (VPORQZrr VR512:$src1, VR512:$src2)>; 5384 5385 def : Pat<(v64i8 (xor VR512:$src1, VR512:$src2)), 5386 (VPXORQZrr VR512:$src1, VR512:$src2)>; 5387 def : Pat<(v32i16 (xor VR512:$src1, VR512:$src2)), 5388 (VPXORQZrr VR512:$src1, VR512:$src2)>; 5389 5390 def : Pat<(v64i8 (X86andnp VR512:$src1, VR512:$src2)), 5391 (VPANDNQZrr VR512:$src1, VR512:$src2)>; 5392 def : Pat<(v32i16 (X86andnp VR512:$src1, VR512:$src2)), 5393 (VPANDNQZrr VR512:$src1, VR512:$src2)>; 5394 5395 def : Pat<(and VR512:$src1, (loadv64i8 addr:$src2)), 5396 (VPANDQZrm VR512:$src1, addr:$src2)>; 5397 def : Pat<(and VR512:$src1, (loadv32i16 addr:$src2)), 5398 (VPANDQZrm VR512:$src1, addr:$src2)>; 5399 5400 def : Pat<(or VR512:$src1, (loadv64i8 addr:$src2)), 5401 (VPORQZrm VR512:$src1, addr:$src2)>; 5402 def : Pat<(or VR512:$src1, (loadv32i16 addr:$src2)), 5403 (VPORQZrm VR512:$src1, addr:$src2)>; 5404 5405 def : Pat<(xor VR512:$src1, (loadv64i8 addr:$src2)), 5406 (VPXORQZrm VR512:$src1, addr:$src2)>; 5407 def : Pat<(xor VR512:$src1, (loadv32i16 addr:$src2)), 5408 (VPXORQZrm VR512:$src1, addr:$src2)>; 5409 5410 def : Pat<(X86andnp VR512:$src1, (loadv64i8 addr:$src2)), 5411 (VPANDNQZrm VR512:$src1, addr:$src2)>; 5412 def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)), 5413 (VPANDNQZrm VR512:$src1, addr:$src2)>; 5414} 5415 5416// Patterns to catch vselect with different type than logic op. 5417multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode, 5418 X86VectorVTInfo _, 5419 X86VectorVTInfo IntInfo> { 5420 // Masked register-register logical operations. 5421 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 5422 (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))), 5423 _.RC:$src0)), 5424 (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask, 5425 _.RC:$src1, _.RC:$src2)>; 5426 5427 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 5428 (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))), 5429 _.ImmAllZerosV)), 5430 (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1, 5431 _.RC:$src2)>; 5432 5433 // Masked register-memory logical operations. 
5434 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 5435 (bitconvert (IntInfo.VT (OpNode _.RC:$src1, 5436 (load addr:$src2)))), 5437 _.RC:$src0)), 5438 (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask, 5439 _.RC:$src1, addr:$src2)>; 5440 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 5441 (bitconvert (IntInfo.VT (OpNode _.RC:$src1, 5442 (load addr:$src2)))), 5443 _.ImmAllZerosV)), 5444 (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1, 5445 addr:$src2)>; 5446} 5447 5448multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode, 5449 X86VectorVTInfo _, 5450 X86VectorVTInfo IntInfo> { 5451 // Register-broadcast logical operations. 5452 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 5453 (bitconvert 5454 (IntInfo.VT (OpNode _.RC:$src1, 5455 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))), 5456 _.RC:$src0)), 5457 (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask, 5458 _.RC:$src1, addr:$src2)>; 5459 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 5460 (bitconvert 5461 (IntInfo.VT (OpNode _.RC:$src1, 5462 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))), 5463 _.ImmAllZerosV)), 5464 (!cast<Instruction>(InstrStr#rmbkz) _.KRCWM:$mask, 5465 _.RC:$src1, addr:$src2)>; 5466} 5467 5468multiclass avx512_logical_lowering_sizes<string InstrStr, SDNode OpNode, 5469 AVX512VLVectorVTInfo SelectInfo, 5470 AVX512VLVectorVTInfo IntInfo> { 5471let Predicates = [HasVLX] in { 5472 defm : avx512_logical_lowering<InstrStr#"Z128", OpNode, SelectInfo.info128, 5473 IntInfo.info128>; 5474 defm : avx512_logical_lowering<InstrStr#"Z256", OpNode, SelectInfo.info256, 5475 IntInfo.info256>; 5476} 5477let Predicates = [HasAVX512] in { 5478 defm : avx512_logical_lowering<InstrStr#"Z", OpNode, SelectInfo.info512, 5479 IntInfo.info512>; 5480} 5481} 5482 5483multiclass avx512_logical_lowering_sizes_bcast<string InstrStr, SDNode OpNode, 5484 AVX512VLVectorVTInfo SelectInfo, 5485 AVX512VLVectorVTInfo IntInfo> { 5486let Predicates = [HasVLX] in { 5487 defm : avx512_logical_lowering_bcast<InstrStr#"Z128", OpNode, 5488 SelectInfo.info128, IntInfo.info128>; 5489 defm : avx512_logical_lowering_bcast<InstrStr#"Z256", OpNode, 5490 SelectInfo.info256, IntInfo.info256>; 5491} 5492let Predicates = [HasAVX512] in { 5493 defm : avx512_logical_lowering_bcast<InstrStr#"Z", OpNode, 5494 SelectInfo.info512, IntInfo.info512>; 5495} 5496} 5497 5498multiclass avx512_logical_lowering_types<string InstrStr, SDNode OpNode> { 5499 // i64 vselect with i32/i16/i8 logic op 5500 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info, 5501 avx512vl_i32_info>; 5502 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info, 5503 avx512vl_i16_info>; 5504 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info, 5505 avx512vl_i8_info>; 5506 5507 // i32 vselect with i64/i16/i8 logic op 5508 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info, 5509 avx512vl_i64_info>; 5510 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info, 5511 avx512vl_i16_info>; 5512 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info, 5513 avx512vl_i8_info>; 5514 5515 // f32 vselect with i64/i32/i16/i8 logic op 5516 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info, 5517 avx512vl_i64_info>; 5518 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info, 5519 avx512vl_i32_info>; 5520 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info, 5521 
avx512vl_i16_info>; 5522 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info, 5523 avx512vl_i8_info>; 5524 5525 // f64 vselect with i64/i32/i16/i8 logic op 5526 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info, 5527 avx512vl_i64_info>; 5528 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info, 5529 avx512vl_i32_info>; 5530 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info, 5531 avx512vl_i16_info>; 5532 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info, 5533 avx512vl_i8_info>; 5534 5535 defm : avx512_logical_lowering_sizes_bcast<InstrStr#"D", OpNode, 5536 avx512vl_f32_info, 5537 avx512vl_i32_info>; 5538 defm : avx512_logical_lowering_sizes_bcast<InstrStr#"Q", OpNode, 5539 avx512vl_f64_info, 5540 avx512vl_i64_info>; 5541} 5542 5543defm : avx512_logical_lowering_types<"VPAND", and>; 5544defm : avx512_logical_lowering_types<"VPOR", or>; 5545defm : avx512_logical_lowering_types<"VPXOR", xor>; 5546defm : avx512_logical_lowering_types<"VPANDN", X86andnp>; 5547 5548//===----------------------------------------------------------------------===// 5549// AVX-512 FP arithmetic 5550//===----------------------------------------------------------------------===// 5551 5552multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _, 5553 SDPatternOperator OpNode, SDNode VecNode, 5554 X86FoldableSchedWrite sched, bit IsCommutable> { 5555 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { 5556 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 5557 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 5558 "$src2, $src1", "$src1, $src2", 5559 (_.VT (VecNode _.RC:$src1, _.RC:$src2))>, 5560 Sched<[sched]>; 5561 5562 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 5563 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr, 5564 "$src2, $src1", "$src1, $src2", 5565 (_.VT (VecNode _.RC:$src1, 5566 (_.ScalarIntMemFrags addr:$src2)))>, 5567 Sched<[sched.Folded, sched.ReadAfterFold]>; 5568 let isCodeGenOnly = 1, Predicates = [HasAVX512] in { 5569 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst), 5570 (ins _.FRC:$src1, _.FRC:$src2), 5571 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 5572 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>, 5573 Sched<[sched]> { 5574 let isCommutable = IsCommutable; 5575 } 5576 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst), 5577 (ins _.FRC:$src1, _.ScalarMemOp:$src2), 5578 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 5579 [(set _.FRC:$dst, (OpNode _.FRC:$src1, 5580 (_.ScalarLdFrag addr:$src2)))]>, 5581 Sched<[sched.Folded, sched.ReadAfterFold]>; 5582 } 5583 } 5584} 5585 5586multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _, 5587 SDNode VecNode, X86FoldableSchedWrite sched> { 5588 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in 5589 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 5590 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr, 5591 "$rc, $src2, $src1", "$src1, $src2, $rc", 5592 (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), 5593 (i32 timm:$rc))>, 5594 EVEX_B, EVEX_RC, Sched<[sched]>; 5595} 5596multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _, 5597 SDNode OpNode, SDNode VecNode, SDNode SaeNode, 5598 X86FoldableSchedWrite sched, bit IsCommutable, 5599 string EVEX2VexOvrd> { 5600 let ExeDomain = _.ExeDomain in { 5601 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, 
_, (outs _.RC:$dst), 5602 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 5603 "$src2, $src1", "$src1, $src2", 5604 (_.VT (VecNode _.RC:$src1, _.RC:$src2))>, 5605 Sched<[sched]>, SIMD_EXC; 5606 5607 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 5608 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr, 5609 "$src2, $src1", "$src1, $src2", 5610 (_.VT (VecNode _.RC:$src1, 5611 (_.ScalarIntMemFrags addr:$src2)))>, 5612 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 5613 5614 let isCodeGenOnly = 1, Predicates = [HasAVX512], 5615 Uses = [MXCSR], mayRaiseFPException = 1 in { 5616 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst), 5617 (ins _.FRC:$src1, _.FRC:$src2), 5618 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 5619 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>, 5620 Sched<[sched]>, 5621 EVEX2VEXOverride<EVEX2VexOvrd#"rr"> { 5622 let isCommutable = IsCommutable; 5623 } 5624 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst), 5625 (ins _.FRC:$src1, _.ScalarMemOp:$src2), 5626 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 5627 [(set _.FRC:$dst, (OpNode _.FRC:$src1, 5628 (_.ScalarLdFrag addr:$src2)))]>, 5629 Sched<[sched.Folded, sched.ReadAfterFold]>, 5630 EVEX2VEXOverride<EVEX2VexOvrd#"rm">; 5631 } 5632 5633 let Uses = [MXCSR] in 5634 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 5635 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 5636 "{sae}, $src2, $src1", "$src1, $src2, {sae}", 5637 (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>, 5638 EVEX_B, Sched<[sched]>; 5639 } 5640} 5641 5642multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 5643 SDNode VecNode, SDNode RndNode, 5644 X86SchedWriteSizes sched, bit IsCommutable> { 5645 defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode, 5646 sched.PS.Scl, IsCommutable>, 5647 avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode, 5648 sched.PS.Scl>, 5649 XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>; 5650 defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode, 5651 sched.PD.Scl, IsCommutable>, 5652 avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode, 5653 sched.PD.Scl>, 5654 XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>; 5655 let Predicates = [HasFP16] in 5656 defm SHZ : avx512_fp_scalar<opc, OpcodeStr#"sh", f16x_info, OpNode, 5657 VecNode, sched.PH.Scl, IsCommutable>, 5658 avx512_fp_scalar_round<opc, OpcodeStr#"sh", f16x_info, RndNode, 5659 sched.PH.Scl>, 5660 T_MAP5XS, EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>; 5661} 5662 5663multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode, 5664 SDNode VecNode, SDNode SaeNode, 5665 X86SchedWriteSizes sched, bit IsCommutable> { 5666 defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode, 5667 VecNode, SaeNode, sched.PS.Scl, IsCommutable, 5668 NAME#"SS">, 5669 XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>; 5670 defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode, 5671 VecNode, SaeNode, sched.PD.Scl, IsCommutable, 5672 NAME#"SD">, 5673 XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>; 5674 let Predicates = [HasFP16] in { 5675 defm SHZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sh", f16x_info, OpNode, 5676 VecNode, SaeNode, sched.PH.Scl, IsCommutable, 5677 NAME#"SH">, 5678 T_MAP5XS, EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>, 5679 NotEVEX2VEXConvertible; 5680 } 5681} 5682defm VADD : avx512_binop_s_round<0x58, "vadd", any_fadd, X86fadds, X86faddRnds, 5683 SchedWriteFAddSizes, 1>; 5684defm VMUL : 
avx512_binop_s_round<0x59, "vmul", any_fmul, X86fmuls, X86fmulRnds, 5685 SchedWriteFMulSizes, 1>; 5686defm VSUB : avx512_binop_s_round<0x5C, "vsub", any_fsub, X86fsubs, X86fsubRnds, 5687 SchedWriteFAddSizes, 0>; 5688defm VDIV : avx512_binop_s_round<0x5E, "vdiv", any_fdiv, X86fdivs, X86fdivRnds, 5689 SchedWriteFDivSizes, 0>; 5690defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminSAEs, 5691 SchedWriteFCmpSizes, 0>; 5692defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs, 5693 SchedWriteFCmpSizes, 0>; 5694 5695// MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use 5696// X86fminc and X86fmaxc instead of X86fmin and X86fmax 5697multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr, 5698 X86VectorVTInfo _, SDNode OpNode, 5699 X86FoldableSchedWrite sched, 5700 string EVEX2VEXOvrd> { 5701 let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in { 5702 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst), 5703 (ins _.FRC:$src1, _.FRC:$src2), 5704 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 5705 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>, 5706 Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr"> { 5707 let isCommutable = 1; 5708 } 5709 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst), 5710 (ins _.FRC:$src1, _.ScalarMemOp:$src2), 5711 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 5712 [(set _.FRC:$dst, (OpNode _.FRC:$src1, 5713 (_.ScalarLdFrag addr:$src2)))]>, 5714 Sched<[sched.Folded, sched.ReadAfterFold]>, 5715 EVEX2VEXOverride<EVEX2VEXOvrd#"rm">; 5716 } 5717} 5718defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc, 5719 SchedWriteFCmp.Scl, "VMINCSS">, XS, 5720 EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC; 5721 5722defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc, 5723 SchedWriteFCmp.Scl, "VMINCSD">, XD, 5724 VEX_W, EVEX_4V, VEX_LIG, 5725 EVEX_CD8<64, CD8VT1>, SIMD_EXC; 5726 5727defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc, 5728 SchedWriteFCmp.Scl, "VMAXCSS">, XS, 5729 EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC; 5730 5731defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc, 5732 SchedWriteFCmp.Scl, "VMAXCSD">, XD, 5733 VEX_W, EVEX_4V, VEX_LIG, 5734 EVEX_CD8<64, CD8VT1>, SIMD_EXC; 5735 5736defm VMINCSHZ : avx512_comutable_binop_s<0x5D, "vminsh", f16x_info, X86fminc, 5737 SchedWriteFCmp.Scl, "VMINCSH">, T_MAP5XS, 5738 EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC, 5739 NotEVEX2VEXConvertible; 5740defm VMAXCSHZ : avx512_comutable_binop_s<0x5F, "vmaxsh", f16x_info, X86fmaxc, 5741 SchedWriteFCmp.Scl, "VMAXCSH">, T_MAP5XS, 5742 EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC, 5743 NotEVEX2VEXConvertible; 5744 5745multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 5746 SDPatternOperator MaskOpNode, 5747 X86VectorVTInfo _, X86FoldableSchedWrite sched, 5748 bit IsCommutable, 5749 bit IsKCommutable = IsCommutable, 5750 string suffix = _.Suffix, 5751 string ClobberConstraint = "", 5752 bit MayRaiseFPException = 1> { 5753 let ExeDomain = _.ExeDomain, hasSideEffects = 0, 5754 Uses = [MXCSR], mayRaiseFPException = MayRaiseFPException in { 5755 defm rr: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst), 5756 (ins _.RC:$src1, _.RC:$src2), OpcodeStr#suffix, 5757 "$src2, $src1", "$src1, $src2", 5758 (_.VT (OpNode _.RC:$src1, _.RC:$src2)), 5759 (_.VT (MaskOpNode _.RC:$src1, _.RC:$src2)), ClobberConstraint, 5760 IsCommutable, IsKCommutable, 
IsKCommutable>, EVEX_4V, Sched<[sched]>; 5761 let mayLoad = 1 in { 5762 defm rm: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst), 5763 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#suffix, 5764 "$src2, $src1", "$src1, $src2", 5765 (OpNode _.RC:$src1, (_.LdFrag addr:$src2)), 5766 (MaskOpNode _.RC:$src1, (_.LdFrag addr:$src2)), 5767 ClobberConstraint>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; 5768 defm rmb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst), 5769 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#suffix, 5770 "${src2}"#_.BroadcastStr#", $src1", 5771 "$src1, ${src2}"#_.BroadcastStr, 5772 (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))), 5773 (MaskOpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))), 5774 ClobberConstraint>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 5775 } 5776 } 5777} 5778 5779multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr, 5780 SDPatternOperator OpNodeRnd, 5781 X86FoldableSchedWrite sched, X86VectorVTInfo _, 5782 string suffix = _.Suffix, 5783 string ClobberConstraint = ""> { 5784 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in 5785 defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 5786 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr#suffix, 5787 "$rc, $src2, $src1", "$src1, $src2, $rc", 5788 (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc))), 5789 0, 0, 0, vselect_mask, ClobberConstraint>, 5790 EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>; 5791} 5792 5793multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr, 5794 SDPatternOperator OpNodeSAE, 5795 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5796 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in 5797 defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 5798 (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix, 5799 "{sae}, $src2, $src1", "$src1, $src2, {sae}", 5800 (_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>, 5801 EVEX_4V, EVEX_B, Sched<[sched]>; 5802} 5803 5804multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 5805 SDPatternOperator MaskOpNode, 5806 Predicate prd, X86SchedWriteSizes sched, 5807 bit IsCommutable = 0, 5808 bit IsPD128Commutable = IsCommutable> { 5809 let Predicates = [prd] in { 5810 defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info, 5811 sched.PS.ZMM, IsCommutable>, EVEX_V512, PS, 5812 EVEX_CD8<32, CD8VF>; 5813 defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f64_info, 5814 sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W, 5815 EVEX_CD8<64, CD8VF>; 5816 } 5817 5818 // Define only if AVX512VL feature is present. 
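  // For example, "defm VADD : avx512_fp_binop_p<0x58, ...>" further below
  // yields VADDPSZ/VADDPDZ from the block above and, when VLX is available,
  // VADDPSZ128/VADDPSZ256/VADDPDZ128/VADDPDZ256 from the block below (the
  // defm name is prepended to each suffix).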
5819 let Predicates = [prd, HasVLX] in { 5820 defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info, 5821 sched.PS.XMM, IsCommutable>, EVEX_V128, PS, 5822 EVEX_CD8<32, CD8VF>; 5823 defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info, 5824 sched.PS.YMM, IsCommutable>, EVEX_V256, PS, 5825 EVEX_CD8<32, CD8VF>; 5826 defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v2f64x_info, 5827 sched.PD.XMM, IsPD128Commutable, 5828 IsCommutable>, EVEX_V128, PD, VEX_W, 5829 EVEX_CD8<64, CD8VF>; 5830 defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f64x_info, 5831 sched.PD.YMM, IsCommutable>, EVEX_V256, PD, VEX_W, 5832 EVEX_CD8<64, CD8VF>; 5833 } 5834} 5835 5836multiclass avx512_fp_binop_ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 5837 SDPatternOperator MaskOpNode, 5838 X86SchedWriteSizes sched, bit IsCommutable = 0> { 5839 let Predicates = [HasFP16] in { 5840 defm PHZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v32f16_info, 5841 sched.PH.ZMM, IsCommutable>, EVEX_V512, T_MAP5PS, 5842 EVEX_CD8<16, CD8VF>; 5843 } 5844 let Predicates = [HasVLX, HasFP16] in { 5845 defm PHZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f16x_info, 5846 sched.PH.XMM, IsCommutable>, EVEX_V128, T_MAP5PS, 5847 EVEX_CD8<16, CD8VF>; 5848 defm PHZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f16x_info, 5849 sched.PH.YMM, IsCommutable>, EVEX_V256, T_MAP5PS, 5850 EVEX_CD8<16, CD8VF>; 5851 } 5852} 5853 5854let Uses = [MXCSR] in 5855multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd, 5856 X86SchedWriteSizes sched> { 5857 let Predicates = [HasFP16] in { 5858 defm PHZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM, 5859 v32f16_info>, 5860 EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>; 5861 } 5862 defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM, 5863 v16f32_info>, 5864 EVEX_V512, PS, EVEX_CD8<32, CD8VF>; 5865 defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM, 5866 v8f64_info>, 5867 EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>; 5868} 5869 5870let Uses = [MXCSR] in 5871multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd, 5872 X86SchedWriteSizes sched> { 5873 let Predicates = [HasFP16] in { 5874 defm PHZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM, 5875 v32f16_info>, 5876 EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>; 5877 } 5878 defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM, 5879 v16f32_info>, 5880 EVEX_V512, PS, EVEX_CD8<32, CD8VF>; 5881 defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM, 5882 v8f64_info>, 5883 EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>; 5884} 5885 5886defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, fadd, HasAVX512, 5887 SchedWriteFAddSizes, 1>, 5888 avx512_fp_binop_ph<0x58, "vadd", any_fadd, fadd, SchedWriteFAddSizes, 1>, 5889 avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>; 5890defm VMUL : avx512_fp_binop_p<0x59, "vmul", any_fmul, fmul, HasAVX512, 5891 SchedWriteFMulSizes, 1>, 5892 avx512_fp_binop_ph<0x59, "vmul", any_fmul, fmul, SchedWriteFMulSizes, 1>, 5893 avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>; 5894defm VSUB : avx512_fp_binop_p<0x5C, "vsub", any_fsub, fsub, HasAVX512, 5895 SchedWriteFAddSizes>, 5896 avx512_fp_binop_ph<0x5C, "vsub", any_fsub, fsub, SchedWriteFAddSizes>, 5897 avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, 
SchedWriteFAddSizes>; 5898defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", any_fdiv, fdiv, HasAVX512, 5899 SchedWriteFDivSizes>, 5900 avx512_fp_binop_ph<0x5E, "vdiv", any_fdiv, fdiv, SchedWriteFDivSizes>, 5901 avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>; 5902defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, X86fmin, HasAVX512, 5903 SchedWriteFCmpSizes, 0>, 5904 avx512_fp_binop_ph<0x5D, "vmin", X86fmin, X86fmin, SchedWriteFCmpSizes, 0>, 5905 avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>; 5906defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, X86fmax, HasAVX512, 5907 SchedWriteFCmpSizes, 0>, 5908 avx512_fp_binop_ph<0x5F, "vmax", X86fmax, X86fmax, SchedWriteFCmpSizes, 0>, 5909 avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>; 5910let isCodeGenOnly = 1 in { 5911 defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, X86fminc, HasAVX512, 5912 SchedWriteFCmpSizes, 1>, 5913 avx512_fp_binop_ph<0x5D, "vmin", X86fminc, X86fminc, 5914 SchedWriteFCmpSizes, 1>; 5915 defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, X86fmaxc, HasAVX512, 5916 SchedWriteFCmpSizes, 1>, 5917 avx512_fp_binop_ph<0x5F, "vmax", X86fmaxc, X86fmaxc, 5918 SchedWriteFCmpSizes, 1>; 5919} 5920let Uses = []<Register>, mayRaiseFPException = 0 in { 5921defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, null_frag, HasDQI, 5922 SchedWriteFLogicSizes, 1>; 5923defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, null_frag, HasDQI, 5924 SchedWriteFLogicSizes, 0>; 5925defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, null_frag, HasDQI, 5926 SchedWriteFLogicSizes, 1>; 5927defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, null_frag, HasDQI, 5928 SchedWriteFLogicSizes, 1>; 5929} 5930 5931multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode, 5932 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5933 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { 5934 defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 5935 (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix, 5936 "$src2, $src1", "$src1, $src2", 5937 (_.VT (OpNode _.RC:$src1, _.RC:$src2))>, 5938 EVEX_4V, Sched<[sched]>; 5939 defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 5940 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix, 5941 "$src2, $src1", "$src1, $src2", 5942 (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>, 5943 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; 5944 defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 5945 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix, 5946 "${src2}"#_.BroadcastStr#", $src1", 5947 "$src1, ${src2}"#_.BroadcastStr, 5948 (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>, 5949 EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 5950 } 5951} 5952 5953multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode, 5954 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 5955 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { 5956 defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 5957 (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix, 5958 "$src2, $src1", "$src1, $src2", 5959 (_.VT (OpNode _.RC:$src1, _.RC:$src2))>, 5960 Sched<[sched]>; 5961 defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 5962 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr#_.Suffix, 5963 "$src2, $src1", "$src1, $src2", 5964 (OpNode _.RC:$src1, (_.ScalarIntMemFrags addr:$src2))>, 5965 Sched<[sched.Folded, 
sched.ReadAfterFold]>; 5966 } 5967} 5968 5969multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr, 5970 X86SchedWriteWidths sched> { 5971 let Predicates = [HasFP16] in { 5972 defm PHZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v32f16_info>, 5973 avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v32f16_info>, 5974 EVEX_V512, T_MAP6PD, EVEX_CD8<16, CD8VF>; 5975 defm SHZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f16x_info>, 5976 avx512_fp_scalar_round<opcScaler, OpcodeStr#"sh", f16x_info, X86scalefsRnd, sched.Scl>, 5977 EVEX_4V, T_MAP6PD, EVEX_CD8<16, CD8VT1>; 5978 } 5979 defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>, 5980 avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>, 5981 EVEX_V512, EVEX_CD8<32, CD8VF>, T8PD; 5982 defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>, 5983 avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>, 5984 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>, T8PD; 5985 defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>, 5986 avx512_fp_scalar_round<opcScaler, OpcodeStr#"ss", f32x_info, 5987 X86scalefsRnd, sched.Scl>, 5988 EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, T8PD; 5989 defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>, 5990 avx512_fp_scalar_round<opcScaler, OpcodeStr#"sd", f64x_info, 5991 X86scalefsRnd, sched.Scl>, 5992 EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>, VEX_W, T8PD; 5993 5994 // Define only if AVX512VL feature is present. 5995 let Predicates = [HasVLX] in { 5996 defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>, 5997 EVEX_V128, EVEX_CD8<32, CD8VF>, T8PD; 5998 defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>, 5999 EVEX_V256, EVEX_CD8<32, CD8VF>, T8PD; 6000 defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>, 6001 EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>, T8PD; 6002 defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>, 6003 EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>, T8PD; 6004 } 6005 6006 let Predicates = [HasFP16, HasVLX] in { 6007 defm PHZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v8f16x_info>, 6008 EVEX_V128, EVEX_CD8<16, CD8VF>, T_MAP6PD; 6009 defm PHZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v16f16x_info>, 6010 EVEX_V256, EVEX_CD8<16, CD8VF>, T_MAP6PD; 6011 } 6012} 6013defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", 6014 SchedWriteFAdd>, NotEVEX2VEXConvertible; 6015 6016//===----------------------------------------------------------------------===// 6017// AVX-512 VPTESTM instructions 6018//===----------------------------------------------------------------------===// 6019 6020multiclass avx512_vptest<bits<8> opc, string OpcodeStr, 6021 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 6022 // NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG. 6023 // There are just too many permutations due to commutability and bitcasts. 
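  // For reference, the per-element semantics being matched by hand are:
  //   vptestm:  k[i] = ((src1[i] & src2[i]) != 0)
  //   vptestnm: k[i] = ((src1[i] & src2[i]) == 0)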
6024 let ExeDomain = _.ExeDomain, hasSideEffects = 0 in { 6025 defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst), 6026 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 6027 "$src2, $src1", "$src1, $src2", 6028 (null_frag), (null_frag), 1>, 6029 EVEX_4V, Sched<[sched]>; 6030 let mayLoad = 1 in 6031 defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst), 6032 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr, 6033 "$src2, $src1", "$src1, $src2", 6034 (null_frag), (null_frag)>, 6035 EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, 6036 Sched<[sched.Folded, sched.ReadAfterFold]>; 6037 } 6038} 6039 6040multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr, 6041 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 6042 let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in 6043 defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst), 6044 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr, 6045 "${src2}"#_.BroadcastStr#", $src1", 6046 "$src1, ${src2}"#_.BroadcastStr, 6047 (null_frag), (null_frag)>, 6048 EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, 6049 Sched<[sched.Folded, sched.ReadAfterFold]>; 6050} 6051 6052multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr, 6053 X86SchedWriteWidths sched, 6054 AVX512VLVectorVTInfo _> { 6055 let Predicates = [HasAVX512] in 6056 defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512>, 6057 avx512_vptest_mb<opc, OpcodeStr, sched.ZMM, _.info512>, EVEX_V512; 6058 6059 let Predicates = [HasAVX512, HasVLX] in { 6060 defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256>, 6061 avx512_vptest_mb<opc, OpcodeStr, sched.YMM, _.info256>, EVEX_V256; 6062 defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128>, 6063 avx512_vptest_mb<opc, OpcodeStr, sched.XMM, _.info128>, EVEX_V128; 6064 } 6065} 6066 6067multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr, 6068 X86SchedWriteWidths sched> { 6069 defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", sched, 6070 avx512vl_i32_info>; 6071 defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", sched, 6072 avx512vl_i64_info>, VEX_W; 6073} 6074 6075multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr, 6076 X86SchedWriteWidths sched> { 6077 let Predicates = [HasBWI] in { 6078 defm WZ: avx512_vptest<opc, OpcodeStr#"w", sched.ZMM, 6079 v32i16_info>, EVEX_V512, VEX_W; 6080 defm BZ: avx512_vptest<opc, OpcodeStr#"b", sched.ZMM, 6081 v64i8_info>, EVEX_V512; 6082 } 6083 6084 let Predicates = [HasVLX, HasBWI] in { 6085 defm WZ256: avx512_vptest<opc, OpcodeStr#"w", sched.YMM, 6086 v16i16x_info>, EVEX_V256, VEX_W; 6087 defm WZ128: avx512_vptest<opc, OpcodeStr#"w", sched.XMM, 6088 v8i16x_info>, EVEX_V128, VEX_W; 6089 defm BZ256: avx512_vptest<opc, OpcodeStr#"b", sched.YMM, 6090 v32i8x_info>, EVEX_V256; 6091 defm BZ128: avx512_vptest<opc, OpcodeStr#"b", sched.XMM, 6092 v16i8x_info>, EVEX_V128; 6093 } 6094} 6095 6096multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr, 6097 X86SchedWriteWidths sched> : 6098 avx512_vptest_wb<opc_wb, OpcodeStr, sched>, 6099 avx512_vptest_dq<opc_dq, OpcodeStr, sched>; 6100 6101defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm", 6102 SchedWriteVecLogic>, T8PD; 6103defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", 6104 SchedWriteVecLogic>, T8XS; 6105 6106//===----------------------------------------------------------------------===// 6107// AVX-512 Shift instructions 6108//===----------------------------------------------------------------------===// 6109 6110multiclass avx512_shift_rmi<bits<8> opc, 
Format ImmFormR, Format ImmFormM, 6111 string OpcodeStr, SDNode OpNode, 6112 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 6113 let ExeDomain = _.ExeDomain in { 6114 defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst), 6115 (ins _.RC:$src1, u8imm:$src2), OpcodeStr, 6116 "$src2, $src1", "$src1, $src2", 6117 (_.VT (OpNode _.RC:$src1, (i8 timm:$src2)))>, 6118 Sched<[sched]>; 6119 defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst), 6120 (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr, 6121 "$src2, $src1", "$src1, $src2", 6122 (_.VT (OpNode (_.VT (_.LdFrag addr:$src1)), 6123 (i8 timm:$src2)))>, 6124 Sched<[sched.Folded]>; 6125 } 6126} 6127 6128multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM, 6129 string OpcodeStr, SDNode OpNode, 6130 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 6131 let ExeDomain = _.ExeDomain in 6132 defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst), 6133 (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr, 6134 "$src2, ${src1}"#_.BroadcastStr, "${src1}"#_.BroadcastStr#", $src2", 6135 (_.VT (OpNode (_.BroadcastLdFrag addr:$src1), (i8 timm:$src2)))>, 6136 EVEX_B, Sched<[sched.Folded]>; 6137} 6138 6139multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode, 6140 X86FoldableSchedWrite sched, ValueType SrcVT, 6141 X86VectorVTInfo _> { 6142 // src2 is always 128-bit 6143 let ExeDomain = _.ExeDomain in { 6144 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 6145 (ins _.RC:$src1, VR128X:$src2), OpcodeStr, 6146 "$src2, $src1", "$src1, $src2", 6147 (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>, 6148 AVX512BIBase, EVEX_4V, Sched<[sched]>; 6149 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 6150 (ins _.RC:$src1, i128mem:$src2), OpcodeStr, 6151 "$src2, $src1", "$src1, $src2", 6152 (_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>, 6153 AVX512BIBase, 6154 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; 6155 } 6156} 6157 6158multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, 6159 X86SchedWriteWidths sched, ValueType SrcVT, 6160 AVX512VLVectorVTInfo VTInfo, 6161 Predicate prd> { 6162 let Predicates = [prd] in 6163 defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT, 6164 VTInfo.info512>, EVEX_V512, 6165 EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ; 6166 let Predicates = [prd, HasVLX] in { 6167 defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT, 6168 VTInfo.info256>, EVEX_V256, 6169 EVEX_CD8<VTInfo.info256.EltSize, CD8VH>; 6170 defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT, 6171 VTInfo.info128>, EVEX_V128, 6172 EVEX_CD8<VTInfo.info128.EltSize, CD8VF>; 6173 } 6174} 6175 6176multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw, 6177 string OpcodeStr, SDNode OpNode, 6178 X86SchedWriteWidths sched, 6179 bit NotEVEX2VEXConvertibleQ = 0> { 6180 defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32, 6181 avx512vl_i32_info, HasAVX512>; 6182 let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in 6183 defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64, 6184 avx512vl_i64_info, HasAVX512>, VEX_W; 6185 defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16, 6186 avx512vl_i16_info, HasBWI>; 6187} 6188 6189multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM, 6190 string OpcodeStr, SDNode OpNode, 6191 X86SchedWriteWidths sched, 6192 AVX512VLVectorVTInfo VTInfo> { 6193 let Predicates = [HasAVX512] in 6194 defm Z: 
avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, 6195 sched.ZMM, VTInfo.info512>, 6196 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM, 6197 VTInfo.info512>, EVEX_V512; 6198 let Predicates = [HasAVX512, HasVLX] in { 6199 defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, 6200 sched.YMM, VTInfo.info256>, 6201 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM, 6202 VTInfo.info256>, EVEX_V256; 6203 defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, 6204 sched.XMM, VTInfo.info128>, 6205 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM, 6206 VTInfo.info128>, EVEX_V128; 6207 } 6208} 6209 6210multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM, 6211 string OpcodeStr, SDNode OpNode, 6212 X86SchedWriteWidths sched> { 6213 let Predicates = [HasBWI] in 6214 defm WZ: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode, 6215 sched.ZMM, v32i16_info>, EVEX_V512, VEX_WIG; 6216 let Predicates = [HasVLX, HasBWI] in { 6217 defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode, 6218 sched.YMM, v16i16x_info>, EVEX_V256, VEX_WIG; 6219 defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode, 6220 sched.XMM, v8i16x_info>, EVEX_V128, VEX_WIG; 6221 } 6222} 6223 6224multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq, 6225 Format ImmFormR, Format ImmFormM, 6226 string OpcodeStr, SDNode OpNode, 6227 X86SchedWriteWidths sched, 6228 bit NotEVEX2VEXConvertibleQ = 0> { 6229 defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode, 6230 sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; 6231 let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in 6232 defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode, 6233 sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W; 6234} 6235 6236defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli, 6237 SchedWriteVecShiftImm>, 6238 avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli, 6239 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V; 6240 6241defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli, 6242 SchedWriteVecShiftImm>, 6243 avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli, 6244 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V; 6245 6246defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai, 6247 SchedWriteVecShiftImm, 1>, 6248 avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai, 6249 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V; 6250 6251defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri, 6252 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V; 6253defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli, 6254 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V; 6255 6256defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl, 6257 SchedWriteVecShift>; 6258defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra, 6259 SchedWriteVecShift, 1>; 6260defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl, 6261 SchedWriteVecShift>; 6262 6263// Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX. 
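// For example (illustration only), with AVX512F but no VLX:
//   (v2i64 (X86vsra xmm_v, xmm_cnt))
//     --> (EXTRACT_SUBREG
//           (VPSRAQZrr (INSERT_SUBREG (IMPLICIT_DEF), xmm_v, sub_xmm),
//                      xmm_cnt),
//           sub_xmm)
// Only the vector operand needs widening; the shift count already lives in
// an xmm register.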
6264let Predicates = [HasAVX512, NoVLX] in { 6265 def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))), 6266 (EXTRACT_SUBREG (v8i64 6267 (VPSRAQZrr 6268 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6269 VR128X:$src2)), sub_ymm)>; 6270 6271 def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))), 6272 (EXTRACT_SUBREG (v8i64 6273 (VPSRAQZrr 6274 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6275 VR128X:$src2)), sub_xmm)>; 6276 6277 def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 timm:$src2))), 6278 (EXTRACT_SUBREG (v8i64 6279 (VPSRAQZri 6280 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6281 timm:$src2)), sub_ymm)>; 6282 6283 def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 timm:$src2))), 6284 (EXTRACT_SUBREG (v8i64 6285 (VPSRAQZri 6286 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6287 timm:$src2)), sub_xmm)>; 6288} 6289 6290//===-------------------------------------------------------------------===// 6291// Variable Bit Shifts 6292//===-------------------------------------------------------------------===// 6293 6294multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode, 6295 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 6296 let ExeDomain = _.ExeDomain in { 6297 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 6298 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 6299 "$src2, $src1", "$src1, $src2", 6300 (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>, 6301 AVX5128IBase, EVEX_4V, Sched<[sched]>; 6302 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 6303 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr, 6304 "$src2, $src1", "$src1, $src2", 6305 (_.VT (OpNode _.RC:$src1, 6306 (_.VT (_.LdFrag addr:$src2))))>, 6307 AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, 6308 Sched<[sched.Folded, sched.ReadAfterFold]>; 6309 } 6310} 6311 6312multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode, 6313 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 6314 let ExeDomain = _.ExeDomain in 6315 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 6316 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr, 6317 "${src2}"#_.BroadcastStr#", $src1", 6318 "$src1, ${src2}"#_.BroadcastStr, 6319 (_.VT (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))>, 6320 AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, 6321 Sched<[sched.Folded, sched.ReadAfterFold]>; 6322} 6323 6324multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, 6325 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> { 6326 let Predicates = [HasAVX512] in 6327 defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, 6328 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512; 6329 6330 let Predicates = [HasAVX512, HasVLX] in { 6331 defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, 6332 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256; 6333 defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, 6334 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128; 6335 } 6336} 6337 6338multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr, 6339 SDNode OpNode, X86SchedWriteWidths sched> { 6340 defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched, 6341 avx512vl_i32_info>; 6342 defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched, 6343 avx512vl_i64_info>, VEX_W; 6344} 6345 6346// Use 
the 512-bit version to implement the 128/256-bit forms when VLX is not
// available (NoVLX).
multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
                                     SDNode OpNode, list<Predicate> p> {
  let Predicates = p in {
  def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
                                  (_.info256.VT _.info256.RC:$src2))),
            (EXTRACT_SUBREG
              (!cast<Instruction>(OpcodeStr#"Zrr")
                (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
              sub_ymm)>;

  def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
                                  (_.info128.VT _.info128.RC:$src2))),
            (EXTRACT_SUBREG
              (!cast<Instruction>(OpcodeStr#"Zrr")
                (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
              sub_xmm)>;
  }
}
multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
                              SDNode OpNode, X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
  defm WZ: avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>,
           EVEX_V512, VEX_W;
  let Predicates = [HasVLX, HasBWI] in {
  defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>,
              EVEX_V256, VEX_W;
  defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>,
              EVEX_V128, VEX_W;
  }
}

defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", X86vshlv, SchedWriteVarVecShift>,
              avx512_var_shift_w<0x12, "vpsllvw", X86vshlv, SchedWriteVarVecShift>;

defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", X86vsrav, SchedWriteVarVecShift>,
              avx512_var_shift_w<0x11, "vpsravw", X86vsrav, SchedWriteVarVecShift>;

defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecShift>,
              avx512_var_shift_w<0x10, "vpsrlvw", X86vsrlv, SchedWriteVarVecShift>;

defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;

defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX]>;

// Use the 512-bit VPROL/VPROLI versions to implement v2i64/v4i64 and
// v4i32/v8i32 when VLX is not available (NoVLX).
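// For example (illustration only):
//   (v4i32 (rotl xmm_a, xmm_b))
//     --> (EXTRACT_SUBREG
//           (VPROLVDZrr (INSERT_SUBREG (IMPLICIT_DEF), xmm_a, sub_xmm),
//                       (INSERT_SUBREG (IMPLICIT_DEF), xmm_b, sub_xmm)),
//           sub_xmm)
// The immediate form (X86vrotli) maps onto VPROLDZri/VPROLQZri in the same
// way.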
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
              sub_xmm)>;
  def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
              sub_ymm)>;

  def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
              sub_xmm)>;
  def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
              sub_ymm)>;

  def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                timm:$src2)), sub_xmm)>;
  def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                timm:$src2)), sub_ymm)>;

  def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                timm:$src2)), sub_xmm)>;
  def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                timm:$src2)), sub_ymm)>;
}

// Use the 512-bit VPROR/VPRORI versions to implement v2i64/v4i64 and
// v4i32/v8i32 when VLX is not available (NoVLX).
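// For example, (v2i64 (rotr xmm_a, xmm_b)) is widened to a VPRORVQZrr on zmm
// and the result extracted with sub_xmm, exactly mirroring the VPROL block
// above.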
6451let Predicates = [HasAVX512, NoVLX] in { 6452 def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))), 6453 (EXTRACT_SUBREG (v8i64 6454 (VPRORVQZrr 6455 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6456 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))), 6457 sub_xmm)>; 6458 def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))), 6459 (EXTRACT_SUBREG (v8i64 6460 (VPRORVQZrr 6461 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6462 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))), 6463 sub_ymm)>; 6464 6465 def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))), 6466 (EXTRACT_SUBREG (v16i32 6467 (VPRORVDZrr 6468 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6469 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))), 6470 sub_xmm)>; 6471 def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), 6472 (EXTRACT_SUBREG (v16i32 6473 (VPRORVDZrr 6474 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6475 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))), 6476 sub_ymm)>; 6477 6478 def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 timm:$src2))), 6479 (EXTRACT_SUBREG (v8i64 6480 (VPRORQZri 6481 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6482 timm:$src2)), sub_xmm)>; 6483 def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 timm:$src2))), 6484 (EXTRACT_SUBREG (v8i64 6485 (VPRORQZri 6486 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6487 timm:$src2)), sub_ymm)>; 6488 6489 def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 timm:$src2))), 6490 (EXTRACT_SUBREG (v16i32 6491 (VPRORDZri 6492 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6493 timm:$src2)), sub_xmm)>; 6494 def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 timm:$src2))), 6495 (EXTRACT_SUBREG (v16i32 6496 (VPRORDZri 6497 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6498 timm:$src2)), sub_ymm)>; 6499} 6500 6501//===-------------------------------------------------------------------===// 6502// 1-src variable permutation VPERMW/D/Q 6503//===-------------------------------------------------------------------===// 6504 6505multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, 6506 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> { 6507 let Predicates = [HasAVX512] in 6508 defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>, 6509 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512; 6510 6511 let Predicates = [HasAVX512, HasVLX] in 6512 defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>, 6513 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256; 6514} 6515 6516multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM, 6517 string OpcodeStr, SDNode OpNode, 6518 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> { 6519 let Predicates = [HasAVX512] in 6520 defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, 6521 sched, VTInfo.info512>, 6522 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, 6523 sched, VTInfo.info512>, EVEX_V512; 6524 let Predicates = [HasAVX512, HasVLX] in 6525 defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, 6526 sched, VTInfo.info256>, 6527 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, 6528 sched, VTInfo.info256>, EVEX_V256; 6529} 6530 6531multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr, 6532 
Predicate prd, SDNode OpNode, 6533 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> { 6534 let Predicates = [prd] in 6535 defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>, 6536 EVEX_V512 ; 6537 let Predicates = [HasVLX, prd] in { 6538 defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>, 6539 EVEX_V256 ; 6540 defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>, 6541 EVEX_V128 ; 6542 } 6543} 6544 6545defm VPERMW : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv, 6546 WriteVarShuffle256, avx512vl_i16_info>, VEX_W; 6547defm VPERMB : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv, 6548 WriteVarShuffle256, avx512vl_i8_info>; 6549 6550defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv, 6551 WriteVarShuffle256, avx512vl_i32_info>; 6552defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv, 6553 WriteVarShuffle256, avx512vl_i64_info>, VEX_W; 6554defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv, 6555 WriteFVarShuffle256, avx512vl_f32_info>; 6556defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv, 6557 WriteFVarShuffle256, avx512vl_f64_info>, VEX_W; 6558 6559defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq", 6560 X86VPermi, WriteShuffle256, avx512vl_i64_info>, 6561 EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W; 6562defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd", 6563 X86VPermi, WriteFShuffle256, avx512vl_f64_info>, 6564 EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W; 6565 6566//===----------------------------------------------------------------------===// 6567// AVX-512 - VPERMIL 6568//===----------------------------------------------------------------------===// 6569 6570multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode, 6571 X86FoldableSchedWrite sched, X86VectorVTInfo _, 6572 X86VectorVTInfo Ctrl> { 6573 defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst), 6574 (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr, 6575 "$src2, $src1", "$src1, $src2", 6576 (_.VT (OpNode _.RC:$src1, 6577 (Ctrl.VT Ctrl.RC:$src2)))>, 6578 T8PD, EVEX_4V, Sched<[sched]>; 6579 defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst), 6580 (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr, 6581 "$src2, $src1", "$src1, $src2", 6582 (_.VT (OpNode 6583 _.RC:$src1, 6584 (Ctrl.VT (Ctrl.LdFrag addr:$src2))))>, 6585 T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, 6586 Sched<[sched.Folded, sched.ReadAfterFold]>; 6587 defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst), 6588 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr, 6589 "${src2}"#_.BroadcastStr#", $src1", 6590 "$src1, ${src2}"#_.BroadcastStr, 6591 (_.VT (OpNode 6592 _.RC:$src1, 6593 (Ctrl.VT (Ctrl.BroadcastLdFrag addr:$src2))))>, 6594 T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, 6595 Sched<[sched.Folded, sched.ReadAfterFold]>; 6596} 6597 6598multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar, 6599 X86SchedWriteWidths sched, 6600 AVX512VLVectorVTInfo _, 6601 AVX512VLVectorVTInfo Ctrl> { 6602 let Predicates = [HasAVX512] in { 6603 defm Z : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM, 6604 _.info512, Ctrl.info512>, EVEX_V512; 6605 } 6606 let Predicates = [HasAVX512, HasVLX] in { 6607 defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM, 6608 _.info128, Ctrl.info128>, EVEX_V128; 6609 defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM, 6610 _.info256, Ctrl.info256>, EVEX_V256; 6611 } 6612} 
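// Each VPERMILPS/VPERMILPD below gets both a variable-control and an
// immediate-control form, e.g. (AT&T syntax, register names illustrative):
//   vpermilps %zmm3, %zmm2, %zmm1    # control vector in %zmm3
//   vpermilps $0x1b, %zmm2, %zmm1    # control in an 8-bit immediate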

multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
                         AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
  defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
                                      _, Ctrl>;
  defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
                                    X86VPermilpi, SchedWriteFShuffle, _>,
             EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
}

let ExeDomain = SSEPackedSingle in
defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
                               avx512vl_i32_info>;
let ExeDomain = SSEPackedDouble in
defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
                               avx512vl_i64_info>, VEX_W1X;

//===----------------------------------------------------------------------===//
// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
//===----------------------------------------------------------------------===//

defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
                                      X86PShufd, SchedWriteShuffle, avx512vl_i32_info>,
                                      EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
                                  X86PShufhw, SchedWriteShuffle>,
                                  EVEX, AVX512XSIi8Base;
defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
                                  X86PShuflw, SchedWriteShuffle>,
                                  EVEX, AVX512XDIi8Base;

//===----------------------------------------------------------------------===//
// AVX-512 - VPSHUFB
//===----------------------------------------------------------------------===//

multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
  defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>,
          EVEX_V512;

  let Predicates = [HasVLX, HasBWI] in {
  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>,
             EVEX_V256;
  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>,
             EVEX_V128;
  }
}

defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
                                  SchedWriteVarShuffle>, VEX_WIG;

//===----------------------------------------------------------------------===//
// Move Low to High and High to Low packed FP Instructions
//===----------------------------------------------------------------------===//

def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>,
                            Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V;
let isCommutable = 1 in
def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
                            Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V, NotMemoryFoldable;

//===----------------------------------------------------------------------===//
// VMOVHPS/PD VMOVLPS Instructions
// All patterns were taken from the SSE implementation.
6684//===----------------------------------------------------------------------===// 6685 6686multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr, 6687 SDPatternOperator OpNode, 6688 X86VectorVTInfo _> { 6689 let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in 6690 def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst), 6691 (ins _.RC:$src1, f64mem:$src2), 6692 !strconcat(OpcodeStr, 6693 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 6694 [(set _.RC:$dst, 6695 (OpNode _.RC:$src1, 6696 (_.VT (bitconvert 6697 (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>, 6698 Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX_4V; 6699} 6700 6701// No patterns for MOVLPS/MOVHPS as the Movlhps node should only be created in 6702// SSE1. And MOVLPS pattern is even more complex. 6703defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag, 6704 v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS; 6705defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl, 6706 v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W; 6707defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag, 6708 v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS; 6709defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd, 6710 v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W; 6711 6712let Predicates = [HasAVX512] in { 6713 // VMOVHPD patterns 6714 def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))), 6715 (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>; 6716 6717 // VMOVLPD patterns 6718 def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload64 addr:$src2))), 6719 (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>; 6720} 6721 6722let SchedRW = [WriteFStore] in { 6723let mayStore = 1, hasSideEffects = 0 in 6724def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs), 6725 (ins f64mem:$dst, VR128X:$src), 6726 "vmovhps\t{$src, $dst|$dst, $src}", 6727 []>, EVEX, EVEX_CD8<32, CD8VT2>; 6728def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs), 6729 (ins f64mem:$dst, VR128X:$src), 6730 "vmovhpd\t{$src, $dst|$dst, $src}", 6731 [(store (f64 (extractelt 6732 (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)), 6733 (iPTR 0))), addr:$dst)]>, 6734 EVEX, EVEX_CD8<64, CD8VT1>, VEX_W; 6735let mayStore = 1, hasSideEffects = 0 in 6736def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs), 6737 (ins f64mem:$dst, VR128X:$src), 6738 "vmovlps\t{$src, $dst|$dst, $src}", 6739 []>, EVEX, EVEX_CD8<32, CD8VT2>; 6740def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs), 6741 (ins f64mem:$dst, VR128X:$src), 6742 "vmovlpd\t{$src, $dst|$dst, $src}", 6743 [(store (f64 (extractelt (v2f64 VR128X:$src), 6744 (iPTR 0))), addr:$dst)]>, 6745 EVEX, EVEX_CD8<64, CD8VT1>, VEX_W; 6746} // SchedRW 6747 6748let Predicates = [HasAVX512] in { 6749 // VMOVHPD patterns 6750 def : Pat<(store (f64 (extractelt 6751 (v2f64 (X86VPermilpi VR128X:$src, (i8 1))), 6752 (iPTR 0))), addr:$dst), 6753 (VMOVHPDZ128mr addr:$dst, VR128X:$src)>; 6754} 6755//===----------------------------------------------------------------------===// 6756// FMA - Fused Multiply Operations 6757// 6758 6759multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 6760 SDNode MaskOpNode, X86FoldableSchedWrite sched, 6761 X86VectorVTInfo _> { 6762 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0, 6763 Uses = [MXCSR], mayRaiseFPException = 1 in { 6764 defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst), 6765 (ins _.RC:$src2, _.RC:$src3), 6766 OpcodeStr, "$src3, $src2", "$src2, $src3", 6767 (_.VT 
(OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 6768 (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>, 6769 EVEX_4V, Sched<[sched]>; 6770 6771 defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst), 6772 (ins _.RC:$src2, _.MemOp:$src3), 6773 OpcodeStr, "$src3, $src2", "$src2, $src3", 6774 (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 6775 (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>, 6776 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; 6777 6778 defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst), 6779 (ins _.RC:$src2, _.ScalarMemOp:$src3), 6780 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), 6781 !strconcat("$src2, ${src3}", _.BroadcastStr ), 6782 (OpNode _.RC:$src2, 6783 _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 6784 (MaskOpNode _.RC:$src2, 6785 _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>, 6786 EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 6787 } 6788} 6789 6790multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode, 6791 X86FoldableSchedWrite sched, 6792 X86VectorVTInfo _> { 6793 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0, 6794 Uses = [MXCSR] in 6795 defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst), 6796 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc), 6797 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", 6798 (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 6799 (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>, 6800 EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>; 6801} 6802 6803multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 6804 SDNode MaskOpNode, SDNode OpNodeRnd, 6805 X86SchedWriteWidths sched, 6806 AVX512VLVectorVTInfo _, 6807 Predicate prd = HasAVX512> { 6808 let Predicates = [prd] in { 6809 defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode, 6810 sched.ZMM, _.info512>, 6811 avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM, 6812 _.info512>, 6813 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; 6814 } 6815 let Predicates = [HasVLX, prd] in { 6816 defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode, 6817 sched.YMM, _.info256>, 6818 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; 6819 defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode, 6820 sched.XMM, _.info128>, 6821 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; 6822 } 6823} 6824 6825multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 6826 SDNode MaskOpNode, SDNode OpNodeRnd> { 6827 defm PH : avx512_fma3p_213_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode, 6828 OpNodeRnd, SchedWriteFMA, 6829 avx512vl_f16_info, HasFP16>, T_MAP6PD; 6830 defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode, 6831 OpNodeRnd, SchedWriteFMA, 6832 avx512vl_f32_info>, T8PD; 6833 defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode, 6834 OpNodeRnd, SchedWriteFMA, 6835 avx512vl_f64_info>, T8PD, VEX_W; 6836} 6837 6838defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", any_fma, 6839 fma, X86FmaddRnd>; 6840defm VFMSUB213 : avx512_fma3p_213_f<0xAA, "vfmsub213", X86any_Fmsub, 6841 X86Fmsub, X86FmsubRnd>; 6842defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, 6843 X86Fmaddsub, X86FmaddsubRnd>; 6844defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, 6845 X86Fmsubadd, X86FmsubaddRnd>; 6846defm VFNMADD213 : 
avx512_fma3p_213_f<0xAC, "vfnmadd213", X86any_Fnmadd, 6847 X86Fnmadd, X86FnmaddRnd>; 6848defm VFNMSUB213 : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86any_Fnmsub, 6849 X86Fnmsub, X86FnmsubRnd>; 6850 6851 6852multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 6853 SDNode MaskOpNode, X86FoldableSchedWrite sched, 6854 X86VectorVTInfo _> { 6855 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0, 6856 Uses = [MXCSR], mayRaiseFPException = 1 in { 6857 defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst), 6858 (ins _.RC:$src2, _.RC:$src3), 6859 OpcodeStr, "$src3, $src2", "$src2, $src3", 6860 (null_frag), 6861 (_.VT (MaskOpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>, 6862 EVEX_4V, Sched<[sched]>; 6863 6864 defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst), 6865 (ins _.RC:$src2, _.MemOp:$src3), 6866 OpcodeStr, "$src3, $src2", "$src2, $src3", 6867 (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 6868 (_.VT (MaskOpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>, 6869 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; 6870 6871 defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst), 6872 (ins _.RC:$src2, _.ScalarMemOp:$src3), 6873 OpcodeStr, "${src3}"#_.BroadcastStr#", $src2", 6874 "$src2, ${src3}"#_.BroadcastStr, 6875 (_.VT (OpNode _.RC:$src2, 6876 (_.VT (_.BroadcastLdFrag addr:$src3)), 6877 _.RC:$src1)), 6878 (_.VT (MaskOpNode _.RC:$src2, 6879 (_.VT (_.BroadcastLdFrag addr:$src3)), 6880 _.RC:$src1)), 1, 0>, EVEX_4V, EVEX_B, 6881 Sched<[sched.Folded, sched.ReadAfterFold]>; 6882 } 6883} 6884 6885multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode, 6886 X86FoldableSchedWrite sched, 6887 X86VectorVTInfo _> { 6888 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0, 6889 Uses = [MXCSR] in 6890 defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst), 6891 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc), 6892 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", 6893 (null_frag), 6894 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))), 6895 1, 1>, EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>; 6896} 6897 6898multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 6899 SDNode MaskOpNode, SDNode OpNodeRnd, 6900 X86SchedWriteWidths sched, 6901 AVX512VLVectorVTInfo _, 6902 Predicate prd = HasAVX512> { 6903 let Predicates = [prd] in { 6904 defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode, 6905 sched.ZMM, _.info512>, 6906 avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM, 6907 _.info512>, 6908 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; 6909 } 6910 let Predicates = [HasVLX, prd] in { 6911 defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode, 6912 sched.YMM, _.info256>, 6913 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; 6914 defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode, 6915 sched.XMM, _.info128>, 6916 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; 6917 } 6918} 6919 6920multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 6921 SDNode MaskOpNode, SDNode OpNodeRnd > { 6922 defm PH : avx512_fma3p_231_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode, 6923 OpNodeRnd, SchedWriteFMA, 6924 avx512vl_f16_info, HasFP16>, T_MAP6PD; 6925 defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode, 6926 OpNodeRnd, SchedWriteFMA, 6927 avx512vl_f32_info>, T8PD; 6928 defm PD : 
avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f64_info>, T8PD, VEX_W;
}

defm VFMADD231    : avx512_fma3p_231_f<0xB8, "vfmadd231", any_fma,
                                       fma, X86FmaddRnd>;
defm VFMSUB231    : avx512_fma3p_231_f<0xBA, "vfmsub231", X86any_Fmsub,
                                       X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub,
                                       X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd,
                                       X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD231   : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86any_Fnmadd,
                                       X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB231   : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86any_Fnmsub,
                                       X86Fnmsub, X86FnmsubRnd>;

multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
                               X86VectorVTInfo _> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (null_frag),
          (_.VT (MaskOpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1>,
          EVEX_4V, Sched<[sched]>;

  // Pattern is in 312 order so that the load is in a different place from the
  // 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)),
          (_.VT (MaskOpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
          EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;

  // Pattern is in 312 order so that the load is in a different place from the
  // 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
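  // For example, with FMADD the 132 form computes src1*src3+src2; writing the
  // folded-load pattern as (fma (load $src3), $src1, $src2) instead of
  // (fma $src1, (load $src3), $src2) means the same thing, since fma commutes
  // its first two operands, but stays textually distinct from the 213/231
  // load patterns.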
6969 defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst), 6970 (ins _.RC:$src2, _.ScalarMemOp:$src3), 6971 OpcodeStr, "${src3}"#_.BroadcastStr#", $src2", 6972 "$src2, ${src3}"#_.BroadcastStr, 6973 (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src3)), 6974 _.RC:$src1, _.RC:$src2)), 6975 (_.VT (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src3)), 6976 _.RC:$src1, _.RC:$src2)), 1, 0>, 6977 EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 6978 } 6979} 6980 6981multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode, 6982 X86FoldableSchedWrite sched, 6983 X86VectorVTInfo _> { 6984 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0, 6985 Uses = [MXCSR] in 6986 defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst), 6987 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc), 6988 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", 6989 (null_frag), 6990 (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))), 6991 1, 1>, EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>; 6992} 6993 6994multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 6995 SDNode MaskOpNode, SDNode OpNodeRnd, 6996 X86SchedWriteWidths sched, 6997 AVX512VLVectorVTInfo _, 6998 Predicate prd = HasAVX512> { 6999 let Predicates = [prd] in { 7000 defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode, 7001 sched.ZMM, _.info512>, 7002 avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM, 7003 _.info512>, 7004 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; 7005 } 7006 let Predicates = [HasVLX, prd] in { 7007 defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode, 7008 sched.YMM, _.info256>, 7009 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; 7010 defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode, 7011 sched.XMM, _.info128>, 7012 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; 7013 } 7014} 7015 7016multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 7017 SDNode MaskOpNode, SDNode OpNodeRnd > { 7018 defm PH : avx512_fma3p_132_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode, 7019 OpNodeRnd, SchedWriteFMA, 7020 avx512vl_f16_info, HasFP16>, T_MAP6PD; 7021 defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode, 7022 OpNodeRnd, SchedWriteFMA, 7023 avx512vl_f32_info>, T8PD; 7024 defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode, 7025 OpNodeRnd, SchedWriteFMA, 7026 avx512vl_f64_info>, T8PD, VEX_W; 7027} 7028 7029defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", any_fma, 7030 fma, X86FmaddRnd>; 7031defm VFMSUB132 : avx512_fma3p_132_f<0x9A, "vfmsub132", X86any_Fmsub, 7032 X86Fmsub, X86FmsubRnd>; 7033defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, 7034 X86Fmaddsub, X86FmaddsubRnd>; 7035defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd, 7036 X86Fmsubadd, X86FmsubaddRnd>; 7037defm VFNMADD132 : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86any_Fnmadd, 7038 X86Fnmadd, X86FnmaddRnd>; 7039defm VFNMSUB132 : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86any_Fnmsub, 7040 X86Fnmsub, X86FnmsubRnd>; 7041 7042// Scalar FMA 7043multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 7044 dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> { 7045let Constraints = "$src1 = $dst", hasSideEffects = 0 in { 7046 defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 7047 (ins _.RC:$src2, _.RC:$src3), OpcodeStr, 7048 "$src3, $src2", "$src2, $src3", 
(null_frag), 1, 1>,
          EVEX_4V, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;

  let mayLoad = 1 in
  defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
          EVEX_4V, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;

  let Uses = [MXCSR] in
  defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
          EVEX_4V, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;

  let isCodeGenOnly = 1, isCommutable = 1 in {
    def r : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
                   (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
                   !strconcat(OpcodeStr,
                              "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                   !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>, EVEX_4V, SIMD_EXC;
    def m : AVX512<opc, MRMSrcMem, (outs _.FRC:$dst),
                   (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
                   !strconcat(OpcodeStr,
                              "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                   [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, EVEX_4V, SIMD_EXC;

    let Uses = [MXCSR] in
    def rb : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
                    (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
                    !strconcat(OpcodeStr,
                               "\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"),
                    !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
                    Sched<[SchedWriteFMA.Scl]>, EVEX_4V;
  }// isCodeGenOnly = 1
}// Constraints = "$src1 = $dst"
}

multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
                            string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd,
                            X86VectorVTInfo _, string SUFF> {
  let ExeDomain = _.ExeDomain in {
  defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
                // Operands for intrinsic are in 123 order to preserve passthru
                // semantics.
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
                         _.FRC:$src3))),
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
                         (_.ScalarLdFrag addr:$src3)))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
                         _.FRC:$src3, (i32 timm:$rc)))), 0>;

  defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
                         _.FRC:$src1))),
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
                         (_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
                         _.FRC:$src1, (i32 timm:$rc)))), 1>;

  // One pattern is in 312 order so that the load is in a different place from
  // the 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
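  // (Illustrative expansion: for SUFF="SS" and NAME=VFMADD these defms yield
  // records such as VFMADD213SSZr, VFMADD231SSZm and VFMADD132SSZrb; the
  // *_Int forms are left patternless here and are matched instead by the
  // avx512_scalar_fma_patterns helpers below.)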
7110 defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _, 7111 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3, 7112 _.FRC:$src2))), 7113 (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3), 7114 _.FRC:$src1, _.FRC:$src2))), 7115 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3, 7116 _.FRC:$src2, (i32 timm:$rc)))), 1>; 7117 } 7118} 7119 7120multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132, 7121 string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd> { 7122 let Predicates = [HasAVX512] in { 7123 defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode, 7124 OpNodeRnd, f32x_info, "SS">, 7125 EVEX_CD8<32, CD8VT1>, VEX_LIG, T8PD; 7126 defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode, 7127 OpNodeRnd, f64x_info, "SD">, 7128 EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W, T8PD; 7129 } 7130 let Predicates = [HasFP16] in { 7131 defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode, 7132 OpNodeRnd, f16x_info, "SH">, 7133 EVEX_CD8<16, CD8VT1>, VEX_LIG, T_MAP6PD; 7134 } 7135} 7136 7137defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", any_fma, X86FmaddRnd>; 7138defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86any_Fmsub, X86FmsubRnd>; 7139defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86any_Fnmadd, X86FnmaddRnd>; 7140defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86any_Fnmsub, X86FnmsubRnd>; 7141 7142multiclass avx512_scalar_fma_patterns<SDPatternOperator Op, SDNode MaskedOp, 7143 SDNode RndOp, string Prefix, 7144 string Suffix, SDNode Move, 7145 X86VectorVTInfo _, PatLeaf ZeroFP, 7146 Predicate prd = HasAVX512> { 7147 let Predicates = [prd] in { 7148 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 7149 (Op _.FRC:$src2, 7150 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 7151 _.FRC:$src3))))), 7152 (!cast<I>(Prefix#"213"#Suffix#"Zr_Int") 7153 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 7154 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>; 7155 7156 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 7157 (Op _.FRC:$src2, _.FRC:$src3, 7158 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))), 7159 (!cast<I>(Prefix#"231"#Suffix#"Zr_Int") 7160 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 7161 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>; 7162 7163 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 7164 (Op _.FRC:$src2, 7165 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 7166 (_.ScalarLdFrag addr:$src3)))))), 7167 (!cast<I>(Prefix#"213"#Suffix#"Zm_Int") 7168 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 7169 addr:$src3)>; 7170 7171 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 7172 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 7173 (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))), 7174 (!cast<I>(Prefix#"132"#Suffix#"Zm_Int") 7175 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 7176 addr:$src3)>; 7177 7178 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 7179 (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3), 7180 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))), 7181 (!cast<I>(Prefix#"231"#Suffix#"Zm_Int") 7182 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 7183 addr:$src3)>; 7184 7185 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 7186 (X86selects_mask VK1WM:$mask, 7187 (MaskedOp _.FRC:$src2, 7188 (_.EltVT (extractelt (_.VT 
VR128X:$src1), (iPTR 0))), 7189 _.FRC:$src3), 7190 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))), 7191 (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk") 7192 VR128X:$src1, VK1WM:$mask, 7193 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 7194 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>; 7195 7196 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 7197 (X86selects_mask VK1WM:$mask, 7198 (MaskedOp _.FRC:$src2, 7199 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 7200 (_.ScalarLdFrag addr:$src3)), 7201 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))), 7202 (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk") 7203 VR128X:$src1, VK1WM:$mask, 7204 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>; 7205 7206 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 7207 (X86selects_mask VK1WM:$mask, 7208 (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 7209 (_.ScalarLdFrag addr:$src3), _.FRC:$src2), 7210 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))), 7211 (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk") 7212 VR128X:$src1, VK1WM:$mask, 7213 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>; 7214 7215 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 7216 (X86selects_mask VK1WM:$mask, 7217 (MaskedOp _.FRC:$src2, _.FRC:$src3, 7218 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))), 7219 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))), 7220 (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk") 7221 VR128X:$src1, VK1WM:$mask, 7222 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 7223 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>; 7224 7225 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 7226 (X86selects_mask VK1WM:$mask, 7227 (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3), 7228 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))), 7229 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))), 7230 (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk") 7231 VR128X:$src1, VK1WM:$mask, 7232 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>; 7233 7234 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 7235 (X86selects_mask VK1WM:$mask, 7236 (MaskedOp _.FRC:$src2, 7237 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 7238 _.FRC:$src3), 7239 (_.EltVT ZeroFP)))))), 7240 (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz") 7241 VR128X:$src1, VK1WM:$mask, 7242 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 7243 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>; 7244 7245 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 7246 (X86selects_mask VK1WM:$mask, 7247 (MaskedOp _.FRC:$src2, _.FRC:$src3, 7248 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))), 7249 (_.EltVT ZeroFP)))))), 7250 (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz") 7251 VR128X:$src1, VK1WM:$mask, 7252 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 7253 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>; 7254 7255 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 7256 (X86selects_mask VK1WM:$mask, 7257 (MaskedOp _.FRC:$src2, 7258 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 7259 (_.ScalarLdFrag addr:$src3)), 7260 (_.EltVT ZeroFP)))))), 7261 (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz") 7262 VR128X:$src1, VK1WM:$mask, 7263 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>; 7264 7265 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 7266 (X86selects_mask VK1WM:$mask, 7267 (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 7268 _.FRC:$src2, (_.ScalarLdFrag 
addr:$src3)), 7269 (_.EltVT ZeroFP)))))), 7270 (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz") 7271 VR128X:$src1, VK1WM:$mask, 7272 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>; 7273 7274 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 7275 (X86selects_mask VK1WM:$mask, 7276 (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3), 7277 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))), 7278 (_.EltVT ZeroFP)))))), 7279 (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz") 7280 VR128X:$src1, VK1WM:$mask, 7281 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>; 7282 7283 // Patterns with rounding mode. 7284 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 7285 (RndOp _.FRC:$src2, 7286 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 7287 _.FRC:$src3, (i32 timm:$rc)))))), 7288 (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int") 7289 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 7290 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>; 7291 7292 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 7293 (RndOp _.FRC:$src2, _.FRC:$src3, 7294 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 7295 (i32 timm:$rc)))))), 7296 (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int") 7297 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 7298 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>; 7299 7300 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 7301 (X86selects_mask VK1WM:$mask, 7302 (RndOp _.FRC:$src2, 7303 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 7304 _.FRC:$src3, (i32 timm:$rc)), 7305 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))), 7306 (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk") 7307 VR128X:$src1, VK1WM:$mask, 7308 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 7309 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>; 7310 7311 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 7312 (X86selects_mask VK1WM:$mask, 7313 (RndOp _.FRC:$src2, _.FRC:$src3, 7314 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 7315 (i32 timm:$rc)), 7316 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))), 7317 (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk") 7318 VR128X:$src1, VK1WM:$mask, 7319 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 7320 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>; 7321 7322 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 7323 (X86selects_mask VK1WM:$mask, 7324 (RndOp _.FRC:$src2, 7325 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 7326 _.FRC:$src3, (i32 timm:$rc)), 7327 (_.EltVT ZeroFP)))))), 7328 (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz") 7329 VR128X:$src1, VK1WM:$mask, 7330 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 7331 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>; 7332 7333 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector 7334 (X86selects_mask VK1WM:$mask, 7335 (RndOp _.FRC:$src2, _.FRC:$src3, 7336 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), 7337 (i32 timm:$rc)), 7338 (_.EltVT ZeroFP)))))), 7339 (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz") 7340 VR128X:$src1, VK1WM:$mask, 7341 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), 7342 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>; 7343 } 7344} 7345defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD", "SH", 7346 X86Movsh, v8f16x_info, fp16imm0, HasFP16>; 7347defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB", "SH", 7348 X86Movsh, v8f16x_info, 
fp16imm0, HasFP16>;
defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SH",
                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SH",
                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;

defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
                                  "SS", X86Movss, v4f32x_info, fp32imm0>;

defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;

//===----------------------------------------------------------------------===//
// AVX-512 Packed Multiply of Unsigned 52-bit Integers and Add the Low
// 52-bit Products (IFMA)
//===----------------------------------------------------------------------===//
let Constraints = "$src1 = $dst" in {
multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  // NOTE: The SDNode has the multiply operands first with the add last.
  // This enables commuted load patterns to be autogenerated by tablegen.
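  // E.g. from the memory pattern (OpNode $src2, (load $src3), $src1) below,
  // tablegen can also derive the match with the load in the other multiply
  // operand, since the first two operands of the node commute; no hand-written
  // commuted memory patterns are needed.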
  let ExeDomain = _.ExeDomain in {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
          T8PD, EVEX_4V, Sched<[sched]>;

  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
          T8PD, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.ScalarMemOp:$src3),
           OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
           !strconcat("$src2, ${src3}", _.BroadcastStr ),
           (OpNode _.RC:$src2,
                   (_.VT (_.BroadcastLdFrag addr:$src3)),
                   _.RC:$src1)>,
           T8PD, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
} // Constraints = "$src1 = $dst"

multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasIFMA] in {
    defm Z : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
             EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasIFMA] in {
    defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}

defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
                                         SchedWriteVecIMul, avx512vl_i64_info>,
                                         VEX_W;
defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
                                         SchedWriteVecIMul, avx512vl_i64_info>,
                                         VEX_W;

//===----------------------------------------------------------------------===//
// AVX-512 Scalar convert from signed integer to float/double
//===----------------------------------------------------------------------===//

multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched,
                         RegisterClass SrcRC, X86VectorVTInfo DstVT,
                         X86MemOperand x86memop, PatFrag ld_frag, string asm,
                         string mem, list<Register> _Uses = [MXCSR],
                         bit _mayRaiseFPException = 1> {
let ExeDomain = DstVT.ExeDomain, Uses = _Uses,
    mayRaiseFPException = _mayRaiseFPException in {
  let hasSideEffects = 0, isCodeGenOnly = 1 in {
    def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
                (ins DstVT.FRC:$src1, SrcRC:$src),
                !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
                EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
    let mayLoad = 1 in
      def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
                  (ins DstVT.FRC:$src1, x86memop:$src),
                  asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
                  EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  } // hasSideEffects = 0
  def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
                  (ins DstVT.RC:$src1, SrcRC:$src2),
                  !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set DstVT.RC:$dst,
                        (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2))]>,
                  EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;

  def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
                  (ins DstVT.RC:$src1,
x86memop:$src2), 7457 asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}", 7458 [(set DstVT.RC:$dst, 7459 (OpNode (DstVT.VT DstVT.RC:$src1), 7460 (ld_frag addr:$src2)))]>, 7461 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; 7462} 7463 def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 7464 (!cast<Instruction>(NAME#"rr_Int") DstVT.RC:$dst, 7465 DstVT.RC:$src1, SrcRC:$src2), 0, "att">; 7466} 7467 7468multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode, 7469 X86FoldableSchedWrite sched, RegisterClass SrcRC, 7470 X86VectorVTInfo DstVT, string asm, 7471 string mem> { 7472 let ExeDomain = DstVT.ExeDomain, Uses = [MXCSR] in 7473 def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), 7474 (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 7475 !strconcat(asm, 7476 "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"), 7477 [(set DstVT.RC:$dst, 7478 (OpNode (DstVT.VT DstVT.RC:$src1), 7479 SrcRC:$src2, 7480 (i32 timm:$rc)))]>, 7481 EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched, ReadDefault, ReadInt2Fpu]>; 7482 def : InstAlias<"v"#asm#mem#"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}", 7483 (!cast<Instruction>(NAME#"rrb_Int") DstVT.RC:$dst, 7484 DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 0, "att">; 7485} 7486 7487multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, SDNode OpNodeRnd, 7488 X86FoldableSchedWrite sched, 7489 RegisterClass SrcRC, X86VectorVTInfo DstVT, 7490 X86MemOperand x86memop, PatFrag ld_frag, 7491 string asm, string mem> { 7492 defm NAME : avx512_vcvtsi_round<opc, OpNodeRnd, sched, SrcRC, DstVT, asm, mem>, 7493 avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop, 7494 ld_frag, asm, mem>, VEX_LIG; 7495} 7496 7497let Predicates = [HasAVX512] in { 7498defm VCVTSI2SSZ : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, 7499 WriteCvtI2SS, GR32, 7500 v4f32x_info, i32mem, loadi32, "cvtsi2ss", "l">, 7501 XS, EVEX_CD8<32, CD8VT1>; 7502defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, 7503 WriteCvtI2SS, GR64, 7504 v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">, 7505 XS, VEX_W, EVEX_CD8<64, CD8VT1>; 7506defm VCVTSI2SDZ : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32, 7507 v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l", [], 0>, 7508 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; 7509defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, 7510 WriteCvtI2SD, GR64, 7511 v2f64x_info, i64mem, loadi64, "cvtsi2sd", "q">, 7512 XD, VEX_W, EVEX_CD8<64, CD8VT1>; 7513 7514def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}", 7515 (VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">; 7516def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}", 7517 (VCVTSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">; 7518 7519def : Pat<(f32 (any_sint_to_fp (loadi32 addr:$src))), 7520 (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>; 7521def : Pat<(f32 (any_sint_to_fp (loadi64 addr:$src))), 7522 (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>; 7523def : Pat<(f64 (any_sint_to_fp (loadi32 addr:$src))), 7524 (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>; 7525def : Pat<(f64 (any_sint_to_fp (loadi64 addr:$src))), 7526 (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>; 7527 7528def : Pat<(f32 (any_sint_to_fp GR32:$src)), 7529 (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>; 7530def : Pat<(f32 (any_sint_to_fp GR64:$src)), 7531 (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>; 7532def : Pat<(f64 (any_sint_to_fp GR32:$src)), 7533 (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>; 
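// In the patterns above and below, the tied FP source operand is filled with
// IMPLICIT_DEF: a plain scalar int-to-fp conversion has no pass-through value,
// so the upper elements of the destination are simply left undefined.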
7534def : Pat<(f64 (any_sint_to_fp GR64:$src)), 7535 (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>; 7536 7537defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, 7538 WriteCvtI2SS, GR32, 7539 v4f32x_info, i32mem, loadi32, 7540 "cvtusi2ss", "l">, XS, EVEX_CD8<32, CD8VT1>; 7541defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, 7542 WriteCvtI2SS, GR64, 7543 v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">, 7544 XS, VEX_W, EVEX_CD8<64, CD8VT1>; 7545defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info, 7546 i32mem, loadi32, "cvtusi2sd", "l", [], 0>, 7547 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; 7548defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, 7549 WriteCvtI2SD, GR64, 7550 v2f64x_info, i64mem, loadi64, "cvtusi2sd", "q">, 7551 XD, VEX_W, EVEX_CD8<64, CD8VT1>; 7552 7553def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}", 7554 (VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">; 7555def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}", 7556 (VCVTUSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">; 7557 7558def : Pat<(f32 (any_uint_to_fp (loadi32 addr:$src))), 7559 (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>; 7560def : Pat<(f32 (any_uint_to_fp (loadi64 addr:$src))), 7561 (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>; 7562def : Pat<(f64 (any_uint_to_fp (loadi32 addr:$src))), 7563 (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>; 7564def : Pat<(f64 (any_uint_to_fp (loadi64 addr:$src))), 7565 (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>; 7566 7567def : Pat<(f32 (any_uint_to_fp GR32:$src)), 7568 (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>; 7569def : Pat<(f32 (any_uint_to_fp GR64:$src)), 7570 (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>; 7571def : Pat<(f64 (any_uint_to_fp GR32:$src)), 7572 (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>; 7573def : Pat<(f64 (any_uint_to_fp GR64:$src)), 7574 (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>; 7575} 7576 7577//===----------------------------------------------------------------------===// 7578// AVX-512 Scalar convert from float/double to integer 7579//===----------------------------------------------------------------------===// 7580 7581multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT, 7582 X86VectorVTInfo DstVT, SDNode OpNode, 7583 SDNode OpNodeRnd, 7584 X86FoldableSchedWrite sched, string asm, 7585 string aliasStr, Predicate prd = HasAVX512> { 7586 let Predicates = [prd], ExeDomain = SrcVT.ExeDomain in { 7587 def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src), 7588 !strconcat(asm,"\t{$src, $dst|$dst, $src}"), 7589 [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>, 7590 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC; 7591 let Uses = [MXCSR] in 7592 def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc), 7593 !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"), 7594 [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 timm:$rc)))]>, 7595 EVEX, VEX_LIG, EVEX_B, EVEX_RC, 7596 Sched<[sched]>; 7597 def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src), 7598 !strconcat(asm,"\t{$src, $dst|$dst, $src}"), 7599 [(set DstVT.RC:$dst, (OpNode 7600 (SrcVT.ScalarIntMemFrags addr:$src)))]>, 7601 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 7602 } // Predicates = [prd] 7603 7604 def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, 
$src}", 7605 (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">; 7606 def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}", 7607 (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">; 7608 def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}", 7609 (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst, 7610 SrcVT.IntScalarMemOp:$src), 0, "att">; 7611} 7612 7613// Convert float/double to signed/unsigned int 32/64 7614defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,X86cvts2si, 7615 X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">, 7616 XS, EVEX_CD8<32, CD8VT1>; 7617defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si, 7618 X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">, 7619 XS, VEX_W, EVEX_CD8<32, CD8VT1>; 7620defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, X86cvts2usi, 7621 X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">, 7622 XS, EVEX_CD8<32, CD8VT1>; 7623defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, X86cvts2usi, 7624 X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">, 7625 XS, VEX_W, EVEX_CD8<32, CD8VT1>; 7626defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si, 7627 X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">, 7628 XD, EVEX_CD8<64, CD8VT1>; 7629defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si, 7630 X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">, 7631 XD, VEX_W, EVEX_CD8<64, CD8VT1>; 7632defm VCVTSD2USIZ: avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, X86cvts2usi, 7633 X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">, 7634 XD, EVEX_CD8<64, CD8VT1>; 7635defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2usi, 7636 X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">, 7637 XD, VEX_W, EVEX_CD8<64, CD8VT1>; 7638 7639multiclass avx512_cvt_s<bits<8> opc, string asm, X86VectorVTInfo SrcVT, 7640 X86VectorVTInfo DstVT, SDNode OpNode, 7641 X86FoldableSchedWrite sched> { 7642 let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in { 7643 let isCodeGenOnly = 1 in { 7644 def rr : AVX512<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.FRC:$src), 7645 !strconcat(asm,"\t{$src, $dst|$dst, $src}"), 7646 [(set DstVT.RC:$dst, (OpNode SrcVT.FRC:$src))]>, 7647 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC; 7648 def rm : AVX512<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.ScalarMemOp:$src), 7649 !strconcat(asm,"\t{$src, $dst|$dst, $src}"), 7650 [(set DstVT.RC:$dst, (OpNode (SrcVT.ScalarLdFrag addr:$src)))]>, 7651 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 7652 } 7653 } // Predicates = [HasAVX512] 7654} 7655 7656defm VCVTSS2SIZ: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i32x_info, 7657 lrint, WriteCvtSS2I>, XS, EVEX_CD8<32, CD8VT1>; 7658defm VCVTSS2SI64Z: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i64x_info, 7659 llrint, WriteCvtSS2I>, VEX_W, XS, EVEX_CD8<32, CD8VT1>; 7660defm VCVTSD2SIZ: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i32x_info, 7661 lrint, WriteCvtSD2I>, XD, EVEX_CD8<64, CD8VT1>; 7662defm VCVTSD2SI64Z: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i64x_info, 7663 llrint, WriteCvtSD2I>, VEX_W, XD, EVEX_CD8<64, CD8VT1>; 7664 7665let Predicates = [HasAVX512] in { 7666 def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64Zrr FR32:$src)>; 7667 def : Pat<(i64 (lrint (loadf32 addr:$src))), (VCVTSS2SI64Zrm addr:$src)>; 7668 7669 def : Pat<(i64 (lrint FR64:$src)), (VCVTSD2SI64Zrr FR64:$src)>; 7670 def : 
Pat<(i64 (lrint (loadf64 addr:$src))), (VCVTSD2SI64Zrm addr:$src)>; 7671} 7672 7673// Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang 7674// which produce unnecessary vmovs{s,d} instructions 7675let Predicates = [HasAVX512] in { 7676def : Pat<(v4f32 (X86Movss 7677 (v4f32 VR128X:$dst), 7678 (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))), 7679 (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>; 7680 7681def : Pat<(v4f32 (X86Movss 7682 (v4f32 VR128X:$dst), 7683 (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))), 7684 (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>; 7685 7686def : Pat<(v4f32 (X86Movss 7687 (v4f32 VR128X:$dst), 7688 (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))), 7689 (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>; 7690 7691def : Pat<(v4f32 (X86Movss 7692 (v4f32 VR128X:$dst), 7693 (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))), 7694 (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>; 7695 7696def : Pat<(v2f64 (X86Movsd 7697 (v2f64 VR128X:$dst), 7698 (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))), 7699 (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>; 7700 7701def : Pat<(v2f64 (X86Movsd 7702 (v2f64 VR128X:$dst), 7703 (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))), 7704 (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>; 7705 7706def : Pat<(v2f64 (X86Movsd 7707 (v2f64 VR128X:$dst), 7708 (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))), 7709 (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>; 7710 7711def : Pat<(v2f64 (X86Movsd 7712 (v2f64 VR128X:$dst), 7713 (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))), 7714 (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>; 7715 7716def : Pat<(v4f32 (X86Movss 7717 (v4f32 VR128X:$dst), 7718 (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR64:$src)))))), 7719 (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>; 7720 7721def : Pat<(v4f32 (X86Movss 7722 (v4f32 VR128X:$dst), 7723 (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi64 addr:$src))))))), 7724 (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>; 7725 7726def : Pat<(v4f32 (X86Movss 7727 (v4f32 VR128X:$dst), 7728 (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR32:$src)))))), 7729 (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>; 7730 7731def : Pat<(v4f32 (X86Movss 7732 (v4f32 VR128X:$dst), 7733 (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi32 addr:$src))))))), 7734 (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>; 7735 7736def : Pat<(v2f64 (X86Movsd 7737 (v2f64 VR128X:$dst), 7738 (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR64:$src)))))), 7739 (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>; 7740 7741def : Pat<(v2f64 (X86Movsd 7742 (v2f64 VR128X:$dst), 7743 (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi64 addr:$src))))))), 7744 (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>; 7745 7746def : Pat<(v2f64 (X86Movsd 7747 (v2f64 VR128X:$dst), 7748 (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR32:$src)))))), 7749 (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>; 7750 7751def : Pat<(v2f64 (X86Movsd 7752 (v2f64 VR128X:$dst), 7753 (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi32 addr:$src))))))), 7754 (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>; 7755} // Predicates = [HasAVX512] 7756 7757// Convert float/double to signed/unsigned int 32/64 with truncation 7758multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC, 7759 X86VectorVTInfo _DstRC, SDPatternOperator OpNode, 7760 SDNode OpNodeInt, SDNode OpNodeSAE, 7761 X86FoldableSchedWrite sched, string 
aliasStr, 7762 Predicate prd = HasAVX512> { 7763let Predicates = [prd], ExeDomain = _SrcRC.ExeDomain in { 7764 let isCodeGenOnly = 1 in { 7765 def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src), 7766 !strconcat(asm,"\t{$src, $dst|$dst, $src}"), 7767 [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>, 7768 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC; 7769 def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src), 7770 !strconcat(asm,"\t{$src, $dst|$dst, $src}"), 7771 [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>, 7772 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 7773 } 7774 7775 def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src), 7776 !strconcat(asm,"\t{$src, $dst|$dst, $src}"), 7777 [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>, 7778 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC; 7779 let Uses = [MXCSR] in 7780 def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src), 7781 !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"), 7782 [(set _DstRC.RC:$dst, (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>, 7783 EVEX, VEX_LIG, EVEX_B, Sched<[sched]>; 7784 def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), 7785 (ins _SrcRC.IntScalarMemOp:$src), 7786 !strconcat(asm,"\t{$src, $dst|$dst, $src}"), 7787 [(set _DstRC.RC:$dst, 7788 (OpNodeInt (_SrcRC.ScalarIntMemFrags addr:$src)))]>, 7789 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 7790} // Predicates = [prd] 7791 7792 def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}", 7793 (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">; 7794 def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}", 7795 (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">; 7796 def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}", 7797 (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst, 7798 _SrcRC.IntScalarMemOp:$src), 0, "att">; 7799} 7800 7801defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info, 7802 any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I, 7803 "{l}">, XS, EVEX_CD8<32, CD8VT1>; 7804defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info, 7805 any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I, 7806 "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>; 7807defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info, 7808 any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I, 7809 "{l}">, XD, EVEX_CD8<64, CD8VT1>; 7810defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info, 7811 any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I, 7812 "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>; 7813 7814defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info, 7815 any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I, 7816 "{l}">, XS, EVEX_CD8<32, CD8VT1>; 7817defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info, 7818 any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I, 7819 "{q}">, XS,VEX_W, EVEX_CD8<32, CD8VT1>; 7820defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info, 7821 any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I, 7822 "{l}">, XD, EVEX_CD8<64, CD8VT1>; 7823defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info, 7824 any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, 
WriteCvtSD2I,
                                          "{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>;

//===----------------------------------------------------------------------===//
// AVX-512 Convert from float to double and back
//===----------------------------------------------------------------------===//

let Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                X86VectorVTInfo _Src, SDNode OpNode,
                                X86FoldableSchedWrite sched> {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (OpNode (_.VT _.RC:$src1),
                                       (_Src.VT _Src.RC:$src2)))>,
                         EVEX_4V, VEX_LIG, Sched<[sched]>;
  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
                      "$src2, $src1", "$src1, $src2",
                      (_.VT (OpNode (_.VT _.RC:$src1),
                                    (_Src.ScalarIntMemFrags addr:$src2)))>,
                      EVEX_4V, VEX_LIG,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;

  let isCodeGenOnly = 1, hasSideEffects = 0 in {
    def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
               (ins _.FRC:$src1, _Src.FRC:$src2),
               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               EVEX_4V, VEX_LIG, Sched<[sched]>;
    let mayLoad = 1 in
    def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
               (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               EVEX_4V, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Scalar Conversion with SAE - suppress all exceptions
multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                    X86VectorVTInfo _Src, SDNode OpNodeSAE,
                                    X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
                        "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                        (_.VT (OpNodeSAE (_.VT _.RC:$src1),
                                         (_Src.VT _Src.RC:$src2)))>,
                        EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
}

// Scalar Conversion with rounding control (RC)
multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                   X86VectorVTInfo _Src, SDNode OpNodeRnd,
                                   X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
                        "$rc, $src2, $src1", "$src1, $src2, $rc",
                        (_.VT (OpNodeRnd (_.VT _.RC:$src1),
                                         (_Src.VT _Src.RC:$src2), (i32 timm:$rc)))>,
                        EVEX_4V, VEX_LIG, Sched<[sched]>,
                        EVEX_B, EVEX_RC;
}
multiclass avx512_cvt_fp_scalar_trunc<bits<8> opc, string OpcodeStr,
                                      SDNode OpNode, SDNode OpNodeRnd,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _src, X86VectorVTInfo _dst,
                                      Predicate prd = HasAVX512> {
  let Predicates = [prd], ExeDomain = SSEPackedSingle in {
    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
             avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
                                     OpNodeRnd, sched>, EVEX_CD8<_src.EltSize, CD8VT1>;
  }
}

multiclass avx512_cvt_fp_scalar_extend<bits<8> opc, string OpcodeStr,
                                       SDNode OpNode, SDNode OpNodeSAE,
                                       X86FoldableSchedWrite sched,
                                       X86VectorVTInfo _src, X86VectorVTInfo _dst,
                                       Predicate prd = HasAVX512> {
  let Predicates
= [prd], ExeDomain = SSEPackedSingle in { 7906 defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>, 7907 avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE, sched>, 7908 EVEX_CD8<_src.EltSize, CD8VT1>; 7909 } 7910} 7911defm VCVTSD2SS : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2ss", X86frounds, 7912 X86froundsRnd, WriteCvtSD2SS, f64x_info, 7913 f32x_info>, XD, VEX_W; 7914defm VCVTSS2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtss2sd", X86fpexts, 7915 X86fpextsSAE, WriteCvtSS2SD, f32x_info, 7916 f64x_info>, XS; 7917defm VCVTSD2SH : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2sh", X86frounds, 7918 X86froundsRnd, WriteCvtSD2SS, f64x_info, 7919 f16x_info, HasFP16>, T_MAP5XD, VEX_W; 7920defm VCVTSH2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtsh2sd", X86fpexts, 7921 X86fpextsSAE, WriteCvtSS2SD, f16x_info, 7922 f64x_info, HasFP16>, T_MAP5XS; 7923defm VCVTSS2SH : avx512_cvt_fp_scalar_trunc<0x1D, "vcvtss2sh", X86frounds, 7924 X86froundsRnd, WriteCvtSD2SS, f32x_info, 7925 f16x_info, HasFP16>, T_MAP5PS; 7926defm VCVTSH2SS : avx512_cvt_fp_scalar_extend<0x13, "vcvtsh2ss", X86fpexts, 7927 X86fpextsSAE, WriteCvtSS2SD, f16x_info, 7928 f32x_info, HasFP16>, T_MAP6PS; 7929 7930def : Pat<(f64 (any_fpextend FR32X:$src)), 7931 (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>, 7932 Requires<[HasAVX512]>; 7933def : Pat<(f64 (any_fpextend (loadf32 addr:$src))), 7934 (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>, 7935 Requires<[HasAVX512, OptForSize]>; 7936 7937def : Pat<(f32 (any_fpround FR64X:$src)), 7938 (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>, 7939 Requires<[HasAVX512]>; 7940 7941def : Pat<(f32 (any_fpextend FR16X:$src)), 7942 (VCVTSH2SSZrr (f32 (IMPLICIT_DEF)), FR16X:$src)>, 7943 Requires<[HasFP16]>; 7944def : Pat<(f32 (any_fpextend (loadf16 addr:$src))), 7945 (VCVTSH2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>, 7946 Requires<[HasFP16, OptForSize]>; 7947 7948def : Pat<(f64 (any_fpextend FR16X:$src)), 7949 (VCVTSH2SDZrr (f64 (IMPLICIT_DEF)), FR16X:$src)>, 7950 Requires<[HasFP16]>; 7951def : Pat<(f64 (any_fpextend (loadf16 addr:$src))), 7952 (VCVTSH2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>, 7953 Requires<[HasFP16, OptForSize]>; 7954 7955def : Pat<(f16 (any_fpround FR32X:$src)), 7956 (VCVTSS2SHZrr (f16 (IMPLICIT_DEF)), FR32X:$src)>, 7957 Requires<[HasFP16]>; 7958def : Pat<(f16 (any_fpround FR64X:$src)), 7959 (VCVTSD2SHZrr (f16 (IMPLICIT_DEF)), FR64X:$src)>, 7960 Requires<[HasFP16]>; 7961 7962def : Pat<(v4f32 (X86Movss 7963 (v4f32 VR128X:$dst), 7964 (v4f32 (scalar_to_vector 7965 (f32 (any_fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))), 7966 (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>, 7967 Requires<[HasAVX512]>; 7968 7969def : Pat<(v2f64 (X86Movsd 7970 (v2f64 VR128X:$dst), 7971 (v2f64 (scalar_to_vector 7972 (f64 (any_fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))), 7973 (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>, 7974 Requires<[HasAVX512]>; 7975 7976//===----------------------------------------------------------------------===// 7977// AVX-512 Vector convert from signed/unsigned integer to float/double 7978// and from float/double to signed/unsigned integer 7979//===----------------------------------------------------------------------===// 7980 7981multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 7982 X86VectorVTInfo _Src, SDPatternOperator OpNode, SDPatternOperator MaskOpNode, 7983 X86FoldableSchedWrite sched, 7984 string Broadcast = _.BroadcastStr, 7985 string Alias = "", X86MemOperand MemOp = _Src.MemOp, 7986 RegisterClass MaskRC = 
                          _.KRCWM,
                          dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src)))),
                          dag MaskLdDAG = (_.VT (MaskOpNode (_Src.VT (_Src.LdFrag addr:$src))))> {
let Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rr : AVX512_maskable_cvt<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                (ins _Src.RC:$src),
                                (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
                                (ins MaskRC:$mask, _Src.RC:$src),
                                OpcodeStr, "$src", "$src",
                                (_.VT (OpNode (_Src.VT _Src.RC:$src))),
                                (vselect_mask MaskRC:$mask,
                                              (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
                                              _.RC:$src0),
                                (vselect_mask MaskRC:$mask,
                                              (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
                                              _.ImmAllZerosV)>,
            EVEX, Sched<[sched]>;

  defm rm : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                (ins MemOp:$src),
                                (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
                                (ins MaskRC:$mask, MemOp:$src),
                                OpcodeStr#Alias, "$src", "$src",
                                LdDAG,
                                (vselect_mask MaskRC:$mask, MaskLdDAG, _.RC:$src0),
                                (vselect_mask MaskRC:$mask, MaskLdDAG, _.ImmAllZerosV)>,
            EVEX, Sched<[sched.Folded]>;

  defm rmb : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                 (ins _Src.ScalarMemOp:$src),
                                 (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
                                 (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
                                 OpcodeStr,
                                 "${src}"#Broadcast, "${src}"#Broadcast,
                                 (_.VT (OpNode (_Src.VT
                                                (_Src.BroadcastLdFrag addr:$src)))),
                                 (vselect_mask MaskRC:$mask,
                                               (_.VT
                                                (MaskOpNode
                                                 (_Src.VT
                                                  (_Src.BroadcastLdFrag addr:$src)))),
                                               _.RC:$src0),
                                 (vselect_mask MaskRC:$mask,
                                               (_.VT
                                                (MaskOpNode
                                                 (_Src.VT
                                                  (_Src.BroadcastLdFrag addr:$src)))),
                                               _.ImmAllZerosV)>,
             EVEX, EVEX_B, Sched<[sched.Folded]>;
  }
}
// Conversion with SAE - suppress all exceptions
multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                              X86VectorVTInfo _Src, SDNode OpNodeSAE,
                              X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in
  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                             (ins _Src.RC:$src), OpcodeStr,
                             "{sae}, $src", "$src, {sae}",
                             (_.VT (OpNodeSAE (_Src.VT _Src.RC:$src)))>,
             EVEX, EVEX_B, Sched<[sched]>;
}

// Conversion with rounding control (RC)
multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                             X86VectorVTInfo _Src, SDPatternOperator OpNodeRnd,
                             X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in
  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                             (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
                             "$rc, $src", "$src, $rc",
                             (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 timm:$rc)))>,
             EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
}

// Similar to avx512_vcvt_fp, but uses an extload for the memory form.
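// For example, with a v4f32 source the memory pattern instantiates to
// (_.VT (extloadv4f32 addr:$src)), so a load feeding an fpextend folds
// straight into the "rm" form. (Illustrative expansion only; the fragment
// name is built as "extload" # _Src.VTName below.)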
multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                X86VectorVTInfo _Src, SDPatternOperator OpNode,
                                SDNode MaskOpNode,
                                X86FoldableSchedWrite sched,
                                string Broadcast = _.BroadcastStr,
                                string Alias = "", X86MemOperand MemOp = _Src.MemOp,
                                RegisterClass MaskRC = _.KRCWM>
  : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, MaskOpNode, sched, Broadcast,
                   Alias, MemOp, MaskRC,
                   (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src)),
                   (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;

// Extend [Float to Double, Half to Float]
multiclass avx512_cvt_extend<bits<8> opc, string OpcodeStr,
                             AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
                             X86SchedWriteWidths sched, Predicate prd = HasAVX512> {
  let Predicates = [prd] in {
    defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info512, _src.info256,
                                  any_fpextend, fpextend, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, _dst.info512, _src.info256,
                                X86vfpextSAE, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info128, _src.info128,
                                     X86any_vfpext, X86vfpext, sched.XMM,
                                     _dst.info128.BroadcastStr,
                                     "", f64mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info256, _src.info128,
                                     any_fpextend, fpextend, sched.YMM>, EVEX_V256;
  }
}

// Truncate [Double to Float, Float to Half]
multiclass avx512_cvt_trunc<bits<8> opc, string OpcodeStr,
                            AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
                            X86SchedWriteWidths sched, Predicate prd = HasAVX512,
                            PatFrag bcast128 = _src.info128.BroadcastLdFrag,
                            PatFrag loadVT128 = _src.info128.LdFrag,
                            RegisterClass maskRC128 = _src.info128.KRCWM> {
  let Predicates = [prd] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512,
                            X86any_vfpround, X86vfpround, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
                               X86vfproundRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128,
                               null_frag, null_frag, sched.XMM,
                               _src.info128.BroadcastStr, "{x}",
                               f128mem, maskRC128>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256,
                               X86any_vfpround, X86vfpround,
                               sched.YMM, _src.info256.BroadcastStr, "{y}">, EVEX_V256;

    // Special patterns to allow use of X86vmfpround for masking. Instruction
    // patterns have been disabled with null_frag.
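    // Each group of three patterns below follows the same scheme: an
    // unmasked selection to the plain instruction, a merge-masked selection
    // that keeps $src0 in lanes where the mask is zero, and a zero-masked
    // selection that uses ImmAllZerosV in place of a passthru operand.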
    def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT VR128X:$src))),
              (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
    def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
                            maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, maskRC128:$mask, VR128X:$src)>;
    def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
                            maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rrkz") maskRC128:$mask, VR128X:$src)>;

    def : Pat<(_dst.info128.VT (X86any_vfpround (loadVT128 addr:$src))),
              (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
    def : Pat<(X86vmfpround (loadVT128 addr:$src), (_dst.info128.VT VR128X:$src0),
                            maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
    def : Pat<(X86vmfpround (loadVT128 addr:$src), _dst.info128.ImmAllZerosV,
                            maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rmkz") maskRC128:$mask, addr:$src)>;

    def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT (bcast128 addr:$src)))),
              (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
    def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
                            (_dst.info128.VT VR128X:$src0), maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
    def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
                            _dst.info128.ImmAllZerosV, maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rmbkz") maskRC128:$mask, addr:$src)>;
  }

  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                   VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                   VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                   VK2WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                   VK2WM:$mask, f64mem:$src), 0, "att">;

  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                   VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                   VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                   VK4WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                   VK4WM:$mask, f64mem:$src), 0, "att">;
}

defm VCVTPD2PS : avx512_cvt_trunc<0x5A, "vcvtpd2ps",
                                  avx512vl_f32_info, avx512vl_f64_info, SchedWriteCvtPD2PS>,
                 VEX_W, PD, EVEX_CD8<64, CD8VF>;
defm VCVTPS2PD : avx512_cvt_extend<0x5A, "vcvtps2pd",
                                   avx512vl_f64_info, avx512vl_f32_info, SchedWriteCvtPS2PD>,
                 PS, EVEX_CD8<32, CD8VH>;

// Extend Half to Double
multiclass avx512_cvtph2pd<bits<8> opc, string OpcodeStr,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in {
    defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f16x_info,
                                  any_fpextend, fpextend, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f16x_info,
                                X86vfpextSAE, sched.ZMM>, EVEX_V512;
    def : Pat<(v8f64 (extloadv8f16 addr:$src)),
              (!cast<Instruction>(NAME # "Zrm") addr:$src)>;
  }
  let Predicates = [HasFP16, HasVLX] in {
    defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v8f16x_info,
                                     X86any_vfpext, X86vfpext, sched.XMM, "{1to2}", "",
                                     f32mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v8f16x_info,
                                     X86any_vfpext, X86vfpext, sched.YMM, "{1to4}", "",
                                     f64mem>, EVEX_V256;
  }
}

// Truncate Double to Half
multiclass avx512_cvtpd2ph<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8f64_info,
                            X86any_vfpround, X86vfpround, sched.ZMM, "{1to8}", "{z}">,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8f64_info,
                               X86vfproundRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasFP16, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2f64x_info, null_frag,
                               null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
                               VK2WM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4f64x_info, null_frag,
                               null_frag, sched.YMM, "{1to4}", "{y}", f256mem,
                               VK4WM>, EVEX_V256;
  }
  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                   VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                   VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                   VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                   i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                   VK2WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                   VK2WM:$mask, i64mem:$src), 0, "att">;

  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                   VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
"$dst {${mask}}, $src}", 8258 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst, 8259 VK4WM:$mask, VR256X:$src), 0, "att">; 8260 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|" 8261 "$dst {${mask}} {z}, $src}", 8262 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst, 8263 VK4WM:$mask, VR256X:$src), 0, "att">; 8264 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}", 8265 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, 8266 i64mem:$src), 0, "att">; 8267 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|" 8268 "$dst {${mask}}, ${src}{1to4}}", 8269 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst, 8270 VK4WM:$mask, i64mem:$src), 0, "att">; 8271 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|" 8272 "$dst {${mask}} {z}, ${src}{1to4}}", 8273 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst, 8274 VK4WM:$mask, i64mem:$src), 0, "att">; 8275 8276 def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}", 8277 (!cast<Instruction>(NAME # "Zrr") VR128X:$dst, 8278 VR512:$src), 0, "att">; 8279 def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|" 8280 "$dst {${mask}}, $src}", 8281 (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst, 8282 VK8WM:$mask, VR512:$src), 0, "att">; 8283 def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|" 8284 "$dst {${mask}} {z}, $src}", 8285 (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst, 8286 VK8WM:$mask, VR512:$src), 0, "att">; 8287 def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}", 8288 (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst, 8289 i64mem:$src), 0, "att">; 8290 def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|" 8291 "$dst {${mask}}, ${src}{1to8}}", 8292 (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst, 8293 VK8WM:$mask, i64mem:$src), 0, "att">; 8294 def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|" 8295 "$dst {${mask}} {z}, ${src}{1to8}}", 8296 (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst, 8297 VK8WM:$mask, i64mem:$src), 0, "att">; 8298} 8299 8300defm VCVTPS2PHX : avx512_cvt_trunc<0x1D, "vcvtps2phx", avx512vl_f16_info, 8301 avx512vl_f32_info, SchedWriteCvtPD2PS, 8302 HasFP16>, T_MAP5PD, EVEX_CD8<32, CD8VF>; 8303defm VCVTPH2PSX : avx512_cvt_extend<0x13, "vcvtph2psx", avx512vl_f32_info, 8304 avx512vl_f16_info, SchedWriteCvtPS2PD, 8305 HasFP16>, T_MAP6PD, EVEX_CD8<16, CD8VH>; 8306defm VCVTPD2PH : avx512_cvtpd2ph<0x5A, "vcvtpd2ph", SchedWriteCvtPD2PS>, 8307 VEX_W, T_MAP5PD, EVEX_CD8<64, CD8VF>; 8308defm VCVTPH2PD : avx512_cvtph2pd<0x5A, "vcvtph2pd", SchedWriteCvtPS2PD>, 8309 T_MAP5PS, EVEX_CD8<16, CD8VQ>; 8310 8311let Predicates = [HasFP16, HasVLX] in { 8312 // Special patterns to allow use of X86vmfpround for masking. Instruction 8313 // patterns have been disabled with null_frag. 
  def : Pat<(v8f16 (X86any_vfpround (v4f64 VR256X:$src))),
            (VCVTPD2PHZ256rr VR256X:$src)>;
  def : Pat<(v8f16 (X86vmfpround (v4f64 VR256X:$src), (v8f16 VR128X:$src0),
                                 VK4WM:$mask)),
            (VCVTPD2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
  def : Pat<(X86vmfpround (v4f64 VR256X:$src), v8f16x_info.ImmAllZerosV,
                          VK4WM:$mask),
            (VCVTPD2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;

  def : Pat<(v8f16 (X86any_vfpround (loadv4f64 addr:$src))),
            (VCVTPD2PHZ256rm addr:$src)>;
  def : Pat<(X86vmfpround (loadv4f64 addr:$src), (v8f16 VR128X:$src0),
                          VK4WM:$mask),
            (VCVTPD2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86vmfpround (loadv4f64 addr:$src), v8f16x_info.ImmAllZerosV,
                          VK4WM:$mask),
            (VCVTPD2PHZ256rmkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_vfpround (v4f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTPD2PHZ256rmb addr:$src)>;
  def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
                          (v8f16 VR128X:$src0), VK4WM:$mask),
            (VCVTPD2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
                          v8f16x_info.ImmAllZerosV, VK4WM:$mask),
            (VCVTPD2PHZ256rmbkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_vfpround (v2f64 VR128X:$src))),
            (VCVTPD2PHZ128rr VR128X:$src)>;
  def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v8f16 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTPD2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86vmfpround (v2f64 VR128X:$src), v8f16x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTPD2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v8f16 (X86any_vfpround (loadv2f64 addr:$src))),
            (VCVTPD2PHZ128rm addr:$src)>;
  def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v8f16 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTPD2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86vmfpround (loadv2f64 addr:$src), v8f16x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTPD2PHZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_vfpround (v2f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTPD2PHZ128rmb addr:$src)>;
  def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
                          (v8f16 VR128X:$src0), VK2WM:$mask),
            (VCVTPD2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
                          v8f16x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTPD2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
}

// Convert Signed/Unsigned Doubleword to Double
let Uses = []<Register>, mayRaiseFPException = 0 in
multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           SDNode MaskOpNode, SDPatternOperator OpNode128,
                           SDNode MaskOpNode128,
                           X86SchedWriteWidths sched> {
  // No rounding in this op
  let Predicates = [HasAVX512] in
  defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
                          MaskOpNode, sched.ZMM>, EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
                               OpNode128, MaskOpNode128, sched.XMM, "{1to2}",
                               "", i64mem, VK2WM,
                               (v2f64 (OpNode128 (bc_v4i32
                                (v2i64
                                 (scalar_to_vector (loadi64 addr:$src)))))),
                               (v2f64 (MaskOpNode128 (bc_v4i32
                                (v2i64
                                 (scalar_to_vector (loadi64 addr:$src))))))>,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
                               MaskOpNode,
                               sched.YMM>, EVEX_V256;
  }
}

// Convert Signed/Unsigned Doubleword to Float
multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           SDNode MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
                          MaskOpNode, sched.ZMM>,
           avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
                             OpNodeRnd, sched.ZMM>, EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
                               MaskOpNode, sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

// Convert Float to Signed/Unsigned Doubleword with truncation
multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDNode MaskOpNode,
                            SDNode OpNodeSAE, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
                                OpNodeSAE, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
                               MaskOpNode, sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

// Convert Float to Signed/Unsigned Doubleword
multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
                               MaskOpNode, sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

// Convert Double to Signed/Unsigned Doubleword with truncation
multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDNode MaskOpNode, SDNode OpNodeSAE,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
                                OpNodeSAE, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    // We need "x"/"y" suffixes in order to distinguish between the 128- and
    // 256-bit memory forms of these instructions in the Asm Parser: they have
    // the same dest type - 'v4i32x_info'. We also specify the broadcast
    // string explicitly for the same reason.
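    // E.g. (illustrative, AT&T syntax) "vcvttpd2dqx (%rax), %xmm0" and
    // "vcvttpd2dqy (%rax), %xmm0" both produce a v4i32 result, so only the
    // suffix tells the assembler which memory form to encode.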
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
                               VK2WM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
                               MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
  }

  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                   VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                   VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                   VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                   f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                   VK2WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                   VK2WM:$mask, f64mem:$src), 0, "att">;

  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                   VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                   VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                   VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                   f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                   VK4WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                   VK4WM:$mask, f64mem:$src), 0, "att">;
}

// Convert Double to Signed/Unsigned Doubleword
multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    // We need "x"/"y" suffixes in order to distinguish between the 128- and
    // 256-bit memory forms of these instructions in the Asm Parser: they have
    // the same dest type - 'v4i32x_info'. We also specify the broadcast
    // string explicitly for the same reason.
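    // As with the truncating variant above, the Z128 form is defined with
    // null_frag; its masked selections are instead provided by the dedicated
    // X86mcvtp2Int patterns later in this file.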
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
                               VK2WM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
                               MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
  }

  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                   VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                   VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                   f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                   VK2WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                   VK2WM:$mask, f64mem:$src), 0, "att">;

  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                   VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                   VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                   f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                   VK4WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                   VK4WM:$mask, f64mem:$src), 0, "att">;
}

// Convert Double to Signed/Unsigned Quadword
multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
                               MaskOpNode, sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

// Convert Double to Signed/Unsigned Quadword with truncation
multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDNode MaskOpNode, SDNode OpNodeRnd,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z
           : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
                               MaskOpNode, sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

// Convert Signed/Unsigned Quadword to Double
multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           SDNode MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
                               MaskOpNode, sched.XMM>, EVEX_V128, NotEVEX2VEXConvertible;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256, NotEVEX2VEXConvertible;
  }
}

// Convert Float to Signed/Unsigned Quadword
multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from v4f32x_info source
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
                               MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
                               (v2i64 (OpNode (bc_v4f32
                                (v2f64
                                 (scalar_to_vector (loadf64 addr:$src)))))),
                               (v2i64 (MaskOpNode (bc_v4f32
                                (v2f64
                                 (scalar_to_vector (loadf64 addr:$src))))))>,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

// Convert Float to Signed/Unsigned Quadword with truncation
multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDNode MaskOpNode, SDNode OpNodeRnd,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from v4f32x_info source
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
                               MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
                               (v2i64 (OpNode (bc_v4f32
                                (v2f64
                                 (scalar_to_vector (loadf64 addr:$src)))))),
                               (v2i64 (MaskOpNode (bc_v4f32
                                (v2f64
                                 (scalar_to_vector (loadf64 addr:$src))))))>,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}
// Convert Signed/Unsigned Quadword to Float
// Also Convert Signed/Unsigned Doubleword to Half
multiclass avx512_cvtqq2ps_dq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                                 SDPatternOperator MaskOpNode, SDPatternOperator OpNode128,
                                 SDPatternOperator OpNode128M, SDPatternOperator OpNodeRnd,
                                 AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
                                 X86SchedWriteWidths sched, Predicate prd = HasDQI> {
  let Predicates = [prd] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    // We need "x"/"y" suffixes in order to distinguish between the 128- and
    // 256-bit memory forms of these instructions in the Asm Parser: they have
    // the same dest type - 'v4i32x_info'. We also specify the broadcast
    // string explicitly for the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128, null_frag,
                               null_frag, sched.XMM, _src.info128.BroadcastStr,
                               "{x}", i128mem, _src.info128.KRCWM>,
                EVEX_V128, NotEVEX2VEXConvertible;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256, OpNode,
                               MaskOpNode, sched.YMM, _src.info256.BroadcastStr,
                               "{y}">, EVEX_V256,
                NotEVEX2VEXConvertible;

    // Special patterns to allow use of X86VM[SU]intToFP for masking. Instruction
    // patterns have been disabled with null_frag.
    def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT VR128X:$src))),
              (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
    def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
                          _src.info128.KRCWM:$mask),
              (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, _src.info128.KRCWM:$mask, VR128X:$src)>;
    def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
                          _src.info128.KRCWM:$mask),
              (!cast<Instruction>(NAME # "Z128rrkz") _src.info128.KRCWM:$mask, VR128X:$src)>;

    def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.LdFrag addr:$src))),
              (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
    def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), (_dst.info128.VT VR128X:$src0),
                          _src.info128.KRCWM:$mask),
              (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
    def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), _dst.info128.ImmAllZerosV,
                          _src.info128.KRCWM:$mask),
              (!cast<Instruction>(NAME # "Z128rmkz") _src.info128.KRCWM:$mask, addr:$src)>;

    def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT (X86VBroadcastld64 addr:$src)))),
              (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
    def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
                          (_dst.info128.VT VR128X:$src0), _src.info128.KRCWM:$mask),
              (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
    def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
                          _dst.info128.ImmAllZerosV, _src.info128.KRCWM:$mask),
              (!cast<Instruction>(NAME # "Z128rmbkz") _src.info128.KRCWM:$mask, addr:$src)>;
  }

  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                   VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME #
"Z128rrk") VR128X:$dst, 8748 VK2WM:$mask, VR128X:$src), 0, "att">; 8749 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}", 8750 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst, 8751 VK2WM:$mask, VR128X:$src), 0, "att">; 8752 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}", 8753 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, 8754 i64mem:$src), 0, "att">; 8755 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|" 8756 "$dst {${mask}}, ${src}{1to2}}", 8757 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst, 8758 VK2WM:$mask, i64mem:$src), 0, "att">; 8759 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|" 8760 "$dst {${mask}} {z}, ${src}{1to2}}", 8761 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst, 8762 VK2WM:$mask, i64mem:$src), 0, "att">; 8763 8764 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}", 8765 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, 8766 VR256X:$src), 0, "att">; 8767 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|" 8768 "$dst {${mask}}, $src}", 8769 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst, 8770 VK4WM:$mask, VR256X:$src), 0, "att">; 8771 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|" 8772 "$dst {${mask}} {z}, $src}", 8773 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst, 8774 VK4WM:$mask, VR256X:$src), 0, "att">; 8775 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}", 8776 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, 8777 i64mem:$src), 0, "att">; 8778 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|" 8779 "$dst {${mask}}, ${src}{1to4}}", 8780 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst, 8781 VK4WM:$mask, i64mem:$src), 0, "att">; 8782 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|" 8783 "$dst {${mask}} {z}, ${src}{1to4}}", 8784 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst, 8785 VK4WM:$mask, i64mem:$src), 0, "att">; 8786} 8787 8788defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", any_sint_to_fp, sint_to_fp, 8789 X86any_VSintToFP, X86VSintToFP, 8790 SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>; 8791 8792defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", any_sint_to_fp, sint_to_fp, 8793 X86VSintToFpRnd, SchedWriteCvtDQ2PS>, 8794 PS, EVEX_CD8<32, CD8VF>; 8795 8796defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86any_cvttp2si, 8797 X86cvttp2si, X86cvttp2siSAE, 8798 SchedWriteCvtPS2DQ>, XS, EVEX_CD8<32, CD8VF>; 8799 8800defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86any_cvttp2si, 8801 X86cvttp2si, X86cvttp2siSAE, 8802 SchedWriteCvtPD2DQ>, 8803 PD, VEX_W, EVEX_CD8<64, CD8VF>; 8804 8805defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86any_cvttp2ui, 8806 X86cvttp2ui, X86cvttp2uiSAE, 8807 SchedWriteCvtPS2DQ>, PS, EVEX_CD8<32, CD8VF>; 8808 8809defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86any_cvttp2ui, 8810 X86cvttp2ui, X86cvttp2uiSAE, 8811 SchedWriteCvtPD2DQ>, 8812 PS, VEX_W, EVEX_CD8<64, CD8VF>; 8813 8814defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", any_uint_to_fp, 8815 uint_to_fp, X86any_VUintToFP, X86VUintToFP, 8816 SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>; 8817 8818defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", any_uint_to_fp, 8819 uint_to_fp, X86VUintToFpRnd, 8820 SchedWriteCvtDQ2PS>, XD, EVEX_CD8<32, CD8VF>; 8821 8822defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int, X86cvtp2Int, 8823 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD, 8824 EVEX_CD8<32, CD8VF>; 8825 8826defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, 
"vcvtpd2dq", X86cvtp2Int, X86cvtp2Int, 8827 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, XD, 8828 VEX_W, EVEX_CD8<64, CD8VF>; 8829 8830defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt, X86cvtp2UInt, 8831 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, 8832 PS, EVEX_CD8<32, CD8VF>; 8833 8834defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt, X86cvtp2UInt, 8835 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W, 8836 PS, EVEX_CD8<64, CD8VF>; 8837 8838defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int, X86cvtp2Int, 8839 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, VEX_W, 8840 PD, EVEX_CD8<64, CD8VF>; 8841 8842defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int, X86cvtp2Int, 8843 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD, 8844 EVEX_CD8<32, CD8VH>; 8845 8846defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt, X86cvtp2UInt, 8847 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W, 8848 PD, EVEX_CD8<64, CD8VF>; 8849 8850defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt, X86cvtp2UInt, 8851 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD, 8852 EVEX_CD8<32, CD8VH>; 8853 8854defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86any_cvttp2si, 8855 X86cvttp2si, X86cvttp2siSAE, 8856 SchedWriteCvtPD2DQ>, VEX_W, 8857 PD, EVEX_CD8<64, CD8VF>; 8858 8859defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86any_cvttp2si, 8860 X86cvttp2si, X86cvttp2siSAE, 8861 SchedWriteCvtPS2DQ>, PD, 8862 EVEX_CD8<32, CD8VH>; 8863 8864defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86any_cvttp2ui, 8865 X86cvttp2ui, X86cvttp2uiSAE, 8866 SchedWriteCvtPD2DQ>, VEX_W, 8867 PD, EVEX_CD8<64, CD8VF>; 8868 8869defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86any_cvttp2ui, 8870 X86cvttp2ui, X86cvttp2uiSAE, 8871 SchedWriteCvtPS2DQ>, PD, 8872 EVEX_CD8<32, CD8VH>; 8873 8874defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", any_sint_to_fp, 8875 sint_to_fp, X86VSintToFpRnd, 8876 SchedWriteCvtDQ2PD>, VEX_W, XS, EVEX_CD8<64, CD8VF>; 8877 8878defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", any_uint_to_fp, 8879 uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PD>, 8880 VEX_W, XS, EVEX_CD8<64, CD8VF>; 8881 8882defm VCVTDQ2PH : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtdq2ph", any_sint_to_fp, sint_to_fp, 8883 X86any_VSintToFP, X86VMSintToFP, 8884 X86VSintToFpRnd, avx512vl_f16_info, avx512vl_i32_info, 8885 SchedWriteCvtDQ2PS, HasFP16>, 8886 T_MAP5PS, EVEX_CD8<32, CD8VF>; 8887 8888defm VCVTUDQ2PH : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtudq2ph", any_uint_to_fp, uint_to_fp, 8889 X86any_VUintToFP, X86VMUintToFP, 8890 X86VUintToFpRnd, avx512vl_f16_info, avx512vl_i32_info, 8891 SchedWriteCvtDQ2PS, HasFP16>, T_MAP5XD, 8892 EVEX_CD8<32, CD8VF>; 8893 8894defm VCVTQQ2PS : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtqq2ps", any_sint_to_fp, sint_to_fp, 8895 X86any_VSintToFP, X86VMSintToFP, 8896 X86VSintToFpRnd, avx512vl_f32_info, avx512vl_i64_info, 8897 SchedWriteCvtDQ2PS>, VEX_W, PS, 8898 EVEX_CD8<64, CD8VF>; 8899 8900defm VCVTUQQ2PS : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtuqq2ps", any_uint_to_fp, uint_to_fp, 8901 X86any_VUintToFP, X86VMUintToFP, 8902 X86VUintToFpRnd, avx512vl_f32_info, avx512vl_i64_info, 8903 SchedWriteCvtDQ2PS>, VEX_W, XD, 8904 EVEX_CD8<64, CD8VF>; 8905 8906let Predicates = [HasVLX] in { 8907 // Special patterns to allow use of X86mcvtp2Int for masking. Instruction 8908 // patterns have been disabled with null_frag. 
  def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))),
            (VCVTPD2DQZ128rr VR128X:$src)>;
  def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))),
            (VCVTPD2DQZ128rm addr:$src)>;
  def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTPD2DQZ128rmb addr:$src)>;
  def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
                          (v4i32 VR128X:$src0), VK2WM:$mask),
            (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;

  // Special patterns to allow use of X86mcvttp2si for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v4i32 (X86any_cvttp2si (v2f64 VR128X:$src))),
            (VCVTTPD2DQZ128rr VR128X:$src)>;
  def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v4i32 (X86any_cvttp2si (loadv2f64 addr:$src))),
            (VCVTTPD2DQZ128rm addr:$src)>;
  def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v4i32 (X86any_cvttp2si (v2f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTTPD2DQZ128rmb addr:$src)>;
  def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
                          (v4i32 VR128X:$src0), VK2WM:$mask),
            (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;

  // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction
  // patterns have been disabled with null_frag.
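  // The same rr/rm/rmb by unmasked/merge/zero grid repeats below for the
  // unsigned conversions.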
  def : Pat<(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src))),
            (VCVTPD2UDQZ128rr VR128X:$src)>;
  def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v4i32 (X86cvtp2UInt (loadv2f64 addr:$src))),
            (VCVTPD2UDQZ128rm addr:$src)>;
  def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTPD2UDQZ128rmb addr:$src)>;
  def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
                           (v4i32 VR128X:$src0), VK2WM:$mask),
            (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
                           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;

  // Special patterns to allow use of X86mcvttp2ui for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 VR128X:$src))),
            (VCVTTPD2UDQZ128rr VR128X:$src)>;
  def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v4i32 (X86any_cvttp2ui (loadv2f64 addr:$src))),
            (VCVTTPD2UDQZ128rm addr:$src)>;
  def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTTPD2UDQZ128rmb addr:$src)>;
  def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
                          (v4i32 VR128X:$src0), VK2WM:$mask),
            (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
}

let Predicates = [HasDQI, HasVLX] in {
  def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
            (VCVTPS2QQZ128rm addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                                 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                                 VR128X:$src0)),
            (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                                 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                                 v2i64x_info.ImmAllZerosV)),
            (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
            (VCVTPS2UQQZ128rm addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                                 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                                 VR128X:$src0)),
            (VCVTPS2UQQZ128rmk VR128X:$src0,
                               VK2WM:$mask, addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                                 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                                 v2i64x_info.ImmAllZerosV)),
            (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v2i64 (X86any_cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
            (VCVTTPS2QQZ128rm addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                                 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                                 VR128X:$src0)),
            (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                                 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                                 v2i64x_info.ImmAllZerosV)),
            (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v2i64 (X86any_cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
            (VCVTTPS2UQQZ128rm addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                                 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                                 VR128X:$src0)),
            (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                                 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                                 v2i64x_info.ImmAllZerosV)),
            (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
}

let Predicates = [HasVLX] in {
  def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
            (VCVTDQ2PDZ128rm addr:$src)>;
  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
                                 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
                                 VR128X:$src0)),
            (VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
                                 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
                                 v2f64x_info.ImmAllZerosV)),
            (VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v2f64 (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
            (VCVTUDQ2PDZ128rm addr:$src)>;
  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
                                 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
                                 VR128X:$src0)),
            (VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
                                 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
                                 v2f64x_info.ImmAllZerosV)),
            (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
}

//===----------------------------------------------------------------------===//
// Half precision conversion instructions
//===----------------------------------------------------------------------===//

let Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                           X86MemOperand x86memop, dag ld_dag,
                           X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable_split<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
                                  (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
                                  (X86any_cvtph2ps (_src.VT _src.RC:$src)),
                                  (X86cvtph2ps (_src.VT _src.RC:$src))>,
            T8PD, Sched<[sched]>;
  defm rm : AVX512_maskable_split<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
                                  (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
                                  (X86any_cvtph2ps (_src.VT ld_dag)),
                                  (X86cvtph2ps (_src.VT ld_dag))>,
            T8PD, Sched<[sched.Folded]>;
}

multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                               X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in
  defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
                             (ins _src.RC:$src),
"vcvtph2ps", 9119 "{sae}, $src", "$src, {sae}", 9120 (X86cvtph2psSAE (_src.VT _src.RC:$src))>, 9121 T8PD, EVEX_B, Sched<[sched]>; 9122} 9123 9124let Predicates = [HasAVX512] in 9125 defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, 9126 (load addr:$src), WriteCvtPH2PSZ>, 9127 avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>, 9128 EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>; 9129 9130let Predicates = [HasVLX] in { 9131 defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem, 9132 (load addr:$src), WriteCvtPH2PSY>, EVEX, EVEX_V256, 9133 EVEX_CD8<32, CD8VH>; 9134 defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem, 9135 (bitconvert (v2i64 (X86vzload64 addr:$src))), 9136 WriteCvtPH2PS>, EVEX, EVEX_V128, 9137 EVEX_CD8<32, CD8VH>; 9138 9139 // Pattern match vcvtph2ps of a scalar i64 load. 9140 def : Pat<(v4f32 (X86any_cvtph2ps (v8i16 (bitconvert 9141 (v2i64 (scalar_to_vector (loadi64 addr:$src))))))), 9142 (VCVTPH2PSZ128rm addr:$src)>; 9143} 9144 9145multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src, 9146 X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> { 9147let ExeDomain = GenericDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { 9148 def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst), 9149 (ins _src.RC:$src1, i32u8imm:$src2), 9150 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", 9151 [(set _dest.RC:$dst, 9152 (X86any_cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>, 9153 Sched<[RR]>; 9154 let Constraints = "$src0 = $dst" in 9155 def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst), 9156 (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), 9157 "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", 9158 [(set _dest.RC:$dst, 9159 (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2), 9160 _dest.RC:$src0, _src.KRCWM:$mask))]>, 9161 Sched<[RR]>, EVEX_K; 9162 def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst), 9163 (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), 9164 "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}", 9165 [(set _dest.RC:$dst, 9166 (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2), 9167 _dest.ImmAllZerosV, _src.KRCWM:$mask))]>, 9168 Sched<[RR]>, EVEX_KZ; 9169 let hasSideEffects = 0, mayStore = 1 in { 9170 def mr : AVX512AIi8<0x1D, MRMDestMem, (outs), 9171 (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2), 9172 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 9173 Sched<[MR]>; 9174 def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs), 9175 (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), 9176 "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>, 9177 EVEX_K, Sched<[MR]>, NotMemoryFoldable; 9178 } 9179} 9180} 9181 9182multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src, 9183 SchedWrite Sched> { 9184 let hasSideEffects = 0, Uses = [MXCSR] in 9185 defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest, 9186 (outs _dest.RC:$dst), 9187 (ins _src.RC:$src1, i32u8imm:$src2), 9188 "vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2", []>, 9189 EVEX_B, AVX512AIi8Base, Sched<[Sched]>; 9190} 9191 9192let Predicates = [HasAVX512] in { 9193 defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem, 9194 WriteCvtPS2PHZ, WriteCvtPS2PHZSt>, 9195 avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>, 9196 EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>; 9197 9198 def : Pat<(store (v16i16 (X86any_cvtps2ph 
                            VR512:$src1, timm:$src2)), addr:$dst),
            (VCVTPS2PHZmr addr:$dst, VR512:$src1, timm:$src2)>;
}

let Predicates = [HasVLX] in {
  defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
                                       WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
                       EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
  defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
                                       WriteCvtPS2PH, WriteCvtPS2PHSt>,
                       EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;

  def : Pat<(store (f64 (extractelt
                         (bc_v2f64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
                         (iPTR 0))), addr:$dst),
            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
  def : Pat<(store (i64 (extractelt
                         (bc_v2i64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
                         (iPTR 0))), addr:$dst),
            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
  def : Pat<(store (v8i16 (X86any_cvtps2ph VR256X:$src1, timm:$src2)), addr:$dst),
            (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, timm:$src2)>;
}

// Unordered/Ordered scalar fp compare with Sae and set EFLAGS
multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
                              string OpcodeStr, Domain d,
                              X86FoldableSchedWrite sched = WriteFComX> {
  let ExeDomain = d, hasSideEffects = 0, Uses = [MXCSR] in
  def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
                  !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
           EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
}

let Defs = [EFLAGS], Predicates = [HasAVX512] in {
  defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", SSEPackedSingle>,
                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
  defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", SSEPackedDouble>,
                   AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
  defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", SSEPackedSingle>,
                  AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
  defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", SSEPackedDouble>,
                  AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
}

let Defs = [EFLAGS], Predicates = [HasAVX512] in {
  defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86any_fcmp, f32, f32mem, loadf32,
                                 "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
                   EVEX_CD8<32, CD8VT1>;
  defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86any_fcmp, f64, f64mem, loadf64,
                                 "ucomisd", SSEPackedDouble>, PD, EVEX,
                   VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, X86strict_fcmps, f32, f32mem, loadf32,
                                "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
                  EVEX_CD8<32, CD8VT1>;
  defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, X86strict_fcmps, f64, f64mem, loadf64,
                                "comisd", SSEPackedDouble>, PD, EVEX,
                  VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  let isCodeGenOnly = 1 in {
    defm VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
                                       sse_load_f32, "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
                     EVEX_CD8<32, CD8VT1>;
    defm VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
                                       sse_load_f64, "ucomisd", SSEPackedDouble>, PD, EVEX,
                     VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;

    defm VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
                                      sse_load_f32, "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
                    EVEX_CD8<32, CD8VT1>;
    defm VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
                                      sse_load_f64, "comisd", SSEPackedDouble>, PD, EVEX,
                      VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  }
}

let Defs = [EFLAGS], Predicates = [HasFP16] in {
  defm VUCOMISHZ : avx512_ord_cmp_sae<0x2E, v8f16x_info, "vucomish",
                                      SSEPackedSingle>, AVX512PSIi8Base, T_MAP5PS,
                                      EVEX_CD8<16, CD8VT1>;
  defm VCOMISHZ : avx512_ord_cmp_sae<0x2F, v8f16x_info, "vcomish",
                                     SSEPackedSingle>, AVX512PSIi8Base, T_MAP5PS,
                                     EVEX_CD8<16, CD8VT1>;
  defm VUCOMISHZ : sse12_ord_cmp<0x2E, FR16X, X86any_fcmp, f16, f16mem, loadf16,
                                 "ucomish", SSEPackedSingle>, T_MAP5PS, EVEX,
                                 VEX_LIG, EVEX_CD8<16, CD8VT1>;
  defm VCOMISHZ : sse12_ord_cmp<0x2F, FR16X, X86strict_fcmps, f16, f16mem, loadf16,
                                "comish", SSEPackedSingle>, T_MAP5PS, EVEX,
                                VEX_LIG, EVEX_CD8<16, CD8VT1>;
  let isCodeGenOnly = 1 in {
    defm VUCOMISHZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v8f16, shmem,
                                       sse_load_f16, "ucomish", SSEPackedSingle>,
                                       T_MAP5PS, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;

    defm VCOMISHZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v8f16, shmem,
                                      sse_load_f16, "comish", SSEPackedSingle>,
                                      T_MAP5PS, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
  }
}

/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd, rcpsh, rsqrtsh
multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _,
                         Predicate prd = HasAVX512> {
  let Predicates = [prd], ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
              (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
              "$src2, $src1", "$src1, $src2",
              (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
              EVEX_4V, VEX_LIG, Sched<[sched]>;
  defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
              (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
              "$src2, $src1", "$src1, $src2",
              (OpNode (_.VT _.RC:$src1),
               (_.ScalarIntMemFrags addr:$src2))>, EVEX_4V, VEX_LIG,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}

defm VRCPSHZ : avx512_fp14_s<0x4D, "vrcpsh", X86rcp14s, SchedWriteFRcp.Scl,
                             f16x_info, HasFP16>, EVEX_CD8<16, CD8VT1>,
                             T_MAP6PD;
defm VRSQRTSHZ : avx512_fp14_s<0x4F, "vrsqrtsh", X86rsqrt14s,
                               SchedWriteFRsqrt.Scl, f16x_info, HasFP16>,
                               EVEX_CD8<16, CD8VT1>, T_MAP6PD;
let Uses = [MXCSR] in {
defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
                               f32x_info>, EVEX_CD8<32, CD8VT1>,
                               T8PD;
defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
                               f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>,
                               T8PD;
defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
                                 SchedWriteFRsqrt.Scl, f32x_info>,
                                 EVEX_CD8<32, CD8VT1>, T8PD;
defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
                                 SchedWriteFRsqrt.Scl, f64x_info>, VEX_W,
                                 EVEX_CD8<64, CD8VT1>, T8PD;
}

/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
            (ins _.RC:$src), OpcodeStr, "$src", "$src",
            (_.VT (OpNode _.RC:$src))>, EVEX, T8PD,
            Sched<[sched]>;
  defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
            (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
            (OpNode (_.VT
             (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
             (ins _.ScalarMemOp:$src), OpcodeStr,
             "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
             (OpNode (_.VT
              (_.BroadcastLdFrag addr:$src)))>,
             EVEX, T8PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched> {
  let Uses = [MXCSR] in {
  defm 14PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"), OpNode, sched.ZMM,
                             v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm 14PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"), OpNode, sched.ZMM,
                             v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
  }
  let Predicates = [HasFP16] in
  defm PHZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"), OpNode, sched.ZMM,
                           v32f16_info>, EVEX_V512, T_MAP6PD, EVEX_CD8<16, CD8VF>;

  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX], Uses = [MXCSR] in {
    defm 14PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"),
                                  OpNode, sched.XMM, v4f32x_info>,
                                  EVEX_V128, EVEX_CD8<32, CD8VF>;
    defm 14PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"),
                                  OpNode, sched.YMM, v8f32x_info>,
                                  EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm 14PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
                                  OpNode, sched.XMM, v2f64x_info>,
                                  EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
    defm 14PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
                                  OpNode, sched.YMM, v4f64x_info>,
                                  EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
  }
  let Predicates = [HasFP16, HasVLX] in {
    defm PHZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
                                OpNode, sched.XMM, v8f16x_info>,
                                EVEX_V128, T_MAP6PD, EVEX_CD8<16, CD8VF>;
    defm PHZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
                                OpNode, sched.YMM, v16f16x_info>,
                                EVEX_V256, T_MAP6PD, EVEX_CD8<16, CD8VF>;
  }
}

defm VRSQRT : avx512_fp14_p_vl_all<0x4E, "vrsqrt", X86rsqrt14, SchedWriteFRsqrt>;
defm VRCP : avx512_fp14_p_vl_all<0x4C, "vrcp", X86rcp14, SchedWriteFRcp>;

/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         SDNode OpNode, SDNode OpNodeSAE,
                         X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
  defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
             (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
             "$src2, $src1", "$src1, $src2",
             (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
             Sched<[sched]>, SIMD_EXC;

  defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
              (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
              "{sae}, $src2, $src1", "$src1, $src2, {sae}",
              (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
              EVEX_B, Sched<[sched]>;

  defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
             (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
             "$src2, $src1", "$src1, $src2",
             (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2))>,
             Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  }
}

multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                        SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
  defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE,
                           sched>, EVEX_CD8<32, CD8VT1>,
                           VEX_LIG, T8PD, EVEX_4V;
  defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE,
                           sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W, T8PD, EVEX_4V;
}

multiclass avx512_vgetexpsh<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
  let Predicates = [HasFP16] in
  defm SHZ : avx512_fp28_s<opc, OpcodeStr#"sh", f16x_info, OpNode, OpNodeSAE, sched>,
             EVEX_CD8<16, CD8VT1>, T_MAP6PD, EVEX_4V;
}

let Predicates = [HasERI] in {
  defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, X86rcp28SAEs,
                             SchedWriteFRcp.Scl>;
  defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, X86rsqrt28SAEs,
                               SchedWriteFRsqrt.Scl>;
}

defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
                            SchedWriteFRnd.Scl>,
               avx512_vgetexpsh<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
                                SchedWriteFRnd.Scl>;

/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         SDNode OpNode, X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
             (ins _.RC:$src), OpcodeStr, "$src", "$src",
             (OpNode (_.VT _.RC:$src))>,
             Sched<[sched]>;

  defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
             (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
             (OpNode (_.VT
              (bitconvert (_.LdFrag addr:$src))))>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
              (ins _.ScalarMemOp:$src), OpcodeStr,
              "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
              (OpNode (_.VT
               (_.BroadcastLdFrag addr:$src)))>,
              EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                             SDNode OpNode, X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
              (ins _.RC:$src), OpcodeStr,
              "{sae}, $src", "$src, {sae}",
              (OpNode (_.VT _.RC:$src))>,
              EVEX_B, Sched<[sched]>;
}

multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
                      SDNode OpNodeSAE, X86SchedWriteWidths sched> {
  defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
             avx512_fp28_p_sae<opc, OpcodeStr#"ps", v16f32_info, OpNodeSAE, sched.ZMM>,
             T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
             avx512_fp28_p_sae<opc, OpcodeStr#"pd", v8f64_info, OpNodeSAE, sched.ZMM>,
             T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}

multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode, X86SchedWriteWidths sched> {
  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode,
                                sched.XMM>,
                                EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode,
                                sched.YMM>,
                                EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode,
                                sched.XMM>,
                                EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode,
                                sched.YMM>,
                                EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
  }
}

multiclass avx512_vgetexp_fp16<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               SDNode OpNodeSAE, X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in
  defm PHZ : avx512_fp28_p<opc, OpcodeStr#"ph", v32f16_info, OpNode, sched.ZMM>,
             avx512_fp28_p_sae<opc, OpcodeStr#"ph", v32f16_info, OpNodeSAE, sched.ZMM>,
             T_MAP6PD, EVEX_V512, EVEX_CD8<16, CD8VF>;
  let Predicates = [HasFP16, HasVLX] in {
    defm PHZ128 : avx512_fp28_p<opc, OpcodeStr#"ph", v8f16x_info, OpNode, sched.XMM>,
                  EVEX_V128, T_MAP6PD, EVEX_CD8<16, CD8VF>;
    defm PHZ256 : avx512_fp28_p<opc, OpcodeStr#"ph", v16f16x_info, OpNode, sched.YMM>,
                  EVEX_V256, T_MAP6PD, EVEX_CD8<16, CD8VF>;
  }
}

let Predicates = [HasERI] in {
  defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, X86rsqrt28SAE,
                             SchedWriteFRsqrt>, EVEX;
  defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, X86rcp28SAE,
                           SchedWriteFRcp>, EVEX;
  defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, X86exp2SAE,
                          SchedWriteFAdd>, EVEX;
}

defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
                          SchedWriteFRnd>,
               avx512_vgetexp_fp16<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
                                   SchedWriteFRnd>,
               avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexp,
                                        SchedWriteFRnd>, EVEX;

multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
                                    X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
             (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
             (_.VT (X86fsqrtRnd _.RC:$src, (i32 timm:$rc)))>,
             EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
}

multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm r: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
            (ins _.RC:$src), OpcodeStr, "$src", "$src",
            (_.VT (any_fsqrt _.RC:$src)),
            (_.VT (fsqrt _.RC:$src))>, EVEX,
            Sched<[sched]>;
  defm m: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
            (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
            (any_fsqrt (_.VT (_.LdFrag addr:$src))),
            (fsqrt (_.VT (_.LdFrag addr:$src)))>, EVEX,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm mb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
             (ins _.ScalarMemOp:$src), OpcodeStr,
             "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
             (any_fsqrt (_.VT (_.BroadcastLdFrag addr:$src))),
             (fsqrt (_.VT (_.BroadcastLdFrag addr:$src)))>,
             EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

let Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
                                  X86SchedWriteSizes sched> {
  let Predicates =
      [HasFP16] in
  defm PHZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
                                sched.PH.ZMM, v32f16_info>,
                                EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
  let Predicates = [HasFP16, HasVLX] in {
    defm PHZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
                                     sched.PH.XMM, v8f16x_info>,
                                     EVEX_V128, T_MAP5PS, EVEX_CD8<16, CD8VF>;
    defm PHZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
                                     sched.PH.YMM, v16f16x_info>,
                                     EVEX_V256, T_MAP5PS, EVEX_CD8<16, CD8VF>;
  }
  defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
                                sched.PS.ZMM, v16f32_info>,
                                EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
                                sched.PD.ZMM, v8f64_info>,
                                EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
                                     sched.PS.XMM, v4f32x_info>,
                                     EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
                                     sched.PS.YMM, v8f32x_info>,
                                     EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
                                     sched.PD.XMM, v2f64x_info>,
                                     EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
                                     sched.PD.YMM, v4f64x_info>,
                                     EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
  }
}

let Uses = [MXCSR] in
multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
                                        X86SchedWriteSizes sched> {
  let Predicates = [HasFP16] in
  defm PHZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ph"),
                                      sched.PH.ZMM, v32f16_info>,
                                      EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
  defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
                                      sched.PS.ZMM, v16f32_info>,
                                      EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"),
                                      sched.PD.ZMM, v8f64_info>,
                                      EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
}

multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, string Name, Predicate prd = HasAVX512> {
  let ExeDomain = _.ExeDomain, Predicates = [prd] in {
  defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                 "$src2, $src1", "$src1, $src2",
                 (X86fsqrts (_.VT _.RC:$src1),
                            (_.VT _.RC:$src2))>,
                 Sched<[sched]>, SIMD_EXC;
  defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                 "$src2, $src1", "$src1, $src2",
                 (X86fsqrts (_.VT _.RC:$src1),
                            (_.ScalarIntMemFrags addr:$src2))>,
                 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  let Uses = [MXCSR] in
  defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
                  "$rc, $src2, $src1", "$src1, $src2, $rc",
                  (X86fsqrtRnds (_.VT _.RC:$src1),
                                (_.VT _.RC:$src2),
                                (i32 timm:$rc))>,
                  EVEX_B, EVEX_RC, Sched<[sched]>;

  let isCodeGenOnly = 1, hasSideEffects = 0 in {
    def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.FRC:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
              Sched<[sched]>, SIMD_EXC;
    let mayLoad = 1 in
    def m : I<opc,
              MRMSrcMem, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.ScalarMemOp:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
              Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  }
  }

  let Predicates = [prd] in {
    def : Pat<(_.EltVT (any_fsqrt _.FRC:$src)),
              (!cast<Instruction>(Name#Zr)
                  (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
  }

  let Predicates = [prd, OptForSize] in {
    def : Pat<(_.EltVT (any_fsqrt (load addr:$src))),
              (!cast<Instruction>(Name#Zm)
                  (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
  }
}

multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
                                  X86SchedWriteSizes sched> {
  defm SHZ : avx512_sqrt_scalar<opc, OpcodeStr#"sh", sched.PH.Scl, f16x_info, NAME#"SH", HasFP16>,
             EVEX_CD8<16, CD8VT1>, EVEX_4V, T_MAP5XS;
  defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
             EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;
  defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
             EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W;
}

defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
             avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;

defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG;

multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
                 "$src3, $src2, $src1", "$src1, $src2, $src3",
                 (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                     (i32 timm:$src3)))>,
                 Sched<[sched]>, SIMD_EXC;

  let Uses = [MXCSR] in
  defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
                  "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
                  (_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                         (i32 timm:$src3)))>, EVEX_B,
                  Sched<[sched]>;

  defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                 (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
                 OpcodeStr,
                 "$src3, $src2, $src1", "$src1, $src2, $src3",
                 (_.VT (X86RndScales _.RC:$src1,
                        (_.ScalarIntMemFrags addr:$src2), (i32 timm:$src3)))>,
                 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;

  let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
    def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
              OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
              []>, Sched<[sched]>, SIMD_EXC;

    let mayLoad = 1 in
    def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
              OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
              []>, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  }
  }

  let Predicates = [HasAVX512] in {
    def : Pat<(X86any_VRndScale _.FRC:$src1, timm:$src2),
              (_.EltVT (!cast<Instruction>(NAME#r) (_.EltVT (IMPLICIT_DEF)),
                        _.FRC:$src1, timm:$src2))>;
  }

  let Predicates = [HasAVX512, OptForSize] in {
    def : Pat<(X86any_VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2),
              (_.EltVT (!cast<Instruction>(NAME#m) (_.EltVT
                        (IMPLICIT_DEF)),
                        addr:$src1, timm:$src2))>;
  }
}

let Predicates = [HasFP16] in
defm VRNDSCALESHZ : avx512_rndscale_scalar<0x0A, "vrndscalesh",
                                           SchedWriteFRnd.Scl, f16x_info>,
                                           AVX512PSIi8Base, TA, EVEX_4V,
                                           EVEX_CD8<16, CD8VT1>;

defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
                                           SchedWriteFRnd.Scl, f32x_info>,
                                           AVX512AIi8Base, EVEX_4V, VEX_LIG,
                                           EVEX_CD8<32, CD8VT1>;

defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd",
                                           SchedWriteFRnd.Scl, f64x_info>,
                                           VEX_W, AVX512AIi8Base, EVEX_4V, VEX_LIG,
                                           EVEX_CD8<64, CD8VT1>;

multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
                                dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
                                dag OutMask, Predicate BasePredicate> {
  let Predicates = [BasePredicate] in {
    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
                  (OpNode (extractelt _.VT:$src2, (iPTR 0))),
                  (extractelt _.VT:$dst, (iPTR 0))))),
              (!cast<Instruction>("V"#OpcPrefix#r_Intk)
                  _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>;

    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
                  (OpNode (extractelt _.VT:$src2, (iPTR 0))),
                  ZeroFP))),
              (!cast<Instruction>("V"#OpcPrefix#r_Intkz)
                  OutMask, _.VT:$src2, _.VT:$src1)>;
  }
}

defm : avx512_masked_scalar<fsqrt, "SQRTSHZ", X86Movsh,
                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v8f16x_info,
                            fp16imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasFP16>;
defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss,
                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info,
                            fp32imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
                            fp64imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;


//-------------------------------------------------
// Integer truncate and extend operations
//-------------------------------------------------

// PatFrags that contain a select and a truncate op. They take operands in the
// same order as X86vmtrunc, X86vmtruncs, X86vmtruncus. This allows us to pass
// either to the multiclasses below.
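// For illustration (an assumed example, not an exhaustive list of users):
// a masked v8i64 -> v8i16 truncate can reach instruction selection either as
//   (vselect_mask VK8WM:$mask, (trunc (v8i64 VR512:$src)), (v8i16 VR128X:$src0))
// or as the equivalent
//   (X86vmtrunc (v8i64 VR512:$src), (v8i16 VR128X:$src0), VK8WM:$mask)
// The PatFrags below rewrite the vselect_mask form into the ($src, $src0,
// $mask) operand order of the X86vmtrunc* nodes, so avx512_trunc_common can
// be instantiated with either kind of node.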
def select_trunc : PatFrag<(ops node:$src, node:$src0, node:$mask),
                           (vselect_mask node:$mask,
                            (trunc node:$src), node:$src0)>;
def select_truncs : PatFrag<(ops node:$src, node:$src0, node:$mask),
                            (vselect_mask node:$mask,
                             (X86vtruncs node:$src), node:$src0)>;
def select_truncus : PatFrag<(ops node:$src, node:$src0, node:$mask),
                             (vselect_mask node:$mask,
                              (X86vtruncus node:$src), node:$src0)>;

multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               SDPatternOperator MaskNode,
                               X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
                               X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
  let ExeDomain = DestInfo.ExeDomain in {
  def rr : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
             (ins SrcInfo.RC:$src),
             OpcodeStr # "\t{$src, $dst|$dst, $src}",
             [(set DestInfo.RC:$dst,
                   (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))))]>,
             EVEX, Sched<[sched]>;
  let Constraints = "$src0 = $dst" in
  def rrk : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
             (ins DestInfo.RC:$src0, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
             OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
             [(set DestInfo.RC:$dst,
                   (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
                             (DestInfo.VT DestInfo.RC:$src0),
                             SrcInfo.KRCWM:$mask))]>,
             EVEX, EVEX_K, Sched<[sched]>;
  def rrkz : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
             (ins SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
             OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
             [(set DestInfo.RC:$dst,
                   (DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
                                 DestInfo.ImmAllZerosV, SrcInfo.KRCWM:$mask)))]>,
             EVEX, EVEX_KZ, Sched<[sched]>;
  }

  let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
    def mr : AVX512XS8I<opc, MRMDestMem, (outs),
               (ins x86memop:$dst, SrcInfo.RC:$src),
               OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
               EVEX, Sched<[sched.Folded]>;

    def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
               (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
               OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>,
               EVEX, EVEX_K, Sched<[sched.Folded]>, NotMemoryFoldable;
  } // mayStore = 1, hasSideEffects = 0
}

multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
                                    PatFrag truncFrag, PatFrag mtruncFrag,
                                    string Name> {

  def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
            (!cast<Instruction>(Name#SrcInfo.ZSuffix#mr)
                addr:$dst, SrcInfo.RC:$src)>;

  def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst,
                        SrcInfo.KRCWM:$mask),
            (!cast<Instruction>(Name#SrcInfo.ZSuffix#mrk)
                addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
}

multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
                        SDNode OpNode256, SDNode OpNode512,
                        SDPatternOperator MaskNode128,
                        SDPatternOperator MaskNode256,
                        SDPatternOperator MaskNode512,
                        X86FoldableSchedWrite sched,
                        AVX512VLVectorVTInfo VTSrcInfo,
                        X86VectorVTInfo DestInfoZ128,
                        X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
                        X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
                        X86MemOperand x86memopZ, PatFrag truncFrag,
                        PatFrag mtruncFrag, Predicate prd = HasAVX512> {

  let Predicates = [HasVLX, prd] in {
    defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched,
                                   VTSrcInfo.info128,
                                   DestInfoZ128, x86memopZ128>,
               avx512_trunc_mr_lowering<VTSrcInfo.info128, truncFrag,
                                        mtruncFrag, NAME>, EVEX_V128;

    defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched,
                                   VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
               avx512_trunc_mr_lowering<VTSrcInfo.info256, truncFrag,
                                        mtruncFrag, NAME>, EVEX_V256;
  }
  let Predicates = [prd] in
    defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched,
                                VTSrcInfo.info512, DestInfoZ, x86memopZ>,
            avx512_trunc_mr_lowering<VTSrcInfo.info512, truncFrag,
                                     mtruncFrag, NAME>, EVEX_V512;
}

multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode,
                          InVecMaskNode, InVecMaskNode, InVecMaskNode, sched,
                          avx512vl_i64_info, v16i8x_info, v16i8x_info,
                          v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
}

multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
                          InVecMaskNode, InVecMaskNode, MaskNode, sched,
                          avx512vl_i64_info, v8i16x_info, v8i16x_info,
                          v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
}

multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
                          InVecMaskNode, MaskNode, MaskNode, sched,
                          avx512vl_i64_info, v4i32x_info, v4i32x_info,
                          v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
}

multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
                          InVecMaskNode, InVecMaskNode, MaskNode, sched,
                          avx512vl_i32_info, v16i8x_info, v16i8x_info,
                          v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
}

multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
                          InVecMaskNode, MaskNode, MaskNode, sched,
                          avx512vl_i32_info, v8i16x_info, v8i16x_info,
                          v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
}

multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc,
                          OpcodeStr, InVecNode, OpNode, OpNode,
                          InVecMaskNode, MaskNode, MaskNode, sched,
                          avx512vl_i16_info, v16i8x_info, v16i8x_info,
                          v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
}

defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb",
                               WriteVPMOV256, truncstorevi8,
                               masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb",
                                WriteVPMOV256, truncstore_s_vi8,
                                masked_truncstore_s_vi8, X86vtruncs,
                                X86vmtruncs>;
defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb",
                                 WriteVPMOV256, truncstore_us_vi8,
                                 masked_truncstore_us_vi8, X86vtruncus, X86vmtruncus>;

defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc,
                               WriteVPMOV256, truncstorevi16,
                               masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, select_truncs,
                                WriteVPMOV256, truncstore_s_vi16,
                                masked_truncstore_s_vi16, X86vtruncs,
                                X86vmtruncs>;
defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
                                 select_truncus, WriteVPMOV256,
                                 truncstore_us_vi16, masked_truncstore_us_vi16,
                                 X86vtruncus, X86vmtruncus>;

defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc,
                               WriteVPMOV256, truncstorevi32,
                               masked_truncstorevi32, X86vtrunc, X86vmtrunc>;
defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, select_truncs,
                                WriteVPMOV256, truncstore_s_vi32,
                                masked_truncstore_s_vi32, X86vtruncs,
                                X86vmtruncs>;
defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
                                 select_truncus, WriteVPMOV256,
                                 truncstore_us_vi32, masked_truncstore_us_vi32,
                                 X86vtruncus, X86vmtruncus>;

defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc,
                               WriteVPMOV256, truncstorevi8,
                               masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs,
                                WriteVPMOV256, truncstore_s_vi8,
                                masked_truncstore_s_vi8, X86vtruncs,
                                X86vmtruncs>;
defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus,
                                 select_truncus, WriteVPMOV256,
                                 truncstore_us_vi8, masked_truncstore_us_vi8,
                                 X86vtruncus, X86vmtruncus>;

defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc,
                               WriteVPMOV256, truncstorevi16,
                               masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs,
                                WriteVPMOV256, truncstore_s_vi16,
                                masked_truncstore_s_vi16, X86vtruncs,
                                X86vmtruncs>;
defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
                                 select_truncus, WriteVPMOV256,
                                 truncstore_us_vi16, masked_truncstore_us_vi16,
                                 X86vtruncus, X86vmtruncus>;

defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc,
                               WriteVPMOV256, truncstorevi8,
                               masked_truncstorevi8, X86vtrunc,
                               X86vmtrunc>;
defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs,
                                WriteVPMOV256, truncstore_s_vi8,
                                masked_truncstore_s_vi8, X86vtruncs,
                                X86vmtruncs>;
defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
                                 select_truncus, WriteVPMOV256,
                                 truncstore_us_vi8, masked_truncstore_us_vi8,
                                 X86vtruncus, X86vmtruncus>;

let Predicates = [HasAVX512, NoVLX] in {
def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
         (v8i16 (EXTRACT_SUBREG
                 (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                              VR256X:$src, sub_ymm)))), sub_xmm))>;
def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
         (v4i32 (EXTRACT_SUBREG
                 (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                            VR256X:$src, sub_ymm)))), sub_xmm))>;
}

let Predicates = [HasBWI, NoVLX] in {
def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
         (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
                                             VR256X:$src, sub_ymm))), sub_xmm))>;
}

// Without BWI we can't use vXi16/vXi8 vselect so we have to use vmtrunc nodes.
multiclass mtrunc_lowering<string InstrName, SDNode OpNode,
                           X86VectorVTInfo DestInfo,
                           X86VectorVTInfo SrcInfo> {
  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
                                 DestInfo.RC:$src0,
                                 SrcInfo.KRCWM:$mask)),
            (!cast<Instruction>(InstrName#"rrk") DestInfo.RC:$src0,
                                                 SrcInfo.KRCWM:$mask,
                                                 SrcInfo.RC:$src)>;

  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
                                 DestInfo.ImmAllZerosV,
                                 SrcInfo.KRCWM:$mask)),
            (!cast<Instruction>(InstrName#"rrkz") SrcInfo.KRCWM:$mask,
                                                  SrcInfo.RC:$src)>;
}

let Predicates = [HasVLX] in {
defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>;
defm : mtrunc_lowering<"VPMOVSDWZ256", X86vmtruncs, v8i16x_info, v8i32x_info>;
defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>;
}

let Predicates = [HasAVX512] in {
defm : mtrunc_lowering<"VPMOVDWZ", X86vmtrunc, v16i16x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVSDWZ", X86vmtruncs, v16i16x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>;

defm : mtrunc_lowering<"VPMOVDBZ", X86vmtrunc, v16i8x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVSDBZ", X86vmtruncs, v16i8x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>;

defm : mtrunc_lowering<"VPMOVQWZ", X86vmtrunc, v8i16x_info, v8i64_info>;
defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>;
defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>;
}

multiclass avx512_pmovx_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
                               X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
                               X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode> {
  let ExeDomain = DestInfo.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
              (ins SrcInfo.RC:$src), OpcodeStr, "$src", "$src",
              (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
              EVEX, Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
              (ins x86memop:$src), OpcodeStr, "$src", "$src",
              (DestInfo.VT (LdFrag addr:$src))>,
              EVEX, Sched<[sched.Folded]>;
  }
}

multiclass avx512_pmovx_bw<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode InVecNode, string ExtTy,
                           X86FoldableSchedWrite schedX, X86FoldableSchedWrite schedYZ,
                           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasBWI] in {
    defm Z128: avx512_pmovx_common<opc, OpcodeStr, schedX, v8i16x_info,
                                   v16i8x_info, i64mem, LdFrag, InVecNode>,
                                   EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256: avx512_pmovx_common<opc,
                                   OpcodeStr, schedYZ, v16i16x_info,
                                   v16i8x_info, i128mem, LdFrag, OpNode>,
                                   EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasBWI] in {
    defm Z : avx512_pmovx_common<opc, OpcodeStr, schedYZ, v32i16_info,
                                 v32i8x_info, i256mem, LdFrag, OpNode>,
                                 EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
  }
}

multiclass avx512_pmovx_bd<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode InVecNode, string ExtTy,
                           X86FoldableSchedWrite schedX, X86FoldableSchedWrite schedYZ,
                           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: avx512_pmovx_common<opc, OpcodeStr, schedX, v4i32x_info,
                                   v16i8x_info, i32mem, LdFrag, InVecNode>,
                                   EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256: avx512_pmovx_common<opc, OpcodeStr, schedYZ, v8i32x_info,
                                   v16i8x_info, i64mem, LdFrag, InVecNode>,
                                   EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z : avx512_pmovx_common<opc, OpcodeStr, schedYZ, v16i32_info,
                                 v16i8x_info, i128mem, LdFrag, OpNode>,
                                 EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
  }
}

multiclass avx512_pmovx_bq<bits<8> opc, string OpcodeStr,
                           SDNode InVecNode, string ExtTy,
                           X86FoldableSchedWrite schedX, X86FoldableSchedWrite schedYZ,
                           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: avx512_pmovx_common<opc, OpcodeStr, schedX, v2i64x_info,
                                   v16i8x_info, i16mem, LdFrag, InVecNode>,
                                   EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256: avx512_pmovx_common<opc, OpcodeStr, schedYZ, v4i64x_info,
                                   v16i8x_info, i32mem, LdFrag, InVecNode>,
                                   EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z : avx512_pmovx_common<opc, OpcodeStr, schedYZ, v8i64_info,
                                 v16i8x_info, i64mem, LdFrag, InVecNode>,
                                 EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG;
  }
}

multiclass avx512_pmovx_wd<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode InVecNode, string ExtTy,
                           X86FoldableSchedWrite schedX, X86FoldableSchedWrite schedYZ,
                           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: avx512_pmovx_common<opc, OpcodeStr, schedX, v4i32x_info,
                                   v8i16x_info, i64mem, LdFrag, InVecNode>,
                                   EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256: avx512_pmovx_common<opc, OpcodeStr, schedYZ, v8i32x_info,
                                   v8i16x_info, i128mem, LdFrag, OpNode>,
                                   EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z : avx512_pmovx_common<opc, OpcodeStr, schedYZ, v16i32_info,
                                 v16i16x_info, i256mem, LdFrag, OpNode>,
                                 EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
  }
}

multiclass avx512_pmovx_wq<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode InVecNode, string ExtTy,
                           X86FoldableSchedWrite schedX, X86FoldableSchedWrite schedYZ,
                           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: avx512_pmovx_common<opc, OpcodeStr, schedX, v2i64x_info,
                                   v8i16x_info, i32mem, LdFrag, InVecNode>,
                                   EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256: avx512_pmovx_common<opc, OpcodeStr, schedYZ,
                                   v4i64x_info,
                                   v8i16x_info, i64mem, LdFrag, InVecNode>,
                                   EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z : avx512_pmovx_common<opc, OpcodeStr, schedYZ, v8i64_info,
                                 v8i16x_info, i128mem, LdFrag, OpNode>,
                                 EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
  }
}

multiclass avx512_pmovx_dq<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode InVecNode, string ExtTy,
                           X86FoldableSchedWrite schedX, X86FoldableSchedWrite schedYZ,
                           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {

  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: avx512_pmovx_common<opc, OpcodeStr, schedX, v2i64x_info,
                                   v4i32x_info, i64mem, LdFrag, InVecNode>,
                                   EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;

    defm Z256: avx512_pmovx_common<opc, OpcodeStr, schedYZ, v4i64x_info,
                                   v4i32x_info, i128mem, LdFrag, OpNode>,
                                   EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
  }
  let Predicates = [HasAVX512] in {
    defm Z : avx512_pmovx_common<opc, OpcodeStr, schedYZ, v8i64_info,
                                 v8i32x_info, i256mem, LdFrag, OpNode>,
                                 EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
  }
}

defm VPMOVZXBW : avx512_pmovx_bw<0x30, "vpmovzxbw", zext, zext_invec, "z", SchedWriteShuffle.XMM, WriteVPMOV256>;
defm VPMOVZXBD : avx512_pmovx_bd<0x31, "vpmovzxbd", zext, zext_invec, "z", SchedWriteShuffle.XMM, WriteVPMOV256>;
defm VPMOVZXBQ : avx512_pmovx_bq<0x32, "vpmovzxbq", zext_invec, "z", SchedWriteShuffle.XMM, WriteVPMOV256>;
defm VPMOVZXWD : avx512_pmovx_wd<0x33, "vpmovzxwd", zext, zext_invec, "z", SchedWriteShuffle.XMM, WriteVPMOV256>;
defm VPMOVZXWQ : avx512_pmovx_wq<0x34, "vpmovzxwq", zext, zext_invec, "z", SchedWriteShuffle.XMM, WriteVPMOV256>;
defm VPMOVZXDQ : avx512_pmovx_dq<0x35, "vpmovzxdq", zext, zext_invec, "z", SchedWriteShuffle.XMM, WriteVPMOV256>;

defm VPMOVSXBW: avx512_pmovx_bw<0x20, "vpmovsxbw", sext, sext_invec, "s", SchedWriteShuffle.XMM, WriteVPMOV256>;
defm VPMOVSXBD: avx512_pmovx_bd<0x21, "vpmovsxbd", sext, sext_invec, "s", SchedWriteShuffle.XMM, WriteVPMOV256>;
defm VPMOVSXBQ: avx512_pmovx_bq<0x22, "vpmovsxbq", sext_invec, "s", SchedWriteShuffle.XMM, WriteVPMOV256>;
defm VPMOVSXWD: avx512_pmovx_wd<0x23, "vpmovsxwd", sext, sext_invec, "s", SchedWriteShuffle.XMM, WriteVPMOV256>;
defm VPMOVSXWQ: avx512_pmovx_wq<0x24, "vpmovsxwq", sext, sext_invec, "s", SchedWriteShuffle.XMM, WriteVPMOV256>;
defm VPMOVSXDQ: avx512_pmovx_dq<0x25, "vpmovsxdq", sext, sext_invec, "s", SchedWriteShuffle.XMM, WriteVPMOV256>;


// Patterns for which we also need any-extend versions. aext_vector_inreg
// is currently legalized to zext_vector_inreg.
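// For example, assuming the current legalization behavior described above,
// a node like (v4i32 (any_extend_vector_inreg (v16i8 VR128X:$src))) reaches
// instruction selection already rewritten into its zero-extend form, so the
// VPMOVZX load patterns below also cover the any-extend case.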
multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
  // 256-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
    def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
              (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
  }

  let Predicates = [HasVLX] in {
    def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
              (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;

    def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
              (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
  }

  // 512-bit patterns
  let Predicates = [HasBWI] in {
    def : Pat<(v32i16 (ExtOp (loadv32i8 addr:$src))),
              (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
  }
  let Predicates = [HasAVX512] in {
    def : Pat<(v16i32 (ExtOp (loadv16i8 addr:$src))),
              (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
    def : Pat<(v16i32 (ExtOp (loadv16i16 addr:$src))),
              (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;

    def : Pat<(v8i64 (ExtOp (loadv8i16 addr:$src))),
              (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;

    def : Pat<(v8i64 (ExtOp (loadv8i32 addr:$src))),
              (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
  }
}

multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
                                 SDNode InVecOp> :
    AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
  // 128-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
    def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
              (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
    def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
              (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
    def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
              (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  }
  let Predicates = [HasVLX] in {
    def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
              (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
    def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
              (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;

    def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
              (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;

    def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
              (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
    def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
              (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
    def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
              (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;

    def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
              (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
    def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
              (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;

    def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
              (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
    def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
              (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
    def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
              (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  }
  let Predicates = [HasVLX] in {
    def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
              (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
    def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
              (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
    def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
              (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;

    def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
              (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
    def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
              (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;

    def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
              (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
    def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
              (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
    def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
              (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  }
  // 512-bit patterns
  let Predicates = [HasAVX512] in {
    def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
              (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
    def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
              (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
    def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
              (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  }
}

defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;

// Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge
// ext+trunc aggressively, making it impossible to legalize the DAG to this
// pattern directly.
let Predicates = [HasAVX512, NoBWI] in {
def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
def: Pat<(v16i8 (trunc (loadv16i16 addr:$src))),
         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
}

//===----------------------------------------------------------------------===//
// GATHER - SCATTER Operations

// FIXME: Improve scheduling of gather/scatter instructions.
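// For reference (a behavioral sketch; the authoritative semantics are the
// Intel SDM's): a gather loads each unmasked element indirectly through a
// vector of indices, e.g. in AT&T syntax:
//   vgatherdps (%rax,%zmm1,4), %zmm0 {%k1}
// The mask register is tied as both input and output ($mask_wb below)
// because hardware clears each mask bit as the corresponding element load
// completes, which is what makes the instruction restartable after a fault.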
multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
  let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
      ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
  def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
            (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
            !strconcat(OpcodeStr#_.Suffix,
                       "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
            []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
            Sched<[WriteLoad, WriteVecMaskedGatherWriteback]>;
}

multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
                              AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512,
                                    vy512xmem>, EVEX_V512, VEX_W;
  defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info512,
                                    vz512mem>, EVEX_V512, VEX_W;
let Predicates = [HasVLX] in {
  defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
                                       vx256xmem>, EVEX_V256, VEX_W;
  defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info256,
                                       vy256xmem>, EVEX_V256, VEX_W;
  defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
                                       vx128xmem>, EVEX_V128, VEX_W;
  defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
                                       vx128xmem>, EVEX_V128, VEX_W;
}
}

multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
                              AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512, vz512mem>,
                      EVEX_V512;
  defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info256, vz256mem>,
                      EVEX_V512;
let Predicates = [HasVLX] in {
  defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
                                       vy256xmem>, EVEX_V256;
  defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info128,
                                       vy128xmem>, EVEX_V256;
  defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
                                       vx128xmem>, EVEX_V128;
  defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
                                       vx64xmem, VK2WM>, EVEX_V128;
}
}


defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
               avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;

defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
                avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;

multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {

let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain,
    hasSideEffects = 0 in

  def mr : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
            (ins memop:$dst, MaskRC:$mask, _.RC:$src),
            !strconcat(OpcodeStr#_.Suffix,
                       "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
            []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
            Sched<[WriteStore]>;
}

multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
                               AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512,
                                     vy512xmem>, EVEX_V512, VEX_W;
  defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info512,
                                     vz512mem>, EVEX_V512, VEX_W;
let Predicates
    = [HasVLX] in {
  defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
                                        vx256xmem>, EVEX_V256, VEX_W;
  defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info256,
                                        vy256xmem>, EVEX_V256, VEX_W;
  defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
                                        vx128xmem>, EVEX_V128, VEX_W;
  defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
                                        vx128xmem>, EVEX_V128, VEX_W;
}
}

multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
                               AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512, vz512mem>,
                      EVEX_V512;
  defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info256, vz256mem>,
                      EVEX_V512;
let Predicates = [HasVLX] in {
  defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
                                        vy256xmem>, EVEX_V256;
  defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
                                        vy128xmem>, EVEX_V256;
  defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
                                        vx128xmem>, EVEX_V128;
  defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
                                        vx64xmem, VK2WM>, EVEX_V128;
}
}

defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
                avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;

defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
                 avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;

// prefetch
multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
                                          RegisterClass KRC, X86MemOperand memop> {
  let Predicates = [HasPFI], mayLoad = 1, mayStore = 1 in
  def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
            !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
            EVEX, EVEX_K, Sched<[WriteLoad]>;
}

defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
                      VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
                      VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
                      VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
                      VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
                      VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
                      VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
                      VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
                      VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
                       VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
                       VK8WM, vz256mem>, EVEX_V512,
                       EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
                       VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
                       VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
                       VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
                       VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
                       VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
                       VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr, SchedWrite Sched> {
  def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
             !strconcat(OpcodeStr#Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
             [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
             EVEX, Sched<[Sched]>;
}

multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
                                 string OpcodeStr, Predicate prd> {
let Predicates = [prd] in
  defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr, WriteVecMoveZ>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr, WriteVecMoveY>, EVEX_V256;
    defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr, WriteVecMoveX>, EVEX_V128;
  }
}

defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2", HasBWI>;
defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI>, VEX_W;
defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI>, VEX_W;

multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr> {
  def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
             [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
             EVEX, Sched<[WriteMove]>;
}

// Use the 512-bit version to implement the 128/256-bit forms when VLX is
// not available.

multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr> {
  def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                      [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
                      EVEX, Sched<[WriteMove]>;
}

// Use the 512-bit version to implement 128/256-bit in case of NoVLX.
multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
                                           X86VectorVTInfo _,
                                           string Name> {

  def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
            (_.KVT (COPY_TO_REGCLASS
                     (!cast<Instruction>(Name#"Zrr")
                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                      _.RC:$src, _.SubRegIdx)),
                   _.KRC))>;
}

multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
                                         AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
    defm Z : convert_vector_to_mask_common<opc, VTInfo.info512, OpcodeStr>,
                                           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
                                              EVEX_V256;
    defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
                                              EVEX_V128;
  }
  let Predicates = [prd, NoVLX] in {
    defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
    defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
  }
}

defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
                                              avx512vl_i8_info, HasBWI>;
defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
                                              avx512vl_i16_info, HasBWI>, VEX_W;
defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
                                              avx512vl_i32_info, HasDQI>;
defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
                                              avx512vl_i64_info, HasDQI>, VEX_W;
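
// The inverse direction, as a worked example: vpmovb2m k1, zmm0 sets bit i of
// k1 to the sign bit of byte lane i. That is why the pattern above is written
// as a signed compare of the source against all-zeros: (X86pcmpgtm 0, src) is
// true exactly for the lanes whose MSB is set.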

// Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
// is available, but BWI is not. We can't handle this in lowering because
// a target-independent DAG combine likes to combine sext and trunc.
let Predicates = [HasDQI, NoBWI] in {
  def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
            (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
  def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
            (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
}

let Predicates = [HasDQI, NoBWI, HasVLX] in {
  def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
            (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - COMPRESS and EXPAND
//

multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
                                        string OpcodeStr, X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
              (null_frag)>, AVX5128IBase,
              Sched<[sched]>;

  let mayStore = 1, hasSideEffects = 0 in
  def mr : AVX5128I<opc, MRMDestMem, (outs),
              (ins _.MemOp:$dst, _.RC:$src),
              OpcodeStr # "\t{$src, $dst|$dst, $src}",
              []>, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[sched.Folded]>;

  def mrk : AVX5128I<opc, MRMDestMem, (outs),
              (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
              OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
              []>,
              EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[sched.Folded]>;
}

multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
  def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#mrk)
                            addr:$dst, _.KRCWM:$mask, _.RC:$src)>;

  def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#rrk)
                            _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
  def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#rrkz)
                            _.KRCWM:$mask, _.RC:$src)>;
}

multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched,
                                 AVX512VLVectorVTInfo VTInfo,
                                 Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in
  defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>,
           compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [Pred, HasVLX] in {
    defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>,
                compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>,
                compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPCOMPRESS?
defm VPCOMPRESSD : compress_by_elt_width<0x8B, "vpcompressd", WriteVarShuffle256,
                                         avx512vl_i32_info>, EVEX, NotMemoryFoldable;
defm VPCOMPRESSQ : compress_by_elt_width<0x8B, "vpcompressq", WriteVarShuffle256,
                                         avx512vl_i64_info>, EVEX, VEX_W, NotMemoryFoldable;
defm VCOMPRESSPS : compress_by_elt_width<0x8A, "vcompressps", WriteVarShuffle256,
                                         avx512vl_f32_info>, EVEX, NotMemoryFoldable;
defm VCOMPRESSPD : compress_by_elt_width<0x8A, "vcompresspd", WriteVarShuffle256,
                                         avx512vl_f64_info>, EVEX, VEX_W, NotMemoryFoldable;
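
// A concrete example of the compress semantics matched above: with
// mask k1 = 0b0110, vpcompressd zmm0 {k1}{z}, zmm1 packs elements 1 and 2 of
// zmm1 into elements 0 and 1 of zmm0 and zeroes the rest; the mrk store form
// writes just those selected elements contiguously to memory.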

// expand
multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
                               string OpcodeStr, X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
              (null_frag)>, AVX5128IBase,
              Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
              (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
              (null_frag)>,
              AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
            (!cast<Instruction>(Name#_.ZSuffix#rmkz)
                                        _.KRCWM:$mask, addr:$src)>;

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmkz)
                                        _.KRCWM:$mask, addr:$src)>;

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
                                     (_.VT _.RC:$src0))),
            (!cast<Instruction>(Name#_.ZSuffix#rmk)
                            _.RC:$src0, _.KRCWM:$mask, addr:$src)>;

  def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#rrk)
                            _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
  def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#rrkz)
                            _.KRCWM:$mask, _.RC:$src)>;
}

multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in
  defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>,
           expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [Pred, HasVLX] in {
    defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>,
                expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>,
                expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPEXPAND?
defm VPEXPANDD : expand_by_elt_width<0x89, "vpexpandd", WriteVarShuffle256,
                                     avx512vl_i32_info>, EVEX;
defm VPEXPANDQ : expand_by_elt_width<0x89, "vpexpandq", WriteVarShuffle256,
                                     avx512vl_i64_info>, EVEX, VEX_W;
defm VEXPANDPS : expand_by_elt_width<0x88, "vexpandps", WriteVarShuffle256,
                                     avx512vl_f32_info>, EVEX;
defm VEXPANDPD : expand_by_elt_width<0x88, "vexpandpd", WriteVarShuffle256,
                                     avx512vl_f64_info>, EVEX, VEX_W;
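
// Expand is the inverse operation: with the same mask k1 = 0b0110,
// vpexpandd zmm0 {k1}{z}, (%rax) reads two contiguous dwords from memory and
// places them in lanes 1 and 2 of zmm0, zeroing the other lanes. That is the
// X86mExpandingLoad node matched by the rmk/rmkz patterns above.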

// Handle instruction  reg_vec1 = op(reg_vec,imm)
//                                op(mem_vec,imm)
//                                op(broadcast(eltVt),imm)
// All instructions are created with FROUND_CURRENT.
multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr,
                                      SDPatternOperator OpNode,
                                      SDPatternOperator MaskOpNode,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
    defm rri : AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
                      (OpNode (_.VT _.RC:$src1), (i32 timm:$src2)),
                      (MaskOpNode (_.VT _.RC:$src1), (i32 timm:$src2))>,
                      Sched<[sched]>;
    defm rmi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.MemOp:$src1, i32u8imm:$src2),
                      OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
                      (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                              (i32 timm:$src2)),
                      (MaskOpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                  (i32 timm:$src2))>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
    defm rmbi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                      OpcodeStr#_.Suffix, "$src2, ${src1}"#_.BroadcastStr,
                      "${src1}"#_.BroadcastStr#", $src2",
                      (OpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
                              (i32 timm:$src2)),
                      (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
                                  (i32 timm:$src2))>, EVEX_B,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
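
// For reference, the rmbi form with _.BroadcastStr = "{1to16}" should
// assemble to embedded-broadcast syntax along the lines of (AT&T,
// illustrative):
//   vrndscaleps $0, (%rax){1to16}, %zmm0
// i.e. one f32 is loaded, splat to all 16 lanes, and the packed operation is
// then applied.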

// Handle instruction  reg_vec1 = op(reg_vec2,imm),{sae}
multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                          SDNode OpNode, X86FoldableSchedWrite sched,
                                          X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr#_.Suffix, "$src2, {sae}, $src1",
                      "$src1, {sae}, $src2",
                      (OpNode (_.VT _.RC:$src1),
                              (i32 timm:$src2))>,
                      EVEX_B, Sched<[sched]>;
}

multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
              AVX512VLVectorVTInfo _, bits<8> opc, SDPatternOperator OpNode,
              SDPatternOperator MaskOpNode, SDNode OpNodeSAE, X86SchedWriteWidths sched,
              Predicate prd>{
  let Predicates = [prd] in {
    defm Z : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
                                        sched.ZMM, _.info512>,
             avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE,
                                            sched.ZMM, _.info512>, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
                                           sched.XMM, _.info128>, EVEX_V128;
    defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
                                           sched.YMM, _.info256>, EVEX_V256;
  }
}

// Handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
//                                op(reg_vec2,mem_vec,imm)
//                                op(reg_vec2,broadcast(eltVt),imm)
// All instructions are created with FROUND_CURRENT.
multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
    defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 timm:$src3))>,
                      Sched<[sched]>;
    defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT (bitconvert (_.LdFrag addr:$src2))),
                              (i32 timm:$src3))>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
    defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
                      "$src1, ${src2}"#_.BroadcastStr#", $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT (_.BroadcastLdFrag addr:$src2)),
                              (i32 timm:$src3))>, EVEX_B,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
//                                op(reg_vec2,mem_vec,imm)
multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo,
                              X86VectorVTInfo SrcInfo>{
  let ExeDomain = DestInfo.ExeDomain in {
    defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
                  (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                                       (SrcInfo.VT SrcInfo.RC:$src2),
                                       (i8 timm:$src3)))>,
                  Sched<[sched]>;
    defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                  (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                                       (SrcInfo.VT (bitconvert
                                                    (SrcInfo.LdFrag addr:$src2))),
                                       (i8 timm:$src3)))>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
//                                op(reg_vec2,mem_vec,imm)
//                                op(reg_vec2,broadcast(eltVt),imm)
multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, X86VectorVTInfo _>:
  avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{

  let ExeDomain = _.ExeDomain in
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
                  "$src1, ${src2}"#_.BroadcastStr#", $src3",
                  (OpNode (_.VT _.RC:$src1),
                          (_.VT (_.BroadcastLdFrag addr:$src2)),
                          (i8 timm:$src3))>, EVEX_B,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Handle scalar instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
//                                       op(reg_vec2,mem_scalar,imm)
multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
    defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 timm:$src3))>,
                      Sched<[sched]>;
    defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.ScalarIntMemFrags addr:$src2),
                              (i32 timm:$src3))>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode, X86FoldableSchedWrite sched,
                                    X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, {sae}, $src2, $src1",
                      "$src1, $src2, {sae}, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 timm:$src3))>,
                      EVEX_B, Sched<[sched]>;
}

// Handle scalar instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                    X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, {sae}, $src2, $src1",
                      "$src1, $src2, {sae}, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 timm:$src3))>,
                      EVEX_B, Sched<[sched]>;
}

multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
              AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
              SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
  let Predicates = [prd] in {
    defm Z : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
             avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, sched.ZMM, _.info512>,
                  EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                  EVEX_V128;
    defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                  EVEX_V256;
  }
}

multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
                   X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo,
                   AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
  let Predicates = [Pred] in {
    defm Z : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512,
                                SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
  }
  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128,
                                   SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
    defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256,
                                   SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
  }
}

multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
                                  bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched,
                                  Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in {
    defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
                  EVEX_V512;
  }
  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                  EVEX_V128;
    defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                  EVEX_V256;
  }
}

multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
              X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
              SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
             avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeSAE, sched.XMM, _>;
  }
}

multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
                    bits<8> opcPs, bits<8> opcPd, SDPatternOperator OpNode,
                    SDPatternOperator MaskOpNode, SDNode OpNodeSAE,
                    X86SchedWriteWidths sched, Predicate prd>{
  defm PH : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f16_info,
                            opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, HasFP16>,
                            AVX512PSIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
  defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
                            opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
                            AVX512AIi8Base, EVEX, EVEX_CD8<32, CD8VF>;
  defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
                            opcPd, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
                            AVX512AIi8Base, EVEX, EVEX_CD8<64, CD8VF>, VEX_W;
}

defm VREDUCE : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
                              X86VReduce, X86VReduce, X86VReduceSAE,
                              SchedWriteFRnd, HasDQI>;
defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
                              X86any_VRndScale, X86VRndScale, X86VRndScaleSAE,
                              SchedWriteFRnd, HasAVX512>;
defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
                              X86VGetMant, X86VGetMant, X86VGetMantSAE,
                              SchedWriteFRnd, HasAVX512>;
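
// A worked example of the vrndscale immediate (per the Intel SDM encoding,
// imm[7:4] = M, imm[3:0] = rounding control): the result is
// 2^-M * Round(2^M * x), so imm 0x00 rounds to the nearest integer while
// imm 0x10 (M = 1) rounds to the nearest multiple of 0.5.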

defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
                                                0x50, X86VRange, X86VRangeSAE,
                                                SchedWriteFAdd, HasDQI>,
                      AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
                                                0x50, X86VRange, X86VRangeSAE,
                                                SchedWriteFAdd, HasDQI>,
                      AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;

defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
      f64x_info, 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
      0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;

defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VREDUCESH: avx512_common_fp_sae_scalar_imm<"vreducesh", f16x_info,
      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasFP16>,
      AVX512PSIi8Base, TA, VEX_LIG, EVEX_4V, EVEX_CD8<16, CD8VT1>;

defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VGETMANTSH: avx512_common_fp_sae_scalar_imm<"vgetmantsh", f16x_info,
      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasFP16>,
      AVX512PSIi8Base, TA, VEX_LIG, EVEX_4V, EVEX_CD8<16, CD8VT1>;

multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
                                          X86FoldableSchedWrite sched,
                                          X86VectorVTInfo _,
                                          X86VectorVTInfo CastInfo,
                                          string EVEX2VEXOvrd> {
  let ExeDomain = _.ExeDomain in {
    defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (_.VT (bitconvert
                         (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
                                                  (i8 timm:$src3)))))>,
                  Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
    defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (_.VT
                   (bitconvert
                    (CastInfo.VT (X86Shuf128 _.RC:$src1,
                                             (CastInfo.LdFrag addr:$src2),
                                             (i8 timm:$src3)))))>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>,
                  EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
    defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
                  "$src1, ${src2}"#_.BroadcastStr#", $src3",
                  (_.VT
                   (bitconvert
                    (CastInfo.VT
                     (X86Shuf128 _.RC:$src1,
                                 (_.BroadcastLdFrag addr:$src2),
                                 (i8 timm:$src3)))))>, EVEX_B,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched,
                                   AVX512VLVectorVTInfo _,
                                   AVX512VLVectorVTInfo CastInfo, bits<8> opc,
                                   string EVEX2VEXOvrd>{
  let Predicates = [HasAVX512] in
  defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
                                          _.info512, CastInfo.info512, "">, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in
  defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
                                             _.info256, CastInfo.info256,
                                             EVEX2VEXOvrd>, EVEX_V256;
}

defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
      avx512vl_f32_info, avx512vl_f64_info, 0x23, "VPERM2F128">,
      AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
      avx512vl_f64_info, avx512vl_f64_info, 0x23, "VPERM2F128">,
      AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
      avx512vl_i32_info, avx512vl_i64_info, 0x43, "VPERM2I128">,
      AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
      avx512vl_i64_info, avx512vl_i64_info, 0x43, "VPERM2I128">,
      AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
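
// For the 512-bit forms the immediate selects whole 128-bit lanes, two bits
// per destination lane: e.g. vshuff32x4 with imm 0x4E (0b01001110) builds
// dst = { src1[lane2], src1[lane3], src2[lane0], src2[lane1] }, i.e. a
// cross-operand swap of the 256-bit halves.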

multiclass avx512_valign<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  // NOTE: EVEX2VEXOverride changed back to Unset for 256-bit at the
  // instantiation of this class.
  let ExeDomain = _.ExeDomain in {
    defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 timm:$src3)))>,
                  Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">;
    defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (_.VT (X86VAlign _.RC:$src1,
                                   (bitconvert (_.LdFrag addr:$src2)),
                                   (i8 timm:$src3)))>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>,
                  EVEX2VEXOverride<"VPALIGNRrmi">;

    defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
                  "$src1, ${src2}"#_.BroadcastStr#", $src3",
                  (X86VAlign _.RC:$src1,
                             (_.VT (_.BroadcastLdFrag addr:$src2)),
                             (i8 timm:$src3))>, EVEX_B,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched,
                                AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>,
                AVX512AIi8Base, EVEX_4V, EVEX_V512;
  }
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>,
                AVX512AIi8Base, EVEX_4V, EVEX_V128;
    // We can't really override the 256-bit version so change it back to unset.
    let EVEX2VEXOverride = ? in
    defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
                AVX512AIi8Base, EVEX_4V, EVEX_V256;
  }
}

defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle,
                                   avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle,
                                   avx512vl_i64_info>, EVEX_CD8<64, CD8VF>,
                                   VEX_W;

defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
                                         SchedWriteShuffle, avx512vl_i8_info,
                                         avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;

// Fragments to help convert valignq into masked valignd, or valignq/valignd
// into vpalignr.
def ValignqImm32XForm : SDNodeXForm<timm, [{
  return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
}]>;
def ValignqImm8XForm : SDNodeXForm<timm, [{
  return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
}]>;
def ValigndImm8XForm : SDNodeXForm<timm, [{
  return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
}]>;
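
// The scale factors fall out of the element sizes: valignq rotates the
// concatenated sources right by imm 64-bit elements, so the same rotation
// expressed in 32-bit elements needs imm*2 (ValignqImm32XForm) and in bytes
// imm*8 (ValignqImm8XForm); valignd expressed in bytes likewise needs imm*4.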

multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
                                        X86VectorVTInfo From, X86VectorVTInfo To,
                                        SDNodeXForm ImmXForm> {
  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                    (bitconvert
                     (From.VT (OpNode From.RC:$src1, From.RC:$src2,
                                      timm:$src3))),
                    To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
                                                  To.RC:$src1, To.RC:$src2,
                                                  (ImmXForm timm:$src3))>;

  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                    (bitconvert
                     (From.VT (OpNode From.RC:$src1, From.RC:$src2,
                                      timm:$src3))),
                    To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
                                                   To.RC:$src1, To.RC:$src2,
                                                   (ImmXForm timm:$src3))>;

  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                    (bitconvert
                     (From.VT (OpNode From.RC:$src1,
                                      (From.LdFrag addr:$src2),
                                      timm:$src3))),
                    To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
                                                  To.RC:$src1, addr:$src2,
                                                  (ImmXForm timm:$src3))>;

  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                    (bitconvert
                     (From.VT (OpNode From.RC:$src1,
                                      (From.LdFrag addr:$src2),
                                      timm:$src3))),
                    To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
                                                   To.RC:$src1, addr:$src2,
                                                   (ImmXForm timm:$src3))>;
}

multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
                                           X86VectorVTInfo From,
                                           X86VectorVTInfo To,
                                           SDNodeXForm ImmXForm> :
      avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
  def : Pat<(From.VT (OpNode From.RC:$src1,
                      (bitconvert (To.VT (To.BroadcastLdFrag addr:$src2))),
                      timm:$src3)),
            (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
                                                  (ImmXForm timm:$src3))>;

  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                    (bitconvert
                     (From.VT (OpNode From.RC:$src1,
                               (bitconvert
                                (To.VT (To.BroadcastLdFrag addr:$src2))),
                               timm:$src3))),
                    To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
                                                   To.RC:$src1, addr:$src2,
                                                   (ImmXForm timm:$src3))>;

  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                    (bitconvert
                     (From.VT (OpNode From.RC:$src1,
                               (bitconvert
                                (To.VT (To.BroadcastLdFrag addr:$src2))),
                               timm:$src3))),
                    To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
                                                    To.RC:$src1, addr:$src2,
                                                    (ImmXForm timm:$src3))>;
}

let Predicates = [HasAVX512] in {
  // For 512-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
                                         v16i32_info, ValignqImm32XForm>;
}

let Predicates = [HasVLX] in {
  // For 128-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
                                         v4i32x_info, ValignqImm32XForm>;
  // For 256-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
                                         v8i32x_info, ValignqImm32XForm>;
}

let Predicates = [HasVLX, HasBWI] in {
  // We can turn 128 and 256 bit VALIGND/VALIGNQ into VPALIGNR.
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
                                      v16i8x_info, ValignqImm8XForm>;
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
                                      v16i8x_info, ValigndImm8XForm>;
}

defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
                SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>,
                EVEX_CD8<8, CD8VF>, NotEVEX2VEXConvertible;

multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                    (ins _.RC:$src1), OpcodeStr,
                    "$src1", "$src1",
                    (_.VT (OpNode (_.VT _.RC:$src1)))>, EVEX, AVX5128IBase,
                    Sched<[sched]>;

    defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.MemOp:$src1), OpcodeStr,
                    "$src1", "$src1",
                    (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1)))))>,
                    EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[sched.Folded]>;
  }
}

multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> :
            avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.ScalarMemOp:$src1), OpcodeStr,
                    "${src1}"#_.BroadcastStr,
                    "${src1}"#_.BroadcastStr,
                    (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src1))))>,
                    EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[sched.Folded]>;
}

multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched,
                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
                           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
                                EVEX_V256;
    defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
                                EVEX_V128;
  }
}

multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo,
                               Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
                            EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
                                 EVEX_V256;
    defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
                                 EVEX_V128;
  }
}

multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd> {
  defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched,
                               avx512vl_i64_info, prd>, VEX_W;
  defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched,
                               avx512vl_i32_info, prd>;
}

multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd> {
  defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched,
                              avx512vl_i16_info, prd>, VEX_WIG;
  defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched,
                              avx512vl_i8_info, prd>, VEX_WIG;
}

multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched> {
  defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched,
                                    HasAVX512>,
              avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched,
                                    HasBWI>;
}

defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
                                    SchedWriteVecALU>;

// VPABS: Use the 512-bit version to implement 128/256-bit in case of NoVLX.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v4i64 (abs VR256X:$src)),
            (EXTRACT_SUBREG
                (VPABSQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
             sub_ymm)>;
  def : Pat<(v2i64 (abs VR128X:$src)),
            (EXTRACT_SUBREG
                (VPABSQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
             sub_xmm)>;
}

// Use the 512-bit version to implement 128/256-bit.
multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
                                 AVX512VLVectorVTInfo _, Predicate prd> {
  let Predicates = [prd, NoVLX] in {
    def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))),
              (EXTRACT_SUBREG
                (!cast<Instruction>(InstrStr # "Zrr")
                  (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
                                 _.info256.RC:$src1,
                                 _.info256.SubRegIdx)),
                _.info256.SubRegIdx)>;

    def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1))),
              (EXTRACT_SUBREG
                (!cast<Instruction>(InstrStr # "Zrr")
                  (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
                                 _.info128.RC:$src1,
                                 _.info128.SubRegIdx)),
                _.info128.SubRegIdx)>;
  }
}

defm VPLZCNT : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
                                     SchedWriteVecIMul, HasCDI>;

// FIXME: Is there a better scheduler class for VPCONFLICT?
defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
                                        SchedWriteVecALU, HasCDI>;

// VPLZCNT: Use the 512-bit version to implement 128/256-bit in case of NoVLX.
defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;

//===---------------------------------------------------------------------===//
// Counts number of ones - VPOPCNTD and VPOPCNTQ
//===---------------------------------------------------------------------===//

// FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
                                     SchedWriteVecALU, HasVPOPCNTDQ>;

defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;

//===---------------------------------------------------------------------===//
// Replicate Single FP - MOVSHDUP and MOVSLDUP
//===---------------------------------------------------------------------===//

multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86SchedWriteWidths sched> {
  defm NAME: avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
                                avx512vl_f32_info, HasAVX512>, XS;
}

defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
                                  SchedWriteFShuffle>;
defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
                                  SchedWriteFShuffle>;
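
// Semantics reminder: vmovshdup duplicates the odd-indexed f32 lanes into the
// even ones (dst = { src[1], src[1], src[3], src[3], ... }), while vmovsldup
// duplicates the even-indexed lanes (dst = { src[0], src[0], src[2], src[2], ... }).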

//===----------------------------------------------------------------------===//
// AVX-512 - MOVDDUP
//===----------------------------------------------------------------------===//

multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                    (ins _.RC:$src), OpcodeStr, "$src", "$src",
                    (_.VT (X86VBroadcast (_.VT _.RC:$src)))>, EVEX,
                    Sched<[sched]>;
    defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
                    (_.VT (_.BroadcastLdFrag addr:$src))>,
                    EVEX, EVEX_CD8<_.EltSize, CD8VH>,
                    Sched<[sched.Folded]>;
  }
}

multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr,
                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
  defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM,
                           VTInfo.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM,
                                VTInfo.info256>, EVEX_V256;
    defm Z128 : avx512_movddup_128<opc, OpcodeStr, sched.XMM,
                                   VTInfo.info128>, EVEX_V128;
  }
}

multiclass avx512_movddup<bits<8> opc, string OpcodeStr,
                          X86SchedWriteWidths sched> {
  defm NAME: avx512_movddup_common<opc, OpcodeStr, sched,
                                   avx512vl_f64_info>, XD, VEX_W;
}

defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", SchedWriteFShuffle>;

let Predicates = [HasVLX] in {
def : Pat<(v2f64 (X86VBroadcast f64:$src)),
          (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;

def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
                        (v2f64 VR128X:$src0)),
          (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
                           (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
                        immAllZerosV),
          (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - Unpack Instructions
//===----------------------------------------------------------------------===//

let Uses = []<Register>, mayRaiseFPException = 0 in {
defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, X86Unpckh, HasAVX512,
                                 SchedWriteFShuffleSizes, 0, 1>;
defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, X86Unpckl, HasAVX512,
                                 SchedWriteFShuffleSizes>;
}

defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
                                       SchedWriteShuffle, HasBWI>;

defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
                                       SchedWriteShuffle, HasAVX512>;
defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
                                       SchedWriteShuffle, HasAVX512>;
defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
                                        SchedWriteShuffle, HasAVX512>;
defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
                                        SchedWriteShuffle, HasAVX512>;

//===----------------------------------------------------------------------===//
// AVX-512 - Extract & Insert Integer Instructions
//===----------------------------------------------------------------------===//

multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   X86VectorVTInfo _> {
  def mr : AVX512Ii8<opc, MRMDestMem, (outs),
              (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), timm:$src2))),
                      addr:$dst)]>,
              EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
}

multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
                (ins _.RC:$src1, u8imm:$src2),
                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                [(set GR32orGR64:$dst,
                      (X86pextrb (_.VT _.RC:$src1), timm:$src2))]>,
                EVEX, TAPD, Sched<[WriteVecExtract]>;

    defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
  }
}

multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
                (ins _.RC:$src1, u8imm:$src2),
                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                [(set GR32orGR64:$dst,
                      (X86pextrw (_.VT _.RC:$src1), timm:$src2))]>,
                EVEX, PD, Sched<[WriteVecExtract]>;

    let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
    def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
                   (ins _.RC:$src1, u8imm:$src2),
                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                   EVEX, TAPD, FoldGenData<NAME#rr>,
                   Sched<[WriteVecExtract]>;

    defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
  }
}

multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
                                 RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
                (ins _.RC:$src1, u8imm:$src2),
                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                [(set GRC:$dst,
                      (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
                EVEX, TAPD, Sched<[WriteVecExtract]>;

    def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
                (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                [(store (extractelt (_.VT _.RC:$src1),
                                    imm:$src2), addr:$dst)]>,
                EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
                Sched<[WriteVecExtractSt]>;
  }
}

defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, VEX_WIG;
defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, VEX_WIG;
defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W;

multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86VectorVTInfo _, PatFrag LdFrag,
                               SDPatternOperator immoperator> {
  def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
      (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
      OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set _.RC:$dst,
            (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), immoperator:$src3)))]>,
      EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}

multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86VectorVTInfo _, PatFrag LdFrag> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
        (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set _.RC:$dst,
              (OpNode _.RC:$src1, GR32orGR64:$src2, timm:$src3))]>, EVEX_4V,
        Sched<[WriteVecInsert]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag, timm>;
  }
}

multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
                                X86VectorVTInfo _, RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
        (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set _.RC:$dst,
              (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
        EVEX_4V, TAPD, Sched<[WriteVecInsert]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
                                    _.ScalarLdFrag, imm>, TAPD;
  }
}

defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
                                     extloadi8>, TAPD, VEX_WIG;
defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
                                     extloadi16>, PD, VEX_WIG;
defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;

// Always select FP16 instructions if available.
let Predicates = [HasBWI], AddedComplexity = -10 in {
  def : Pat<(f16 (load addr:$src)), (COPY_TO_REGCLASS (VPINSRWZrm (v8i16 (IMPLICIT_DEF)), addr:$src, 0), FR16X)>;
  def : Pat<(store f16:$src, addr:$dst), (VPEXTRWZmr addr:$dst, (v8i16 (COPY_TO_REGCLASS FR16X:$src, VR128X)), 0)>;
  def : Pat<(i16 (bitconvert f16:$src)), (EXTRACT_SUBREG (VPEXTRWZrr (v8i16 (COPY_TO_REGCLASS FR16X:$src, VR128X)), 0), sub_16bit)>;
  def : Pat<(f16 (bitconvert i16:$src)), (COPY_TO_REGCLASS (VPINSRWZrr (v8i16 (IMPLICIT_DEF)), (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit), 0), FR16X)>;
}

//===----------------------------------------------------------------------===//
// VSHUFPS - VSHUFPD Operations
//===----------------------------------------------------------------------===//

multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_FP>{
  defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
                                    SchedWriteFShuffle>,
                                    EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
                                    AVX512AIi8Base, EVEX_4V;
}

defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_f32_info>, PS;
defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_f64_info>, PD, VEX_W;

//===----------------------------------------------------------------------===//
// AVX-512 - Byte shift Left/Right
//===----------------------------------------------------------------------===//

multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
                               Format MRMm, string OpcodeStr,
                               X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  def ri : AVX512<opc, MRMr,
             (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 timm:$src2))))]>,
             Sched<[sched]>;
  def mi : AVX512<opc, MRMm,
           (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
           [(set _.RC:$dst,(_.VT (OpNode
                                  (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                  (i8 timm:$src2))))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
                                   Format MRMm, string OpcodeStr,
                                   X86SchedWriteWidths sched, Predicate prd>{
  let Predicates = [prd] in
    defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                 sched.ZMM, v64i8_info>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    sched.YMM, v32i8x_info>, EVEX_V256;
    defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    sched.XMM, v16i8x_info>, EVEX_V128;
  }
}
defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
                                       SchedWriteShuffle, HasBWI>,
                                       AVX512PDIi8Base, EVEX_4V, VEX_WIG;
defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
                                       SchedWriteShuffle, HasBWI>,
                                       AVX512PDIi8Base, EVEX_4V, VEX_WIG;

multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
                                string OpcodeStr, X86FoldableSchedWrite sched,
                                X86VectorVTInfo _dst, X86VectorVTInfo _src> {
  let isCommutable = 1 in
  def rr : AVX512BI<opc, MRMSrcReg,
             (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _dst.RC:$dst,(_dst.VT
                                 (OpNode (_src.VT _src.RC:$src1),
                                         (_src.VT _src.RC:$src2))))]>,
             Sched<[sched]>;
  def rm : AVX512BI<opc, MRMSrcMem,
             (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _dst.RC:$dst,(_dst.VT
                                 (OpNode (_src.VT _src.RC:$src1),
                                         (_src.VT (bitconvert
                                                   (_src.LdFrag addr:$src2))))))]>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
                                    string OpcodeStr, X86SchedWriteWidths sched,
                                    Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM,
                                  v8i64_info, v64i8_info>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM,
                                     v4i64x_info, v32i8x_info>, EVEX_V256;
    defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM,
                                     v2i64x_info, v16i8x_info>, EVEX_V128;
  }
}

defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
                                        SchedWritePSADBW, HasBWI>, EVEX_4V, VEX_WIG;
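
// Why the destination type is v8i64 for a v64i8 source: vpsadbw sums the
// absolute differences of each group of eight byte pairs and writes the
// 16-bit sum zero-extended into the corresponding 64-bit lane, so a 512-bit
// byte vector yields eight qword results.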

// Transforms to swizzle an immediate to enable better matching when the
// memory operand isn't in the right place.
def VPTERNLOG321_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 1/4 and 3/6.
  uint8_t NewImm = Imm & 0xa5;
  if (Imm & 0x02) NewImm |= 0x10;
  if (Imm & 0x10) NewImm |= 0x02;
  if (Imm & 0x08) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG213_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 2/4 and 3/5.
  uint8_t NewImm = Imm & 0xc3;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x20) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG132_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 1/2 and 5/6.
  uint8_t NewImm = Imm & 0x99;
  if (Imm & 0x02) NewImm |= 0x04;
  if (Imm & 0x04) NewImm |= 0x02;
  if (Imm & 0x20) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x20;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG231_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by moving operand 1 to the end.
  uint8_t Imm = N->getZExtValue();
  // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x04;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x08) NewImm |= 0x40;
  if (Imm & 0x10) NewImm |= 0x02;
  if (Imm & 0x20) NewImm |= 0x08;
  if (Imm & 0x40) NewImm |= 0x20;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG312_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by moving operand 2 to the beginning.
  uint8_t Imm = N->getZExtValue();
  // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x10;
  if (Imm & 0x04) NewImm |= 0x02;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x20) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
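
// Worked example of the encoding these transforms permute: the immediate is
// an 8-entry truth table indexed by (src1<<2 | src2<<1 | src3), so 0xCA
// computes src1 ? src2 : src3. Feeding 0xCA through VPTERNLOG321_imm8
// (swap operands 0 and 2) moves table entries 1<->4 and 3<->6 and yields
// 0xD8, i.e. src3 ? src2 : src1, the same function with the operand list
// reversed.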

multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          X86FoldableSchedWrite sched, X86VectorVTInfo _,
                          string Name>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
    defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT _.RC:$src3),
                              (i8 timm:$src4)), 1, 1>,
                      AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
    defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT (bitconvert (_.LdFrag addr:$src3))),
                              (i8 timm:$src4)), 1, 0>,
                      AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
    defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, ${src3}"#_.BroadcastStr#", $src2",
                      "$src2, ${src3}"#_.BroadcastStr#", $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT (_.BroadcastLdFrag addr:$src3)),
                              (i8 timm:$src4)), 1, 0>, EVEX_B,
                      AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
  }// Constraints = "$src1 = $dst"

  // Additional patterns for matching passthru operand in other positions.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 timm:$src4))>;

  // Additional patterns for matching zero masking with loads in other
  // positions.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, (i8 timm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;

  // Additional patterns for matching masked loads with different
  // operand orders.
11882 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11883 (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)), 11884 _.RC:$src2, (i8 timm:$src4)), 11885 _.RC:$src1)), 11886 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, 11887 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>; 11888 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11889 (OpNode (bitconvert (_.LdFrag addr:$src3)), 11890 _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), 11891 _.RC:$src1)), 11892 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, 11893 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>; 11894 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11895 (OpNode _.RC:$src2, _.RC:$src1, 11896 (bitconvert (_.LdFrag addr:$src3)), (i8 timm:$src4)), 11897 _.RC:$src1)), 11898 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, 11899 _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>; 11900 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11901 (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)), 11902 _.RC:$src1, (i8 timm:$src4)), 11903 _.RC:$src1)), 11904 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, 11905 _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>; 11906 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11907 (OpNode (bitconvert (_.LdFrag addr:$src3)), 11908 _.RC:$src1, _.RC:$src2, (i8 timm:$src4)), 11909 _.RC:$src1)), 11910 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, 11911 _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>; 11912 11913 // Additional patterns for matching zero masking with broadcasts in other 11914 // positions. 11915 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11916 (OpNode (_.BroadcastLdFrag addr:$src3), 11917 _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), 11918 _.ImmAllZerosV)), 11919 (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1, 11920 _.KRCWM:$mask, _.RC:$src2, addr:$src3, 11921 (VPTERNLOG321_imm8 timm:$src4))>; 11922 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11923 (OpNode _.RC:$src1, 11924 (_.BroadcastLdFrag addr:$src3), 11925 _.RC:$src2, (i8 timm:$src4)), 11926 _.ImmAllZerosV)), 11927 (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1, 11928 _.KRCWM:$mask, _.RC:$src2, addr:$src3, 11929 (VPTERNLOG132_imm8 timm:$src4))>; 11930 11931 // Additional patterns for matching masked broadcasts with different 11932 // operand orders. 
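  // Same remapping as for the plain loads above, but folding into the
  // broadcast (rmbik) form.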
11933 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11934 (OpNode _.RC:$src1, (_.BroadcastLdFrag addr:$src3), 11935 _.RC:$src2, (i8 timm:$src4)), 11936 _.RC:$src1)), 11937 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, 11938 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>; 11939 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11940 (OpNode (_.BroadcastLdFrag addr:$src3), 11941 _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), 11942 _.RC:$src1)), 11943 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, 11944 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>; 11945 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11946 (OpNode _.RC:$src2, _.RC:$src1, 11947 (_.BroadcastLdFrag addr:$src3), 11948 (i8 timm:$src4)), _.RC:$src1)), 11949 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, 11950 _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>; 11951 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11952 (OpNode _.RC:$src2, 11953 (_.BroadcastLdFrag addr:$src3), 11954 _.RC:$src1, (i8 timm:$src4)), 11955 _.RC:$src1)), 11956 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, 11957 _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>; 11958 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11959 (OpNode (_.BroadcastLdFrag addr:$src3), 11960 _.RC:$src1, _.RC:$src2, (i8 timm:$src4)), 11961 _.RC:$src1)), 11962 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, 11963 _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>; 11964} 11965 11966multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched, 11967 AVX512VLVectorVTInfo _> { 11968 let Predicates = [HasAVX512] in 11969 defm Z : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM, 11970 _.info512, NAME>, EVEX_V512; 11971 let Predicates = [HasAVX512, HasVLX] in { 11972 defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM, 11973 _.info128, NAME>, EVEX_V128; 11974 defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM, 11975 _.info256, NAME>, EVEX_V256; 11976 } 11977} 11978 11979defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU, 11980 avx512vl_i32_info>; 11981defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU, 11982 avx512vl_i64_info>, VEX_W; 11983 11984// Patterns to implement vnot using vpternlog instead of creating all ones 11985// using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen 11986// so that the result is only dependent on src0. But we use the same source 11987// for all operands to prevent a false dependency. 11988// TODO: We should maybe have a more generalized algorithm for folding to 11989// vpternlog. 
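// For reference: the ternary-logic immediate is a truth table indexed by
// (src0 << 2) | (src1 << 1) | src2, so 0x0F (bits 0-3 set) is 1 exactly when
// src0 is 0, i.e. it computes ~src0. The patterns below therefore emit, e.g.,
//   vpternlogq $15, %zmm0, %zmm0, %zmm0
// to invert zmm0.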
11990let Predicates = [HasAVX512] in { 11991 def : Pat<(v64i8 (vnot VR512:$src)), 11992 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>; 11993 def : Pat<(v32i16 (vnot VR512:$src)), 11994 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>; 11995 def : Pat<(v16i32 (vnot VR512:$src)), 11996 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>; 11997 def : Pat<(v8i64 (vnot VR512:$src)), 11998 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>; 11999} 12000 12001let Predicates = [HasAVX512, NoVLX] in { 12002 def : Pat<(v16i8 (vnot VR128X:$src)), 12003 (EXTRACT_SUBREG 12004 (VPTERNLOGQZrri 12005 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 12006 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 12007 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 12008 (i8 15)), sub_xmm)>; 12009 def : Pat<(v8i16 (vnot VR128X:$src)), 12010 (EXTRACT_SUBREG 12011 (VPTERNLOGQZrri 12012 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 12013 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 12014 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 12015 (i8 15)), sub_xmm)>; 12016 def : Pat<(v4i32 (vnot VR128X:$src)), 12017 (EXTRACT_SUBREG 12018 (VPTERNLOGQZrri 12019 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 12020 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 12021 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 12022 (i8 15)), sub_xmm)>; 12023 def : Pat<(v2i64 (vnot VR128X:$src)), 12024 (EXTRACT_SUBREG 12025 (VPTERNLOGQZrri 12026 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 12027 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 12028 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 12029 (i8 15)), sub_xmm)>; 12030 12031 def : Pat<(v32i8 (vnot VR256X:$src)), 12032 (EXTRACT_SUBREG 12033 (VPTERNLOGQZrri 12034 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 12035 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 12036 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 12037 (i8 15)), sub_ymm)>; 12038 def : Pat<(v16i16 (vnot VR256X:$src)), 12039 (EXTRACT_SUBREG 12040 (VPTERNLOGQZrri 12041 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 12042 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 12043 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 12044 (i8 15)), sub_ymm)>; 12045 def : Pat<(v8i32 (vnot VR256X:$src)), 12046 (EXTRACT_SUBREG 12047 (VPTERNLOGQZrri 12048 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 12049 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 12050 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 12051 (i8 15)), sub_ymm)>; 12052 def : Pat<(v4i64 (vnot VR256X:$src)), 12053 (EXTRACT_SUBREG 12054 (VPTERNLOGQZrri 12055 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 12056 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 12057 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 12058 (i8 15)), sub_ymm)>; 12059} 12060 12061let Predicates = [HasVLX] in { 12062 def : Pat<(v16i8 (vnot VR128X:$src)), 12063 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>; 12064 def : Pat<(v8i16 (vnot VR128X:$src)), 12065 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>; 12066 def : Pat<(v4i32 (vnot VR128X:$src)), 12067 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>; 12068 def : Pat<(v2i64 (vnot 
VR128X:$src)), 12069 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>; 12070 12071 def : Pat<(v32i8 (vnot VR256X:$src)), 12072 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>; 12073 def : Pat<(v16i16 (vnot VR256X:$src)), 12074 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>; 12075 def : Pat<(v8i32 (vnot VR256X:$src)), 12076 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>; 12077 def : Pat<(v4i64 (vnot VR256X:$src)), 12078 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>; 12079} 12080 12081//===----------------------------------------------------------------------===// 12082// AVX-512 - FixupImm 12083//===----------------------------------------------------------------------===// 12084 12085multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, 12086 X86FoldableSchedWrite sched, X86VectorVTInfo _, 12087 X86VectorVTInfo TblVT>{ 12088 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, 12089 Uses = [MXCSR], mayRaiseFPException = 1 in { 12090 defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 12091 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4), 12092 OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4", 12093 (X86VFixupimm (_.VT _.RC:$src1), 12094 (_.VT _.RC:$src2), 12095 (TblVT.VT _.RC:$src3), 12096 (i32 timm:$src4))>, Sched<[sched]>; 12097 defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 12098 (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4), 12099 OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4", 12100 (X86VFixupimm (_.VT _.RC:$src1), 12101 (_.VT _.RC:$src2), 12102 (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))), 12103 (i32 timm:$src4))>, 12104 Sched<[sched.Folded, sched.ReadAfterFold]>; 12105 defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 12106 (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4), 12107 OpcodeStr#_.Suffix, "$src4, ${src3}"#_.BroadcastStr#", $src2", 12108 "$src2, ${src3}"#_.BroadcastStr#", $src4", 12109 (X86VFixupimm (_.VT _.RC:$src1), 12110 (_.VT _.RC:$src2), 12111 (TblVT.VT (TblVT.BroadcastLdFrag addr:$src3)), 12112 (i32 timm:$src4))>, 12113 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 12114 } // Constraints = "$src1 = $dst" 12115} 12116 12117multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr, 12118 X86FoldableSchedWrite sched, 12119 X86VectorVTInfo _, X86VectorVTInfo TblVT> 12120 : avx512_fixupimm_packed<opc, OpcodeStr, sched, _, TblVT> { 12121let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, Uses = [MXCSR] in { 12122 defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 12123 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4), 12124 OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2", 12125 "$src2, $src3, {sae}, $src4", 12126 (X86VFixupimmSAE (_.VT _.RC:$src1), 12127 (_.VT _.RC:$src2), 12128 (TblVT.VT _.RC:$src3), 12129 (i32 timm:$src4))>, 12130 EVEX_B, Sched<[sched]>; 12131 } 12132} 12133 12134multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, 12135 X86FoldableSchedWrite sched, X86VectorVTInfo _, 12136 X86VectorVTInfo _src3VT> { 12137 let Constraints = "$src1 = $dst" , Predicates = [HasAVX512], 12138 ExeDomain = _.ExeDomain in { 12139 defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 12140 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4), 12141 OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4", 12142 (X86VFixupimms (_.VT _.RC:$src1), 12143 (_.VT _.RC:$src2), 12144 (_src3VT.VT 
_src3VT.RC:$src3), 12145 (i32 timm:$src4))>, Sched<[sched]>, SIMD_EXC; 12146 let Uses = [MXCSR] in 12147 defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 12148 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4), 12149 OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2", 12150 "$src2, $src3, {sae}, $src4", 12151 (X86VFixupimmSAEs (_.VT _.RC:$src1), 12152 (_.VT _.RC:$src2), 12153 (_src3VT.VT _src3VT.RC:$src3), 12154 (i32 timm:$src4))>, 12155 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 12156 defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 12157 (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4), 12158 OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4", 12159 (X86VFixupimms (_.VT _.RC:$src1), 12160 (_.VT _.RC:$src2), 12161 (_src3VT.VT (scalar_to_vector 12162 (_src3VT.ScalarLdFrag addr:$src3))), 12163 (i32 timm:$src4))>, 12164 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 12165 } 12166} 12167 12168multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched, 12169 AVX512VLVectorVTInfo _Vec, 12170 AVX512VLVectorVTInfo _Tbl> { 12171 let Predicates = [HasAVX512] in 12172 defm Z : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM, 12173 _Vec.info512, _Tbl.info512>, AVX512AIi8Base, 12174 EVEX_4V, EVEX_V512; 12175 let Predicates = [HasAVX512, HasVLX] in { 12176 defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.XMM, 12177 _Vec.info128, _Tbl.info128>, AVX512AIi8Base, 12178 EVEX_4V, EVEX_V128; 12179 defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.YMM, 12180 _Vec.info256, _Tbl.info256>, AVX512AIi8Base, 12181 EVEX_4V, EVEX_V256; 12182 } 12183} 12184 12185defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm", 12186 SchedWriteFAdd.Scl, f32x_info, v4i32x_info>, 12187 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; 12188defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm", 12189 SchedWriteFAdd.Scl, f64x_info, v2i64x_info>, 12190 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; 12191defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info, 12192 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; 12193defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info, 12194 avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W; 12195 12196// Patterns used to select SSE scalar fp arithmetic instructions from 12197// either: 12198// 12199// (1) a scalar fp operation followed by a blend 12200// 12201// The effect is that the backend no longer emits unnecessary vector 12202// insert instructions immediately after SSE scalar fp instructions 12203// like addss or mulss. 12204// 12205// For example, given the following code: 12206// __m128 foo(__m128 A, __m128 B) { 12207// A[0] += B[0]; 12208// return A; 12209// } 12210// 12211// Previously we generated: 12212// addss %xmm0, %xmm1 12213// movss %xmm1, %xmm0 12214// 12215// We now generate: 12216// addss %xmm1, %xmm0 12217// 12218// (2) a vector packed single/double fp operation followed by a vector insert 12219// 12220// The effect is that the backend converts the packed fp instruction 12221// followed by a vector insert into a single SSE scalar fp instruction. 
12222// 12223// For example, given the following code: 12224// __m128 foo(__m128 A, __m128 B) { 12225// __m128 C = A + B; 12226// return (__m128) {c[0], a[1], a[2], a[3]}; 12227// } 12228// 12229// Previously we generated: 12230// addps %xmm0, %xmm1 12231// movss %xmm1, %xmm0 12232// 12233// We now generate: 12234// addss %xmm1, %xmm0 12235 12236// TODO: Some canonicalization in lowering would simplify the number of 12237// patterns we have to try to match. 12238multiclass AVX512_scalar_math_fp_patterns<SDPatternOperator Op, SDNode MaskedOp, 12239 string OpcPrefix, SDNode MoveNode, 12240 X86VectorVTInfo _, PatLeaf ZeroFP> { 12241 let Predicates = [HasAVX512] in { 12242 // extracted scalar math op with insert via movss 12243 def : Pat<(MoveNode 12244 (_.VT VR128X:$dst), 12245 (_.VT (scalar_to_vector 12246 (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))), 12247 _.FRC:$src)))), 12248 (!cast<Instruction>("V"#OpcPrefix#"Zrr_Int") _.VT:$dst, 12249 (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>; 12250 def : Pat<(MoveNode 12251 (_.VT VR128X:$dst), 12252 (_.VT (scalar_to_vector 12253 (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))), 12254 (_.ScalarLdFrag addr:$src))))), 12255 (!cast<Instruction>("V"#OpcPrefix#"Zrm_Int") _.VT:$dst, addr:$src)>; 12256 12257 // extracted masked scalar math op with insert via movss 12258 def : Pat<(MoveNode (_.VT VR128X:$src1), 12259 (scalar_to_vector 12260 (X86selects_mask VK1WM:$mask, 12261 (MaskedOp (_.EltVT 12262 (extractelt (_.VT VR128X:$src1), (iPTR 0))), 12263 _.FRC:$src2), 12264 _.FRC:$src0))), 12265 (!cast<Instruction>("V"#OpcPrefix#"Zrr_Intk") 12266 (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)), 12267 VK1WM:$mask, _.VT:$src1, 12268 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>; 12269 def : Pat<(MoveNode (_.VT VR128X:$src1), 12270 (scalar_to_vector 12271 (X86selects_mask VK1WM:$mask, 12272 (MaskedOp (_.EltVT 12273 (extractelt (_.VT VR128X:$src1), (iPTR 0))), 12274 (_.ScalarLdFrag addr:$src2)), 12275 _.FRC:$src0))), 12276 (!cast<Instruction>("V"#OpcPrefix#"Zrm_Intk") 12277 (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)), 12278 VK1WM:$mask, _.VT:$src1, addr:$src2)>; 12279 12280 // extracted masked scalar math op with insert via movss 12281 def : Pat<(MoveNode (_.VT VR128X:$src1), 12282 (scalar_to_vector 12283 (X86selects_mask VK1WM:$mask, 12284 (MaskedOp (_.EltVT 12285 (extractelt (_.VT VR128X:$src1), (iPTR 0))), 12286 _.FRC:$src2), (_.EltVT ZeroFP)))), 12287 (!cast<I>("V"#OpcPrefix#"Zrr_Intkz") 12288 VK1WM:$mask, _.VT:$src1, 12289 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>; 12290 def : Pat<(MoveNode (_.VT VR128X:$src1), 12291 (scalar_to_vector 12292 (X86selects_mask VK1WM:$mask, 12293 (MaskedOp (_.EltVT 12294 (extractelt (_.VT VR128X:$src1), (iPTR 0))), 12295 (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))), 12296 (!cast<I>("V"#OpcPrefix#"Zrm_Intkz") VK1WM:$mask, _.VT:$src1, addr:$src2)>; 12297 } 12298} 12299 12300defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>; 12301defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>; 12302defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>; 12303defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>; 12304 12305defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>; 12306defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>; 12307defm : 
AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>; 12308defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>; 12309 12310defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSH", X86Movsh, v8f16x_info, fp16imm0>; 12311defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSH", X86Movsh, v8f16x_info, fp16imm0>; 12312defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSH", X86Movsh, v8f16x_info, fp16imm0>; 12313defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSH", X86Movsh, v8f16x_info, fp16imm0>; 12314 12315multiclass AVX512_scalar_unary_math_patterns<SDPatternOperator OpNode, string OpcPrefix, 12316 SDNode Move, X86VectorVTInfo _> { 12317 let Predicates = [HasAVX512] in { 12318 def : Pat<(_.VT (Move _.VT:$dst, 12319 (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))), 12320 (!cast<Instruction>("V"#OpcPrefix#"Zr_Int") _.VT:$dst, _.VT:$src)>; 12321 } 12322} 12323 12324defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSS", X86Movss, v4f32x_info>; 12325defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSD", X86Movsd, v2f64x_info>; 12326defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSH", X86Movsh, v8f16x_info>; 12327 12328//===----------------------------------------------------------------------===// 12329// AES instructions 12330//===----------------------------------------------------------------------===// 12331 12332multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> { 12333 let Predicates = [HasVLX, HasVAES] in { 12334 defm Z128 : AESI_binop_rm_int<Op, OpStr, 12335 !cast<Intrinsic>(IntPrefix), 12336 loadv2i64, 0, VR128X, i128mem>, 12337 EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG; 12338 defm Z256 : AESI_binop_rm_int<Op, OpStr, 12339 !cast<Intrinsic>(IntPrefix#"_256"), 12340 loadv4i64, 0, VR256X, i256mem>, 12341 EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG; 12342 } 12343 let Predicates = [HasAVX512, HasVAES] in 12344 defm Z : AESI_binop_rm_int<Op, OpStr, 12345 !cast<Intrinsic>(IntPrefix#"_512"), 12346 loadv8i64, 0, VR512, i512mem>, 12347 EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG; 12348} 12349 12350defm VAESENC : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">; 12351defm VAESENCLAST : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">; 12352defm VAESDEC : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">; 12353defm VAESDECLAST : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">; 12354 12355//===----------------------------------------------------------------------===// 12356// PCLMUL instructions - Carry less multiplication 12357//===----------------------------------------------------------------------===// 12358 12359let Predicates = [HasAVX512, HasVPCLMULQDQ] in 12360defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>, 12361 EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG; 12362 12363let Predicates = [HasVLX, HasVPCLMULQDQ] in { 12364defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>, 12365 EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG; 12366 12367defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64, 12368 int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256, 12369 EVEX_CD8<64, CD8VF>, VEX_WIG; 12370} 12371 12372// Aliases 12373defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>; 12374defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>; 12375defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>; 12376 
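// Note: vpclmulqdq_aliases (shared with the VEX-encoded forms) adds the
// pseudo-op spellings that hard-code the $src3 immediate, e.g. vpclmulhqhqdq
// for immediate 0x11.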
12377//===----------------------------------------------------------------------===// 12378// VBMI2 12379//===----------------------------------------------------------------------===// 12380 12381multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode, 12382 X86FoldableSchedWrite sched, X86VectorVTInfo VTI> { 12383 let Constraints = "$src1 = $dst", 12384 ExeDomain = VTI.ExeDomain in { 12385 defm r: AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst), 12386 (ins VTI.RC:$src2, VTI.RC:$src3), OpStr, 12387 "$src3, $src2", "$src2, $src3", 12388 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>, 12389 T8PD, EVEX_4V, Sched<[sched]>; 12390 defm m: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst), 12391 (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr, 12392 "$src3, $src2", "$src2, $src3", 12393 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, 12394 (VTI.VT (VTI.LdFrag addr:$src3))))>, 12395 T8PD, EVEX_4V, 12396 Sched<[sched.Folded, sched.ReadAfterFold]>; 12397 } 12398} 12399 12400multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode, 12401 X86FoldableSchedWrite sched, X86VectorVTInfo VTI> 12402 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> { 12403 let Constraints = "$src1 = $dst", 12404 ExeDomain = VTI.ExeDomain in 12405 defm mb: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst), 12406 (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr, 12407 "${src3}"#VTI.BroadcastStr#", $src2", 12408 "$src2, ${src3}"#VTI.BroadcastStr, 12409 (OpNode VTI.RC:$src1, VTI.RC:$src2, 12410 (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>, 12411 T8PD, EVEX_4V, EVEX_B, 12412 Sched<[sched.Folded, sched.ReadAfterFold]>; 12413} 12414 12415multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode, 12416 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> { 12417 let Predicates = [HasVBMI2] in 12418 defm Z : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>, 12419 EVEX_V512; 12420 let Predicates = [HasVBMI2, HasVLX] in { 12421 defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>, 12422 EVEX_V256; 12423 defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>, 12424 EVEX_V128; 12425 } 12426} 12427 12428multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode, 12429 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> { 12430 let Predicates = [HasVBMI2] in 12431 defm Z : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>, 12432 EVEX_V512; 12433 let Predicates = [HasVBMI2, HasVLX] in { 12434 defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>, 12435 EVEX_V256; 12436 defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>, 12437 EVEX_V128; 12438 } 12439} 12440multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix, 12441 SDNode OpNode, X86SchedWriteWidths sched> { 12442 defm W : VBMI2_shift_var_rm_common<wOp, Prefix#"w", OpNode, sched, 12443 avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>; 12444 defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix#"d", OpNode, sched, 12445 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; 12446 defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix#"q", OpNode, sched, 12447 avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; 12448} 12449 12450multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix, 12451 SDNode OpNode, X86SchedWriteWidths sched> { 12452 defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix#"w", sched, 12453 avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>, 12454 VEX_W, 
EVEX_CD8<16, CD8VF>; 12455 defm D : avx512_common_3Op_imm8<Prefix#"d", avx512vl_i32_info, dqOp, 12456 OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; 12457 defm Q : avx512_common_3Op_imm8<Prefix#"q", avx512vl_i64_info, dqOp, OpNode, 12458 sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; 12459} 12460 12461// Concat & Shift 12462defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>; 12463defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>; 12464defm VPSHLD : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>; 12465defm VPSHRD : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>; 12466 12467// Compress 12468defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256, 12469 avx512vl_i8_info, HasVBMI2>, EVEX, 12470 NotMemoryFoldable; 12471defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256, 12472 avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W, 12473 NotMemoryFoldable; 12474// Expand 12475defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256, 12476 avx512vl_i8_info, HasVBMI2>, EVEX; 12477defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256, 12478 avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W; 12479 12480//===----------------------------------------------------------------------===// 12481// VNNI 12482//===----------------------------------------------------------------------===// 12483 12484let Constraints = "$src1 = $dst" in 12485multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode, 12486 X86FoldableSchedWrite sched, X86VectorVTInfo VTI, 12487 bit IsCommutable> { 12488 let ExeDomain = VTI.ExeDomain in { 12489 defm r : AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst), 12490 (ins VTI.RC:$src2, VTI.RC:$src3), OpStr, 12491 "$src3, $src2", "$src2, $src3", 12492 (VTI.VT (OpNode VTI.RC:$src1, 12493 VTI.RC:$src2, VTI.RC:$src3)), 12494 IsCommutable, IsCommutable>, 12495 EVEX_4V, T8PD, Sched<[sched]>; 12496 defm m : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst), 12497 (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr, 12498 "$src3, $src2", "$src2, $src3", 12499 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, 12500 (VTI.VT (VTI.LdFrag addr:$src3))))>, 12501 EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD, 12502 Sched<[sched.Folded, sched.ReadAfterFold]>; 12503 defm mb : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst), 12504 (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), 12505 OpStr, "${src3}"#VTI.BroadcastStr#", $src2", 12506 "$src2, ${src3}"#VTI.BroadcastStr, 12507 (OpNode VTI.RC:$src1, VTI.RC:$src2, 12508 (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>, 12509 EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B, 12510 T8PD, Sched<[sched.Folded, sched.ReadAfterFold]>; 12511 } 12512} 12513 12514multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode, 12515 X86SchedWriteWidths sched, bit IsCommutable> { 12516 let Predicates = [HasVNNI] in 12517 defm Z : VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info, 12518 IsCommutable>, EVEX_V512; 12519 let Predicates = [HasVNNI, HasVLX] in { 12520 defm Z256 : VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info, 12521 IsCommutable>, EVEX_V256; 12522 defm Z128 : VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info, 12523 IsCommutable>, EVEX_V128; 12524 } 12525} 12526 12527// FIXME: Is there a better scheduler class for VPDP? 
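// Per 32-bit lane, vpdpwssd computes dst += a.lo16*b.lo16 + a.hi16*b.hi16 on
// signed words (the "s"-suffixed forms saturate the accumulation), which is
// why an add of a one-use vpmaddwd can be folded into it below.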
12528defm VPDPBUSD : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul, 0>; 12529defm VPDPBUSDS : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul, 0>; 12530defm VPDPWSSD : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul, 1>; 12531defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul, 1>; 12532 12533// Patterns to match VPDPWSSD from existing instructions/intrinsics. 12534let Predicates = [HasVNNI] in { 12535 def : Pat<(v16i32 (add VR512:$src1, 12536 (X86vpmaddwd_su VR512:$src2, VR512:$src3))), 12537 (VPDPWSSDZr VR512:$src1, VR512:$src2, VR512:$src3)>; 12538 def : Pat<(v16i32 (add VR512:$src1, 12539 (X86vpmaddwd_su VR512:$src2, (load addr:$src3)))), 12540 (VPDPWSSDZm VR512:$src1, VR512:$src2, addr:$src3)>; 12541} 12542let Predicates = [HasVNNI,HasVLX] in { 12543 def : Pat<(v8i32 (add VR256X:$src1, 12544 (X86vpmaddwd_su VR256X:$src2, VR256X:$src3))), 12545 (VPDPWSSDZ256r VR256X:$src1, VR256X:$src2, VR256X:$src3)>; 12546 def : Pat<(v8i32 (add VR256X:$src1, 12547 (X86vpmaddwd_su VR256X:$src2, (load addr:$src3)))), 12548 (VPDPWSSDZ256m VR256X:$src1, VR256X:$src2, addr:$src3)>; 12549 def : Pat<(v4i32 (add VR128X:$src1, 12550 (X86vpmaddwd_su VR128X:$src2, VR128X:$src3))), 12551 (VPDPWSSDZ128r VR128X:$src1, VR128X:$src2, VR128X:$src3)>; 12552 def : Pat<(v4i32 (add VR128X:$src1, 12553 (X86vpmaddwd_su VR128X:$src2, (load addr:$src3)))), 12554 (VPDPWSSDZ128m VR128X:$src1, VR128X:$src2, addr:$src3)>; 12555} 12556 12557//===----------------------------------------------------------------------===// 12558// Bit Algorithms 12559//===----------------------------------------------------------------------===// 12560 12561// FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW? 12562defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU, 12563 avx512vl_i8_info, HasBITALG>; 12564defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU, 12565 avx512vl_i16_info, HasBITALG>, VEX_W; 12566 12567defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>; 12568defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>; 12569 12570def X86Vpshufbitqmb_su : PatFrag<(ops node:$src1, node:$src2), 12571 (X86Vpshufbitqmb node:$src1, node:$src2), [{ 12572 return N->hasOneUse(); 12573}]>; 12574 12575multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> { 12576 defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst), 12577 (ins VTI.RC:$src1, VTI.RC:$src2), 12578 "vpshufbitqmb", 12579 "$src2, $src1", "$src1, $src2", 12580 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1), 12581 (VTI.VT VTI.RC:$src2)), 12582 (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1), 12583 (VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD, 12584 Sched<[sched]>; 12585 defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst), 12586 (ins VTI.RC:$src1, VTI.MemOp:$src2), 12587 "vpshufbitqmb", 12588 "$src2, $src1", "$src1, $src2", 12589 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1), 12590 (VTI.VT (VTI.LdFrag addr:$src2))), 12591 (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1), 12592 (VTI.VT (VTI.LdFrag addr:$src2)))>, 12593 EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD, 12594 Sched<[sched.Folded, sched.ReadAfterFold]>; 12595} 12596 12597multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> { 12598 let Predicates = [HasBITALG] in 12599 defm Z : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512; 12600 let Predicates = [HasBITALG, HasVLX] in { 12601 defm Z256 : 
VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256; 12602 defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128; 12603 } 12604} 12605 12606// FIXME: Is there a better scheduler class for VPSHUFBITQMB? 12607defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>; 12608 12609//===----------------------------------------------------------------------===// 12610// GFNI 12611//===----------------------------------------------------------------------===// 12612 12613multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode, 12614 X86SchedWriteWidths sched> { 12615 let Predicates = [HasGFNI, HasAVX512, HasBWI] in 12616 defm Z : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>, 12617 EVEX_V512; 12618 let Predicates = [HasGFNI, HasVLX, HasBWI] in { 12619 defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>, 12620 EVEX_V256; 12621 defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>, 12622 EVEX_V128; 12623 } 12624} 12625 12626defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb, 12627 SchedWriteVecALU>, 12628 EVEX_CD8<8, CD8VF>, T8PD; 12629 12630multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode, 12631 X86FoldableSchedWrite sched, X86VectorVTInfo VTI, 12632 X86VectorVTInfo BcstVTI> 12633 : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> { 12634 let ExeDomain = VTI.ExeDomain in 12635 defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst), 12636 (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3), 12637 OpStr, "$src3, ${src2}"#BcstVTI.BroadcastStr#", $src1", 12638 "$src1, ${src2}"#BcstVTI.BroadcastStr#", $src3", 12639 (OpNode (VTI.VT VTI.RC:$src1), 12640 (bitconvert (BcstVTI.VT (X86VBroadcastld64 addr:$src2))), 12641 (i8 timm:$src3))>, EVEX_B, 12642 Sched<[sched.Folded, sched.ReadAfterFold]>; 12643} 12644 12645multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode, 12646 X86SchedWriteWidths sched> { 12647 let Predicates = [HasGFNI, HasAVX512, HasBWI] in 12648 defm Z : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM, 12649 v64i8_info, v8i64_info>, EVEX_V512; 12650 let Predicates = [HasGFNI, HasVLX, HasBWI] in { 12651 defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM, 12652 v32i8x_info, v4i64x_info>, EVEX_V256; 12653 defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM, 12654 v16i8x_info, v2i64x_info>, EVEX_V128; 12655 } 12656} 12657 12658defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb", 12659 X86GF2P8affineinvqb, SchedWriteVecIMul>, 12660 EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base; 12661defm VGF2P8AFFINEQB : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb", 12662 X86GF2P8affineqb, SchedWriteVecIMul>, 12663 EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base; 12664 12665 12666//===----------------------------------------------------------------------===// 12667// AVX5124FMAPS 12668//===----------------------------------------------------------------------===// 12669 12670let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle, 12671 Constraints = "$src1 = $dst", Uses = [MXCSR], mayRaiseFPException = 1 in { 12672defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info, 12673 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3), 12674 "v4fmaddps", "$src3, $src2", "$src2, $src3", 12675 []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>, 12676 Sched<[SchedWriteFMA.ZMM.Folded]>; 12677 12678defm V4FNMADDPSrm : 
AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info, 12679 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3), 12680 "v4fnmaddps", "$src3, $src2", "$src2, $src3", 12681 []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>, 12682 Sched<[SchedWriteFMA.ZMM.Folded]>; 12683 12684defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info, 12685 (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3), 12686 "v4fmaddss", "$src3, $src2", "$src2, $src3", 12687 []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>, 12688 Sched<[SchedWriteFMA.Scl.Folded]>; 12689 12690defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info, 12691 (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3), 12692 "v4fnmaddss", "$src3, $src2", "$src2, $src3", 12693 []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>, 12694 Sched<[SchedWriteFMA.Scl.Folded]>; 12695} 12696 12697//===----------------------------------------------------------------------===// 12698// AVX5124VNNIW 12699//===----------------------------------------------------------------------===// 12700 12701let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt, 12702 Constraints = "$src1 = $dst" in { 12703defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info, 12704 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3), 12705 "vp4dpwssd", "$src3, $src2", "$src2, $src3", 12706 []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>, 12707 Sched<[SchedWriteFMA.ZMM.Folded]>; 12708 12709defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info, 12710 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3), 12711 "vp4dpwssds", "$src3, $src2", "$src2, $src3", 12712 []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>, 12713 Sched<[SchedWriteFMA.ZMM.Folded]>; 12714} 12715 12716let hasSideEffects = 0 in { 12717 let mayStore = 1, SchedRW = [WriteFStoreX] in 12718 def MASKPAIR16STORE : PseudoI<(outs), (ins anymem:$dst, VK16PAIR:$src), []>; 12719 let mayLoad = 1, SchedRW = [WriteFLoadX] in 12720 def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src), []>; 12721} 12722 12723//===----------------------------------------------------------------------===// 12724// VP2INTERSECT 12725//===----------------------------------------------------------------------===// 12726 12727multiclass avx512_vp2intersect_modes<X86FoldableSchedWrite sched, X86VectorVTInfo _> { 12728 def rr : I<0x68, MRMSrcReg, 12729 (outs _.KRPC:$dst), 12730 (ins _.RC:$src1, _.RC:$src2), 12731 !strconcat("vp2intersect", _.Suffix, 12732 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 12733 [(set _.KRPC:$dst, (X86vp2intersect 12734 _.RC:$src1, (_.VT _.RC:$src2)))]>, 12735 EVEX_4V, T8XD, Sched<[sched]>; 12736 12737 def rm : I<0x68, MRMSrcMem, 12738 (outs _.KRPC:$dst), 12739 (ins _.RC:$src1, _.MemOp:$src2), 12740 !strconcat("vp2intersect", _.Suffix, 12741 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 12742 [(set _.KRPC:$dst, (X86vp2intersect 12743 _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>, 12744 EVEX_4V, T8XD, EVEX_CD8<_.EltSize, CD8VF>, 12745 Sched<[sched.Folded, sched.ReadAfterFold]>; 12746 12747 def rmb : I<0x68, MRMSrcMem, 12748 (outs _.KRPC:$dst), 12749 (ins _.RC:$src1, _.ScalarMemOp:$src2), 12750 !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr, 12751 ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"), 12752 [(set _.KRPC:$dst, (X86vp2intersect 12753 _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))]>, 12754 EVEX_4V, T8XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, 12755 Sched<[sched.Folded, 
sched.ReadAfterFold]>; 12756} 12757 12758multiclass avx512_vp2intersect<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> { 12759 let Predicates = [HasAVX512, HasVP2INTERSECT] in 12760 defm Z : avx512_vp2intersect_modes<sched.ZMM, _.info512>, EVEX_V512; 12761 12762 let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in { 12763 defm Z256 : avx512_vp2intersect_modes<sched.YMM, _.info256>, EVEX_V256; 12764 defm Z128 : avx512_vp2intersect_modes<sched.XMM, _.info128>, EVEX_V128; 12765 } 12766} 12767 12768defm VP2INTERSECTD : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i32_info>; 12769defm VP2INTERSECTQ : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i64_info>, VEX_W; 12770 12771multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr, 12772 X86SchedWriteWidths sched, 12773 AVX512VLVectorVTInfo _SrcVTInfo, 12774 AVX512VLVectorVTInfo _DstVTInfo, 12775 SDNode OpNode, Predicate prd, 12776 bit IsCommutable = 0> { 12777 let Predicates = [prd] in 12778 defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode, 12779 _SrcVTInfo.info512, _DstVTInfo.info512, 12780 _SrcVTInfo.info512, IsCommutable>, 12781 EVEX_V512, EVEX_CD8<32, CD8VF>; 12782 let Predicates = [HasVLX, prd] in { 12783 defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode, 12784 _SrcVTInfo.info256, _DstVTInfo.info256, 12785 _SrcVTInfo.info256, IsCommutable>, 12786 EVEX_V256, EVEX_CD8<32, CD8VF>; 12787 defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode, 12788 _SrcVTInfo.info128, _DstVTInfo.info128, 12789 _SrcVTInfo.info128, IsCommutable>, 12790 EVEX_V128, EVEX_CD8<32, CD8VF>; 12791 } 12792} 12793 12794let ExeDomain = SSEPackedSingle in 12795defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16", 12796 SchedWriteCvtPD2PS, //FIXME: Should be SchedWriteCvtPS2BF 12797 avx512vl_f32_info, avx512vl_i16_info, 12798 X86cvtne2ps2bf16, HasBF16, 0>, T8XD; 12799 12800// Truncate Float to BFloat16 12801multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr, 12802 X86SchedWriteWidths sched> { 12803 let ExeDomain = SSEPackedSingle in { 12804 let Predicates = [HasBF16], Uses = []<Register>, mayRaiseFPException = 0 in { 12805 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i16x_info, v16f32_info, 12806 X86cvtneps2bf16, X86cvtneps2bf16, sched.ZMM>, EVEX_V512; 12807 } 12808 let Predicates = [HasBF16, HasVLX] in { 12809 let Uses = []<Register>, mayRaiseFPException = 0 in { 12810 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v4f32x_info, 12811 null_frag, null_frag, sched.XMM, "{1to4}", "{x}", f128mem, 12812 VK4WM>, EVEX_V128; 12813 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v8f32x_info, 12814 X86cvtneps2bf16, X86cvtneps2bf16, 12815 sched.YMM, "{1to8}", "{y}">, EVEX_V256; 12816 } 12817 } // Predicates = [HasBF16, HasVLX] 12818 } // ExeDomain = SSEPackedSingle 12819 12820 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}", 12821 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, 12822 VR128X:$src), 0>; 12823 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}", 12824 (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, 12825 f128mem:$src), 0, "intel">; 12826 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}", 12827 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, 12828 VR256X:$src), 0>; 12829 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}", 12830 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, 12831 f256mem:$src), 0, "intel">; 12832} 12833 12834defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16", 12835 SchedWriteCvtPD2PS>, T8XS, 12836 EVEX_CD8<32, CD8VF>; 12837 
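// Note: as with vcvtqq2ph further below, the "x"/"y" mnemonic suffixes are
// needed because the 128-bit and 256-bit source forms both write an xmm
// destination, so the bare mnemonic is ambiguous for memory sources.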
12838let Predicates = [HasBF16, HasVLX] in { 12839 // Special patterns to allow use of X86mcvtneps2bf16 for masking. Instruction 12840 // patterns have been disabled with null_frag. 12841 def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32 VR128X:$src))), 12842 (VCVTNEPS2BF16Z128rr VR128X:$src)>; 12843 def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), (v8i16 VR128X:$src0), 12844 VK4WM:$mask), 12845 (VCVTNEPS2BF16Z128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src)>; 12846 def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), v8i16x_info.ImmAllZerosV, 12847 VK4WM:$mask), 12848 (VCVTNEPS2BF16Z128rrkz VK4WM:$mask, VR128X:$src)>; 12849 12850 def : Pat<(v8i16 (X86cvtneps2bf16 (loadv4f32 addr:$src))), 12851 (VCVTNEPS2BF16Z128rm addr:$src)>; 12852 def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), (v8i16 VR128X:$src0), 12853 VK4WM:$mask), 12854 (VCVTNEPS2BF16Z128rmk VR128X:$src0, VK4WM:$mask, addr:$src)>; 12855 def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), v8i16x_info.ImmAllZerosV, 12856 VK4WM:$mask), 12857 (VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>; 12858 12859 def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32 12860 (X86VBroadcastld32 addr:$src)))), 12861 (VCVTNEPS2BF16Z128rmb addr:$src)>; 12862 def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)), 12863 (v8i16 VR128X:$src0), VK4WM:$mask), 12864 (VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>; 12865 def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)), 12866 v8i16x_info.ImmAllZerosV, VK4WM:$mask), 12867 (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>; 12868} 12869 12870let Constraints = "$src1 = $dst" in { 12871multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, 12872 X86FoldableSchedWrite sched, 12873 X86VectorVTInfo _, X86VectorVTInfo src_v> { 12874 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 12875 (ins src_v.RC:$src2, src_v.RC:$src3), 12876 OpcodeStr, "$src3, $src2", "$src2, $src3", 12877 (_.VT (OpNode _.RC:$src1, src_v.RC:$src2, src_v.RC:$src3))>, 12878 EVEX_4V, Sched<[sched]>; 12879 12880 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 12881 (ins src_v.RC:$src2, src_v.MemOp:$src3), 12882 OpcodeStr, "$src3, $src2", "$src2, $src3", 12883 (_.VT (OpNode _.RC:$src1, src_v.RC:$src2, 12884 (src_v.LdFrag addr:$src3)))>, EVEX_4V, 12885 Sched<[sched.Folded, sched.ReadAfterFold]>; 12886 12887 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 12888 (ins src_v.RC:$src2, src_v.ScalarMemOp:$src3), 12889 OpcodeStr, 12890 !strconcat("${src3}", _.BroadcastStr,", $src2"), 12891 !strconcat("$src2, ${src3}", _.BroadcastStr), 12892 (_.VT (OpNode _.RC:$src1, src_v.RC:$src2, 12893 (src_v.VT (src_v.BroadcastLdFrag addr:$src3))))>, 12894 EVEX_B, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; 12895 12896} 12897} // Constraints = "$src1 = $dst" 12898 12899multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, 12900 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _, 12901 AVX512VLVectorVTInfo src_v, Predicate prd> { 12902 let Predicates = [prd] in { 12903 defm Z : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512, 12904 src_v.info512>, EVEX_V512; 12905 } 12906 let Predicates = [HasVLX, prd] in { 12907 defm Z256 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256, 12908 src_v.info256>, EVEX_V256; 12909 defm Z128 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128, 12910 src_v.info128>, EVEX_V128; 12911 } 12912} 12913 12914let ExeDomain = SSEPackedSingle in 12915defm VDPBF16PS : 
avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps, SchedWriteFMA,
                      avx512vl_f32_info, avx512vl_i32_info,
                      HasBF16>, T8XS, EVEX_CD8<32, CD8VF>;

//===----------------------------------------------------------------------===//
// AVX512FP16
//===----------------------------------------------------------------------===//

let Predicates = [HasFP16] in {
// Move word (r/m16) to packed word
def VMOVW2SHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
                   "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, Sched<[WriteVecMoveFromGpr]>;
def VMOVWrm : AVX512<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i16mem:$src),
                   "vmovw\t{$src, $dst|$dst, $src}",
                   [(set VR128X:$dst,
                     (v8i16 (scalar_to_vector (loadi16 addr:$src))))]>,
                   T_MAP5PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFLoad]>;

def : Pat<(f16 (bitconvert GR16:$src)),
          (f16 (COPY_TO_REGCLASS
                (VMOVW2SHrr
                 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)),
                FR16X))>;
def : Pat<(v8i16 (scalar_to_vector (i16 GR16:$src))),
          (VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>;
def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (and GR32:$src, 0xffff)))),
          (VMOVW2SHrr GR32:$src)>;
// FIXME: We should really find a way to improve these patterns.
def : Pat<(v8i32 (X86vzmovl
                  (insert_subvector undef,
                                    (v4i32 (scalar_to_vector
                                            (and GR32:$src, 0xffff))),
                                    (iPTR 0)))),
          (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;
def : Pat<(v16i32 (X86vzmovl
                   (insert_subvector undef,
                                     (v4i32 (scalar_to_vector
                                             (and GR32:$src, 0xffff))),
                                     (iPTR 0)))),
          (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;

def : Pat<(v8i16 (X86vzmovl (scalar_to_vector (i16 GR16:$src)))),
          (VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>;

// AVX 128-bit movw instruction writes zeros in the high 128-bit part.
def : Pat<(v8i16 (X86vzload16 addr:$src)),
          (VMOVWrm addr:$src)>;
def : Pat<(v16i16 (X86vzload16 addr:$src)),
          (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>;

// Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
12966def : Pat<(v32i16 (X86vzload16 addr:$src)), 12967 (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>; 12968 12969def : Pat<(v4i32 (scalar_to_vector (i32 (extloadi16 addr:$src)))), 12970 (VMOVWrm addr:$src)>; 12971def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (i32 (zextloadi16 addr:$src))))), 12972 (VMOVWrm addr:$src)>; 12973def : Pat<(v8i32 (X86vzmovl 12974 (insert_subvector undef, 12975 (v4i32 (scalar_to_vector 12976 (i32 (zextloadi16 addr:$src)))), 12977 (iPTR 0)))), 12978 (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>; 12979def : Pat<(v16i32 (X86vzmovl 12980 (insert_subvector undef, 12981 (v4i32 (scalar_to_vector 12982 (i32 (zextloadi16 addr:$src)))), 12983 (iPTR 0)))), 12984 (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>; 12985 12986// Move word from xmm register to r/m16 12987def VMOVSH2Wrr : AVX512<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src), 12988 "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, Sched<[WriteVecMoveToGpr]>; 12989def VMOVWmr : AVX512<0x7E, MRMDestMem, (outs), 12990 (ins i16mem:$dst, VR128X:$src), 12991 "vmovw\t{$src, $dst|$dst, $src}", 12992 [(store (i16 (extractelt (v8i16 VR128X:$src), 12993 (iPTR 0))), addr:$dst)]>, 12994 T_MAP5PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFStore]>; 12995 12996def : Pat<(i16 (bitconvert FR16X:$src)), 12997 (i16 (EXTRACT_SUBREG 12998 (VMOVSH2Wrr (COPY_TO_REGCLASS FR16X:$src, VR128X)), 12999 sub_16bit))>; 13000def : Pat<(i16 (extractelt (v8i16 VR128X:$src), (iPTR 0))), 13001 (i16 (EXTRACT_SUBREG (VMOVSH2Wrr VR128X:$src), sub_16bit))>; 13002 13003// Allow "vmovw" to use GR64 13004let hasSideEffects = 0 in { 13005 def VMOVW64toSHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src), 13006 "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>; 13007 def VMOVSHtoW64rr : AVX512<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src), 13008 "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>; 13009} 13010} 13011 13012// Convert 16-bit float to i16/u16 13013multiclass avx512_cvtph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 13014 SDPatternOperator MaskOpNode, SDNode OpNodeRnd, 13015 AVX512VLVectorVTInfo _Dst, 13016 AVX512VLVectorVTInfo _Src, 13017 X86SchedWriteWidths sched> { 13018 let Predicates = [HasFP16] in { 13019 defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512, 13020 OpNode, MaskOpNode, sched.ZMM>, 13021 avx512_vcvt_fp_rc<opc, OpcodeStr, _Dst.info512, _Src.info512, 13022 OpNodeRnd, sched.ZMM>, EVEX_V512; 13023 } 13024 let Predicates = [HasFP16, HasVLX] in { 13025 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128, 13026 OpNode, MaskOpNode, sched.XMM>, EVEX_V128; 13027 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256, 13028 OpNode, MaskOpNode, sched.YMM>, EVEX_V256; 13029 } 13030} 13031 13032// Convert 16-bit float to i16/u16 truncate 13033multiclass avx512_cvttph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 13034 SDPatternOperator MaskOpNode, SDNode OpNodeRnd, 13035 AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src, 13036 X86SchedWriteWidths sched> { 13037 let Predicates = [HasFP16] in { 13038 defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512, 13039 OpNode, MaskOpNode, sched.ZMM>, 13040 avx512_vcvt_fp_sae<opc, OpcodeStr, _Dst.info512, _Src.info512, 13041 OpNodeRnd, sched.ZMM>, EVEX_V512; 13042 } 13043 let Predicates = [HasFP16, HasVLX] in { 13044 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, 
_Src.info128, 13045 OpNode, MaskOpNode, sched.XMM>, EVEX_V128; 13046 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256, 13047 OpNode, MaskOpNode, sched.YMM>, EVEX_V256; 13048 } 13049} 13050 13051defm VCVTPH2UW : avx512_cvtph2w<0x7D, "vcvtph2uw", X86cvtp2UInt, X86cvtp2UInt, 13052 X86cvtp2UIntRnd, avx512vl_i16_info, 13053 avx512vl_f16_info, SchedWriteCvtPD2DQ>, 13054 T_MAP5PS, EVEX_CD8<16, CD8VF>; 13055defm VCVTUW2PH : avx512_cvtph2w<0x7D, "vcvtuw2ph", any_uint_to_fp, uint_to_fp, 13056 X86VUintToFpRnd, avx512vl_f16_info, 13057 avx512vl_i16_info, SchedWriteCvtPD2DQ>, 13058 T_MAP5XD, EVEX_CD8<16, CD8VF>; 13059defm VCVTTPH2W : avx512_cvttph2w<0x7C, "vcvttph2w", X86any_cvttp2si, 13060 X86cvttp2si, X86cvttp2siSAE, 13061 avx512vl_i16_info, avx512vl_f16_info, 13062 SchedWriteCvtPD2DQ>, T_MAP5PD, EVEX_CD8<16, CD8VF>; 13063defm VCVTTPH2UW : avx512_cvttph2w<0x7C, "vcvttph2uw", X86any_cvttp2ui, 13064 X86cvttp2ui, X86cvttp2uiSAE, 13065 avx512vl_i16_info, avx512vl_f16_info, 13066 SchedWriteCvtPD2DQ>, T_MAP5PS, EVEX_CD8<16, CD8VF>; 13067defm VCVTPH2W : avx512_cvtph2w<0x7D, "vcvtph2w", X86cvtp2Int, X86cvtp2Int, 13068 X86cvtp2IntRnd, avx512vl_i16_info, 13069 avx512vl_f16_info, SchedWriteCvtPD2DQ>, 13070 T_MAP5PD, EVEX_CD8<16, CD8VF>; 13071defm VCVTW2PH : avx512_cvtph2w<0x7D, "vcvtw2ph", any_sint_to_fp, sint_to_fp, 13072 X86VSintToFpRnd, avx512vl_f16_info, 13073 avx512vl_i16_info, SchedWriteCvtPD2DQ>, 13074 T_MAP5XS, EVEX_CD8<16, CD8VF>; 13075 13076// Convert Half to Signed/Unsigned Doubleword 13077multiclass avx512_cvtph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 13078 SDPatternOperator MaskOpNode, SDNode OpNodeRnd, 13079 X86SchedWriteWidths sched> { 13080 let Predicates = [HasFP16] in { 13081 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode, 13082 MaskOpNode, sched.ZMM>, 13083 avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f16x_info, 13084 OpNodeRnd, sched.ZMM>, EVEX_V512; 13085 } 13086 let Predicates = [HasFP16, HasVLX] in { 13087 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode, 13088 MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128; 13089 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode, 13090 MaskOpNode, sched.YMM>, EVEX_V256; 13091 } 13092} 13093 13094// Convert Half to Signed/Unsigned Doubleword with truncation 13095multiclass avx512_cvttph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 13096 SDPatternOperator MaskOpNode, SDNode OpNodeRnd, 13097 X86SchedWriteWidths sched> { 13098 let Predicates = [HasFP16] in { 13099 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode, 13100 MaskOpNode, sched.ZMM>, 13101 avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f16x_info, 13102 OpNodeRnd, sched.ZMM>, EVEX_V512; 13103 } 13104 let Predicates = [HasFP16, HasVLX] in { 13105 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode, 13106 MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128; 13107 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode, 13108 MaskOpNode, sched.YMM>, EVEX_V256; 13109 } 13110} 13111 13112 13113defm VCVTPH2DQ : avx512_cvtph2dq<0x5B, "vcvtph2dq", X86cvtp2Int, X86cvtp2Int, 13114 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD, 13115 EVEX_CD8<16, CD8VH>; 13116defm VCVTPH2UDQ : avx512_cvtph2dq<0x79, "vcvtph2udq", X86cvtp2UInt, X86cvtp2UInt, 13117 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5PS, 13118 EVEX_CD8<16, CD8VH>; 13119 13120defm VCVTTPH2DQ : avx512_cvttph2dq<0x5B, "vcvttph2dq", 
X86any_cvttp2si,
                                 X86cvttp2si, X86cvttp2siSAE,
                                 SchedWriteCvtPS2DQ>, T_MAP5XS,
                                 EVEX_CD8<16, CD8VH>;

defm VCVTTPH2UDQ : avx512_cvttph2dq<0x78, "vcvttph2udq", X86any_cvttp2ui,
                                    X86cvttp2ui, X86cvttp2uiSAE,
                                    SchedWriteCvtPS2DQ>, T_MAP5PS,
                                    EVEX_CD8<16, CD8VH>;

// Convert Half to Signed/Unsigned Quadword
multiclass avx512_cvtph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f16x_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasFP16, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from v8f16x_info source
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode,
                               MaskOpNode, sched.XMM, "{1to2}", "", f32mem>,
                               EVEX_V128;
    // Explicitly specified broadcast string, since we take only 4 elements
    // from v8f16x_info source
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode,
                               MaskOpNode, sched.YMM, "{1to4}", "", f64mem>,
                               EVEX_V256;
  }
}

// Convert Half to Signed/Unsigned Quadword with truncation
multiclass avx512_cvttph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f16x_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasFP16, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from v8f16x_info source
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode,
                               MaskOpNode, sched.XMM, "{1to2}", "", f32mem>, EVEX_V128;
    // Explicitly specified broadcast string, since we take only 4 elements
    // from v8f16x_info source
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode,
                               MaskOpNode, sched.YMM, "{1to4}", "", f64mem>, EVEX_V256;
  }
}

defm VCVTPH2QQ : avx512_cvtph2qq<0x7B, "vcvtph2qq", X86cvtp2Int, X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD,
                                 EVEX_CD8<16, CD8VQ>;

defm VCVTPH2UQQ : avx512_cvtph2qq<0x79, "vcvtph2uqq", X86cvtp2UInt, X86cvtp2UInt,
                                  X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD,
                                  EVEX_CD8<16, CD8VQ>;

defm VCVTTPH2QQ : avx512_cvttph2qq<0x7A, "vcvttph2qq", X86any_cvttp2si,
                                   X86cvttp2si, X86cvttp2siSAE,
                                   SchedWriteCvtPS2DQ>, T_MAP5PD,
                                   EVEX_CD8<16, CD8VQ>;

defm VCVTTPH2UQQ : avx512_cvttph2qq<0x78, "vcvttph2uqq", X86any_cvttp2ui,
                                    X86cvttp2ui, X86cvttp2uiSAE,
                                    SchedWriteCvtPS2DQ>, T_MAP5PD,
                                    EVEX_CD8<16, CD8VQ>;

// Convert Signed/Unsigned Quadword to Half
multiclass avx512_cvtqq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  // We need "x"/"y"/"z" suffixes in order to distinguish between 128, 256 and
  // 512 memory forms of these instructions in Asm
  let Predicates = [HasFP16] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8i64_info, OpNode,
                            MaskOpNode, sched.ZMM, "{1to8}", "{z}">,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8i64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasFP16, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2i64x_info,
                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}",
                               i128mem, VK2WM>,
                               EVEX_V128, NotEVEX2VEXConvertible;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4i64x_info,
                               null_frag, null_frag, sched.YMM, "{1to4}", "{y}",
                               i256mem, VK4WM>,
                               EVEX_V256, NotEVEX2VEXConvertible;
  }

  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                  VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                  i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, i64mem:$src), 0, "att">;

  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                  VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
                  "$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                  i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask, i64mem:$src), 0, "att">;

  def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Zrr") VR128X:$dst,
                  VR512:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|"
                  "$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst,
                  VK8WM:$mask, VR512:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst,
                  VK8WM:$mask, VR512:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}",
                  (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst,
                  i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to8}}",
                  (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst,
                  VK8WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to8}}",
                  (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst,
                  VK8WM:$mask, i64mem:$src), 0, "att">;
}
{z}, $src}", 13272 (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst, 13273 VK8WM:$mask, VR512:$src), 0, "att">; 13274 def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}", 13275 (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst, 13276 i64mem:$src), 0, "att">; 13277 def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|" 13278 "$dst {${mask}}, ${src}{1to8}}", 13279 (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst, 13280 VK8WM:$mask, i64mem:$src), 0, "att">; 13281 def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|" 13282 "$dst {${mask}} {z}, ${src}{1to8}}", 13283 (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst, 13284 VK8WM:$mask, i64mem:$src), 0, "att">; 13285} 13286 13287defm VCVTQQ2PH : avx512_cvtqq2ph<0x5B, "vcvtqq2ph", any_sint_to_fp, sint_to_fp, 13288 X86VSintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, T_MAP5PS, 13289 EVEX_CD8<64, CD8VF>; 13290 13291defm VCVTUQQ2PH : avx512_cvtqq2ph<0x7A, "vcvtuqq2ph", any_uint_to_fp, uint_to_fp, 13292 X86VUintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, T_MAP5XD, 13293 EVEX_CD8<64, CD8VF>; 13294 13295// Convert half to signed/unsigned int 32/64 13296defm VCVTSH2SIZ: avx512_cvt_s_int_round<0x2D, f16x_info, i32x_info, X86cvts2si, 13297 X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{l}", HasFP16>, 13298 T_MAP5XS, EVEX_CD8<16, CD8VT1>; 13299defm VCVTSH2SI64Z: avx512_cvt_s_int_round<0x2D, f16x_info, i64x_info, X86cvts2si, 13300 X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{q}", HasFP16>, 13301 T_MAP5XS, VEX_W, EVEX_CD8<16, CD8VT1>; 13302defm VCVTSH2USIZ: avx512_cvt_s_int_round<0x79, f16x_info, i32x_info, X86cvts2usi, 13303 X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{l}", HasFP16>, 13304 T_MAP5XS, EVEX_CD8<16, CD8VT1>; 13305defm VCVTSH2USI64Z: avx512_cvt_s_int_round<0x79, f16x_info, i64x_info, X86cvts2usi, 13306 X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{q}", HasFP16>, 13307 T_MAP5XS, VEX_W, EVEX_CD8<16, CD8VT1>; 13308 13309defm VCVTTSH2SIZ: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i32x_info, 13310 any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I, 13311 "{l}", HasFP16>, T_MAP5XS, EVEX_CD8<16, CD8VT1>; 13312defm VCVTTSH2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i64x_info, 13313 any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I, 13314 "{q}", HasFP16>, VEX_W, T_MAP5XS, EVEX_CD8<16, CD8VT1>; 13315defm VCVTTSH2USIZ: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i32x_info, 13316 any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I, 13317 "{l}", HasFP16>, T_MAP5XS, EVEX_CD8<16, CD8VT1>; 13318defm VCVTTSH2USI64Z: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i64x_info, 13319 any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I, 13320 "{q}", HasFP16>, T_MAP5XS, VEX_W, EVEX_CD8<16, CD8VT1>; 13321 13322let Predicates = [HasFP16] in { 13323 defm VCVTSI2SHZ : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR32, 13324 v8f16x_info, i32mem, loadi32, "cvtsi2sh", "l">, 13325 T_MAP5XS, EVEX_CD8<32, CD8VT1>; 13326 defm VCVTSI642SHZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR64, 13327 v8f16x_info, i64mem, loadi64, "cvtsi2sh","q">, 13328 T_MAP5XS, VEX_W, EVEX_CD8<64, CD8VT1>; 13329 defm VCVTUSI2SHZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR32, 13330 v8f16x_info, i32mem, loadi32, 13331 "cvtusi2sh","l">, T_MAP5XS, EVEX_CD8<32, CD8VT1>; 13332 defm VCVTUSI642SHZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR64, 13333 v8f16x_info, i64mem, loadi64, "cvtusi2sh", "q">, 13334 T_MAP5XS, VEX_W, 
  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR64:$src)))))),
            (VCVTSI642SHZrr_Int VR128X:$dst, GR64:$src)>;

  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi64 addr:$src))))))),
            (VCVTSI642SHZrm_Int VR128X:$dst, addr:$src)>;

  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR32:$src)))))),
            (VCVTSI2SHZrr_Int VR128X:$dst, GR32:$src)>;

  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi32 addr:$src))))))),
            (VCVTSI2SHZrm_Int VR128X:$dst, addr:$src)>;

  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR64:$src)))))),
            (VCVTUSI642SHZrr_Int VR128X:$dst, GR64:$src)>;

  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi64 addr:$src))))))),
            (VCVTUSI642SHZrm_Int VR128X:$dst, addr:$src)>;

  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR32:$src)))))),
            (VCVTUSI2SHZrr_Int VR128X:$dst, GR32:$src)>;

  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi32 addr:$src))))))),
            (VCVTUSI2SHZrm_Int VR128X:$dst, addr:$src)>;
} // Predicates = [HasFP16]

let Predicates = [HasFP16, HasVLX] in {
  // Special patterns to allow use of X86VMSintToFP for masking. Instruction
  // patterns have been disabled with null_frag.
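  // Since the VLX variants of avx512_cvtqq2ph were instantiated with
  // null_frag, TableGen emitted no selection patterns for them; every form
  // (rr/rm/rmb plus their merge- and zero-masked variants) is matched by hand
  // below. As a sketch, the zero-masked register conversion
  //   (X86VMSintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV, VK4WM:$mask)
  // selects to the {z}-masked instruction VCVTQQ2PHZ256rrkz.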
  def : Pat<(v8f16 (X86any_VSintToFP (v4i64 VR256X:$src))),
            (VCVTQQ2PHZ256rr VR256X:$src)>;
  def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
                           VK4WM:$mask),
            (VCVTQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
  def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
                           VK4WM:$mask),
            (VCVTQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;

  def : Pat<(v8f16 (X86any_VSintToFP (loadv4i64 addr:$src))),
            (VCVTQQ2PHZ256rm addr:$src)>;
  def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
                           VK4WM:$mask),
            (VCVTQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
                           VK4WM:$mask),
            (VCVTQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_VSintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
            (VCVTQQ2PHZ256rmb addr:$src)>;
  def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
                           (v8f16 VR128X:$src0), VK4WM:$mask),
            (VCVTQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
                           v8f16x_info.ImmAllZerosV, VK4WM:$mask),
            (VCVTQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_VSintToFP (v2i64 VR128X:$src))),
            (VCVTQQ2PHZ128rr VR128X:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v8f16 (X86any_VSintToFP (loadv2i64 addr:$src))),
            (VCVTQQ2PHZ128rm addr:$src)>;
  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
            (VCVTQQ2PHZ128rmb addr:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
                           (v8f16 VR128X:$src0), VK2WM:$mask),
            (VCVTQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
                           v8f16x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;

  // Special patterns to allow use of X86VMUintToFP for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v8f16 (X86any_VUintToFP (v4i64 VR256X:$src))),
            (VCVTUQQ2PHZ256rr VR256X:$src)>;
  def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
                           VK4WM:$mask),
            (VCVTUQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
  def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
                           VK4WM:$mask),
            (VCVTUQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;

  def : Pat<(v8f16 (X86any_VUintToFP (loadv4i64 addr:$src))),
            (VCVTUQQ2PHZ256rm addr:$src)>;
  def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
                           VK4WM:$mask),
            (VCVTUQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
                           VK4WM:$mask),
            (VCVTUQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_VUintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
            (VCVTUQQ2PHZ256rmb addr:$src)>;
  def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
                           (v8f16 VR128X:$src0), VK4WM:$mask),
            (VCVTUQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
                           v8f16x_info.ImmAllZerosV, VK4WM:$mask),
            (VCVTUQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_VUintToFP (v2i64 VR128X:$src))),
            (VCVTUQQ2PHZ128rr VR128X:$src)>;
  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTUQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTUQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v8f16 (X86any_VUintToFP (loadv2i64 addr:$src))),
            (VCVTUQQ2PHZ128rm addr:$src)>;
  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTUQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTUQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
            (VCVTUQQ2PHZ128rmb addr:$src)>;
  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
                           (v8f16 VR128X:$src0), VK2WM:$mask),
            (VCVTUQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
                           v8f16x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTUQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
}
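
// The complex FMA forms below tie the accumulator to the destination
// ("$src1 = $dst") and additionally mark $dst "@earlyclobber", which keeps
// the register allocator from ever assigning $dst to either $src2 or $src3.
// Our reading (an assumption, not spelled out in this file) is that these
// complex instructions do not permit the destination to alias a non-tied
// source, so the constraint models that restriction.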

let Constraints = "@earlyclobber $dst, $src1 = $dst" in {
  multiclass avx512_cfmaop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86VectorVTInfo _, bit IsCommutable> {
    defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                 (ins _.RC:$src2, _.RC:$src3),
                                 OpcodeStr, "$src3, $src2", "$src2, $src3",
                                 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), IsCommutable>, EVEX_4V;

    defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                 (ins _.RC:$src2, _.MemOp:$src3),
                                 OpcodeStr, "$src3, $src2", "$src2, $src3",
                                 (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>, EVEX_4V;

    defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                  (ins _.RC:$src2, _.ScalarMemOp:$src3),
                                  OpcodeStr,
                                  !strconcat("${src3}", _.BroadcastStr, ", $src2"),
                                  !strconcat("$src2, ${src3}", _.BroadcastStr),
                                  (_.VT (OpNode _.RC:$src2, (_.VT (_.BroadcastLdFrag addr:$src3)), _.RC:$src1))>, EVEX_B, EVEX_4V;
  }
} // Constraints = "@earlyclobber $dst, $src1 = $dst"

multiclass avx512_cfmaop_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86VectorVTInfo _> {
  let Constraints = "@earlyclobber $dst, $src1 = $dst" in
  defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
                                OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
                                (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc)))>,
                                EVEX_4V, EVEX_B, EVEX_RC;
}

multiclass avx512_cfmaop_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                SDNode OpNodeRnd, bit IsCommutable> {
  let Predicates = [HasFP16] in {
    defm Z : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v16f32_info, IsCommutable>,
             avx512_cfmaop_round<opc, OpcodeStr, OpNodeRnd, v16f32_info>,
             EVEX_V512, Sched<[WriteFMAZ]>;
  }
  let Predicates = [HasVLX, HasFP16] in {
    defm Z256 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v8f32x_info, IsCommutable>, EVEX_V256, Sched<[WriteFMAY]>;
    defm Z128 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v4f32x_info, IsCommutable>, EVEX_V128, Sched<[WriteFMAX]>;
  }
}

multiclass avx512_cfmulop_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 SDNode MaskOpNode, SDNode OpNodeRnd, bit IsCommutable> {
  let Predicates = [HasFP16] in {
    defm Z : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
                              WriteFMAZ, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>,
             avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, WriteFMAZ, v16f32_info,
                                    "", "@earlyclobber $dst">, EVEX_V512;
  }
  let Predicates = [HasVLX, HasFP16] in {
    defm Z256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
                                 WriteFMAY, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V256;
    defm Z128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
                                 WriteFMAX, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V128;
  }
}

let Uses = [MXCSR] in {
  defm VFMADDCPH : avx512_cfmaop_common<0x56, "vfmaddcph", x86vfmaddc, x86vfmaddcRnd, 1>,
                                        T_MAP6XS, EVEX_CD8<32, CD8VF>;
  defm VFCMADDCPH : avx512_cfmaop_common<0x56, "vfcmaddcph", x86vfcmaddc, x86vfcmaddcRnd, 0>,
                                         T_MAP6XD, EVEX_CD8<32, CD8VF>;

  defm VFMULCPH : avx512_cfmulop_common<0xD6, "vfmulcph", x86vfmulc, x86vfmulc,
                                        x86vfmulcRnd, 1>, T_MAP6XS, EVEX_CD8<32, CD8VF>;
  defm VFCMULCPH : avx512_cfmulop_common<0xD6, "vfcmulcph", x86vfcmulc,
                                         x86vfcmulc, x86vfcmulcRnd, 0>, T_MAP6XD, EVEX_CD8<32, CD8VF>;
}
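
// The packed complex ops above treat each aligned pair of f16 elements as one
// complex number (which is why their patterns are written over f32-typed
// lanes such as v16f32_info). Plain complex multiplication commutes, so
// vfmaddcph/vfmulcph pass IsCommutable = 1, while the conjugating
// vfcmaddcph/vfcmulcph forms pass 0: in general a * conj(b) != b * conj(a).
// The scalar *sh variants below follow the same scheme.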

multiclass avx512_cfmaop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   SDNode OpNodeRnd, bit IsCommutable> {
  let Predicates = [HasFP16], Constraints = "@earlyclobber $dst, $src1 = $dst" in {
    defm r : AVX512_maskable_3src<opc, MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
                                  (ins VR128X:$src2, VR128X:$src3), OpcodeStr,
                                  "$src3, $src2", "$src2, $src3",
                                  (v4f32 (OpNode VR128X:$src2, VR128X:$src3, VR128X:$src1)), IsCommutable>,
                                  Sched<[WriteFMAX]>;
    defm m : AVX512_maskable_3src<opc, MRMSrcMem, v4f32x_info, (outs VR128X:$dst),
                                  (ins VR128X:$src2, ssmem:$src3), OpcodeStr,
                                  "$src3, $src2", "$src2, $src3",
                                  (v4f32 (OpNode VR128X:$src2, (sse_load_f32 addr:$src3), VR128X:$src1))>,
                                  Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
    defm rb : AVX512_maskable_3src<opc, MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
                                   (ins VR128X:$src2, VR128X:$src3, AVX512RC:$rc), OpcodeStr,
                                   "$rc, $src3, $src2", "$src2, $src3, $rc",
                                   (v4f32 (OpNodeRnd VR128X:$src2, VR128X:$src3, VR128X:$src1, (i32 timm:$rc)))>,
                                   EVEX_B, EVEX_RC, Sched<[WriteFMAX]>;
  }
}

multiclass avx512_cfmbinop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                     SDNode OpNodeRnd, bit IsCommutable> {
  let Predicates = [HasFP16] in {
    defm rr : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
                              (ins VR128X:$src1, VR128X:$src2), OpcodeStr,
                              "$src2, $src1", "$src1, $src2",
                              (v4f32 (OpNode VR128X:$src1, VR128X:$src2)),
                              IsCommutable, IsCommutable, IsCommutable,
                              X86selects, "@earlyclobber $dst">, Sched<[WriteFMAX]>;
    defm rm : AVX512_maskable<opc, MRMSrcMem, f32x_info, (outs VR128X:$dst),
                              (ins VR128X:$src1, ssmem:$src2), OpcodeStr,
                              "$src2, $src1", "$src1, $src2",
                              (v4f32 (OpNode VR128X:$src1, (sse_load_f32 addr:$src2))),
                              0, 0, 0, X86selects, "@earlyclobber $dst">,
                              Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
    defm rrb : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
                               (ins VR128X:$src1, VR128X:$src2, AVX512RC:$rc), OpcodeStr,
                               "$rc, $src2, $src1", "$src1, $src2, $rc",
                               (OpNodeRnd (v4f32 VR128X:$src1), (v4f32 VR128X:$src2), (i32 timm:$rc)),
                               0, 0, 0, X86selects, "@earlyclobber $dst">,
                               EVEX_B, EVEX_RC, Sched<[WriteFMAX]>;
  }
}

let Uses = [MXCSR] in {
  defm VFMADDCSHZ : avx512_cfmaop_sh_common<0x57, "vfmaddcsh", x86vfmaddcSh, x86vfmaddcShRnd, 1>,
                                            T_MAP6XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX_4V;
  defm VFCMADDCSHZ : avx512_cfmaop_sh_common<0x57, "vfcmaddcsh", x86vfcmaddcSh, x86vfcmaddcShRnd, 0>,
                                             T_MAP6XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX_4V;

  defm VFMULCSHZ : avx512_cfmbinop_sh_common<0xD7, "vfmulcsh", x86vfmulcSh, x86vfmulcShRnd, 1>,
                                             T_MAP6XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX_4V;
  defm VFCMULCSHZ : avx512_cfmbinop_sh_common<0xD7, "vfcmulcsh", x86vfcmulcSh, x86vfcmulcShRnd, 0>,
                                              T_MAP6XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX_4V;
}