//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 AVX512 instruction set, defining the
// instructions, and properties of the instructions which are needed for code
// generation, machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//

// Group template arguments that can be derived from the vector type (EltNum x
// EltVT). These are things like the register class for the writemask, etc.
// The idea is to pass one of these as the template argument rather than the
// individual arguments.
// The template is also used for scalar types, in this case numelts is 1.
class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
                      string suffix = ""> {
  // The vector register class holding the data, e.g. VR512.
  RegisterClass RC = rc;
  // The element value type, e.g. i32 or f64.
  ValueType EltVT = eltvt;
  // Number of elements; 1 for the scalar instantiations.
  int NumElts = numelts;

  // Corresponding mask register class.
  RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);

  // Corresponding mask register pair class.
  // Only defined for NumElts <= 16; there is no VK32Pair/VK64Pair.
  RegisterOperand KRPC = !if (!gt(NumElts, 16), ?,
                              !cast<RegisterOperand>("VK" # NumElts # "Pair"));

  // Corresponding write-mask register class.
  RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");

  // The mask VT.
  ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");

  // Suffix used in the instruction mnemonic.
  string Suffix = suffix;

  // VTName is a string name for vector VT. For vector types it will be
  // v # NumElts # EltVT, so for vector of 8 elements of i32 it will be v8i32
  // It is a little bit complex for scalar types, where NumElts = 1.
  // In this case we build v8f16, v4f32 or v2f64 (a full 128-bit vector of the
  // scalar's element type).
  string VTName = "v" # !if (!eq (NumElts, 1),
                        !if (!eq (EltVT.Size, 16), 8,
                        !if (!eq (EltVT.Size, 32), 4,
                        !if (!eq (EltVT.Size, 64), 2, NumElts))), NumElts) # EltVT;

  // The vector VT.
  ValueType VT = !cast<ValueType>(VTName);

  // The element type's record name as a string, e.g. "f32".
  string EltTypeName = !cast<string>(EltVT);
  // Size of the element type in bits, e.g. 32 for v16i32.
  string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
  int EltSize = EltVT.Size;

  // "i" for integer types and "f" for floating-point types
  string TypeVariantName = !subst(EltSizeName, "", EltTypeName);

  // Size of RC in bits, e.g. 512 for VR512.
  int Size = VT.Size;

  // The corresponding memory operand, e.g. i512mem for VR512.
  X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
  // Memory operand for a single element, e.g. f32mem.
  X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
  // FP scalar memory operand for intrinsics - ssmem/sdmem.
  Operand IntScalarMemOp = !if (!eq (EltTypeName, "f16"), !cast<Operand>("shmem"),
                           !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
                           !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?)));

  // Load patterns
  PatFrag LdFrag = !cast<PatFrag>("load" # VTName);

  PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" # VTName);

  PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
  PatFrag BroadcastLdFrag = !cast<PatFrag>("X86VBroadcastld" # EltSizeName);

  // Scalar-intrinsic load fragments; FP element types only.
  PatFrags ScalarIntMemFrags = !if (!eq (EltTypeName, "f16"), !cast<PatFrags>("sse_load_f16"),
                               !if (!eq (EltTypeName, "f32"), !cast<PatFrags>("sse_load_f32"),
                               !if (!eq (EltTypeName, "f64"), !cast<PatFrags>("sse_load_f64"), ?)));

  // The string to specify embedded broadcast in assembly.
  string BroadcastStr = "{1to" # NumElts # "}";

  // 8-bit compressed displacement tuple/subvector format. This is only
  // defined for NumElts <= 8.
  CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
                               !cast<CD8VForm>("CD8VT" # NumElts), ?);

  // Subregister index used to extract this vector from a wider register;
  // only meaningful for the 128-/256-bit instantiations.
  SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
                          !if (!eq (Size, 256), sub_ymm, ?));

  // Execution domain used by ExecutionDomainFix.
  Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
                     !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
                     !if (!eq (EltTypeName, "f16"), SSEPackedSingle, // FIXME?
                     SSEPackedInt)));

  // Scalar FP register class matching the element type.
  RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X,
                      !if (!eq (EltTypeName, "f16"), FR16X,
                      FR64X));

  // All-zeros constant of this vector type, for zero-masking patterns.
  dag ImmAllZerosV = (VT immAllZerosV);

  // Instruction-name suffix distinguishing the 128/256/512-bit variants.
  string ZSuffix = !if (!eq (Size, 128), "Z128",
                   !if (!eq (Size, 256), "Z256", "Z"));
}

// 512-bit instantiations.
def v64i8_info  : X86VectorVTInfo<64,  i8, VR512, "b">;
def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
def v8i64_info  : X86VectorVTInfo<8,  i64, VR512, "q">;
def v32f16_info : X86VectorVTInfo<32, f16, VR512, "ph">;
def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
def v8f64_info  : X86VectorVTInfo<8,  f64, VR512, "pd">;

// "x" in v32i8x_info means RC = VR256X
def v32i8x_info  : X86VectorVTInfo<32,  i8, VR256X, "b">;
def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
def v8i32x_info  : X86VectorVTInfo<8,  i32, VR256X, "d">;
def v4i64x_info  : X86VectorVTInfo<4,  i64, VR256X, "q">;
def v16f16x_info : X86VectorVTInfo<16, f16, VR256X, "ph">;
def v8f32x_info  : X86VectorVTInfo<8,  f32, VR256X, "ps">;
def v4f64x_info  : X86VectorVTInfo<4,  f64, VR256X, "pd">;

// 128-bit instantiations (RC = VR128X).
def v16i8x_info  : X86VectorVTInfo<16,  i8, VR128X, "b">;
def v8i16x_info  : X86VectorVTInfo<8,  i16, VR128X, "w">;
def v4i32x_info  : X86VectorVTInfo<4,  i32, VR128X, "d">;
def v2i64x_info  : X86VectorVTInfo<2,  i64, VR128X, "q">;
def v8f16x_info  : X86VectorVTInfo<8,  f16, VR128X, "ph">;
def v4f32x_info  : X86VectorVTInfo<4,  f32, VR128X, "ps">;
def v2f64x_info  : X86VectorVTInfo<2,  f64, VR128X, "pd">;

// We map scalar types to the smallest (128-bit) vector type
// with the appropriate element type. This allows to use the same masking logic.
def i32x_info    : X86VectorVTInfo<1,  i32, GR32, "si">;
def i64x_info    : X86VectorVTInfo<1,  i64, GR64, "sq">;
def f16x_info    : X86VectorVTInfo<1,  f16, VR128X, "sh">;
def f32x_info    : X86VectorVTInfo<1,  f32, VR128X, "ss">;
def f64x_info    : X86VectorVTInfo<1,  f64, VR128X, "sd">;

// Bundles the 512/256/128-bit X86VectorVTInfo records for one element type so
// a single argument can drive generation of all three vector lengths (VLX).
class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
                           X86VectorVTInfo i128> {
  X86VectorVTInfo info512 = i512;
  X86VectorVTInfo info256 = i256;
  X86VectorVTInfo info128 = i128;
}

def avx512vl_i8_info  : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
                                             v16i8x_info>;
def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
                                             v8i16x_info>;
def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
                                             v4i32x_info>;
def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
                                             v2i64x_info>;
def avx512vl_f16_info : AVX512VLVectorVTInfo<v32f16_info, v16f16x_info,
                                             v8f16x_info>;
def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
                                             v4f32x_info>;
def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
                                             v2f64x_info>;

// Info for mask (k-register) vectors: register class, write-mask register
// class, and the vNi1 value type.
class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
                       ValueType _vt> {
  RegisterClass KRC = _krc;
  RegisterClass KRCWM = _krcwm;
  ValueType KVT = _vt;
}

def v1i1_info  : X86KVectorVTInfo<VK1,  VK1WM,  v1i1>;
def v2i1_info  : X86KVectorVTInfo<VK2,  VK2WM,  v2i1>;
def v4i1_info  : X86KVectorVTInfo<VK4,  VK4WM,  v4i1>;
def v8i1_info  : X86KVectorVTInfo<VK8,  VK8WM,  v8i1>;
def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;

// Used for matching masked operations. Ensures the operation part only has a
// single use.
// vselect restricted to the cases where forming a masked operation is
// profitable (the select/operation pair has the right use counts).
def vselect_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2),
                           (vselect node:$mask, node:$src1, node:$src2), [{
  return isProfitableToFormMaskedOp(N);
}]>;

// Scalar analogue of vselect_mask, matching the X86selects node.
def X86selects_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2),
                              (X86selects node:$mask, node:$src1, node:$src2), [{
  return isProfitableToFormMaskedOp(N);
}]>;

// This multiclass generates the masking variants from the non-masking
// variant. It only provides the assembly pieces for the masking variants.
// It assumes custom ISel patterns for masking which can be provided as
// template arguments.
multiclass AVX512_maskable_custom<bits<8> O, Format F,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  list<dag> MaskingPattern,
                                  list<dag> ZeroMaskingPattern,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable,
                                  string ClobberConstraint = ""> {
  // Unmasked form.
  let isCommutable = IsCommutable, Constraints = ClobberConstraint in
  def NAME: AVX512<O, F, Outs, Ins,
                   OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                             "$dst, "#IntelSrcAsm#"}",
                   Pattern>;

  // Merge-masking form ("{k}" in asm).
  // Prefer over VMOV*rrk Pat<>
  let isCommutable = IsKCommutable in
  def NAME#k: AVX512<O, F, Outs, MaskingIns,
                     OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                               "$dst {${mask}}, "#IntelSrcAsm#"}",
                     MaskingPattern>,
              EVEX_K {
    // In case of the 3src subclass this is overridden with a let.
    // Combine the tied-operand constraint with any clobber constraint.
    string Constraints = !if(!eq(ClobberConstraint, ""), MaskingConstraint,
                         !if(!eq(MaskingConstraint, ""), ClobberConstraint,
                         !strconcat(ClobberConstraint, ", ", MaskingConstraint)));
  }

  // Zero-masking form ("{k}{z}" in asm).
  // Zero mask does not add any restrictions to commute operands transformation.
  // So, it is Ok to use IsCommutable instead of IsKCommutable.
  let isCommutable = IsKZCommutable, // Prefer over VMOV*rrkz Pat<>
      Constraints = ClobberConstraint in
  def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
                      OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
                                "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
                      ZeroMaskingPattern>,
               EVEX_KZ;
}


// Common base class of AVX512_maskable and AVX512_maskable_3src.
// Builds the three set-patterns from RHS/MaskingRHS; the zero-masking pattern
// is derived here by selecting against the all-zeros vector.
multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS,
                                  SDPatternOperator Select = vselect_mask,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable,
                                  string ClobberConstraint = ""> :
  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst, MaskingRHS)],
                         [(set _.RC:$dst,
                               (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
                         MaskingConstraint, IsCommutable,
                         IsKCommutable, IsKZCommutable, ClobberConstraint>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
// This version uses a separate dag for non-masking and masking.
multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
                                 dag Outs, dag Ins, string OpcodeStr,
                                 string AttSrcAsm, string IntelSrcAsm,
                                 dag RHS, dag MaskRHS,
                                 string ClobberConstraint = "",
                                 bit IsCommutable = 0, bit IsKCommutable = 0,
                                 bit IsKZCommutable = IsCommutable> :
  // Masking inputs prepend the tied pass-through operand ($src0) and the
  // write-mask; zero-masking only needs the mask.
  AVX512_maskable_custom<O, F, Outs, Ins,
                         !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                         !con((ins _.KRCWM:$mask), Ins),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst,
                               (vselect_mask _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
                         [(set _.RC:$dst,
                               (vselect_mask _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
                         "$src0 = $dst", IsCommutable, IsKCommutable,
                         IsKZCommutable, ClobberConstraint>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS,
                           bit IsCommutable = 0, bit IsKCommutable = 0,
                           bit IsKZCommutable = IsCommutable,
                           SDPatternOperator Select = vselect_mask,
                           string ClobberConstraint = ""> :
  AVX512_maskable_common<O, F, _, Outs, Ins,
                         !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                         !con((ins _.KRCWM:$mask), Ins),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                         (Select _.KRCWM:$mask, RHS, _.RC:$src0),
                         Select, "$src0 = $dst", IsCommutable, IsKCommutable,
                         IsKZCommutable, ClobberConstraint>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the scalar instruction.
multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins, string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS> :
  // Scalar ops use X86selects_mask and are never treated as commutable.
  AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
                  RHS, 0, 0, 0, X86selects_mask>;

// Similar to AVX512_maskable but in this case one of the source operands
// ($src1) is already tied to $dst so we just use that for the preserved
// vector elements. NOTE that the NonTiedIns (the ins dag) should exclude
// $src1.
multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
                                dag Outs, dag NonTiedIns, string OpcodeStr,
                                string AttSrcAsm, string IntelSrcAsm,
                                dag RHS,
                                bit IsCommutable = 0,
                                bit IsKCommutable = 0,
                                SDPatternOperator Select = vselect_mask,
                                bit MaskOnly = 0> :
  AVX512_maskable_common<O, F, _, Outs,
                         !con((ins _.RC:$src1), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm,
                         // MaskOnly suppresses the unmasked ISel pattern.
                         !if(MaskOnly, (null_frag), RHS),
                         (Select _.KRCWM:$mask, RHS, _.RC:$src1),
                         Select, "", IsCommutable, IsKCommutable>;

// Similar to AVX512_maskable_3src but in this case the input VT for the tied
// operand differs from the output VT. This requires a bitconvert on
// the preserved vector going into the vselect.
// NOTE: The unmasked pattern is disabled.
multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
                                     X86VectorVTInfo InVT,
                                     dag Outs, dag NonTiedIns, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS, bit IsCommutable = 0> :
  AVX512_maskable_common<O, F, OutVT, Outs,
                         !con((ins InVT.RC:$src1), NonTiedIns),
                         !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                         !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
                         // bitconvert reinterprets the tied input in the
                         // output VT before it feeds the select.
                         (vselect_mask InVT.KRCWM:$mask, RHS,
                          (bitconvert InVT.RC:$src1)),
                         vselect_mask, "", IsCommutable>;

// Scalar version of AVX512_maskable_3src: uses X86selects_mask.
multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                       dag Outs, dag NonTiedIns, string OpcodeStr,
                                       string AttSrcAsm, string IntelSrcAsm,
                                       dag RHS,
                                       bit IsCommutable = 0,
                                       bit IsKCommutable = 0,
                                       bit MaskOnly = 0> :
  AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
                       IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
                       X86selects_mask, MaskOnly>;

// Assembly-only variant: the caller supplies the unmasked pattern, and the
// masked/zero-masked forms get no ISel patterns at all.
multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern> :
  AVX512_maskable_custom<O, F, Outs, Ins,
                         !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                         !con((ins _.KRCWM:$mask), Ins),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                         "$src0 = $dst">;

// Assembly-only variant of AVX512_maskable_3src ($src1 tied to $dst).
multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                       dag Outs, dag NonTiedIns,
                                       string OpcodeStr,
                                       string AttSrcAsm, string IntelSrcAsm,
                                       list<dag> Pattern> :
  AVX512_maskable_custom<O, F, Outs,
                         !con((ins _.RC:$src1), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                         "">;

// Instruction with mask that puts result in mask register,
// like "compare" and "vptest"
multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
                                      dag Outs,
                                      dag Ins, dag MaskingIns,
                                      string OpcodeStr,
                                      string AttSrcAsm, string IntelSrcAsm,
                                      list<dag> Pattern,
                                      list<dag> MaskingPattern,
                                      bit IsCommutable = 0> {
  // Only unmasked and merge-masked forms exist: a k-register destination has
  // no zero-masking variant.
  let isCommutable = IsCommutable in {
    def NAME: AVX512<O, F, Outs, Ins,
                     OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                               "$dst, "#IntelSrcAsm#"}",
                     Pattern>;

    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                 "$dst {${mask}}, "#IntelSrcAsm#"}",
                       MaskingPattern>, EVEX_K;
  }
}

// Builds the set-patterns (destination is the mask register class _.KRC).
multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                                      dag Outs,
                                      dag Ins, dag MaskingIns,
                                      string OpcodeStr,
                                      string AttSrcAsm, string IntelSrcAsm,
                                      dag RHS, dag MaskingRHS,
                                      bit IsCommutable = 0> :
  AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
                             AttSrcAsm, IntelSrcAsm,
                             [(set _.KRC:$dst, RHS)],
                             [(set _.KRC:$dst, MaskingRHS)], IsCommutable>;

// The masked form ANDs the incoming mask with the (single-use) compare
// result RHS_su.
multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                               dag Outs, dag Ins, string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, dag RHS_su, bit IsCommutable = 0> :
  AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
                             !con((ins _.KRCWM:$mask), Ins),
                             OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                             (and _.KRCWM:$mask, RHS_su), IsCommutable>;

// Used by conversion instructions.
multiclass AVX512_maskable_cvt<bits<8> O, Format F, X86VectorVTInfo _,
                               dag Outs,
                               dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                               string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, dag MaskingRHS, dag ZeroMaskingRHS> :
  // Conversions supply all three RHS dags explicitly (the masked result type
  // can differ from the source type, so no shared select is built here).
  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst, MaskingRHS)],
                         [(set _.RC:$dst, ZeroMaskingRHS)],
                         "$src0 = $dst">;

// FMA variant: $src1 is tied to $dst and serves as the masked pass-through.
multiclass AVX512_maskable_fma<bits<8> O, Format F, X86VectorVTInfo _,
                               dag Outs, dag NonTiedIns, string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, dag MaskingRHS, bit IsCommutable,
                               bit IsKCommutable> :
  AVX512_maskable_custom<O, F, Outs,
                         !con((ins _.RC:$src1), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst,
                               (vselect_mask _.KRCWM:$mask, MaskingRHS, _.RC:$src1))],
                         [(set _.RC:$dst,
                               (vselect_mask _.KRCWM:$mask, MaskingRHS, _.ImmAllZerosV))],
                         "", IsCommutable, IsKCommutable>;

// Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
// swizzled by ExecutionDomainFix to pxor.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
                        [(set VR512:$dst, (v16i32 immAllZerosV))]>;
def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
                              [(set VR512:$dst, (v16i32 immAllOnesV))]>;
}

// Other 512-bit all-zeros types reuse the v16i32 pseudo.
let Predicates = [HasAVX512] in {
def : Pat<(v64i8 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v32i16 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
}

// Alias instructions that allow VPTERNLOG to be used with a mask to create
// a mix of all ones and all zeros elements. This is done this way to force
// the same register to be used as input for all three sources.
let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK16WM:$mask), "",
                                [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
                                                           (v16i32 immAllOnesV),
                                                           (v16i32 immAllZerosV)))]>;
def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK8WM:$mask), "",
                                [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
                                                           (v8i64 immAllOnesV),
                                                           (v8i64 immAllZerosV)))]>;
}

// 128-/256-bit zero pseudos, analogous to AVX512_512_SET0 above.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
                        [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
                        [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
}

let Predicates = [HasAVX512] in {
def : Pat<(v8i16 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v16i8 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v2i64 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v4f32 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v2f64 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v32i8 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v16i16 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v4i64 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v8f32 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v4f64 immAllZerosV), (AVX512_256_SET0)>;
}

let Predicates = [HasFP16] in {
def : Pat<(v8f16 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v16f16 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v32f16 immAllZerosV), (AVX512_512_SET0)>;
}

// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
  def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
                          [(set FR32X:$dst, fp32imm0)]>;
  def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
                          [(set FR64X:$dst, fp64imm0)]>;
  def AVX512_FsFLD0F128 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
                            [(set VR128X:$dst, fp128imm0)]>;
}

let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasFP16] in {
  def AVX512_FsFLD0SH : I<0, Pseudo, (outs FR16X:$dst), (ins), "",
                          [(set FR16X:$dst, fp16imm0)]>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - VECTOR INSERT
//

// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one.
multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
                                  X86VectorVTInfo To,
                                  SDPatternOperator vinsert_insert,
                                  SDPatternOperator vinsert_for_mask,
                                  X86FoldableSchedWrite sched> {
  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    // Register form, e.g. "vinsertf32x4 $imm, %xmm, %zmm, %zmm".
    defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                                         (From.VT From.RC:$src2),
                                         (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                                           (From.VT From.RC:$src2),
                                           (iPTR imm))>,
                   AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
    // Memory form: the inserted subvector is loaded from $src2.
    let mayLoad = 1 in
    defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                                         (From.VT (From.LdFrag addr:$src2)),
                                         (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                                           (From.VT (From.LdFrag addr:$src2)),
                                           (iPTR imm))>, AVX512AIi8Base, EVEX_4V,
                   EVEX_CD8<From.EltSize, From.CD8TupleForm>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Passes the same pattern operator for masked and unmasked ops.
multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
                            X86VectorVTInfo To,
                            SDPatternOperator vinsert_insert,
                            X86FoldableSchedWrite sched> :
  vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>;

// Emits Pat<> records lowering a subvector-insert node with alternative
// element types onto an already-defined VINSERT instruction (InstrStr).
multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                                     X86VectorVTInfo To, PatFrag vinsert_insert,
                                     SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> {
  let Predicates = p in {
    def : Pat<(vinsert_insert:$ins
                  (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                  To.RC:$src1, From.RC:$src2,
                  (INSERT_get_vinsert_imm To.RC:$ins)))>;

    def : Pat<(vinsert_insert:$ins
                  (To.VT To.RC:$src1),
                  (From.VT (From.LdFrag addr:$src2)),
                  (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rm")
                  To.RC:$src1, addr:$src2,
                  (INSERT_get_vinsert_imm To.RC:$ins)))>;
  }
}

// Instantiates the whole VINSERT family (32x4/64x4 plus the DQI-only
// 64x2/32x8 forms) for one float/int opcode pair.
multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
                            ValueType EltVT64, int Opcode256,
                            X86FoldableSchedWrite sched> {

  let Predicates = [HasVLX] in
  defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 vinsert128_insert, sched>, EVEX_V256;

  defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 vinsert128_insert, sched>, EVEX_V512;

  defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 vinsert256_insert, sched>, VEX_W, EVEX_V512;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasVLX, HasDQI] in
  defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 null_frag, vinsert128_insert, sched>,
                                 VEX_W1X, EVEX_V256;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 null_frag, vinsert128_insert, sched>,
                                 VEX_W, EVEX_V512;

    defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 null_frag, vinsert256_insert, sched>,
                                 EVEX_V512;
  }
}

// FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>;
defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>;

// Codegen pattern with the alternative types,
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;

defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;

defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;

// Codegen pattern with the alternative types insert VEC128 into VEC256
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v8f16x_info, v16f16x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasFP16, HasVLX]>;
// Codegen pattern with the alternative types insert VEC128 into VEC512
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v8f16x_info, v32f16_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasFP16]>;
// Codegen pattern with the alternative types insert VEC256 into VEC512
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v16f16x_info, v32f16_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasFP16]>;


// Emits masked/zero-masked Pat<> records for an insert whose select is
// performed in a different ("Cast") type than the insert itself; a bitconvert
// bridges the two types. Covers the rrk/rmk/rrkz/rmkz instruction forms.
multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                 X86VectorVTInfo To, X86VectorVTInfo Cast,
                                 PatFrag vinsert_insert,
                                 SDNodeXForm INSERT_get_vinsert_imm,
                                 list<Predicate> p> {
let Predicates = p in {
  // Merge-masked, register source.
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT From.RC:$src2),
                                                 (iPTR imm))),
                           Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rrk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  // Merge-masked, memory source.
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT
                                                  (bitconvert
                                                   (From.LdFrag addr:$src2))),
                                                 (iPTR imm))),
                           Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rmk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;

  // Zero-masked, register source.
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT From.RC:$src2),
                                                 (iPTR imm))),
                           Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rrkz")
             Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  // Zero-masked, memory source.
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT (From.LdFrag addr:$src2)),
                                                 (iPTR imm))),
                           Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rmkz")
             Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
}
}

defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
                             v8f32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
                             v4f64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4i32x_info, v8i32x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v8i16x_info, v16i16x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v16i8x_info, v32i8x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
                             v16f32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
                             v8f64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
                             v16f32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
                             v8f64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

// vinsertps - insert f32 to XMM
let ExeDomain = SSEPackedSingle in {
let isCommutable = 1 in
def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg,
(outs VR128X:$dst), 832 (ins VR128X:$src1, VR128X:$src2, u8imm:$src3), 833 "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 834 [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, timm:$src3))]>, 835 EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>; 836def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst), 837 (ins VR128X:$src1, f32mem:$src2, u8imm:$src3), 838 "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 839 [(set VR128X:$dst, (X86insertps VR128X:$src1, 840 (v4f32 (scalar_to_vector (loadf32 addr:$src2))), 841 timm:$src3))]>, 842 EVEX_4V, EVEX_CD8<32, CD8VT1>, 843 Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>; 844} 845 846//===----------------------------------------------------------------------===// 847// AVX-512 VECTOR EXTRACT 848//--- 849 850// Supports two different pattern operators for mask and unmasked ops. Allows 851// null_frag to be passed for one. 852multiclass vextract_for_size_split<int Opcode, 853 X86VectorVTInfo From, X86VectorVTInfo To, 854 SDPatternOperator vextract_extract, 855 SDPatternOperator vextract_for_mask, 856 SchedWrite SchedRR, SchedWrite SchedMR> { 857 858 let hasSideEffects = 0, ExeDomain = To.ExeDomain in { 859 defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst), 860 (ins From.RC:$src1, u8imm:$idx), 861 "vextract" # To.EltTypeName # "x" # To.NumElts, 862 "$idx, $src1", "$src1, $idx", 863 (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)), 864 (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>, 865 AVX512AIi8Base, EVEX, Sched<[SchedRR]>; 866 867 def mr : AVX512AIi8<Opcode, MRMDestMem, (outs), 868 (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx), 869 "vextract" # To.EltTypeName # "x" # To.NumElts # 870 "\t{$idx, $src1, $dst|$dst, $src1, $idx}", 871 [(store (To.VT (vextract_extract:$idx 872 (From.VT From.RC:$src1), (iPTR imm))), 873 addr:$dst)]>, EVEX, 874 Sched<[SchedMR]>; 875 876 let mayStore = 1, hasSideEffects = 
0 in 877 def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs), 878 (ins To.MemOp:$dst, To.KRCWM:$mask, 879 From.RC:$src1, u8imm:$idx), 880 "vextract" # To.EltTypeName # "x" # To.NumElts # 881 "\t{$idx, $src1, $dst {${mask}}|" 882 "$dst {${mask}}, $src1, $idx}", []>, 883 EVEX_K, EVEX, Sched<[SchedMR]>, NotMemoryFoldable; 884 } 885} 886 887// Passes the same pattern operator for masked and unmasked ops. 888multiclass vextract_for_size<int Opcode, X86VectorVTInfo From, 889 X86VectorVTInfo To, 890 SDPatternOperator vextract_extract, 891 SchedWrite SchedRR, SchedWrite SchedMR> : 892 vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract, SchedRR, SchedMR>; 893 894// Codegen pattern for the alternative types 895multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From, 896 X86VectorVTInfo To, PatFrag vextract_extract, 897 SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> { 898 let Predicates = p in { 899 def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)), 900 (To.VT (!cast<Instruction>(InstrStr#"rr") 901 From.RC:$src1, 902 (EXTRACT_get_vextract_imm To.RC:$ext)))>; 903 def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1), 904 (iPTR imm))), addr:$dst), 905 (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1, 906 (EXTRACT_get_vextract_imm To.RC:$ext))>; 907 } 908} 909 910multiclass vextract_for_type<ValueType EltVT32, int Opcode128, 911 ValueType EltVT64, int Opcode256, 912 SchedWrite SchedRR, SchedWrite SchedMR> { 913 let Predicates = [HasAVX512] in { 914 defm NAME # "32x4Z" : vextract_for_size<Opcode128, 915 X86VectorVTInfo<16, EltVT32, VR512>, 916 X86VectorVTInfo< 4, EltVT32, VR128X>, 917 vextract128_extract, SchedRR, SchedMR>, 918 EVEX_V512, EVEX_CD8<32, CD8VT4>; 919 defm NAME # "64x4Z" : vextract_for_size<Opcode256, 920 X86VectorVTInfo< 8, EltVT64, VR512>, 921 X86VectorVTInfo< 4, EltVT64, VR256X>, 922 vextract256_extract, SchedRR, SchedMR>, 923 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>; 
924 } 925 let Predicates = [HasVLX] in 926 defm NAME # "32x4Z256" : vextract_for_size<Opcode128, 927 X86VectorVTInfo< 8, EltVT32, VR256X>, 928 X86VectorVTInfo< 4, EltVT32, VR128X>, 929 vextract128_extract, SchedRR, SchedMR>, 930 EVEX_V256, EVEX_CD8<32, CD8VT4>; 931 932 // Even with DQI we'd like to only use these instructions for masking. 933 let Predicates = [HasVLX, HasDQI] in 934 defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128, 935 X86VectorVTInfo< 4, EltVT64, VR256X>, 936 X86VectorVTInfo< 2, EltVT64, VR128X>, 937 null_frag, vextract128_extract, SchedRR, SchedMR>, 938 VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>; 939 940 // Even with DQI we'd like to only use these instructions for masking. 941 let Predicates = [HasDQI] in { 942 defm NAME # "64x2Z" : vextract_for_size_split<Opcode128, 943 X86VectorVTInfo< 8, EltVT64, VR512>, 944 X86VectorVTInfo< 2, EltVT64, VR128X>, 945 null_frag, vextract128_extract, SchedRR, SchedMR>, 946 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>; 947 defm NAME # "32x8Z" : vextract_for_size_split<Opcode256, 948 X86VectorVTInfo<16, EltVT32, VR512>, 949 X86VectorVTInfo< 8, EltVT32, VR256X>, 950 null_frag, vextract256_extract, SchedRR, SchedMR>, 951 EVEX_V512, EVEX_CD8<32, CD8VT8>; 952 } 953} 954 955// TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types. 956defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, WriteFShuffle256, WriteFStore>; 957defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, WriteShuffle256, WriteVecStore>; 958 959// extract_subvector codegen patterns with the alternative types. 960// Even with AVX512DQ we'll still use these for unmasked operations. 
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen pattern with the alternative types extract VEC128 from VEC256
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v16f16x_info, v8f16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasFP16, HasVLX]>;

// Codegen pattern with the alternative types extract VEC128 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v32f16_info, v8f16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasFP16]>;
// Codegen pattern with the alternative types extract VEC256 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v32f16_info, v16f16x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasFP16]>;


// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
let Predicates = [NoVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI128rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF128rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI128rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF128rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI128rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI128rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}

// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
let Predicates = [HasVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI32x4Z256rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF32x4Z256rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI32x4Z256rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF32x4Z256rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI32x4Z256rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI32x4Z256rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}

let Predicates = [HasFP16, HasVLX] in
def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))),
          (v8f16 (VEXTRACTF32x4Z256rr
                  (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;


// Additional patterns for handling a bitcast between the vselect and the
// extract_subvector.
multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                  X86VectorVTInfo To, X86VectorVTInfo Cast,
                                  PatFrag vextract_extract,
                                  SDNodeXForm EXTRACT_get_vextract_imm,
                                  list<Predicate> p> {
let Predicates = p in {
  // Merge-masked form. The vselect operates on Cast.VT values, so the
  // pass-through operand must come from Cast.RC (previously To.RC, which
  // was inconsistent with the Cast.RC operand fed to the instruction).
  def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
                                   (bitconvert
                                    (To.VT (vextract_extract:$ext
                                            (From.VT From.RC:$src), (iPTR imm)))),
                                   Cast.RC:$src0)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
                      Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;

  // Zero-masked form.
  def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
                                   (bitconvert
                                    (To.VT (vextract_extract:$ext
                                            (From.VT From.RC:$src), (iPTR imm)))),
                                   Cast.ImmAllZerosV)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
                      Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
}
}

defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
                              v8f32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
                              v4f64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;

// vextractps - extract 32 bits from XMM
def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32orGR64:$dst),
                      (ins VR128X:$src1, u8imm:$src2),
                      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set GR32orGR64:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
                      EVEX, VEX_WIG, Sched<[WriteVecExtract]>;

def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
                      (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
                      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
                              addr:$dst)]>,
                      EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;

//===---------------------------------------------------------------------===//
// AVX-512 BROADCAST
//---
// broadcast with a scalar argument.
multiclass avx512_broadcast_scalar<string Name, X86VectorVTInfo DestInfo,
                                   X86VectorVTInfo SrcInfo> {
  def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rr)
             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
                          (X86VBroadcast SrcInfo.FRC:$src),
                          DestInfo.RC:$src0)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rrk)
             DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
                          (X86VBroadcast SrcInfo.FRC:$src),
                          DestInfo.ImmAllZerosV)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rrkz)
             DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
}

// Split version to allow mask and broadcast node to be different types. This
// helps support the 32x2 broadcasts.
multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
                                     SchedWrite SchedRR, SchedWrite SchedRM,
                                     X86VectorVTInfo MaskInfo,
                                     X86VectorVTInfo DestInfo,
                                     X86VectorVTInfo SrcInfo,
                                     bit IsConvertibleToThreeAddress,
                                     SDPatternOperator UnmaskedOp = X86VBroadcast,
                                     SDPatternOperator UnmaskedBcastOp = SrcInfo.BroadcastLdFrag> {
  let hasSideEffects = 0 in
  def rr : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), (ins SrcInfo.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set MaskInfo.RC:$dst,
                      (MaskInfo.VT
                       (bitconvert
                        (DestInfo.VT
                         (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))],
                    DestInfo.ExeDomain>, T8PD, EVEX, Sched<[SchedRR]>;
  def rrkz : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
                      (ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                                 "${dst} {${mask}} {z}, $src}"),
                      [(set MaskInfo.RC:$dst,
                        (vselect_mask MaskInfo.KRCWM:$mask,
                         (MaskInfo.VT
                          (bitconvert
                           (DestInfo.VT
                            (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
                         MaskInfo.ImmAllZerosV))],
                      DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ, Sched<[SchedRR]>;
  let Constraints = "$src0 = $dst" in
  def rrk : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
                     (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
                          SrcInfo.RC:$src),
                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
                                "${dst} {${mask}}, $src}"),
                     [(set MaskInfo.RC:$dst,
                       (vselect_mask MaskInfo.KRCWM:$mask,
                        (MaskInfo.VT
                         (bitconvert
                          (DestInfo.VT
                           (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
                        MaskInfo.RC:$src0))],
                     DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K, Sched<[SchedRR]>;

  let hasSideEffects = 0, mayLoad = 1 in
  def rm : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
                    (ins SrcInfo.ScalarMemOp:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set MaskInfo.RC:$dst,
                      (MaskInfo.VT
                       (bitconvert
                        (DestInfo.VT
                         (UnmaskedBcastOp addr:$src)))))],
                    DestInfo.ExeDomain>, T8PD, EVEX,
                    EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;

  def rmkz : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
                      (ins MaskInfo.KRCWM:$mask, SrcInfo.ScalarMemOp:$src),
                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                                 "${dst} {${mask}} {z}, $src}"),
                      [(set MaskInfo.RC:$dst,
                        (vselect_mask MaskInfo.KRCWM:$mask,
                         (MaskInfo.VT
                          (bitconvert
                           (DestInfo.VT
                            (SrcInfo.BroadcastLdFrag addr:$src)))),
                         MaskInfo.ImmAllZerosV))],
                      DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ,
                      EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;

  let Constraints = "$src0 = $dst",
      isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
  def rmk : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
                     (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
                          SrcInfo.ScalarMemOp:$src),
                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
                                "${dst} {${mask}}, $src}"),
                     [(set MaskInfo.RC:$dst,
                       (vselect_mask MaskInfo.KRCWM:$mask,
                        (MaskInfo.VT
                         (bitconvert
                          (DestInfo.VT
                           (SrcInfo.BroadcastLdFrag addr:$src)))),
                        MaskInfo.RC:$src0))],
                     DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K,
                     EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
}

// Helper class to force mask and broadcast result to same type.
multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
                               SchedWrite SchedRR, SchedWrite SchedRM,
                               X86VectorVTInfo DestInfo,
                               X86VectorVTInfo SrcInfo,
                               bit IsConvertibleToThreeAddress> :
  avx512_broadcast_rm_split<opc, OpcodeStr, SchedRR, SchedRM,
                            DestInfo, DestInfo, SrcInfo,
                            IsConvertibleToThreeAddress>;

multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                  WriteFShuffle256Ld, _.info512, _.info128, 1>,
              avx512_broadcast_scalar<NAME, _.info512, _.info128>,
              EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                     WriteFShuffle256Ld, _.info256, _.info128, 1>,
                 avx512_broadcast_scalar<NAME, _.info256, _.info128>,
                 EVEX_V256;
  }
}

multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                  WriteFShuffle256Ld, _.info512, _.info128, 1>,
              avx512_broadcast_scalar<NAME, _.info512, _.info128>,
              EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                     WriteFShuffle256Ld, _.info256, _.info128, 1>,
                 avx512_broadcast_scalar<NAME, _.info256, _.info128>,
                 EVEX_V256;
    defm Z128  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                     WriteFShuffle256Ld, _.info128, _.info128, 1>,
                 avx512_broadcast_scalar<NAME, _.info128, _.info128>,
                 EVEX_V128;
  }
}
defm VBROADCASTSS : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
                                           avx512vl_f32_info>;
defm VBROADCASTSD : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
                                           avx512vl_f64_info>, VEX_W1X;

multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
                                    X86VectorVTInfo _, SDPatternOperator OpNode,
                                    RegisterClass SrcRC> {
  // Fold with a mask even if it has multiple uses since it is cheap.
  let ExeDomain = _.ExeDomain in
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins SrcRC:$src),
                            "vpbroadcast"#_.Suffix, "$src", "$src",
                            (_.VT (OpNode SrcRC:$src)), /*IsCommutable*/0,
                            /*IsKCommutable*/0, /*IsKZCommutable*/0, vselect>,
                            T8PD, EVEX, Sched<[SchedRR]>;
}

multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
                                      X86VectorVTInfo _, SDPatternOperator OpNode,
                                      RegisterClass SrcRC, SubRegIndex Subreg> {
  let hasSideEffects = 0, ExeDomain = _.ExeDomain in
  defm rr : AVX512_maskable_custom<opc, MRMSrcReg,
                                   (outs _.RC:$dst), (ins GR32:$src),
                                   !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
                                   !con((ins _.KRCWM:$mask), (ins GR32:$src)),
                                   "vpbroadcast"#_.Suffix, "$src", "$src", [], [], [],
                                   "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>;

  def : Pat <(_.VT (OpNode SrcRC:$src)),
             (!cast<Instruction>(Name#rr)
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  // Fold with a mask even if it has multiple uses since it is cheap.
  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
             (!cast<Instruction>(Name#rrk) _.RC:$src0, _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
             (!cast<Instruction>(Name#rrkz) _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
}

multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
                                         AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
                                         RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
                                        OpNode, SrcRC, Subreg>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
                                           _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
    defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
                                           _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
  }
}

multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
                                       SDPatternOperator OpNode,
                                       RegisterClass SrcRC, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
                                      SrcRC>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
                                         SrcRC>, EVEX_V256;
    defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
                                         SrcRC>, EVEX_V128;
  }
}

defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
                       avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
                       avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
                       HasBWI>;
defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
                                                 X86VBroadcast, GR32, HasAVX512>;
defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
                                                 X86VBroadcast, GR64, HasAVX512>, VEX_W;

multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
                                      AVX512VLVectorVTInfo _, Predicate prd,
                                      bit IsConvertibleToThreeAddress> {
  let Predicates = [prd] in {
    defm Z :   avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
                                   WriteShuffle256Ld, _.info512, _.info128,
                                   IsConvertibleToThreeAddress>,
                                  EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
                                    WriteShuffle256Ld, _.info256, _.info128,
                                    IsConvertibleToThreeAddress>,
                                   EVEX_V256;
    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle,
                                    WriteShuffleXLd, _.info128, _.info128,
                                    IsConvertibleToThreeAddress>,
                                   EVEX_V128;
  }
}

defm VPBROADCASTB  : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
                                           avx512vl_i8_info, HasBWI, 0>;
defm VPBROADCASTW  : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
                                           avx512vl_i16_info, HasBWI, 0>;
defm VPBROADCASTD  : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
                                           avx512vl_i32_info, HasAVX512, 1>;
defm VPBROADCASTQ  : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
                                           avx512vl_i64_info, HasAVX512, 1>, VEX_W1X;

multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
                                      SDPatternOperator OpNode,
                                      X86VectorVTInfo _Dst,
                                      X86VectorVTInfo _Src> {
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                           (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                           (_Dst.VT (OpNode addr:$src))>,
                           Sched<[SchedWriteShuffle.YMM.Folded]>,
                           AVX5128IBase, EVEX;
}

// This should be used for the AVX512DQ broadcast instructions. It disables
// the unmasked patterns so that we only use the DQ instructions when masking
// is requested.
// NOTE(review): extraction artifact — original lines are fused onto single
// physical lines (embedded numbers are the original line numbers); the code is
// preserved verbatim and only standalone comments are added between chunks.
// DQ variant of the subvector broadcast: pattern split so the unmasked form is
// null_frag (masked-only matching), then FP16 VPBROADCASTW patterns and the
// 512-bit VBROADCASTI/F32X4 and 64X4 subvector-broadcast definitions.
1469multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr, 1470 SDPatternOperator OpNode, 1471 X86VectorVTInfo _Dst, 1472 X86VectorVTInfo _Src> { 1473 let hasSideEffects = 0, mayLoad = 1 in 1474 defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst), 1475 (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src", 1476 (null_frag), 1477 (_Dst.VT (OpNode addr:$src))>, 1478 Sched<[SchedWriteShuffle.YMM.Folded]>, 1479 AVX5128IBase, EVEX; 1480} 1481let Predicates = [HasFP16] in { 1482 def : Pat<(v32f16 (X86VBroadcastld16 addr:$src)), 1483 (VPBROADCASTWZrm addr:$src)>; 1484 1485 def : Pat<(v32f16 (X86VBroadcast (v8f16 VR128X:$src))), 1486 (VPBROADCASTWZrr VR128X:$src)>; 1487 def : Pat<(v32f16 (X86VBroadcast (f16 FR16X:$src))), 1488 (VPBROADCASTWZrr (COPY_TO_REGCLASS FR16X:$src, VR128X))>; 1489} 1490let Predicates = [HasVLX, HasFP16] in { 1491 def : Pat<(v8f16 (X86VBroadcastld16 addr:$src)), 1492 (VPBROADCASTWZ128rm addr:$src)>; 1493 def : Pat<(v16f16 (X86VBroadcastld16 addr:$src)), 1494 (VPBROADCASTWZ256rm addr:$src)>; 1495 1496 def : Pat<(v8f16 (X86VBroadcast (v8f16 VR128X:$src))), 1497 (VPBROADCASTWZ128rr VR128X:$src)>; 1498 def : Pat<(v16f16 (X86VBroadcast (v8f16 VR128X:$src))), 1499 (VPBROADCASTWZ256rr VR128X:$src)>; 1500 1501 def : Pat<(v8f16 (X86VBroadcast (f16 FR16X:$src))), 1502 (VPBROADCASTWZ128rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>; 1503 def : Pat<(v16f16 (X86VBroadcast (f16 FR16X:$src))), 1504 (VPBROADCASTWZ256rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>; 1505} 1506 1507//===----------------------------------------------------------------------===// 1508// AVX-512 BROADCAST SUBVECTORS 1509// 1510 1511defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4", 1512 X86SubVBroadcastld128, v16i32_info, v4i32x_info>, 1513 EVEX_V512, EVEX_CD8<32, CD8VT4>; 1514defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4", 1515 X86SubVBroadcastld128, v16f32_info, v4f32x_info>, 1516 EVEX_V512, EVEX_CD8<32,
// Continuation: VBROADCASTI/F64X4 defms, then HasAVX512 patterns mapping
// 128/256-bit subvector-broadcast loads of every 512-bit element type onto the
// F64X4/F32X4 (FP) and I64X4/I32X4 (int) instructions.
CD8VT4>; 1517defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4", 1518 X86SubVBroadcastld256, v8i64_info, v4i64x_info>, VEX_W, 1519 EVEX_V512, EVEX_CD8<64, CD8VT4>; 1520defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4", 1521 X86SubVBroadcastld256, v8f64_info, v4f64x_info>, VEX_W, 1522 EVEX_V512, EVEX_CD8<64, CD8VT4>; 1523 1524let Predicates = [HasAVX512] in { 1525def : Pat<(v8f64 (X86SubVBroadcastld256 addr:$src)), 1526 (VBROADCASTF64X4rm addr:$src)>; 1527def : Pat<(v16f32 (X86SubVBroadcastld256 addr:$src)), 1528 (VBROADCASTF64X4rm addr:$src)>; 1529def : Pat<(v32f16 (X86SubVBroadcastld256 addr:$src)), 1530 (VBROADCASTF64X4rm addr:$src)>; 1531def : Pat<(v8i64 (X86SubVBroadcastld256 addr:$src)), 1532 (VBROADCASTI64X4rm addr:$src)>; 1533def : Pat<(v16i32 (X86SubVBroadcastld256 addr:$src)), 1534 (VBROADCASTI64X4rm addr:$src)>; 1535def : Pat<(v32i16 (X86SubVBroadcastld256 addr:$src)), 1536 (VBROADCASTI64X4rm addr:$src)>; 1537def : Pat<(v64i8 (X86SubVBroadcastld256 addr:$src)), 1538 (VBROADCASTI64X4rm addr:$src)>; 1539 1540def : Pat<(v8f64 (X86SubVBroadcastld128 addr:$src)), 1541 (VBROADCASTF32X4rm addr:$src)>; 1542def : Pat<(v16f32 (X86SubVBroadcastld128 addr:$src)), 1543 (VBROADCASTF32X4rm addr:$src)>; 1544def : Pat<(v32f16 (X86SubVBroadcastld128 addr:$src)), 1545 (VBROADCASTF32X4rm addr:$src)>; 1546def : Pat<(v8i64 (X86SubVBroadcastld128 addr:$src)), 1547 (VBROADCASTI32X4rm addr:$src)>; 1548def : Pat<(v16i32 (X86SubVBroadcastld128 addr:$src)), 1549 (VBROADCASTI32X4rm addr:$src)>; 1550def : Pat<(v32i16 (X86SubVBroadcastld128 addr:$src)), 1551 (VBROADCASTI32X4rm addr:$src)>; 1552def : Pat<(v64i8 (X86SubVBroadcastld128 addr:$src)), 1553 (VBROADCASTI32X4rm addr:$src)>; 1554 1555// Patterns for selects of bitcasted operations.
// Masked (zeroing and merging) selects over bitcast subvector broadcasts, and
// start of the HasVLX block with the 256-bit VBROADCASTI/F32X4Z256 defms.
1556def : Pat<(vselect_mask VK16WM:$mask, 1557 (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))), 1558 (v16f32 immAllZerosV)), 1559 (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>; 1560def : Pat<(vselect_mask VK16WM:$mask, 1561 (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))), 1562 VR512:$src0), 1563 (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>; 1564def : Pat<(vselect_mask VK16WM:$mask, 1565 (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))), 1566 (v16i32 immAllZerosV)), 1567 (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>; 1568def : Pat<(vselect_mask VK16WM:$mask, 1569 (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))), 1570 VR512:$src0), 1571 (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>; 1572 1573def : Pat<(vselect_mask VK8WM:$mask, 1574 (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))), 1575 (v8f64 immAllZerosV)), 1576 (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>; 1577def : Pat<(vselect_mask VK8WM:$mask, 1578 (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))), 1579 VR512:$src0), 1580 (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>; 1581def : Pat<(vselect_mask VK8WM:$mask, 1582 (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))), 1583 (v8i64 immAllZerosV)), 1584 (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>; 1585def : Pat<(vselect_mask VK8WM:$mask, 1586 (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))), 1587 VR512:$src0), 1588 (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>; 1589} 1590 1591let Predicates = [HasVLX] in { 1592defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4", 1593 X86SubVBroadcastld128, v8i32x_info, v4i32x_info>, 1594 EVEX_V256, EVEX_CD8<32, CD8VT4>; 1595defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4", 1596 X86SubVBroadcastld128, v8f32x_info, v4f32x_info>, 1597 EVEX_V256, EVEX_CD8<32, CD8VT4>; 1598 1599def : Pat<(v4f64 (X86SubVBroadcastld128 addr:$src)), 1600 (VBROADCASTF32X4Z256rm addr:$src)>; 1601def : Pat<(v8f32
// Remaining HasVLX 128->256 broadcast patterns and their masked selects, then
// the HasVLX+HasDQI 256-bit VBROADCASTI/F64X2Z128 (DQ, masked-only) defms.
(X86SubVBroadcastld128 addr:$src)), 1602 (VBROADCASTF32X4Z256rm addr:$src)>; 1603def : Pat<(v16f16 (X86SubVBroadcastld128 addr:$src)), 1604 (VBROADCASTF32X4Z256rm addr:$src)>; 1605def : Pat<(v4i64 (X86SubVBroadcastld128 addr:$src)), 1606 (VBROADCASTI32X4Z256rm addr:$src)>; 1607def : Pat<(v8i32 (X86SubVBroadcastld128 addr:$src)), 1608 (VBROADCASTI32X4Z256rm addr:$src)>; 1609def : Pat<(v16i16 (X86SubVBroadcastld128 addr:$src)), 1610 (VBROADCASTI32X4Z256rm addr:$src)>; 1611def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)), 1612 (VBROADCASTI32X4Z256rm addr:$src)>; 1613 1614// Patterns for selects of bitcasted operations. 1615def : Pat<(vselect_mask VK8WM:$mask, 1616 (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))), 1617 (v8f32 immAllZerosV)), 1618 (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>; 1619def : Pat<(vselect_mask VK8WM:$mask, 1620 (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))), 1621 VR256X:$src0), 1622 (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>; 1623def : Pat<(vselect_mask VK8WM:$mask, 1624 (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))), 1625 (v8i32 immAllZerosV)), 1626 (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>; 1627def : Pat<(vselect_mask VK8WM:$mask, 1628 (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))), 1629 VR256X:$src0), 1630 (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>; 1631} 1632 1633let Predicates = [HasVLX, HasDQI] in { 1634defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2", 1635 X86SubVBroadcastld128, v4i64x_info, v2i64x_info>, VEX_W1X, 1636 EVEX_V256, EVEX_CD8<64, CD8VT2>; 1637defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2", 1638 X86SubVBroadcastld128, v4f64x_info, v2f64x_info>, VEX_W1X, 1639 EVEX_V256, EVEX_CD8<64, CD8VT2>; 1640 1641// Patterns for selects of bitcasted operations.
// Masked selects for the Z128 DQ broadcasts, then the HasDQI 512-bit DQ
// broadcasts: VBROADCASTI/F64X2 and VBROADCASTI/F32X8 (masked-only patterns).
1642def : Pat<(vselect_mask VK4WM:$mask, 1643 (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))), 1644 (v4f64 immAllZerosV)), 1645 (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>; 1646def : Pat<(vselect_mask VK4WM:$mask, 1647 (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))), 1648 VR256X:$src0), 1649 (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>; 1650def : Pat<(vselect_mask VK4WM:$mask, 1651 (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))), 1652 (v4i64 immAllZerosV)), 1653 (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>; 1654def : Pat<(vselect_mask VK4WM:$mask, 1655 (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))), 1656 VR256X:$src0), 1657 (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>; 1658} 1659 1660let Predicates = [HasDQI] in { 1661defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2", 1662 X86SubVBroadcastld128, v8i64_info, v2i64x_info>, VEX_W, 1663 EVEX_V512, EVEX_CD8<64, CD8VT2>; 1664defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8", 1665 X86SubVBroadcastld256, v16i32_info, v8i32x_info>, 1666 EVEX_V512, EVEX_CD8<32, CD8VT8>; 1667defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2", 1668 X86SubVBroadcastld128, v8f64_info, v2f64x_info>, VEX_W, 1669 EVEX_V512, EVEX_CD8<64, CD8VT2>; 1670defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8", 1671 X86SubVBroadcastld256, v16f32_info, v8f32x_info>, 1672 EVEX_V512, EVEX_CD8<32, CD8VT8>; 1673 1674// Patterns for selects of bitcasted operations.
// Masked selects for the 512-bit DQ broadcasts, then the 32x2 broadcast
// multiclasses (avx512_common_broadcast_32x2 handles Z/Z256 widths).
1675def : Pat<(vselect_mask VK16WM:$mask, 1676 (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))), 1677 (v16f32 immAllZerosV)), 1678 (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>; 1679def : Pat<(vselect_mask VK16WM:$mask, 1680 (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))), 1681 VR512:$src0), 1682 (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>; 1683def : Pat<(vselect_mask VK16WM:$mask, 1684 (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))), 1685 (v16i32 immAllZerosV)), 1686 (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>; 1687def : Pat<(vselect_mask VK16WM:$mask, 1688 (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))), 1689 VR512:$src0), 1690 (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>; 1691 1692def : Pat<(vselect_mask VK8WM:$mask, 1693 (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))), 1694 (v8f64 immAllZerosV)), 1695 (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>; 1696def : Pat<(vselect_mask VK8WM:$mask, 1697 (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))), 1698 VR512:$src0), 1699 (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>; 1700def : Pat<(vselect_mask VK8WM:$mask, 1701 (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))), 1702 (v8i64 immAllZerosV)), 1703 (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>; 1704def : Pat<(vselect_mask VK8WM:$mask, 1705 (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))), 1706 VR512:$src0), 1707 (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>; 1708} 1709 1710multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr, 1711 AVX512VLVectorVTInfo _Dst, 1712 AVX512VLVectorVTInfo _Src> { 1713 let Predicates = [HasDQI] in 1714 defm Z : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256, 1715 WriteShuffle256Ld, _Dst.info512, 1716 _Src.info512, _Src.info128, 0, null_frag, null_frag>, 1717 EVEX_V512; 1718 let Predicates = [HasDQI, HasVLX] in 1719 defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256, 1720 WriteShuffle256Ld,
// End of the 32x2 multiclasses (the i32x2 variant adds a Z128 width), their
// VBROADCASTI/F32X2 instantiations, and the CDI mask-to-vector broadcasts
// (VPBROADCASTMW2D / VPBROADCASTMB2Q) built from avx512_mask_broadcastm.
_Dst.info256, 1721 _Src.info256, _Src.info128, 0, null_frag, null_frag>, 1722 EVEX_V256; 1723} 1724 1725multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr, 1726 AVX512VLVectorVTInfo _Dst, 1727 AVX512VLVectorVTInfo _Src> : 1728 avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> { 1729 1730 let Predicates = [HasDQI, HasVLX] in 1731 defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle, 1732 WriteShuffleXLd, _Dst.info128, 1733 _Src.info128, _Src.info128, 0, null_frag, null_frag>, 1734 EVEX_V128; 1735} 1736 1737defm VBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2", 1738 avx512vl_i32_info, avx512vl_i64_info>; 1739defm VBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2", 1740 avx512vl_f32_info, avx512vl_f64_info>; 1741 1742//===----------------------------------------------------------------------===// 1743// AVX-512 BROADCAST MASK TO VECTOR REGISTER 1744//--- 1745multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr, 1746 X86VectorVTInfo _, RegisterClass KRC> { 1747 def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src), 1748 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 1749 [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>, 1750 EVEX, Sched<[WriteShuffle]>; 1751} 1752 1753multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr, 1754 AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> { 1755 let Predicates = [HasCDI] in 1756 defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512; 1757 let Predicates = [HasCDI, HasVLX] in { 1758 defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256; 1759 defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128; 1760 } 1761} 1762 1763defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d", 1764 avx512vl_i32_info, VK16>; 1765defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q", 1766 avx512vl_i64_info, VK8>, VEX_W;
// NOTE(review): extraction artifact — original lines are fused onto single
// physical lines (embedded numbers are the original line numbers); the code is
// preserved verbatim and only standalone comments are added between chunks.
// VPERMI2 section: avx512_perm_i defines the reg/reg and reg/mem 3-source
// forms ($src1 index register is tied to $dst), avx512_perm_i_mb adds the
// embedded-broadcast memory form (EVEX_B), and avx512_perm_i_sizes stamps out
// 512/256/128-bit widths (VLX-gated for the narrower ones).
1767 1768//===----------------------------------------------------------------------===// 1769// -- VPERMI2 - 3 source operands form -- 1770multiclass avx512_perm_i<bits<8> opc, string OpcodeStr, 1771 X86FoldableSchedWrite sched, 1772 X86VectorVTInfo _, X86VectorVTInfo IdxVT> { 1773let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, 1774 hasSideEffects = 0 in { 1775 defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst), 1776 (ins _.RC:$src2, _.RC:$src3), 1777 OpcodeStr, "$src3, $src2", "$src2, $src3", 1778 (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>, 1779 EVEX_4V, AVX5128IBase, Sched<[sched]>; 1780 1781 let mayLoad = 1 in 1782 defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst), 1783 (ins _.RC:$src2, _.MemOp:$src3), 1784 OpcodeStr, "$src3, $src2", "$src2, $src3", 1785 (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, 1786 (_.VT (_.LdFrag addr:$src3)))), 1>, 1787 EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>; 1788 } 1789} 1790 1791multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr, 1792 X86FoldableSchedWrite sched, 1793 X86VectorVTInfo _, X86VectorVTInfo IdxVT> { 1794 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, 1795 hasSideEffects = 0, mayLoad = 1 in 1796 defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst), 1797 (ins _.RC:$src2, _.ScalarMemOp:$src3), 1798 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), 1799 !strconcat("$src2, ${src3}", _.BroadcastStr ), 1800 (_.VT (X86VPermt2 _.RC:$src2, 1801 IdxVT.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>, 1802 AVX5128IBase, EVEX_4V, EVEX_B, 1803 Sched<[sched.Folded, sched.ReadAfterFold]>; 1804} 1805 1806multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr, 1807 X86FoldableSchedWrite sched, 1808 AVX512VLVectorVTInfo VTInfo, 1809 AVX512VLVectorVTInfo ShuffleMask> { 1810 defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512, 1811 ShuffleMask.info512>, 1812
// Rest of avx512_perm_i_sizes (VLX 128/256-bit widths) and the byte/word
// variant avx512_perm_i_sizes_bw (predicate-gated, no broadcast form), then
// the VPERMI2D/Q/W/B/PS/PD instantiations with their opcode/CD8 encodings.
avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512, 1813 ShuffleMask.info512>, EVEX_V512; 1814 let Predicates = [HasVLX] in { 1815 defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128, 1816 ShuffleMask.info128>, 1817 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128, 1818 ShuffleMask.info128>, EVEX_V128; 1819 defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256, 1820 ShuffleMask.info256>, 1821 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256, 1822 ShuffleMask.info256>, EVEX_V256; 1823 } 1824} 1825 1826multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr, 1827 X86FoldableSchedWrite sched, 1828 AVX512VLVectorVTInfo VTInfo, 1829 AVX512VLVectorVTInfo Idx, 1830 Predicate Prd> { 1831 let Predicates = [Prd] in 1832 defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512, 1833 Idx.info512>, EVEX_V512; 1834 let Predicates = [Prd, HasVLX] in { 1835 defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128, 1836 Idx.info128>, EVEX_V128; 1837 defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256, 1838 Idx.info256>, EVEX_V256; 1839 } 1840} 1841 1842defm VPERMI2D : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256, 1843 avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; 1844defm VPERMI2Q : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256, 1845 avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; 1846defm VPERMI2W : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256, 1847 avx512vl_i16_info, avx512vl_i16_info, HasBWI>, 1848 VEX_W, EVEX_CD8<16, CD8VF>; 1849defm VPERMI2B : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256, 1850 avx512vl_i8_info, avx512vl_i8_info, HasVBMI>, 1851 EVEX_CD8<8, CD8VF>; 1852defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256, 1853 avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; 1854defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256, 1855 avx512vl_f64_info,
// Tail of VPERMI2PD, then avx512_perm_i_lowering: extra masked-select patterns
// for the FP VPERMI2 forms where the passthru/index value arrives bitcast to a
// different vector type (rr, rm and broadcast rmb masked variants).
avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; 1856 1857// Extra patterns to deal with extra bitcasts due to passthru and index being 1858// different types on the fp versions. 1859multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _, 1860 X86VectorVTInfo IdxVT, 1861 X86VectorVTInfo CastVT> { 1862 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 1863 (X86VPermt2 (_.VT _.RC:$src2), 1864 (IdxVT.VT (bitconvert 1865 (CastVT.VT _.RC:$src1))), 1866 _.RC:$src3), 1867 (_.VT (bitconvert (CastVT.VT _.RC:$src1))))), 1868 (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask, 1869 _.RC:$src2, _.RC:$src3)>; 1870 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 1871 (X86VPermt2 _.RC:$src2, 1872 (IdxVT.VT (bitconvert 1873 (CastVT.VT _.RC:$src1))), 1874 (_.LdFrag addr:$src3)), 1875 (_.VT (bitconvert (CastVT.VT _.RC:$src1))))), 1876 (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask, 1877 _.RC:$src2, addr:$src3)>; 1878 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 1879 (X86VPermt2 _.RC:$src2, 1880 (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))), 1881 (_.BroadcastLdFrag addr:$src3)), 1882 (_.VT (bitconvert (CastVT.VT _.RC:$src1))))), 1883 (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask, 1884 _.RC:$src2, addr:$src3)>; 1885} 1886 1887// TODO: Should we add more casts? The vXi64 case is common due to ABI.
// NOTE(review): extraction artifact — original lines are fused onto single
// physical lines (embedded numbers are the original line numbers); the code is
// preserved verbatim and only standalone comments are added between chunks.
// avx512_perm_i_lowering instantiations for VPERMI2PS at all three widths,
// then the VPERMT2 section: avx512_perm_t (reg/mem forms, $src1 data register
// tied to $dst) and avx512_perm_t_mb (embedded-broadcast form, EVEX_B).
1888defm : avx512_perm_i_lowering<"VPERMI2PS", v16f32_info, v16i32_info, v8i64_info>; 1889defm : avx512_perm_i_lowering<"VPERMI2PS256", v8f32x_info, v8i32x_info, v4i64x_info>; 1890defm : avx512_perm_i_lowering<"VPERMI2PS128", v4f32x_info, v4i32x_info, v2i64x_info>; 1891 1892// VPERMT2 1893multiclass avx512_perm_t<bits<8> opc, string OpcodeStr, 1894 X86FoldableSchedWrite sched, 1895 X86VectorVTInfo _, X86VectorVTInfo IdxVT> { 1896let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in { 1897 defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 1898 (ins IdxVT.RC:$src2, _.RC:$src3), 1899 OpcodeStr, "$src3, $src2", "$src2, $src3", 1900 (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>, 1901 EVEX_4V, AVX5128IBase, Sched<[sched]>; 1902 1903 defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 1904 (ins IdxVT.RC:$src2, _.MemOp:$src3), 1905 OpcodeStr, "$src3, $src2", "$src2, $src3", 1906 (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, 1907 (_.LdFrag addr:$src3))), 1>, 1908 EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>; 1909 } 1910} 1911multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr, 1912 X86FoldableSchedWrite sched, 1913 X86VectorVTInfo _, X86VectorVTInfo IdxVT> { 1914 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in 1915 defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 1916 (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3), 1917 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), 1918 !strconcat("$src2, ${src3}", _.BroadcastStr ), 1919 (_.VT (X86VPermt2 _.RC:$src1, 1920 IdxVT.RC:$src2,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>, 1921 AVX5128IBase, EVEX_4V, EVEX_B, 1922 Sched<[sched.Folded, sched.ReadAfterFold]>; 1923} 1924 1925multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr, 1926 X86FoldableSchedWrite sched, 1927 AVX512VLVectorVTInfo VTInfo, 1928 AVX512VLVectorVTInfo ShuffleMask> { 1929 defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512, 1930
// Rest of avx512_perm_t_sizes / avx512_perm_t_sizes_bw (mirror of the VPERMI2
// width/predicate stamping) and the VPERMT2D/Q/W/B/PS/PD instantiations.
ShuffleMask.info512>, 1931 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512, 1932 ShuffleMask.info512>, EVEX_V512; 1933 let Predicates = [HasVLX] in { 1934 defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128, 1935 ShuffleMask.info128>, 1936 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128, 1937 ShuffleMask.info128>, EVEX_V128; 1938 defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256, 1939 ShuffleMask.info256>, 1940 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256, 1941 ShuffleMask.info256>, EVEX_V256; 1942 } 1943} 1944 1945multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr, 1946 X86FoldableSchedWrite sched, 1947 AVX512VLVectorVTInfo VTInfo, 1948 AVX512VLVectorVTInfo Idx, Predicate Prd> { 1949 let Predicates = [Prd] in 1950 defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512, 1951 Idx.info512>, EVEX_V512; 1952 let Predicates = [Prd, HasVLX] in { 1953 defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128, 1954 Idx.info128>, EVEX_V128; 1955 defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256, 1956 Idx.info256>, EVEX_V256; 1957 } 1958} 1959 1960defm VPERMT2D : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256, 1961 avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; 1962defm VPERMT2Q : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256, 1963 avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; 1964defm VPERMT2W : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256, 1965 avx512vl_i16_info, avx512vl_i16_info, HasBWI>, 1966 VEX_W, EVEX_CD8<16, CD8VF>; 1967defm VPERMT2B : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256, 1968 avx512vl_i8_info, avx512vl_i8_info, HasVBMI>, 1969 EVEX_CD8<8, CD8VF>; 1970defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256, 1971 avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; 1972defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
// Tail of VPERMT2PD, then BLEND-using-mask: WriteFVarBlendask defines the
// assembler-only (empty pattern) rr/rrk/rrkz and rm/rmk/rmkz blend forms.
1973 avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; 1974 1975//===----------------------------------------------------------------------===// 1976// AVX-512 - BLEND using mask 1977// 1978 1979multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr, 1980 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 1981 let ExeDomain = _.ExeDomain, hasSideEffects = 0 in { 1982 def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst), 1983 (ins _.RC:$src1, _.RC:$src2), 1984 !strconcat(OpcodeStr, 1985 "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>, 1986 EVEX_4V, Sched<[sched]>; 1987 def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst), 1988 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), 1989 !strconcat(OpcodeStr, 1990 "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"), 1991 []>, EVEX_4V, EVEX_K, Sched<[sched]>; 1992 def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst), 1993 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), 1994 !strconcat(OpcodeStr, 1995 "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"), 1996 []>, EVEX_4V, EVEX_KZ, Sched<[sched]>, NotMemoryFoldable; 1997 let mayLoad = 1 in { 1998 def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), 1999 (ins _.RC:$src1, _.MemOp:$src2), 2000 !strconcat(OpcodeStr, 2001 "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), 2002 []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, 2003 Sched<[sched.Folded, sched.ReadAfterFold]>; 2004 def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), 2005 (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2), 2006 !strconcat(OpcodeStr, 2007 "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"), 2008 []>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>, 2009 Sched<[sched.Folded, sched.ReadAfterFold]>; 2010 def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), 2011 (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2), 2012 !strconcat(OpcodeStr, 2013 "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"), 2014 []>, EVEX_4V, EVEX_KZ,
// Tail of WriteFVarBlendask, the broadcast-memory blend forms
// (WriteFVarBlendask_rmb: rmbk/rmbkz/rmb with EVEX_B), and the start of
// blendmask_dq which stamps 512/256/128-bit widths (VLX for the narrow ones).
EVEX_CD8<_.EltSize, CD8VF>, 2015 Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable; 2016 } 2017 } 2018} 2019multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr, 2020 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 2021 let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in { 2022 def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), 2023 (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2), 2024 !strconcat(OpcodeStr, 2025 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|", 2026 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>, 2027 EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, 2028 Sched<[sched.Folded, sched.ReadAfterFold]>; 2029 2030 def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), 2031 (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2), 2032 !strconcat(OpcodeStr, 2033 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|", 2034 "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>, 2035 EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, 2036 Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable; 2037 2038 def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), 2039 (ins _.RC:$src1, _.ScalarMemOp:$src2), 2040 !strconcat(OpcodeStr, 2041 "\t{${src2}", _.BroadcastStr, ", $src1, $dst|", 2042 "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>, 2043 EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, 2044 Sched<[sched.Folded, sched.ReadAfterFold]>; 2045 } 2046} 2047 2048multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched, 2049 AVX512VLVectorVTInfo VTInfo> { 2050 defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>, 2051 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>, 2052 EVEX_V512; 2053 2054 let Predicates = [HasVLX] in { 2055 defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>, 2056 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>, 2057 EVEX_V256; 2058 defm Z128 : WriteFVarBlendask<opc,
// Rest of blendmask_dq, the BWI-gated blendmask_bw (no broadcast form), the
// V(P)BLENDM* instantiations, and start of the scalar compare multiclass
// avx512_cmp_scalar (intrinsic rr form with immediate condition code).
OpcodeStr, sched.XMM, VTInfo.info128>, 2059 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>, 2060 EVEX_V128; 2061 } 2062} 2063 2064multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched, 2065 AVX512VLVectorVTInfo VTInfo> { 2066 let Predicates = [HasBWI] in 2067 defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>, 2068 EVEX_V512; 2069 2070 let Predicates = [HasBWI, HasVLX] in { 2071 defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>, 2072 EVEX_V256; 2073 defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>, 2074 EVEX_V128; 2075 } 2076} 2077 2078defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend, 2079 avx512vl_f32_info>; 2080defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend, 2081 avx512vl_f64_info>, VEX_W; 2082defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend, 2083 avx512vl_i32_info>; 2084defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend, 2085 avx512vl_i64_info>, VEX_W; 2086defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend, 2087 avx512vl_i8_info>; 2088defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend, 2089 avx512vl_i16_info>, VEX_W; 2090 2091//===----------------------------------------------------------------------===// 2092// Compare Instructions 2093//===----------------------------------------------------------------------===// 2094 2095// avx512_cmp_scalar - AVX512 CMPSS and CMPSD 2096 2097multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE, 2098 PatFrag OpNode_su, PatFrag OpNodeSAE_su, 2099 X86FoldableSchedWrite sched> { 2100 defm rr_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, 2101 (outs _.KRC:$dst), 2102 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), 2103 "vcmp"#_.Suffix, 2104 "$cc, $src2, $src1", "$src1, $src2, $cc", 2105 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc), 2106 (OpNode_su (_.VT _.RC:$src1), (_.VT
// Rest of avx512_cmp_scalar: folded-memory and {sae} intrinsic forms, plus
// the isCodeGenOnly rr/rm forms that operate on scalar FR registers; then the
// one-use (_su) PatFrag for the masked scalar compare.
_.RC:$src2), 2107 timm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC; 2108 let mayLoad = 1 in 2109 defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, 2110 (outs _.KRC:$dst), 2111 (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc), 2112 "vcmp"#_.Suffix, 2113 "$cc, $src2, $src1", "$src1, $src2, $cc", 2114 (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2), 2115 timm:$cc), 2116 (OpNode_su (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2), 2117 timm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>, 2118 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 2119 2120 let Uses = [MXCSR] in 2121 defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, 2122 (outs _.KRC:$dst), 2123 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), 2124 "vcmp"#_.Suffix, 2125 "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc", 2126 (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), 2127 timm:$cc), 2128 (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), 2129 timm:$cc)>, 2130 EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>; 2131 2132 let isCodeGenOnly = 1 in { 2133 let isCommutable = 1 in 2134 def rr : AVX512Ii8<0xC2, MRMSrcReg, 2135 (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, u8imm:$cc), 2136 !strconcat("vcmp", _.Suffix, 2137 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), 2138 [(set _.KRC:$dst, (OpNode _.FRC:$src1, 2139 _.FRC:$src2, 2140 timm:$cc))]>, 2141 EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC; 2142 def rm : AVX512Ii8<0xC2, MRMSrcMem, 2143 (outs _.KRC:$dst), 2144 (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc), 2145 !strconcat("vcmp", _.Suffix, 2146 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), 2147 [(set _.KRC:$dst, (OpNode _.FRC:$src1, 2148 (_.ScalarLdFrag addr:$src2), 2149 timm:$cc))]>, 2150 EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>, 2151 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 2152 } 2153} 2154 2155def X86cmpms_su : PatFrag<(ops node:$src1, node:$src2, node:$cc), 2156 (X86cmpms node:$src1, node:$src2, node:$cc), [{ 2157 return
// Completion of the _su PatFrags, the VCMPSS/SD (AVX512) and VCMPSH (FP16)
// instantiations, and avx512_icmp_packed: assembler-only packed integer
// compare forms writing a mask register (rr/rm and masked rrk/rmk).
N->hasOneUse(); 2158}]>; 2159def X86cmpmsSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc), 2160 (X86cmpmsSAE node:$src1, node:$src2, node:$cc), [{ 2161 return N->hasOneUse(); 2162}]>; 2163 2164let Predicates = [HasAVX512] in { 2165 let ExeDomain = SSEPackedSingle in 2166 defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsSAE, 2167 X86cmpms_su, X86cmpmsSAE_su, 2168 SchedWriteFCmp.Scl>, AVX512XSIi8Base; 2169 let ExeDomain = SSEPackedDouble in 2170 defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsSAE, 2171 X86cmpms_su, X86cmpmsSAE_su, 2172 SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W; 2173} 2174let Predicates = [HasFP16], ExeDomain = SSEPackedSingle in 2175 defm VCMPSHZ : avx512_cmp_scalar<f16x_info, X86cmpms, X86cmpmsSAE, 2176 X86cmpms_su, X86cmpmsSAE_su, 2177 SchedWriteFCmp.Scl>, AVX512XSIi8Base, TA; 2178 2179multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, 2180 X86FoldableSchedWrite sched, 2181 X86VectorVTInfo _, bit IsCommutable> { 2182 let isCommutable = IsCommutable, hasSideEffects = 0 in 2183 def rr : AVX512BI<opc, MRMSrcReg, 2184 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2), 2185 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 2186 []>, EVEX_4V, Sched<[sched]>; 2187 let mayLoad = 1, hasSideEffects = 0 in 2188 def rm : AVX512BI<opc, MRMSrcMem, 2189 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2), 2190 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 2191 []>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; 2192 let isCommutable = IsCommutable, hasSideEffects = 0 in 2193 def rrk : AVX512BI<opc, MRMSrcReg, 2194 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), 2195 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|", 2196 "$dst {${mask}}, $src1, $src2}"), 2197 []>, EVEX_4V, EVEX_K, Sched<[sched]>; 2198 let mayLoad = 1, hasSideEffects = 0 in 2199 def rmk : AVX512BI<opc, MRMSrcMem, 2200 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
// Tail of avx512_icmp_packed, the _rmb variant adding embedded-broadcast
// forms, and the _vl wrappers stamping 512/256/128-bit widths per predicate.
2201 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|", 2202 "$dst {${mask}}, $src1, $src2}"), 2203 []>, EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; 2204} 2205 2206multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, 2207 X86FoldableSchedWrite sched, X86VectorVTInfo _, 2208 bit IsCommutable> : 2209 avx512_icmp_packed<opc, OpcodeStr, sched, _, IsCommutable> { 2210 let mayLoad = 1, hasSideEffects = 0 in { 2211 def rmb : AVX512BI<opc, MRMSrcMem, 2212 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2), 2213 !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst", 2214 "|$dst, $src1, ${src2}", _.BroadcastStr, "}"), 2215 []>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 2216 def rmbk : AVX512BI<opc, MRMSrcMem, 2217 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, 2218 _.ScalarMemOp:$src2), 2219 !strconcat(OpcodeStr, 2220 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|", 2221 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), 2222 []>, EVEX_4V, EVEX_K, EVEX_B, 2223 Sched<[sched.Folded, sched.ReadAfterFold]>; 2224 } 2225} 2226 2227multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, 2228 X86SchedWriteWidths sched, 2229 AVX512VLVectorVTInfo VTInfo, Predicate prd, 2230 bit IsCommutable = 0> { 2231 let Predicates = [prd] in 2232 defm Z : avx512_icmp_packed<opc, OpcodeStr, sched.ZMM, 2233 VTInfo.info512, IsCommutable>, EVEX_V512; 2234 2235 let Predicates = [prd, HasVLX] in { 2236 defm Z256 : avx512_icmp_packed<opc, OpcodeStr, sched.YMM, 2237 VTInfo.info256, IsCommutable>, EVEX_V256; 2238 defm Z128 : avx512_icmp_packed<opc, OpcodeStr, sched.XMM, 2239 VTInfo.info128, IsCommutable>, EVEX_V128; 2240 } 2241} 2242 2243multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr, 2244 X86SchedWriteWidths sched, 2245 AVX512VLVectorVTInfo VTInfo, 2246 Predicate prd, bit IsCommutable = 0> { 2247 let Predicates = [prd] in 2248 defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.ZMM,
// Completion of the _rmb_vl wrapper, the setcc helper PatFrags, and the
// VPCMPEQ*/VPCMPGT* instantiations (AddedComplexity block).
2249 VTInfo.info512, IsCommutable>, EVEX_V512; 2250 2251 let Predicates = [prd, HasVLX] in { 2252 defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.YMM, 2253 VTInfo.info256, IsCommutable>, EVEX_V256; 2254 defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.XMM, 2255 VTInfo.info128, IsCommutable>, EVEX_V128; 2256 } 2257} 2258 2259// This fragment treats X86cmpm as commutable to help match loads in both 2260// operands for PCMPEQ. 2261def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]> ; 2262def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2), 2263 (setcc node:$src1, node:$src2, SETGT)>; 2264 2265// AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't 2266// increase the pattern complexity the way an immediate would. 2267let AddedComplexity = 2 in { 2268// FIXME: Is there a better scheduler class for VPCMP? 2269defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", 2270 SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>, 2271 EVEX_CD8<8, CD8VF>, VEX_WIG; 2272 2273defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", 2274 SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>, 2275 EVEX_CD8<16, CD8VF>, VEX_WIG; 2276 2277defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", 2278 SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>, 2279 EVEX_CD8<32, CD8VF>; 2280 2281defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", 2282 SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>, 2283 T8PD, VEX_W, EVEX_CD8<64, CD8VF>; 2284 2285defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", 2286 SchedWriteVecALU, avx512vl_i8_info, HasBWI>, 2287 EVEX_CD8<8, CD8VF>, VEX_WIG; 2288 2289defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", 2290 SchedWriteVecALU, avx512vl_i16_info, HasBWI>, 2291 EVEX_CD8<16, CD8VF>, VEX_WIG; 2292 2293defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", 2294 SchedWriteVecALU, avx512vl_i32_info, HasAVX512>, 2295 EVEX_CD8<32, CD8VF>; 2296 2297defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37,
// Tail of VPCMPGTQ, the X86pcmpm_imm / X86pcmpm_imm_commute SDNodeXForms that
// translate an ISD CondCode into a VPCMP immediate (plus the operand-swapped
// variant), and the head of avx512_icmp_cc (continues past this chunk).
"vpcmpgtq", 2298 SchedWriteVecALU, avx512vl_i64_info, HasAVX512>, 2299 T8PD, VEX_W, EVEX_CD8<64, CD8VF>; 2300} 2301 2302def X86pcmpm_imm : SDNodeXForm<setcc, [{ 2303 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); 2304 uint8_t SSECC = X86::getVPCMPImmForCond(CC); 2305 return getI8Imm(SSECC, SDLoc(N)); 2306}]>; 2307 2308// Swapped operand version of the above. 2309def X86pcmpm_imm_commute : SDNodeXForm<setcc, [{ 2310 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); 2311 uint8_t SSECC = X86::getVPCMPImmForCond(CC); 2312 SSECC = X86::getSwappedVPCMPImm(SSECC); 2313 return getI8Imm(SSECC, SDLoc(N)); 2314}]>; 2315 2316multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag, 2317 PatFrag Frag_su, 2318 X86FoldableSchedWrite sched, 2319 X86VectorVTInfo _, string Name> { 2320 let isCommutable = 1 in 2321 def rri : AVX512AIi8<opc, MRMSrcReg, 2322 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), 2323 !strconcat("vpcmp", Suffix, 2324 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), 2325 [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1), 2326 (_.VT _.RC:$src2), 2327 cond)))]>, 2328 EVEX_4V, Sched<[sched]>; 2329 def rmi : AVX512AIi8<opc, MRMSrcMem, 2330 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc), 2331 !strconcat("vpcmp", Suffix, 2332 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), 2333 [(set _.KRC:$dst, (_.KVT 2334 (Frag:$cc 2335 (_.VT _.RC:$src1), 2336 (_.VT (_.LdFrag addr:$src2)), 2337 cond)))]>, 2338 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; 2339 let isCommutable = 1 in 2340 def rrik : AVX512AIi8<opc, MRMSrcReg, 2341 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2, 2342 u8imm:$cc), 2343 !strconcat("vpcmp", Suffix, 2344 "\t{$cc, $src2, $src1, $dst {${mask}}|", 2345 "$dst {${mask}}, $src1, $src2, $cc}"), 2346 [(set _.KRC:$dst, (and _.KRCWM:$mask, 2347 (_.KVT (Frag_su:$cc (_.VT _.RC:$src1), 2348 (_.VT _.RC:$src2), 2349 cond))))]>, 2350 EVEX_4V, EVEX_K,
Sched<[sched]>; 2351 def rmik : AVX512AIi8<opc, MRMSrcMem, 2352 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2, 2353 u8imm:$cc), 2354 !strconcat("vpcmp", Suffix, 2355 "\t{$cc, $src2, $src1, $dst {${mask}}|", 2356 "$dst {${mask}}, $src1, $src2, $cc}"), 2357 [(set _.KRC:$dst, (and _.KRCWM:$mask, 2358 (_.KVT 2359 (Frag_su:$cc 2360 (_.VT _.RC:$src1), 2361 (_.VT (_.LdFrag addr:$src2)), 2362 cond))))]>, 2363 EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; 2364 2365 def : Pat<(_.KVT (Frag:$cc (_.LdFrag addr:$src2), 2366 (_.VT _.RC:$src1), cond)), 2367 (!cast<Instruction>(Name#_.ZSuffix#"rmi") 2368 _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>; 2369 2370 def : Pat<(and _.KRCWM:$mask, 2371 (_.KVT (Frag_su:$cc (_.LdFrag addr:$src2), 2372 (_.VT _.RC:$src1), cond))), 2373 (!cast<Instruction>(Name#_.ZSuffix#"rmik") 2374 _.KRCWM:$mask, _.RC:$src1, addr:$src2, 2375 (X86pcmpm_imm_commute $cc))>; 2376} 2377 2378multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag, 2379 PatFrag Frag_su, X86FoldableSchedWrite sched, 2380 X86VectorVTInfo _, string Name> : 2381 avx512_icmp_cc<opc, Suffix, Frag, Frag_su, sched, _, Name> { 2382 def rmib : AVX512AIi8<opc, MRMSrcMem, 2383 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2, 2384 u8imm:$cc), 2385 !strconcat("vpcmp", Suffix, 2386 "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|", 2387 "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"), 2388 [(set _.KRC:$dst, (_.KVT (Frag:$cc 2389 (_.VT _.RC:$src1), 2390 (_.BroadcastLdFrag addr:$src2), 2391 cond)))]>, 2392 EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 2393 def rmibk : AVX512AIi8<opc, MRMSrcMem, 2394 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, 2395 _.ScalarMemOp:$src2, u8imm:$cc), 2396 !strconcat("vpcmp", Suffix, 2397 "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|", 2398 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"), 2399 [(set _.KRC:$dst, (and _.KRCWM:$mask, 2400 (_.KVT (Frag_su:$cc 
2401 (_.VT _.RC:$src1), 2402 (_.BroadcastLdFrag addr:$src2), 2403 cond))))]>, 2404 EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 2405 2406 def : Pat<(_.KVT (Frag:$cc (_.BroadcastLdFrag addr:$src2), 2407 (_.VT _.RC:$src1), cond)), 2408 (!cast<Instruction>(Name#_.ZSuffix#"rmib") 2409 _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>; 2410 2411 def : Pat<(and _.KRCWM:$mask, 2412 (_.KVT (Frag_su:$cc (_.BroadcastLdFrag addr:$src2), 2413 (_.VT _.RC:$src1), cond))), 2414 (!cast<Instruction>(Name#_.ZSuffix#"rmibk") 2415 _.KRCWM:$mask, _.RC:$src1, addr:$src2, 2416 (X86pcmpm_imm_commute $cc))>; 2417} 2418 2419multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag, 2420 PatFrag Frag_su, X86SchedWriteWidths sched, 2421 AVX512VLVectorVTInfo VTInfo, Predicate prd> { 2422 let Predicates = [prd] in 2423 defm Z : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, 2424 sched.ZMM, VTInfo.info512, NAME>, EVEX_V512; 2425 2426 let Predicates = [prd, HasVLX] in { 2427 defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, 2428 sched.YMM, VTInfo.info256, NAME>, EVEX_V256; 2429 defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, 2430 sched.XMM, VTInfo.info128, NAME>, EVEX_V128; 2431 } 2432} 2433 2434multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag, 2435 PatFrag Frag_su, X86SchedWriteWidths sched, 2436 AVX512VLVectorVTInfo VTInfo, Predicate prd> { 2437 let Predicates = [prd] in 2438 defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, 2439 sched.ZMM, VTInfo.info512, NAME>, EVEX_V512; 2440 2441 let Predicates = [prd, HasVLX] in { 2442 defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, 2443 sched.YMM, VTInfo.info256, NAME>, EVEX_V256; 2444 defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, 2445 sched.XMM, VTInfo.info128, NAME>, EVEX_V128; 2446 } 2447} 2448 2449def X86pcmpm : PatFrag<(ops node:$src1, node:$src2, node:$cc), 2450 (setcc node:$src1, node:$src2, node:$cc), [{ 2451 ISD::CondCode CC = 
cast<CondCodeSDNode>(N->getOperand(2))->get(); 2452 return !ISD::isUnsignedIntSetCC(CC); 2453}], X86pcmpm_imm>; 2454 2455def X86pcmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc), 2456 (setcc node:$src1, node:$src2, node:$cc), [{ 2457 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); 2458 return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC); 2459}], X86pcmpm_imm>; 2460 2461def X86pcmpum : PatFrag<(ops node:$src1, node:$src2, node:$cc), 2462 (setcc node:$src1, node:$src2, node:$cc), [{ 2463 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); 2464 return ISD::isUnsignedIntSetCC(CC); 2465}], X86pcmpm_imm>; 2466 2467def X86pcmpum_su : PatFrag<(ops node:$src1, node:$src2, node:$cc), 2468 (setcc node:$src1, node:$src2, node:$cc), [{ 2469 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); 2470 return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC); 2471}], X86pcmpm_imm>; 2472 2473// FIXME: Is there a better scheduler class for VPCMP/VPCMPU? 2474defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_su, 2475 SchedWriteVecALU, avx512vl_i8_info, HasBWI>, 2476 EVEX_CD8<8, CD8VF>; 2477defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_su, 2478 SchedWriteVecALU, avx512vl_i8_info, HasBWI>, 2479 EVEX_CD8<8, CD8VF>; 2480 2481defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_su, 2482 SchedWriteVecALU, avx512vl_i16_info, HasBWI>, 2483 VEX_W, EVEX_CD8<16, CD8VF>; 2484defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_su, 2485 SchedWriteVecALU, avx512vl_i16_info, HasBWI>, 2486 VEX_W, EVEX_CD8<16, CD8VF>; 2487 2488defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_su, 2489 SchedWriteVecALU, avx512vl_i32_info, 2490 HasAVX512>, EVEX_CD8<32, CD8VF>; 2491defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_su, 2492 SchedWriteVecALU, avx512vl_i32_info, 2493 HasAVX512>, EVEX_CD8<32, CD8VF>; 2494 2495defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, 
X86pcmpm_su, 2496 SchedWriteVecALU, avx512vl_i64_info, 2497 HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>; 2498defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_su, 2499 SchedWriteVecALU, avx512vl_i64_info, 2500 HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>; 2501 2502def X86cmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc), 2503 (X86cmpm node:$src1, node:$src2, node:$cc), [{ 2504 return N->hasOneUse(); 2505}]>; 2506 2507def X86cmpm_imm_commute : SDNodeXForm<timm, [{ 2508 uint8_t Imm = X86::getSwappedVCMPImm(N->getZExtValue() & 0x1f); 2509 return getI8Imm(Imm, SDLoc(N)); 2510}]>; 2511 2512multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _, 2513 string Name> { 2514let Uses = [MXCSR], mayRaiseFPException = 1 in { 2515 defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, 2516 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,u8imm:$cc), 2517 "vcmp"#_.Suffix, 2518 "$cc, $src2, $src1", "$src1, $src2, $cc", 2519 (X86any_cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc), 2520 (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc), 2521 1>, Sched<[sched]>; 2522 2523 defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, 2524 (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc), 2525 "vcmp"#_.Suffix, 2526 "$cc, $src2, $src1", "$src1, $src2, $cc", 2527 (X86any_cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), 2528 timm:$cc), 2529 (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), 2530 timm:$cc)>, 2531 Sched<[sched.Folded, sched.ReadAfterFold]>; 2532 2533 defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, 2534 (outs _.KRC:$dst), 2535 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc), 2536 "vcmp"#_.Suffix, 2537 "$cc, ${src2}"#_.BroadcastStr#", $src1", 2538 "$src1, ${src2}"#_.BroadcastStr#", $cc", 2539 (X86any_cmpm (_.VT _.RC:$src1), 2540 (_.VT (_.BroadcastLdFrag addr:$src2)), 2541 timm:$cc), 2542 (X86cmpm_su (_.VT _.RC:$src1), 2543 (_.VT (_.BroadcastLdFrag addr:$src2)), 2544 timm:$cc)>, 2545 EVEX_B, 
Sched<[sched.Folded, sched.ReadAfterFold]>; 2546 } 2547 2548 // Patterns for selecting with loads in other operand. 2549 def : Pat<(X86any_cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1), 2550 timm:$cc), 2551 (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, 2552 (X86cmpm_imm_commute timm:$cc))>; 2553 2554 def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2), 2555 (_.VT _.RC:$src1), 2556 timm:$cc)), 2557 (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask, 2558 _.RC:$src1, addr:$src2, 2559 (X86cmpm_imm_commute timm:$cc))>; 2560 2561 def : Pat<(X86any_cmpm (_.BroadcastLdFrag addr:$src2), 2562 (_.VT _.RC:$src1), timm:$cc), 2563 (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, 2564 (X86cmpm_imm_commute timm:$cc))>; 2565 2566 def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.BroadcastLdFrag addr:$src2), 2567 (_.VT _.RC:$src1), 2568 timm:$cc)), 2569 (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, 2570 _.RC:$src1, addr:$src2, 2571 (X86cmpm_imm_commute timm:$cc))>; 2572 2573 // Patterns for mask intrinsics. 
2574 def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc, 2575 (_.KVT immAllOnesV)), 2576 (!cast<Instruction>(Name#_.ZSuffix#"rri") _.RC:$src1, _.RC:$src2, timm:$cc)>; 2577 2578 def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask), 2579 (!cast<Instruction>(Name#_.ZSuffix#"rrik") _.KRCWM:$mask, _.RC:$src1, 2580 _.RC:$src2, timm:$cc)>; 2581 2582 def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc, 2583 (_.KVT immAllOnesV)), 2584 (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, timm:$cc)>; 2585 2586 def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc, 2587 _.KRCWM:$mask), 2588 (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask, _.RC:$src1, 2589 addr:$src2, timm:$cc)>; 2590 2591 def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc, 2592 (_.KVT immAllOnesV)), 2593 (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, timm:$cc)>; 2594 2595 def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc, 2596 _.KRCWM:$mask), 2597 (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, _.RC:$src1, 2598 addr:$src2, timm:$cc)>; 2599 2600 // Patterns for mask intrinsics with loads in other operand. 
2601 def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc, 2602 (_.KVT immAllOnesV)), 2603 (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, 2604 (X86cmpm_imm_commute timm:$cc))>; 2605 2606 def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc, 2607 _.KRCWM:$mask), 2608 (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask, 2609 _.RC:$src1, addr:$src2, 2610 (X86cmpm_imm_commute timm:$cc))>; 2611 2612 def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc, 2613 (_.KVT immAllOnesV)), 2614 (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, 2615 (X86cmpm_imm_commute timm:$cc))>; 2616 2617 def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc, 2618 _.KRCWM:$mask), 2619 (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, 2620 _.RC:$src1, addr:$src2, 2621 (X86cmpm_imm_commute timm:$cc))>; 2622} 2623 2624multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> { 2625 // comparison code form (VCMP[EQ/LT/LE/...] 
2626 let Uses = [MXCSR] in 2627 defm rrib : AVX512_maskable_custom_cmp<0xC2, MRMSrcReg, (outs _.KRC:$dst), 2628 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), 2629 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2, u8imm:$cc), 2630 "vcmp"#_.Suffix, 2631 "$cc, {sae}, $src2, $src1", 2632 "$src1, $src2, {sae}, $cc", 2633 [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1), 2634 (_.VT _.RC:$src2), timm:$cc, (_.KVT immAllOnesV)))], 2635 [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1), 2636 (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask))]>, 2637 EVEX_B, Sched<[sched]>; 2638} 2639 2640multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _, 2641 Predicate Pred = HasAVX512> { 2642 let Predicates = [Pred] in { 2643 defm Z : avx512_vcmp_common<sched.ZMM, _.info512, NAME>, 2644 avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512; 2645 2646 } 2647 let Predicates = [Pred,HasVLX] in { 2648 defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128; 2649 defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256; 2650 } 2651} 2652 2653defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>, 2654 AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; 2655defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>, 2656 AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; 2657defm VCMPPH : avx512_vcmp<SchedWriteFCmp, avx512vl_f16_info, HasFP16>, 2658 AVX512PSIi8Base, EVEX_4V, EVEX_CD8<16, CD8VF>, TA; 2659 2660// Patterns to select fp compares with load as first operand. 
2661let Predicates = [HasAVX512] in { 2662 def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1, timm:$cc)), 2663 (VCMPSDZrm FR64X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>; 2664 2665 def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1, timm:$cc)), 2666 (VCMPSSZrm FR32X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>; 2667} 2668 2669let Predicates = [HasFP16] in { 2670 def : Pat<(v1i1 (X86cmpms (loadf16 addr:$src2), FR16X:$src1, timm:$cc)), 2671 (VCMPSHZrm FR16X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>; 2672} 2673 2674// ---------------------------------------------------------------- 2675// FPClass 2676 2677def X86Vfpclasss_su : PatFrag<(ops node:$src1, node:$src2), 2678 (X86Vfpclasss node:$src1, node:$src2), [{ 2679 return N->hasOneUse(); 2680}]>; 2681 2682def X86Vfpclass_su : PatFrag<(ops node:$src1, node:$src2), 2683 (X86Vfpclass node:$src1, node:$src2), [{ 2684 return N->hasOneUse(); 2685}]>; 2686 2687//handle fpclass instruction mask = op(reg_scalar,imm) 2688// op(mem_scalar,imm) 2689multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, 2690 X86FoldableSchedWrite sched, X86VectorVTInfo _, 2691 Predicate prd> { 2692 let Predicates = [prd], ExeDomain = _.ExeDomain, Uses = [MXCSR] in { 2693 def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst), 2694 (ins _.RC:$src1, i32u8imm:$src2), 2695 OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2696 [(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1), 2697 (i32 timm:$src2)))]>, 2698 Sched<[sched]>; 2699 def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst), 2700 (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 2701 OpcodeStr#_.Suffix# 2702 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", 2703 [(set _.KRC:$dst,(and _.KRCWM:$mask, 2704 (X86Vfpclasss_su (_.VT _.RC:$src1), 2705 (i32 timm:$src2))))]>, 2706 EVEX_K, Sched<[sched]>; 2707 def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), 2708 (ins _.IntScalarMemOp:$src1, i32u8imm:$src2), 2709 
OpcodeStr#_.Suffix# 2710 "\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2711 [(set _.KRC:$dst, 2712 (X86Vfpclasss (_.ScalarIntMemFrags addr:$src1), 2713 (i32 timm:$src2)))]>, 2714 Sched<[sched.Folded, sched.ReadAfterFold]>; 2715 def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), 2716 (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2), 2717 OpcodeStr#_.Suffix# 2718 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", 2719 [(set _.KRC:$dst,(and _.KRCWM:$mask, 2720 (X86Vfpclasss_su (_.ScalarIntMemFrags addr:$src1), 2721 (i32 timm:$src2))))]>, 2722 EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; 2723 } 2724} 2725 2726//handle fpclass instruction mask = fpclass(reg_vec, reg_vec, imm) 2727// fpclass(reg_vec, mem_vec, imm) 2728// fpclass(reg_vec, broadcast(eltVt), imm) 2729multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, 2730 X86FoldableSchedWrite sched, X86VectorVTInfo _, 2731 string mem>{ 2732 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in { 2733 def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst), 2734 (ins _.RC:$src1, i32u8imm:$src2), 2735 OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2736 [(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1), 2737 (i32 timm:$src2)))]>, 2738 Sched<[sched]>; 2739 def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst), 2740 (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 2741 OpcodeStr#_.Suffix# 2742 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", 2743 [(set _.KRC:$dst,(and _.KRCWM:$mask, 2744 (X86Vfpclass_su (_.VT _.RC:$src1), 2745 (i32 timm:$src2))))]>, 2746 EVEX_K, Sched<[sched]>; 2747 def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), 2748 (ins _.MemOp:$src1, i32u8imm:$src2), 2749 OpcodeStr#_.Suffix#"{"#mem#"}"# 2750 "\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2751 [(set _.KRC:$dst,(X86Vfpclass 2752 (_.VT (_.LdFrag addr:$src1)), 2753 (i32 timm:$src2)))]>, 2754 Sched<[sched.Folded, sched.ReadAfterFold]>; 2755 def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), 2756 (ins 
_.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2), 2757 OpcodeStr#_.Suffix#"{"#mem#"}"# 2758 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", 2759 [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su 2760 (_.VT (_.LdFrag addr:$src1)), 2761 (i32 timm:$src2))))]>, 2762 EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; 2763 def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), 2764 (ins _.ScalarMemOp:$src1, i32u8imm:$src2), 2765 OpcodeStr#_.Suffix#"\t{$src2, ${src1}"# 2766 _.BroadcastStr#", $dst|$dst, ${src1}" 2767 #_.BroadcastStr#", $src2}", 2768 [(set _.KRC:$dst,(X86Vfpclass 2769 (_.VT (_.BroadcastLdFrag addr:$src1)), 2770 (i32 timm:$src2)))]>, 2771 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 2772 def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), 2773 (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 2774 OpcodeStr#_.Suffix#"\t{$src2, ${src1}"# 2775 _.BroadcastStr#", $dst {${mask}}|$dst {${mask}}, ${src1}"# 2776 _.BroadcastStr#", $src2}", 2777 [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su 2778 (_.VT (_.BroadcastLdFrag addr:$src1)), 2779 (i32 timm:$src2))))]>, 2780 EVEX_B, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; 2781 } 2782 2783 // Allow registers or broadcast with the x, y, z suffix we use to disambiguate 2784 // the memory form. 
2785 def : InstAlias<OpcodeStr#_.Suffix#mem# 2786 "\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2787 (!cast<Instruction>(NAME#"rr") 2788 _.KRC:$dst, _.RC:$src1, i32u8imm:$src2), 0, "att">; 2789 def : InstAlias<OpcodeStr#_.Suffix#mem# 2790 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", 2791 (!cast<Instruction>(NAME#"rrk") 2792 _.KRC:$dst, _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 0, "att">; 2793 def : InstAlias<OpcodeStr#_.Suffix#mem# 2794 "\t{$src2, ${src1}"#_.BroadcastStr#", $dst|$dst, ${src1}"# 2795 _.BroadcastStr#", $src2}", 2796 (!cast<Instruction>(NAME#"rmb") 2797 _.KRC:$dst, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">; 2798 def : InstAlias<OpcodeStr#_.Suffix#mem# 2799 "\t{$src2, ${src1}"#_.BroadcastStr#", $dst {${mask}}|" 2800 "$dst {${mask}}, ${src1}"#_.BroadcastStr#", $src2}", 2801 (!cast<Instruction>(NAME#"rmbk") 2802 _.KRC:$dst, _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">; 2803} 2804 2805multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _, 2806 bits<8> opc, X86SchedWriteWidths sched, 2807 Predicate prd>{ 2808 let Predicates = [prd] in { 2809 defm Z : avx512_vector_fpclass<opc, OpcodeStr, sched.ZMM, 2810 _.info512, "z">, EVEX_V512; 2811 } 2812 let Predicates = [prd, HasVLX] in { 2813 defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, sched.XMM, 2814 _.info128, "x">, EVEX_V128; 2815 defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, sched.YMM, 2816 _.info256, "y">, EVEX_V256; 2817 } 2818} 2819 2820multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec, 2821 bits<8> opcScalar, X86SchedWriteWidths sched> { 2822 defm PH : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f16_info, opcVec, 2823 sched, HasFP16>, 2824 EVEX_CD8<16, CD8VF>, AVX512PSIi8Base, TA; 2825 defm SHZ : avx512_scalar_fpclass<opcScalar, OpcodeStr, 2826 sched.Scl, f16x_info, HasFP16>, 2827 EVEX_CD8<16, CD8VT1>, AVX512PSIi8Base, TA; 2828 defm PS : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f32_info, opcVec, 2829 sched, 
HasDQI>, 2830 EVEX_CD8<32, CD8VF>, AVX512AIi8Base; 2831 defm PD : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f64_info, opcVec, 2832 sched, HasDQI>, 2833 EVEX_CD8<64, CD8VF>, AVX512AIi8Base, VEX_W; 2834 defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr, 2835 sched.Scl, f32x_info, HasDQI>, VEX_LIG, 2836 EVEX_CD8<32, CD8VT1>, AVX512AIi8Base; 2837 defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr, 2838 sched.Scl, f64x_info, HasDQI>, VEX_LIG, 2839 EVEX_CD8<64, CD8VT1>, AVX512AIi8Base, VEX_W; 2840} 2841 2842defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp>, EVEX; 2843 2844//----------------------------------------------------------------- 2845// Mask register copy, including 2846// - copy between mask registers 2847// - load/store mask registers 2848// - copy from GPR to mask register and vice versa 2849// 2850multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk, 2851 string OpcodeStr, RegisterClass KRC, 2852 ValueType vvt, X86MemOperand x86memop> { 2853 let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in 2854 def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src), 2855 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>, 2856 Sched<[WriteMove]>; 2857 def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src), 2858 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 2859 [(set KRC:$dst, (vvt (load addr:$src)))]>, 2860 Sched<[WriteLoad]>; 2861 def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src), 2862 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 2863 [(store KRC:$src, addr:$dst)]>, 2864 Sched<[WriteStore]>; 2865} 2866 2867multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk, 2868 string OpcodeStr, 2869 RegisterClass KRC, RegisterClass GRC> { 2870 let hasSideEffects = 0 in { 2871 def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src), 2872 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>, 2873 Sched<[WriteMove]>; 2874 def rk : I<opc_rk, 
MRMSrcReg, (outs GRC:$dst), (ins KRC:$src), 2875 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>, 2876 Sched<[WriteMove]>; 2877 } 2878} 2879 2880let Predicates = [HasDQI] in 2881 defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>, 2882 avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>, 2883 VEX, PD; 2884 2885let Predicates = [HasAVX512] in 2886 defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>, 2887 avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>, 2888 VEX, PS; 2889 2890let Predicates = [HasBWI] in { 2891 defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>, 2892 VEX, PD, VEX_W; 2893 defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>, 2894 VEX, XD; 2895 defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>, 2896 VEX, PS, VEX_W; 2897 defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>, 2898 VEX, XD, VEX_W; 2899} 2900 2901// GR from/to mask register 2902def : Pat<(v16i1 (bitconvert (i16 GR16:$src))), 2903 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>; 2904def : Pat<(i16 (bitconvert (v16i1 VK16:$src))), 2905 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>; 2906def : Pat<(i8 (trunc (i16 (bitconvert (v16i1 VK16:$src))))), 2907 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_8bit)>; 2908 2909def : Pat<(v8i1 (bitconvert (i8 GR8:$src))), 2910 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>; 2911def : Pat<(i8 (bitconvert (v8i1 VK8:$src))), 2912 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>; 2913 2914def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))), 2915 (KMOVWrk VK16:$src)>; 2916def : Pat<(i64 (zext (i16 (bitconvert (v16i1 VK16:$src))))), 2917 (SUBREG_TO_REG (i64 0), (KMOVWrk VK16:$src), sub_32bit)>; 2918def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))), 2919 (COPY_TO_REGCLASS 
VK16:$src, GR32)>; 2920def : Pat<(i64 (anyext (i16 (bitconvert (v16i1 VK16:$src))))), 2921 (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK16:$src, GR32), sub_32bit)>; 2922 2923def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))), 2924 (KMOVBrk VK8:$src)>, Requires<[HasDQI]>; 2925def : Pat<(i64 (zext (i8 (bitconvert (v8i1 VK8:$src))))), 2926 (SUBREG_TO_REG (i64 0), (KMOVBrk VK8:$src), sub_32bit)>, Requires<[HasDQI]>; 2927def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))), 2928 (COPY_TO_REGCLASS VK8:$src, GR32)>; 2929def : Pat<(i64 (anyext (i8 (bitconvert (v8i1 VK8:$src))))), 2930 (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK8:$src, GR32), sub_32bit)>; 2931 2932def : Pat<(v32i1 (bitconvert (i32 GR32:$src))), 2933 (COPY_TO_REGCLASS GR32:$src, VK32)>; 2934def : Pat<(i32 (bitconvert (v32i1 VK32:$src))), 2935 (COPY_TO_REGCLASS VK32:$src, GR32)>; 2936def : Pat<(v64i1 (bitconvert (i64 GR64:$src))), 2937 (COPY_TO_REGCLASS GR64:$src, VK64)>; 2938def : Pat<(i64 (bitconvert (v64i1 VK64:$src))), 2939 (COPY_TO_REGCLASS VK64:$src, GR64)>; 2940 2941// Load/store kreg 2942let Predicates = [HasDQI] in { 2943 def : Pat<(v1i1 (load addr:$src)), 2944 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>; 2945 def : Pat<(v2i1 (load addr:$src)), 2946 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>; 2947 def : Pat<(v4i1 (load addr:$src)), 2948 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>; 2949} 2950 2951let Predicates = [HasAVX512] in { 2952 def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))), 2953 (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>; 2954 def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))), 2955 (KMOVWkm addr:$src)>; 2956} 2957 2958def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT", 2959 SDTypeProfile<1, 2, [SDTCisVT<0, i8>, 2960 SDTCVecEltisVT<1, i1>, 2961 SDTCisPtrTy<2>]>>; 2962 2963let Predicates = [HasAVX512] in { 2964 multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> { 2965 def : Pat<(maskVT (scalar_to_vector GR32:$src)), 2966 
              (COPY_TO_REGCLASS GR32:$src, maskRC)>;

    // Insert an i8 scalar into element 0: widen to i32 first because there
    // is no direct GR8 -> mask-register copy.
    def : Pat<(maskVT (scalar_to_vector GR8:$src)),
              (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;

    // Extract element 0 of a mask as i8 by round-tripping through GR32.
    def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))),
              (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>;

    // anyext of the extracted element: the full GR32 copy already holds the
    // bits, so no sub-register extract is needed.
    def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))),
              (i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>;
  }

  // Instantiate the GPR <-> mask copy lowerings for every mask width.
  defm : operation_gpr_mask_copy_lowering<VK1, v1i1>;
  defm : operation_gpr_mask_copy_lowering<VK2, v2i1>;
  defm : operation_gpr_mask_copy_lowering<VK4, v4i1>;
  defm : operation_gpr_mask_copy_lowering<VK8, v8i1>;
  defm : operation_gpr_mask_copy_lowering<VK16, v16i1>;
  defm : operation_gpr_mask_copy_lowering<VK32, v32i1>;
  defm : operation_gpr_mask_copy_lowering<VK64, v64i1>;

  // Insert a single i1 (from GR8) into an all-zeros v16i1: mask off all but
  // bit 0 of the widened GPR, then move it into a mask register.
  def : Pat<(insert_subvector (v16i1 immAllZerosV),
                              (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
            (KMOVWkr (AND32ri8
                      (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
                      (i32 1)))>;
}

// Mask unary operation
// - KNOT
// One register-to-register form per mask register class, gated on the
// feature predicate that makes that mask width legal.
multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
                            RegisterClass KRC, SDPatternOperator OpNode,
                            X86FoldableSchedWrite sched, Predicate prd> {
  let Predicates = [prd] in
  def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
             [(set KRC:$dst, (OpNode KRC:$src))]>,
             Sched<[sched]>;
}

// Instantiate the b/w/d/q width variants of a mask unary op. Byte form
// needs DQI; word form is baseline AVX512F; dword/qword forms need BWI.
multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNode,
                                X86FoldableSchedWrite sched> {
  defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                            sched, HasDQI>, VEX, PD;
  defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                            sched, HasAVX512>, VEX, PS;
  defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                            sched, HasBWI>, VEX, PD, VEX_W;
3014 defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode, 3015 sched, HasBWI>, VEX, PS, VEX_W; 3016} 3017 3018// TODO - do we need a X86SchedWriteWidths::KMASK type? 3019defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>; 3020 3021// KNL does not support KMOVB, 8-bit mask is promoted to 16-bit 3022let Predicates = [HasAVX512, NoDQI] in 3023def : Pat<(vnot VK8:$src), 3024 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>; 3025 3026def : Pat<(vnot VK4:$src), 3027 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>; 3028def : Pat<(vnot VK2:$src), 3029 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>; 3030def : Pat<(vnot VK1:$src), 3031 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK1:$src, VK16)), VK2)>; 3032 3033// Mask binary operation 3034// - KAND, KANDN, KOR, KXNOR, KXOR 3035multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr, 3036 RegisterClass KRC, SDPatternOperator OpNode, 3037 X86FoldableSchedWrite sched, Predicate prd, 3038 bit IsCommutable> { 3039 let Predicates = [prd], isCommutable = IsCommutable in 3040 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2), 3041 !strconcat(OpcodeStr, 3042 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 3043 [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>, 3044 Sched<[sched]>; 3045} 3046 3047multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr, 3048 SDPatternOperator OpNode, 3049 X86FoldableSchedWrite sched, bit IsCommutable, 3050 Predicate prdW = HasAVX512> { 3051 defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode, 3052 sched, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD; 3053 defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode, 3054 sched, prdW, IsCommutable>, VEX_4V, VEX_L, PS; 3055 defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode, 3056 sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD; 3057 defm Q : 
avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode, 3058 sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS; 3059} 3060 3061// These nodes use 'vnot' instead of 'not' to support vectors. 3062def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>; 3063def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>; 3064 3065// TODO - do we need a X86SchedWriteWidths::KMASK type? 3066defm KAND : avx512_mask_binop_all<0x41, "kand", and, SchedWriteVecLogic.XMM, 1>; 3067defm KOR : avx512_mask_binop_all<0x45, "kor", or, SchedWriteVecLogic.XMM, 1>; 3068defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, SchedWriteVecLogic.XMM, 1>; 3069defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, SchedWriteVecLogic.XMM, 1>; 3070defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, SchedWriteVecLogic.XMM, 0>; 3071defm KADD : avx512_mask_binop_all<0x4A, "kadd", X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>; 3072 3073multiclass avx512_binop_pat<SDPatternOperator VOpNode, 3074 Instruction Inst> { 3075 // With AVX512F, 8-bit mask is promoted to 16-bit mask, 3076 // for the DQI set, this type is legal and KxxxB instruction is used 3077 let Predicates = [NoDQI] in 3078 def : Pat<(VOpNode VK8:$src1, VK8:$src2), 3079 (COPY_TO_REGCLASS 3080 (Inst (COPY_TO_REGCLASS VK8:$src1, VK16), 3081 (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>; 3082 3083 // All types smaller than 8 bits require conversion anyway 3084 def : Pat<(VOpNode VK1:$src1, VK1:$src2), 3085 (COPY_TO_REGCLASS (Inst 3086 (COPY_TO_REGCLASS VK1:$src1, VK16), 3087 (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>; 3088 def : Pat<(VOpNode VK2:$src1, VK2:$src2), 3089 (COPY_TO_REGCLASS (Inst 3090 (COPY_TO_REGCLASS VK2:$src1, VK16), 3091 (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>; 3092 def : Pat<(VOpNode VK4:$src1, VK4:$src2), 3093 (COPY_TO_REGCLASS (Inst 3094 (COPY_TO_REGCLASS VK4:$src1, VK16), 3095 (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>; 3096} 3097 3098defm : avx512_binop_pat<and, 
KANDWrr>; 3099defm : avx512_binop_pat<vandn, KANDNWrr>; 3100defm : avx512_binop_pat<or, KORWrr>; 3101defm : avx512_binop_pat<vxnor, KXNORWrr>; 3102defm : avx512_binop_pat<xor, KXORWrr>; 3103 3104// Mask unpacking 3105multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst, 3106 X86KVectorVTInfo Src, X86FoldableSchedWrite sched, 3107 Predicate prd> { 3108 let Predicates = [prd] in { 3109 let hasSideEffects = 0 in 3110 def rr : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst), 3111 (ins Src.KRC:$src1, Src.KRC:$src2), 3112 "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 3113 VEX_4V, VEX_L, Sched<[sched]>; 3114 3115 def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)), 3116 (!cast<Instruction>(NAME#rr) Src.KRC:$src2, Src.KRC:$src1)>; 3117 } 3118} 3119 3120defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info, WriteShuffle, HasAVX512>, PD; 3121defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, PS; 3122defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, PS, VEX_W; 3123 3124// Mask bit testing 3125multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC, 3126 SDNode OpNode, X86FoldableSchedWrite sched, 3127 Predicate prd> { 3128 let Predicates = [prd], Defs = [EFLAGS] in 3129 def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2), 3130 !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), 3131 [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>, 3132 Sched<[sched]>; 3133} 3134 3135multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode, 3136 X86FoldableSchedWrite sched, 3137 Predicate prdW = HasAVX512> { 3138 defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>, 3139 VEX, PD; 3140 defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>, 3141 VEX, PS; 3142 defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>, 3143 VEX, PS, VEX_W; 3144 defm D : 
avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>, 3145 VEX, PD, VEX_W; 3146} 3147 3148// TODO - do we need a X86SchedWriteWidths::KMASK type? 3149defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>; 3150defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>; 3151 3152// Mask shift 3153multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC, 3154 SDNode OpNode, X86FoldableSchedWrite sched> { 3155 let Predicates = [HasAVX512] in 3156 def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm), 3157 !strconcat(OpcodeStr, 3158 "\t{$imm, $src, $dst|$dst, $src, $imm}"), 3159 [(set KRC:$dst, (OpNode KRC:$src, (i8 timm:$imm)))]>, 3160 Sched<[sched]>; 3161} 3162 3163multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr, 3164 SDNode OpNode, X86FoldableSchedWrite sched> { 3165 defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode, 3166 sched>, VEX, TAPD, VEX_W; 3167 let Predicates = [HasDQI] in 3168 defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode, 3169 sched>, VEX, TAPD; 3170 let Predicates = [HasBWI] in { 3171 defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode, 3172 sched>, VEX, TAPD, VEX_W; 3173 defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode, 3174 sched>, VEX, TAPD; 3175 } 3176} 3177 3178defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>; 3179defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>; 3180 3181// Patterns for comparing 128/256-bit integer vectors using 512-bit instruction. 
// Widen a narrow (128/256-bit) compare to the 512-bit instruction, then copy
// the resulting k-register down to the narrow mask class.
multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
                                                 string InstStr,
                                                 X86VectorVTInfo Narrow,
                                                 X86VectorVTInfo Wide> {
def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
                                (Narrow.VT Narrow.RC:$src2), cond)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrri")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
            (X86pcmpm_imm $cc)), Narrow.KRC)>;

// Masked form: the narrow mask is promoted to the wide class and fed to the
// zero-masking compare.
def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
                                                    (Narrow.VT Narrow.RC:$src2),
                                                    cond)))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
           (X86pcmpm_imm $cc)), Narrow.KRC)>;
}

multiclass axv512_icmp_packed_cc_rmb_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
                                                     string InstStr,
                                                     X86VectorVTInfo Narrow,
                                                     X86VectorVTInfo Wide> {
// Broadcast load.
def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
                                (Narrow.BroadcastLdFrag addr:$src2), cond)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrmib")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>;

def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (Narrow.KVT
                            (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
                                         (Narrow.BroadcastLdFrag addr:$src2),
                                         cond)))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
           addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>;

// Commuted with broadcast load.
def : Pat<(Narrow.KVT (Frag:$cc (Narrow.BroadcastLdFrag addr:$src2),
                                (Narrow.VT Narrow.RC:$src1),
                                cond)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrmib")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>;

def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (Narrow.KVT
                            (Frag_su:$cc (Narrow.BroadcastLdFrag addr:$src2),
                                         (Narrow.VT Narrow.RC:$src1),
                                         cond)))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
           addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>;
}

// Same as above, but for fp types which don't use PatFrags.
multiclass axv512_cmp_packed_cc_no_vlx_lowering<string InstStr,
                                                X86VectorVTInfo Narrow,
                                                X86VectorVTInfo Wide> {
def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
                               (Narrow.VT Narrow.RC:$src2), timm:$cc)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrri")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
            timm:$cc), Narrow.KRC)>;

def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
                                       (Narrow.VT Narrow.RC:$src2), timm:$cc))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
           timm:$cc), Narrow.KRC)>;

// Broadcast load.
def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
                               (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrmbi")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            addr:$src2, timm:$cc), Narrow.KRC)>;

def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
                                       (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
           addr:$src2, timm:$cc), Narrow.KRC)>;

// Commuted with broadcast load.
def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
                               (Narrow.VT Narrow.RC:$src1), timm:$cc)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrmbi")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;

def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (X86cmpm_su (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
                                       (Narrow.VT Narrow.RC:$src1), timm:$cc))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
           addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
}

let Predicates = [HasAVX512, NoVLX] in {
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;

  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;

  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;

  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v8f32x_info, v16f32_info>;
  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v4f32x_info, v16f32_info>;
  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v4f64x_info, v8f64_info>;
  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>;
}

let Predicates = [HasBWI, NoVLX] in {
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v16i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v16i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v16i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v16i16x_info, v32i16_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v8i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v8i16x_info, v32i16_info>;
}

// Mask setting all 0s or 1s
multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, SDPatternOperator Val> {
  // Pseudo expanded post-RA; rematerializable since it has no inputs.
  let Predicates = [HasAVX512] in
  let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
      SchedRW = [WriteZero] in
  def NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
                [(set KRC:$dst, (VT Val))]>;
}

multiclass avx512_mask_setop_w<SDPatternOperator Val> {
  defm W : avx512_mask_setop<VK16, v16i1, Val>;
  defm D : avx512_mask_setop<VK32, v32i1, Val>;
  defm Q : avx512_mask_setop<VK64, v64i1, Val>;
}

defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
defm KSET1 : avx512_mask_setop_w<immAllOnesV>;

// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
let Predicates = [HasAVX512] in {
  def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
  def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
  def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
  def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
  def : Pat<(v8i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK8)>;
  def : Pat<(v4i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK4)>;
  def : Pat<(v2i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK2)>;
  def : Pat<(v1i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK1)>;
}

// Patterns for kmask insert_subvector/extract_subvector to/from index=0
multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
                                             RegisterClass RC, ValueType VT> {
  def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
            (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;

  def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
            (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
}
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK2,  v2i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK4,  v4i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK4,  v4i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;

//===----------------------------------------------------------------------===//
// AVX-512 - Aligned and unaligned load and store
//

multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
                       X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
                       X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
                       bit NoRMPattern = 0,
                       SDPatternOperator SelectOprr = vselect> {
  let hasSideEffects = 0 in {
    let isMoveReg = 1 in
    def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
                      _.ExeDomain>, EVEX, Sched<[Sched.RR]>,
                      EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
    def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
                        (ins _.KRCWM:$mask, _.RC:$src),
                        !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                                   "${dst} {${mask}} {z}, $src}"),
                        [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
                                                           (_.VT _.RC:$src),
                                                           _.ImmAllZerosV)))], _.ExeDomain>,
                        EVEX, EVEX_KZ, Sched<[Sched.RR]>;

    let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
    def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                      !if(NoRMPattern, [],
                          [(set _.RC:$dst,
                            (_.VT (ld_frag addr:$src)))]),
                      _.ExeDomain>, EVEX, Sched<[Sched.RM]>,
                      EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;

    let
Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in { 3446 def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), 3447 (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1), 3448 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|", 3449 "${dst} {${mask}}, $src1}"), 3450 [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask, 3451 (_.VT _.RC:$src1), 3452 (_.VT _.RC:$src0))))], _.ExeDomain>, 3453 EVEX, EVEX_K, Sched<[Sched.RR]>; 3454 def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), 3455 (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1), 3456 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|", 3457 "${dst} {${mask}}, $src1}"), 3458 [(set _.RC:$dst, (_.VT 3459 (vselect_mask _.KRCWM:$mask, 3460 (_.VT (ld_frag addr:$src1)), 3461 (_.VT _.RC:$src0))))], _.ExeDomain>, 3462 EVEX, EVEX_K, Sched<[Sched.RM]>; 3463 } 3464 def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), 3465 (ins _.KRCWM:$mask, _.MemOp:$src), 3466 OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"# 3467 "${dst} {${mask}} {z}, $src}", 3468 [(set _.RC:$dst, (_.VT (vselect_mask _.KRCWM:$mask, 3469 (_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))], 3470 _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>; 3471 } 3472 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)), 3473 (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>; 3474 3475 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)), 3476 (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>; 3477 3478 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))), 3479 (!cast<Instruction>(Name#_.ZSuffix#rmk) _.RC:$src0, 3480 _.KRCWM:$mask, addr:$ptr)>; 3481} 3482 3483multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr, 3484 AVX512VLVectorVTInfo _, Predicate prd, 3485 X86SchedWriteMoveLSWidths Sched, 3486 string EVEX2VEXOvrd, bit NoRMPattern = 0> { 3487 let Predicates = [prd] in 3488 defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, 3489 _.info512.AlignedLdFrag, masked_load_aligned, 3490 Sched.ZMM, "", 
NoRMPattern>, EVEX_V512; 3491 3492 let Predicates = [prd, HasVLX] in { 3493 defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, 3494 _.info256.AlignedLdFrag, masked_load_aligned, 3495 Sched.YMM, EVEX2VEXOvrd#"Y", NoRMPattern>, EVEX_V256; 3496 defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, 3497 _.info128.AlignedLdFrag, masked_load_aligned, 3498 Sched.XMM, EVEX2VEXOvrd, NoRMPattern>, EVEX_V128; 3499 } 3500} 3501 3502multiclass avx512_load_vl<bits<8> opc, string OpcodeStr, 3503 AVX512VLVectorVTInfo _, Predicate prd, 3504 X86SchedWriteMoveLSWidths Sched, 3505 string EVEX2VEXOvrd, bit NoRMPattern = 0, 3506 SDPatternOperator SelectOprr = vselect> { 3507 let Predicates = [prd] in 3508 defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag, 3509 masked_load, Sched.ZMM, "", 3510 NoRMPattern, SelectOprr>, EVEX_V512; 3511 3512 let Predicates = [prd, HasVLX] in { 3513 defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag, 3514 masked_load, Sched.YMM, EVEX2VEXOvrd#"Y", 3515 NoRMPattern, SelectOprr>, EVEX_V256; 3516 defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag, 3517 masked_load, Sched.XMM, EVEX2VEXOvrd, 3518 NoRMPattern, SelectOprr>, EVEX_V128; 3519 } 3520} 3521 3522multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName, 3523 X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore, 3524 X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd, 3525 bit NoMRPattern = 0> { 3526 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in { 3527 let isMoveReg = 1 in 3528 def rr_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src), 3529 OpcodeStr # "\t{$src, $dst|$dst, $src}", 3530 [], _.ExeDomain>, EVEX, 3531 FoldGenData<BaseName#_.ZSuffix#rr>, Sched<[Sched.RR]>, 3532 EVEX2VEXOverride<EVEX2VEXOvrd#"rr_REV">; 3533 def rrk_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), 3534 (ins _.KRCWM:$mask, _.RC:$src), 3535 OpcodeStr # "\t{$src, ${dst} {${mask}}|"# 3536 "${dst} {${mask}}, $src}", 
3537 [], _.ExeDomain>, EVEX, EVEX_K, 3538 FoldGenData<BaseName#_.ZSuffix#rrk>, 3539 Sched<[Sched.RR]>; 3540 def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), 3541 (ins _.KRCWM:$mask, _.RC:$src), 3542 OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" # 3543 "${dst} {${mask}} {z}, $src}", 3544 [], _.ExeDomain>, EVEX, EVEX_KZ, 3545 FoldGenData<BaseName#_.ZSuffix#rrkz>, 3546 Sched<[Sched.RR]>; 3547 } 3548 3549 let hasSideEffects = 0, mayStore = 1 in 3550 def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src), 3551 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 3552 !if(NoMRPattern, [], 3553 [(st_frag (_.VT _.RC:$src), addr:$dst)]), 3554 _.ExeDomain>, EVEX, Sched<[Sched.MR]>, 3555 EVEX2VEXOverride<EVEX2VEXOvrd#"mr">; 3556 def mrk : AVX512PI<opc, MRMDestMem, (outs), 3557 (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src), 3558 OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}", 3559 [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>, 3560 NotMemoryFoldable; 3561 3562 def: Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask), 3563 (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr, 3564 _.KRCWM:$mask, _.RC:$src)>; 3565 3566 def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}", 3567 (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV") 3568 _.RC:$dst, _.RC:$src), 0>; 3569 def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}", 3570 (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV") 3571 _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>; 3572 def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}", 3573 (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV") 3574 _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>; 3575} 3576 3577multiclass avx512_store_vl< bits<8> opc, string OpcodeStr, 3578 AVX512VLVectorVTInfo _, Predicate prd, 3579 X86SchedWriteMoveLSWidths Sched, 3580 string EVEX2VEXOvrd, bit NoMRPattern = 0> { 3581 let Predicates = [prd] in 3582 defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, 
store, 3583 masked_store, Sched.ZMM, "", 3584 NoMRPattern>, EVEX_V512; 3585 let Predicates = [prd, HasVLX] in { 3586 defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store, 3587 masked_store, Sched.YMM, 3588 EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256; 3589 defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store, 3590 masked_store, Sched.XMM, EVEX2VEXOvrd, 3591 NoMRPattern>, EVEX_V128; 3592 } 3593} 3594 3595multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr, 3596 AVX512VLVectorVTInfo _, Predicate prd, 3597 X86SchedWriteMoveLSWidths Sched, 3598 string EVEX2VEXOvrd, bit NoMRPattern = 0> { 3599 let Predicates = [prd] in 3600 defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore, 3601 masked_store_aligned, Sched.ZMM, "", 3602 NoMRPattern>, EVEX_V512; 3603 3604 let Predicates = [prd, HasVLX] in { 3605 defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore, 3606 masked_store_aligned, Sched.YMM, 3607 EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256; 3608 defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore, 3609 masked_store_aligned, Sched.XMM, EVEX2VEXOvrd, 3610 NoMRPattern>, EVEX_V128; 3611 } 3612} 3613 3614defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info, 3615 HasAVX512, SchedWriteFMoveLS, "VMOVAPS">, 3616 avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info, 3617 HasAVX512, SchedWriteFMoveLS, "VMOVAPS">, 3618 PS, EVEX_CD8<32, CD8VF>; 3619 3620defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info, 3621 HasAVX512, SchedWriteFMoveLS, "VMOVAPD">, 3622 avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info, 3623 HasAVX512, SchedWriteFMoveLS, "VMOVAPD">, 3624 PD, VEX_W, EVEX_CD8<64, CD8VF>; 3625 3626defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512, 3627 SchedWriteFMoveLS, "VMOVUPS", 0, null_frag>, 3628 avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512, 3629 SchedWriteFMoveLS, "VMOVUPS">, 3630 PS, EVEX_CD8<32, CD8VF>; 
3631 3632defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512, 3633 SchedWriteFMoveLS, "VMOVUPD", 0, null_frag>, 3634 avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512, 3635 SchedWriteFMoveLS, "VMOVUPD">, 3636 PD, VEX_W, EVEX_CD8<64, CD8VF>; 3637 3638defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info, 3639 HasAVX512, SchedWriteVecMoveLS, 3640 "VMOVDQA", 1>, 3641 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info, 3642 HasAVX512, SchedWriteVecMoveLS, 3643 "VMOVDQA", 1>, 3644 PD, EVEX_CD8<32, CD8VF>; 3645 3646defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info, 3647 HasAVX512, SchedWriteVecMoveLS, 3648 "VMOVDQA">, 3649 avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info, 3650 HasAVX512, SchedWriteVecMoveLS, 3651 "VMOVDQA">, 3652 PD, VEX_W, EVEX_CD8<64, CD8VF>; 3653 3654defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI, 3655 SchedWriteVecMoveLS, "VMOVDQU", 1>, 3656 avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI, 3657 SchedWriteVecMoveLS, "VMOVDQU", 1>, 3658 XD, EVEX_CD8<8, CD8VF>; 3659 3660defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI, 3661 SchedWriteVecMoveLS, "VMOVDQU", 1>, 3662 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI, 3663 SchedWriteVecMoveLS, "VMOVDQU", 1>, 3664 XD, VEX_W, EVEX_CD8<16, CD8VF>; 3665 3666defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512, 3667 SchedWriteVecMoveLS, "VMOVDQU", 1, null_frag>, 3668 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512, 3669 SchedWriteVecMoveLS, "VMOVDQU", 1>, 3670 XS, EVEX_CD8<32, CD8VF>; 3671 3672defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512, 3673 SchedWriteVecMoveLS, "VMOVDQU", 0, null_frag>, 3674 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512, 3675 SchedWriteVecMoveLS, "VMOVDQU">, 3676 XS, VEX_W, EVEX_CD8<64, CD8VF>; 3677 3678// Special 
// instructions to help with spilling when we don't have VLX. We need
// to load or store from a ZMM register instead. These are converted in
// expandPostRAPseudos.
let isReMaterializable = 1, canFoldAsLoad = 1,
    isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in {
def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
                            "", []>, Sched<[WriteFLoadX]>;
def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
                            "", []>, Sched<[WriteFLoadY]>;
def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
                            "", []>, Sched<[WriteFLoadX]>;
def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
                            "", []>, Sched<[WriteFLoadY]>;
}

let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
                            "", []>, Sched<[WriteFStoreX]>;
def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
                            "", []>, Sched<[WriteFStoreY]>;
def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
                            "", []>, Sched<[WriteFStoreX]>;
def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
                            "", []>, Sched<[WriteFStoreY]>;
}

// Select-of-zero with the operands swapped: invert the mask and use the
// zero-masking move.
def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV),
                          (v8i64 VR512:$src))),
          (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
                                            VK8), VR512:$src)>;

def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
                           (v16i32 VR512:$src))),
          (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;

// These patterns exist to prevent the above patterns from introducing a second
// mask inversion when one already exists.
def : Pat<(v8i64 (vselect (v8i1 (vnot VK8:$mask)),
                          (v8i64 immAllZerosV),
                          (v8i64 VR512:$src))),
          (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
def : Pat<(v16i32 (vselect (v16i1 (vnot VK16:$mask)),
                           (v16i32 immAllZerosV),
                           (v16i32 VR512:$src))),
          (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;

// Lower a narrow masked move by widening operands (and mask) to 512 bits,
// performing the masked move there, and extracting the narrow result.
multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
                              X86VectorVTInfo Wide> {
  def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
                                Narrow.RC:$src1, Narrow.RC:$src0)),
            (EXTRACT_SUBREG
             (Wide.VT
              (!cast<Instruction>(InstrStr#"rrk")
               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
               (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
             Narrow.SubRegIdx)>;

  def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
                                Narrow.RC:$src1, Narrow.ImmAllZerosV)),
            (EXTRACT_SUBREG
             (Wide.VT
              (!cast<Instruction>(InstrStr#"rrkz")
               (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
             Narrow.SubRegIdx)>;
}

// Patterns for handling v8i1 selects of 256-bit vectors when VLX isn't
// available. Use a 512-bit operation and extract.
let Predicates = [HasAVX512, NoVLX] in {
  defm : mask_move_lowering<"VMOVAPSZ",   v4f32x_info, v16f32_info>;
  defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
  defm : mask_move_lowering<"VMOVAPSZ",   v8f32x_info, v16f32_info>;
  defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;

  defm : mask_move_lowering<"VMOVAPDZ",   v2f64x_info, v8f64_info>;
  defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
  defm : mask_move_lowering<"VMOVAPDZ",   v4f64x_info, v8f64_info>;
  defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
}

let Predicates = [HasBWI, NoVLX] in {
  defm : mask_move_lowering<"VMOVDQU8Z",  v16i8x_info,  v64i8_info>;
  defm : mask_move_lowering<"VMOVDQU8Z",  v32i8x_info,  v64i8_info>;

  defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info,  v32i16_info>;
  defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;
}

let Predicates = [HasAVX512] in {
  // 512-bit load.
  def : Pat<(alignedloadv16i32 addr:$src),
            (VMOVDQA64Zrm addr:$src)>;
  def : Pat<(alignedloadv32i16 addr:$src),
            (VMOVDQA64Zrm addr:$src)>;
  def : Pat<(alignedloadv64i8 addr:$src),
            (VMOVDQA64Zrm addr:$src)>;
  def : Pat<(loadv16i32 addr:$src),
            (VMOVDQU64Zrm addr:$src)>;
  def : Pat<(loadv32i16 addr:$src),
            (VMOVDQU64Zrm addr:$src)>;
  def : Pat<(loadv64i8 addr:$src),
            (VMOVDQU64Zrm addr:$src)>;

  // 512-bit store.
3784 def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst), 3785 (VMOVDQA64Zmr addr:$dst, VR512:$src)>; 3786 def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst), 3787 (VMOVDQA64Zmr addr:$dst, VR512:$src)>; 3788 def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst), 3789 (VMOVDQA64Zmr addr:$dst, VR512:$src)>; 3790 def : Pat<(store (v16i32 VR512:$src), addr:$dst), 3791 (VMOVDQU64Zmr addr:$dst, VR512:$src)>; 3792 def : Pat<(store (v32i16 VR512:$src), addr:$dst), 3793 (VMOVDQU64Zmr addr:$dst, VR512:$src)>; 3794 def : Pat<(store (v64i8 VR512:$src), addr:$dst), 3795 (VMOVDQU64Zmr addr:$dst, VR512:$src)>; 3796} 3797 3798let Predicates = [HasVLX] in { 3799 // 128-bit load. 3800 def : Pat<(alignedloadv4i32 addr:$src), 3801 (VMOVDQA64Z128rm addr:$src)>; 3802 def : Pat<(alignedloadv8i16 addr:$src), 3803 (VMOVDQA64Z128rm addr:$src)>; 3804 def : Pat<(alignedloadv16i8 addr:$src), 3805 (VMOVDQA64Z128rm addr:$src)>; 3806 def : Pat<(loadv4i32 addr:$src), 3807 (VMOVDQU64Z128rm addr:$src)>; 3808 def : Pat<(loadv8i16 addr:$src), 3809 (VMOVDQU64Z128rm addr:$src)>; 3810 def : Pat<(loadv16i8 addr:$src), 3811 (VMOVDQU64Z128rm addr:$src)>; 3812 3813 // 128-bit store. 3814 def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst), 3815 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>; 3816 def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst), 3817 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>; 3818 def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst), 3819 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>; 3820 def : Pat<(store (v4i32 VR128X:$src), addr:$dst), 3821 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>; 3822 def : Pat<(store (v8i16 VR128X:$src), addr:$dst), 3823 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>; 3824 def : Pat<(store (v16i8 VR128X:$src), addr:$dst), 3825 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>; 3826 3827 // 256-bit load. 
3828 def : Pat<(alignedloadv8i32 addr:$src), 3829 (VMOVDQA64Z256rm addr:$src)>; 3830 def : Pat<(alignedloadv16i16 addr:$src), 3831 (VMOVDQA64Z256rm addr:$src)>; 3832 def : Pat<(alignedloadv32i8 addr:$src), 3833 (VMOVDQA64Z256rm addr:$src)>; 3834 def : Pat<(loadv8i32 addr:$src), 3835 (VMOVDQU64Z256rm addr:$src)>; 3836 def : Pat<(loadv16i16 addr:$src), 3837 (VMOVDQU64Z256rm addr:$src)>; 3838 def : Pat<(loadv32i8 addr:$src), 3839 (VMOVDQU64Z256rm addr:$src)>; 3840 3841 // 256-bit store. 3842 def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst), 3843 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>; 3844 def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst), 3845 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>; 3846 def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst), 3847 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>; 3848 def : Pat<(store (v8i32 VR256X:$src), addr:$dst), 3849 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>; 3850 def : Pat<(store (v16i16 VR256X:$src), addr:$dst), 3851 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>; 3852 def : Pat<(store (v32i8 VR256X:$src), addr:$dst), 3853 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>; 3854} 3855let Predicates = [HasFP16] in { 3856 def : Pat<(v32f16 (vselect VK32WM:$mask, (v32f16 VR512:$src1), (v32f16 VR512:$src0))), 3857 (VMOVDQU16Zrrk VR512:$src0, VK32WM:$mask, VR512:$src1)>; 3858 def : Pat<(v32f16 (vselect VK32WM:$mask, (v32f16 VR512:$src1), v32f16_info.ImmAllZerosV)), 3859 (VMOVDQU16Zrrkz VK32WM:$mask, VR512:$src1)>; 3860 def : Pat<(v32f16 (alignedloadv32f16 addr:$src)), 3861 (VMOVAPSZrm addr:$src)>; 3862 def : Pat<(v32f16 (vselect VK32WM:$mask, 3863 (v32f16 (alignedloadv32f16 addr:$src)), (v32f16 VR512:$src0))), 3864 (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>; 3865 def : Pat<(v32f16 (vselect VK32WM:$mask, 3866 (v32f16 (alignedloadv32f16 addr:$src)), v32f16_info.ImmAllZerosV)), 3867 (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>; 3868 def : Pat<(v32f16 (loadv32f16 addr:$src)), 3869 (VMOVUPSZrm addr:$src)>; 3870 def : Pat<(v32f16 
(vselect VK32WM:$mask, 3871 (v32f16 (loadv32f16 addr:$src)), (v32f16 VR512:$src0))), 3872 (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>; 3873 def : Pat<(v32f16 (vselect VK32WM:$mask, 3874 (v32f16 (loadv32f16 addr:$src)), v32f16_info.ImmAllZerosV)), 3875 (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>; 3876 def : Pat<(v32f16 (masked_load addr:$src, VK32WM:$mask, (v32f16 VR512:$src0))), 3877 (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>; 3878 def : Pat<(v32f16 (masked_load addr:$src, VK32WM:$mask, undef)), 3879 (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>; 3880 def : Pat<(v32f16 (masked_load addr:$src, VK32WM:$mask, v32f16_info.ImmAllZerosV)), 3881 (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>; 3882 3883 def : Pat<(alignedstore (v32f16 VR512:$src), addr:$dst), 3884 (VMOVAPSZmr addr:$dst, VR512:$src)>; 3885 def : Pat<(store (v32f16 VR512:$src), addr:$dst), 3886 (VMOVUPSZmr addr:$dst, VR512:$src)>; 3887 def : Pat<(masked_store (v32f16 VR512:$src), addr:$dst, VK32WM:$mask), 3888 (VMOVDQU16Zmrk addr:$dst, VK32WM:$mask, VR512:$src)>; 3889} 3890let Predicates = [HasFP16, HasVLX] in { 3891 def : Pat<(v16f16 (vselect VK16WM:$mask, (v16f16 VR256X:$src1), (v16f16 VR256X:$src0))), 3892 (VMOVDQU16Z256rrk VR256X:$src0, VK16WM:$mask, VR256X:$src1)>; 3893 def : Pat<(v16f16 (vselect VK16WM:$mask, (v16f16 VR256X:$src1), v16f16x_info.ImmAllZerosV)), 3894 (VMOVDQU16Z256rrkz VK16WM:$mask, VR256X:$src1)>; 3895 def : Pat<(v16f16 (alignedloadv16f16 addr:$src)), 3896 (VMOVAPSZ256rm addr:$src)>; 3897 def : Pat<(v16f16 (vselect VK16WM:$mask, 3898 (v16f16 (alignedloadv16f16 addr:$src)), (v16f16 VR256X:$src0))), 3899 (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>; 3900 def : Pat<(v16f16 (vselect VK16WM:$mask, 3901 (v16f16 (alignedloadv16f16 addr:$src)), v16f16x_info.ImmAllZerosV)), 3902 (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>; 3903 def : Pat<(v16f16 (loadv16f16 addr:$src)), 3904 (VMOVUPSZ256rm addr:$src)>; 3905 def : Pat<(v16f16 (vselect VK16WM:$mask, 3906 (v16f16 (loadv16f16 
addr:$src)), (v16f16 VR256X:$src0))), 3907 (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>; 3908 def : Pat<(v16f16 (vselect VK16WM:$mask, 3909 (v16f16 (loadv16f16 addr:$src)), v16f16x_info.ImmAllZerosV)), 3910 (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>; 3911 def : Pat<(v16f16 (masked_load addr:$src, VK16WM:$mask, (v16f16 VR256X:$src0))), 3912 (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>; 3913 def : Pat<(v16f16 (masked_load addr:$src, VK16WM:$mask, undef)), 3914 (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>; 3915 def : Pat<(v16f16 (masked_load addr:$src, VK16WM:$mask, v16f16x_info.ImmAllZerosV)), 3916 (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>; 3917 3918 def : Pat<(alignedstore (v16f16 VR256X:$src), addr:$dst), 3919 (VMOVAPSZ256mr addr:$dst, VR256X:$src)>; 3920 def : Pat<(store (v16f16 VR256X:$src), addr:$dst), 3921 (VMOVUPSZ256mr addr:$dst, VR256X:$src)>; 3922 def : Pat<(masked_store (v16f16 VR256X:$src), addr:$dst, VK16WM:$mask), 3923 (VMOVDQU16Z256mrk addr:$dst, VK16WM:$mask, VR256X:$src)>; 3924 3925 def : Pat<(v8f16 (vselect VK8WM:$mask, (v8f16 VR128X:$src1), (v8f16 VR128X:$src0))), 3926 (VMOVDQU16Z128rrk VR128X:$src0, VK8WM:$mask, VR128X:$src1)>; 3927 def : Pat<(v8f16 (vselect VK8WM:$mask, (v8f16 VR128X:$src1), v8f16x_info.ImmAllZerosV)), 3928 (VMOVDQU16Z128rrkz VK8WM:$mask, VR128X:$src1)>; 3929 def : Pat<(v8f16 (alignedloadv8f16 addr:$src)), 3930 (VMOVAPSZ128rm addr:$src)>; 3931 def : Pat<(v8f16 (vselect VK8WM:$mask, 3932 (v8f16 (alignedloadv8f16 addr:$src)), (v8f16 VR128X:$src0))), 3933 (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>; 3934 def : Pat<(v8f16 (vselect VK8WM:$mask, 3935 (v8f16 (alignedloadv8f16 addr:$src)), v8f16x_info.ImmAllZerosV)), 3936 (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>; 3937 def : Pat<(v8f16 (loadv8f16 addr:$src)), 3938 (VMOVUPSZ128rm addr:$src)>; 3939 def : Pat<(v8f16 (vselect VK8WM:$mask, 3940 (v8f16 (loadv8f16 addr:$src)), (v8f16 VR128X:$src0))), 3941 (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, 
addr:$src)>; 3942 def : Pat<(v8f16 (vselect VK8WM:$mask, 3943 (v8f16 (loadv8f16 addr:$src)), v8f16x_info.ImmAllZerosV)), 3944 (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>; 3945 def : Pat<(v8f16 (masked_load addr:$src, VK8WM:$mask, (v8f16 VR128X:$src0))), 3946 (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>; 3947 def : Pat<(v8f16 (masked_load addr:$src, VK8WM:$mask, undef)), 3948 (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>; 3949 def : Pat<(v8f16 (masked_load addr:$src, VK8WM:$mask, v8f16x_info.ImmAllZerosV)), 3950 (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>; 3951 3952 def : Pat<(alignedstore (v8f16 VR128X:$src), addr:$dst), 3953 (VMOVAPSZ128mr addr:$dst, VR128X:$src)>; 3954 def : Pat<(store (v8f16 VR128X:$src), addr:$dst), 3955 (VMOVUPSZ128mr addr:$dst, VR128X:$src)>; 3956 def : Pat<(masked_store (v8f16 VR128X:$src), addr:$dst, VK8WM:$mask), 3957 (VMOVDQU16Z128mrk addr:$dst, VK8WM:$mask, VR128X:$src)>; 3958} 3959 3960// Move Int Doubleword to Packed Double Int 3961// 3962let ExeDomain = SSEPackedInt in { 3963def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src), 3964 "vmovd\t{$src, $dst|$dst, $src}", 3965 [(set VR128X:$dst, 3966 (v4i32 (scalar_to_vector GR32:$src)))]>, 3967 EVEX, Sched<[WriteVecMoveFromGpr]>; 3968def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src), 3969 "vmovd\t{$src, $dst|$dst, $src}", 3970 [(set VR128X:$dst, 3971 (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>, 3972 EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>; 3973def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src), 3974 "vmovq\t{$src, $dst|$dst, $src}", 3975 [(set VR128X:$dst, 3976 (v2i64 (scalar_to_vector GR64:$src)))]>, 3977 EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>; 3978let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in 3979def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), 3980 (ins i64mem:$src), 3981 "vmovq\t{$src, $dst|$dst, $src}", []>, 
3982 EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>; 3983let isCodeGenOnly = 1 in { 3984def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src), 3985 "vmovq\t{$src, $dst|$dst, $src}", 3986 [(set FR64X:$dst, (bitconvert GR64:$src))]>, 3987 EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>; 3988def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src), 3989 "vmovq\t{$src, $dst|$dst, $src}", 3990 [(set GR64:$dst, (bitconvert FR64X:$src))]>, 3991 EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>; 3992} 3993} // ExeDomain = SSEPackedInt 3994 3995// Move Int Doubleword to Single Scalar 3996// 3997let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in { 3998def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src), 3999 "vmovd\t{$src, $dst|$dst, $src}", 4000 [(set FR32X:$dst, (bitconvert GR32:$src))]>, 4001 EVEX, Sched<[WriteVecMoveFromGpr]>; 4002} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1 4003 4004// Move doubleword from xmm register to r/m32 4005// 4006let ExeDomain = SSEPackedInt in { 4007def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src), 4008 "vmovd\t{$src, $dst|$dst, $src}", 4009 [(set GR32:$dst, (extractelt (v4i32 VR128X:$src), 4010 (iPTR 0)))]>, 4011 EVEX, Sched<[WriteVecMoveToGpr]>; 4012def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs), 4013 (ins i32mem:$dst, VR128X:$src), 4014 "vmovd\t{$src, $dst|$dst, $src}", 4015 [(store (i32 (extractelt (v4i32 VR128X:$src), 4016 (iPTR 0))), addr:$dst)]>, 4017 EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>; 4018} // ExeDomain = SSEPackedInt 4019 4020// Move quadword from xmm1 register to r/m64 4021// 4022let ExeDomain = SSEPackedInt in { 4023def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src), 4024 "vmovq\t{$src, $dst|$dst, $src}", 4025 [(set GR64:$dst, (extractelt (v2i64 VR128X:$src), 4026 (iPTR 0)))]>, 4027 PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>, 4028 Requires<[HasAVX512]>; 4029 4030let 
isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in 4031def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src), 4032 "vmovq\t{$src, $dst|$dst, $src}", []>, PD, 4033 EVEX, VEX_W, Sched<[WriteVecStore]>, 4034 Requires<[HasAVX512, In64BitMode]>; 4035 4036def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs), 4037 (ins i64mem:$dst, VR128X:$src), 4038 "vmovq\t{$src, $dst|$dst, $src}", 4039 [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)), 4040 addr:$dst)]>, 4041 EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>, 4042 Sched<[WriteVecStore]>, Requires<[HasAVX512]>; 4043 4044let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in 4045def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst), 4046 (ins VR128X:$src), 4047 "vmovq\t{$src, $dst|$dst, $src}", []>, 4048 EVEX, VEX_W, Sched<[SchedWriteVecLogic.XMM]>; 4049} // ExeDomain = SSEPackedInt 4050 4051def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}", 4052 (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>; 4053 4054let Predicates = [HasAVX512] in { 4055 def : Pat<(X86vextractstore64 (v2i64 VR128X:$src), addr:$dst), 4056 (VMOVPQI2QIZmr addr:$dst, VR128X:$src)>; 4057} 4058 4059// Move Scalar Single to Double Int 4060// 4061let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in { 4062def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), 4063 (ins FR32X:$src), 4064 "vmovd\t{$src, $dst|$dst, $src}", 4065 [(set GR32:$dst, (bitconvert FR32X:$src))]>, 4066 EVEX, Sched<[WriteVecMoveToGpr]>; 4067} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1 4068 4069// Move Quadword Int to Packed Quadword Int 4070// 4071let ExeDomain = SSEPackedInt in { 4072def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst), 4073 (ins i64mem:$src), 4074 "vmovq\t{$src, $dst|$dst, $src}", 4075 [(set VR128X:$dst, 4076 (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, 4077 EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>; 4078} // ExeDomain = SSEPackedInt 4079 4080// Allow "vmovd" but print 
"vmovq". 4081def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}", 4082 (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>; 4083def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}", 4084 (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>; 4085 4086// Conversions between masks and scalar fp. 4087def : Pat<(v32i1 (bitconvert FR32X:$src)), 4088 (KMOVDkr (VMOVSS2DIZrr FR32X:$src))>; 4089def : Pat<(f32 (bitconvert VK32:$src)), 4090 (VMOVDI2SSZrr (KMOVDrk VK32:$src))>; 4091 4092def : Pat<(v64i1 (bitconvert FR64X:$src)), 4093 (KMOVQkr (VMOVSDto64Zrr FR64X:$src))>; 4094def : Pat<(f64 (bitconvert VK64:$src)), 4095 (VMOV64toSDZrr (KMOVQrk VK64:$src))>; 4096 4097//===----------------------------------------------------------------------===// 4098// AVX-512 MOVSH, MOVSS, MOVSD 4099//===----------------------------------------------------------------------===// 4100 4101multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag, 4102 X86VectorVTInfo _, 4103 list<Predicate> prd = [HasAVX512, OptForSize]> { 4104 let Predicates = prd in 4105 def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst), 4106 (ins _.RC:$src1, _.RC:$src2), 4107 !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 4108 [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))], 4109 _.ExeDomain>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>; 4110 def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst), 4111 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), 4112 !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|", 4113 "$dst {${mask}} {z}, $src1, $src2}"), 4114 [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask, 4115 (_.VT (OpNode _.RC:$src1, _.RC:$src2)), 4116 _.ImmAllZerosV)))], 4117 _.ExeDomain>, EVEX_4V, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>; 4118 let Constraints = "$src0 = $dst" in 4119 def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst), 4120 (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), 4121 !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|", 4122 "$dst {${mask}}, $src1, $src2}"), 4123 [(set 
_.RC:$dst, (_.VT (X86selects _.KRCWM:$mask, 4124 (_.VT (OpNode _.RC:$src1, _.RC:$src2)), 4125 (_.VT _.RC:$src0))))], 4126 _.ExeDomain>, EVEX_4V, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>; 4127 let canFoldAsLoad = 1, isReMaterializable = 1 in { 4128 def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src), 4129 !strconcat(asm, "\t{$src, $dst|$dst, $src}"), 4130 [(set _.RC:$dst, (_.VT (vzload_frag addr:$src)))], 4131 _.ExeDomain>, EVEX, Sched<[WriteFLoad]>; 4132 // _alt version uses FR32/FR64 register class. 4133 let isCodeGenOnly = 1 in 4134 def rm_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src), 4135 !strconcat(asm, "\t{$src, $dst|$dst, $src}"), 4136 [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))], 4137 _.ExeDomain>, EVEX, Sched<[WriteFLoad]>; 4138 } 4139 let mayLoad = 1, hasSideEffects = 0 in { 4140 let Constraints = "$src0 = $dst" in 4141 def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), 4142 (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src), 4143 !strconcat(asm, "\t{$src, $dst {${mask}}|", 4144 "$dst {${mask}}, $src}"), 4145 [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>; 4146 def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), 4147 (ins _.KRCWM:$mask, _.ScalarMemOp:$src), 4148 !strconcat(asm, "\t{$src, $dst {${mask}} {z}|", 4149 "$dst {${mask}} {z}, $src}"), 4150 [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>; 4151 } 4152 def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src), 4153 !strconcat(asm, "\t{$src, $dst|$dst, $src}"), 4154 [(store _.FRC:$src, addr:$dst)], _.ExeDomain>, 4155 EVEX, Sched<[WriteFStore]>; 4156 let mayStore = 1, hasSideEffects = 0 in 4157 def mrk: AVX512PI<0x11, MRMDestMem, (outs), 4158 (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.RC:$src), 4159 !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"), 4160 [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>, 4161 NotMemoryFoldable; 4162} 4163 4164defm VMOVSSZ : avx512_move_scalar<"vmovss", 
X86Movss, X86vzload32, f32x_info>, 4165 VEX_LIG, XS, EVEX_CD8<32, CD8VT1>; 4166 4167defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>, 4168 VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>; 4169 4170defm VMOVSHZ : avx512_move_scalar<"vmovsh", X86Movsh, X86vzload16, f16x_info, 4171 [HasFP16]>, 4172 VEX_LIG, T_MAP5XS, EVEX_CD8<16, CD8VT1>; 4173 4174multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode, 4175 PatLeaf ZeroFP, X86VectorVTInfo _> { 4176 4177def : Pat<(_.VT (OpNode _.RC:$src0, 4178 (_.VT (scalar_to_vector 4179 (_.EltVT (X86selects VK1WM:$mask, 4180 (_.EltVT _.FRC:$src1), 4181 (_.EltVT _.FRC:$src2))))))), 4182 (!cast<Instruction>(InstrStr#rrk) 4183 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)), 4184 VK1WM:$mask, 4185 (_.VT _.RC:$src0), 4186 (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>; 4187 4188def : Pat<(_.VT (OpNode _.RC:$src0, 4189 (_.VT (scalar_to_vector 4190 (_.EltVT (X86selects VK1WM:$mask, 4191 (_.EltVT _.FRC:$src1), 4192 (_.EltVT ZeroFP))))))), 4193 (!cast<Instruction>(InstrStr#rrkz) 4194 VK1WM:$mask, 4195 (_.VT _.RC:$src0), 4196 (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>; 4197} 4198 4199multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _, 4200 dag Mask, RegisterClass MaskRC> { 4201 4202def : Pat<(masked_store 4203 (_.info512.VT (insert_subvector undef, 4204 (_.info128.VT _.info128.RC:$src), 4205 (iPTR 0))), addr:$dst, Mask), 4206 (!cast<Instruction>(InstrStr#mrk) addr:$dst, 4207 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM), 4208 _.info128.RC:$src)>; 4209 4210} 4211 4212multiclass avx512_store_scalar_lowering_subreg<string InstrStr, 4213 AVX512VLVectorVTInfo _, 4214 dag Mask, RegisterClass MaskRC, 4215 SubRegIndex subreg> { 4216 4217def : Pat<(masked_store 4218 (_.info512.VT (insert_subvector undef, 4219 (_.info128.VT _.info128.RC:$src), 4220 (iPTR 0))), addr:$dst, Mask), 4221 (!cast<Instruction>(InstrStr#mrk) addr:$dst, 4222 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), 
MaskRC:$mask, subreg)), VK1WM), 4223 _.info128.RC:$src)>; 4224 4225} 4226 4227// This matches the more recent codegen from clang that avoids emitting a 512 4228// bit masked store directly. Codegen will widen 128-bit masked store to 512 4229// bits on AVX512F only targets. 4230multiclass avx512_store_scalar_lowering_subreg2<string InstrStr, 4231 AVX512VLVectorVTInfo _, 4232 dag Mask512, dag Mask128, 4233 RegisterClass MaskRC, 4234 SubRegIndex subreg> { 4235 4236// AVX512F pattern. 4237def : Pat<(masked_store 4238 (_.info512.VT (insert_subvector undef, 4239 (_.info128.VT _.info128.RC:$src), 4240 (iPTR 0))), addr:$dst, Mask512), 4241 (!cast<Instruction>(InstrStr#mrk) addr:$dst, 4242 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4243 _.info128.RC:$src)>; 4244 4245// AVX512VL pattern. 4246def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128), 4247 (!cast<Instruction>(InstrStr#mrk) addr:$dst, 4248 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4249 _.info128.RC:$src)>; 4250} 4251 4252multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _, 4253 dag Mask, RegisterClass MaskRC> { 4254 4255def : Pat<(_.info128.VT (extract_subvector 4256 (_.info512.VT (masked_load addr:$srcAddr, Mask, 4257 _.info512.ImmAllZerosV)), 4258 (iPTR 0))), 4259 (!cast<Instruction>(InstrStr#rmkz) 4260 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM), 4261 addr:$srcAddr)>; 4262 4263def : Pat<(_.info128.VT (extract_subvector 4264 (_.info512.VT (masked_load addr:$srcAddr, Mask, 4265 (_.info512.VT (insert_subvector undef, 4266 (_.info128.VT (X86vzmovl _.info128.RC:$src)), 4267 (iPTR 0))))), 4268 (iPTR 0))), 4269 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src, 4270 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM), 4271 addr:$srcAddr)>; 4272 4273} 4274 4275multiclass avx512_load_scalar_lowering_subreg<string InstrStr, 4276 AVX512VLVectorVTInfo _, 4277 dag Mask, RegisterClass MaskRC, 4278 
SubRegIndex subreg> { 4279 4280def : Pat<(_.info128.VT (extract_subvector 4281 (_.info512.VT (masked_load addr:$srcAddr, Mask, 4282 _.info512.ImmAllZerosV)), 4283 (iPTR 0))), 4284 (!cast<Instruction>(InstrStr#rmkz) 4285 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4286 addr:$srcAddr)>; 4287 4288def : Pat<(_.info128.VT (extract_subvector 4289 (_.info512.VT (masked_load addr:$srcAddr, Mask, 4290 (_.info512.VT (insert_subvector undef, 4291 (_.info128.VT (X86vzmovl _.info128.RC:$src)), 4292 (iPTR 0))))), 4293 (iPTR 0))), 4294 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src, 4295 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4296 addr:$srcAddr)>; 4297 4298} 4299 4300// This matches the more recent codegen from clang that avoids emitting a 512 4301// bit masked load directly. Codegen will widen 128-bit masked load to 512 4302// bits on AVX512F only targets. 4303multiclass avx512_load_scalar_lowering_subreg2<string InstrStr, 4304 AVX512VLVectorVTInfo _, 4305 dag Mask512, dag Mask128, 4306 RegisterClass MaskRC, 4307 SubRegIndex subreg> { 4308// AVX512F patterns. 4309def : Pat<(_.info128.VT (extract_subvector 4310 (_.info512.VT (masked_load addr:$srcAddr, Mask512, 4311 _.info512.ImmAllZerosV)), 4312 (iPTR 0))), 4313 (!cast<Instruction>(InstrStr#rmkz) 4314 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4315 addr:$srcAddr)>; 4316 4317def : Pat<(_.info128.VT (extract_subvector 4318 (_.info512.VT (masked_load addr:$srcAddr, Mask512, 4319 (_.info512.VT (insert_subvector undef, 4320 (_.info128.VT (X86vzmovl _.info128.RC:$src)), 4321 (iPTR 0))))), 4322 (iPTR 0))), 4323 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src, 4324 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4325 addr:$srcAddr)>; 4326 4327// AVX512Vl patterns. 
4328def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128, 4329 _.info128.ImmAllZerosV)), 4330 (!cast<Instruction>(InstrStr#rmkz) 4331 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4332 addr:$srcAddr)>; 4333 4334def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128, 4335 (_.info128.VT (X86vzmovl _.info128.RC:$src)))), 4336 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src, 4337 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4338 addr:$srcAddr)>; 4339} 4340 4341defm : avx512_move_scalar_lowering<"VMOVSHZ", X86Movsh, fp16imm0, v8f16x_info>; 4342defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>; 4343defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>; 4344 4345defm : avx512_store_scalar_lowering<"VMOVSHZ", avx512vl_f16_info, 4346 (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>; 4347defm : avx512_store_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info, 4348 (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>; 4349defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info, 4350 (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>; 4351defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info, 4352 (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>; 4353defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info, 4354 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>; 4355 4356defm : avx512_store_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info, 4357 (v32i1 (insert_subvector 4358 (v32i1 immAllZerosV), 4359 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))), 4360 (iPTR 0))), 4361 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))), 4362 GR8, sub_8bit>; 4363defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info, 4364 (v16i1 (insert_subvector 4365 (v16i1 immAllZerosV), 4366 (v4i1 (extract_subvector 4367 (v8i1 
(bitconvert (and GR8:$mask, (i8 1)))), 4368 (iPTR 0))), 4369 (iPTR 0))), 4370 (v4i1 (extract_subvector 4371 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))), 4372 (iPTR 0))), GR8, sub_8bit>; 4373defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info, 4374 (v8i1 4375 (extract_subvector 4376 (v16i1 4377 (insert_subvector 4378 (v16i1 immAllZerosV), 4379 (v2i1 (extract_subvector 4380 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), 4381 (iPTR 0))), 4382 (iPTR 0))), 4383 (iPTR 0))), 4384 (v2i1 (extract_subvector 4385 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), 4386 (iPTR 0))), GR8, sub_8bit>; 4387 4388defm : avx512_load_scalar_lowering<"VMOVSHZ", avx512vl_f16_info, 4389 (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>; 4390defm : avx512_load_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info, 4391 (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>; 4392defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info, 4393 (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>; 4394defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info, 4395 (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>; 4396defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info, 4397 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>; 4398 4399defm : avx512_load_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info, 4400 (v32i1 (insert_subvector 4401 (v32i1 immAllZerosV), 4402 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))), 4403 (iPTR 0))), 4404 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))), 4405 GR8, sub_8bit>; 4406defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info, 4407 (v16i1 (insert_subvector 4408 (v16i1 immAllZerosV), 4409 (v4i1 (extract_subvector 4410 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))), 4411 (iPTR 0))), 4412 (iPTR 0))), 4413 (v4i1 (extract_subvector 4414 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))), 4415 (iPTR 0))), GR8, sub_8bit>; 4416defm : 
avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info, 4417 (v8i1 4418 (extract_subvector 4419 (v16i1 4420 (insert_subvector 4421 (v16i1 immAllZerosV), 4422 (v2i1 (extract_subvector 4423 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), 4424 (iPTR 0))), 4425 (iPTR 0))), 4426 (iPTR 0))), 4427 (v2i1 (extract_subvector 4428 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), 4429 (iPTR 0))), GR8, sub_8bit>; 4430 4431def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), (f16 FR16X:$src2))), 4432 (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrk 4433 (v8f16 (COPY_TO_REGCLASS FR16X:$src2, VR128X)), 4434 VK1WM:$mask, (v8f16 (IMPLICIT_DEF)), 4435 (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>; 4436 4437def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), fp16imm0)), 4438 (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrkz VK1WM:$mask, (v8f16 (IMPLICIT_DEF)), 4439 (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>; 4440 4441def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))), 4442 (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk 4443 (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)), 4444 VK1WM:$mask, (v4f32 (IMPLICIT_DEF)), 4445 (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>; 4446 4447def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)), 4448 (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)), 4449 (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>; 4450 4451def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), (f32 FR32X:$src0))), 4452 (COPY_TO_REGCLASS 4453 (v4f32 (VMOVSSZrmk (v4f32 (COPY_TO_REGCLASS FR32X:$src0, VR128X)), 4454 VK1WM:$mask, addr:$src)), 4455 FR32X)>; 4456def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), fp32imm0)), 4457 (COPY_TO_REGCLASS (v4f32 (VMOVSSZrmkz VK1WM:$mask, addr:$src)), FR32X)>; 4458 4459def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))), 4460 (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk 4461 (v2f64 (COPY_TO_REGCLASS 
                              FR64X:$src2, VR128X)),
                             VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
                             (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;

def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fp64imm0)),
          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
                             (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;

// Select of a loaded f64 -> masked/zero-masked load forms.
def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0))),
          (COPY_TO_REGCLASS
           (v2f64 (VMOVSDZrmk (v2f64 (COPY_TO_REGCLASS FR64X:$src0, VR128X)),
                              VK1WM:$mask, addr:$src)),
           FR64X)>;
def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)),
          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrmkz VK1WM:$mask, addr:$src)), FR64X)>;


// Vector-typed scalar selects: merge element 0 of $src2 into $src1 under the
// one-bit mask (the remaining elements come from $src1 in both operands).
def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 VR128X:$src2))),
          (VMOVSSZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 VR128X:$src2))),
          (VMOVSDZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;

def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 immAllZerosV))),
          (VMOVSSZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 immAllZerosV))),
          (VMOVSDZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;

// Reversed-operand (MRMDestReg) encodings of the scalar moves.  These are
// codegen-only, carry no ISel patterns, and exist for disassembly and for the
// memory-fold tables: FoldGenData ties each one to its forward twin.
let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
  let Predicates = [HasFP16] in {
    def VMOVSHZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                              (ins VR128X:$src1, VR128X:$src2),
                              "vmovsh\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                              []>, T_MAP5XS, EVEX_4V, VEX_LIG,
                              FoldGenData<"VMOVSHZrr">,
                              Sched<[SchedWriteFShuffle.XMM]>;

    let Constraints = "$src0 = $dst" in
    def VMOVSHZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                               (ins f16x_info.RC:$src0, f16x_info.KRCWM:$mask,
                                VR128X:$src1, VR128X:$src2),
                               "vmovsh\t{$src2, $src1, $dst {${mask}}|"#
                                 "$dst {${mask}}, $src1, $src2}",
                               []>, T_MAP5XS, EVEX_K, EVEX_4V, VEX_LIG,
                               FoldGenData<"VMOVSHZrrk">,
                               Sched<[SchedWriteFShuffle.XMM]>;

    def VMOVSHZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                                (ins f16x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
                                "vmovsh\t{$src2, $src1, $dst {${mask}} {z}|"#
                                  "$dst {${mask}} {z}, $src1, $src2}",
                                []>, EVEX_KZ, T_MAP5XS, EVEX_4V, VEX_LIG,
                                FoldGenData<"VMOVSHZrrkz">,
                                Sched<[SchedWriteFShuffle.XMM]>;
  }
  def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            []>, XS, EVEX_4V, VEX_LIG,
                            FoldGenData<"VMOVSSZrr">,
                            Sched<[SchedWriteFShuffle.XMM]>;

  let Constraints = "$src0 = $dst" in
  def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                             (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
                              VR128X:$src1, VR128X:$src2),
                             "vmovss\t{$src2, $src1, $dst {${mask}}|"#
                               "$dst {${mask}}, $src1, $src2}",
                             []>, EVEX_K, XS, EVEX_4V, VEX_LIG,
                             FoldGenData<"VMOVSSZrrk">,
                             Sched<[SchedWriteFShuffle.XMM]>;

  def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                              (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
                              "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
                                "$dst {${mask}} {z}, $src1, $src2}",
                              []>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
                              FoldGenData<"VMOVSSZrrkz">,
                              Sched<[SchedWriteFShuffle.XMM]>;

  def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            []>, XD, EVEX_4V, VEX_LIG, VEX_W,
                            FoldGenData<"VMOVSDZrr">,
                            Sched<[SchedWriteFShuffle.XMM]>;

  let Constraints = "$src0 = $dst" in
  def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                             (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
                              VR128X:$src1, VR128X:$src2),
                             "vmovsd\t{$src2, $src1, $dst {${mask}}|"#
                               "$dst {${mask}}, $src1, $src2}",
                             []>, EVEX_K, XD, EVEX_4V, VEX_LIG,
                             VEX_W, FoldGenData<"VMOVSDZrrk">,
                             Sched<[SchedWriteFShuffle.XMM]>;

  def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                              (ins f64x_info.KRCWM:$mask, VR128X:$src1,
                               VR128X:$src2),
                              "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
                                "$dst {${mask}} {z}, $src1, $src2}",
                              []>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
                              VEX_W, FoldGenData<"VMOVSDZrrkz">,
                              Sched<[SchedWriteFShuffle.XMM]>;
}

// ".s" assembly aliases let the user/tests request the reversed encodings
// explicitly (alias emit-priority 0: never preferred when printing).
def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VMOVSHZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}}|"#
                  "$dst {${mask}}, $src1, $src2}",
                (VMOVSHZrrk_REV VR128X:$dst, VK1WM:$mask,
                 VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}} {z}|"#
                  "$dst {${mask}} {z}, $src1, $src2}",
                (VMOVSHZrrkz_REV VR128X:$dst, VK1WM:$mask,
                 VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
                  "$dst {${mask}}, $src1, $src2}",
                (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask,
                 VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
                  "$dst {${mask}} {z}, $src1, $src2}",
                (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask,
                 VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
                  "$dst {${mask}}, $src1, $src2}",
                (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask,
                 VR128X:$src1, VR128X:$src2), 0>;
4593def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"# 4594 "$dst {${mask}} {z}, $src1, $src2}", 4595 (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask, 4596 VR128X:$src1, VR128X:$src2), 0>; 4597 4598let Predicates = [HasAVX512, OptForSize] in { 4599 def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))), 4600 (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>; 4601 def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))), 4602 (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>; 4603 4604 // Move low f32 and clear high bits. 4605 def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))), 4606 (SUBREG_TO_REG (i32 0), 4607 (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)), 4608 (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>; 4609 def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))), 4610 (SUBREG_TO_REG (i32 0), 4611 (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)), 4612 (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>; 4613 4614 def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))), 4615 (SUBREG_TO_REG (i32 0), 4616 (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)), 4617 (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>; 4618 def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))), 4619 (SUBREG_TO_REG (i32 0), 4620 (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)), 4621 (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>; 4622} 4623 4624// Use 128-bit blends for OptForSpeed since BLENDs have better throughput than 4625// VMOVSS/SD. Unfortunately, loses the ability to use XMM16-31. 
let Predicates = [HasAVX512, OptForSpeed] in {
  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
                    (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)),
                    (i8 1))), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
                    (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
                    (i8 3))), sub_xmm)>;
}

let Predicates = [HasAVX512] in {
  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
            (VMOVSSZrm addr:$src)>;
  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
            (VMOVSDZrm addr:$src)>;

  // Represent the same patterns above but in the form they appear for
  // 256-bit types
  def : Pat<(v8f32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v4f64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;

  // Represent the same patterns above but in the form they appear for
  // 512-bit types
  def : Pat<(v16f32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v8f64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
}
// FP16 analogues of the X86vzmovl / X86vzload patterns above.
let Predicates = [HasFP16] in {
  def : Pat<(v8f16 (X86vzmovl (v8f16 VR128X:$src))),
            (VMOVSHZrr (v8f16 (AVX512_128_SET0)), VR128X:$src)>;

  // FIXME we need better canonicalization in dag combine
  def : Pat<(v16f16 (X86vzmovl (v16f16 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
              (v8f16 (EXTRACT_SUBREG (v16f16 VR256X:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v32f16 (X86vzmovl (v32f16 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
              (v8f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_xmm)))), sub_xmm)>;

  def : Pat<(v8f16 (X86vzload16 addr:$src)),
            (VMOVSHZrm addr:$src)>;

  def : Pat<(v16f16 (X86vzload16 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;

  def : Pat<(v32f16 (X86vzload16 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;
}

// vmovq xmm, xmm: moves the low i64 and zeroes the upper bits (X86vzmovl).
let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
                                  (ins VR128X:$src),
                                  "vmovq\t{$src, $dst|$dst, $src}",
                                  [(set VR128X:$dst, (v2i64 (X86vzmovl
                                                     (v2i64 VR128X:$src))))]>,
                                  EVEX, VEX_W;
}

let Predicates = [HasAVX512] in {
  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
            (VMOVDI2PDIZrr GR32:$src)>;

  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
            (VMOV64toPQIZrr GR64:$src)>;

  // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
  def : Pat<(v4i32 (X86vzload32 addr:$src)),
            (VMOVDI2PDIZrm addr:$src)>;
  def : Pat<(v8i32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
            (VMOVZPQILo2PQIZrr VR128X:$src)>;
  def : Pat<(v2i64 (X86vzload64 addr:$src)),
            (VMOVQI2PQIZrm addr:$src)>;
  def : Pat<(v4i64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;

  // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
  def : Pat<(v16i32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  def : Pat<(v8i64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;

  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2f64 (VMOVZPQILo2PQIZrr
                     (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))),
             sub_xmm)>;
  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2i64 (VMOVZPQILo2PQIZrr
                     (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))),
             sub_xmm)>;

  def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2f64 (VMOVZPQILo2PQIZrr
                     (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))),
             sub_xmm)>;
  def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2i64 (VMOVZPQILo2PQIZrr
                     (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))),
             sub_xmm)>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - Non-temporals
//===----------------------------------------------------------------------===//

def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
                            (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
                            [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>,
                            EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;

let Predicates = [HasVLX] in {
  def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
                                 (ins i256mem:$src),
                                 "vmovntdqa\t{$src, $dst|$dst, $src}",
                                 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>,
                                 EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;

  def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
                                 (ins i128mem:$src),
                                 "vmovntdqa\t{$src, $dst|$dst, $src}",
                                 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>,
                                 EVEX,
                                 T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
}

// Non-temporal (streaming) aligned store; st_frag defaults to
// alignednontemporalstore and is parameterized for reuse.
multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                        X86SchedWriteMoveLS Sched,
                        PatFrag st_frag = alignednontemporalstore> {
  let SchedRW = [Sched.MR], AddedComplexity = 400 in
  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(st_frag (_.VT _.RC:$src), addr:$dst)],
                    _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
}

multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
                           AVX512VLVectorVTInfo VTInfo,
                           X86SchedWriteMoveLSWidths Sched> {
  let Predicates = [HasAVX512] in
    defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256;
    defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128;
  }
}

defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
                                SchedWriteVecMoveLSNT>, PD;
defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
                                SchedWriteFMoveLSNT>, PD, VEX_W;
defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
                                SchedWriteFMoveLSNT>, PS;

// Route all 512-bit non-temporal integer stores/loads to VMOVNTDQ/VMOVNTDQA
// regardless of element type.
let Predicates = [HasAVX512], AddedComplexity = 400 in {
  def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;

  def : Pat<(v8f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v16f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v8i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v16i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v32i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v64i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
}

// Same routing for the 256/128-bit forms under VLX.
let Predicates = [HasVLX], AddedComplexity = 400 in {
  def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;

  def : Pat<(v4f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v8f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v4i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v8i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v16i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v32i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;

  def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;

  def : Pat<(v2f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v4f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v2i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v4i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v8i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v16i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - Integer arithmetic
//
// Register-register and register-memory forms of a maskable integer binop.
multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86VectorVTInfo _, X86FoldableSchedWrite sched,
                           bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                            IsCommutable, IsCommutable>, AVX512BIBase, EVEX_4V,
                            Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2)))>,
                            AVX512BIBase, EVEX_4V,
                            Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Adds the embedded-broadcast (EVEX.b) memory form on top of avx512_binop_rm.
multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
                            bit IsCommutable = 0> :
           avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                             (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                             "${src2}"#_.BroadcastStr#", $src1",
                             "$src1, ${src2}"#_.BroadcastStr,
                             (_.VT (OpNode _.RC:$src1,
                                    (_.BroadcastLdFrag addr:$src2)))>,
                             AVX512BIBase, EVEX_4V, EVEX_B,
                             Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Instantiates the 512-bit form under `prd` and the 256/128-bit forms under
// `prd` + VLX.
multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              AVX512VLVectorVTInfo VTInfo,
                              X86SchedWriteWidths sched, Predicate prd,
                              bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode,
                             VTInfo.info512, sched.ZMM,
                             IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
                                sched.YMM, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
                                sched.XMM, IsCommutable>, EVEX_V128;
  }
}

multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               AVX512VLVectorVTInfo VTInfo,
                               X86SchedWriteWidths sched, Predicate prd,
                               bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
                              IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
                                 sched.YMM, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
                                 sched.XMM, IsCommutable>, EVEX_V128;
  }
}

// Element-width wrappers.  _q/_d go through the broadcast-capable multiclass
// (rmb_vl); _w/_b use the non-broadcast one (rm_vl).
multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
                                  sched, prd, IsCommutable>,
                                  VEX_W, EVEX_CD8<64, CD8VF>;
}

multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
                                  sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
}

multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
                                 sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
                                 VEX_WIG;
}

multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
                                 sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
                                 VEX_WIG;
}

// Convenience bundles: d+q, b+w, and all four widths; the mnemonic suffix
// (b/w/d/q) is appended to OpcodeStr here.
multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd, bit IsCommutable = 0> {
  defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
                                IsCommutable>;

  defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
                                IsCommutable>;
}

multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd, bit IsCommutable = 0> {
  defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
                                IsCommutable>;

  defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
                                IsCommutable>;
}

multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched,
                                  bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
                                    sched, HasAVX512, IsCommutable>,
              avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
                                    sched, HasBWI, IsCommutable>;
}

// Binop whose source and destination vector types differ; the broadcast form
// loads a _Brdct-typed scalar and bitconverts to the source type.
multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            SDNode OpNode,X86VectorVTInfo _Src,
                            X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
                            bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
                            "$src2, $src1","$src1, $src2",
                            (_Dst.VT (OpNode
                                      (_Src.VT _Src.RC:$src1),
                                      (_Src.VT _Src.RC:$src2))),
                            IsCommutable>,
                            AVX512BIBase, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem,
                            _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                     (_Src.LdFrag addr:$src2)))>,
                            AVX512BIBase, EVEX_4V,
                            Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                             (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
                             OpcodeStr,
                             "${src2}"#_Brdct.BroadcastStr#", $src1",
                             "$src1, ${src2}"#_Brdct.BroadcastStr,
                             (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
                             (_Brdct.VT (_Brdct.BroadcastLdFrag addr:$src2)))))>,
                             AVX512BIBase, EVEX_4V, EVEX_B,
                             Sched<[sched.Folded, sched.ReadAfterFold]>;
}

defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
                                    SchedWriteVecALU, 1>;
defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
                                    SchedWriteVecALU, 0>;
defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat,
                                    SchedWriteVecALU, HasBWI, 0>;
defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
                                     SchedWriteVecALU, HasBWI, 1>;
defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat,
                                     SchedWriteVecALU, HasBWI, 0>;
defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
                                    SchedWritePMULLD, HasAVX512, 1>, T8PD;
defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
                                    SchedWriteVecIMul, HasBWI, 1>;
defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
                                    SchedWriteVecIMul, HasDQI, 1>, T8PD,
                                    NotEVEX2VEXConvertible;
defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
                                    HasBWI, 1>;
defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
                                     HasBWI, 1>;
defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
                                      SchedWriteVecIMul,
                                      HasBWI, 1>, T8PD;
defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
                                   SchedWriteVecALU, HasBWI, 1>;
defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
                                    SchedWriteVecIMul, HasAVX512, 1>, T8PD;
defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
                                     SchedWriteVecIMul, HasAVX512, 1>;

// All three vector widths of a mixed-type binop (avx512_binop_rm2); the
// broadcast operand is always the i64 info of the matching width.
multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
                            X86SchedWriteWidths sched,
                            AVX512VLVectorVTInfo _SrcVTInfo,
                            AVX512VLVectorVTInfo _DstVTInfo,
                            SDNode OpNode, Predicate prd, bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
                                   _SrcVTInfo.info512, _DstVTInfo.info512,
                                   v8i64_info, IsCommutable>,
                                   EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
  let Predicates = [HasVLX, prd] in {
    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
                                      _SrcVTInfo.info256, _DstVTInfo.info256,
                                      v4i64x_info, IsCommutable>,
                                      EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
                                      _SrcVTInfo.info128, _DstVTInfo.info128,
                                      v2i64x_info, IsCommutable>,
                                      EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
  }
}

defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
                                       avx512vl_i8_info, avx512vl_i8_info,
                                       X86multishift, HasVBMI, 0>, T8PD;

// Embedded-broadcast memory form for the pack instructions (broadcasts a
// source-element scalar, then bitconverts back to the source vector type).
multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
                            X86FoldableSchedWrite sched> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                             (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
                             OpcodeStr,
                             "${src2}"#_Src.BroadcastStr#", $src1",
                             "$src1, ${src2}"#_Src.BroadcastStr,
                             (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
                             (_Src.VT (_Src.BroadcastLdFrag addr:$src2)))))>,
                             EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
                             Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// rr and rm forms for ops whose destination type differs from the source
// (packs, pmadd).
multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
                           SDNode OpNode,X86VectorVTInfo _Src,
                           X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
                           bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
                            "$src2, $src1","$src1, $src2",
                            (_Dst.VT (OpNode
                                      (_Src.VT _Src.RC:$src1),
                                      (_Src.VT _Src.RC:$src2))),
                            IsCommutable, IsCommutable>,
                            EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                     (_Src.LdFrag addr:$src2)))>,
                            EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
                            Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// i32 -> i16 packs get the broadcast form; i16 -> i8 packs below do not.
multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode> {
  let Predicates = [HasBWI] in
    defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
                                  v32i16_info, SchedWriteShuffle.ZMM>,
                  avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
                                   v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
                                     v16i16x_info, SchedWriteShuffle.YMM>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
                                      v16i16x_info, SchedWriteShuffle.YMM>,
                     EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
                                     v8i16x_info, SchedWriteShuffle.XMM>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
                                      v8i16x_info, SchedWriteShuffle.XMM>,
                     EVEX_V128;
  }
}
multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
                                   SDNode OpNode> {
  let Predicates = [HasBWI] in
    defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info,
                                  SchedWriteShuffle.ZMM>, EVEX_V512, VEX_WIG;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
                                     v32i8x_info, SchedWriteShuffle.YMM>,
                     EVEX_V256, VEX_WIG;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
                                     v16i8x_info, SchedWriteShuffle.XMM>,
                     EVEX_V128, VEX_WIG;
  }
}

multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
                         SDNode OpNode, AVX512VLVectorVTInfo _Src,
                         AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
  let Predicates = [HasBWI] in
    defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
                                  _Dst.info512, SchedWriteVecIMul.ZMM,
                                  IsCommutable>, EVEX_V512;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
                                     _Dst.info256, SchedWriteVecIMul.YMM,
                                     IsCommutable>, EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
                                     _Dst.info128, SchedWriteVecIMul.XMM,
                                     IsCommutable>, EVEX_V128;
  }
}

defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;

defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
                                avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, VEX_WIG;
defm VPMADDWD : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
                              avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, VEX_WIG;

defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
                                    SchedWriteVecALU, HasBWI, 1>;
defm
VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax, 5177 SchedWriteVecALU, HasAVX512, 1>, T8PD; 5178defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax, 5179 SchedWriteVecALU, HasAVX512, 1>, T8PD, 5180 NotEVEX2VEXConvertible; 5181 5182defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax, 5183 SchedWriteVecALU, HasBWI, 1>; 5184defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax, 5185 SchedWriteVecALU, HasBWI, 1>, T8PD; 5186defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax, 5187 SchedWriteVecALU, HasAVX512, 1>, T8PD; 5188defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax, 5189 SchedWriteVecALU, HasAVX512, 1>, T8PD, 5190 NotEVEX2VEXConvertible; 5191 5192defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin, 5193 SchedWriteVecALU, HasBWI, 1>, T8PD; 5194defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin, 5195 SchedWriteVecALU, HasBWI, 1>; 5196defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin, 5197 SchedWriteVecALU, HasAVX512, 1>, T8PD; 5198defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin, 5199 SchedWriteVecALU, HasAVX512, 1>, T8PD, 5200 NotEVEX2VEXConvertible; 5201 5202defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin, 5203 SchedWriteVecALU, HasBWI, 1>; 5204defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin, 5205 SchedWriteVecALU, HasBWI, 1>, T8PD; 5206defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin, 5207 SchedWriteVecALU, HasAVX512, 1>, T8PD; 5208defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin, 5209 SchedWriteVecALU, HasAVX512, 1>, T8PD, 5210 NotEVEX2VEXConvertible; 5211 5212// PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX. 
// Without VL, the 128/256-bit v2i64/v4i64 multiply is implemented by widening
// to 512 bits (INSERT_SUBREG into an IMPLICIT_DEF ZMM), using the ZMM
// instruction, and extracting the low sub-register again. The upper lanes are
// don't-care, which is safe because the result lanes are independent.
let Predicates = [HasDQI, NoVLX] in {
  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG
                (VPMULLQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;
  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (VPMULLQZrmb
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    addr:$src2),
             sub_ymm)>;

  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG
                (VPMULLQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (VPMULLQZrmb
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    addr:$src2),
             sub_xmm)>;
}

// Same widen-to-ZMM trick for the 64-bit element min/max instructions when VL
// is unavailable. Instr is the ZMM instruction base name; "rr"/"rmb" forms are
// looked up with !cast.
multiclass avx512_min_max_lowering<string Instr, SDNode OpNode> {
  def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rr")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;
  def : Pat<(v4i64 (OpNode (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rmb")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    addr:$src2),
             sub_ymm)>;

  def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rr")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
  def : Pat<(v2i64 (OpNode (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rmb")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    addr:$src2),
             sub_xmm)>;
}

let Predicates = [HasAVX512, NoVLX] in {
  defm : avx512_min_max_lowering<"VPMAXUQZ", umax>;
  defm : avx512_min_max_lowering<"VPMINUQZ", umin>;
  defm : avx512_min_max_lowering<"VPMAXSQZ", smax>;
  defm : avx512_min_max_lowering<"VPMINSQZ", smin>;
}

//===----------------------------------------------------------------------===//
// AVX-512  Logical Instructions
//===----------------------------------------------------------------------===//

// Only D/Q element sizes exist in hardware for the EVEX logical ops; byte and
// word operations are selected to the Q forms via the patterns below.
defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
                                   SchedWriteVecLogic, HasAVX512, 1>;
defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
                                  SchedWriteVecLogic, HasAVX512, 1>;
defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
                                   SchedWriteVecLogic, HasAVX512, 1>;
defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
                                    SchedWriteVecLogic, HasAVX512>;

// Select the (bitwise-identical) qword logical instructions for byte/word
// vector types, 128/256-bit forms under VL.
let Predicates = [HasVLX] in {
  def : Pat<(v16i8 (and VR128X:$src1, VR128X:$src2)),
            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (and VR128X:$src1, VR128X:$src2)),
            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v16i8 (or VR128X:$src1, VR128X:$src2)),
            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (or VR128X:$src1, VR128X:$src2)),
            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v16i8 (xor VR128X:$src1, VR128X:$src2)),
            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (xor VR128X:$src1, VR128X:$src2)),
            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v16i8 (X86andnp VR128X:$src1, VR128X:$src2)),
            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (X86andnp VR128X:$src1, VR128X:$src2)),
            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(and VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(and VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(or VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPORQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(or VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPORQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(xor VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(xor VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)),
            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)),
            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(v32i8 (or VR256X:$src1, VR256X:$src2)),
            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (or VR256X:$src1, VR256X:$src2)),
            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(v32i8 (xor VR256X:$src1, VR256X:$src2)),
            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (xor VR256X:$src1, VR256X:$src2)),
            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(v32i8 (X86andnp VR256X:$src1, VR256X:$src2)),
            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (X86andnp VR256X:$src1, VR256X:$src2)),
            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(and VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(and VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;

  def : Pat<(or VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPORQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(or VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPORQZ256rm VR256X:$src1, addr:$src2)>;

  def : Pat<(xor VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(xor VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
}

// 512-bit byte/word logical ops likewise select to the qword ZMM forms.
let Predicates = [HasAVX512] in {
  def : Pat<(v64i8 (and VR512:$src1, VR512:$src2)),
            (VPANDQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (and VR512:$src1, VR512:$src2)),
            (VPANDQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(v64i8 (or VR512:$src1, VR512:$src2)),
            (VPORQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (or VR512:$src1, VR512:$src2)),
            (VPORQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(v64i8 (xor VR512:$src1, VR512:$src2)),
            (VPXORQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (xor VR512:$src1, VR512:$src2)),
            (VPXORQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(v64i8 (X86andnp VR512:$src1, VR512:$src2)),
            (VPANDNQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (X86andnp VR512:$src1, VR512:$src2)),
            (VPANDNQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(and VR512:$src1, (loadv64i8 addr:$src2)),
            (VPANDQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(and VR512:$src1, (loadv32i16 addr:$src2)),
            (VPANDQZrm VR512:$src1, addr:$src2)>;

  def : Pat<(or VR512:$src1, (loadv64i8 addr:$src2)),
            (VPORQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(or VR512:$src1, (loadv32i16 addr:$src2)),
            (VPORQZrm VR512:$src1, addr:$src2)>;

  def : Pat<(xor VR512:$src1, (loadv64i8 addr:$src2)),
            (VPXORQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(xor VR512:$src1, (loadv32i16 addr:$src2)),
            (VPXORQZrm VR512:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR512:$src1, (loadv64i8 addr:$src2)),
            (VPANDNQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)),
            (VPANDNQZrm VR512:$src1, addr:$src2)>;
}

// Patterns to catch vselect with different type than logic op.
// `_` is the vselect (mask) type info; IntInfo is the type the logic op was
// performed in. The bitconvert bridges the two, and the masked instruction
// forms (k/kz) are looked up by string concatenation.
multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode,
                                   X86VectorVTInfo _,
                                   X86VectorVTInfo IntInfo> {
  // Masked register-register logical operations.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, _.RC:$src2)>;

  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
             _.RC:$src2)>;

  // Masked register-memory logical operations.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
                                            (load addr:$src2)))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
                                            (load addr:$src2)))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
             addr:$src2)>;
}

// Broadcast-memory variant of the mixed-type masked logical lowering above.
multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode,
                                         X86VectorVTInfo _,
                                         X86VectorVTInfo IntInfo> {
  // Register-broadcast logical operations.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert
                    (IntInfo.VT (OpNode _.RC:$src1,
                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert
                    (IntInfo.VT (OpNode _.RC:$src1,
                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rmbkz) _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
}

// Instantiates avx512_logical_lowering for all three vector widths, with the
// appropriate predicate per width.
multiclass avx512_logical_lowering_sizes<string InstrStr, SDNode OpNode,
                                         AVX512VLVectorVTInfo SelectInfo,
                                         AVX512VLVectorVTInfo IntInfo> {
let Predicates = [HasVLX] in {
  defm : avx512_logical_lowering<InstrStr#"Z128", OpNode, SelectInfo.info128,
                                 IntInfo.info128>;
  defm : avx512_logical_lowering<InstrStr#"Z256", OpNode, SelectInfo.info256,
                                 IntInfo.info256>;
}
let Predicates = [HasAVX512] in {
  defm : avx512_logical_lowering<InstrStr#"Z", OpNode, SelectInfo.info512,
                                 IntInfo.info512>;
}
}

// All-widths instantiation of the broadcast variant.
multiclass avx512_logical_lowering_sizes_bcast<string InstrStr, SDNode OpNode,
                                               AVX512VLVectorVTInfo SelectInfo,
                                               AVX512VLVectorVTInfo IntInfo> {
let Predicates = [HasVLX] in {
  defm : avx512_logical_lowering_bcast<InstrStr#"Z128", OpNode,
                                       SelectInfo.info128, IntInfo.info128>;
  defm : avx512_logical_lowering_bcast<InstrStr#"Z256", OpNode,
                                       SelectInfo.info256, IntInfo.info256>;
}
let Predicates = [HasAVX512] in {
  defm : avx512_logical_lowering_bcast<InstrStr#"Z", OpNode,
                                       SelectInfo.info512, IntInfo.info512>;
}
}

// Cross product of mask-element type (i64/i32/f32/f64 selects) against the
// element type the logic op was performed in. The "D"/"Q" suffix picks the
// dword or qword instruction family.
multiclass avx512_logical_lowering_types<string InstrStr, SDNode OpNode> {
  // i64 vselect with i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i8_info>;

  // i32 vselect with i64/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i8_info>;

  // f32 vselect with i64/i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i8_info>;

  // f64 vselect with i64/i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i8_info>;

  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"D", OpNode,
                                             avx512vl_f32_info,
                                             avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"Q", OpNode,
                                             avx512vl_f64_info,
                                             avx512vl_i64_info>;
}

defm : avx512_logical_lowering_types<"VPAND", and>;
defm : avx512_logical_lowering_types<"VPOR", or>;
defm : avx512_logical_lowering_types<"VPXOR", xor>;
defm : avx512_logical_lowering_types<"VPANDN", X86andnp>;
//===----------------------------------------------------------------------===//
// AVX-512  FP arithmetic
//===----------------------------------------------------------------------===//

// Scalar FP binary op: intrinsic (vector-in-xmm) forms with masking, plus
// isCodeGenOnly FRC (plain scalar register) forms used for ordinary
// scalar-IR selection.
multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                            SDPatternOperator OpNode, SDNode VecNode,
                            X86FoldableSchedWrite sched, bit IsCommutable> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
                           Sched<[sched]>;

  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1,
                                  (_.ScalarIntMemFrags addr:$src2)))>,
                           Sched<[sched.Folded, sched.ReadAfterFold]>;
  // Plain scalar-register forms; not emitted by the assembler/disassembler.
  let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.FRC:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
              Sched<[sched]> {
    let isCommutable = IsCommutable;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.ScalarMemOp:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                                 (_.ScalarLdFrag addr:$src2)))]>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
  }
}

// Scalar FP binary op with an explicit static rounding-control operand
// ({rn-sae} etc., EVEX_RC encoded via $rc).
multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                                  SDNode VecNode, X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
                          "$rc, $src2, $src1", "$src1, $src2, $rc",
                          (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                          (i32 timm:$rc))>,
                          EVEX_B, EVEX_RC, Sched<[sched]>;
}
// Scalar FP binary op with a suppress-all-exceptions ({sae}) form instead of
// rounding control (used by min/max, which do not round).
multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                                SDNode OpNode, SDNode VecNode, SDNode SaeNode,
                                X86FoldableSchedWrite sched, bit IsCommutable,
                                string EVEX2VexOvrd> {
  let ExeDomain = _.ExeDomain in {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
                           Sched<[sched]>, SIMD_EXC;

  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1,
                                  (_.ScalarIntMemFrags addr:$src2)))>,
                           Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;

  let isCodeGenOnly = 1, Predicates = [HasAVX512],
      Uses = [MXCSR], mayRaiseFPException = 1 in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.FRC:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
              Sched<[sched]>,
              EVEX2VEXOverride<EVEX2VexOvrd#"rr"> {
    let isCommutable = IsCommutable;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.ScalarMemOp:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                                 (_.ScalarLdFrag addr:$src2)))]>,
              Sched<[sched.Folded, sched.ReadAfterFold]>,
              EVEX2VEXOverride<EVEX2VexOvrd#"rm">;
  }

  let Uses = [MXCSR] in
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                            (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
                            EVEX_B, Sched<[sched]>;
  }
}

// SS/SD/SH scalar binop with rounding-control forms; the f16 form requires
// HasFP16.
multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                                SDNode VecNode, SDNode RndNode,
                                X86SchedWriteSizes sched, bit IsCommutable> {
  defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
                              sched.PS.Scl, IsCommutable>,
             avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode,
                                    sched.PS.Scl>,
             XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
                              sched.PD.Scl, IsCommutable>,
             avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode,
                                    sched.PD.Scl>,
             XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
  let Predicates = [HasFP16] in
    defm SHZ : avx512_fp_scalar<opc, OpcodeStr#"sh", f16x_info, OpNode,
                                VecNode, sched.PH.Scl, IsCommutable>,
               avx512_fp_scalar_round<opc, OpcodeStr#"sh", f16x_info, RndNode,
                                      sched.PH.Scl>,
               T_MAP5XS, EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>;
}

// SS/SD/SH scalar binop with {sae} forms (min/max).
multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode VecNode, SDNode SaeNode,
                              X86SchedWriteSizes sched, bit IsCommutable> {
  defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
                                  VecNode, SaeNode, sched.PS.Scl, IsCommutable,
                                  NAME#"SS">,
             XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
                                  VecNode, SaeNode, sched.PD.Scl, IsCommutable,
                                  NAME#"SD">,
             XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
  let Predicates = [HasFP16] in {
    defm SHZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sh", f16x_info, OpNode,
                                    VecNode, SaeNode, sched.PH.Scl, IsCommutable,
                                    NAME#"SH">,
               T_MAP5XS, EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>,
               NotEVEX2VEXConvertible;
  }
}
defm VADD : avx512_binop_s_round<0x58, "vadd", any_fadd, X86fadds, X86faddRnds,
                                 SchedWriteFAddSizes, 1>;
defm VMUL : avx512_binop_s_round<0x59, "vmul", any_fmul, X86fmuls, X86fmulRnds,
                                 SchedWriteFMulSizes, 1>;
defm VSUB : avx512_binop_s_round<0x5C, "vsub", any_fsub, X86fsubs, X86fsubRnds,
                                 SchedWriteFAddSizes, 0>;
defm VDIV : avx512_binop_s_round<0x5E, "vdiv", any_fdiv, X86fdivs, X86fdivRnds,
                                 SchedWriteFDivSizes, 0>;
defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminSAEs,
                               SchedWriteFCmpSizes, 0>;
defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs,
                               SchedWriteFCmpSizes, 0>;

// MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
// X86fminc and X86fmaxc instead of X86fmin and X86fmax
multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
                                    X86VectorVTInfo _, SDNode OpNode,
                                    X86FoldableSchedWrite sched,
                                    string EVEX2VEXOvrd> {
  let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.FRC:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
              Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr"> {
    let isCommutable = 1;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.ScalarMemOp:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                                 (_.ScalarLdFrag addr:$src2)))]>,
              Sched<[sched.Folded, sched.ReadAfterFold]>,
              EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
  }
}
defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
                                         SchedWriteFCmp.Scl, "VMINCSS">, XS,
                                         EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;

defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
                                         SchedWriteFCmp.Scl, "VMINCSD">, XD,
                                         VEX_W, EVEX_4V, VEX_LIG,
                                         EVEX_CD8<64, CD8VT1>, SIMD_EXC;

defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
                                         SchedWriteFCmp.Scl, "VMAXCSS">, XS,
                                         EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;

defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
                                         SchedWriteFCmp.Scl, "VMAXCSD">, XD,
                                         VEX_W, EVEX_4V, VEX_LIG,
                                         EVEX_CD8<64, CD8VT1>, SIMD_EXC;

defm VMINCSHZ : avx512_comutable_binop_s<0x5D, "vminsh", f16x_info, X86fminc,
                                         SchedWriteFCmp.Scl, "VMINCSH">, T_MAP5XS,
                                         EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC,
                                         NotEVEX2VEXConvertible;
defm VMAXCSHZ : avx512_comutable_binop_s<0x5F, "vmaxsh", f16x_info, X86fmaxc,
                                         SchedWriteFCmp.Scl, "VMAXCSH">, T_MAP5XS,
                                         EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC,
                                         NotEVEX2VEXConvertible;

// Packed FP binary op: rr, rm, and rmb (embedded broadcast) forms, each with
// split unmasked/masked pattern nodes (OpNode vs. MaskOpNode).
multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDPatternOperator MaskOpNode,
                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
                            bit IsCommutable,
                            bit IsKCommutable = IsCommutable,
                            string suffix = _.Suffix,
                            string ClobberConstraint = "",
                            bit MayRaiseFPException = 1> {
  let ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR], mayRaiseFPException = MayRaiseFPException in {
  defm rr: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#suffix,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                  (_.VT (MaskOpNode _.RC:$src1, _.RC:$src2)), ClobberConstraint,
                  IsCommutable, IsKCommutable, IsKCommutable>, EVEX_4V, Sched<[sched]>;
  let mayLoad = 1 in {
    defm rm: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#suffix,
                    "$src2, $src1", "$src1, $src2",
                    (OpNode _.RC:$src1, (_.LdFrag addr:$src2)),
                    (MaskOpNode _.RC:$src1, (_.LdFrag addr:$src2)),
                    ClobberConstraint>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
    defm rmb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
                     (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#suffix,
                     "${src2}"#_.BroadcastStr#", $src1",
                     "$src1, ${src2}"#_.BroadcastStr,
                     (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
                     (MaskOpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
                     ClobberConstraint>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
  }
}

// Packed FP binary op with explicit static rounding control (512-bit only).
multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
                                  SDPatternOperator OpNodeRnd,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  string suffix = _.Suffix,
                                  string ClobberConstraint = ""> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr#suffix,
                  "$rc, $src2, $src1", "$src1, $src2, $rc",
                  (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc))),
                  0, 0, 0, vselect_mask, ClobberConstraint>,
                  EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
}

// Packed FP binary op with {sae} form (512-bit only; min/max).
multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNodeSAE,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
                  "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                  (_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>,
                  EVEX_4V, EVEX_B, Sched<[sched]>;
}

// PS/PD packed binop across all three widths; prd gates the 512-bit forms and
// prd+VLX gates the 128/256-bit forms. PD128 commutability can be overridden
// independently.
multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                             SDPatternOperator MaskOpNode,
                             Predicate prd, X86SchedWriteSizes sched,
                             bit IsCommutable = 0,
                             bit IsPD128Commutable = IsCommutable> {
  let Predicates = [prd] in {
  defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
                              sched.PS.ZMM, IsCommutable>, EVEX_V512, PS,
                              EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f64_info,
                              sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W,
                              EVEX_CD8<64, CD8VF>;
  }

  // Define only if AVX512VL feature is present.
  let Predicates = [prd, HasVLX] in {
    defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
                                   sched.PS.XMM, IsCommutable>, EVEX_V128, PS,
                                   EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
                                   sched.PS.YMM, IsCommutable>, EVEX_V256, PS,
                                   EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v2f64x_info,
                                   sched.PD.XMM, IsPD128Commutable,
                                   IsCommutable>, EVEX_V128, PD, VEX_W,
                                   EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f64x_info,
                                   sched.PD.YMM, IsCommutable>, EVEX_V256, PD, VEX_W,
                                   EVEX_CD8<64, CD8VF>;
  }
}

// FP16 (PH) packed binop counterpart, gated on HasFP16 (+VLX for 128/256).
multiclass avx512_fp_binop_ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                              SDPatternOperator MaskOpNode,
                              X86SchedWriteSizes sched, bit IsCommutable = 0> {
  let Predicates = [HasFP16] in {
    defm PHZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v32f16_info,
                                sched.PH.ZMM, IsCommutable>, EVEX_V512, T_MAP5PS,
                                EVEX_CD8<16, CD8VF>;
  }
  let Predicates = [HasVLX, HasFP16] in {
    defm PHZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f16x_info,
                                   sched.PH.XMM, IsCommutable>, EVEX_V128, T_MAP5PS,
                                   EVEX_CD8<16, CD8VF>;
    defm PHZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f16x_info,
                                   sched.PH.YMM, IsCommutable>, EVEX_V256, T_MAP5PS,
                                   EVEX_CD8<16, CD8VF>;
  }
}

// Rounding-control forms for PH/PS/PD (512-bit only).
let Uses = [MXCSR] in
multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
                                   X86SchedWriteSizes sched> {
  let Predicates = [HasFP16] in {
    defm PHZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
                                      v32f16_info>,
                                      EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
  }
  defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
                                    v16f32_info>,
                                    EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
                                    v8f64_info>,
                                    EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
}

// {sae} forms for PH/PS/PD (512-bit only).
let Uses = [MXCSR] in
multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
                                 X86SchedWriteSizes sched> {
  let Predicates = [HasFP16] in {
    defm PHZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
                                    v32f16_info>,
                                    EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
  }
  defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
                                  v16f32_info>,
                                  EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
                                  v8f64_info>,
                                  EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
}

defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, fadd, HasAVX512,
                              SchedWriteFAddSizes, 1>,
            avx512_fp_binop_ph<0x58, "vadd", any_fadd, fadd, SchedWriteFAddSizes, 1>,
            avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
defm VMUL : avx512_fp_binop_p<0x59, "vmul", any_fmul, fmul, HasAVX512,
                              SchedWriteFMulSizes, 1>,
            avx512_fp_binop_ph<0x59, "vmul", any_fmul, fmul, SchedWriteFMulSizes, 1>,
            avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
defm VSUB : avx512_fp_binop_p<0x5C, "vsub", any_fsub, fsub, HasAVX512,
                              SchedWriteFAddSizes>,
            avx512_fp_binop_ph<0x5C, "vsub", any_fsub, fsub, SchedWriteFAddSizes>,
            avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", any_fdiv, fdiv, HasAVX512,
                              SchedWriteFDivSizes>,
            avx512_fp_binop_ph<0x5E, "vdiv", any_fdiv, fdiv, SchedWriteFDivSizes>,
            avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, X86fmin, HasAVX512,
                              SchedWriteFCmpSizes, 0>,
            avx512_fp_binop_ph<0x5D, "vmin", X86fmin, X86fmin, SchedWriteFCmpSizes, 0>,
            avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>;
defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, X86fmax, HasAVX512,
                              SchedWriteFCmpSizes, 0>,
            avx512_fp_binop_ph<0x5F, "vmax", X86fmax, X86fmax, SchedWriteFCmpSizes, 0>,
            avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>;
// Commutable min/max variants (see comment above avx512_comutable_binop_s).
let isCodeGenOnly = 1 in {
  defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, X86fminc, HasAVX512,
                                 SchedWriteFCmpSizes, 1>,
               avx512_fp_binop_ph<0x5D, "vmin", X86fminc, X86fminc,
                                  SchedWriteFCmpSizes, 1>;
  defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, X86fmaxc, HasAVX512,
                                 SchedWriteFCmpSizes, 1>,
               avx512_fp_binop_ph<0x5F, "vmax", X86fmaxc, X86fmaxc,
                                  SchedWriteFCmpSizes, 1>;
}
// FP bitwise logic: no patterns (null_frag) and no FP-exception/MXCSR
// behavior.
let Uses = []<Register>, mayRaiseFPException = 0 in {
defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, null_frag, HasDQI,
                              SchedWriteFLogicSizes, 1>;
defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, null_frag, HasDQI,
                               SchedWriteFLogicSizes, 0>;
defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, null_frag, HasDQI,
                             SchedWriteFLogicSizes, 1>;
defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, null_frag, HasDQI,
                              SchedWriteFLogicSizes, 1>;
}

// Packed VSCALEF: rr/rm/rmb masked forms.
multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
                  EVEX_4V, Sched<[sched]>;
  defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
                  EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix,
                   "${src2}"#_.BroadcastStr#", $src1",
                   "$src1, ${src2}"#_.BroadcastStr,
                   (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
                   EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Scalar VSCALEF: rr/rm masked forms.
multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
                  Sched<[sched]>;
  defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr#_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (OpNode _.RC:$src1, (_.ScalarIntMemFrags addr:$src2))>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Full VSCALEF family: packed (opc) and scalar (opcScaler) forms for
// PH/PS/PD and SH/SS/SD, plus rounding-control variants; 128/256-bit packed
// forms under VLX.
multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
                                X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in {
    defm PHZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v32f16_info>,
               avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v32f16_info>,
               EVEX_V512, T_MAP6PD, EVEX_CD8<16, CD8VF>;
    defm SHZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f16x_info>,
               avx512_fp_scalar_round<opcScaler, OpcodeStr#"sh", f16x_info, X86scalefsRnd, sched.Scl>,
               EVEX_4V, T_MAP6PD, EVEX_CD8<16, CD8VT1>;
  }
  defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>,
             avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>,
             EVEX_V512, EVEX_CD8<32, CD8VF>, T8PD;
  defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>,
             avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>,
             EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>, T8PD;
  defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>,
             avx512_fp_scalar_round<opcScaler, OpcodeStr#"ss", f32x_info,
                                    X86scalefsRnd, sched.Scl>,
             EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, T8PD;
  defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>,
             avx512_fp_scalar_round<opcScaler, OpcodeStr#"sd", f64x_info,
                                    X86scalefsRnd, sched.Scl>,
             EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>, VEX_W, T8PD;

  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>,
                  EVEX_V128, EVEX_CD8<32, CD8VF>, T8PD;
    defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>,
                  EVEX_V256, EVEX_CD8<32, CD8VF>, T8PD;
    defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>,
                  EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>, T8PD;
    defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>,
                  EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>, T8PD;
  }

  let Predicates = [HasFP16, HasVLX] in {
    defm PHZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v8f16x_info>,
                  EVEX_V128, EVEX_CD8<16, CD8VF>, T_MAP6PD;
    defm PHZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v16f16x_info>,
                  EVEX_V256, EVEX_CD8<16, CD8VF>, T_MAP6PD;
  }
}
defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef",
                                    SchedWriteFAdd>,
                                    NotEVEX2VEXConvertible;

//===----------------------------------------------------------------------===//
// AVX-512  VPTESTM instructions
//===----------------------------------------------------------------------===//

// VPTESTM/VPTESTNM: test-and-set-mask. Instructions are declared with
// null_frag patterns; selection is done by hand (see NOTE below).
multiclass avx512_vptest<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  // NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG.
  // There are just too many permutations due to commutability and bitcasts.
  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (null_frag), (null_frag), 1>,
                   EVEX_4V, Sched<[sched]>;
  let mayLoad = 1 in
  defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (null_frag), (null_frag)>,
                   EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Embedded-broadcast memory form of VPTESTM/VPTESTNM.
multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
  defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                    "${src2}"#_.BroadcastStr#", $src1",
                    "$src1, ${src2}"#_.BroadcastStr,
                    (null_frag), (null_frag)>,
                    EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// D/Q-element VPTESTM across vector widths (definition continues below this
// chunk).
multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr,
                                  X86SchedWriteWidths sched,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512>,
           avx512_vptest_mb<opc, OpcodeStr, sched.ZMM, _.info512>, EVEX_V512;

  let Predicates =
[HasAVX512, HasVLX] in { 6057 defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256>, 6058 avx512_vptest_mb<opc, OpcodeStr, sched.YMM, _.info256>, EVEX_V256; 6059 defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128>, 6060 avx512_vptest_mb<opc, OpcodeStr, sched.XMM, _.info128>, EVEX_V128; 6061 } 6062} 6063 6064multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr, 6065 X86SchedWriteWidths sched> { 6066 defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", sched, 6067 avx512vl_i32_info>; 6068 defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", sched, 6069 avx512vl_i64_info>, VEX_W; 6070} 6071 6072multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr, 6073 X86SchedWriteWidths sched> { 6074 let Predicates = [HasBWI] in { 6075 defm WZ: avx512_vptest<opc, OpcodeStr#"w", sched.ZMM, 6076 v32i16_info>, EVEX_V512, VEX_W; 6077 defm BZ: avx512_vptest<opc, OpcodeStr#"b", sched.ZMM, 6078 v64i8_info>, EVEX_V512; 6079 } 6080 6081 let Predicates = [HasVLX, HasBWI] in { 6082 defm WZ256: avx512_vptest<opc, OpcodeStr#"w", sched.YMM, 6083 v16i16x_info>, EVEX_V256, VEX_W; 6084 defm WZ128: avx512_vptest<opc, OpcodeStr#"w", sched.XMM, 6085 v8i16x_info>, EVEX_V128, VEX_W; 6086 defm BZ256: avx512_vptest<opc, OpcodeStr#"b", sched.YMM, 6087 v32i8x_info>, EVEX_V256; 6088 defm BZ128: avx512_vptest<opc, OpcodeStr#"b", sched.XMM, 6089 v16i8x_info>, EVEX_V128; 6090 } 6091} 6092 6093multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr, 6094 X86SchedWriteWidths sched> : 6095 avx512_vptest_wb<opc_wb, OpcodeStr, sched>, 6096 avx512_vptest_dq<opc_dq, OpcodeStr, sched>; 6097 6098defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm", 6099 SchedWriteVecLogic>, T8PD; 6100defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", 6101 SchedWriteVecLogic>, T8XS; 6102 6103//===----------------------------------------------------------------------===// 6104// AVX-512 Shift instructions 
//===----------------------------------------------------------------------===//

// Shift/rotate by an 8-bit immediate: register and full-vector memory forms.
multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
                            string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (i8 timm:$src2)))>,
                   Sched<[sched]>;
  defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
                   (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode (_.VT (_.LdFrag addr:$src1)),
                          (i8 timm:$src2)))>,
                   Sched<[sched.Folded]>;
  }
}

// Immediate form with an embedded-broadcast scalar memory source.
multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
                             string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
                   (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
                   "$src2, ${src1}"#_.BroadcastStr, "${src1}"#_.BroadcastStr#", $src2",
                   (_.VT (OpNode (_.BroadcastLdFrag addr:$src1), (i8 timm:$src2)))>,
                   EVEX_B, Sched<[sched.Folded]>;
}

// Uniform shift: the count comes from a 128-bit vector (register or i128mem).
multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, ValueType SrcVT,
                            X86VectorVTInfo _> {
  // src2 is always 128-bit
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>,
                   AVX512BIBase, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>,
                   AVX512BIBase,
                   EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Instantiates the uniform-shift forms for ZMM and, under VLX, YMM/XMM. Note
// the compressed-displacement scaling differs per width (CD8VQ/CD8VH/CD8VF).
multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched, ValueType SrcVT,
                              AVX512VLVectorVTInfo VTInfo,
                              Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT,
                            VTInfo.info512>, EVEX_V512,
                            EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT,
                                 VTInfo.info256>, EVEX_V256,
                                 EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
    defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT,
                                 VTInfo.info128>, EVEX_V128,
                                 EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
  }
}

// d/q/w element-size instances of the uniform shift; w requires BWI.
multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
                              string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched,
                              bit NotEVEX2VEXConvertibleQ = 0> {
  defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32,
                              avx512vl_i32_info, HasAVX512>;
  let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
  defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
                              avx512vl_i64_info, HasAVX512>, VEX_W;
  defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
                              avx512vl_i16_info, HasBWI>;
}

// Immediate shift (plus broadcast form) across ZMM/YMM/XMM widths.
multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched,
                                  AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in
  defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched.ZMM, VTInfo.info512>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM,
                               VTInfo.info512>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                                sched.YMM, VTInfo.info256>,
               avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM,
                                 VTInfo.info256>, EVEX_V256;
    defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                                sched.XMM, VTInfo.info128>,
               avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM,
                                 VTInfo.info128>, EVEX_V128;
  }
}

// Word-element immediate shift (BWI); no broadcast form for 16-bit elements.
multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM,
                              string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
  defm WZ:    avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                               sched.ZMM, v32i16_info>, EVEX_V512, VEX_WIG;
  let Predicates = [HasVLX, HasBWI] in {
    defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                                 sched.YMM, v16i16x_info>, EVEX_V256, VEX_WIG;
    defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                                 sched.XMM, v8i16x_info>, EVEX_V128, VEX_WIG;
  }
}

// Dword/qword immediate-shift pair.
multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
                               Format ImmFormR, Format ImmFormM,
                               string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched,
                               bit NotEVEX2VEXConvertibleQ = 0> {
  defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
                                 sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
  let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
  defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
                                 sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
}

defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
                                 SchedWriteVecShiftImm>,
             avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;

defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
                                 SchedWriteVecShiftImm>,
             avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;

defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
                                 SchedWriteVecShiftImm, 1>,
             avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;

defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;

defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl,
                                SchedWriteVecShift>;
defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra,
                                SchedWriteVecShift, 1>;
defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
                                SchedWriteVecShift>;

// Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                 VR128X:$src2)), sub_ymm)>;

  def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                 VR128X:$src2)), sub_xmm)>;

  def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                 timm:$src2)), sub_ymm)>;

  def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                 timm:$src2)), sub_xmm)>;
}

//===-------------------------------------------------------------------===//
// Variable Bit Shifts
//===-------------------------------------------------------------------===//

// Per-element variable shift: the count vector comes from a register or a
// full-vector memory operand.
multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>,
                   AVX5128IBase, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1,
                          (_.VT (_.LdFrag addr:$src2))))>,
                   AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Embedded-broadcast memory form of the variable shift.
multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                   "${src2}"#_.BroadcastStr#", $src1",
                   "$src1, ${src2}"#_.BroadcastStr,
                   (_.VT (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))>,
                   AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// ZMM always; YMM/XMM variants (incl. broadcast forms) under AVX512VL.
multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
    defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
  }
}

// Dword/qword instances of the variable shift.
multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
                                  SDNode OpNode, X86SchedWriteWidths sched> {
  defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched,
                                  avx512vl_i32_info>;
  defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched,
                                  avx512vl_i64_info>, VEX_W;
}

// Use 512bit version to implement 128/256 bit in case NoVLX.
multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
                                     SDNode OpNode, list<Predicate> p> {
  let Predicates = p in {
  def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
                                  (_.info256.VT _.info256.RC:$src2))),
            (EXTRACT_SUBREG
              (!cast<Instruction>(OpcodeStr#"Zrr")
                (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
              sub_ymm)>;

  def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
                                  (_.info128.VT _.info128.RC:$src2))),
            (EXTRACT_SUBREG
              (!cast<Instruction>(OpcodeStr#"Zrr")
                (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
              sub_xmm)>;
  }
}
// Word-element variable shift (BWI).
multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
                              SDNode OpNode, X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
  defm WZ: avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>,
           EVEX_V512, VEX_W;
  let Predicates = [HasVLX, HasBWI] in {

  defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>,
              EVEX_V256, VEX_W;
  defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>,
              EVEX_V128, VEX_W;
  }
}

defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", X86vshlv, SchedWriteVarVecShift>,
              avx512_var_shift_w<0x12, "vpsllvw", X86vshlv, SchedWriteVarVecShift>;

defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", X86vsrav, SchedWriteVarVecShift>,
              avx512_var_shift_w<0x11, "vpsravw", X86vsrav, SchedWriteVarVecShift>;

defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecShift>,
              avx512_var_shift_w<0x10, "vpsrlvw", X86vsrlv, SchedWriteVarVecShift>;

// Variable rotates reuse the generic rotr/rotl nodes directly.
defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;

defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX]>;


// Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
              sub_xmm)>;
  def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
              sub_ymm)>;

  def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
              sub_xmm)>;
  def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
              sub_ymm)>;

  def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                timm:$src2)), sub_xmm)>;
  def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                timm:$src2)), sub_ymm)>;

  def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                timm:$src2)), sub_xmm)>;
  def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                timm:$src2)), sub_ymm)>;
}

// Use 512bit VPROR/VPRORI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
              sub_xmm)>;
  def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
              sub_ymm)>;

  def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
              sub_xmm)>;
  def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
              sub_ymm)>;

  def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                timm:$src2)), sub_xmm)>;
  def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                timm:$src2)), sub_ymm)>;

  def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                timm:$src2)), sub_xmm)>;
  def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                timm:$src2)), sub_ymm)>;
}

//===-------------------------------------------------------------------===//
// 1-src variable permutation VPERMW/D/Q
//===-------------------------------------------------------------------===//

// Single-source variable permute (reuses the variable-shift multiclasses);
// only 512-bit and 256-bit forms are defined — no 128-bit variant.
multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in
  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256;
}

// Immediate-controlled permute (reuses the shift-by-immediate multiclasses);
// 512-bit and 256-bit forms only.
multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
                                  string OpcodeStr, SDNode OpNode,
                                  X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in
  defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched, VTInfo.info512>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                               sched, VTInfo.info512>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in
  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched, VTInfo.info256>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                               sched, VTInfo.info256>, EVEX_V256;
}

// Byte/word variable permute, gated on the given predicate (BWI or VBMI).
multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
                           Predicate prd, SDNode OpNode,
                           X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
  let Predicates = [prd] in
  defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
          EVEX_V512 ;
  let Predicates = [HasVLX, prd] in {
  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
             EVEX_V256 ;
  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>,
             EVEX_V128 ;
  }
}

defm VPERMW  : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
                               WriteVarShuffle256, avx512vl_i16_info>, VEX_W;
defm VPERMB  : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
                               WriteVarShuffle256, avx512vl_i8_info>;

defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
                                    WriteVarShuffle256, avx512vl_i32_info>;
defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
                                    WriteVarShuffle256, avx512vl_i64_info>, VEX_W;
defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
                                     WriteFVarShuffle256, avx512vl_f32_info>;
defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
                                     WriteFVarShuffle256, avx512vl_f64_info>, VEX_W;

// Immediate-control variants of VPERMQ/VPERMPD.
defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
                                     X86VPermi, WriteShuffle256, avx512vl_i64_info>,
              EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
                                      X86VPermi, WriteFShuffle256, avx512vl_f64_info>,
               EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;

//===----------------------------------------------------------------------===//
// AVX-512 - VPERMIL
//===----------------------------------------------------------------------===//

// VPERMILPS/PD with a variable control vector: rr, rm, and broadcast forms.
multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _,
                             X86VectorVTInfo Ctrl> {
  defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1,
                                 (Ctrl.VT Ctrl.RC:$src2)))>,
                   T8PD, EVEX_4V, Sched<[sched]>;
  defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode
                            _.RC:$src1,
                            (Ctrl.VT (Ctrl.LdFrag addr:$src2))))>,
                   T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                   "${src2}"#_.BroadcastStr#", $src1",
                   "$src1, ${src2}"#_.BroadcastStr,
                   (_.VT (OpNode
                            _.RC:$src1,
                            (Ctrl.VT (Ctrl.BroadcastLdFrag addr:$src2))))>,
                   T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Variable-control VPERMIL across ZMM and, under VLX, XMM/YMM widths.
multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
                                    X86SchedWriteWidths sched,
                                    AVX512VLVectorVTInfo _,
                                    AVX512VLVectorVTInfo Ctrl> {
  let Predicates = [HasAVX512] in {
  defm Z    : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM,
                                _.info512, Ctrl.info512>, EVEX_V512;
  }
  let Predicates = [HasAVX512, HasVLX] in {
  defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM,
                                _.info128, Ctrl.info128>, EVEX_V128;
  defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM,
                                _.info256, Ctrl.info256>, EVEX_V256;
  }
}

// Combines the variable-control and immediate-control VPERMIL forms.
multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
                         AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
  defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
                                      _, Ctrl>;
  defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
                                    X86VPermilpi, SchedWriteFShuffle, _>,
             EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
}

let ExeDomain = SSEPackedSingle in
defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
                               avx512vl_i32_info>;
let ExeDomain = SSEPackedDouble in
defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
                               avx512vl_i64_info>, VEX_W1X;

//===----------------------------------------------------------------------===//
// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
//===----------------------------------------------------------------------===//

// Immediate-controlled shuffles; these reuse the shift-by-immediate multiclasses.
defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
                                      X86PShufd, SchedWriteShuffle, avx512vl_i32_info>,
               EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
                                  X86PShufhw, SchedWriteShuffle>,
               EVEX, AVX512XSIi8Base;
defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
                                  X86PShuflw, SchedWriteShuffle>,
               EVEX, AVX512XDIi8Base;

//===----------------------------------------------------------------------===//
// AVX-512 - VPSHUFB
//===----------------------------------------------------------------------===//

// Byte shuffle; reuses avx512_var_shift for the rr/rm forms. Requires BWI.
multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
  defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>,
          EVEX_V512;

  let Predicates = [HasVLX, HasBWI] in {
  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>,
             EVEX_V256;
  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>,
             EVEX_V128;
  }
}

defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
                                  SchedWriteVarShuffle>, VEX_WIG;

//===----------------------------------------------------------------------===//
// Move Low to High and High to Low packed FP Instructions
//===----------------------------------------------------------------------===//

def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>,
                  Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V;
let isCommutable = 1 in
def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
                  Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V, NotMemoryFoldable;

//===----------------------------------------------------------------------===//
// VMOVHPS/PD VMOVLPS Instructions
// All patterns were taken from the SSE implementation.
//===----------------------------------------------------------------------===//

// Load form: loads a 64-bit scalar from memory and combines it with $src1 via
// OpNode (null_frag instantiations get no ISel pattern, only the instruction).
multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
                                  SDPatternOperator OpNode,
                                  X86VectorVTInfo _> {
  let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in
  def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
                  (ins _.RC:$src1, f64mem:$src2),
                  !strconcat(OpcodeStr,
                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set _.RC:$dst,
                        (OpNode _.RC:$src1,
                                (_.VT (bitconvert
                                       (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>,
                  Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX_4V;
}

// No patterns for MOVLPS/MOVHPS as the Movlhps node should only be created in
// SSE1. And MOVLPS pattern is even more complex.
// VMOVHPS/VMOVLPS use null_frag (see comment above); only the PD variants
// carry selection patterns here.
defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag,
                                          v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
                                          v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag,
                                          v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd,
                                          v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;

let Predicates = [HasAVX512] in {
  // VMOVHPD patterns
  def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))),
            (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;

  // VMOVLPD patterns
  def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload64 addr:$src2))),
            (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
}

// Store forms. The PS variants have empty pattern lists (assembler/
// disassembler only from this view); the PD variants store the selected
// 64-bit element.
let SchedRW = [WriteFStore] in {
let mayStore = 1, hasSideEffects = 0 in
def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
                    (ins f64mem:$dst, VR128X:$src),
                    "vmovhps\t{$src, $dst|$dst, $src}",
                    []>, EVEX, EVEX_CD8<32, CD8VT2>;
def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
                    (ins f64mem:$dst, VR128X:$src),
                    "vmovhpd\t{$src, $dst|$dst, $src}",
                    [(store (f64 (extractelt
                                  (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
                                  (iPTR 0))), addr:$dst)]>,
                    EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
let mayStore = 1, hasSideEffects = 0 in
def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
                    (ins f64mem:$dst, VR128X:$src),
                    "vmovlps\t{$src, $dst|$dst, $src}",
                    []>, EVEX, EVEX_CD8<32, CD8VT2>;
def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
                    (ins f64mem:$dst, VR128X:$src),
                    "vmovlpd\t{$src, $dst|$dst, $src}",
                    [(store (f64 (extractelt (v2f64 VR128X:$src),
                                  (iPTR 0))), addr:$dst)]>,
                    EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
} // SchedRW

let Predicates = [HasAVX512] in {
  // VMOVHPD patterns
  def : Pat<(store (f64 (extractelt
                         (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
                         (iPTR 0))), addr:$dst),
            (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
}
//===----------------------------------------------------------------------===//
// FMA - Fused Multiply Operations
//

// Packed FMA, 213 operand order: register/register (r), register/memory (m),
// and embedded-broadcast (mb) forms. Both the unmasked and masked patterns
// are selectable for the 213 form.
multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
                               X86VectorVTInfo _> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)),
          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
          EVEX_4V, Sched<[sched]>;

  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))),
          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
          EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.ScalarMemOp:$src3),
           OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
           !strconcat("$src2, ${src3}", _.BroadcastStr ),
           (OpNode _.RC:$src2,
            _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))),
           (MaskOpNode _.RC:$src2,
            _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>,
           EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Static-rounding (EVEX.b set, {er}) variant; only instantiated for the
// 512-bit vector length (see avx512_fma3p_213_common below).
multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR] in
  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
           (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))),
           (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>,
           EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
}

// Instantiates Z/Z256/Z128 vector lengths; rounding form only at 512 bits.
multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                                   SDNode MaskOpNode, SDNode OpNodeRnd,
                                   X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _,
                                   Predicate prd = HasAVX512> {
  let Predicates = [prd] in {
    defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                 sched.ZMM, _.info512>,
             avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                   _.info512>,
             EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, prd] in {
    defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.YMM, _.info256>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.XMM, _.info128>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}

// Element-type fan-out: PH (FP16, gated on HasFP16), PS, PD.
multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                              SDNode MaskOpNode, SDNode OpNodeRnd> {
  defm PH : avx512_fma3p_213_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f16_info, HasFP16>, T_MAP6PD;
  defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f32_info>, T8PD;
  defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f64_info>, T8PD, VEX_W;
}

defm VFMADD213    : avx512_fma3p_213_f<0xA8, "vfmadd213", any_fma,
                                       fma, X86FmaddRnd>;
defm VFMSUB213    : avx512_fma3p_213_f<0xAA, "vfmsub213",
                                       X86any_Fmsub,
                                       X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub,
                                       X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd,
                                       X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD213   : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86any_Fnmadd,
                                       X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB213   : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86any_Fnmsub,
                                       X86Fnmsub, X86FnmsubRnd>;


// Packed FMA, 231 operand order. The register/register form carries no
// unmasked selection pattern (null_frag) here; only the masked pattern and
// the memory/broadcast forms are selectable.
multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
                               X86VectorVTInfo _> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (null_frag),
          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
          EVEX_4V, Sched<[sched]>;

  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)),
          (_.VT (MaskOpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
          EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.ScalarMemOp:$src3),
           OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
           "$src2, ${src3}"#_.BroadcastStr,
           (_.VT (OpNode _.RC:$src2,
                         (_.VT (_.BroadcastLdFrag addr:$src3)),
                         _.RC:$src1)),
           (_.VT (MaskOpNode _.RC:$src2,
                             (_.VT (_.BroadcastLdFrag addr:$src3)),
                             _.RC:$src1)), 1, 0>, EVEX_4V, EVEX_B,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Static-rounding variant of the 231 form; unmasked pattern is null_frag.
multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR] in
  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
           (null_frag),
           (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))),
           1, 1>, EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
}

// Z/Z256/Z128 instantiation, mirroring the 213 _common multiclass.
multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                                   SDNode MaskOpNode, SDNode OpNodeRnd,
                                   X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _,
                                   Predicate prd = HasAVX512> {
  let Predicates = [prd] in {
    defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                 sched.ZMM, _.info512>,
             avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                   _.info512>,
             EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, prd] in {
    defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.YMM, _.info256>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.XMM, _.info128>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}

// Element-type fan-out for 231: PH (HasFP16), PS, PD.
multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                              SDNode MaskOpNode, SDNode OpNodeRnd > {
  defm PH : avx512_fma3p_231_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f16_info, HasFP16>, T_MAP6PD;
  defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f32_info>, T8PD;
  defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f64_info>, T8PD, VEX_W;
}

defm VFMADD231    : avx512_fma3p_231_f<0xB8, "vfmadd231", any_fma,
                                       fma, X86FmaddRnd>;
defm VFMSUB231    : avx512_fma3p_231_f<0xBA, "vfmsub231", X86any_Fmsub,
                                       X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub,
                                       X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd,
                                       X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD231   : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86any_Fnmadd,
                                       X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB231   : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86any_Fnmsub,
                                       X86Fnmsub, X86FnmsubRnd>;

// Packed FMA, 132 operand order. Register form has no unmasked pattern.
multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
                               X86VectorVTInfo _> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (null_frag),
          (_.VT (MaskOpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1>,
          EVEX_4V, Sched<[sched]>;

  // Pattern is 312 order so that the load is in a different place from the
  // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)),
          (_.VT (MaskOpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
          EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;

  // Pattern is 312 order so that the load is in a different place from the
  // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.ScalarMemOp:$src3),
           OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
           "$src2, ${src3}"#_.BroadcastStr,
           (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
                         _.RC:$src1, _.RC:$src2)),
           (_.VT (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
                             _.RC:$src1, _.RC:$src2)), 1, 0>,
           EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Static-rounding variant of the 132 form; unmasked pattern is null_frag.
multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR] in
  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
           (null_frag),
           (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))),
           1, 1>, EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
}

// Z/Z256/Z128 instantiation, mirroring the 213/231 _common multiclasses.
multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                                   SDNode MaskOpNode, SDNode OpNodeRnd,
                                   X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _,
                                   Predicate prd = HasAVX512> {
  let Predicates = [prd] in {
    defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                 sched.ZMM, _.info512>,
             avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                   _.info512>,
             EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, prd] in {
    defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.YMM, _.info256>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.XMM, _.info128>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}

// Element-type fan-out for 132: PH (HasFP16), PS, PD.
multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr,
                              SDPatternOperator OpNode,
                              SDNode MaskOpNode, SDNode OpNodeRnd > {
  defm PH : avx512_fma3p_132_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f16_info, HasFP16>, T_MAP6PD;
  defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f32_info>, T8PD;
  defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f64_info>, T8PD, VEX_W;
}

defm VFMADD132    : avx512_fma3p_132_f<0x98, "vfmadd132", any_fma,
                                       fma, X86FmaddRnd>;
defm VFMSUB132    : avx512_fma3p_132_f<0x9A, "vfmsub132", X86any_Fmsub,
                                       X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub,
                                       X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd,
                                       X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD132   : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86any_Fnmadd,
                                       X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB132   : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86any_Fnmsub,
                                       X86Fnmsub, X86FnmsubRnd>;

// Scalar FMA
// Common scaffolding for one scalar FMA opcode: intrinsic (_Int) forms with
// no patterns here (patterns come from avx512_scalar_fma_patterns below),
// plus isCodeGenOnly FRC forms whose patterns are passed in as RHS_r/m/b.
// MaskOnlyReg suppresses the register/rounding FRC patterns.
multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                               dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
  defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
              (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
              "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
              EVEX_4V, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;

  let mayLoad = 1 in
  defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
              (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
              "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
              EVEX_4V, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;

  let Uses = [MXCSR] in
  defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
              (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
              OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
              EVEX_4V, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;

  let isCodeGenOnly = 1, isCommutable = 1 in {
    def r : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
                   (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
                   !strconcat(OpcodeStr,
                              "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                   !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>, EVEX_4V, SIMD_EXC;
    def m : AVX512<opc, MRMSrcMem, (outs _.FRC:$dst),
                   (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
                   !strconcat(OpcodeStr,
                              "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                   [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, EVEX_4V, SIMD_EXC;

    let Uses = [MXCSR] in
    def rb : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
                    (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
                    !strconcat(OpcodeStr,
                               "\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"),
                    !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
                    Sched<[SchedWriteFMA.Scl]>, EVEX_4V;
  }// isCodeGenOnly = 1
}// Constraints = "$src1 = $dst"
}

// Instantiates the 213/231/132 scalar forms for one element type.
multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
                            string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd,
                            X86VectorVTInfo _, string SUFF> {
  let ExeDomain = _.ExeDomain in {
  defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
                // Operands for intrinsic are in 123 order to preserve passthru
                // semantics.
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
                         _.FRC:$src3))),
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
                         (_.ScalarLdFrag addr:$src3)))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
                         _.FRC:$src3, (i32 timm:$rc)))), 0>;

  defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
                         _.FRC:$src1))),
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
                         (_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
                         _.FRC:$src1, (i32 timm:$rc)))), 1>;

  // One pattern is 312 order so that the load is in a different place from the
  // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
  defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
                         _.FRC:$src2))),
                (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
                         _.FRC:$src1, _.FRC:$src2))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
                         _.FRC:$src2, (i32 timm:$rc)))), 1>;
  }
}

// Element-type fan-out: SS/SD under HasAVX512, SH under HasFP16.
multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
                        string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd> {
  let Predicates = [HasAVX512] in {
    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
                                 OpNodeRnd, f32x_info, "SS">,
                                 EVEX_CD8<32, CD8VT1>, VEX_LIG, T8PD;
    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
                                 OpNodeRnd, f64x_info, "SD">,
                                 EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W, T8PD;
  }
  let Predicates = [HasFP16] in {
    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
                                 OpNodeRnd, f16x_info, "SH">,
                                 EVEX_CD8<16, CD8VT1>, VEX_LIG, T_MAP6PD;
  }
}

defm VFMADD  :
               avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", any_fma, X86FmaddRnd>;
defm VFMSUB  : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86any_Fmsub, X86FmsubRnd>;
defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86any_Fnmadd, X86FnmaddRnd>;
defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86any_Fnmsub, X86FnmsubRnd>;

// Maps scalar FMA DAGs (FMA op merged back into a vector through Move /
// scalar_to_vector / extractelt) onto the _Int instruction forms declared
// above: unmasked, merge-masked (X86selects_mask with src1 passthrough),
// zero-masked (passthrough ZeroFP), and statically-rounded (RndOp) variants,
// each in 213/231/132 flavors chosen by where the load/extract sits.
multiclass avx512_scalar_fma_patterns<SDPatternOperator Op, SDNode MaskedOp,
                                      SDNode RndOp, string Prefix,
                                      string Suffix, SDNode Move,
                                      X86VectorVTInfo _, PatLeaf ZeroFP,
                                      Predicate prd = HasAVX512> {
  let Predicates = [prd] in {
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                            (Op _.FRC:$src2,
                                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                _.FRC:$src3))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                            (Op _.FRC:$src2, _.FRC:$src3,
                                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                            (Op _.FRC:$src2,
                                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                (_.ScalarLdFrag addr:$src3)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zm_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                            (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))),
              (!cast<I>(Prefix#"132"#Suffix#"Zm_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                            (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
                                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               addr:$src3)>;

    // Merge-masked patterns: the passthrough is element 0 of $src1.
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                            (X86selects_mask VK1WM:$mask,
                             (MaskedOp _.FRC:$src2,
                                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                       _.FRC:$src3),
                             (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                            (X86selects_mask VK1WM:$mask,
                             (MaskedOp _.FRC:$src2,
                                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                       (_.ScalarLdFrag addr:$src3)),
                             (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                            (X86selects_mask VK1WM:$mask,
                             (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                       (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
                             (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                            (X86selects_mask VK1WM:$mask,
                             (MaskedOp _.FRC:$src2, _.FRC:$src3,
                                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                             (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                            (X86selects_mask VK1WM:$mask,
                             (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
                                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                             (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    // Zero-masked patterns: the passthrough is the ZeroFP leaf.
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                            (X86selects_mask VK1WM:$mask,
                             (MaskedOp _.FRC:$src2,
                                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                       _.FRC:$src3),
                             (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                            (X86selects_mask VK1WM:$mask,
                             (MaskedOp _.FRC:$src2, _.FRC:$src3,
                                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                             (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                            (X86selects_mask VK1WM:$mask,
                             (MaskedOp _.FRC:$src2,
                                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                       (_.ScalarLdFrag addr:$src3)),
                             (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                            (X86selects_mask VK1WM:$mask,
                             (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                       _.FRC:$src2, (_.ScalarLdFrag addr:$src3)),
                             (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                            (X86selects_mask VK1WM:$mask,
                             (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
                                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                             (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    // Patterns with rounding mode.
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                            (RndOp _.FRC:$src2,
                                   (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                   _.FRC:$src3, (i32 timm:$rc)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                            (RndOp _.FRC:$src2, _.FRC:$src3,
                                   (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                   (i32 timm:$rc)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                            (X86selects_mask VK1WM:$mask,
                             (RndOp _.FRC:$src2,
                                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                    _.FRC:$src3, (i32 timm:$rc)),
                             (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                            (X86selects_mask VK1WM:$mask,
                             (RndOp _.FRC:$src2, _.FRC:$src3,
                                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                    (i32 timm:$rc)),
                             (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                            (X86selects_mask VK1WM:$mask,
                             (RndOp _.FRC:$src2,
                                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                    _.FRC:$src3, (i32 timm:$rc)),
                             (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                            (X86selects_mask VK1WM:$mask,
                             (RndOp _.FRC:$src2, _.FRC:$src3,
                                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                    (i32 timm:$rc)),
                             (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
  }
}
defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD", "SH",
                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB", "SH",
                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SH",
                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SH",
                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;

defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
                                  "SS", X86Movss, v4f32x_info, fp32imm0>;

defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;

//===----------------------------------------------------------------------===//
// AVX-512  Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
//===----------------------------------------------------------------------===//
let Constraints = "$src1 = $dst" in {
multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  // NOTE: The SDNode has the multiply operands first with the add last.
  // This enables commuted load patterns to be autogenerated by tablegen.
  let ExeDomain = _.ExeDomain in {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
          T8PD, EVEX_4V, Sched<[sched]>;

  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
          T8PD, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.ScalarMemOp:$src3),
           OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
           !strconcat("$src2, ${src3}", _.BroadcastStr ),
           (OpNode _.RC:$src2,
                   (_.VT (_.BroadcastLdFrag addr:$src3)),
                   _.RC:$src1)>,
           T8PD, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
} // Constraints = "$src1 = $dst"

// Z form requires IFMA; Z256/Z128 additionally require VLX.
multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasIFMA] in {
    defm Z : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
             EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasIFMA] in {
    defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}

defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
                                         SchedWriteVecIMul, avx512vl_i64_info>,
                                         VEX_W;
defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
                                         SchedWriteVecIMul, avx512vl_i64_info>,
                                         VEX_W;

//===----------------------------------------------------------------------===//
// AVX-512 Scalar convert from signed integer to float/double
//===----------------------------------------------------------------------===//

// Common scalar int->fp conversion skeleton.  Produces:
//   rr/rm         - CodeGen-only forms on scalar FP registers (FRC), no
//                   patterns (isCodeGenOnly, hasSideEffects = 0).
//   rr_Int/rm_Int - forms on the full vector register class; the upper
//                   elements pass through from $src1.
// _Uses/_mayRaiseFPException let instantiations like i32->f64 (an exact
// conversion) drop the MXCSR dependence and the FP-exception flag.
multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched,
                    RegisterClass SrcRC, X86VectorVTInfo DstVT,
                    X86MemOperand x86memop, PatFrag ld_frag, string asm,
                    string mem, list<Register> _Uses = [MXCSR],
                    bit _mayRaiseFPException = 1> {
let ExeDomain = DstVT.ExeDomain, Uses = _Uses,
    mayRaiseFPException = _mayRaiseFPException in {
  let hasSideEffects = 0, isCodeGenOnly = 1 in {
    def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
              (ins DstVT.FRC:$src1, SrcRC:$src),
              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
              EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
    let mayLoad = 1 in
      def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
              (ins DstVT.FRC:$src1, x86memop:$src),
              asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
              EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  } // hasSideEffects = 0
  def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
                 (ins DstVT.RC:$src1, SrcRC:$src2),
                 !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(set DstVT.RC:$dst,
                       (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2))]>,
                 EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;

  def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
                 (ins DstVT.RC:$src1, x86memop:$src2),
                 asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                 [(set DstVT.RC:$dst,
                       (OpNode (DstVT.VT DstVT.RC:$src1),
                               (ld_frag addr:$src2)))]>,
                 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
  // AT&T-style alias without an explicit size suffix, mapped to the
  // register-register intrinsic form.
  def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  (!cast<Instruction>(NAME#"rr_Int") DstVT.RC:$dst,
                   DstVT.RC:$src1, SrcRC:$src2), 0, "att">;
}

// Register-register form with explicit static rounding control (EVEX.b + RC).
multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
                               X86FoldableSchedWrite sched, RegisterClass SrcRC,
                               X86VectorVTInfo DstVT, string asm,
                               string mem> {
  let ExeDomain = DstVT.ExeDomain, Uses = [MXCSR] in
  def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
              (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
              !strconcat(asm,
                         "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
              [(set DstVT.RC:$dst,
                    (OpNode (DstVT.VT DstVT.RC:$src1),
                             SrcRC:$src2,
                             (i32 timm:$rc)))]>,
              EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
  def : InstAlias<"v"#asm#mem#"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}",
                  (!cast<Instruction>(NAME#"rrb_Int") DstVT.RC:$dst,
                   DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 0, "att">;
}

// Combines the default-rounding and explicit-rounding variants.
multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, SDNode OpNodeRnd,
                                X86FoldableSchedWrite sched,
                                RegisterClass SrcRC, X86VectorVTInfo DstVT,
                                X86MemOperand x86memop, PatFrag ld_frag,
                                string asm, string mem> {
  defm NAME : avx512_vcvtsi_round<opc, OpNodeRnd, sched, SrcRC, DstVT, asm, mem>,
              avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop,
                            ld_frag, asm, mem>, VEX_LIG;
}

let Predicates = [HasAVX512] in {
defm VCVTSI2SSZ  : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
                                        WriteCvtI2SS, GR32,
                                        v4f32x_info, i32mem, loadi32, "cvtsi2ss", "l">,
                                        XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
                                        WriteCvtI2SS, GR64,
                                        v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">,
                                        XS, VEX_W, EVEX_CD8<64, CD8VT1>;
// i32->f64 is exact: no rounding variant, no MXCSR use, no FP exception.
defm VCVTSI2SDZ  : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32,
                                 v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l", [], 0>,
                                 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
                                        WriteCvtI2SD, GR64,
                                        v2f64x_info, i64mem, loadi64, "cvtsi2sd", "q">,
                                        XD, VEX_W, EVEX_CD8<64, CD8VT1>;

def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
              (VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
              (VCVTSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;

def : Pat<(f32 (any_sint_to_fp (loadi32 addr:$src))),
          (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (any_sint_to_fp (loadi64 addr:$src))),
          (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (any_sint_to_fp (loadi32 addr:$src))),
          (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (any_sint_to_fp (loadi64 addr:$src))),
          (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;

def : Pat<(f32 (any_sint_to_fp GR32:$src)),
          (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f32 (any_sint_to_fp GR64:$src)),
          (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f64 (any_sint_to_fp GR32:$src)),
          (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (any_sint_to_fp GR64:$src)),
          (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;

defm VCVTUSI2SSZ   : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
                                          WriteCvtI2SS, GR32,
                                          v4f32x_info, i32mem, loadi32,
                                          "cvtusi2ss", "l">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
                                          WriteCvtI2SS, GR64,
                                          v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">,
                                          XS, VEX_W, EVEX_CD8<64, CD8VT1>;
// u32->f64 is exact: no rounding variant, no MXCSR use, no FP exception.
defm VCVTUSI2SDZ   : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info,
                                   i32mem, loadi32, "cvtusi2sd", "l", [], 0>,
                                   XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
                                          WriteCvtI2SD, GR64,
                                          v2f64x_info, i64mem, loadi64, "cvtusi2sd", "q">,
                                          XD, VEX_W, EVEX_CD8<64, CD8VT1>;

def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
              (VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
              (VCVTUSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;

def : Pat<(f32 (any_uint_to_fp (loadi32 addr:$src))),
          (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (any_uint_to_fp (loadi64 addr:$src))),
          (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (any_uint_to_fp (loadi32 addr:$src))),
          (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (any_uint_to_fp (loadi64 addr:$src))),
          (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;

def : Pat<(f32 (any_uint_to_fp GR32:$src)),
          (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f32 (any_uint_to_fp GR64:$src)),
          (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f64 (any_uint_to_fp GR32:$src)),
          (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (any_uint_to_fp GR64:$src)),
          (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
}

//===----------------------------------------------------------------------===//
// AVX-512 Scalar convert from float/double to integer
//===----------------------------------------------------------------------===//

// Scalar fp->int conversion with current rounding mode (rr_Int/rm_Int) plus
// an explicit static-rounding form (rrb_Int, EVEX.b + RC).
multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
                                  X86VectorVTInfo DstVT, SDNode OpNode,
                                  SDNode OpNodeRnd,
                                  X86FoldableSchedWrite sched, string asm,
                                  string aliasStr, Predicate prd = HasAVX512> {
  let Predicates = [prd], ExeDomain = SrcVT.ExeDomain in {
  def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
              [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
              EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
  let Uses = [MXCSR] in
  def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
               !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
               [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 timm:$rc)))]>,
               EVEX, VEX_LIG, EVEX_B, EVEX_RC,
               Sched<[sched]>;
  def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
              [(set DstVT.RC:$dst, (OpNode
                    (SrcVT.ScalarIntMemFrags addr:$src)))]>,
              EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  } // Predicates = [prd]

  // AT&T aliases carrying the explicit {l}/{q} destination-size suffix.
  def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
          (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
  def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
          (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
  def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
          (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
                                          SrcVT.IntScalarMemOp:$src), 0, "att">;
}

// Convert float/double to signed/unsigned int 32/64
defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,X86cvts2si,
                                   X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">,
                                   XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si,
                                   X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">,
                                   XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, X86cvts2usi,
                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">,
                                   XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, X86cvts2usi,
                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">,
                                   XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
                                   X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">,
                                   XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si,
                                   X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">,
                                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USIZ:   avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, X86cvts2usi,
                                   X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">,
                                   XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2usi,
                                   X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
                                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;

// CodeGen-only FRC-register forms, used below to select lrint/llrint.
multiclass avx512_cvt_s<bits<8> opc, string asm, X86VectorVTInfo SrcVT,
                        X86VectorVTInfo DstVT, SDNode OpNode,
                        X86FoldableSchedWrite sched> {
  let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in {
  let isCodeGenOnly = 1 in {
  def rr : AVX512<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.FRC:$src),
              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
              [(set DstVT.RC:$dst, (OpNode SrcVT.FRC:$src))]>,
              EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
  def rm : AVX512<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.ScalarMemOp:$src),
              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
              [(set DstVT.RC:$dst, (OpNode (SrcVT.ScalarLdFrag addr:$src)))]>,
              EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  } // isCodeGenOnly = 1
  } // Predicates = [HasAVX512]
}

defm VCVTSS2SIZ: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i32x_info,
                              lrint, WriteCvtSS2I>, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2SI64Z: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i64x_info,
                                llrint, WriteCvtSS2I>, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSD2SIZ: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i32x_info,
                              lrint, WriteCvtSD2I>, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2SI64Z: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i64x_info,
                                llrint, WriteCvtSD2I>, VEX_W, XD, EVEX_CD8<64, CD8VT1>;

let Predicates = [HasAVX512] in {
  // i64 lrint on f32/f64 maps onto the 64-bit cvt with current rounding mode.
  def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64Zrr FR32:$src)>;
  def : Pat<(i64 (lrint (loadf32 addr:$src))), (VCVTSS2SI64Zrm addr:$src)>;

  def : Pat<(i64 (lrint FR64:$src)), (VCVTSD2SI64Zrr FR64:$src)>;
  def : Pat<(i64 (lrint (loadf64 addr:$src))), (VCVTSD2SI64Zrm addr:$src)>;
}

// Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
// which produce unnecessary vmovs{s,d} instructions
let Predicates = [HasAVX512] in {
def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))),
          (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))),
          (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))),
          (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))),
          (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))),
          (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))),
          (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))),
          (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))),
          (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR64:$src)))))),
          (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi64 addr:$src))))))),
          (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR32:$src)))))),
          (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi32 addr:$src))))))),
          (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR64:$src)))))),
          (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi64 addr:$src))))))),
          (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR32:$src)))))),
          (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi32 addr:$src))))))),
          (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
} // Predicates = [HasAVX512]

// Convert float/double to signed/unsigned int 32/64 with truncation
// Produces CodeGen-only FRC forms (rr/rm), intrinsic forms (rr_Int/rm_Int),
// and an SAE form (rrb_Int, EVEX.b without rounding operand) plus AT&T
// size-suffix aliases.
multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
                            X86VectorVTInfo _DstRC, SDPatternOperator OpNode,
                            SDNode OpNodeInt, SDNode OpNodeSAE,
                            X86FoldableSchedWrite sched, string aliasStr,
                            Predicate prd = HasAVX512> {
let Predicates = [prd], ExeDomain = _SrcRC.ExeDomain in {
  let isCodeGenOnly = 1 in {
  def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
              [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
              EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
  def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
              [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
              EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  }

  def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
            !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
            [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
            EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
  let Uses = [MXCSR] in
  def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
            !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
            [(set _DstRC.RC:$dst, (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>,
                                  EVEX, VEX_LIG, EVEX_B, Sched<[sched]>;
  def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
              (ins _SrcRC.IntScalarMemOp:$src),
              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
              [(set _DstRC.RC:$dst,
                (OpNodeInt (_SrcRC.ScalarIntMemFrags addr:$src)))]>,
              EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
} // Predicates = [prd]

  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
          (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
  def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
          (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
          (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
                                          _SrcRC.IntScalarMemOp:$src), 0, "att">;
}

defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
                        "{l}">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
                        "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
                        "{l}">, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
                        "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;

defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
                        "{l}">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
                        "{q}">, XS,VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
                        "{l}">, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
                        "{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>;

//===----------------------------------------------------------------------===//
// AVX-512 Convert from float to double and back
//===----------------------------------------------------------------------===//

// Scalar fp->fp conversion: intrinsic forms (rr_Int/rm_Int, maskable) and
// CodeGen-only FRC forms (rr/rm, no patterns).
let Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                X86VectorVTInfo _Src, SDNode OpNode,
                                X86FoldableSchedWrite sched> {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (OpNode (_.VT _.RC:$src1),
                                       (_Src.VT _Src.RC:$src2)))>,
                         EVEX_4V, VEX_LIG, Sched<[sched]>;
  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (OpNode (_.VT _.RC:$src1),
                                  (_Src.ScalarIntMemFrags addr:$src2)))>,
                         EVEX_4V, VEX_LIG,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;

  let isCodeGenOnly = 1, hasSideEffects = 0 in {
  def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
             (ins _.FRC:$src1, _Src.FRC:$src2),
             OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
             EVEX_4V, VEX_LIG, Sched<[sched]>;
  let mayLoad = 1 in
  def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
             (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
             OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
             EVEX_4V, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Scalar Conversion with SAE - suppress all exceptions
multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                    X86VectorVTInfo _Src, SDNode OpNodeSAE,
                                    X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
                        "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                        (_.VT (OpNodeSAE (_.VT _.RC:$src1),
                                         (_Src.VT _Src.RC:$src2)))>,
                        EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
}

// Scalar Conversion with rounding control (RC)
multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                   X86VectorVTInfo _Src, SDNode OpNodeRnd,
                                   X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
                        "$rc, $src2, $src1", "$src1, $src2, $rc",
                        (_.VT (OpNodeRnd (_.VT _.RC:$src1),
                                         (_Src.VT _Src.RC:$src2), (i32 timm:$rc)))>,
                        EVEX_4V, VEX_LIG, Sched<[sched]>,
                        EVEX_B, EVEX_RC;
}
// Narrowing conversions (e.g. sd->ss, ss->sh) get a rounding-control variant.
multiclass avx512_cvt_fp_scalar_trunc<bits<8> opc, string OpcodeStr,
                                      SDNode OpNode, SDNode OpNodeRnd,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _src, X86VectorVTInfo _dst,
                                      Predicate prd = HasAVX512> {
  let Predicates = [prd], ExeDomain = SSEPackedSingle in {
    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
             avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
                                     OpNodeRnd, sched>, EVEX_CD8<_src.EltSize, CD8VT1>;
  }
}

// Widening conversions (e.g. ss->sd, sh->ss) are exact, so they only get an
// SAE variant instead of rounding control.
multiclass avx512_cvt_fp_scalar_extend<bits<8> opc, string OpcodeStr,
                                       SDNode OpNode, SDNode OpNodeSAE,
                                       X86FoldableSchedWrite sched,
                                       X86VectorVTInfo _src, X86VectorVTInfo _dst,
                                       Predicate prd = HasAVX512> {
  let Predicates = [prd], ExeDomain = SSEPackedSingle in {
    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
             avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE, sched>,
             EVEX_CD8<_src.EltSize, CD8VT1>;
  }
}
defm VCVTSD2SS : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2ss", X86frounds,
                                            X86froundsRnd, WriteCvtSD2SS, f64x_info,
                                            f32x_info>, XD, VEX_W;
defm VCVTSS2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtss2sd", X86fpexts,
                                             X86fpextsSAE, WriteCvtSS2SD, f32x_info,
                                             f64x_info>, XS;
defm VCVTSD2SH : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2sh", X86frounds,
                                            X86froundsRnd, WriteCvtSD2SS, f64x_info,
                                            f16x_info, HasFP16>, T_MAP5XD, VEX_W;
defm VCVTSH2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtsh2sd", X86fpexts,
                                             X86fpextsSAE, WriteCvtSS2SD, f16x_info,
                                             f64x_info, HasFP16>, T_MAP5XS;
defm VCVTSS2SH : avx512_cvt_fp_scalar_trunc<0x1D, "vcvtss2sh", X86frounds,
                                            X86froundsRnd, WriteCvtSD2SS, f32x_info,
                                            f16x_info, HasFP16>, T_MAP5PS;
defm VCVTSH2SS : avx512_cvt_fp_scalar_extend<0x13, "vcvtsh2ss", X86fpexts,
                                             X86fpextsSAE, WriteCvtSS2SD, f16x_info,
                                             f32x_info, HasFP16>, T_MAP6PS;

def : Pat<(f64 (any_fpextend FR32X:$src)),
          (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
          Requires<[HasAVX512]>;
def : Pat<(f64 (any_fpextend (loadf32 addr:$src))),
          (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
          Requires<[HasAVX512, OptForSize]>;

def : Pat<(f32 (any_fpround FR64X:$src)),
          (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
           Requires<[HasAVX512]>;

def : Pat<(f32 (any_fpextend FR16X:$src)),
          (VCVTSH2SSZrr (f32 (IMPLICIT_DEF)), FR16X:$src)>,
          Requires<[HasFP16]>;
def : Pat<(f32 (any_fpextend (loadf16 addr:$src))),
          (VCVTSH2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>,
          Requires<[HasFP16, OptForSize]>;

def : Pat<(f64 (any_fpextend FR16X:$src)),
          (VCVTSH2SDZrr (f64 (IMPLICIT_DEF)), FR16X:$src)>,
          Requires<[HasFP16]>;
def : Pat<(f64 (any_fpextend (loadf16 addr:$src))),
          (VCVTSH2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
          Requires<[HasFP16, OptForSize]>;

def : Pat<(f16 (any_fpround FR32X:$src)),
          (VCVTSS2SHZrr (f16 (IMPLICIT_DEF)), FR32X:$src)>,
          Requires<[HasFP16]>;
def : Pat<(f16 (any_fpround FR64X:$src)),
          (VCVTSD2SHZrr (f16 (IMPLICIT_DEF)), FR64X:$src)>,
          Requires<[HasFP16]>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector
                     (f32 (any_fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
          (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
          Requires<[HasAVX512]>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector
                     (f64 (any_fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
          (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
          Requires<[HasAVX512]>;

//===----------------------------------------------------------------------===//
// AVX-512 Vector convert from signed/unsigned integer to float/double
// and from float/double to signed/unsigned integer
//===----------------------------------------------------------------------===//

// Packed conversion skeleton: rr, rm (full-width load, with the LdDAG /
// MaskLdDAG dags overridable for extending loads), and rmb (embedded
// broadcast, EVEX.b).  Masked forms select through vselect_mask with either
// the pass-through $src0 or a zero vector.
multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86VectorVTInfo _Src, SDPatternOperator OpNode, SDPatternOperator MaskOpNode,
                          X86FoldableSchedWrite sched,
                          string Broadcast = _.BroadcastStr,
                          string Alias = "", X86MemOperand MemOp = _Src.MemOp,
                          RegisterClass MaskRC = _.KRCWM,
                          dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src)))),
                          dag MaskLdDAG = (_.VT (MaskOpNode (_Src.VT (_Src.LdFrag addr:$src))))> {
let Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rr : AVX512_maskable_cvt<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _Src.RC:$src),
                         (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
                         (ins MaskRC:$mask, _Src.RC:$src),
                         OpcodeStr, "$src", "$src",
                         (_.VT (OpNode (_Src.VT _Src.RC:$src))),
                         (vselect_mask MaskRC:$mask,
                                       (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
                                       _.RC:$src0),
                         (vselect_mask MaskRC:$mask,
                                       (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
                                       _.ImmAllZerosV)>,
                         EVEX, Sched<[sched]>;

  defm rm : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins MemOp:$src),
                         (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
                         (ins MaskRC:$mask, MemOp:$src),
                         OpcodeStr#Alias, "$src", "$src",
                         LdDAG,
                         (vselect_mask MaskRC:$mask, MaskLdDAG, _.RC:$src0),
                         (vselect_mask MaskRC:$mask, MaskLdDAG, _.ImmAllZerosV)>,
                         EVEX, Sched<[sched.Folded]>;

  defm rmb : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _Src.ScalarMemOp:$src),
                         (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
                         (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
                         OpcodeStr,
                         "${src}"#Broadcast, "${src}"#Broadcast,
                         (_.VT (OpNode (_Src.VT
                                        (_Src.BroadcastLdFrag addr:$src))
                               )),
                         (vselect_mask MaskRC:$mask,
                                       (_.VT
                                        (MaskOpNode
                                         (_Src.VT
                                          (_Src.BroadcastLdFrag addr:$src)))),
                                       _.RC:$src0),
                         (vselect_mask MaskRC:$mask,
                                       (_.VT
                                        (MaskOpNode
                                         (_Src.VT
                                          (_Src.BroadcastLdFrag addr:$src)))),
                                       _.ImmAllZerosV)>,
                         EVEX, EVEX_B, Sched<[sched.Folded]>;
  }
}
// Conversion with SAE - suppress all exceptions
multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                              X86VectorVTInfo _Src, SDNode OpNodeSAE,
                              X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in
  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _Src.RC:$src), OpcodeStr,
                        "{sae}, $src", "$src, {sae}",
                        (_.VT (OpNodeSAE (_Src.VT _Src.RC:$src)))>,
                        EVEX, EVEX_B, Sched<[sched]>;
}

// Conversion with rounding control (RC)
multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                             X86VectorVTInfo _Src, SDPatternOperator OpNodeRnd,
                             X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in
  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
                        "$rc, $src", "$src, $rc",
                        (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 timm:$rc)))>,
                        EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
}

// Similar to avx512_vcvt_fp, but uses an extload for the memory form.
// Same as avx512_vcvt_fp but overrides LdDAG/MaskLdDAG with an extending
// load, since fpextend's memory operand is narrower than the result.
multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                X86VectorVTInfo _Src, SDPatternOperator OpNode,
                                SDNode MaskOpNode,
                                X86FoldableSchedWrite sched,
                                string Broadcast = _.BroadcastStr,
                                string Alias = "", X86MemOperand MemOp = _Src.MemOp,
                                RegisterClass MaskRC = _.KRCWM>
  : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, MaskOpNode, sched, Broadcast,
                   Alias, MemOp, MaskRC,
                   (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src)),
                   (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;

// Extend [Float to Double, Half to Float]
multiclass avx512_cvt_extend<bits<8> opc, string OpcodeStr,
                             AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
                             X86SchedWriteWidths sched, Predicate prd = HasAVX512> {
  let Predicates = [prd] in {
    defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info512, _src.info256,
                                  any_fpextend, fpextend, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, _dst.info512, _src.info256,
                                X86vfpextSAE, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    // The 128-bit form converts only the low elements, hence the dedicated
    // X86(any_)vfpext nodes and the f64mem operand.
    defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info128, _src.info128,
                                     X86any_vfpext, X86vfpext, sched.XMM,
                                     _dst.info128.BroadcastStr,
                                     "", f64mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info256, _src.info128,
                                     any_fpextend, fpextend, sched.YMM>, EVEX_V256;
  }
}

// Truncate [Double to Float, Float to Half]
multiclass avx512_cvt_trunc<bits<8> opc, string OpcodeStr,
                            AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
                            X86SchedWriteWidths sched, Predicate prd = HasAVX512,
                            PatFrag bcast128 = _src.info128.BroadcastLdFrag,
                            PatFrag loadVT128 = _src.info128.LdFrag,
                            RegisterClass maskRC128 = _src.info128.KRCWM> {
  let Predicates = [prd] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512,
                            X86any_vfpround, X86vfpround, sched.ZMM>,
avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512, 8103 X86vfproundRnd, sched.ZMM>, EVEX_V512; 8104 } 8105 let Predicates = [prd, HasVLX] in { 8106 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128, 8107 null_frag, null_frag, sched.XMM, 8108 _src.info128.BroadcastStr, "{x}", 8109 f128mem, maskRC128>, EVEX_V128; 8110 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256, 8111 X86any_vfpround, X86vfpround, 8112 sched.YMM, _src.info256.BroadcastStr, "{y}">, EVEX_V256; 8113 8114 // Special patterns to allow use of X86vmfpround for masking. Instruction 8115 // patterns have been disabled with null_frag. 8116 def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT VR128X:$src))), 8117 (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>; 8118 def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0), 8119 maskRC128:$mask), 8120 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, maskRC128:$mask, VR128X:$src)>; 8121 def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV, 8122 maskRC128:$mask), 8123 (!cast<Instruction>(NAME # "Z128rrkz") maskRC128:$mask, VR128X:$src)>; 8124 8125 def : Pat<(_dst.info128.VT (X86any_vfpround (loadVT128 addr:$src))), 8126 (!cast<Instruction>(NAME # "Z128rm") addr:$src)>; 8127 def : Pat<(X86vmfpround (loadVT128 addr:$src), (_dst.info128.VT VR128X:$src0), 8128 maskRC128:$mask), 8129 (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, maskRC128:$mask, addr:$src)>; 8130 def : Pat<(X86vmfpround (loadVT128 addr:$src), _dst.info128.ImmAllZerosV, 8131 maskRC128:$mask), 8132 (!cast<Instruction>(NAME # "Z128rmkz") maskRC128:$mask, addr:$src)>; 8133 8134 def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT (bcast128 addr:$src)))), 8135 (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>; 8136 def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)), 8137 (_dst.info128.VT VR128X:$src0), maskRC128:$mask), 8138 (!cast<Instruction>(NAME # 
"Z128rmbk") VR128X:$src0, maskRC128:$mask, addr:$src)>; 8139 def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)), 8140 _dst.info128.ImmAllZerosV, maskRC128:$mask), 8141 (!cast<Instruction>(NAME # "Z128rmbkz") maskRC128:$mask, addr:$src)>; 8142 } 8143 8144 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}", 8145 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">; 8146 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", 8147 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst, 8148 VK2WM:$mask, VR128X:$src), 0, "att">; 8149 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|" 8150 "$dst {${mask}} {z}, $src}", 8151 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst, 8152 VK2WM:$mask, VR128X:$src), 0, "att">; 8153 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}", 8154 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">; 8155 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|" 8156 "$dst {${mask}}, ${src}{1to2}}", 8157 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst, 8158 VK2WM:$mask, f64mem:$src), 0, "att">; 8159 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|" 8160 "$dst {${mask}} {z}, ${src}{1to2}}", 8161 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst, 8162 VK2WM:$mask, f64mem:$src), 0, "att">; 8163 8164 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}", 8165 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">; 8166 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", 8167 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst, 8168 VK4WM:$mask, VR256X:$src), 0, "att">; 8169 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|" 8170 "$dst {${mask}} {z}, $src}", 8171 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst, 8172 VK4WM:$mask, VR256X:$src), 0, "att">; 8173 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}", 8174 (!cast<Instruction>(NAME # "Z256rmb") 
VR128X:$dst, f64mem:$src), 0, "att">; 8175 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|" 8176 "$dst {${mask}}, ${src}{1to4}}", 8177 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst, 8178 VK4WM:$mask, f64mem:$src), 0, "att">; 8179 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|" 8180 "$dst {${mask}} {z}, ${src}{1to4}}", 8181 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst, 8182 VK4WM:$mask, f64mem:$src), 0, "att">; 8183} 8184 8185defm VCVTPD2PS : avx512_cvt_trunc<0x5A, "vcvtpd2ps", 8186 avx512vl_f32_info, avx512vl_f64_info, SchedWriteCvtPD2PS>, 8187 VEX_W, PD, EVEX_CD8<64, CD8VF>; 8188defm VCVTPS2PD : avx512_cvt_extend<0x5A, "vcvtps2pd", 8189 avx512vl_f64_info, avx512vl_f32_info, SchedWriteCvtPS2PD>, 8190 PS, EVEX_CD8<32, CD8VH>; 8191 8192// Extend Half to Double 8193multiclass avx512_cvtph2pd<bits<8> opc, string OpcodeStr, 8194 X86SchedWriteWidths sched> { 8195 let Predicates = [HasFP16] in { 8196 defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f16x_info, 8197 any_fpextend, fpextend, sched.ZMM>, 8198 avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f16x_info, 8199 X86vfpextSAE, sched.ZMM>, EVEX_V512; 8200 def : Pat<(v8f64 (extloadv8f16 addr:$src)), 8201 (!cast<Instruction>(NAME # "Zrm") addr:$src)>; 8202 } 8203 let Predicates = [HasFP16, HasVLX] in { 8204 defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v8f16x_info, 8205 X86any_vfpext, X86vfpext, sched.XMM, "{1to2}", "", 8206 f32mem>, EVEX_V128; 8207 defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v8f16x_info, 8208 X86any_vfpext, X86vfpext, sched.YMM, "{1to4}", "", 8209 f64mem>, EVEX_V256; 8210 } 8211} 8212 8213// Truncate Double to Half 8214multiclass avx512_cvtpd2ph<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> { 8215 let Predicates = [HasFP16] in { 8216 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8f64_info, 8217 X86any_vfpround, X86vfpround, sched.ZMM, "{1to8}", "{z}">, 8218 avx512_vcvt_fp_rc<opc, OpcodeStr, 
                              X86vfproundRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasFP16, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2f64x_info, null_frag,
                               null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
                               VK2WM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4f64x_info, null_frag,
                               null_frag, sched.YMM, "{1to4}", "{y}", f256mem,
                               VK4WM>, EVEX_V256;
  }
  // AT&T-syntax aliases carrying the explicit "x"/"y"/"z" source-width suffix.
  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                  VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                  i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, i64mem:$src), 0, "att">;

  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                  VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
                  "$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                  i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask, i64mem:$src), 0, "att">;

  def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Zrr") VR128X:$dst,
                  VR512:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|"
                  "$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst,
                  VK8WM:$mask, VR512:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst,
                  VK8WM:$mask, VR512:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}",
                  (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst,
                  i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to8}}",
                  (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst,
                  VK8WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to8}}",
                  (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst,
                  VK8WM:$mask, i64mem:$src), 0, "att">;
}

defm VCVTPS2PHX : avx512_cvt_trunc<0x1D, "vcvtps2phx", avx512vl_f16_info,
                                   avx512vl_f32_info, SchedWriteCvtPD2PS,
                                   HasFP16>, T_MAP5PD, EVEX_CD8<32, CD8VF>;
defm VCVTPH2PSX : avx512_cvt_extend<0x13, "vcvtph2psx", avx512vl_f32_info,
                                    avx512vl_f16_info, SchedWriteCvtPS2PD,
                                    HasFP16>, T_MAP6PD, EVEX_CD8<16, CD8VH>;
defm VCVTPD2PH : avx512_cvtpd2ph<0x5A, "vcvtpd2ph", SchedWriteCvtPD2PS>,
                 VEX_W, T_MAP5PD, EVEX_CD8<64, CD8VF>;
defm VCVTPH2PD : avx512_cvtph2pd<0x5A, "vcvtph2pd", SchedWriteCvtPS2PD>,
                 T_MAP5PS, EVEX_CD8<16, CD8VQ>;

let Predicates = [HasFP16, HasVLX] in {
  // Special patterns to allow use of X86vmfpround for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v8f16 (X86any_vfpround (v4f64 VR256X:$src))),
            (VCVTPD2PHZ256rr VR256X:$src)>;
  def : Pat<(v8f16 (X86vmfpround (v4f64 VR256X:$src), (v8f16 VR128X:$src0),
            VK4WM:$mask)),
            (VCVTPD2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
  def : Pat<(X86vmfpround (v4f64 VR256X:$src), v8f16x_info.ImmAllZerosV,
            VK4WM:$mask),
            (VCVTPD2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;

  def : Pat<(v8f16 (X86any_vfpround (loadv4f64 addr:$src))),
            (VCVTPD2PHZ256rm addr:$src)>;
  def : Pat<(X86vmfpround (loadv4f64 addr:$src), (v8f16 VR128X:$src0),
            VK4WM:$mask),
            (VCVTPD2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86vmfpround (loadv4f64 addr:$src), v8f16x_info.ImmAllZerosV,
            VK4WM:$mask),
            (VCVTPD2PHZ256rmkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_vfpround (v4f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTPD2PHZ256rmb addr:$src)>;
  def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
            (v8f16 VR128X:$src0), VK4WM:$mask),
            (VCVTPD2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
            v8f16x_info.ImmAllZerosV, VK4WM:$mask),
            (VCVTPD2PHZ256rmbkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_vfpround (v2f64 VR128X:$src))),
            (VCVTPD2PHZ128rr VR128X:$src)>;
  def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v8f16 VR128X:$src0),
            VK2WM:$mask),
            (VCVTPD2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86vmfpround (v2f64 VR128X:$src), v8f16x_info.ImmAllZerosV,
            VK2WM:$mask),
            (VCVTPD2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v8f16 (X86any_vfpround (loadv2f64 addr:$src))),
            (VCVTPD2PHZ128rm addr:$src)>;
  def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v8f16 VR128X:$src0),
            VK2WM:$mask),
            (VCVTPD2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86vmfpround (loadv2f64 addr:$src), v8f16x_info.ImmAllZerosV,
            VK2WM:$mask),
            (VCVTPD2PHZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_vfpround (v2f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTPD2PHZ128rmb addr:$src)>;
  def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
            (v8f16 VR128X:$src0), VK2WM:$mask),
            (VCVTPD2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
            v8f16x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTPD2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
}

// Convert Signed/Unsigned Doubleword to Double
let Uses = []<Register>, mayRaiseFPException = 0 in
multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           SDNode MaskOpNode, SDPatternOperator OpNode128,
                           SDNode MaskOpNode128,
                           X86SchedWriteWidths sched> {
  // No rounding in this op
  let Predicates = [HasAVX512] in
  defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
                          MaskOpNode, sched.ZMM>, EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
                               OpNode128, MaskOpNode128, sched.XMM, "{1to2}",
                               "", i64mem, VK2WM,
                               (v2f64 (OpNode128 (bc_v4i32
                                (v2i64
                                 (scalar_to_vector (loadi64 addr:$src)))))),
                               (v2f64 (MaskOpNode128 (bc_v4i32
                                (v2i64
                                 (scalar_to_vector (loadi64 addr:$src))))))>,
                               EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

// Convert Signed/Unsigned Doubleword to Float
multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           SDNode MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
                          MaskOpNode, sched.ZMM>,
          avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
                            OpNodeRnd, sched.ZMM>, EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
                               MaskOpNode, sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

// Convert Float to Signed/Unsigned Doubleword with truncation
multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDNode MaskOpNode,
                            SDNode OpNodeSAE, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
                            MaskOpNode, sched.ZMM>,
            avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
                               OpNodeSAE, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
                               MaskOpNode, sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

// Convert Float to Signed/Unsigned Doubleword
multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
                            MaskOpNode, sched.ZMM>,
            avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
                              OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
                               MaskOpNode, sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

// Convert Double to Signed/Unsigned Doubleword with truncation
multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDNode MaskOpNode, SDNode OpNodeSAE,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
                            MaskOpNode, sched.ZMM>,
            avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
                               OpNodeSAE, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
    // memory forms of these instructions in Asm Parser. They have the same
    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
    // due to the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
                               VK2WM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
                               MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
  }

  // AT&T aliases with explicit "x"/"y" suffixes for the 128/256-bit forms.
  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                  VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                  f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;

  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                  VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                  f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
}

// Convert Double to Signed/Unsigned Doubleword
multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
                            MaskOpNode, sched.ZMM>,
            avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
                              OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
    // memory forms of these instructions in Asm Parser. They have the same
    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
    // due to the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
                               VK2WM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
                               MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
  }

  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                  f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;

  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                  f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
}

// Convert Double to Signed/Unsigned Quadword
multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
                            MaskOpNode, sched.ZMM>,
            avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
                              OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
                               MaskOpNode, sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

// Convert Double to Signed/Unsigned Quadword with truncation
multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDNode MaskOpNode, SDNode OpNodeRnd,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
                            MaskOpNode, sched.ZMM>,
            avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
                               MaskOpNode, sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

// Convert Signed/Unsigned Quadword to Double
multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           SDNode MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
                            MaskOpNode, sched.ZMM>,
            avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
                              OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
                               MaskOpNode, sched.XMM>, EVEX_V128, NotEVEX2VEXConvertible;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256, NotEVEX2VEXConvertible;
  }
}

// Convert Float to Signed/Unsigned Quadword
multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
                            MaskOpNode, sched.ZMM>,
            avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
                              OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from v4f32x_info source
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
                               MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
                               (v2i64 (OpNode (bc_v4f32
                                (v2f64
                                 (scalar_to_vector (loadf64 addr:$src)))))),
                               (v2i64 (MaskOpNode (bc_v4f32
                                (v2f64
                                 (scalar_to_vector (loadf64 addr:$src))))))>,
                               EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

// Convert Float to Signed/Unsigned Quadword with truncation
multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDNode MaskOpNode, SDNode OpNodeRnd,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
                            MaskOpNode, sched.ZMM>,
            avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from v4f32x_info source
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
                               MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
                               (v2i64 (OpNode (bc_v4f32
                                (v2f64
                                 (scalar_to_vector (loadf64 addr:$src)))))),
                               (v2i64 (MaskOpNode (bc_v4f32
                                (v2f64
                                 (scalar_to_vector (loadf64 addr:$src))))))>,
                               EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

// Convert Signed/Unsigned Quadword to Float
// Also Convert Signed/Unsigned Doubleword to Half
multiclass avx512_cvtqq2ps_dq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                                 SDPatternOperator MaskOpNode, SDPatternOperator OpNode128,
                                 SDPatternOperator OpNode128M, SDPatternOperator OpNodeRnd,
                                 AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
                                 X86SchedWriteWidths sched, Predicate prd = HasDQI> {
  let Predicates = [prd] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512, OpNode,
                            MaskOpNode, sched.ZMM>,
            avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
                              OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
    // memory forms of these instructions in Asm Parser. They have the same
    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
    // due to the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128, null_frag,
                               null_frag, sched.XMM, _src.info128.BroadcastStr,
                               "{x}", i128mem, _src.info128.KRCWM>,
                               EVEX_V128, NotEVEX2VEXConvertible;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256, OpNode,
                               MaskOpNode, sched.YMM, _src.info256.BroadcastStr,
                               "{y}">, EVEX_V256,
                               NotEVEX2VEXConvertible;

    // Special patterns to allow use of X86VM[SU]intToFP for masking. Instruction
    // patterns have been disabled with null_frag.
    def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT VR128X:$src))),
              (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
    def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
              _src.info128.KRCWM:$mask),
              (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, _src.info128.KRCWM:$mask, VR128X:$src)>;
    def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
              _src.info128.KRCWM:$mask),
              (!cast<Instruction>(NAME # "Z128rrkz") _src.info128.KRCWM:$mask, VR128X:$src)>;

    def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.LdFrag addr:$src))),
              (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
    def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), (_dst.info128.VT VR128X:$src0),
              _src.info128.KRCWM:$mask),
              (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
    def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), _dst.info128.ImmAllZerosV,
              _src.info128.KRCWM:$mask),
              (!cast<Instruction>(NAME # "Z128rmkz") _src.info128.KRCWM:$mask, addr:$src)>;

    def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT (X86VBroadcastld64 addr:$src)))),
              (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
    def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
              (_dst.info128.VT VR128X:$src0), _src.info128.KRCWM:$mask),
              (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
    def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
              _dst.info128.ImmAllZerosV, _src.info128.KRCWM:$mask),
              (!cast<Instruction>(NAME # "Z128rmbkz") _src.info128.KRCWM:$mask, addr:$src)>;
  }

  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                  VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                  i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, i64mem:$src), 0, "att">;

  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                  VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
                  "$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                  i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask, i64mem:$src), 0, "att">;
}

defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", any_sint_to_fp, sint_to_fp,
                                 X86any_VSintToFP, X86VSintToFP,
                                 SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;

defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", any_sint_to_fp, sint_to_fp,
                                 X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
                                 PS, EVEX_CD8<32, CD8VF>;

defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86any_cvttp2si,
                                   X86cvttp2si, X86cvttp2siSAE,
                                   SchedWriteCvtPS2DQ>, XS, EVEX_CD8<32, CD8VF>;

defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86any_cvttp2si,
                                   X86cvttp2si, X86cvttp2siSAE,
                                   SchedWriteCvtPD2DQ>,
                                   PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86any_cvttp2ui,
                                    X86cvttp2ui, X86cvttp2uiSAE,
                                    SchedWriteCvtPS2DQ>, PS, EVEX_CD8<32, CD8VF>;

defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86any_cvttp2ui,
                                    X86cvttp2ui, X86cvttp2uiSAE,
                                    SchedWriteCvtPD2DQ>,
                                    PS, VEX_W, EVEX_CD8<64, CD8VF>;

defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", any_uint_to_fp,
                                  uint_to_fp, X86any_VUintToFP, X86VUintToFP,
                                  SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;

defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", any_uint_to_fp,
                                  uint_to_fp, X86VUintToFpRnd,
                                  SchedWriteCvtDQ2PS>, XD, EVEX_CD8<32, CD8VF>;

defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int, X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
                                 EVEX_CD8<32, CD8VF>;

defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int, X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, XD,
                                 VEX_W, EVEX_CD8<64, CD8VF>;

defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt, X86cvtp2UInt,
                                  X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
                                  PS, EVEX_CD8<32, CD8VF>;

defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt, X86cvtp2UInt,
                                  X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
                                  PS, EVEX_CD8<64, CD8VF>;

defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int, X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, VEX_W,
                                 PD, EVEX_CD8<64, CD8VF>;

defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int, X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
                                 EVEX_CD8<32, CD8VH>;

defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt, X86cvtp2UInt,
                                  X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
                                  PD, EVEX_CD8<64, CD8VF>;

defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt, X86cvtp2UInt,
                                  X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD,
                                  EVEX_CD8<32, CD8VH>;

defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86any_cvttp2si,
                                   X86cvttp2si, X86cvttp2siSAE,
                                   SchedWriteCvtPD2DQ>, VEX_W,
                                   PD, EVEX_CD8<64, CD8VF>;

defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86any_cvttp2si,
                                   X86cvttp2si, X86cvttp2siSAE,
                                   SchedWriteCvtPS2DQ>, PD,
                                   EVEX_CD8<32, CD8VH>;

defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86any_cvttp2ui,
                                    X86cvttp2ui, X86cvttp2uiSAE,
                                    SchedWriteCvtPD2DQ>, VEX_W,
                                    PD, EVEX_CD8<64, CD8VF>;

defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86any_cvttp2ui,
8867 X86cvttp2ui, X86cvttp2uiSAE, 8868 SchedWriteCvtPS2DQ>, PD, 8869 EVEX_CD8<32, CD8VH>; 8870 8871defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", any_sint_to_fp, 8872 sint_to_fp, X86VSintToFpRnd, 8873 SchedWriteCvtDQ2PD>, VEX_W, XS, EVEX_CD8<64, CD8VF>; 8874 8875defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", any_uint_to_fp, 8876 uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PD>, 8877 VEX_W, XS, EVEX_CD8<64, CD8VF>; 8878 8879defm VCVTDQ2PH : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtdq2ph", any_sint_to_fp, sint_to_fp, 8880 X86any_VSintToFP, X86VMSintToFP, 8881 X86VSintToFpRnd, avx512vl_f16_info, avx512vl_i32_info, 8882 SchedWriteCvtDQ2PS, HasFP16>, 8883 T_MAP5PS, EVEX_CD8<32, CD8VF>; 8884 8885defm VCVTUDQ2PH : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtudq2ph", any_uint_to_fp, uint_to_fp, 8886 X86any_VUintToFP, X86VMUintToFP, 8887 X86VUintToFpRnd, avx512vl_f16_info, avx512vl_i32_info, 8888 SchedWriteCvtDQ2PS, HasFP16>, T_MAP5XD, 8889 EVEX_CD8<32, CD8VF>; 8890 8891defm VCVTQQ2PS : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtqq2ps", any_sint_to_fp, sint_to_fp, 8892 X86any_VSintToFP, X86VMSintToFP, 8893 X86VSintToFpRnd, avx512vl_f32_info, avx512vl_i64_info, 8894 SchedWriteCvtDQ2PS>, VEX_W, PS, 8895 EVEX_CD8<64, CD8VF>; 8896 8897defm VCVTUQQ2PS : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtuqq2ps", any_uint_to_fp, uint_to_fp, 8898 X86any_VUintToFP, X86VMUintToFP, 8899 X86VUintToFpRnd, avx512vl_f32_info, avx512vl_i64_info, 8900 SchedWriteCvtDQ2PS>, VEX_W, XD, 8901 EVEX_CD8<64, CD8VF>; 8902 8903let Predicates = [HasVLX] in { 8904 // Special patterns to allow use of X86mcvtp2Int for masking. Instruction 8905 // patterns have been disabled with null_frag. 
  // VCVTPD2DQ (128-bit) selection with masking.  The masked node
  // X86mcvtp2Int carries the passthru value and the writemask as explicit
  // operands, so each source form (register, folded load, broadcast load)
  // gets three Pats: unmasked, merge-masked (...k, passthru in $src0) and
  // zero-masked (...kz, passthru is ImmAllZerosV).
  def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))),
            (VCVTPD2DQZ128rr VR128X:$src)>;
  def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;

  // Folded-load forms.
  def : Pat<(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))),
            (VCVTPD2DQZ128rm addr:$src)>;
  def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;

  // Embedded-broadcast forms (scalar f64 load splatted via X86VBroadcastld64).
  def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTPD2DQZ128rmb addr:$src)>;
  def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
                          (v4i32 VR128X:$src0), VK2WM:$mask),
            (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;

  // Special patterns to allow use of X86mcvttp2si for masking. Instruction
  // patterns have been disabled with null_frag.
  // VCVTTPD2DQ (truncating convert, 128-bit) with masking.  The unmasked
  // forms use X86any_cvttp2si (covers both the plain and the strict-FP
  // node); the masked forms use X86mcvttp2si, which takes the passthru
  // value and writemask as explicit operands.
  def : Pat<(v4i32 (X86any_cvttp2si (v2f64 VR128X:$src))),
            (VCVTTPD2DQZ128rr VR128X:$src)>;
  def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;

  // Folded-load forms.
  def : Pat<(v4i32 (X86any_cvttp2si (loadv2f64 addr:$src))),
            (VCVTTPD2DQZ128rm addr:$src)>;
  def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;

  // Embedded-broadcast forms (scalar f64 load splatted via X86VBroadcastld64).
  def : Pat<(v4i32 (X86any_cvttp2si (v2f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTTPD2DQZ128rmb addr:$src)>;
  def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
                          (v4i32 VR128X:$src0), VK2WM:$mask),
            (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;

  // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction
  // patterns have been disabled with null_frag.
  // VCVTPD2UDQ (unsigned convert, 128-bit) with masking.  Same three-way
  // structure as the signed group above: unmasked, merge-masked (...k)
  // and zero-masked (...kz) for register, load and broadcast sources.
  def : Pat<(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src))),
            (VCVTPD2UDQZ128rr VR128X:$src)>;
  def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;

  // Folded-load forms.
  def : Pat<(v4i32 (X86cvtp2UInt (loadv2f64 addr:$src))),
            (VCVTPD2UDQZ128rm addr:$src)>;
  def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;

  // Embedded-broadcast forms (scalar f64 load splatted via X86VBroadcastld64).
  def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTPD2UDQZ128rmb addr:$src)>;
  def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
                           (v4i32 VR128X:$src0), VK2WM:$mask),
            (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
                           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;

  // Special patterns to allow use of X86mcvttp2ui for masking. Instruction
  // patterns have been disabled with null_frag.
8993 def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 VR128X:$src))), 8994 (VCVTTPD2UDQZ128rr VR128X:$src)>; 8995 def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0), 8996 VK2WM:$mask), 8997 (VCVTTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>; 8998 def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV, 8999 VK2WM:$mask), 9000 (VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>; 9001 9002 def : Pat<(v4i32 (X86any_cvttp2ui (loadv2f64 addr:$src))), 9003 (VCVTTPD2UDQZ128rm addr:$src)>; 9004 def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0), 9005 VK2WM:$mask), 9006 (VCVTTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 9007 def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV, 9008 VK2WM:$mask), 9009 (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>; 9010 9011 def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)))), 9012 (VCVTTPD2UDQZ128rmb addr:$src)>; 9013 def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)), 9014 (v4i32 VR128X:$src0), VK2WM:$mask), 9015 (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; 9016 def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)), 9017 v4i32x_info.ImmAllZerosV, VK2WM:$mask), 9018 (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>; 9019} 9020 9021let Predicates = [HasDQI, HasVLX] in { 9022 def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))), 9023 (VCVTPS2QQZ128rm addr:$src)>; 9024 def : Pat<(v2i64 (vselect_mask VK2WM:$mask, 9025 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), 9026 VR128X:$src0)), 9027 (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 9028 def : Pat<(v2i64 (vselect_mask VK2WM:$mask, 9029 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), 9030 v2i64x_info.ImmAllZerosV)), 9031 (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>; 9032 9033 def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))), 9034 (VCVTPS2UQQZ128rm addr:$src)>; 9035 def : Pat<(v2i64 
(vselect_mask VK2WM:$mask, 9036 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), 9037 VR128X:$src0)), 9038 (VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 9039 def : Pat<(v2i64 (vselect_mask VK2WM:$mask, 9040 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), 9041 v2i64x_info.ImmAllZerosV)), 9042 (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>; 9043 9044 def : Pat<(v2i64 (X86any_cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))), 9045 (VCVTTPS2QQZ128rm addr:$src)>; 9046 def : Pat<(v2i64 (vselect_mask VK2WM:$mask, 9047 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), 9048 VR128X:$src0)), 9049 (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 9050 def : Pat<(v2i64 (vselect_mask VK2WM:$mask, 9051 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), 9052 v2i64x_info.ImmAllZerosV)), 9053 (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>; 9054 9055 def : Pat<(v2i64 (X86any_cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))), 9056 (VCVTTPS2UQQZ128rm addr:$src)>; 9057 def : Pat<(v2i64 (vselect_mask VK2WM:$mask, 9058 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), 9059 VR128X:$src0)), 9060 (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 9061 def : Pat<(v2i64 (vselect_mask VK2WM:$mask, 9062 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), 9063 v2i64x_info.ImmAllZerosV)), 9064 (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>; 9065} 9066 9067let Predicates = [HasVLX] in { 9068 def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))), 9069 (VCVTDQ2PDZ128rm addr:$src)>; 9070 def : Pat<(v2f64 (vselect_mask VK2WM:$mask, 9071 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))), 9072 VR128X:$src0)), 9073 (VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 9074 def : Pat<(v2f64 (vselect_mask VK2WM:$mask, 9075 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))), 9076 v2f64x_info.ImmAllZerosV)), 9077 (VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>; 9078 9079 def : 
Pat<(v2f64 (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))), 9080 (VCVTUDQ2PDZ128rm addr:$src)>; 9081 def : Pat<(v2f64 (vselect_mask VK2WM:$mask, 9082 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))), 9083 VR128X:$src0)), 9084 (VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 9085 def : Pat<(v2f64 (vselect_mask VK2WM:$mask, 9086 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))), 9087 v2f64x_info.ImmAllZerosV)), 9088 (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>; 9089} 9090 9091//===----------------------------------------------------------------------===// 9092// Half precision conversion instructions 9093//===----------------------------------------------------------------------===// 9094 9095let Uses = [MXCSR], mayRaiseFPException = 1 in 9096multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src, 9097 X86MemOperand x86memop, dag ld_dag, 9098 X86FoldableSchedWrite sched> { 9099 defm rr : AVX512_maskable_split<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst), 9100 (ins _src.RC:$src), "vcvtph2ps", "$src", "$src", 9101 (X86any_cvtph2ps (_src.VT _src.RC:$src)), 9102 (X86cvtph2ps (_src.VT _src.RC:$src))>, 9103 T8PD, Sched<[sched]>; 9104 defm rm : AVX512_maskable_split<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst), 9105 (ins x86memop:$src), "vcvtph2ps", "$src", "$src", 9106 (X86any_cvtph2ps (_src.VT ld_dag)), 9107 (X86cvtph2ps (_src.VT ld_dag))>, 9108 T8PD, Sched<[sched.Folded]>; 9109} 9110 9111multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src, 9112 X86FoldableSchedWrite sched> { 9113 let Uses = [MXCSR] in 9114 defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst), 9115 (ins _src.RC:$src), "vcvtph2ps", 9116 "{sae}, $src", "$src, {sae}", 9117 (X86cvtph2psSAE (_src.VT _src.RC:$src))>, 9118 T8PD, EVEX_B, Sched<[sched]>; 9119} 9120 9121let Predicates = [HasAVX512] in 9122 defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, 9123 (load addr:$src), WriteCvtPH2PSZ>, 
9124 avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>, 9125 EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>; 9126 9127let Predicates = [HasVLX] in { 9128 defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem, 9129 (load addr:$src), WriteCvtPH2PSY>, EVEX, EVEX_V256, 9130 EVEX_CD8<32, CD8VH>; 9131 defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem, 9132 (bitconvert (v2i64 (X86vzload64 addr:$src))), 9133 WriteCvtPH2PS>, EVEX, EVEX_V128, 9134 EVEX_CD8<32, CD8VH>; 9135 9136 // Pattern match vcvtph2ps of a scalar i64 load. 9137 def : Pat<(v4f32 (X86any_cvtph2ps (v8i16 (bitconvert 9138 (v2i64 (scalar_to_vector (loadi64 addr:$src))))))), 9139 (VCVTPH2PSZ128rm addr:$src)>; 9140} 9141 9142multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src, 9143 X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> { 9144let ExeDomain = GenericDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { 9145 def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst), 9146 (ins _src.RC:$src1, i32u8imm:$src2), 9147 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", 9148 [(set _dest.RC:$dst, 9149 (X86any_cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>, 9150 Sched<[RR]>; 9151 let Constraints = "$src0 = $dst" in 9152 def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst), 9153 (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), 9154 "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", 9155 [(set _dest.RC:$dst, 9156 (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2), 9157 _dest.RC:$src0, _src.KRCWM:$mask))]>, 9158 Sched<[RR]>, EVEX_K; 9159 def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst), 9160 (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), 9161 "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}", 9162 [(set _dest.RC:$dst, 9163 (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2), 9164 _dest.ImmAllZerosV, _src.KRCWM:$mask))]>, 9165 
Sched<[RR]>, EVEX_KZ; 9166 let hasSideEffects = 0, mayStore = 1 in { 9167 def mr : AVX512AIi8<0x1D, MRMDestMem, (outs), 9168 (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2), 9169 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 9170 Sched<[MR]>; 9171 def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs), 9172 (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), 9173 "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>, 9174 EVEX_K, Sched<[MR]>, NotMemoryFoldable; 9175 } 9176} 9177} 9178 9179multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src, 9180 SchedWrite Sched> { 9181 let hasSideEffects = 0, Uses = [MXCSR] in 9182 defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest, 9183 (outs _dest.RC:$dst), 9184 (ins _src.RC:$src1, i32u8imm:$src2), 9185 "vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2", []>, 9186 EVEX_B, AVX512AIi8Base, Sched<[Sched]>; 9187} 9188 9189let Predicates = [HasAVX512] in { 9190 defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem, 9191 WriteCvtPS2PHZ, WriteCvtPS2PHZSt>, 9192 avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>, 9193 EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>; 9194 9195 def : Pat<(store (v16i16 (X86any_cvtps2ph VR512:$src1, timm:$src2)), addr:$dst), 9196 (VCVTPS2PHZmr addr:$dst, VR512:$src1, timm:$src2)>; 9197} 9198 9199let Predicates = [HasVLX] in { 9200 defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem, 9201 WriteCvtPS2PHY, WriteCvtPS2PHYSt>, 9202 EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>; 9203 defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem, 9204 WriteCvtPS2PH, WriteCvtPS2PHSt>, 9205 EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>; 9206 9207 def : Pat<(store (f64 (extractelt 9208 (bc_v2f64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))), 9209 (iPTR 0))), addr:$dst), 9210 (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>; 9211 def : Pat<(store (i64 (extractelt 9212 (bc_v2i64 (v8i16 
(X86any_cvtps2ph VR128X:$src1, timm:$src2))), 9213 (iPTR 0))), addr:$dst), 9214 (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>; 9215 def : Pat<(store (v8i16 (X86any_cvtps2ph VR256X:$src1, timm:$src2)), addr:$dst), 9216 (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, timm:$src2)>; 9217} 9218 9219// Unordered/Ordered scalar fp compare with Sae and set EFLAGS 9220multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _, 9221 string OpcodeStr, Domain d, 9222 X86FoldableSchedWrite sched = WriteFComX> { 9223 let ExeDomain = d, hasSideEffects = 0, Uses = [MXCSR] in 9224 def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2), 9225 !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>, 9226 EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>; 9227} 9228 9229let Defs = [EFLAGS], Predicates = [HasAVX512] in { 9230 defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", SSEPackedSingle>, 9231 AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>; 9232 defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", SSEPackedDouble>, 9233 AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>; 9234 defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", SSEPackedSingle>, 9235 AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>; 9236 defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", SSEPackedDouble>, 9237 AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>; 9238} 9239 9240let Defs = [EFLAGS], Predicates = [HasAVX512] in { 9241 defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86any_fcmp, f32, f32mem, loadf32, 9242 "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG, 9243 EVEX_CD8<32, CD8VT1>; 9244 defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86any_fcmp, f64, f64mem, loadf64, 9245 "ucomisd", SSEPackedDouble>, PD, EVEX, 9246 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; 9247 defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, X86strict_fcmps, f32, f32mem, loadf32, 9248 "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG, 9249 EVEX_CD8<32, CD8VT1>; 9250 defm VCOMISDZ : 
sse12_ord_cmp<0x2F, FR64X, X86strict_fcmps, f64, f64mem, loadf64, 9251 "comisd", SSEPackedDouble>, PD, EVEX, 9252 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; 9253 let isCodeGenOnly = 1 in { 9254 defm VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem, 9255 sse_load_f32, "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG, 9256 EVEX_CD8<32, CD8VT1>; 9257 defm VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem, 9258 sse_load_f64, "ucomisd", SSEPackedDouble>, PD, EVEX, 9259 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; 9260 9261 defm VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem, 9262 sse_load_f32, "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG, 9263 EVEX_CD8<32, CD8VT1>; 9264 defm VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem, 9265 sse_load_f64, "comisd", SSEPackedDouble>, PD, EVEX, 9266 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; 9267 } 9268} 9269 9270let Defs = [EFLAGS], Predicates = [HasFP16] in { 9271 defm VUCOMISHZ : avx512_ord_cmp_sae<0x2E, v8f16x_info, "vucomish", 9272 SSEPackedSingle>, AVX512PSIi8Base, T_MAP5PS, 9273 EVEX_CD8<16, CD8VT1>; 9274 defm VCOMISHZ : avx512_ord_cmp_sae<0x2F, v8f16x_info, "vcomish", 9275 SSEPackedSingle>, AVX512PSIi8Base, T_MAP5PS, 9276 EVEX_CD8<16, CD8VT1>; 9277 defm VUCOMISHZ : sse12_ord_cmp<0x2E, FR16X, X86any_fcmp, f16, f16mem, loadf16, 9278 "ucomish", SSEPackedSingle>, T_MAP5PS, EVEX, 9279 VEX_LIG, EVEX_CD8<16, CD8VT1>; 9280 defm VCOMISHZ : sse12_ord_cmp<0x2F, FR16X, X86strict_fcmps, f16, f16mem, loadf16, 9281 "comish", SSEPackedSingle>, T_MAP5PS, EVEX, 9282 VEX_LIG, EVEX_CD8<16, CD8VT1>; 9283 let isCodeGenOnly = 1 in { 9284 defm VUCOMISHZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v8f16, shmem, 9285 sse_load_f16, "ucomish", SSEPackedSingle>, 9286 T_MAP5PS, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>; 9287 9288 defm VCOMISHZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v8f16, shmem, 9289 sse_load_f16, "comish", SSEPackedSingle>, 9290 T_MAP5PS, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>; 9291 } 9292} 
9293 9294/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd, rcpsh, rsqrtsh 9295multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode, 9296 X86FoldableSchedWrite sched, X86VectorVTInfo _, 9297 Predicate prd = HasAVX512> { 9298 let Predicates = [prd], ExeDomain = _.ExeDomain in { 9299 defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 9300 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 9301 "$src2, $src1", "$src1, $src2", 9302 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>, 9303 EVEX_4V, VEX_LIG, Sched<[sched]>; 9304 defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 9305 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr, 9306 "$src2, $src1", "$src1, $src2", 9307 (OpNode (_.VT _.RC:$src1), 9308 (_.ScalarIntMemFrags addr:$src2))>, EVEX_4V, VEX_LIG, 9309 Sched<[sched.Folded, sched.ReadAfterFold]>; 9310} 9311} 9312 9313defm VRCPSHZ : avx512_fp14_s<0x4D, "vrcpsh", X86rcp14s, SchedWriteFRcp.Scl, 9314 f16x_info, HasFP16>, EVEX_CD8<16, CD8VT1>, 9315 T_MAP6PD; 9316defm VRSQRTSHZ : avx512_fp14_s<0x4F, "vrsqrtsh", X86rsqrt14s, 9317 SchedWriteFRsqrt.Scl, f16x_info, HasFP16>, 9318 EVEX_CD8<16, CD8VT1>, T_MAP6PD; 9319let Uses = [MXCSR] in { 9320defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl, 9321 f32x_info>, EVEX_CD8<32, CD8VT1>, 9322 T8PD; 9323defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl, 9324 f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>, 9325 T8PD; 9326defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s, 9327 SchedWriteFRsqrt.Scl, f32x_info>, 9328 EVEX_CD8<32, CD8VT1>, T8PD; 9329defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s, 9330 SchedWriteFRsqrt.Scl, f64x_info>, VEX_W, 9331 EVEX_CD8<64, CD8VT1>, T8PD; 9332} 9333 9334/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd 9335multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode, 9336 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 9337 let ExeDomain = 
_.ExeDomain in { 9338 defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 9339 (ins _.RC:$src), OpcodeStr, "$src", "$src", 9340 (_.VT (OpNode _.RC:$src))>, EVEX, T8PD, 9341 Sched<[sched]>; 9342 defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 9343 (ins _.MemOp:$src), OpcodeStr, "$src", "$src", 9344 (OpNode (_.VT 9345 (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD, 9346 Sched<[sched.Folded, sched.ReadAfterFold]>; 9347 defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 9348 (ins _.ScalarMemOp:$src), OpcodeStr, 9349 "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr, 9350 (OpNode (_.VT 9351 (_.BroadcastLdFrag addr:$src)))>, 9352 EVEX, T8PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 9353 } 9354} 9355 9356multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode, 9357 X86SchedWriteWidths sched> { 9358 let Uses = [MXCSR] in { 9359 defm 14PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"), OpNode, sched.ZMM, 9360 v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>; 9361 defm 14PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"), OpNode, sched.ZMM, 9362 v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; 9363 } 9364 let Predicates = [HasFP16] in 9365 defm PHZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"), OpNode, sched.ZMM, 9366 v32f16_info>, EVEX_V512, T_MAP6PD, EVEX_CD8<16, CD8VF>; 9367 9368 // Define only if AVX512VL feature is present. 
9369 let Predicates = [HasVLX], Uses = [MXCSR] in { 9370 defm 14PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"), 9371 OpNode, sched.XMM, v4f32x_info>, 9372 EVEX_V128, EVEX_CD8<32, CD8VF>; 9373 defm 14PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"), 9374 OpNode, sched.YMM, v8f32x_info>, 9375 EVEX_V256, EVEX_CD8<32, CD8VF>; 9376 defm 14PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"), 9377 OpNode, sched.XMM, v2f64x_info>, 9378 EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>; 9379 defm 14PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"), 9380 OpNode, sched.YMM, v4f64x_info>, 9381 EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>; 9382 } 9383 let Predicates = [HasFP16, HasVLX] in { 9384 defm PHZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"), 9385 OpNode, sched.XMM, v8f16x_info>, 9386 EVEX_V128, T_MAP6PD, EVEX_CD8<16, CD8VF>; 9387 defm PHZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"), 9388 OpNode, sched.YMM, v16f16x_info>, 9389 EVEX_V256, T_MAP6PD, EVEX_CD8<16, CD8VF>; 9390 } 9391} 9392 9393defm VRSQRT : avx512_fp14_p_vl_all<0x4E, "vrsqrt", X86rsqrt14, SchedWriteFRsqrt>; 9394defm VRCP : avx512_fp14_p_vl_all<0x4C, "vrcp", X86rcp14, SchedWriteFRcp>; 9395 9396/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd 9397multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _, 9398 SDNode OpNode, SDNode OpNodeSAE, 9399 X86FoldableSchedWrite sched> { 9400 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in { 9401 defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 9402 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 9403 "$src2, $src1", "$src1, $src2", 9404 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>, 9405 Sched<[sched]>, SIMD_EXC; 9406 9407 defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 9408 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 9409 "{sae}, $src2, $src1", "$src1, $src2, {sae}", 9410 (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2))>, 9411 EVEX_B, Sched<[sched]>; 9412 9413 defm m : 
AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 9414 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr, 9415 "$src2, $src1", "$src1, $src2", 9416 (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2))>, 9417 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 9418 } 9419} 9420 9421multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode, 9422 SDNode OpNodeSAE, X86FoldableSchedWrite sched> { 9423 defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE, 9424 sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG, T8PD, EVEX_4V; 9425 defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE, 9426 sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W, T8PD, EVEX_4V; 9427} 9428 9429multiclass avx512_vgetexpsh<bits<8> opc, string OpcodeStr, SDNode OpNode, 9430 SDNode OpNodeSAE, X86FoldableSchedWrite sched> { 9431 let Predicates = [HasFP16] in 9432 defm SHZ : avx512_fp28_s<opc, OpcodeStr#"sh", f16x_info, OpNode, OpNodeSAE, sched>, 9433 EVEX_CD8<16, CD8VT1>, T_MAP6PD, EVEX_4V; 9434} 9435 9436let Predicates = [HasERI] in { 9437 defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, X86rcp28SAEs, 9438 SchedWriteFRcp.Scl>; 9439 defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, X86rsqrt28SAEs, 9440 SchedWriteFRsqrt.Scl>; 9441} 9442 9443defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs, 9444 SchedWriteFRnd.Scl>, 9445 avx512_vgetexpsh<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs, 9446 SchedWriteFRnd.Scl>; 9447/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd 9448 9449multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 9450 SDNode OpNode, X86FoldableSchedWrite sched> { 9451 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { 9452 defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 9453 (ins _.RC:$src), OpcodeStr, "$src", "$src", 9454 (OpNode (_.VT _.RC:$src))>, 9455 Sched<[sched]>; 9456 9457 defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs 
_.RC:$dst),
                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                         (OpNode (_.VT
                                  (bitconvert (_.LdFrag addr:$src))))>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                          (ins _.ScalarMemOp:$src), OpcodeStr,
                          "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
                          (OpNode (_.VT
                                   (_.BroadcastLdFrag addr:$src)))>,
                          EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Register-only form with {sae} (suppress-all-exceptions) of the packed
// FP28/getexp style unary operations.
multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                             SDNode OpNode, X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src), OpcodeStr,
                            "{sae}, $src", "$src, {sae}",
                            (OpNode (_.VT _.RC:$src))>,
                            EVEX_B, Sched<[sched]>;
}

// ZMM-only ERI-style unary ops (normal and {sae} forms), PS and PD flavors.
multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
                      SDNode OpNodeSAE, X86SchedWriteWidths sched> {
  defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
             avx512_fp28_p_sae<opc, OpcodeStr#"ps", v16f32_info, OpNodeSAE,
                               sched.ZMM>,
             T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
             avx512_fp28_p_sae<opc, OpcodeStr#"pd", v8f64_info, OpNodeSAE,
                               sched.ZMM>,
             T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}

// XMM/YMM (VL) variants of the packed FP unary operations.
multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode, X86SchedWriteWidths sched> {
  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode,
                                sched.XMM>,
                                EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode,
                                sched.YMM>,
                                EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode,
                                sched.XMM>,
                                EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode,
                                sched.YMM>,
                                EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
  }
}

// FP16 flavor of VGETEXP (AVX512-FP16).
multiclass avx512_vgetexp_fp16<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               SDNode OpNodeSAE, X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in
  defm PHZ : avx512_fp28_p<opc, OpcodeStr#"ph", v32f16_info, OpNode, sched.ZMM>,
             avx512_fp28_p_sae<opc, OpcodeStr#"ph", v32f16_info, OpNodeSAE,
                               sched.ZMM>,
             T_MAP6PD, EVEX_V512, EVEX_CD8<16, CD8VF>;
  let Predicates = [HasFP16, HasVLX] in {
    defm PHZ128 : avx512_fp28_p<opc, OpcodeStr#"ph", v8f16x_info, OpNode,
                                sched.XMM>,
                  EVEX_V128, T_MAP6PD, EVEX_CD8<16, CD8VF>;
    defm PHZ256 : avx512_fp28_p<opc, OpcodeStr#"ph", v16f16x_info, OpNode,
                                sched.YMM>,
                  EVEX_V256, T_MAP6PD, EVEX_CD8<16, CD8VF>;
  }
}

let Predicates = [HasERI] in {
  defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, X86rsqrt28SAE,
                             SchedWriteFRsqrt>, EVEX;
  defm VRCP28   : avx512_eri<0xCA, "vrcp28", X86rcp28, X86rcp28SAE,
                             SchedWriteFRcp>, EVEX;
  defm VEXP2    : avx512_eri<0xC8, "vexp2", X86exp2, X86exp2SAE,
                             SchedWriteFAdd>, EVEX;
}

defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
                          SchedWriteFRnd>,
               avx512_vgetexp_fp16<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
                                   SchedWriteFRnd>,
               avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexp,
                                        SchedWriteFRnd>, EVEX;

// Packed square root with static (embedded) rounding control.
multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
                                    X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  let ExeDomain = _.ExeDomain in
  defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
                           (_.VT (X86fsqrtRnd _.RC:$src, (i32 timm:$rc)))>,
                           EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
}

// Packed square root: register, memory, and embedded-broadcast forms.
multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm r: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                (ins _.RC:$src), OpcodeStr, "$src", "$src",
                                (_.VT (any_fsqrt _.RC:$src)),
                                (_.VT (fsqrt _.RC:$src))>, EVEX,
                                Sched<[sched]>;
  defm m: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                                (any_fsqrt (_.VT (_.LdFrag addr:$src))),
                                (fsqrt (_.VT (_.LdFrag addr:$src)))>, EVEX,
                                Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm mb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                 (ins _.ScalarMemOp:$src), OpcodeStr,
                                 "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
                                 (any_fsqrt (_.VT (_.BroadcastLdFrag addr:$src))),
                                 (fsqrt (_.VT (_.BroadcastLdFrag addr:$src)))>,
                                 EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

let Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
                                  X86SchedWriteSizes sched> {
  let Predicates = [HasFP16] in
  defm PHZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
                                sched.PH.ZMM, v32f16_info>,
                                EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
  let Predicates = [HasFP16, HasVLX] in {
    defm PHZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
                                     sched.PH.XMM, v8f16x_info>,
                                     EVEX_V128, T_MAP5PS, EVEX_CD8<16, CD8VF>;
    defm PHZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
                                     sched.PH.YMM, v16f16x_info>,
                                     EVEX_V256, T_MAP5PS, EVEX_CD8<16, CD8VF>;
  }
  defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
                                sched.PS.ZMM, v16f32_info>,
                                EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
                                sched.PD.ZMM, v8f64_info>,
                                EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
                                     sched.PS.XMM, v4f32x_info>,
                                     EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
                                     sched.PS.YMM, v8f32x_info>,
                                     EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
                                     sched.PD.XMM, v2f64x_info>,
                                     EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
                                     sched.PD.YMM, v4f64x_info>,
                                     EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
  }
}

let Uses = [MXCSR] in
multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
                                        X86SchedWriteSizes sched> {
  let Predicates = [HasFP16] in
  defm PHZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ph"),
                                      sched.PH.ZMM, v32f16_info>,
                                      EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
  defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
                                      sched.PS.ZMM, v16f32_info>,
                                      EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"),
                                      sched.PD.ZMM, v8f64_info>,
                                      EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
}

// Scalar square root: intrinsic (r/m/rb) and codegen-only (FRC) forms.
multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, string Name, Predicate prd = HasAVX512> {
  let ExeDomain = _.ExeDomain, Predicates = [prd] in {
    defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _,
(outs _.RC:$dst),
                         (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (X86fsqrts (_.VT _.RC:$src1),
                                    (_.VT _.RC:$src2))>,
                         Sched<[sched]>, SIMD_EXC;
    defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (X86fsqrts (_.VT _.RC:$src1),
                                    (_.ScalarIntMemFrags addr:$src2))>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
    let Uses = [MXCSR] in
    defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
                          "$rc, $src2, $src1", "$src1, $src2, $rc",
                          (X86fsqrtRnds (_.VT _.RC:$src1),
                                        (_.VT _.RC:$src2),
                                        (i32 timm:$rc))>,
                          EVEX_B, EVEX_RC, Sched<[sched]>;

    // Non-intrinsic (scalar-register) forms, used only by isel patterns.
    let isCodeGenOnly = 1, hasSideEffects = 0 in {
      def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
                (ins _.FRC:$src1, _.FRC:$src2),
                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                Sched<[sched]>, SIMD_EXC;
      let mayLoad = 1 in
        def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
                  (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
    }
  }

  let Predicates = [prd] in {
    def : Pat<(_.EltVT (any_fsqrt _.FRC:$src)),
              (!cast<Instruction>(Name#Zr)
                  (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
  }

  let Predicates = [prd, OptForSize] in {
    def : Pat<(_.EltVT (any_fsqrt (load addr:$src))),
              (!cast<Instruction>(Name#Zm)
                  (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
  }
}

multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
                                  X86SchedWriteSizes sched> {
  defm SHZ : avx512_sqrt_scalar<opc, OpcodeStr#"sh", sched.PH.Scl, f16x_info,
                                NAME#"SH", HasFP16>,
             EVEX_CD8<16, CD8VT1>, EVEX_4V, T_MAP5XS;
  defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info,
                                NAME#"SS">,
             EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;
  defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info,
                                NAME#"SD">,
             EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W;
}

defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
             avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;

defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG;

// Scalar round-to-integral with imm8 rounding-mode operand.
multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                       (i32 timm:$src3)))>,
                   Sched<[sched]>, SIMD_EXC;

    let Uses = [MXCSR] in
    defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
                    "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
                    (_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                           (i32 timm:$src3)))>, EVEX_B,
                    Sched<[sched]>;

    defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
                   OpcodeStr,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (_.VT (X86RndScales _.RC:$src1,
                          (_.ScalarIntMemFrags addr:$src2), (i32 timm:$src3)))>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;

    let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
      def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
                (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
                OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                []>, Sched<[sched]>, SIMD_EXC;

      let mayLoad = 1 in
        def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
                  (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                  OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                  []>, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
    }
  }

  let Predicates = [HasAVX512] in {
    def : Pat<(X86any_VRndScale _.FRC:$src1, timm:$src2),
              (_.EltVT (!cast<Instruction>(NAME#r) (_.EltVT (IMPLICIT_DEF)),
                        _.FRC:$src1, timm:$src2))>;
  }

  let Predicates = [HasAVX512, OptForSize] in {
    def : Pat<(X86any_VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2),
              (_.EltVT (!cast<Instruction>(NAME#m) (_.EltVT (IMPLICIT_DEF)),
                        addr:$src1, timm:$src2))>;
  }
}

let Predicates = [HasFP16] in
defm VRNDSCALESHZ : avx512_rndscale_scalar<0x0A, "vrndscalesh",
                                           SchedWriteFRnd.Scl, f16x_info>,
                                           AVX512PSIi8Base, TA, EVEX_4V,
                                           EVEX_CD8<16, CD8VT1>;

defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
                                           SchedWriteFRnd.Scl, f32x_info>,
                                           AVX512AIi8Base, EVEX_4V, VEX_LIG,
                                           EVEX_CD8<32, CD8VT1>;

defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd",
                                           SchedWriteFRnd.Scl, f64x_info>,
                                           VEX_W, AVX512AIi8Base, EVEX_4V, VEX_LIG,
                                           EVEX_CD8<64, CD8VT1>;

// Lower a masked-select of a scalar op into the masked intrinsic instruction.
multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
                                dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
                                dag OutMask, Predicate BasePredicate> {
  let Predicates = [BasePredicate] in {
    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
               (OpNode (extractelt _.VT:$src2, (iPTR 0))),
               (extractelt _.VT:$dst, (iPTR 0))))),
              (!cast<Instruction>("V"#OpcPrefix#r_Intk)
               _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>;

    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
               (OpNode (extractelt _.VT:$src2, (iPTR 0))),
               ZeroFP))),
              (!cast<Instruction>("V"#OpcPrefix#r_Intkz)
               OutMask, _.VT:$src2, _.VT:$src1)>;
  }
}

defm : avx512_masked_scalar<fsqrt, "SQRTSHZ", X86Movsh,
                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))),
                            v8f16x_info, fp16imm0,
                            (COPY_TO_REGCLASS $mask, VK1WM), HasFP16>;
defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss,
                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))),
                            v4f32x_info, fp32imm0,
                            (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))),
                            v2f64x_info, fp64imm0,
                            (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;


//-------------------------------------------------
// Integer truncate and extend operations
//-------------------------------------------------

// PatFrags that contain a select and a truncate op. These take operands in the
// same order as X86vmtrunc, X86vmtruncs, X86vmtruncus. This allows us to pass
// either to the multiclasses.
def select_trunc : PatFrag<(ops node:$src, node:$src0, node:$mask),
                           (vselect_mask node:$mask,
                                         (trunc node:$src), node:$src0)>;
def select_truncs : PatFrag<(ops node:$src, node:$src0, node:$mask),
                            (vselect_mask node:$mask,
                                          (X86vtruncs node:$src), node:$src0)>;
def select_truncus : PatFrag<(ops node:$src, node:$src0, node:$mask),
                             (vselect_mask node:$mask,
                                           (X86vtruncus node:$src), node:$src0)>;

// Common rr/rrk/rrkz/mr/mrk skeleton for the VPMOV* truncating moves.
multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               SDPatternOperator MaskNode,
                               X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
                               X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
  let ExeDomain = DestInfo.ExeDomain in {
    def rr : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
                        (ins SrcInfo.RC:$src),
                        OpcodeStr # "\t{$src, $dst|$dst, $src}",
                        [(set DestInfo.RC:$dst,
                          (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))))]>,
                        EVEX, Sched<[sched]>;
    let Constraints =
"$src0 = $dst" in 9814 def rrk : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst), 9815 (ins DestInfo.RC:$src0, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src), 9816 OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", 9817 [(set DestInfo.RC:$dst, 9818 (MaskNode (SrcInfo.VT SrcInfo.RC:$src), 9819 (DestInfo.VT DestInfo.RC:$src0), 9820 SrcInfo.KRCWM:$mask))]>, 9821 EVEX, EVEX_K, Sched<[sched]>; 9822 def rrkz : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst), 9823 (ins SrcInfo.KRCWM:$mask, SrcInfo.RC:$src), 9824 OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}", 9825 [(set DestInfo.RC:$dst, 9826 (DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src), 9827 DestInfo.ImmAllZerosV, SrcInfo.KRCWM:$mask)))]>, 9828 EVEX, EVEX_KZ, Sched<[sched]>; 9829 } 9830 9831 let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in { 9832 def mr : AVX512XS8I<opc, MRMDestMem, (outs), 9833 (ins x86memop:$dst, SrcInfo.RC:$src), 9834 OpcodeStr # "\t{$src, $dst|$dst, $src}", []>, 9835 EVEX, Sched<[sched.Folded]>; 9836 9837 def mrk : AVX512XS8I<opc, MRMDestMem, (outs), 9838 (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src), 9839 OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>, 9840 EVEX, EVEX_K, Sched<[sched.Folded]>, NotMemoryFoldable; 9841 }//mayStore = 1, hasSideEffects = 0 9842} 9843 9844multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo, 9845 PatFrag truncFrag, PatFrag mtruncFrag, 9846 string Name> { 9847 9848 def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst), 9849 (!cast<Instruction>(Name#SrcInfo.ZSuffix#mr) 9850 addr:$dst, SrcInfo.RC:$src)>; 9851 9852 def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst, 9853 SrcInfo.KRCWM:$mask), 9854 (!cast<Instruction>(Name#SrcInfo.ZSuffix#mrk) 9855 addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>; 9856} 9857 9858multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128, 9859 SDNode OpNode256, SDNode OpNode512, 9860 SDPatternOperator MaskNode128, 
9861 SDPatternOperator MaskNode256, 9862 SDPatternOperator MaskNode512, 9863 X86FoldableSchedWrite sched, 9864 AVX512VLVectorVTInfo VTSrcInfo, 9865 X86VectorVTInfo DestInfoZ128, 9866 X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ, 9867 X86MemOperand x86memopZ128, X86MemOperand x86memopZ256, 9868 X86MemOperand x86memopZ, PatFrag truncFrag, 9869 PatFrag mtruncFrag, Predicate prd = HasAVX512>{ 9870 9871 let Predicates = [HasVLX, prd] in { 9872 defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched, 9873 VTSrcInfo.info128, DestInfoZ128, x86memopZ128>, 9874 avx512_trunc_mr_lowering<VTSrcInfo.info128, truncFrag, 9875 mtruncFrag, NAME>, EVEX_V128; 9876 9877 defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched, 9878 VTSrcInfo.info256, DestInfoZ256, x86memopZ256>, 9879 avx512_trunc_mr_lowering<VTSrcInfo.info256, truncFrag, 9880 mtruncFrag, NAME>, EVEX_V256; 9881 } 9882 let Predicates = [prd] in 9883 defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched, 9884 VTSrcInfo.info512, DestInfoZ, x86memopZ>, 9885 avx512_trunc_mr_lowering<VTSrcInfo.info512, truncFrag, 9886 mtruncFrag, NAME>, EVEX_V512; 9887} 9888 9889multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, 9890 X86FoldableSchedWrite sched, PatFrag StoreNode, 9891 PatFrag MaskedStoreNode, SDNode InVecNode, 9892 SDPatternOperator InVecMaskNode> { 9893 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode, 9894 InVecMaskNode, InVecMaskNode, InVecMaskNode, sched, 9895 avx512vl_i64_info, v16i8x_info, v16i8x_info, 9896 v16i8x_info, i16mem, i32mem, i64mem, StoreNode, 9897 MaskedStoreNode>, EVEX_CD8<8, CD8VO>; 9898} 9899 9900multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode, 9901 SDPatternOperator MaskNode, 9902 X86FoldableSchedWrite sched, PatFrag StoreNode, 9903 PatFrag MaskedStoreNode, SDNode InVecNode, 9904 SDPatternOperator InVecMaskNode> { 9905 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, 
OpNode, 9906 InVecMaskNode, InVecMaskNode, MaskNode, sched, 9907 avx512vl_i64_info, v8i16x_info, v8i16x_info, 9908 v8i16x_info, i32mem, i64mem, i128mem, StoreNode, 9909 MaskedStoreNode>, EVEX_CD8<16, CD8VQ>; 9910} 9911 9912multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode, 9913 SDPatternOperator MaskNode, 9914 X86FoldableSchedWrite sched, PatFrag StoreNode, 9915 PatFrag MaskedStoreNode, SDNode InVecNode, 9916 SDPatternOperator InVecMaskNode> { 9917 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, 9918 InVecMaskNode, MaskNode, MaskNode, sched, 9919 avx512vl_i64_info, v4i32x_info, v4i32x_info, 9920 v8i32x_info, i64mem, i128mem, i256mem, StoreNode, 9921 MaskedStoreNode>, EVEX_CD8<32, CD8VH>; 9922} 9923 9924multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode, 9925 SDPatternOperator MaskNode, 9926 X86FoldableSchedWrite sched, PatFrag StoreNode, 9927 PatFrag MaskedStoreNode, SDNode InVecNode, 9928 SDPatternOperator InVecMaskNode> { 9929 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode, 9930 InVecMaskNode, InVecMaskNode, MaskNode, sched, 9931 avx512vl_i32_info, v16i8x_info, v16i8x_info, 9932 v16i8x_info, i32mem, i64mem, i128mem, StoreNode, 9933 MaskedStoreNode>, EVEX_CD8<8, CD8VQ>; 9934} 9935 9936multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode, 9937 SDPatternOperator MaskNode, 9938 X86FoldableSchedWrite sched, PatFrag StoreNode, 9939 PatFrag MaskedStoreNode, SDNode InVecNode, 9940 SDPatternOperator InVecMaskNode> { 9941 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, 9942 InVecMaskNode, MaskNode, MaskNode, sched, 9943 avx512vl_i32_info, v8i16x_info, v8i16x_info, 9944 v16i16x_info, i64mem, i128mem, i256mem, StoreNode, 9945 MaskedStoreNode>, EVEX_CD8<16, CD8VH>; 9946} 9947 9948multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode, 9949 SDPatternOperator MaskNode, 9950 X86FoldableSchedWrite sched, PatFrag StoreNode, 9951 PatFrag 
MaskedStoreNode, SDNode InVecNode, 9952 SDPatternOperator InVecMaskNode> { 9953 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, 9954 InVecMaskNode, MaskNode, MaskNode, sched, 9955 avx512vl_i16_info, v16i8x_info, v16i8x_info, 9956 v32i8x_info, i64mem, i128mem, i256mem, StoreNode, 9957 MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>; 9958} 9959 9960defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", 9961 WriteVPMOV256, truncstorevi8, 9962 masked_truncstorevi8, X86vtrunc, X86vmtrunc>; 9963defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb", 9964 WriteVPMOV256, truncstore_s_vi8, 9965 masked_truncstore_s_vi8, X86vtruncs, 9966 X86vmtruncs>; 9967defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb", 9968 WriteVPMOV256, truncstore_us_vi8, 9969 masked_truncstore_us_vi8, X86vtruncus, X86vmtruncus>; 9970 9971defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc, 9972 WriteVPMOV256, truncstorevi16, 9973 masked_truncstorevi16, X86vtrunc, X86vmtrunc>; 9974defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, select_truncs, 9975 WriteVPMOV256, truncstore_s_vi16, 9976 masked_truncstore_s_vi16, X86vtruncs, 9977 X86vmtruncs>; 9978defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus, 9979 select_truncus, WriteVPMOV256, 9980 truncstore_us_vi16, masked_truncstore_us_vi16, 9981 X86vtruncus, X86vmtruncus>; 9982 9983defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc, 9984 WriteVPMOV256, truncstorevi32, 9985 masked_truncstorevi32, X86vtrunc, X86vmtrunc>; 9986defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, select_truncs, 9987 WriteVPMOV256, truncstore_s_vi32, 9988 masked_truncstore_s_vi32, X86vtruncs, 9989 X86vmtruncs>; 9990defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus, 9991 select_truncus, WriteVPMOV256, 9992 truncstore_us_vi32, masked_truncstore_us_vi32, 9993 X86vtruncus, X86vmtruncus>; 9994 9995defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc, 9996 WriteVPMOV256, truncstorevi8, 
masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs,
                                WriteVPMOV256, truncstore_s_vi8,
                                masked_truncstore_s_vi8, X86vtruncs,
                                X86vmtruncs>;
defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus,
                                 select_truncus, WriteVPMOV256,
                                 truncstore_us_vi8, masked_truncstore_us_vi8,
                                 X86vtruncus, X86vmtruncus>;

defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc,
                               WriteVPMOV256, truncstorevi16,
                               masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs,
                                WriteVPMOV256, truncstore_s_vi16,
                                masked_truncstore_s_vi16, X86vtruncs,
                                X86vmtruncs>;
defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
                                 select_truncus, WriteVPMOV256,
                                 truncstore_us_vi16, masked_truncstore_us_vi16,
                                 X86vtruncus, X86vmtruncus>;

defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc,
                               WriteVPMOV256, truncstorevi8,
                               masked_truncstorevi8, X86vtrunc,
                               X86vmtrunc>;
defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs,
                                WriteVPMOV256, truncstore_s_vi8,
                                masked_truncstore_s_vi8, X86vtruncs,
                                X86vmtruncs>;
defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
                                 select_truncus, WriteVPMOV256,
                                 truncstore_us_vi8, masked_truncstore_us_vi8,
                                 X86vtruncus, X86vmtruncus>;

// Without VL, widen the 256-bit source to 512 bits and truncate there.
let Predicates = [HasAVX512, NoVLX] in {
def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
         (v8i16 (EXTRACT_SUBREG
                 (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                              VR256X:$src, sub_ymm)))), sub_xmm))>;
def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
         (v4i32 (EXTRACT_SUBREG
                 (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                            VR256X:$src, sub_ymm)))), sub_xmm))>;
}

let Predicates = [HasBWI, NoVLX] in {
def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
         (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
                                             VR256X:$src, sub_ymm))), sub_xmm))>;
}

// Without BWI we can't use vXi16/vXi8 vselect so we have to use vmtrunc nodes.
multiclass mtrunc_lowering<string InstrName, SDNode OpNode,
                           X86VectorVTInfo DestInfo,
                           X86VectorVTInfo SrcInfo> {
  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
                                 DestInfo.RC:$src0,
                                 SrcInfo.KRCWM:$mask)),
            (!cast<Instruction>(InstrName#"rrk") DestInfo.RC:$src0,
                                                 SrcInfo.KRCWM:$mask,
                                                 SrcInfo.RC:$src)>;

  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
                                 DestInfo.ImmAllZerosV,
                                 SrcInfo.KRCWM:$mask)),
            (!cast<Instruction>(InstrName#"rrkz") SrcInfo.KRCWM:$mask,
                                                  SrcInfo.RC:$src)>;
}

let Predicates = [HasVLX] in {
defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>;
defm : mtrunc_lowering<"VPMOVSDWZ256", X86vmtruncs, v8i16x_info, v8i32x_info>;
defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>;
}

let Predicates = [HasAVX512] in {
defm : mtrunc_lowering<"VPMOVDWZ", X86vmtrunc, v16i16x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVSDWZ", X86vmtruncs, v16i16x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>;

defm : mtrunc_lowering<"VPMOVDBZ", X86vmtrunc, v16i8x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVSDBZ", X86vmtruncs, v16i8x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>;

defm : mtrunc_lowering<"VPMOVQWZ", X86vmtrunc, v8i16x_info, v8i64_info>;
defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>;
defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>;
}

// Common rr/rm skeleton for the sign/zero extension (VPMOVSX/VPMOVZX) moves.
multiclass avx512_pmovx_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
              X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
              X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
  let ExeDomain = DestInfo.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
                            (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
                            (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
                            EVEX, Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                            (ins x86memop:$src), OpcodeStr ,"$src", "$src",
                            (DestInfo.VT (LdFrag addr:$src))>,
                            EVEX, Sched<[sched.Folded]>;
  }
}

multiclass avx512_pmovx_bw<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode InVecNode, string ExtTy,
                           X86FoldableSchedWrite schedX, X86FoldableSchedWrite schedYZ,
                           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasBWI] in {
    defm Z128: avx512_pmovx_common<opc, OpcodeStr, schedX, v8i16x_info,
                                   v16i8x_info, i64mem, LdFrag, InVecNode>,
                                   EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256: avx512_pmovx_common<opc, OpcodeStr, schedYZ, v16i16x_info,
                                   v16i8x_info, i128mem, LdFrag, OpNode>,
                                   EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasBWI] in {
    defm Z : avx512_pmovx_common<opc, OpcodeStr, schedYZ, v32i16_info,
                                 v32i8x_info, i256mem, LdFrag, OpNode>,
                                 EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
  }
}

multiclass avx512_pmovx_bd<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode InVecNode, string ExtTy,
                           X86FoldableSchedWrite schedX, X86FoldableSchedWrite schedYZ,
                           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: avx512_pmovx_common<opc, OpcodeStr, schedX, v4i32x_info,
                                   v16i8x_info, i32mem, LdFrag, InVecNode>,
                                   EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256: avx512_pmovx_common<opc, OpcodeStr, schedYZ, v8i32x_info,
                                   v16i8x_info, i64mem, LdFrag, InVecNode>,
                                   EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z : avx512_pmovx_common<opc, OpcodeStr, schedYZ, v16i32_info,
                                 v16i8x_info, i128mem, LdFrag, OpNode>,
                                 EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
  }
}

multiclass avx512_pmovx_bq<bits<8> opc, string OpcodeStr,
                           SDNode InVecNode, string ExtTy,
                           X86FoldableSchedWrite schedX, X86FoldableSchedWrite schedYZ,
                           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: avx512_pmovx_common<opc, OpcodeStr, schedX, v2i64x_info,
                                   v16i8x_info, i16mem, LdFrag, InVecNode>,
                                   EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256: avx512_pmovx_common<opc, OpcodeStr, schedYZ, v4i64x_info,
                                   v16i8x_info, i32mem, LdFrag, InVecNode>,
                                   EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z : avx512_pmovx_common<opc, OpcodeStr, schedYZ, v8i64_info,
                                 v16i8x_info, i64mem, LdFrag, InVecNode>,
                                 EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG;
  }
}

multiclass avx512_pmovx_wd<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode InVecNode, string ExtTy,
                           X86FoldableSchedWrite schedX, X86FoldableSchedWrite schedYZ,
                           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: avx512_pmovx_common<opc, OpcodeStr, schedX, v4i32x_info,
                                   v8i16x_info, i64mem, LdFrag, InVecNode>,
                                   EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256: avx512_pmovx_common<opc, OpcodeStr, schedYZ, v8i32x_info,
                                   v8i16x_info, i128mem, LdFrag, OpNode>,
                                   EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z : avx512_pmovx_common<opc, OpcodeStr, schedYZ, v16i32_info,
                                 v16i16x_info, i256mem, LdFrag, OpNode>,
                                 EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
  }
}

multiclass avx512_pmovx_wq<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode InVecNode, string ExtTy,
                           X86FoldableSchedWrite schedX, X86FoldableSchedWrite schedYZ,
                           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: avx512_pmovx_common<opc, OpcodeStr, schedX, v2i64x_info,
                                   v8i16x_info, i32mem, LdFrag, InVecNode>,
                                   EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256: avx512_pmovx_common<opc, OpcodeStr, schedYZ, v4i64x_info,
                                   v8i16x_info, i64mem, LdFrag, InVecNode>,
                                   EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z : avx512_pmovx_common<opc, OpcodeStr, schedYZ, v8i64_info,
                                 v8i16x_info, i128mem, LdFrag, OpNode>,
                                 EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
  }
}

multiclass avx512_pmovx_dq<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode InVecNode, string ExtTy,
                           X86FoldableSchedWrite schedX, X86FoldableSchedWrite schedYZ,
                           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {

  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: avx512_pmovx_common<opc, OpcodeStr, schedX, v2i64x_info,
                                   v4i32x_info, i64mem, LdFrag, InVecNode>,
                                   EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;

    defm Z256: avx512_pmovx_common<opc, OpcodeStr, schedYZ, v4i64x_info,
                                   v4i32x_info, i128mem, LdFrag, OpNode>,
                                   EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
  }
  let Predicates = [HasAVX512] in {
    defm Z : avx512_pmovx_common<opc, OpcodeStr, schedYZ, v8i64_info,
                                 v8i32x_info, i256mem, LdFrag, OpNode>,
                                 EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
  }
}
defm VPMOVZXBW : avx512_pmovx_bw<0x30, "vpmovzxbw", zext, zext_invec, "z",
                                 SchedWriteShuffle.XMM, WriteVPMOV256>;
defm VPMOVZXBD : avx512_pmovx_bd<0x31, "vpmovzxbd", zext, zext_invec, "z",
                                 SchedWriteShuffle.XMM, WriteVPMOV256>;
defm VPMOVZXBQ : avx512_pmovx_bq<0x32, "vpmovzxbq", zext_invec, "z",
                                 SchedWriteShuffle.XMM, WriteVPMOV256>;
defm VPMOVZXWD : avx512_pmovx_wd<0x33, "vpmovzxwd", zext, zext_invec, "z",
                                 SchedWriteShuffle.XMM, WriteVPMOV256>;
defm VPMOVZXWQ : avx512_pmovx_wq<0x34, "vpmovzxwq", zext, zext_invec, "z",
                                 SchedWriteShuffle.XMM, WriteVPMOV256>;
defm VPMOVZXDQ : avx512_pmovx_dq<0x35, "vpmovzxdq", zext, zext_invec, "z",
                                 SchedWriteShuffle.XMM, WriteVPMOV256>;

defm VPMOVSXBW: avx512_pmovx_bw<0x20, "vpmovsxbw", sext, sext_invec, "s",
                                SchedWriteShuffle.XMM, WriteVPMOV256>;
defm VPMOVSXBD: avx512_pmovx_bd<0x21, "vpmovsxbd", sext, sext_invec, "s",
                                SchedWriteShuffle.XMM, WriteVPMOV256>;
defm VPMOVSXBQ: avx512_pmovx_bq<0x22, "vpmovsxbq", sext_invec, "s",
                                SchedWriteShuffle.XMM, WriteVPMOV256>;
defm VPMOVSXWD: avx512_pmovx_wd<0x23, "vpmovsxwd", sext, sext_invec, "s",
                                SchedWriteShuffle.XMM, WriteVPMOV256>;
defm VPMOVSXWQ: avx512_pmovx_wq<0x24, "vpmovsxwq", sext, sext_invec, "s",
                                SchedWriteShuffle.XMM, WriteVPMOV256>;
defm VPMOVSXDQ: avx512_pmovx_dq<0x25, "vpmovsxdq", sext, sext_invec, "s",
                                SchedWriteShuffle.XMM, WriteVPMOV256>;


// Patterns that we also need any extend versions of. aext_vector_inreg
// is currently legalized to zext_vector_inreg.
// Selects the EVEX pmovsx/pmovzx memory forms for full-vector extends of a
// loaded vector. OpcPrefix is "VPMOVSX" or "VPMOVZX"; ExtOp is the matching
// sext/zext SDNode. Only the forms whose source is a whole in-memory vector
// live here; the *_invec (low-element) forms are in AVX512_pmovx_patterns.
multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
  // 256-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
  def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
            (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
  }

  let Predicates = [HasVLX] in {
  def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
            (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;

  def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
            (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
  }

  // 512-bit patterns
  let Predicates = [HasBWI] in {
  def : Pat<(v32i16 (ExtOp (loadv32i8 addr:$src))),
            (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
  }
  let Predicates = [HasAVX512] in {
  def : Pat<(v16i32 (ExtOp (loadv16i8 addr:$src))),
            (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
  def : Pat<(v16i32 (ExtOp (loadv16i16 addr:$src))),
            (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;

  def : Pat<(v8i64 (ExtOp (loadv8i16 addr:$src))),
            (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;

  def : Pat<(v8i64 (ExtOp (loadv8i32 addr:$src))),
            (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
  }
}

// Adds the *_invec (extend-from-low-elements) patterns on top of the base
// patterns above. InVecOp is sext_invec/zext_invec. Each scalar load is
// matched in three spellings: an integer scalar_to_vector load, an FP
// scalar_to_vector load, and an X86vzload of the same width.
multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
                                 SDNode InVecOp> :
    AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
  // 128-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  }
  let Predicates = [HasVLX] in {
  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;

  // BQ only consumes 2 bytes; match the anyextended 16-bit load.
  def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;

  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  }
  let Predicates = [HasVLX] in {
  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  // FIX: the f64 load must be wrapped in a v2f64 scalar_to_vector (was
  // v2i64), matching the other loadf64 patterns in this multiclass —
  // scalar_to_vector requires the scalar type to match the vector element.
  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;

  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;

  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  // FIX: same v2i64 -> v2f64 correction as the BDZ256rm loadf64 pattern.
  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  }
  // 512-bit patterns
  let Predicates = [HasAVX512] in {
  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  }
}

defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;

// Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge
// ext+trunc aggressively making it impossible to legalize the DAG to this
// pattern directly. Instead, go through v16i32 with VPMOVZXWD + VPMOVDB.
let Predicates = [HasAVX512, NoBWI] in {
def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
def: Pat<(v16i8 (trunc (loadv16i16 addr:$src))),
         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
}

//===----------------------------------------------------------------------===//
// GATHER - SCATTER Operations

// FIXME: Improve scheduling of gather/scatter instructions.
// One EVEX gather instruction. The destination register and the writemask
// are both read and written: $src1 is tied to $dst (merge semantics) and
// $mask is tied to $mask_wb (the instruction writes the mask back —
// presumably clearing bits for completed elements; see Intel SDM).
// MaskRC defaults to the element count's writemask class but can be
// overridden when the index width limits the number of elements.
multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
  let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
      ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
  def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
                    (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
                    !strconcat(OpcodeStr#_.Suffix,
                               "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
                    []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
                    Sched<[WriteLoad, WriteVecMaskedGatherWriteback]>;
}

// Gathers of 64-bit elements (PD / Q): dword-indexed (dopc) and
// qword-indexed (qopc) variants at all three vector widths.
multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
                              AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512,
                                    vy512xmem>, EVEX_V512, VEX_W;
  defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info512,
                                    vz512mem>, EVEX_V512, VEX_W;
let Predicates = [HasVLX] in {
  defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
                                       vx256xmem>, EVEX_V256, VEX_W;
  defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info256,
                                       vy256xmem>, EVEX_V256, VEX_W;
  defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
                                       vx128xmem>, EVEX_V128, VEX_W;
  defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
                                       vx128xmem>, EVEX_V128, VEX_W;
}
}

// Gathers of 32-bit elements (PS / D). Qword-indexed forms can only address
// half as many elements, hence the smaller info (e.g. info256 for the
// 512-bit-index Q form) and the explicit VK2WM mask on the Z128 Q form.
multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
                              AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512, vz512mem>,
                                    EVEX_V512;
  defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info256, vz256mem>,
                                    EVEX_V512;
let Predicates = [HasVLX] in {
  defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
                                       vy256xmem>, EVEX_V256;
  defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info128,
                                       vy128xmem>, EVEX_V256;
  defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
                                       vx128xmem>, EVEX_V128;
  defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
                                       vx64xmem, VK2WM>, EVEX_V128;
}
}

defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
               avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;

defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
                avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;

// One EVEX scatter instruction. Like gather, the mask operand is written
// back ($mask = $mask_wb); the destination is memory (MRMDestMem).
multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {

let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain,
    hasSideEffects = 0 in

  def mr : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
                    (ins memop:$dst, MaskRC:$mask, _.RC:$src),
                    !strconcat(OpcodeStr#_.Suffix,
                               "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
                    []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
                    Sched<[WriteStore]>;
}

// Scatters of 64-bit elements — mirrors avx512_gather_q_pd.
multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
                               AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512,
                                     vy512xmem>, EVEX_V512, VEX_W;
  defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info512,
                                     vz512mem>, EVEX_V512, VEX_W;
let Predicates = [HasVLX] in {
  defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
                                        vx256xmem>, EVEX_V256, VEX_W;
  defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info256,
                                        vy256xmem>, EVEX_V256, VEX_W;
  defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
                                        vx128xmem>, EVEX_V128, VEX_W;
  defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
                                        vx128xmem>, EVEX_V128, VEX_W;
}
}

// Scatters of 32-bit elements — mirrors avx512_gather_d_ps.
multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
                               AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512, vz512mem>,
                                     EVEX_V512;
  defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info256, vz256mem>,
                                     EVEX_V512;
let Predicates = [HasVLX] in {
  defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
                                        vy256xmem>, EVEX_V256;
  defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
                                        vy128xmem>, EVEX_V256;
  defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
                                        vx128xmem>, EVEX_V128;
  defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
                                        vx64xmem, VK2WM>, EVEX_V128;
}
}

defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
                avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;

defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
                 avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;

// prefetch
// Gather/scatter prefetch hints (PFI feature). These both load and store
// nothing architecturally visible but are marked mayLoad/mayStore so they
// are not reordered or deleted.
multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
                                          RegisterClass KRC, X86MemOperand memop> {
  let Predicates = [HasPFI], mayLoad = 1, mayStore = 1 in
  def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
                   !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
                   EVEX, EVEX_K, Sched<[WriteLoad]>;
}

defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
                    VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
                    VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
                    VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
                    VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
                    VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
                    VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
                    VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
                    VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

// Mask-to-vector move (VPMOVM2*): selected for a sext of a mask register
// to a full vector (all-ones / all-zeros per lane).
multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr, SchedWrite Sched> {
def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
                  !strconcat(OpcodeStr#Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
                  [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
                  EVEX, Sched<[Sched]>;
}

multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
                                 string OpcodeStr, Predicate prd> {
let Predicates = [prd] in
  defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr, WriteVecMoveZ>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr, WriteVecMoveY>, EVEX_V256;
    defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr, WriteVecMoveX>, EVEX_V128;
  }
}

defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2", HasBWI>;
defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI>, VEX_W;
defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI>, VEX_W;

// Vector-to-mask move (VPMOV*2M): selected via an X86pcmpgtm of zero against
// the source, i.e. the mask bit is the sign bit of each element.
multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
    def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
                        !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                        [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
                        EVEX, Sched<[WriteMove]>;
}

// Use 512bit version to implement 128/256 bit in case NoVLX.
// NoVLX fallback: widen the 128/256-bit source into an undef 512-bit
// register (INSERT_SUBREG) and use the Z-width instruction, then copy the
// result into the narrow mask class.
multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
                                           X86VectorVTInfo _,
                                           string Name> {

  def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
            (_.KVT (COPY_TO_REGCLASS
                     (!cast<Instruction>(Name#"Zrr")
                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                      _.RC:$src, _.SubRegIdx)),
                   _.KRC))>;
}

// Instantiates the vector-to-mask instruction at all widths; when VLX is
// absent, the *_Alt lowering patterns above route 128/256-bit cases through
// the 512-bit instruction instead.
multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
                                         AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
    defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
                                            EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
                                              EVEX_V256;
    defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
                                              EVEX_V128;
  }
  let Predicates = [prd, NoVLX] in {
    defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
    defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
  }
}

defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
                                              avx512vl_i8_info, HasBWI>;
defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
                                              avx512vl_i16_info, HasBWI>, VEX_W;
defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
                                              avx512vl_i32_info, HasDQI>;
defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
                                              avx512vl_i64_info, HasDQI>, VEX_W;

// Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
// is available, but BWI is not. We can't handle this in lowering because
// a target independent DAG combine likes to combine sext and trunc.
// Mask sext to byte/word vectors without BWI: materialize the mask as a
// v16i32 via VPMOVM2D (DQI) and truncate with VPMOVDB/VPMOVDW.
let Predicates = [HasDQI, NoBWI] in {
  def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
            (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
  def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
            (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
}

let Predicates = [HasDQI, NoBWI, HasVLX] in {
  def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
            (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - COMPRESS and EXPAND
//

// Register and store forms of a compress instruction. Selection is done via
// the lowering patterns below (null_frag here); the plain mr/mrk store
// forms carry no ISel pattern themselves.
multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
                                        string OpcodeStr, X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
              (null_frag)>, AVX5128IBase,
              Sched<[sched]>;

  let mayStore = 1, hasSideEffects = 0 in
  def mr : AVX5128I<opc, MRMDestMem, (outs),
              (ins _.MemOp:$dst, _.RC:$src),
              OpcodeStr # "\t{$src, $dst|$dst, $src}",
              []>, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[sched.Folded]>;

  def mrk : AVX5128I<opc, MRMDestMem, (outs),
              (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
              OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
              []>,
              EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[sched.Folded]>;
}

// Maps the compressing-store and X86compress nodes (merge and zero-masked
// variants) onto the instructions defined above.
multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
  def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#mrk)
                            addr:$dst, _.KRCWM:$mask, _.RC:$src)>;

  def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#rrk)
                            _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
  def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#rrkz)
                            _.KRCWM:$mask, _.RC:$src)>;
}

multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched,
                                 AVX512VLVectorVTInfo VTInfo,
                                 Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in
  defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>,
           compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [Pred, HasVLX] in {
    defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>,
                compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>,
                compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPCOMPRESS?
defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", WriteVarShuffle256,
                                          avx512vl_i32_info>, EVEX, NotMemoryFoldable;
defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", WriteVarShuffle256,
                                          avx512vl_i64_info>, EVEX, VEX_W, NotMemoryFoldable;
defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", WriteVarShuffle256,
                                          avx512vl_f32_info>, EVEX, NotMemoryFoldable;
defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", WriteVarShuffle256,
                                          avx512vl_f64_info>, EVEX, VEX_W, NotMemoryFoldable;

// expand
// Register and load forms of an expand instruction; selection happens via
// the lowering patterns below (null_frag here).
multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
                               string OpcodeStr, X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
              (null_frag)>, AVX5128IBase,
              Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
              (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
              (null_frag)>,
            AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Maps expanding-load (undef, zero, and merge passthru) and X86expand nodes
// onto the instructions above. An undef passthru uses the zeroing form.
multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
            (!cast<Instruction>(Name#_.ZSuffix#rmkz)
                                        _.KRCWM:$mask, addr:$src)>;

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmkz)
                                        _.KRCWM:$mask, addr:$src)>;

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
                                     (_.VT _.RC:$src0))),
            (!cast<Instruction>(Name#_.ZSuffix#rmk)
                            _.RC:$src0, _.KRCWM:$mask, addr:$src)>;

  def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#rrk)
                            _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
  def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#rrkz)
                            _.KRCWM:$mask, _.RC:$src)>;
}

multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in
  defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>,
           expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [Pred, HasVLX] in {
    defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>,
                expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>,
                expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPEXPAND?
10736defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256, 10737 avx512vl_i32_info>, EVEX; 10738defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256, 10739 avx512vl_i64_info>, EVEX, VEX_W; 10740defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256, 10741 avx512vl_f32_info>, EVEX; 10742defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256, 10743 avx512vl_f64_info>, EVEX, VEX_W; 10744 10745//handle instruction reg_vec1 = op(reg_vec,imm) 10746// op(mem_vec,imm) 10747// op(broadcast(eltVt),imm) 10748//all instruction created with FROUND_CURRENT 10749multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, 10750 SDPatternOperator OpNode, 10751 SDPatternOperator MaskOpNode, 10752 X86FoldableSchedWrite sched, 10753 X86VectorVTInfo _> { 10754 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { 10755 defm rri : AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst), 10756 (ins _.RC:$src1, i32u8imm:$src2), 10757 OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2", 10758 (OpNode (_.VT _.RC:$src1), (i32 timm:$src2)), 10759 (MaskOpNode (_.VT _.RC:$src1), (i32 timm:$src2))>, 10760 Sched<[sched]>; 10761 defm rmi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst), 10762 (ins _.MemOp:$src1, i32u8imm:$src2), 10763 OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2", 10764 (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))), 10765 (i32 timm:$src2)), 10766 (MaskOpNode (_.VT (bitconvert (_.LdFrag addr:$src1))), 10767 (i32 timm:$src2))>, 10768 Sched<[sched.Folded, sched.ReadAfterFold]>; 10769 defm rmbi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst), 10770 (ins _.ScalarMemOp:$src1, i32u8imm:$src2), 10771 OpcodeStr#_.Suffix, "$src2, ${src1}"#_.BroadcastStr, 10772 "${src1}"#_.BroadcastStr#", $src2", 10773 (OpNode (_.VT (_.BroadcastLdFrag addr:$src1)), 10774 (i32 timm:$src2)), 10775 (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src1)), 10776 (i32 
timm:$src2))>, EVEX_B, 10777 Sched<[sched.Folded, sched.ReadAfterFold]>; 10778 } 10779} 10780 10781//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae} 10782multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr, 10783 SDNode OpNode, X86FoldableSchedWrite sched, 10784 X86VectorVTInfo _> { 10785 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in 10786 defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 10787 (ins _.RC:$src1, i32u8imm:$src2), 10788 OpcodeStr#_.Suffix, "$src2, {sae}, $src1", 10789 "$src1, {sae}, $src2", 10790 (OpNode (_.VT _.RC:$src1), 10791 (i32 timm:$src2))>, 10792 EVEX_B, Sched<[sched]>; 10793} 10794 10795multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr, 10796 AVX512VLVectorVTInfo _, bits<8> opc, SDPatternOperator OpNode, 10797 SDPatternOperator MaskOpNode, SDNode OpNodeSAE, X86SchedWriteWidths sched, 10798 Predicate prd>{ 10799 let Predicates = [prd] in { 10800 defm Z : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode, 10801 sched.ZMM, _.info512>, 10802 avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, 10803 sched.ZMM, _.info512>, EVEX_V512; 10804 } 10805 let Predicates = [prd, HasVLX] in { 10806 defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode, 10807 sched.XMM, _.info128>, EVEX_V128; 10808 defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode, 10809 sched.YMM, _.info256>, EVEX_V256; 10810 } 10811} 10812 10813//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) 10814// op(reg_vec2,mem_vec,imm) 10815// op(reg_vec2,broadcast(eltVt),imm) 10816//all instruction created with FROUND_CURRENT 10817multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode, 10818 X86FoldableSchedWrite sched, X86VectorVTInfo _>{ 10819 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { 10820 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 10821 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), 10822 
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 10823 (OpNode (_.VT _.RC:$src1), 10824 (_.VT _.RC:$src2), 10825 (i32 timm:$src3))>, 10826 Sched<[sched]>; 10827 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 10828 (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3), 10829 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 10830 (OpNode (_.VT _.RC:$src1), 10831 (_.VT (bitconvert (_.LdFrag addr:$src2))), 10832 (i32 timm:$src3))>, 10833 Sched<[sched.Folded, sched.ReadAfterFold]>; 10834 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 10835 (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3), 10836 OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1", 10837 "$src1, ${src2}"#_.BroadcastStr#", $src3", 10838 (OpNode (_.VT _.RC:$src1), 10839 (_.VT (_.BroadcastLdFrag addr:$src2)), 10840 (i32 timm:$src3))>, EVEX_B, 10841 Sched<[sched.Folded, sched.ReadAfterFold]>; 10842 } 10843} 10844 10845//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) 10846// op(reg_vec2,mem_vec,imm) 10847multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode, 10848 X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo, 10849 X86VectorVTInfo SrcInfo>{ 10850 let ExeDomain = DestInfo.ExeDomain in { 10851 defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst), 10852 (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3), 10853 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 10854 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1), 10855 (SrcInfo.VT SrcInfo.RC:$src2), 10856 (i8 timm:$src3)))>, 10857 Sched<[sched]>; 10858 defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst), 10859 (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3), 10860 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 10861 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1), 10862 (SrcInfo.VT (bitconvert 10863 (SrcInfo.LdFrag addr:$src2))), 10864 (i8 timm:$src3)))>, 10865 Sched<[sched.Folded, 
sched.ReadAfterFold]>; 10866 } 10867} 10868 10869//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) 10870// op(reg_vec2,mem_vec,imm) 10871// op(reg_vec2,broadcast(eltVt),imm) 10872multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode, 10873 X86FoldableSchedWrite sched, X86VectorVTInfo _>: 10874 avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{ 10875 10876 let ExeDomain = _.ExeDomain in 10877 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 10878 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3), 10879 OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1", 10880 "$src1, ${src2}"#_.BroadcastStr#", $src3", 10881 (OpNode (_.VT _.RC:$src1), 10882 (_.VT (_.BroadcastLdFrag addr:$src2)), 10883 (i8 timm:$src3))>, EVEX_B, 10884 Sched<[sched.Folded, sched.ReadAfterFold]>; 10885} 10886 10887//handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) 10888// op(reg_vec2,mem_scalar,imm) 10889multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode, 10890 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 10891 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { 10892 defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 10893 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), 10894 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 10895 (OpNode (_.VT _.RC:$src1), 10896 (_.VT _.RC:$src2), 10897 (i32 timm:$src3))>, 10898 Sched<[sched]>; 10899 defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 10900 (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3), 10901 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 10902 (OpNode (_.VT _.RC:$src1), 10903 (_.ScalarIntMemFrags addr:$src2), 10904 (i32 timm:$src3))>, 10905 Sched<[sched.Folded, sched.ReadAfterFold]>; 10906 } 10907} 10908 10909//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae} 10910multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr, 10911 SDNode OpNode, 
X86FoldableSchedWrite sched, 10912 X86VectorVTInfo _> { 10913 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in 10914 defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 10915 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), 10916 OpcodeStr, "$src3, {sae}, $src2, $src1", 10917 "$src1, $src2, {sae}, $src3", 10918 (OpNode (_.VT _.RC:$src1), 10919 (_.VT _.RC:$src2), 10920 (i32 timm:$src3))>, 10921 EVEX_B, Sched<[sched]>; 10922} 10923 10924//handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae} 10925multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode, 10926 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 10927 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in 10928 defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 10929 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), 10930 OpcodeStr, "$src3, {sae}, $src2, $src1", 10931 "$src1, $src2, {sae}, $src3", 10932 (OpNode (_.VT _.RC:$src1), 10933 (_.VT _.RC:$src2), 10934 (i32 timm:$src3))>, 10935 EVEX_B, Sched<[sched]>; 10936} 10937 10938multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr, 10939 AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode, 10940 SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{ 10941 let Predicates = [prd] in { 10942 defm Z : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, 10943 avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, sched.ZMM, _.info512>, 10944 EVEX_V512; 10945 10946 } 10947 let Predicates = [prd, HasVLX] in { 10948 defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, 10949 EVEX_V128; 10950 defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, 10951 EVEX_V256; 10952 } 10953} 10954 10955multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr, 10956 X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo, 10957 AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> { 10958 let Predicates = [Pred] in { 10959 
defm Z : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512, 10960 SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V; 10961 } 10962 let Predicates = [Pred, HasVLX] in { 10963 defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128, 10964 SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V; 10965 defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256, 10966 SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V; 10967 } 10968} 10969 10970multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _, 10971 bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched, 10972 Predicate Pred = HasAVX512> { 10973 let Predicates = [Pred] in { 10974 defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, 10975 EVEX_V512; 10976 } 10977 let Predicates = [Pred, HasVLX] in { 10978 defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, 10979 EVEX_V128; 10980 defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, 10981 EVEX_V256; 10982 } 10983} 10984 10985multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr, 10986 X86VectorVTInfo _, bits<8> opc, SDNode OpNode, 10987 SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> { 10988 let Predicates = [prd] in { 10989 defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>, 10990 avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeSAE, sched.XMM, _>; 10991 } 10992} 10993 10994multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr, 10995 bits<8> opcPs, bits<8> opcPd, SDPatternOperator OpNode, 10996 SDPatternOperator MaskOpNode, SDNode OpNodeSAE, 10997 X86SchedWriteWidths sched, Predicate prd>{ 10998 defm PH : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f16_info, 10999 opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, HasFP16>, 11000 AVX512PSIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>; 11001 defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info, 11002 
                            opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
                            AVX512AIi8Base, EVEX, EVEX_CD8<32, CD8VF>;
  defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
                            opcPd, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
                            AVX512AIi8Base, EVEX, EVEX_CD8<64, CD8VF>, VEX_W;
}

defm VREDUCE   : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
                              X86VReduce, X86VReduce, X86VReduceSAE,
                              SchedWriteFRnd, HasDQI>;
defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
                              X86any_VRndScale, X86VRndScale, X86VRndScaleSAE,
                              SchedWriteFRnd, HasAVX512>;
defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
                              X86VGetMant, X86VGetMant, X86VGetMantSAE,
                              SchedWriteFRnd, HasAVX512>;

defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
                                                0x50, X86VRange, X86VRangeSAE,
                                                SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
                                                0x50, X86VRange, X86VRangeSAE,
                                                SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;

defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
      f64x_info, 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
      0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;

defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VREDUCESH: avx512_common_fp_sae_scalar_imm<"vreducesh", f16x_info,
      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasFP16>,
      AVX512PSIi8Base, TA, VEX_LIG, EVEX_4V, EVEX_CD8<16, CD8VT1>;

defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VGETMANTSH: avx512_common_fp_sae_scalar_imm<"vgetmantsh", f16x_info,
      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasFP16>,
      AVX512PSIi8Base, TA, VEX_LIG, EVEX_4V, EVEX_CD8<16, CD8VT1>;

// Shuffle of whole 128-bit lanes selected by imm8.  The pattern is written
// on CastInfo's type and bitconverted to the result type; EVEX2VEXOvrd names
// the VEX instruction used by the EVEX->VEX compression pass.
multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
                                          X86FoldableSchedWrite sched,
                                          X86VectorVTInfo _,
                                          X86VectorVTInfo CastInfo,
                                          string EVEX2VEXOvrd> {
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (_.VT (bitconvert
                         (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
                                                  (i8 timm:$src3)))))>,
                  Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                (_.VT
                 (bitconvert
                  (CastInfo.VT (X86Shuf128 _.RC:$src1,
                                           (CastInfo.LdFrag addr:$src2),
                                           (i8 timm:$src3)))))>,
                Sched<[sched.Folded, sched.ReadAfterFold]>,
                EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs
                                              _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                    OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
                    "$src1, ${src2}"#_.BroadcastStr#", $src3",
                    (_.VT
                     (bitconvert
                      (CastInfo.VT
                       (X86Shuf128 _.RC:$src1,
                                   (_.BroadcastLdFrag addr:$src2),
                                   (i8 timm:$src3)))))>, EVEX_B,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Instantiate the 128-bit-lane shuffle at 512 and (under VLX) 256 bits.
// Only the 256-bit form gets an EVEX2VEX override; 512-bit passes "".
multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched,
                                   AVX512VLVectorVTInfo _,
                                   AVX512VLVectorVTInfo CastInfo, bits<8> opc,
                                   string EVEX2VEXOvrd>{
  let Predicates = [HasAVX512] in
  defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
                                          _.info512, CastInfo.info512, "">, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in
  defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
                                             _.info256, CastInfo.info256,
                                             EVEX2VEXOvrd>, EVEX_V256;
}

defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
                                          avx512vl_f32_info, avx512vl_f64_info,
                                          0x23, "VPERM2F128">,
                  AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
                                          avx512vl_f64_info, avx512vl_f64_info,
                                          0x23, "VPERM2F128">,
                  AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
                                          avx512vl_i32_info, avx512vl_i64_info,
                                          0x43, "VPERM2I128">,
                  AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
                                          avx512vl_i64_info, avx512vl_i64_info,
                                          0x43, "VPERM2I128">,
                  AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;

multiclass avx512_valign<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  // NOTE: EVEX2VEXOverride changed back to Unset for 256-bit at the
  // instantiation of this class.
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 timm:$src3)))>,
                  Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (_.VT (X86VAlign _.RC:$src1,
                                   (bitconvert (_.LdFrag addr:$src2)),
                                   (i8 timm:$src3)))>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>,
                  EVEX2VEXOverride<"VPALIGNRrmi">;

  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                   OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
                   "$src1, ${src2}"#_.BroadcastStr#", $src3",
                   (X86VAlign _.RC:$src1,
                              (_.VT (_.BroadcastLdFrag addr:$src2)),
                              (i8 timm:$src3))>, EVEX_B,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched,
                                AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z    : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>,
                AVX512AIi8Base, EVEX_4V, EVEX_V512;
  }
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>,
                AVX512AIi8Base, EVEX_4V, EVEX_V128;
    // We can't really override the 256-bit version so change it back to unset.
    let EVEX2VEXOverride = ? in
    defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
                AVX512AIi8Base, EVEX_4V, EVEX_V256;
  }
}

defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle,
                                   avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle,
                                   avx512vl_i64_info>, EVEX_CD8<64, CD8VF>,
                                   VEX_W;

defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
                                         SchedWriteShuffle, avx512vl_i8_info,
                                         avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;

// Fragments to help convert valignq into masked valignd. Or valignq/valignd
// into vpalignr.  The multipliers rescale the element-count immediate to the
// narrower element size (q->d: x2, q->b: x8, d->b: x4).
def ValignqImm32XForm : SDNodeXForm<timm, [{
  return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
}]>;
def ValignqImm8XForm : SDNodeXForm<timm, [{
  return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
}]>;
def ValigndImm8XForm : SDNodeXForm<timm, [{
  return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
}]>;

// Rewrite a masked align done on From's element type into the equivalent
// instruction on To's element type, rescaling the immediate with ImmXForm.
multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
                                        X86VectorVTInfo From, X86VectorVTInfo To,
                                        SDNodeXForm ImmXForm> {
  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                                 (bitconvert
                                  (From.VT (OpNode From.RC:$src1, From.RC:$src2,
                                                   timm:$src3))),
                                 To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
                                                  To.RC:$src1, To.RC:$src2,
                                                  (ImmXForm timm:$src3))>;

  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                                 (bitconvert
                                  (From.VT (OpNode From.RC:$src1, From.RC:$src2,
                                                   timm:$src3))),
                                 To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
                                                   To.RC:$src1, To.RC:$src2,
                                                   (ImmXForm timm:$src3))>;

  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                                 (bitconvert
                                  (From.VT (OpNode From.RC:$src1,
                                                   (From.LdFrag addr:$src2),
                                                   timm:$src3))),
                                 To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
                                                  To.RC:$src1, addr:$src2,
                                                  (ImmXForm timm:$src3))>;

  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                                 (bitconvert
                                  (From.VT (OpNode From.RC:$src1,
                                                   (From.LdFrag addr:$src2),
                                                   timm:$src3))),
                                 To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
                                                   To.RC:$src1, addr:$src2,
                                                   (ImmXForm timm:$src3))>;
}

// As above, plus patterns for the embedded-broadcast memory forms (the
// broadcast load is expressed on To's element type).
multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
                                           X86VectorVTInfo From,
                                           X86VectorVTInfo To,
                                           SDNodeXForm ImmXForm> :
      avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
  def : Pat<(From.VT (OpNode From.RC:$src1,
                             (bitconvert (To.VT (To.BroadcastLdFrag addr:$src2))),
                             timm:$src3)),
            (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
                                                  (ImmXForm timm:$src3))>;

  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                                 (bitconvert
                                  (From.VT (OpNode From.RC:$src1,
                                                   (bitconvert
                                                    (To.VT (To.BroadcastLdFrag addr:$src2))),
                                                   timm:$src3))),
                                 To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
                                                   To.RC:$src1, addr:$src2,
                                                   (ImmXForm timm:$src3))>;

  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                                 (bitconvert
                                  (From.VT (OpNode From.RC:$src1,
                                                   (bitconvert
                                                    (To.VT (To.BroadcastLdFrag addr:$src2))),
                                                   timm:$src3))),
                                 To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
                                                    To.RC:$src1, addr:$src2,
                                                    (ImmXForm timm:$src3))>;
}

let Predicates = [HasAVX512] in {
  // For 512-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
                                         v16i32_info, ValignqImm32XForm>;
}

let Predicates = [HasVLX] in {
  // For 128-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
                                         v4i32x_info, ValignqImm32XForm>;
  // For 256-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
                                         v8i32x_info, ValignqImm32XForm>;
}

let Predicates = [HasVLX, HasBWI] in {
  // We can turn 128 and 256 bit VALIGND/VALIGNQ into VPALIGNR.
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
                                      v16i8x_info, ValignqImm8XForm>;
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
                                      v16i8x_info, ValigndImm8XForm>;
}

defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
                SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>,
                EVEX_CD8<8, CD8VF>, NotEVEX2VEXConvertible;

// Unary op with register and (full) memory forms.
multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                    (ins _.RC:$src1), OpcodeStr,
                    "$src1", "$src1",
                    (_.VT (OpNode (_.VT _.RC:$src1)))>, EVEX, AVX5128IBase,
                    Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.MemOp:$src1), OpcodeStr,
                  "$src1", "$src1",
                  (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1)))))>,
                  EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
                  Sched<[sched.Folded]>;
  }
}

multiclass avx512_unary_rmb<bits<8> opc, string
 OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> :
            avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
  // Adds the embedded-broadcast memory form (EVEX_B) on top of rr/rm.
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.ScalarMemOp:$src1), OpcodeStr,
                  "${src1}"#_.BroadcastStr,
                  "${src1}"#_.BroadcastStr,
                  (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src1))))>,
                  EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
                  Sched<[sched.Folded]>;
}

// Unary rr/rm at all three widths; prd gates 512-bit, VLX gates 128/256.
multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched,
                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
                EVEX_V128;
  }
}

// Same as above but including the broadcast memory form.
multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo,
                               Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
                EVEX_V128;
  }
}

// D/Q element sizes (broadcast forms exist for 32/64-bit elements).
multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd> {
  defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched,
                               avx512vl_i64_info, prd>, VEX_W;
  defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched,
                               avx512vl_i32_info, prd>;
}

// B/W element sizes (no broadcast forms for 8/16-bit elements).
multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd> {
  defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched,
                              avx512vl_i16_info, prd>, VEX_WIG;
  defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched,
                              avx512vl_i8_info, prd>, VEX_WIG;
}

// All four element sizes: D/Q under HasAVX512, B/W under HasBWI.
multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched> {
  defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched,
                                    HasAVX512>,
              avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched,
                                    HasBWI>;
}

defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
                                    SchedWriteVecALU>;

// VPABS: Use 512bit version to implement 128/256 bit in case NoVLX.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v4i64 (abs VR256X:$src)),
            (EXTRACT_SUBREG
                (VPABSQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
             sub_ymm)>;
  def : Pat<(v2i64 (abs VR128X:$src)),
            (EXTRACT_SUBREG
                (VPABSQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
             sub_xmm)>;
}

// Use 512bit version to implement 128/256 bit.
multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
                                 AVX512VLVectorVTInfo _, Predicate prd> {
  let Predicates = [prd, NoVLX] in {
    def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))),
              (EXTRACT_SUBREG
                (!cast<Instruction>(InstrStr # "Zrr")
                  (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
                                 _.info256.RC:$src1,
                                 _.info256.SubRegIdx)),
                _.info256.SubRegIdx)>;

    def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1))),
              (EXTRACT_SUBREG
                (!cast<Instruction>(InstrStr # "Zrr")
                  (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
                                 _.info128.RC:$src1,
                                 _.info128.SubRegIdx)),
                _.info128.SubRegIdx)>;
  }
}

defm VPLZCNT    : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
                                        SchedWriteVecIMul, HasCDI>;

// FIXME: Is there a better scheduler class for VPCONFLICT?
defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
                                        SchedWriteVecALU, HasCDI>;

// VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;

//===---------------------------------------------------------------------===//
// Counts number of ones - VPOPCNTD and VPOPCNTQ
//===---------------------------------------------------------------------===//

// FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
                                     SchedWriteVecALU, HasVPOPCNTDQ>;

defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;

//===---------------------------------------------------------------------===//
// Replicate Single FP - MOVSHDUP and MOVSLDUP
//===---------------------------------------------------------------------===//

multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86SchedWriteWidths sched> {
  defm NAME: avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
                                avx512vl_f32_info, HasAVX512>, XS;
}

defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
                                  SchedWriteFShuffle>;
defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
                                  SchedWriteFShuffle>;

//===----------------------------------------------------------------------===//
// AVX-512 - MOVDDUP
//===----------------------------------------------------------------------===//

// 128-bit MOVDDUP is special: it is a broadcast of the low element, so the
// memory form loads a scalar (CD8VH) rather than a full vector.
multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src), OpcodeStr, "$src", "$src",
                   (_.VT (X86VBroadcast (_.VT _.RC:$src)))>, EVEX,
                   Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                 (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
                 (_.VT (_.BroadcastLdFrag addr:$src))>,
                 EVEX, EVEX_CD8<_.EltSize, CD8VH>,
                 Sched<[sched.Folded]>;
  }
}

multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr,
                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
  defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM,
                           VTInfo.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM,
                                VTInfo.info256>, EVEX_V256;
    defm Z128 : avx512_movddup_128<opc, OpcodeStr, sched.XMM,
                                   VTInfo.info128>, EVEX_V128;
  }
}

multiclass avx512_movddup<bits<8> opc, string OpcodeStr,
                          X86SchedWriteWidths sched> {
  defm NAME: avx512_movddup_common<opc, OpcodeStr, sched,
                                   avx512vl_f64_info>, XD, VEX_W;
}

defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", SchedWriteFShuffle>;

let Predicates = [HasVLX] in {
def : Pat<(v2f64 (X86VBroadcast f64:$src)),
          (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;

def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
                        (v2f64 VR128X:$src0)),
          (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
                           (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
                        immAllZerosV),
          (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - Unpack Instructions
//===----------------------------------------------------------------------===//

// Unpacks move lanes; they never raise FP exceptions and do not read MXCSR.
let Uses = []<Register>, mayRaiseFPException = 0 in {
defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, X86Unpckh, HasAVX512,
                                 SchedWriteFShuffleSizes, 0, 1>;
defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, X86Unpckl, HasAVX512,
                                 SchedWriteFShuffleSizes>;
}

defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
                                       SchedWriteShuffle, HasBWI>;

defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
                                       SchedWriteShuffle, HasAVX512>;
defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
                                       SchedWriteShuffle, HasAVX512>;
defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
                                        SchedWriteShuffle, HasAVX512>;
defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
                                        SchedWriteShuffle, HasAVX512>;

//===----------------------------------------------------------------------===//
// AVX-512 - Extract & Insert Integer Instructions
//===----------------------------------------------------------------------===//

// Extract-to-memory form for byte/word elements; the extracted element is
// truncated before the store.
multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   X86VectorVTInfo _> {
  def mr : AVX512Ii8<opc, MRMDestMem, (outs),
                     (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
                     OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), timm:$src2))),
                             addr:$dst)]>,
                     EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
}

multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
                       (ins _.RC:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set GR32orGR64:$dst,
                             (X86pextrb (_.VT _.RC:$src1), timm:$src2))]>,
                       EVEX, TAPD, Sched<[WriteVecExtract]>;

    defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
  }
}

multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
                       (ins _.RC:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set GR32orGR64:$dst,
                             (X86pextrw (_.VT _.RC:$src1), timm:$src2))]>,
                       EVEX, PD, Sched<[WriteVecExtract]>;

    // Alternate (MRMDestReg) encoding, kept for the disassembler only.
    let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
    def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
                           (ins _.RC:$src1, u8imm:$src2),
                           OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                           EVEX, TAPD, FoldGenData<NAME#rr>,
                           Sched<[WriteVecExtract]>;

    defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
  }
}

// Dword/qword element extract: register and store-to-memory forms.
multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
                                 RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
                       (ins _.RC:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set GRC:$dst,
                             (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
                       EVEX, TAPD, Sched<[WriteVecExtract]>;

    def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
                       (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(store (extractelt (_.VT _.RC:$src1),
                                           imm:$src2),addr:$dst)]>,
                       EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
                       Sched<[WriteVecExtractSt]>;
  }
}

defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, VEX_WIG;
defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, VEX_WIG;
defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W;

// Insert-from-memory form shared by the byte/word and dword/qword inserts.
multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86VectorVTInfo _, PatFrag LdFrag,
                               SDPatternOperator immoperator> {
  def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
                     (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                     OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                     [(set _.RC:$dst,
                           (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), immoperator:$src3)))]>,
                     EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}

multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86VectorVTInfo _, PatFrag LdFrag> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
                       (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
                       OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                       [(set _.RC:$dst,
                             (OpNode _.RC:$src1, GR32orGR64:$src2, timm:$src3))]>, EVEX_4V,
                       Sched<[WriteVecInsert]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag, timm>;
  }
}

multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
                                X86VectorVTInfo _, RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
                       (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
                       OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                       [(set _.RC:$dst,
                             (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
                       EVEX_4V, TAPD, Sched<[WriteVecInsert]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
                                    _.ScalarLdFrag, imm>, TAPD;
  }
}

defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
                                     extloadi8>, TAPD, VEX_WIG;
defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
                                     extloadi16>, PD, VEX_WIG;
// VPINSRD and VPINSRQ share opcode 0x22; the VEX_W bit selects the qword form.
defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;

//===----------------------------------------------------------------------===//
// VSHUFPS - VSHUFPD Operations
//===----------------------------------------------------------------------===//

multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_FP>{
  defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
                                    SchedWriteFShuffle>,
             EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
             AVX512AIi8Base, EVEX_4V;
}

defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_f32_info>, PS;
defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_f64_info>, PD, VEX_W;

//===----------------------------------------------------------------------===//
// AVX-512 - Byte shift Left/Right
//===----------------------------------------------------------------------===//

// Whole-register byte shifts by an immediate; unmasked, with register and
// memory source forms.
multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
                               Format MRMm, string OpcodeStr,
                               X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  def ri : AVX512<opc, MRMr,
             (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 timm:$src2))))]>,
             Sched<[sched]>;
  def mi : AVX512<opc, MRMm,
           (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
           [(set _.RC:$dst,(_.VT (OpNode
                                  (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                  (i8 timm:$src2))))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
                                   Format MRMm, string OpcodeStr,
                                   X86SchedWriteWidths sched, Predicate prd>{
  let Predicates = [prd] in
  defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                               sched.ZMM, v64i8_info>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    sched.YMM, v32i8x_info>, EVEX_V256;
    defm Z128 :
 avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    sched.XMM, v16i8x_info>, EVEX_V128;
  }
}
// VPSLLDQ and VPSRLDQ share opcode 0x73; the MRM reg field (/7 vs /3)
// selects the operation.
defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
                                       SchedWriteShuffle, HasBWI>,
                                       AVX512PDIi8Base, EVEX_4V, VEX_WIG;
defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
                                       SchedWriteShuffle, HasBWI>,
                                       AVX512PDIi8Base, EVEX_4V, VEX_WIG;

// PSADBW: byte sources, qword accumulator destination (hence separate
// _dst/_src VT infos).  Unmasked.
multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
                                string OpcodeStr, X86FoldableSchedWrite sched,
                                X86VectorVTInfo _dst, X86VectorVTInfo _src> {
  let isCommutable = 1 in
  def rr : AVX512BI<opc, MRMSrcReg,
                    (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
                    !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set _dst.RC:$dst,(_dst.VT
                                        (OpNode (_src.VT _src.RC:$src1),
                                                (_src.VT _src.RC:$src2))))]>,
                    Sched<[sched]>;
  def rm : AVX512BI<opc, MRMSrcMem,
                    (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
                    !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set _dst.RC:$dst,(_dst.VT
                                        (OpNode (_src.VT _src.RC:$src1),
                                                (_src.VT (bitconvert
                                                          (_src.LdFrag addr:$src2))))))]>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
                                    string OpcodeStr, X86SchedWriteWidths sched,
                                    Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM,
                                v8i64_info, v64i8_info>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM,
                                     v4i64x_info, v32i8x_info>, EVEX_V256;
    defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM,
                                     v2i64x_info, v16i8x_info>, EVEX_V128;
  }
}

defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
                                        SchedWritePSADBW, HasBWI>, EVEX_4V, VEX_WIG;

// Transforms to swizzle an immediate to enable better matching when
// memory operand isn't in the right place.  Truth-table bit i of the imm8
// corresponds to inputs (op0,op1,op2) = (bit2,bit1,bit0) of i, so permuting
// the operands permutes the imm bits accordingly.
def VPTERNLOG321_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 1/4 and 3/6.
  uint8_t NewImm = Imm & 0xa5;
  if (Imm & 0x02) NewImm |= 0x10;
  if (Imm & 0x10) NewImm |= 0x02;
  if (Imm & 0x08) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG213_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 2/4 and 3/5.
  uint8_t NewImm = Imm & 0xc3;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x20) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG132_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 1/2 and 5/6.
  uint8_t NewImm = Imm & 0x99;
  if (Imm & 0x02) NewImm |= 0x04;
  if (Imm & 0x04) NewImm |= 0x02;
  if (Imm & 0x20) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x20;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG231_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by moving operand 0 to the end.
  uint8_t Imm = N->getZExtValue();
  // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x04;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x08) NewImm |= 0x40;
  if (Imm & 0x10) NewImm |= 0x02;
  if (Imm & 0x20) NewImm |= 0x08;
  if (Imm & 0x40) NewImm |= 0x20;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG312_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by moving operand 2 to the beginning.
  uint8_t Imm = N->getZExtValue();
  // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x10;
  if (Imm & 0x04) NewImm |= 0x02;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x20) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;

multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          X86FoldableSchedWrite sched, X86VectorVTInfo _,
                          string Name>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT _.RC:$src3),
                              (i8 timm:$src4)), 1, 1>,
                      AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
  defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT (bitconvert (_.LdFrag addr:$src3))),
                              (i8 timm:$src4)), 1, 0>,
                      AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmbi : AVX512_maskable_3src<opc,
MRMSrcMem, _, (outs _.RC:$dst), 11831 (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4), 11832 OpcodeStr, "$src4, ${src3}"#_.BroadcastStr#", $src2", 11833 "$src2, ${src3}"#_.BroadcastStr#", $src4", 11834 (OpNode (_.VT _.RC:$src1), 11835 (_.VT _.RC:$src2), 11836 (_.VT (_.BroadcastLdFrag addr:$src3)), 11837 (i8 timm:$src4)), 1, 0>, EVEX_B, 11838 AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, 11839 Sched<[sched.Folded, sched.ReadAfterFold]>; 11840 }// Constraints = "$src1 = $dst" 11841 11842 // Additional patterns for matching passthru operand in other positions. 11843 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11844 (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), 11845 _.RC:$src1)), 11846 (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask, 11847 _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 timm:$src4))>; 11848 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11849 (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 timm:$src4)), 11850 _.RC:$src1)), 11851 (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask, 11852 _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 timm:$src4))>; 11853 11854 // Additional patterns for matching zero masking with loads in other 11855 // positions. 11856 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11857 (OpNode (bitconvert (_.LdFrag addr:$src3)), 11858 _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), 11859 _.ImmAllZerosV)), 11860 (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask, 11861 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>; 11862 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11863 (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)), 11864 _.RC:$src2, (i8 timm:$src4)), 11865 _.ImmAllZerosV)), 11866 (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask, 11867 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>; 11868 11869 // Additional patterns for matching masked loads with different 11870 // operand orders. 
11871 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11872 (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)), 11873 _.RC:$src2, (i8 timm:$src4)), 11874 _.RC:$src1)), 11875 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, 11876 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>; 11877 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11878 (OpNode (bitconvert (_.LdFrag addr:$src3)), 11879 _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), 11880 _.RC:$src1)), 11881 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, 11882 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>; 11883 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11884 (OpNode _.RC:$src2, _.RC:$src1, 11885 (bitconvert (_.LdFrag addr:$src3)), (i8 timm:$src4)), 11886 _.RC:$src1)), 11887 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, 11888 _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>; 11889 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11890 (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)), 11891 _.RC:$src1, (i8 timm:$src4)), 11892 _.RC:$src1)), 11893 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, 11894 _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>; 11895 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11896 (OpNode (bitconvert (_.LdFrag addr:$src3)), 11897 _.RC:$src1, _.RC:$src2, (i8 timm:$src4)), 11898 _.RC:$src1)), 11899 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, 11900 _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>; 11901 11902 // Additional patterns for matching zero masking with broadcasts in other 11903 // positions. 
11904 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11905 (OpNode (_.BroadcastLdFrag addr:$src3), 11906 _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), 11907 _.ImmAllZerosV)), 11908 (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1, 11909 _.KRCWM:$mask, _.RC:$src2, addr:$src3, 11910 (VPTERNLOG321_imm8 timm:$src4))>; 11911 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11912 (OpNode _.RC:$src1, 11913 (_.BroadcastLdFrag addr:$src3), 11914 _.RC:$src2, (i8 timm:$src4)), 11915 _.ImmAllZerosV)), 11916 (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1, 11917 _.KRCWM:$mask, _.RC:$src2, addr:$src3, 11918 (VPTERNLOG132_imm8 timm:$src4))>; 11919 11920 // Additional patterns for matching masked broadcasts with different 11921 // operand orders. 11922 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11923 (OpNode _.RC:$src1, (_.BroadcastLdFrag addr:$src3), 11924 _.RC:$src2, (i8 timm:$src4)), 11925 _.RC:$src1)), 11926 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, 11927 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>; 11928 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11929 (OpNode (_.BroadcastLdFrag addr:$src3), 11930 _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), 11931 _.RC:$src1)), 11932 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, 11933 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>; 11934 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11935 (OpNode _.RC:$src2, _.RC:$src1, 11936 (_.BroadcastLdFrag addr:$src3), 11937 (i8 timm:$src4)), _.RC:$src1)), 11938 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, 11939 _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>; 11940 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11941 (OpNode _.RC:$src2, 11942 (_.BroadcastLdFrag addr:$src3), 11943 _.RC:$src1, (i8 timm:$src4)), 11944 _.RC:$src1)), 11945 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, 11946 _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>; 11947 def : Pat<(_.VT (vselect_mask 
_.KRCWM:$mask, 11948 (OpNode (_.BroadcastLdFrag addr:$src3), 11949 _.RC:$src1, _.RC:$src2, (i8 timm:$src4)), 11950 _.RC:$src1)), 11951 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, 11952 _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>; 11953} 11954 11955multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched, 11956 AVX512VLVectorVTInfo _> { 11957 let Predicates = [HasAVX512] in 11958 defm Z : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM, 11959 _.info512, NAME>, EVEX_V512; 11960 let Predicates = [HasAVX512, HasVLX] in { 11961 defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM, 11962 _.info128, NAME>, EVEX_V128; 11963 defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM, 11964 _.info256, NAME>, EVEX_V256; 11965 } 11966} 11967 11968defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU, 11969 avx512vl_i32_info>; 11970defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU, 11971 avx512vl_i64_info>, VEX_W; 11972 11973// Patterns to implement vnot using vpternlog instead of creating all ones 11974// using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen 11975// so that the result is only dependent on src0. But we use the same source 11976// for all operands to prevent a false dependency. 11977// TODO: We should maybe have a more generalized algorithm for folding to 11978// vpternlog. 
// vnot -> VPTERNLOG(imm=15): imm 0x0F is the truth table for NOT of the
// first operand; all three sources are tied to $src to avoid a false
// dependency on an unrelated register.
let Predicates = [HasAVX512] in {
  def : Pat<(v64i8 (vnot VR512:$src)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
  def : Pat<(v32i16 (vnot VR512:$src)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
  def : Pat<(v16i32 (vnot VR512:$src)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
  def : Pat<(v8i64 (vnot VR512:$src)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
}

// Without VLX, widen the 128/256-bit vnot to a 512-bit VPTERNLOG and extract
// the low subregister again.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v16i8 (vnot VR128X:$src)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;
  def : Pat<(v8i16 (vnot VR128X:$src)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;
  def : Pat<(v4i32 (vnot VR128X:$src)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;
  def : Pat<(v2i64 (vnot VR128X:$src)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;

  def : Pat<(v32i8 (vnot VR256X:$src)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
  def : Pat<(v16i16 (vnot VR256X:$src)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
  def : Pat<(v8i32 (vnot VR256X:$src)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
  def : Pat<(v4i64 (vnot VR256X:$src)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
              (i8 15)), sub_ymm)>;
}

let Predicates = [HasVLX] in {
  def : Pat<(v16i8 (vnot VR128X:$src)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
  def : Pat<(v8i16 (vnot VR128X:$src)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
  def : Pat<(v4i32 (vnot VR128X:$src)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
  def : Pat<(v2i64 (vnot VR128X:$src)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;

  def : Pat<(v32i8 (vnot VR256X:$src)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
  def : Pat<(v16i16 (vnot VR256X:$src)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
  def : Pat<(v8i32 (vnot VR256X:$src)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
  def : Pat<(v4i64 (vnot VR256X:$src)),
            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - FixupImm
//===----------------------------------------------------------------------===//

// VFIXUPIMM: fix up special FP values using a table vector (TblVT) and an
// immediate. Tied-source ($src1 = $dst); reads MXCSR and may raise FP
// exceptions.
multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  X86VectorVTInfo TblVT>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
      Uses = [MXCSR], mayRaiseFPException = 1 in {
    defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (X86VFixupimm (_.VT _.RC:$src1),
                                    (_.VT _.RC:$src2),
                                    (TblVT.VT _.RC:$src3),
                                    (i32 timm:$src4))>, Sched<[sched]>;
    defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
                      OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (X86VFixupimm (_.VT _.RC:$src1),
                                    (_.VT _.RC:$src2),
                                    (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
                                    (i32 timm:$src4))>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
    defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                      OpcodeStr#_.Suffix, "$src4, ${src3}"#_.BroadcastStr#", $src2",
                      "$src2, ${src3}"#_.BroadcastStr#", $src4",
                      (X86VFixupimm (_.VT _.RC:$src1),
                                    (_.VT _.RC:$src2),
                                    (TblVT.VT (TblVT.BroadcastLdFrag addr:$src3)),
                                    (i32 timm:$src4))>,
                      EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  } // Constraints = "$src1 = $dst"
}

// 512-bit variant adds a {sae} (suppress-all-exceptions) register form.
multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _, X86VectorVTInfo TblVT>
  : avx512_fixupimm_packed<opc, OpcodeStr, sched, _, TblVT> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
  defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
                      "$src2, $src3, {sae}, $src4",
                      (X86VFixupimmSAE (_.VT _.RC:$src1),
                                       (_.VT _.RC:$src2),
                                       (TblVT.VT _.RC:$src3),
                                       (i32 timm:$src4))>,
                      EVEX_B, Sched<[sched]>;
  }
}

multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  X86VectorVTInfo _src3VT> {
  let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
      ExeDomain = _.ExeDomain in {
    defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (X86VFixupimms (_.VT _.RC:$src1),
                                     (_.VT _.RC:$src2),
                                     (_src3VT.VT _src3VT.RC:$src3),
                                     (i32 timm:$src4))>, Sched<[sched]>, SIMD_EXC;
    let Uses = [MXCSR] in
    defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
                      "$src2, $src3, {sae}, $src4",
                      (X86VFixupimmSAEs (_.VT _.RC:$src1),
                                        (_.VT _.RC:$src2),
                                        (_src3VT.VT _src3VT.RC:$src3),
                                        (i32 timm:$src4))>,
                      EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
    defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                      OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (X86VFixupimms (_.VT _.RC:$src1),
                                     (_.VT _.RC:$src2),
                                     (_src3VT.VT (scalar_to_vector
                                               (_src3VT.ScalarLdFrag addr:$src3))),
                                     (i32 timm:$src4))>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  }
}

multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
                                      AVX512VLVectorVTInfo _Vec,
                                      AVX512VLVectorVTInfo _Tbl> {
  let Predicates = [HasAVX512] in
    defm Z    : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM,
                                _Vec.info512, _Tbl.info512>, AVX512AIi8Base,
                                EVEX_4V, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.XMM,
                            _Vec.info128, _Tbl.info128>, AVX512AIi8Base,
                            EVEX_4V, EVEX_V128;
    defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.YMM,
                            _Vec.info256, _Tbl.info256>, AVX512AIi8Base,
                            EVEX_4V, EVEX_V256;
  }
}

defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
                                           SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
                          AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
                                           SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
                          AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info,
                         avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
                         avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;

// Patterns used to select SSE scalar fp arithmetic instructions from
// either:
//
// (1) a scalar fp operation followed by a blend
//
// The effect is that the backend no longer emits unnecessary vector
// insert instructions immediately after SSE scalar fp instructions
// like addss or mulss.
//
// For example, given the following code:
//   __m128 foo(__m128 A, __m128 B) {
//     A[0] += B[0];
//     return A;
//   }
//
// Previously we generated:
//   addss %xmm0, %xmm1
//   movss %xmm1, %xmm0
//
// We now generate:
//   addss %xmm1, %xmm0
//
// (2) a vector packed single/double fp operation followed by a vector insert
//
// The effect is that the backend converts the packed fp instruction
// followed by a vector insert into a single SSE scalar fp instruction.
//
// For example, given the following code:
//   __m128 foo(__m128 A, __m128 B) {
//     __m128 C = A + B;
//     return (__m128) {C[0], A[1], A[2], A[3]};
//   }
//
// Previously we generated:
//   addps %xmm0, %xmm1
//   movss %xmm1, %xmm0
//
// We now generate:
//   addss %xmm1, %xmm0

// TODO: Some canonicalization in lowering would simplify the number of
// patterns we have to try to match.
// Select the AVX-512 scalar-math instruction (V<Op>Zrr_Int etc.) for a
// scalar fp op on element 0 that is re-inserted via a Movss/Movsd/Movsh
// blend, including merge-masked (src0 passthru) and zero-masked forms.
multiclass AVX512_scalar_math_fp_patterns<SDPatternOperator Op, SDNode MaskedOp,
                                          string OpcPrefix, SDNode MoveNode,
                                          X86VectorVTInfo _, PatLeaf ZeroFP> {
  let Predicates = [HasAVX512] in {
    // extracted scalar math op with insert via movss
    def : Pat<(MoveNode
               (_.VT VR128X:$dst),
               (_.VT (scalar_to_vector
                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
                          _.FRC:$src)))),
              (!cast<Instruction>("V"#OpcPrefix#"Zrr_Int") _.VT:$dst,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;
    def : Pat<(MoveNode
               (_.VT VR128X:$dst),
               (_.VT (scalar_to_vector
                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
                          (_.ScalarLdFrag addr:$src))))),
              (!cast<Instruction>("V"#OpcPrefix#"Zrm_Int") _.VT:$dst, addr:$src)>;

    // extracted masked scalar math op with insert via movss
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                                 (MaskedOp (_.EltVT
                                            (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                           _.FRC:$src2),
                                 _.FRC:$src0))),
              (!cast<Instruction>("V"#OpcPrefix#"Zrr_Intk")
               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
               VK1WM:$mask, _.VT:$src1,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                                 (MaskedOp (_.EltVT
                                            (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                           (_.ScalarLdFrag addr:$src2)),
                                 _.FRC:$src0))),
              (!cast<Instruction>("V"#OpcPrefix#"Zrm_Intk")
               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
               VK1WM:$mask, _.VT:$src1, addr:$src2)>;

    // extracted zero-masked scalar math op with insert via movss
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                                 (MaskedOp (_.EltVT
                                            (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                           _.FRC:$src2), (_.EltVT ZeroFP)))),
              (!cast<I>("V"#OpcPrefix#"Zrr_Intkz")
               VK1WM:$mask, _.VT:$src1,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                                 (MaskedOp (_.EltVT
                                            (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                           (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))),
              (!cast<I>("V"#OpcPrefix#"Zrm_Intkz") VK1WM:$mask, _.VT:$src1, addr:$src2)>;
  }
}

defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;

defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;

defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSH", X86Movsh, v8f16x_info, fp16imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSH", X86Movsh, v8f16x_info, fp16imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSH", X86Movsh, v8f16x_info, fp16imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSH", X86Movsh, v8f16x_info, fp16imm0>;

// Same idea for unary scalar ops (sqrt): fold the op + element-0 blend into
// the intrinsic form of the instruction.
multiclass AVX512_scalar_unary_math_patterns<SDPatternOperator OpNode, string OpcPrefix,
                                             SDNode Move, X86VectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    def : Pat<(_.VT (Move _.VT:$dst,
                     (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
              (!cast<Instruction>("V"#OpcPrefix#"Zr_Int") _.VT:$dst, _.VT:$src)>;
  }
}

defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSH", X86Movsh, v8f16x_info>;

//===----------------------------------------------------------------------===//
// AES instructions
//===----------------------------------------------------------------------===//

// EVEX-encoded VAES: reuse the SSE AESI_binop_rm_int multiclass, selecting
// the 128/256/512-bit intrinsic by suffix.
multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
  let Predicates = [HasVLX, HasVAES] in {
    defm Z128 : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix),
                                  loadv2i64, 0, VR128X, i128mem>,
                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG;
    defm Z256 : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix#"_256"),
                                  loadv4i64, 0, VR256X, i256mem>,
                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512, HasVAES] in
    defm Z    : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix#"_512"),
                                  loadv8i64, 0, VR512, i512mem>,
                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG;
}

defm VAESENC     : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
defm VAESENCLAST : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
defm VAESDEC     : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
defm VAESDECLAST : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;

//===----------------------------------------------------------------------===//
// PCLMUL instructions - Carry less multiplication
//===----------------------------------------------------------------------===//

let Predicates = [HasAVX512, HasVPCLMULQDQ] in
defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
                              EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG;

let Predicates = [HasVLX, HasVPCLMULQDQ] in {
defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
                                 EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG;

defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
                                int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256,
                                EVEX_CD8<64, CD8VF>, VEX_WIG;
}

// Aliases
defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;

//===----------------------------------------------------------------------===//
// VBMI2
//===----------------------------------------------------------------------===//

// VPSHLDV/VPSHRDV: variable-count concat-and-shift; tied accumulator in
// $src1 = $dst, register and folded-load forms.
multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
  let Constraints = "$src1 = $dst",
      ExeDomain = VTI.ExeDomain in {
    defm r:   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
                "$src3, $src2", "$src2, $src3",
                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
                T8PD, EVEX_4V, Sched<[sched]>;
    defm m:   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
                "$src3, $src2", "$src2, $src3",
                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                         (VTI.VT (VTI.LdFrag addr:$src3))))>,
                T8PD, EVEX_4V,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Adds the embedded-broadcast memory form (d/q element sizes only).
multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
                               X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
         : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
  let Constraints = "$src1 = $dst",
      ExeDomain = VTI.ExeDomain in
  defm mb:  AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
              (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
              "${src3}"#VTI.BroadcastStr#", $src2",
              "$src2, ${src3}"#VTI.BroadcastStr,
              (OpNode VTI.RC:$src1, VTI.RC:$src2,
               (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
              T8PD, EVEX_4V, EVEX_B,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasVBMI2] in
    defm Z      : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
                                     EVEX_V512;
  let Predicates = [HasVBMI2, HasVLX] in {
    defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
                                   EVEX_V256;
    defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
                                   EVEX_V128;
  }
}

multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
                                      X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasVBMI2] in
    defm Z      : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
                                      EVEX_V512;
  let Predicates = [HasVBMI2, HasVLX] in {
    defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
                                    EVEX_V256;
    defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
                                    EVEX_V128;
  }
}
// Word forms have no broadcast (rm_common); dword/qword forms do (rmb_common).
multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
                           SDNode OpNode, X86SchedWriteWidths sched> {
  defm W : VBMI2_shift_var_rm_common<wOp, Prefix#"w", OpNode, sched,
             avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
  defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix#"d", OpNode, sched,
             avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
  defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix#"q", OpNode, sched,
             avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
}

multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
                           SDNode OpNode, X86SchedWriteWidths sched> {
  defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix#"w", sched,
             avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
             VEX_W, EVEX_CD8<16, CD8VF>;
  defm D : avx512_common_3Op_imm8<Prefix#"d", avx512vl_i32_info, dqOp,
             OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
  defm Q : avx512_common_3Op_imm8<Prefix#"q", avx512vl_i64_info, dqOp, OpNode,
             sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
}

// Concat & Shift
defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
defm VPSHLD  : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
defm VPSHRD  : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;

// Compress
defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256,
                                         avx512vl_i8_info, HasVBMI2>, EVEX,
                                         NotMemoryFoldable;
defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256,
                                          avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W,
                                          NotMemoryFoldable;
// Expand
defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256,
                                      avx512vl_i8_info, HasVBMI2>, EVEX;
defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256,
                                      avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W;

//===----------------------------------------------------------------------===//
// VNNI
//===----------------------------------------------------------------------===//

// VNNI dot-product accumulate: tied accumulator, register / load / broadcast
// forms. Only the reg-reg form can be marked commutable (in src2/src3).
let Constraints = "$src1 = $dst" in
multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
                    X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
                    bit IsCommutable> {
  let ExeDomain = VTI.ExeDomain in {
  defm r  :   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                                   (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
                                   "$src3, $src2", "$src2, $src3",
                                   (VTI.VT (OpNode VTI.RC:$src1,
                                            VTI.RC:$src2, VTI.RC:$src3)),
                                   IsCommutable, IsCommutable>,
                                   EVEX_4V, T8PD, Sched<[sched]>;
  defm m  :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                   (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
                                   "$src3, $src2", "$src2, $src3",
                                   (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                            (VTI.VT (VTI.LdFrag addr:$src3))))>,
                                   EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
                                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm mb :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                   (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
                                   OpStr, "${src3}"#VTI.BroadcastStr#", $src2",
                                   "$src2, ${src3}"#VTI.BroadcastStr,
                                   (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                    (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
                                   EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
                                   T8PD, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
                       X86SchedWriteWidths sched, bit IsCommutable> {
  let Predicates = [HasVNNI] in
    defm Z      :   VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info,
                             IsCommutable>, EVEX_V512;
  let Predicates = [HasVNNI, HasVLX] in {
    defm Z256 :   VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info,
                           IsCommutable>, EVEX_V256;
    defm Z128 :   VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info,
                           IsCommutable>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPDP?
// AVX512-VNNI dot-product accumulate instructions. The trailing 0/1 argument
// is IsCommutable: the byte forms (vpdpbusd/vpdpbusds) are marked
// non-commutable, while the word forms (vpdpwssd/vpdpwssds) are commutable.
defm VPDPBUSD : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul, 0>;
defm VPDPBUSDS : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul, 0>;
defm VPDPWSSD : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul, 1>;
defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul, 1>;

// Patterns to match VPDPWSSD from existing instructions/intrinsics.
// An add of an accumulator with a single-use pmaddwd result is exactly the
// vpdpwssd operation, so fold (add acc, (pmaddwd a, b)) into one instruction.
// The "_su" (single-use) fragment keeps the fold from duplicating a pmaddwd
// whose result is also needed elsewhere.
let Predicates = [HasVNNI] in {
  // 512-bit register and load-folded forms.
  def : Pat<(v16i32 (add VR512:$src1,
                         (X86vpmaddwd_su VR512:$src2, VR512:$src3))),
            (VPDPWSSDZr VR512:$src1, VR512:$src2, VR512:$src3)>;
  def : Pat<(v16i32 (add VR512:$src1,
                         (X86vpmaddwd_su VR512:$src2, (load addr:$src3)))),
            (VPDPWSSDZm VR512:$src1, VR512:$src2, addr:$src3)>;
}
// 256/128-bit forms additionally require VLX.
let Predicates = [HasVNNI,HasVLX] in {
  def : Pat<(v8i32 (add VR256X:$src1,
                        (X86vpmaddwd_su VR256X:$src2, VR256X:$src3))),
            (VPDPWSSDZ256r VR256X:$src1, VR256X:$src2, VR256X:$src3)>;
  def : Pat<(v8i32 (add VR256X:$src1,
                        (X86vpmaddwd_su VR256X:$src2, (load addr:$src3)))),
            (VPDPWSSDZ256m VR256X:$src1, VR256X:$src2, addr:$src3)>;
  def : Pat<(v4i32 (add VR128X:$src1,
                        (X86vpmaddwd_su VR128X:$src2, VR128X:$src3))),
            (VPDPWSSDZ128r VR128X:$src1, VR128X:$src2, VR128X:$src3)>;
  def : Pat<(v4i32 (add VR128X:$src1,
                        (X86vpmaddwd_su VR128X:$src2, (load addr:$src3)))),
            (VPDPWSSDZ128m VR128X:$src1, VR128X:$src2, addr:$src3)>;
}

//===----------------------------------------------------------------------===//
// Bit Algorithms
//===----------------------------------------------------------------------===//

// FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW?
// BITALG byte/word population count. Both forms share opcode 0x54; the word
// form is distinguished by VEX_W.
defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU,
                                   avx512vl_i8_info, HasBITALG>;
defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
                                   avx512vl_i16_info, HasBITALG>, VEX_W;

// Lower narrower ctpop vectors via the instructions defined above.
defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;

// Single-use variant of the X86Vpshufbitqmb node, used for the masked
// (zero-masking via AVX512_maskable_cmp) patterns so the node is only folded
// when its result has no other consumers.
def X86Vpshufbitqmb_su : PatFrag<(ops node:$src1, node:$src2),
                                 (X86Vpshufbitqmb node:$src1, node:$src2), [{
  return N->hasOneUse();
}]>;

// vpshufbitqmb register/register and register/memory forms. Note the
// destination is a mask register (VTI.KRC), not a vector register.
multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
  defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
                                (ins VTI.RC:$src1, VTI.RC:$src2),
                                "vpshufbitqmb",
                                "$src2, $src1", "$src1, $src2",
                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
                                (VTI.VT VTI.RC:$src2)),
                                (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
                                (VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD,
                                Sched<[sched]>;
  defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
                                (ins VTI.RC:$src1, VTI.MemOp:$src2),
                                "vpshufbitqmb",
                                "$src2, $src1", "$src1, $src2",
                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
                                (VTI.VT (VTI.LdFrag addr:$src2))),
                                (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
                                (VTI.VT (VTI.LdFrag addr:$src2)))>,
                                EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
                                Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Instantiate 512-bit form under BITALG, and 256/128-bit forms under
// BITALG + VLX.
multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasBITALG] in
  defm Z : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512;
  let Predicates = [HasBITALG, HasVLX] in {
    defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256;
    defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPSHUFBITQMB?
12596defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>; 12597 12598//===----------------------------------------------------------------------===// 12599// GFNI 12600//===----------------------------------------------------------------------===// 12601 12602multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode, 12603 X86SchedWriteWidths sched> { 12604 let Predicates = [HasGFNI, HasAVX512, HasBWI] in 12605 defm Z : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>, 12606 EVEX_V512; 12607 let Predicates = [HasGFNI, HasVLX, HasBWI] in { 12608 defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>, 12609 EVEX_V256; 12610 defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>, 12611 EVEX_V128; 12612 } 12613} 12614 12615defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb, 12616 SchedWriteVecALU>, 12617 EVEX_CD8<8, CD8VF>, T8PD; 12618 12619multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode, 12620 X86FoldableSchedWrite sched, X86VectorVTInfo VTI, 12621 X86VectorVTInfo BcstVTI> 12622 : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> { 12623 let ExeDomain = VTI.ExeDomain in 12624 defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst), 12625 (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3), 12626 OpStr, "$src3, ${src2}"#BcstVTI.BroadcastStr#", $src1", 12627 "$src1, ${src2}"#BcstVTI.BroadcastStr#", $src3", 12628 (OpNode (VTI.VT VTI.RC:$src1), 12629 (bitconvert (BcstVTI.VT (X86VBroadcastld64 addr:$src2))), 12630 (i8 timm:$src3))>, EVEX_B, 12631 Sched<[sched.Folded, sched.ReadAfterFold]>; 12632} 12633 12634multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode, 12635 X86SchedWriteWidths sched> { 12636 let Predicates = [HasGFNI, HasAVX512, HasBWI] in 12637 defm Z : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM, 12638 v64i8_info, v8i64_info>, EVEX_V512; 12639 let Predicates = 
[HasGFNI, HasVLX, HasBWI] in { 12640 defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM, 12641 v32i8x_info, v4i64x_info>, EVEX_V256; 12642 defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM, 12643 v16i8x_info, v2i64x_info>, EVEX_V128; 12644 } 12645} 12646 12647defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb", 12648 X86GF2P8affineinvqb, SchedWriteVecIMul>, 12649 EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base; 12650defm VGF2P8AFFINEQB : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb", 12651 X86GF2P8affineqb, SchedWriteVecIMul>, 12652 EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base; 12653 12654 12655//===----------------------------------------------------------------------===// 12656// AVX5124FMAPS 12657//===----------------------------------------------------------------------===// 12658 12659let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle, 12660 Constraints = "$src1 = $dst", Uses = [MXCSR], mayRaiseFPException = 1 in { 12661defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info, 12662 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3), 12663 "v4fmaddps", "$src3, $src2", "$src2, $src3", 12664 []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>, 12665 Sched<[SchedWriteFMA.ZMM.Folded]>; 12666 12667defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info, 12668 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3), 12669 "v4fnmaddps", "$src3, $src2", "$src2, $src3", 12670 []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>, 12671 Sched<[SchedWriteFMA.ZMM.Folded]>; 12672 12673defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info, 12674 (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3), 12675 "v4fmaddss", "$src3, $src2", "$src2, $src3", 12676 []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>, 12677 Sched<[SchedWriteFMA.Scl.Folded]>; 12678 12679defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info, 12680 
(outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3), 12681 "v4fnmaddss", "$src3, $src2", "$src2, $src3", 12682 []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>, 12683 Sched<[SchedWriteFMA.Scl.Folded]>; 12684} 12685 12686//===----------------------------------------------------------------------===// 12687// AVX5124VNNIW 12688//===----------------------------------------------------------------------===// 12689 12690let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt, 12691 Constraints = "$src1 = $dst" in { 12692defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info, 12693 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3), 12694 "vp4dpwssd", "$src3, $src2", "$src2, $src3", 12695 []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>, 12696 Sched<[SchedWriteFMA.ZMM.Folded]>; 12697 12698defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info, 12699 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3), 12700 "vp4dpwssds", "$src3, $src2", "$src2, $src3", 12701 []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>, 12702 Sched<[SchedWriteFMA.ZMM.Folded]>; 12703} 12704 12705let hasSideEffects = 0 in { 12706 let mayStore = 1, SchedRW = [WriteFStoreX] in 12707 def MASKPAIR16STORE : PseudoI<(outs), (ins anymem:$dst, VK16PAIR:$src), []>; 12708 let mayLoad = 1, SchedRW = [WriteFLoadX] in 12709 def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src), []>; 12710} 12711 12712//===----------------------------------------------------------------------===// 12713// VP2INTERSECT 12714//===----------------------------------------------------------------------===// 12715 12716multiclass avx512_vp2intersect_modes<X86FoldableSchedWrite sched, X86VectorVTInfo _> { 12717 def rr : I<0x68, MRMSrcReg, 12718 (outs _.KRPC:$dst), 12719 (ins _.RC:$src1, _.RC:$src2), 12720 !strconcat("vp2intersect", _.Suffix, 12721 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 12722 [(set _.KRPC:$dst, (X86vp2intersect 12723 _.RC:$src1, (_.VT 
_.RC:$src2)))]>, 12724 EVEX_4V, T8XD, Sched<[sched]>; 12725 12726 def rm : I<0x68, MRMSrcMem, 12727 (outs _.KRPC:$dst), 12728 (ins _.RC:$src1, _.MemOp:$src2), 12729 !strconcat("vp2intersect", _.Suffix, 12730 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 12731 [(set _.KRPC:$dst, (X86vp2intersect 12732 _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>, 12733 EVEX_4V, T8XD, EVEX_CD8<_.EltSize, CD8VF>, 12734 Sched<[sched.Folded, sched.ReadAfterFold]>; 12735 12736 def rmb : I<0x68, MRMSrcMem, 12737 (outs _.KRPC:$dst), 12738 (ins _.RC:$src1, _.ScalarMemOp:$src2), 12739 !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr, 12740 ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"), 12741 [(set _.KRPC:$dst, (X86vp2intersect 12742 _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))]>, 12743 EVEX_4V, T8XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, 12744 Sched<[sched.Folded, sched.ReadAfterFold]>; 12745} 12746 12747multiclass avx512_vp2intersect<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> { 12748 let Predicates = [HasAVX512, HasVP2INTERSECT] in 12749 defm Z : avx512_vp2intersect_modes<sched.ZMM, _.info512>, EVEX_V512; 12750 12751 let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in { 12752 defm Z256 : avx512_vp2intersect_modes<sched.YMM, _.info256>, EVEX_V256; 12753 defm Z128 : avx512_vp2intersect_modes<sched.XMM, _.info128>, EVEX_V128; 12754 } 12755} 12756 12757defm VP2INTERSECTD : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i32_info>; 12758defm VP2INTERSECTQ : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i64_info>, VEX_W; 12759 12760multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr, 12761 X86SchedWriteWidths sched, 12762 AVX512VLVectorVTInfo _SrcVTInfo, 12763 AVX512VLVectorVTInfo _DstVTInfo, 12764 SDNode OpNode, Predicate prd, 12765 bit IsCommutable = 0> { 12766 let Predicates = [prd] in 12767 defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode, 12768 _SrcVTInfo.info512, _DstVTInfo.info512, 12769 _SrcVTInfo.info512, 
IsCommutable>, 12770 EVEX_V512, EVEX_CD8<32, CD8VF>; 12771 let Predicates = [HasVLX, prd] in { 12772 defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode, 12773 _SrcVTInfo.info256, _DstVTInfo.info256, 12774 _SrcVTInfo.info256, IsCommutable>, 12775 EVEX_V256, EVEX_CD8<32, CD8VF>; 12776 defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode, 12777 _SrcVTInfo.info128, _DstVTInfo.info128, 12778 _SrcVTInfo.info128, IsCommutable>, 12779 EVEX_V128, EVEX_CD8<32, CD8VF>; 12780 } 12781} 12782 12783let ExeDomain = SSEPackedSingle in 12784defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16", 12785 SchedWriteCvtPD2PS, //FIXME: Should be SchedWriteCvtPS2BF 12786 avx512vl_f32_info, avx512vl_i16_info, 12787 X86cvtne2ps2bf16, HasBF16, 0>, T8XD; 12788 12789// Truncate Float to BFloat16 12790multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr, 12791 X86SchedWriteWidths sched> { 12792 let ExeDomain = SSEPackedSingle in { 12793 let Predicates = [HasBF16], Uses = []<Register>, mayRaiseFPException = 0 in { 12794 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i16x_info, v16f32_info, 12795 X86cvtneps2bf16, X86cvtneps2bf16, sched.ZMM>, EVEX_V512; 12796 } 12797 let Predicates = [HasBF16, HasVLX] in { 12798 let Uses = []<Register>, mayRaiseFPException = 0 in { 12799 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v4f32x_info, 12800 null_frag, null_frag, sched.XMM, "{1to4}", "{x}", f128mem, 12801 VK4WM>, EVEX_V128; 12802 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v8f32x_info, 12803 X86cvtneps2bf16, X86cvtneps2bf16, 12804 sched.YMM, "{1to8}", "{y}">, EVEX_V256; 12805 } 12806 } // Predicates = [HasBF16, HasVLX] 12807 } // ExeDomain = SSEPackedSingle 12808 12809 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}", 12810 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, 12811 VR128X:$src), 0>; 12812 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}", 12813 (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, 12814 f128mem:$src), 0, 
"intel">; 12815 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}", 12816 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, 12817 VR256X:$src), 0>; 12818 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}", 12819 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, 12820 f256mem:$src), 0, "intel">; 12821} 12822 12823defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16", 12824 SchedWriteCvtPD2PS>, T8XS, 12825 EVEX_CD8<32, CD8VF>; 12826 12827let Predicates = [HasBF16, HasVLX] in { 12828 // Special patterns to allow use of X86mcvtneps2bf16 for masking. Instruction 12829 // patterns have been disabled with null_frag. 12830 def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32 VR128X:$src))), 12831 (VCVTNEPS2BF16Z128rr VR128X:$src)>; 12832 def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), (v8i16 VR128X:$src0), 12833 VK4WM:$mask), 12834 (VCVTNEPS2BF16Z128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src)>; 12835 def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), v8i16x_info.ImmAllZerosV, 12836 VK4WM:$mask), 12837 (VCVTNEPS2BF16Z128rrkz VK4WM:$mask, VR128X:$src)>; 12838 12839 def : Pat<(v8i16 (X86cvtneps2bf16 (loadv4f32 addr:$src))), 12840 (VCVTNEPS2BF16Z128rm addr:$src)>; 12841 def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), (v8i16 VR128X:$src0), 12842 VK4WM:$mask), 12843 (VCVTNEPS2BF16Z128rmk VR128X:$src0, VK4WM:$mask, addr:$src)>; 12844 def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), v8i16x_info.ImmAllZerosV, 12845 VK4WM:$mask), 12846 (VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>; 12847 12848 def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32 12849 (X86VBroadcastld32 addr:$src)))), 12850 (VCVTNEPS2BF16Z128rmb addr:$src)>; 12851 def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)), 12852 (v8i16 VR128X:$src0), VK4WM:$mask), 12853 (VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>; 12854 def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)), 12855 v8i16x_info.ImmAllZerosV, VK4WM:$mask), 12856 (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>; 12857} 
12858 12859let Constraints = "$src1 = $dst" in { 12860multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, 12861 X86FoldableSchedWrite sched, 12862 X86VectorVTInfo _, X86VectorVTInfo src_v> { 12863 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 12864 (ins src_v.RC:$src2, src_v.RC:$src3), 12865 OpcodeStr, "$src3, $src2", "$src2, $src3", 12866 (_.VT (OpNode _.RC:$src1, src_v.RC:$src2, src_v.RC:$src3))>, 12867 EVEX_4V, Sched<[sched]>; 12868 12869 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 12870 (ins src_v.RC:$src2, src_v.MemOp:$src3), 12871 OpcodeStr, "$src3, $src2", "$src2, $src3", 12872 (_.VT (OpNode _.RC:$src1, src_v.RC:$src2, 12873 (src_v.LdFrag addr:$src3)))>, EVEX_4V, 12874 Sched<[sched.Folded, sched.ReadAfterFold]>; 12875 12876 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 12877 (ins src_v.RC:$src2, src_v.ScalarMemOp:$src3), 12878 OpcodeStr, 12879 !strconcat("${src3}", _.BroadcastStr,", $src2"), 12880 !strconcat("$src2, ${src3}", _.BroadcastStr), 12881 (_.VT (OpNode _.RC:$src1, src_v.RC:$src2, 12882 (src_v.VT (src_v.BroadcastLdFrag addr:$src3))))>, 12883 EVEX_B, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; 12884 12885} 12886} // Constraints = "$src1 = $dst" 12887 12888multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, 12889 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _, 12890 AVX512VLVectorVTInfo src_v, Predicate prd> { 12891 let Predicates = [prd] in { 12892 defm Z : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512, 12893 src_v.info512>, EVEX_V512; 12894 } 12895 let Predicates = [HasVLX, prd] in { 12896 defm Z256 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256, 12897 src_v.info256>, EVEX_V256; 12898 defm Z128 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128, 12899 src_v.info128>, EVEX_V128; 12900 } 12901} 12902 12903let ExeDomain = SSEPackedSingle in 12904defm VDPBF16PS : 
avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps, SchedWriteFMA, 12905 avx512vl_f32_info, avx512vl_i32_info, 12906 HasBF16>, T8XS, EVEX_CD8<32, CD8VF>; 12907 12908//===----------------------------------------------------------------------===// 12909// AVX512FP16 12910//===----------------------------------------------------------------------===// 12911 12912let Predicates = [HasFP16] in { 12913// Move word ( r/m16) to Packed word 12914def VMOVW2SHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src), 12915 "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, Sched<[WriteVecMoveFromGpr]>; 12916def VMOVWrm : AVX512<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i16mem:$src), 12917 "vmovw\t{$src, $dst|$dst, $src}", 12918 [(set VR128X:$dst, 12919 (v8i16 (scalar_to_vector (loadi16 addr:$src))))]>, 12920 T_MAP5PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFLoad]>; 12921 12922def : Pat<(f16 (bitconvert GR16:$src)), 12923 (f16 (COPY_TO_REGCLASS 12924 (VMOVW2SHrr 12925 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), 12926 FR16X))>; 12927def : Pat<(v8i16 (scalar_to_vector (i16 GR16:$src))), 12928 (VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>; 12929def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (and GR32:$src, 0xffff)))), 12930 (VMOVW2SHrr GR32:$src)>; 12931// FIXME: We should really find a way to improve these patterns. 
12932def : Pat<(v8i32 (X86vzmovl 12933 (insert_subvector undef, 12934 (v4i32 (scalar_to_vector 12935 (and GR32:$src, 0xffff))), 12936 (iPTR 0)))), 12937 (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>; 12938def : Pat<(v16i32 (X86vzmovl 12939 (insert_subvector undef, 12940 (v4i32 (scalar_to_vector 12941 (and GR32:$src, 0xffff))), 12942 (iPTR 0)))), 12943 (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>; 12944 12945def : Pat<(v8i16 (X86vzmovl (scalar_to_vector (i16 GR16:$src)))), 12946 (VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>; 12947 12948// AVX 128-bit movw instruction write zeros in the high 128-bit part. 12949def : Pat<(v8i16 (X86vzload16 addr:$src)), 12950 (VMOVWrm addr:$src)>; 12951def : Pat<(v16i16 (X86vzload16 addr:$src)), 12952 (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>; 12953 12954// Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext. 12955def : Pat<(v32i16 (X86vzload16 addr:$src)), 12956 (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>; 12957 12958def : Pat<(v4i32 (scalar_to_vector (i32 (extloadi16 addr:$src)))), 12959 (VMOVWrm addr:$src)>; 12960def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (i32 (zextloadi16 addr:$src))))), 12961 (VMOVWrm addr:$src)>; 12962def : Pat<(v8i32 (X86vzmovl 12963 (insert_subvector undef, 12964 (v4i32 (scalar_to_vector 12965 (i32 (zextloadi16 addr:$src)))), 12966 (iPTR 0)))), 12967 (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>; 12968def : Pat<(v16i32 (X86vzmovl 12969 (insert_subvector undef, 12970 (v4i32 (scalar_to_vector 12971 (i32 (zextloadi16 addr:$src)))), 12972 (iPTR 0)))), 12973 (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>; 12974 12975// Move word from xmm register to r/m16 12976def VMOVSH2Wrr : AVX512<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src), 12977 "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, Sched<[WriteVecMoveToGpr]>; 12978def VMOVWmr : AVX512<0x7E, MRMDestMem, (outs), 12979 (ins 
i16mem:$dst, VR128X:$src), 12980 "vmovw\t{$src, $dst|$dst, $src}", 12981 [(store (i16 (extractelt (v8i16 VR128X:$src), 12982 (iPTR 0))), addr:$dst)]>, 12983 T_MAP5PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFStore]>; 12984 12985def : Pat<(i16 (bitconvert FR16X:$src)), 12986 (i16 (EXTRACT_SUBREG 12987 (VMOVSH2Wrr (COPY_TO_REGCLASS FR16X:$src, VR128X)), 12988 sub_16bit))>; 12989def : Pat<(i16 (extractelt (v8i16 VR128X:$src), (iPTR 0))), 12990 (i16 (EXTRACT_SUBREG (VMOVSH2Wrr VR128X:$src), sub_16bit))>; 12991} 12992 12993// Allow "vmovw" to use GR64 12994let hasSideEffects = 0 in { 12995 def VMOVW64toSHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src), 12996 "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>; 12997 def VMOVSHtoW64rr : AVX512<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src), 12998 "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>; 12999} 13000 13001// Convert 16-bit float to i16/u16 13002multiclass avx512_cvtph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 13003 SDPatternOperator MaskOpNode, SDNode OpNodeRnd, 13004 AVX512VLVectorVTInfo _Dst, 13005 AVX512VLVectorVTInfo _Src, 13006 X86SchedWriteWidths sched> { 13007 let Predicates = [HasFP16] in { 13008 defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512, 13009 OpNode, MaskOpNode, sched.ZMM>, 13010 avx512_vcvt_fp_rc<opc, OpcodeStr, _Dst.info512, _Src.info512, 13011 OpNodeRnd, sched.ZMM>, EVEX_V512; 13012 } 13013 let Predicates = [HasFP16, HasVLX] in { 13014 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128, 13015 OpNode, MaskOpNode, sched.XMM>, EVEX_V128; 13016 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256, 13017 OpNode, MaskOpNode, sched.YMM>, EVEX_V256; 13018 } 13019} 13020 13021// Convert 16-bit float to i16/u16 truncate 13022multiclass avx512_cvttph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 13023 SDPatternOperator 
MaskOpNode, SDNode OpNodeRnd, 13024 AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src, 13025 X86SchedWriteWidths sched> { 13026 let Predicates = [HasFP16] in { 13027 defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512, 13028 OpNode, MaskOpNode, sched.ZMM>, 13029 avx512_vcvt_fp_sae<opc, OpcodeStr, _Dst.info512, _Src.info512, 13030 OpNodeRnd, sched.ZMM>, EVEX_V512; 13031 } 13032 let Predicates = [HasFP16, HasVLX] in { 13033 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128, 13034 OpNode, MaskOpNode, sched.XMM>, EVEX_V128; 13035 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256, 13036 OpNode, MaskOpNode, sched.YMM>, EVEX_V256; 13037 } 13038} 13039 13040defm VCVTPH2UW : avx512_cvtph2w<0x7D, "vcvtph2uw", X86cvtp2UInt, X86cvtp2UInt, 13041 X86cvtp2UIntRnd, avx512vl_i16_info, 13042 avx512vl_f16_info, SchedWriteCvtPD2DQ>, 13043 T_MAP5PS, EVEX_CD8<16, CD8VF>; 13044defm VCVTUW2PH : avx512_cvtph2w<0x7D, "vcvtuw2ph", any_uint_to_fp, uint_to_fp, 13045 X86VUintToFpRnd, avx512vl_f16_info, 13046 avx512vl_i16_info, SchedWriteCvtPD2DQ>, 13047 T_MAP5XD, EVEX_CD8<16, CD8VF>; 13048defm VCVTTPH2W : avx512_cvttph2w<0x7C, "vcvttph2w", X86any_cvttp2si, 13049 X86cvttp2si, X86cvttp2siSAE, 13050 avx512vl_i16_info, avx512vl_f16_info, 13051 SchedWriteCvtPD2DQ>, T_MAP5PD, EVEX_CD8<16, CD8VF>; 13052defm VCVTTPH2UW : avx512_cvttph2w<0x7C, "vcvttph2uw", X86any_cvttp2ui, 13053 X86cvttp2ui, X86cvttp2uiSAE, 13054 avx512vl_i16_info, avx512vl_f16_info, 13055 SchedWriteCvtPD2DQ>, T_MAP5PS, EVEX_CD8<16, CD8VF>; 13056defm VCVTPH2W : avx512_cvtph2w<0x7D, "vcvtph2w", X86cvtp2Int, X86cvtp2Int, 13057 X86cvtp2IntRnd, avx512vl_i16_info, 13058 avx512vl_f16_info, SchedWriteCvtPD2DQ>, 13059 T_MAP5PD, EVEX_CD8<16, CD8VF>; 13060defm VCVTW2PH : avx512_cvtph2w<0x7D, "vcvtw2ph", any_sint_to_fp, sint_to_fp, 13061 X86VSintToFpRnd, avx512vl_f16_info, 13062 avx512vl_i16_info, SchedWriteCvtPD2DQ>, 13063 T_MAP5XS, EVEX_CD8<16, CD8VF>; 13064 13065// Convert Half to 
Signed/Unsigned Doubleword 13066multiclass avx512_cvtph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 13067 SDPatternOperator MaskOpNode, SDNode OpNodeRnd, 13068 X86SchedWriteWidths sched> { 13069 let Predicates = [HasFP16] in { 13070 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode, 13071 MaskOpNode, sched.ZMM>, 13072 avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f16x_info, 13073 OpNodeRnd, sched.ZMM>, EVEX_V512; 13074 } 13075 let Predicates = [HasFP16, HasVLX] in { 13076 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode, 13077 MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128; 13078 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode, 13079 MaskOpNode, sched.YMM>, EVEX_V256; 13080 } 13081} 13082 13083// Convert Half to Signed/Unsigned Doubleword with truncation 13084multiclass avx512_cvttph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 13085 SDPatternOperator MaskOpNode, SDNode OpNodeRnd, 13086 X86SchedWriteWidths sched> { 13087 let Predicates = [HasFP16] in { 13088 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode, 13089 MaskOpNode, sched.ZMM>, 13090 avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f16x_info, 13091 OpNodeRnd, sched.ZMM>, EVEX_V512; 13092 } 13093 let Predicates = [HasFP16, HasVLX] in { 13094 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode, 13095 MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128; 13096 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode, 13097 MaskOpNode, sched.YMM>, EVEX_V256; 13098 } 13099} 13100 13101 13102defm VCVTPH2DQ : avx512_cvtph2dq<0x5B, "vcvtph2dq", X86cvtp2Int, X86cvtp2Int, 13103 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD, 13104 EVEX_CD8<16, CD8VH>; 13105defm VCVTPH2UDQ : avx512_cvtph2dq<0x79, "vcvtph2udq", X86cvtp2UInt, X86cvtp2UInt, 13106 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5PS, 13107 EVEX_CD8<16, CD8VH>; 13108 
defm VCVTTPH2DQ : avx512_cvttph2dq<0x5B, "vcvttph2dq", X86any_cvttp2si,
                                   X86cvttp2si, X86cvttp2siSAE,
                                   SchedWriteCvtPS2DQ>, T_MAP5XS,
                                   EVEX_CD8<16, CD8VH>;

defm VCVTTPH2UDQ : avx512_cvttph2dq<0x78, "vcvttph2udq", X86any_cvttp2ui,
                                    X86cvttp2ui, X86cvttp2uiSAE,
                                    SchedWriteCvtPS2DQ>, T_MAP5PS,
                                    EVEX_CD8<16, CD8VH>;

// Convert Half to Signed/Unsigned Quadword
multiclass avx512_cvtph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f16x_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasFP16, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from v8f16x_info source. The memory form accordingly loads only 32 bits
    // (2 x f16), hence f32mem.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode,
                               MaskOpNode, sched.XMM, "{1to2}", "", f32mem>,
                               EVEX_V128;
    // Explicitly specified broadcast string, since we take only 4 elements
    // from v8f16x_info source. The memory form loads 64 bits (4 x f16),
    // hence f64mem.
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode,
                               MaskOpNode, sched.YMM, "{1to4}", "", f64mem>,
                               EVEX_V256;
  }
}

// Convert Half to Signed/Unsigned Quadword with truncation. Same structure as
// avx512_cvtph2qq, but the 512-bit form takes {sae} instead of a rounding mode.
multiclass avx512_cvttph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f16x_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasFP16, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from v8f16x_info source
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode,
                               MaskOpNode, sched.XMM, "{1to2}", "", f32mem>, EVEX_V128;
    // Explicitly specified broadcast string, since we take only 4 elements
    // from v8f16x_info source
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode,
                               MaskOpNode, sched.YMM, "{1to4}", "", f64mem>, EVEX_V256;
  }
}

defm VCVTPH2QQ : avx512_cvtph2qq<0x7B, "vcvtph2qq", X86cvtp2Int, X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD,
                                 EVEX_CD8<16, CD8VQ>;

defm VCVTPH2UQQ : avx512_cvtph2qq<0x79, "vcvtph2uqq", X86cvtp2UInt, X86cvtp2UInt,
                                  X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD,
                                  EVEX_CD8<16, CD8VQ>;

defm VCVTTPH2QQ : avx512_cvttph2qq<0x7A, "vcvttph2qq", X86any_cvttp2si,
                                   X86cvttp2si, X86cvttp2siSAE,
                                   SchedWriteCvtPS2DQ>, T_MAP5PD,
                                   EVEX_CD8<16, CD8VQ>;

defm VCVTTPH2UQQ : avx512_cvttph2qq<0x78, "vcvttph2uqq", X86any_cvttp2ui,
                                    X86cvttp2ui, X86cvttp2uiSAE,
                                    SchedWriteCvtPS2DQ>, T_MAP5PD,
                                    EVEX_CD8<16, CD8VQ>;

// Convert Signed/Unsigned Quadword to Half
multiclass avx512_cvtqq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  // we need "x"/"y"/"z" suffixes in order to distinguish between 128, 256 and
  // 512 memory forms of these instructions in Asm Parser. They have the same
  // dest type - 'v8f16x_info'. We also specify the broadcast string explicitly
  // due to the same reason.
  let Predicates = [HasFP16] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8i64_info, OpNode,
                            MaskOpNode, sched.ZMM, "{1to8}", "{z}">,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8i64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasFP16, HasVLX] in {
    // Instruction patterns are disabled with null_frag here; selection for the
    // 128/256-bit forms is done by the explicit X86VM{S,U}intToFP Pat<>s below,
    // which also handle masking.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2i64x_info,
                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}",
                               i128mem, VK2WM>,
                               EVEX_V128, NotEVEX2VEXConvertible;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4i64x_info,
                               null_frag, null_frag, sched.YMM, "{1to4}", "{y}",
                               i256mem, VK4WM>,
                               EVEX_V256, NotEVEX2VEXConvertible;
  }

  // AT&T-syntax aliases carrying the explicit "x"/"y"/"z" source-width suffix
  // for the plain, merge-masked and zero-masked register/broadcast forms.
  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                  VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                  i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, i64mem:$src), 0, "att">;

  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                  VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
                  "$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                  i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask, i64mem:$src), 0, "att">;

  def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Zrr") VR128X:$dst,
                  VR512:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|"
                  "$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst,
                  VK8WM:$mask, VR512:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst,
                  VK8WM:$mask, VR512:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}",
                  (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst,
                  i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to8}}",
                  (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst,
                  VK8WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to8}}",
                  (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst,
                  VK8WM:$mask, i64mem:$src), 0, "att">;
}

defm VCVTQQ2PH : avx512_cvtqq2ph<0x5B, "vcvtqq2ph", any_sint_to_fp, sint_to_fp,
                                 X86VSintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, T_MAP5PS,
                                 EVEX_CD8<64, CD8VF>;

defm VCVTUQQ2PH : avx512_cvtqq2ph<0x7A, "vcvtuqq2ph", any_uint_to_fp, uint_to_fp,
                                  X86VUintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, T_MAP5XD,
                                  EVEX_CD8<64, CD8VF>;

// Convert half to signed/unsigned int 32/64
defm VCVTSH2SIZ: avx512_cvt_s_int_round<0x2D, f16x_info, i32x_info, X86cvts2si,
                                        X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{l}", HasFP16>,
                                        T_MAP5XS, EVEX_CD8<16, CD8VT1>;
defm VCVTSH2SI64Z: avx512_cvt_s_int_round<0x2D, f16x_info, i64x_info, X86cvts2si,
                                          X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{q}", HasFP16>,
                                          T_MAP5XS, VEX_W, EVEX_CD8<16, CD8VT1>;
defm VCVTSH2USIZ: avx512_cvt_s_int_round<0x79, f16x_info, i32x_info, X86cvts2usi,
                                         X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{l}", HasFP16>,
                                         T_MAP5XS, EVEX_CD8<16, CD8VT1>;
defm VCVTSH2USI64Z: avx512_cvt_s_int_round<0x79, f16x_info, i64x_info, X86cvts2usi,
                                           X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{q}", HasFP16>,
                                           T_MAP5XS, VEX_W, EVEX_CD8<16, CD8VT1>;

// Truncating (round-toward-zero) scalar half-to-int conversions.
defm VCVTTSH2SIZ: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i32x_info,
                                   any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
                                   "{l}", HasFP16>, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
defm VCVTTSH2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i64x_info,
                                     any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
                                     "{q}", HasFP16>, VEX_W, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
defm VCVTTSH2USIZ: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i32x_info,
                                    any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
                                    "{l}", HasFP16>, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
defm VCVTTSH2USI64Z: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i64x_info,
                                      any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
                                      "{q}", HasFP16>, T_MAP5XS, VEX_W, EVEX_CD8<16, CD8VT1>;

let Predicates = [HasFP16] in {
  // Scalar signed/unsigned GPR-to-half conversions (vcvtsi2sh / vcvtusi2sh).
  defm VCVTSI2SHZ : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR32,
                                         v8f16x_info, i32mem, loadi32, "cvtsi2sh", "l">,
                                         T_MAP5XS, EVEX_CD8<32, CD8VT1>;
  defm VCVTSI642SHZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR64,
                                          v8f16x_info, i64mem, loadi64, "cvtsi2sh","q">,
                                          T_MAP5XS, VEX_W, EVEX_CD8<64, CD8VT1>;
  defm VCVTUSI2SHZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR32,
                                          v8f16x_info, i32mem, loadi32,
                                          "cvtusi2sh","l">, T_MAP5XS, EVEX_CD8<32, CD8VT1>;
  defm VCVTUSI642SHZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR64,
                                            v8f16x_info, i64mem, loadi64, "cvtusi2sh", "q">,
                                            T_MAP5XS, VEX_W, EVEX_CD8<64, CD8VT1>;
  // Suffix-less AT&T aliases default to the 32-bit memory form.
  def : InstAlias<"vcvtsi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
                  (VCVTSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;

  def : InstAlias<"vcvtusi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
                  (VCVTUSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;


  // Select scalar int-to-f16 conversions; the upper lanes of the destination
  // XMM are don't-care, hence the IMPLICIT_DEF first operand.
  def : Pat<(f16 (any_sint_to_fp (loadi32 addr:$src))),
            (VCVTSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f16 (any_sint_to_fp (loadi64 addr:$src))),
            (VCVTSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;

  def : Pat<(f16 (any_sint_to_fp GR32:$src)),
            (VCVTSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
  def : Pat<(f16 (any_sint_to_fp GR64:$src)),
            (VCVTSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;

  def : Pat<(f16 (any_uint_to_fp (loadi32 addr:$src))),
            (VCVTUSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f16 (any_uint_to_fp (loadi64 addr:$src))),
            (VCVTUSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;

  def : Pat<(f16 (any_uint_to_fp GR32:$src)),
            (VCVTUSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
  def : Pat<(f16 (any_uint_to_fp GR64:$src)),
            (VCVTUSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;

  // Patterns used for matching vcvtsi2sh intrinsic sequences from clang
  // which produce unnecessary vmovsh instructions
  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR64:$src)))))),
            (VCVTSI642SHZrr_Int VR128X:$dst, GR64:$src)>;

  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi64 addr:$src))))))),
            (VCVTSI642SHZrm_Int VR128X:$dst, addr:$src)>;

  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR32:$src)))))),
            (VCVTSI2SHZrr_Int VR128X:$dst, GR32:$src)>;

  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi32 addr:$src))))))),
            (VCVTSI2SHZrm_Int VR128X:$dst, addr:$src)>;

  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR64:$src)))))),
            (VCVTUSI642SHZrr_Int VR128X:$dst, GR64:$src)>;

  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi64 addr:$src))))))),
            (VCVTUSI642SHZrm_Int VR128X:$dst, addr:$src)>;

  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR32:$src)))))),
            (VCVTUSI2SHZrr_Int VR128X:$dst, GR32:$src)>;

  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi32 addr:$src))))))),
            (VCVTUSI2SHZrm_Int VR128X:$dst, addr:$src)>;
} // Predicates = [HasFP16]

let Predicates = [HasFP16, HasVLX] in {
  // Special patterns to allow use of X86VMSintToFP for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v8f16 (X86any_VSintToFP (v4i64 VR256X:$src))),
            (VCVTQQ2PHZ256rr VR256X:$src)>;
  def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
                           VK4WM:$mask),
            (VCVTQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
  def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
                           VK4WM:$mask),
            (VCVTQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;

  def : Pat<(v8f16 (X86any_VSintToFP (loadv4i64 addr:$src))),
            (VCVTQQ2PHZ256rm addr:$src)>;
  def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
                           VK4WM:$mask),
            (VCVTQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
                           VK4WM:$mask),
            (VCVTQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_VSintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
            (VCVTQQ2PHZ256rmb addr:$src)>;
  def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
                           (v8f16 VR128X:$src0), VK4WM:$mask),
            (VCVTQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
                           v8f16x_info.ImmAllZerosV, VK4WM:$mask),
            (VCVTQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_VSintToFP (v2i64 VR128X:$src))),
            (VCVTQQ2PHZ128rr VR128X:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v8f16 (X86any_VSintToFP (loadv2i64 addr:$src))),
            (VCVTQQ2PHZ128rm addr:$src)>;
  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
            (VCVTQQ2PHZ128rmb addr:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
                           (v8f16 VR128X:$src0), VK2WM:$mask),
            (VCVTQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
                           v8f16x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;

  // Special patterns to allow use of X86VMUintToFP for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v8f16 (X86any_VUintToFP (v4i64 VR256X:$src))),
            (VCVTUQQ2PHZ256rr VR256X:$src)>;
  def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
                           VK4WM:$mask),
            (VCVTUQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
  def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
                           VK4WM:$mask),
            (VCVTUQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;

  def : Pat<(v8f16 (X86any_VUintToFP (loadv4i64 addr:$src))),
            (VCVTUQQ2PHZ256rm addr:$src)>;
  def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
                           VK4WM:$mask),
            (VCVTUQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
                           VK4WM:$mask),
            (VCVTUQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_VUintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
            (VCVTUQQ2PHZ256rmb addr:$src)>;
  def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
                           (v8f16 VR128X:$src0), VK4WM:$mask),
            (VCVTUQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
                           v8f16x_info.ImmAllZerosV, VK4WM:$mask),
            (VCVTUQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_VUintToFP (v2i64 VR128X:$src))),
            (VCVTUQQ2PHZ128rr VR128X:$src)>;
  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTUQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTUQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v8f16 (X86any_VUintToFP (loadv2i64 addr:$src))),
            (VCVTUQQ2PHZ128rm addr:$src)>;
  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTUQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTUQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
            (VCVTUQQ2PHZ128rmb addr:$src)>;
  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
                           (v8f16 VR128X:$src0), VK2WM:$mask),
            (VCVTUQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
                           v8f16x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTUQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
}

// Complex FP16 multiply-accumulate. Note: operands are described with f32
// vector infos — each 32-bit lane holds one complex value (a pair of f16s).
// $dst is an accumulator tied to $src1 and marked @earlyclobber.
let Constraints = "@earlyclobber $dst, $src1 = $dst" in {
  multiclass avx512_cfmaop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _, bit IsCommutable> {
    defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                 (ins _.RC:$src2, _.RC:$src3),
                                 OpcodeStr, "$src3, $src2", "$src2, $src3",
                                 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), IsCommutable>, EVEX_4V;

    defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                 (ins _.RC:$src2, _.MemOp:$src3),
                                 OpcodeStr, "$src3, $src2", "$src2, $src3",
                                 (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>, EVEX_4V;

    defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                  (ins _.RC:$src2, _.ScalarMemOp:$src3),
                                  OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr),
                                  (_.VT (OpNode _.RC:$src2, (_.VT (_.BroadcastLdFrag addr:$src3)), _.RC:$src1))>, EVEX_B, EVEX_4V;
  }
} // Constraints = "@earlyclobber $dst, $src1 = $dst"

multiclass avx512_cfmaop_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86VectorVTInfo _> {
  let Constraints = "@earlyclobber $dst, $src1 = $dst" in
  defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
                                OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
                                (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc)))>,
                                EVEX_4V, EVEX_B, EVEX_RC;
}


multiclass avx512_cfmaop_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, bit IsCommutable> {
  let Predicates = [HasFP16] in {
    defm Z : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v16f32_info, IsCommutable>,
             avx512_cfmaop_round<opc, OpcodeStr, OpNodeRnd, v16f32_info>,
             EVEX_V512, Sched<[WriteFMAZ]>;
  }
  let Predicates = [HasVLX, HasFP16] in {
    defm Z256 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v8f32x_info, IsCommutable>, EVEX_V256, Sched<[WriteFMAY]>;
    defm Z128 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v4f32x_info, IsCommutable>, EVEX_V128, Sched<[WriteFMAX]>;
  }
}

multiclass avx512_cfmulop_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 SDNode MaskOpNode, SDNode OpNodeRnd, bit IsCommutable> {
  let Predicates = [HasFP16] in {
    defm Z : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
                              WriteFMAZ, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>,
             avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, WriteFMAZ, v16f32_info,
                                    "", "@earlyclobber $dst">, EVEX_V512;
  }
  let Predicates = [HasVLX, HasFP16] in {
    defm Z256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
                                 WriteFMAY, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V256;
    defm Z128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
                                 WriteFMAX, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V128;
  }
}


// Complex FP16 ops read MXCSR (rounding control / exception state).
// The conjugate forms (VFC*) are not commutative.
let Uses = [MXCSR] in {
  defm VFMADDCPH : avx512_cfmaop_common<0x56, "vfmaddcph", x86vfmaddc, x86vfmaddcRnd, 1>,
                   T_MAP6XS, EVEX_CD8<32, CD8VF>;
  defm VFCMADDCPH : avx512_cfmaop_common<0x56, "vfcmaddcph", x86vfcmaddc, x86vfcmaddcRnd, 0>,
                    T_MAP6XD, EVEX_CD8<32, CD8VF>;

  defm VFMULCPH : avx512_cfmulop_common<0xD6, "vfmulcph", x86vfmulc, x86vfmulc,
                                        x86vfmulcRnd, 1>, T_MAP6XS, EVEX_CD8<32, CD8VF>;
  defm VFCMULCPH : avx512_cfmulop_common<0xD6, "vfcmulcph", x86vfcmulc,
                                         x86vfcmulc, x86vfcmulcRnd, 0>, T_MAP6XD, EVEX_CD8<32, CD8VF>;
}


// Scalar complex FP16 FMA (vf[c]maddcsh): one complex value in the low 32 bits,
// modeled with v4f32x_info like the packed forms.
multiclass avx512_cfmaop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
                                   bit IsCommutable> {
  let Predicates = [HasFP16], Constraints = "@earlyclobber $dst, $src1 = $dst" in {
    defm r : AVX512_maskable_3src<opc, MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
                                  (ins VR128X:$src2, VR128X:$src3), OpcodeStr,
                                  "$src3, $src2", "$src2, $src3",
                                  (v4f32 (OpNode VR128X:$src2, VR128X:$src3, VR128X:$src1)), IsCommutable>,
                                  Sched<[WriteFMAX]>;
    defm m : AVX512_maskable_3src<opc, MRMSrcMem, v4f32x_info, (outs VR128X:$dst),
                                  (ins VR128X:$src2, ssmem:$src3), OpcodeStr,
                                  "$src3, $src2", "$src2, $src3",
                                  (v4f32 (OpNode VR128X:$src2, (sse_load_f32 addr:$src3), VR128X:$src1))>,
                                  Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
    defm rb : AVX512_maskable_3src<opc, MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
                                   (ins VR128X:$src2, VR128X:$src3, AVX512RC:$rc), OpcodeStr,
                                   "$rc, $src3, $src2", "$src2, $src3, $rc",
                                   (v4f32 (OpNodeRnd VR128X:$src2, VR128X:$src3, VR128X:$src1, (i32 timm:$rc)))>,
                                   EVEX_B, EVEX_RC, Sched<[WriteFMAX]>;
  }
}

// Scalar complex FP16 multiply (vf[c]mulcsh): two-operand form, no accumulator.
multiclass avx512_cfmbinop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                     SDNode OpNodeRnd, bit IsCommutable> {
  let Predicates = [HasFP16] in {
    defm rr : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
                              (ins VR128X:$src1, VR128X:$src2), OpcodeStr,
                              "$src2, $src1", "$src1, $src2",
                              (v4f32 (OpNode VR128X:$src1, VR128X:$src2)),
                              IsCommutable, IsCommutable, IsCommutable,
                              X86selects, "@earlyclobber $dst">, Sched<[WriteFMAX]>;
    defm rm : AVX512_maskable<opc, MRMSrcMem, f32x_info, (outs VR128X:$dst),
                              (ins VR128X:$src1, ssmem:$src2), OpcodeStr,
                              "$src2, $src1", "$src1, $src2",
                              (v4f32 (OpNode VR128X:$src1, (sse_load_f32 addr:$src2))),
                              0, 0, 0, X86selects, "@earlyclobber $dst">,
                              Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
    defm rrb : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
                               (ins VR128X:$src1, VR128X:$src2, AVX512RC:$rc), OpcodeStr,
                               "$rc, $src2, $src1", "$src1, $src2, $rc",
                               (OpNodeRnd (v4f32 VR128X:$src1), (v4f32 VR128X:$src2), (i32 timm:$rc)),
                               0, 0, 0, X86selects, "@earlyclobber $dst">,
                               EVEX_B, EVEX_RC, Sched<[WriteFMAX]>;
  }
}

let Uses = [MXCSR] in {
  defm VFMADDCSHZ : avx512_cfmaop_sh_common<0x57, "vfmaddcsh", x86vfmaddcSh, x86vfmaddcShRnd, 1>,
                    T_MAP6XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX_4V;
  defm VFCMADDCSHZ : avx512_cfmaop_sh_common<0x57, "vfcmaddcsh", x86vfcmaddcSh, x86vfcmaddcShRnd, 0>,
                     T_MAP6XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX_4V;

  defm VFMULCSHZ : avx512_cfmbinop_sh_common<0xD7, "vfmulcsh", x86vfmulcSh, x86vfmulcShRnd, 1>,
                   T_MAP6XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX_4V;
  defm VFCMULCSHZ : avx512_cfmbinop_sh_common<0xD7, "vfcmulcsh", x86vfcmulcSh, x86vfcmulcShRnd, 0>,
                    T_MAP6XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX_4V;
}