//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 AVX512 instruction set, defining the
// instructions and the instruction properties which are needed for code
// generation, machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//

// Group template arguments that can be derived from the vector type (EltNum x
// EltVT). These are things like the register class for the writemask, etc.
// The idea is to pass one of these as the template argument rather than the
// individual arguments.
// The template is also used for scalar types; in this case NumElts is 1.
class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
                      string suffix = ""> {
  RegisterClass RC = rc;
  ValueType EltVT = eltvt;
  int NumElts = numelts;

  // Corresponding mask register class.
  RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);

  // Corresponding mask register pair class.
  RegisterOperand KRPC = !if (!gt(NumElts, 16), ?,
                              !cast<RegisterOperand>("VK" # NumElts # "Pair"));

  // Corresponding write-mask register class.
  RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");

  // The mask VT.
  ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");

  // Suffix used in the instruction mnemonic.
  string Suffix = suffix;

  // VTName is a string name for the vector VT. For vector types it is
  // v # NumElts # EltVT, so for a vector of 8 i32 elements it is v8i32.
  // It is a little more complex for scalar types, where NumElts = 1: in that
  // case we build the smallest 128-bit vector type, e.g. v8f16, v4f32 or v2f64.
  string VTName = "v" # !if (!eq (NumElts, 1),
                        !if (!eq (EltVT.Size, 16), 8,
                        !if (!eq (EltVT.Size, 32), 4,
                        !if (!eq (EltVT.Size, 64), 2, NumElts))), NumElts) # EltVT;

  // The vector VT.
  ValueType VT = !cast<ValueType>(VTName);

  string EltTypeName = !cast<string>(EltVT);
  // Size of the element type in bits, e.g. 32 for v16i32.
  string EltSizeName = !subst("i", "", !subst("f", "", !subst("b", "", EltTypeName)));
  int EltSize = EltVT.Size;

  // "i" for integer types and "f" for floating-point types
  string TypeVariantName = !subst("b", "", !subst(EltSizeName, "", EltTypeName));

  // Size of RC in bits, e.g. 512 for VR512.
  int Size = VT.Size;

  // The corresponding memory operand, e.g. i512mem for VR512.
  X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
  X86MemOperand ScalarMemOp = !cast<X86MemOperand>(!subst("b", "", EltTypeName) # "mem");
  // FP scalar memory operand for intrinsics - ssmem/sdmem.
  Operand IntScalarMemOp = !if (!eq (EltTypeName, "f16"), !cast<Operand>("shmem"),
                           !if (!eq (EltTypeName, "bf16"), !cast<Operand>("shmem"),
                           !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
                           !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?))));

  // Load patterns
  PatFrag LdFrag = !cast<PatFrag>("load" # VTName);

  PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" # VTName);

  PatFrag ScalarLdFrag = !cast<PatFrag>("load" # !subst("b", "", EltTypeName));
  PatFrag BroadcastLdFrag = !cast<PatFrag>("X86VBroadcastld" # EltSizeName);

  PatFrags ScalarIntMemFrags = !if (!eq (EltTypeName, "f16"), !cast<PatFrags>("sse_load_f16"),
                               !if (!eq (EltTypeName, "bf16"), !cast<PatFrags>("sse_load_f16"),
                               !if (!eq (EltTypeName, "f32"), !cast<PatFrags>("sse_load_f32"),
                               !if (!eq (EltTypeName, "f64"), !cast<PatFrags>("sse_load_f64"), ?))));

  // The string to specify embedded broadcast in assembly.
  string BroadcastStr = "{1to" # NumElts # "}";

  // 8-bit compressed displacement tuple/subvector format. This is only
  // defined for NumElts <= 8.
  CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
                               !cast<CD8VForm>("CD8VT" # NumElts), ?);

  SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
                          !if (!eq (Size, 256), sub_ymm, ?));

  Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
                     !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
                     !if (!eq (EltTypeName, "f16"), SSEPackedSingle, // FIXME?
                     !if (!eq (EltTypeName, "bf16"), SSEPackedSingle, // FIXME?
                     SSEPackedInt))));

  RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X,
                      !if (!eq (EltTypeName, "f16"), FR16X,
                      !if (!eq (EltTypeName, "bf16"), FR16X,
                      FR64X)));

  dag ImmAllZerosV = (VT immAllZerosV);

  string ZSuffix = !if (!eq (Size, 128), "Z128",
                   !if (!eq (Size, 256), "Z256", "Z"));
}

def v64i8_info  : X86VectorVTInfo<64,  i8, VR512, "b">;
def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
def v8i64_info  : X86VectorVTInfo<8,  i64, VR512, "q">;
def v32f16_info : X86VectorVTInfo<32, f16, VR512, "ph">;
def v32bf16_info: X86VectorVTInfo<32, bf16, VR512, "pbf">;
def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
def v8f64_info  : X86VectorVTInfo<8,  f64, VR512, "pd">;

// "x" in v32i8x_info means RC = VR256X
def v32i8x_info  : X86VectorVTInfo<32,  i8, VR256X, "b">;
def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
def v8i32x_info  : X86VectorVTInfo<8,  i32, VR256X, "d">;
def v4i64x_info  : X86VectorVTInfo<4,  i64, VR256X, "q">;
def v16f16x_info : X86VectorVTInfo<16, f16, VR256X, "ph">;
def v16bf16x_info: X86VectorVTInfo<16, bf16, VR256X, "pbf">;
def v8f32x_info  : X86VectorVTInfo<8,  f32, VR256X, "ps">;
def v4f64x_info  : X86VectorVTInfo<4,  f64, VR256X, "pd">;

def v16i8x_info  : X86VectorVTInfo<16,  i8, VR128X, "b">;
def v8i16x_info  : X86VectorVTInfo<8,  i16, VR128X, "w">;
def v4i32x_info  : X86VectorVTInfo<4,  i32, VR128X, "d">;
def v2i64x_info  : X86VectorVTInfo<2,  i64, VR128X, "q">;
def v8f16x_info  : X86VectorVTInfo<8,  f16, VR128X, "ph">;
def v8bf16x_info : X86VectorVTInfo<8,  bf16, VR128X, "pbf">;
def v4f32x_info  : X86VectorVTInfo<4,  f32, VR128X, "ps">;
def v2f64x_info  : X86VectorVTInfo<2,  f64, VR128X, "pd">;
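
// Worked example (editorial note, derived from the class above): for
// v16f32_info, the fields resolve to NumElts = 16, VTName = "v16f32",
// KRC = VK16, KRCWM = VK16WM, KVT = v16i1, Suffix = "ps", EltSizeName = "32",
// MemOp = f512mem, BroadcastStr = "{1to16}" and ZSuffix = "Z". For
// v2f64x_info they resolve to KRC = VK2, MemOp = f128mem,
// BroadcastStr = "{1to2}" and ZSuffix = "Z128".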
// We map scalar types to the smallest (128-bit) vector type
// with the appropriate element type. This allows us to use the same masking
// logic.
def i32x_info  : X86VectorVTInfo<1, i32, GR32, "si">;
def i64x_info  : X86VectorVTInfo<1, i64, GR64, "sq">;
def f16x_info  : X86VectorVTInfo<1, f16, VR128X, "sh">;
def bf16x_info : X86VectorVTInfo<1, bf16, VR128X, "sbf">;
def f32x_info  : X86VectorVTInfo<1, f32, VR128X, "ss">;
def f64x_info  : X86VectorVTInfo<1, f64, VR128X, "sd">;

class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
                           X86VectorVTInfo i128> {
  X86VectorVTInfo info512 = i512;
  X86VectorVTInfo info256 = i256;
  X86VectorVTInfo info128 = i128;
}

def avx512vl_i8_info   : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
                                              v16i8x_info>;
def avx512vl_i16_info  : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
                                              v8i16x_info>;
def avx512vl_i32_info  : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
                                              v4i32x_info>;
def avx512vl_i64_info  : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
                                              v2i64x_info>;
def avx512vl_f16_info  : AVX512VLVectorVTInfo<v32f16_info, v16f16x_info,
                                              v8f16x_info>;
def avx512vl_bf16_info : AVX512VLVectorVTInfo<v32bf16_info, v16bf16x_info,
                                              v8bf16x_info>;
def avx512vl_f32_info  : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
                                              v4f32x_info>;
def avx512vl_f64_info  : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
                                              v2f64x_info>;

class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
                       ValueType _vt> {
  RegisterClass KRC = _krc;
  RegisterClass KRCWM = _krcwm;
  ValueType KVT = _vt;
}

def v1i1_info  : X86KVectorVTInfo<VK1, VK1WM, v1i1>;
def v2i1_info  : X86KVectorVTInfo<VK2, VK2WM, v2i1>;
def v4i1_info  : X86KVectorVTInfo<VK4, VK4WM, v4i1>;
def v8i1_info  : X86KVectorVTInfo<VK8, VK8WM, v8i1>;
def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;

// Used for matching masked operations. Ensures the operation part only has a
// single use.
def vselect_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2),
                           (vselect node:$mask, node:$src1, node:$src2), [{
  return isProfitableToFormMaskedOp(N);
}]>;

def X86selects_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2),
                              (X86selects node:$mask, node:$src1, node:$src2), [{
  return isProfitableToFormMaskedOp(N);
}]>;
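
// For reference, masked operations built from these fragments take one of
// two shapes (sketch only, not an upstream pattern):
//   (vselect_mask $mask, (op ...), $src0)          ; merge-masking
//   (vselect_mask $mask, (op ...), immAllZerosV)   ; zero-masking
// and isProfitableToFormMaskedOp rejects the fold when the operation has
// other uses, matching the single-use requirement described above.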
// This multiclass generates the masking variants from the non-masking
// variant. It only provides the assembly pieces for the masking variants.
// It assumes custom ISel patterns for masking which can be provided as
// template arguments.
multiclass AVX512_maskable_custom<bits<8> O, Format F,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  list<dag> MaskingPattern,
                                  list<dag> ZeroMaskingPattern,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable,
                                  string ClobberConstraint = ""> {
  let isCommutable = IsCommutable, Constraints = ClobberConstraint in
  def NAME: AVX512<O, F, Outs, Ins,
                   OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                  "$dst, "#IntelSrcAsm#"}",
                   Pattern>;

  // Prefer over VMOV*rrk Pat<>
  let isCommutable = IsKCommutable in
  def NAME#k: AVX512<O, F, Outs, MaskingIns,
                     OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                    "$dst {${mask}}, "#IntelSrcAsm#"}",
                     MaskingPattern>,
              EVEX_K {
    // In case of the 3src subclass this is overridden with a let.
    string Constraints = !if(!eq(ClobberConstraint, ""), MaskingConstraint,
                             !if(!eq(MaskingConstraint, ""), ClobberConstraint,
                                 !strconcat(ClobberConstraint, ", ", MaskingConstraint)));
  }

  // Zero-masking does not add any restrictions on commuting operands, so it
  // is OK to use IsCommutable instead of IsKCommutable.
  let isCommutable = IsKZCommutable, // Prefer over VMOV*rrkz Pat<>
      Constraints = ClobberConstraint in
  def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
                      OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
                                     "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
                      ZeroMaskingPattern>,
               EVEX_KZ;
}


// Common base class of AVX512_maskable and AVX512_maskable_3src.
multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS,
                                  SDPatternOperator Select = vselect_mask,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable,
                                  string ClobberConstraint = ""> :
  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst, MaskingRHS)],
                         [(set _.RC:$dst,
                               (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
                         MaskingConstraint, IsCommutable,
                         IsKCommutable, IsKZCommutable, ClobberConstraint>;
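
// As an illustration, instantiating these under "defm VFOO" (a hypothetical
// name) yields three records:
//   VFOO   - unmasked form
//   VFOOk  - merge-masking form, EVEX_K,  asm suffix "$dst {${mask}}"
//   VFOOkz - zero-masking form,  EVEX_KZ, asm suffix "$dst {${mask}} {z}"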
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
// This version uses a separate dag for non-masking and masking.
multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
                                 dag Outs, dag Ins, string OpcodeStr,
                                 string AttSrcAsm, string IntelSrcAsm,
                                 dag RHS, dag MaskRHS,
                                 string ClobberConstraint = "",
                                 bit IsCommutable = 0, bit IsKCommutable = 0,
                                 bit IsKZCommutable = IsCommutable> :
  AVX512_maskable_custom<O, F, Outs, Ins,
                         !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                         !con((ins _.KRCWM:$mask), Ins),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst,
                               (vselect_mask _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
                         [(set _.RC:$dst,
                               (vselect_mask _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
                         "$src0 = $dst", IsCommutable, IsKCommutable,
                         IsKZCommutable, ClobberConstraint>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS,
                           bit IsCommutable = 0, bit IsKCommutable = 0,
                           bit IsKZCommutable = IsCommutable,
                           SDPatternOperator Select = vselect_mask,
                           string ClobberConstraint = ""> :
  AVX512_maskable_common<O, F, _, Outs, Ins,
                         !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                         !con((ins _.KRCWM:$mask), Ins),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                         (Select _.KRCWM:$mask, RHS, _.RC:$src0),
                         Select, "$src0 = $dst", IsCommutable, IsKCommutable,
                         IsKZCommutable, ClobberConstraint>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the scalar instruction.
multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins, string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS> :
  AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
                  RHS, 0, 0, 0, X86selects_mask>;

// Similar to AVX512_maskable but in this case one of the source operands
// ($src1) is already tied to $dst so we just use that for the preserved
// vector elements. NOTE that the NonTiedIns (the ins dag) should exclude
// $src1.
multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
                                dag Outs, dag NonTiedIns, string OpcodeStr,
                                string AttSrcAsm, string IntelSrcAsm,
                                dag RHS,
                                bit IsCommutable = 0,
                                bit IsKCommutable = 0,
                                SDPatternOperator Select = vselect_mask,
                                bit MaskOnly = 0> :
  AVX512_maskable_common<O, F, _, Outs,
                         !con((ins _.RC:$src1), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm,
                         !if(MaskOnly, (null_frag), RHS),
                         (Select _.KRCWM:$mask, RHS, _.RC:$src1),
                         Select, "", IsCommutable, IsKCommutable>;
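
// Minimal usage sketch (hypothetical opcode and node, not an upstream def):
//   defm VFOOPSZ : AVX512_maskable<0x58, MRMSrcReg, v16f32_info,
//                    (outs VR512:$dst), (ins VR512:$src1, VR512:$src2),
//                    "vfoops", "$src2, $src1", "$src1, $src2",
//                    (v16f32 (fadd VR512:$src1, VR512:$src2)), 1>;
// would produce VFOOPSZ/VFOOPSZk/VFOOPSZkz with the masking patterns and the
// "$src0 = $dst" tie supplied automatically.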
// Similar to AVX512_maskable_3src but in this case the input VT for the tied
// operand differs from the output VT. This requires a bitconvert on
// the preserved vector going into the vselect.
// NOTE: The unmasked pattern is disabled.
multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
                                     X86VectorVTInfo InVT,
                                     dag Outs, dag NonTiedIns, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS, bit IsCommutable = 0> :
  AVX512_maskable_common<O, F, OutVT, Outs,
                         !con((ins InVT.RC:$src1), NonTiedIns),
                         !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                         !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
                         (vselect_mask InVT.KRCWM:$mask, RHS,
                                       (bitconvert InVT.RC:$src1)),
                         vselect_mask, "", IsCommutable>;

multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                       dag Outs, dag NonTiedIns, string OpcodeStr,
                                       string AttSrcAsm, string IntelSrcAsm,
                                       dag RHS,
                                       bit IsCommutable = 0,
                                       bit IsKCommutable = 0,
                                       bit MaskOnly = 0> :
  AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
                       IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
                       X86selects_mask, MaskOnly>;

multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern> :
  AVX512_maskable_custom<O, F, Outs, Ins,
                         !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                         !con((ins _.KRCWM:$mask), Ins),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                         "$src0 = $dst">;

multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                       dag Outs, dag NonTiedIns,
                                       string OpcodeStr,
                                       string AttSrcAsm, string IntelSrcAsm,
                                       list<dag> Pattern> :
  AVX512_maskable_custom<O, F, Outs,
                         !con((ins _.RC:$src1), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                         "">;

// Instructions with a mask that put their result in a mask register,
// like "compare" and "vptest".
multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
                                      dag Outs,
                                      dag Ins, dag MaskingIns,
                                      string OpcodeStr,
                                      string AttSrcAsm, string IntelSrcAsm,
                                      list<dag> Pattern,
                                      list<dag> MaskingPattern,
                                      bit IsCommutable = 0> {
  let isCommutable = IsCommutable in {
    def NAME: AVX512<O, F, Outs, Ins,
                     OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                    "$dst, "#IntelSrcAsm#"}",
                     Pattern>;

    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                      "$dst {${mask}}, "#IntelSrcAsm#"}",
                       MaskingPattern>, EVEX_K;
  }
}

multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                                      dag Outs,
                                      dag Ins, dag MaskingIns,
                                      string OpcodeStr,
                                      string AttSrcAsm, string IntelSrcAsm,
                                      dag RHS, dag MaskingRHS,
                                      bit IsCommutable = 0> :
  AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
                             AttSrcAsm, IntelSrcAsm,
                             [(set _.KRC:$dst, RHS)],
                             [(set _.KRC:$dst, MaskingRHS)], IsCommutable>;

multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                               dag Outs, dag Ins, string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, dag RHS_su, bit IsCommutable = 0> :
  AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
                             !con((ins _.KRCWM:$mask), Ins),
                             OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                             (and _.KRCWM:$mask, RHS_su), IsCommutable>;
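
// Note that for this compare/vptest family the mask is not a lane writemask
// over a vector result: the masked pattern is (and $mask, RHS_su), i.e. the
// k-register result of the comparison is ANDed with the input mask.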
// Used by conversion instructions.
multiclass AVX512_maskable_cvt<bits<8> O, Format F, X86VectorVTInfo _,
                               dag Outs,
                               dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                               string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, dag MaskingRHS, dag ZeroMaskingRHS> :
  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst, MaskingRHS)],
                         [(set _.RC:$dst, ZeroMaskingRHS)],
                         "$src0 = $dst">;

multiclass AVX512_maskable_fma<bits<8> O, Format F, X86VectorVTInfo _,
                               dag Outs, dag NonTiedIns, string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, dag MaskingRHS, bit IsCommutable,
                               bit IsKCommutable> :
  AVX512_maskable_custom<O, F, Outs,
                         !con((ins _.RC:$src1), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst,
                               (vselect_mask _.KRCWM:$mask, MaskingRHS, _.RC:$src1))],
                         [(set _.RC:$dst,
                               (vselect_mask _.KRCWM:$mask, MaskingRHS, _.ImmAllZerosV))],
                         "", IsCommutable, IsKCommutable>;

// Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
// swizzled by ExecutionDomainFix to pxor.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
                        [(set VR512:$dst, (v16i32 immAllZerosV))]>;
def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
                              [(set VR512:$dst, (v16i32 immAllOnesV))]>;
}

let Predicates = [HasAVX512] in {
def : Pat<(v64i8 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v32i16 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v32f16 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
}
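
// All of the zero patterns above funnel into the single AVX512_512_SET0
// pseudo: a zero register has the same bit pattern for every element type,
// so one rematerializable xor idiom covers all of them.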
// Alias instructions that allow VPTERNLOG to be used with a mask to create
// a mix of all ones and all zeros elements. This is done this way to force
// the same register to be used as input for all three sources.
let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK16WM:$mask), "",
                                [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
                                                           (v16i32 immAllOnesV),
                                                           (v16i32 immAllZerosV)))]>;
def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK8WM:$mask), "",
                                [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
                                                           (v8i64 immAllOnesV),
                                                           (v8i64 immAllZerosV)))]>;
}

let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
                        [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
                        [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
}

let Predicates = [HasAVX512] in {
def : Pat<(v8i16 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v16i8 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v2i64 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v8f16 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v4f32 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v2f64 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v32i8 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v16i16 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v4i64 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v16f16 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v8f32 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v4f64 immAllZerosV), (AVX512_256_SET0)>;
}

// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
  def AVX512_FsFLD0SH : I<0, Pseudo, (outs FR16X:$dst), (ins), "",
                          [(set FR16X:$dst, fp16imm0)]>;
  def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
                          [(set FR32X:$dst, fp32imm0)]>;
  def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
                          [(set FR64X:$dst, fp64imm0)]>;
  def AVX512_FsFLD0F128 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
                            [(set VR128X:$dst, fp128imm0)]>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - VECTOR INSERT
//
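
// For orientation (summarizing the instruction semantics): e.g.
//   vinsertf32x4 $1, %xmm1, %zmm2, %zmm0 {%k1}
// takes %zmm2, replaces 128-bit lane 1 (bits [255:128]) with %xmm1, and
// merges the result into %zmm0 under writemask %k1.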
// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one.
multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
                                  X86VectorVTInfo To,
                                  SDPatternOperator vinsert_insert,
                                  SDPatternOperator vinsert_for_mask,
                                  X86FoldableSchedWrite sched> {
  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                                         (From.VT From.RC:$src2),
                                         (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                                           (From.VT From.RC:$src2),
                                           (iPTR imm))>,
                   AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
    let mayLoad = 1 in
    defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                                         (From.VT (From.LdFrag addr:$src2)),
                                         (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                                           (From.VT (From.LdFrag addr:$src2)),
                                           (iPTR imm))>, AVX512AIi8Base, EVEX_4V,
                   EVEX_CD8<From.EltSize, From.CD8TupleForm>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Passes the same pattern operator for masked and unmasked ops.
multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
                            X86VectorVTInfo To,
                            SDPatternOperator vinsert_insert,
                            X86FoldableSchedWrite sched> :
  vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>;

multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                                     X86VectorVTInfo To, PatFrag vinsert_insert,
                                     SDNodeXForm INSERT_get_vinsert_imm,
                                     list<Predicate> p> {
  let Predicates = p in {
    def : Pat<(vinsert_insert:$ins
                   (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                      To.RC:$src1, From.RC:$src2,
                      (INSERT_get_vinsert_imm To.RC:$ins)))>;

    def : Pat<(vinsert_insert:$ins
                   (To.VT To.RC:$src1),
                   (From.VT (From.LdFrag addr:$src2)),
                   (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rm")
                      To.RC:$src1, addr:$src2,
                      (INSERT_get_vinsert_imm To.RC:$ins)))>;
  }
}

multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
                            ValueType EltVT64, int Opcode256,
                            X86FoldableSchedWrite sched> {

  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 vinsert128_insert, sched>, EVEX_V256;

  defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
                            X86VectorVTInfo< 4, EltVT32, VR128X>,
                            X86VectorVTInfo<16, EltVT32, VR512>,
                            vinsert128_insert, sched>, EVEX_V512;

  defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
                            X86VectorVTInfo< 4, EltVT64, VR256X>,
                            X86VectorVTInfo< 8, EltVT64, VR512>,
                            vinsert256_insert, sched>, REX_W, EVEX_V512;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 null_frag, vinsert128_insert, sched>,
                                 VEX_W1X, EVEX_V256;
  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
                              X86VectorVTInfo< 2, EltVT64, VR128X>,
                              X86VectorVTInfo< 8, EltVT64, VR512>,
                              null_frag, vinsert128_insert, sched>,
                              REX_W, EVEX_V512;

    defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
                              X86VectorVTInfo< 8, EltVT32, VR256X>,
                              X86VectorVTInfo<16, EltVT32, VR512>,
                              null_frag, vinsert256_insert, sched>,
                              EVEX_V512;
  }
}

// FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>;
defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>;

// Codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;

defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;

defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;

// Codegen patterns with the alternative types: insert VEC128 into VEC256.
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v8f16x_info, v16f16x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
// Codegen patterns with the alternative types: insert VEC128 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v8f16x_info, v32f16_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
// Codegen patterns with the alternative types: insert VEC256 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v16f16x_info, v32f16_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;


multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                 X86VectorVTInfo To, X86VectorVTInfo Cast,
                                 PatFrag vinsert_insert,
                                 SDNodeXForm INSERT_get_vinsert_imm,
                                 list<Predicate> p> {
let Predicates = p in {
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT From.RC:$src2),
                                                 (iPTR imm))),
                           Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rrk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT
                                                  (bitconvert
                                                   (From.LdFrag addr:$src2))),
                                                 (iPTR imm))),
                           Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rmk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;

  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT From.RC:$src2),
                                                 (iPTR imm))),
                           Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rrkz")
             Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT (From.LdFrag addr:$src2)),
                                                 (iPTR imm))),
                           Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rmkz")
             Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
}
}

defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
                             v8f32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
                             v4f64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4i32x_info, v8i32x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v8i16x_info, v16i16x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v16i8x_info, v32i8x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
                             v16f32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
                             v8f64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
                             v16f32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
                             v8f64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

// vinsertps - insert f32 into XMM
let ExeDomain = SSEPackedSingle in {
let isCommutable = 1 in
def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
      (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, timm:$src3))]>,
      EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
      (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1,
                          (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                          timm:$src3))]>,
      EVEX_4V, EVEX_CD8<32, CD8VT1>,
      Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
}
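
// The vinsertps immediate uses the SSE4.1 encoding: bits [7:6] select the
// element of $src2 (register form only), bits [5:4] select the element of
// $src1 to replace, and bits [3:0] zero the selected result elements. In the
// memory form the loaded f32 is used as the source element directly.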
//===----------------------------------------------------------------------===//
// AVX-512 VECTOR EXTRACT
//---

// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one.
multiclass vextract_for_size_split<int Opcode,
                                   X86VectorVTInfo From, X86VectorVTInfo To,
                                   SDPatternOperator vextract_extract,
                                   SDPatternOperator vextract_for_mask,
                                   SchedWrite SchedRR, SchedWrite SchedMR> {

  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
                (ins From.RC:$src1, u8imm:$idx),
                "vextract" # To.EltTypeName # "x" # To.NumElts,
                "$idx, $src1", "$src1, $idx",
                (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
                (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
                AVX512AIi8Base, EVEX, Sched<[SchedRR]>;

    def mr : AVX512AIi8<Opcode, MRMDestMem, (outs),
                (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
                "vextract" # To.EltTypeName # "x" # To.NumElts #
                    "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
                [(store (To.VT (vextract_extract:$idx
                                (From.VT From.RC:$src1), (iPTR imm))),
                        addr:$dst)]>, EVEX,
                Sched<[SchedMR]>;

    let mayStore = 1, hasSideEffects = 0 in
    def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
                (ins To.MemOp:$dst, To.KRCWM:$mask,
                     From.RC:$src1, u8imm:$idx),
                "vextract" # To.EltTypeName # "x" # To.NumElts #
                    "\t{$idx, $src1, $dst {${mask}}|"
                    "$dst {${mask}}, $src1, $idx}", []>,
                EVEX_K, EVEX, Sched<[SchedMR]>;
  }
}

// Passes the same pattern operator for masked and unmasked ops.
multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
                             X86VectorVTInfo To,
                             SDPatternOperator vextract_extract,
                             SchedWrite SchedRR, SchedWrite SchedMR> :
  vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract, SchedRR, SchedMR>;

// Codegen pattern for the alternative types
multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                                      X86VectorVTInfo To, PatFrag vextract_extract,
                                      SDNodeXForm EXTRACT_get_vextract_imm,
                                      list<Predicate> p> {
  let Predicates = p in {
    def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                      From.RC:$src1,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
    def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
                             (iPTR imm))), addr:$dst),
              (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1,
               (EXTRACT_get_vextract_imm To.RC:$ext))>;
  }
}

multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
                             ValueType EltVT64, int Opcode256,
                             SchedWrite SchedRR, SchedWrite SchedMR> {
  let Predicates = [HasAVX512] in {
    defm NAME # "32x4Z" : vextract_for_size<Opcode128,
                              X86VectorVTInfo<16, EltVT32, VR512>,
                              X86VectorVTInfo< 4, EltVT32, VR128X>,
                              vextract128_extract, SchedRR, SchedMR>,
                              EVEX_V512, EVEX_CD8<32, CD8VT4>;
    defm NAME # "64x4Z" : vextract_for_size<Opcode256,
                              X86VectorVTInfo< 8, EltVT64, VR512>,
                              X86VectorVTInfo< 4, EltVT64, VR256X>,
                              vextract256_extract, SchedRR, SchedMR>,
                              REX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
  }
  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 vextract128_extract, SchedRR, SchedMR>,
                                 EVEX_V256, EVEX_CD8<32, CD8VT4>;
  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
                                 VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vextract_for_size_split<Opcode128,
                              X86VectorVTInfo< 8, EltVT64, VR512>,
                              X86VectorVTInfo< 2, EltVT64, VR128X>,
                              null_frag, vextract128_extract, SchedRR, SchedMR>,
                              REX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
    defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
                              X86VectorVTInfo<16, EltVT32, VR512>,
                              X86VectorVTInfo< 8, EltVT32, VR256X>,
                              null_frag, vextract256_extract, SchedRR, SchedMR>,
                              EVEX_V512, EVEX_CD8<32, CD8VT8>;
  }
}

// TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, WriteFShuffle256, WriteFStore>;
defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, WriteShuffle256, WriteVecStore>;

// extract_subvector codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen patterns with the alternative types: extract VEC128 from VEC256.
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v16f16x_info, v8f16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen patterns with the alternative types: extract VEC128 from VEC512.
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v32f16_info, v8f16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
// Codegen patterns with the alternative types: extract VEC256 from VEC512.
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v32f16_info, v16f16x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;


// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
let Predicates = [NoVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI128rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF128rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI128rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF128rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI128rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))),
          (v8f16 (VEXTRACTF128rr
                  (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI128rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}

// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
let Predicates = [HasVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI32x4Z256rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF32x4Z256rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI32x4Z256rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF32x4Z256rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI32x4Z256rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))),
          (v8f16 (VEXTRACTF32x4Z256rr
                  (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI32x4Z256rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}
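
// Example of the rewrite performed by the patterns above:
//   (v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4)))
// becomes a sub_ymm subregister read followed by a 128-bit extract with
// index 1, so that a VEX encoding (vextracti128) can be used instead of a
// full EVEX extract from the zmm register.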
// Additional patterns for handling a bitcast between the vselect and the
// extract_subvector.
multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                  X86VectorVTInfo To, X86VectorVTInfo Cast,
                                  PatFrag vextract_extract,
                                  SDNodeXForm EXTRACT_get_vextract_imm,
                                  list<Predicate> p> {
let Predicates = p in {
  def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
                                   (bitconvert
                                    (To.VT (vextract_extract:$ext
                                            (From.VT From.RC:$src), (iPTR imm)))),
                                   To.RC:$src0)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
                      Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;

  def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
                                   (bitconvert
                                    (To.VT (vextract_extract:$ext
                                            (From.VT From.RC:$src), (iPTR imm)))),
                                   Cast.ImmAllZerosV)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
                      Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
}
}

defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
                              v8f32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
                              v4f64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;

// vextractps - extract 32 bits from XMM
def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32orGR64:$dst),
                      (ins VR128X:$src1, u8imm:$src2),
                      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set GR32orGR64:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
                      EVEX, WIG, Sched<[WriteVecExtract]>;

def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
                      (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
                      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
                              addr:$dst)]>,
                      EVEX, WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;

//===---------------------------------------------------------------------===//
// AVX-512 BROADCAST
//---
// broadcast with a scalar argument.
multiclass avx512_broadcast_scalar<string Name, X86VectorVTInfo DestInfo,
                                   X86VectorVTInfo SrcInfo> {
  def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rr)
             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
                                       (X86VBroadcast SrcInfo.FRC:$src),
                                       DestInfo.RC:$src0)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rrk)
             DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
                                       (X86VBroadcast SrcInfo.FRC:$src),
                                       DestInfo.ImmAllZerosV)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rrkz)
             DestInfo.KRCWM:$mask,
             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
}
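
// The patterns above let a broadcast from an FP scalar register reuse the
// vector-register form of the instruction: COPY_TO_REGCLASS re-classes the
// FR32X/FR64X value into the 128-bit vector class (a copy that typically
// coalesces away), and the result feeds the ...rr/rrk/rrkz broadcasts.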
// Split version to allow mask and broadcast node to be different types. This
// helps support the 32x2 broadcasts.
multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
                                     SchedWrite SchedRR, SchedWrite SchedRM,
                                     X86VectorVTInfo MaskInfo,
                                     X86VectorVTInfo DestInfo,
                                     X86VectorVTInfo SrcInfo,
                                     bit IsConvertibleToThreeAddress,
                                     SDPatternOperator UnmaskedOp = X86VBroadcast,
                                     SDPatternOperator UnmaskedBcastOp = SrcInfo.BroadcastLdFrag> {
  let hasSideEffects = 0 in
  def rr : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), (ins SrcInfo.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set MaskInfo.RC:$dst,
                      (MaskInfo.VT
                       (bitconvert
                        (DestInfo.VT
                         (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))],
                    DestInfo.ExeDomain>, T8PD, EVEX, Sched<[SchedRR]>;
  def rrkz : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
                      (ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                                 "${dst} {${mask}} {z}, $src}"),
                      [(set MaskInfo.RC:$dst,
                        (vselect_mask MaskInfo.KRCWM:$mask,
                         (MaskInfo.VT
                          (bitconvert
                           (DestInfo.VT
                            (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
                         MaskInfo.ImmAllZerosV))],
                      DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ, Sched<[SchedRR]>;
  let Constraints = "$src0 = $dst" in
  def rrk : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
                     (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
                          SrcInfo.RC:$src),
                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
                                "${dst} {${mask}}, $src}"),
                     [(set MaskInfo.RC:$dst,
                       (vselect_mask MaskInfo.KRCWM:$mask,
                        (MaskInfo.VT
                         (bitconvert
                          (DestInfo.VT
                           (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
                        MaskInfo.RC:$src0))],
                     DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K, Sched<[SchedRR]>;

  let hasSideEffects = 0, mayLoad = 1 in
  def rm : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
                    (ins SrcInfo.ScalarMemOp:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set MaskInfo.RC:$dst,
                      (MaskInfo.VT
                       (bitconvert
                        (DestInfo.VT
                         (UnmaskedBcastOp addr:$src)))))],
                    DestInfo.ExeDomain>, T8PD, EVEX,
                    EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;

  def rmkz : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
                      (ins MaskInfo.KRCWM:$mask, SrcInfo.ScalarMemOp:$src),
                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                                 "${dst} {${mask}} {z}, $src}"),
                      [(set MaskInfo.RC:$dst,
                        (vselect_mask MaskInfo.KRCWM:$mask,
                         (MaskInfo.VT
                          (bitconvert
                           (DestInfo.VT
                            (SrcInfo.BroadcastLdFrag addr:$src)))),
                         MaskInfo.ImmAllZerosV))],
                      DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ,
                      EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;

  let Constraints = "$src0 = $dst",
      isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
  def rmk : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
                     (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
                          SrcInfo.ScalarMemOp:$src),
                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
                                "${dst} {${mask}}, $src}"),
                     [(set MaskInfo.RC:$dst,
                       (vselect_mask MaskInfo.KRCWM:$mask,
                        (MaskInfo.VT
                         (bitconvert
                          (DestInfo.VT
                           (SrcInfo.BroadcastLdFrag addr:$src)))),
                        MaskInfo.RC:$src0))],
                     DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K,
                     EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
}
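
// Taken together this produces the usual six variants per width:
// rr/rrk/rrkz for register sources and rm/rmk/rmkz for element-sized memory
// sources, with EVEX_CD8<SrcInfo.EltSize, CD8VT1> providing the compressed
// displacement scaling for the memory forms. MaskInfo and DestInfo only
// differ for the 32x2 broadcasts, where the bitconvert reconciles the types.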
// Helper class to force mask and broadcast result to same type.
multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
                               SchedWrite SchedRR, SchedWrite SchedRM,
                               X86VectorVTInfo DestInfo,
                               X86VectorVTInfo SrcInfo,
                               bit IsConvertibleToThreeAddress> :
  avx512_broadcast_rm_split<opc, OpcodeStr, SchedRR, SchedRM,
                            DestInfo, DestInfo, SrcInfo,
                            IsConvertibleToThreeAddress>;

multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                 WriteFShuffle256Ld, _.info512, _.info128, 1>,
             avx512_broadcast_scalar<NAME, _.info512, _.info128>,
             EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                    WriteFShuffle256Ld, _.info256, _.info128, 1>,
                avx512_broadcast_scalar<NAME, _.info256, _.info128>,
                EVEX_V256;
  }
}

multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                 WriteFShuffle256Ld, _.info512, _.info128, 1>,
             avx512_broadcast_scalar<NAME, _.info512, _.info128>,
             EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                    WriteFShuffle256Ld, _.info256, _.info128, 1>,
                avx512_broadcast_scalar<NAME, _.info256, _.info128>,
                EVEX_V256;
    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                    WriteFShuffle256Ld, _.info128, _.info128, 1>,
                avx512_broadcast_scalar<NAME, _.info128, _.info128>,
                EVEX_V128;
  }
}
defm VBROADCASTSS : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
                                           avx512vl_f32_info>;
defm VBROADCASTSD : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
                                           avx512vl_f64_info>, VEX_W1X;

multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
                                    X86VectorVTInfo _, SDPatternOperator OpNode,
                                    RegisterClass SrcRC> {
  // Fold with a mask even if it has multiple uses since it is cheap.
  let ExeDomain = _.ExeDomain in
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins SrcRC:$src),
                            "vpbroadcast"#_.Suffix, "$src", "$src",
                            (_.VT (OpNode SrcRC:$src)), /*IsCommutable*/0,
                            /*IsKCommutable*/0, /*IsKZCommutable*/0, vselect>,
                            T8PD, EVEX, Sched<[SchedRR]>;
}

multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
                                      X86VectorVTInfo _, SDPatternOperator OpNode,
                                      RegisterClass SrcRC, SubRegIndex Subreg> {
  let hasSideEffects = 0, ExeDomain = _.ExeDomain in
  defm rr : AVX512_maskable_custom<opc, MRMSrcReg,
                                   (outs _.RC:$dst), (ins GR32:$src),
                                   !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
                                   !con((ins _.KRCWM:$mask), (ins GR32:$src)),
                                   "vpbroadcast"#_.Suffix, "$src", "$src", [], [], [],
                                   "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>;

  def : Pat <(_.VT (OpNode SrcRC:$src)),
             (!cast<Instruction>(Name#rr)
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  // Fold with a mask even if it has multiple uses since it is cheap.
  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
             (!cast<Instruction>(Name#rrk) _.RC:$src0, _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
             (!cast<Instruction>(Name#rrkz) _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
}

multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
                                         AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
                                         RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
                                        OpNode, SrcRC, Subreg>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
                                           _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
    defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
                                           _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
  }
}

multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
                                       SDPatternOperator OpNode,
                                       RegisterClass SrcRC, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
                                      SrcRC>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
                                         SrcRC>, EVEX_V256;
    defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
                                         SrcRC>, EVEX_V128;
  }
}

defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
                       avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
                       avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
                       HasBWI>;
defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
                                                 X86VBroadcast, GR32, HasAVX512>;
defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
                                                 X86VBroadcast, GR64, HasAVX512>, REX_W;

multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
                                      AVX512VLVectorVTInfo _, Predicate prd,
                                      bit IsConvertibleToThreeAddress> {
  let Predicates = [prd] in {
    defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
                                 WriteShuffle256Ld, _.info512, _.info128,
                                 IsConvertibleToThreeAddress>,
             EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
                                    WriteShuffle256Ld, _.info256, _.info128,
                                    IsConvertibleToThreeAddress>,
                EVEX_V256;
    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle,
                                    WriteShuffleXLd, _.info128, _.info128,
                                    IsConvertibleToThreeAddress>,
                EVEX_V128;
  }
}

defm VPBROADCASTB : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
                                               avx512vl_i8_info, HasBWI, 0>;
defm VPBROADCASTW : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
                                               avx512vl_i16_info, HasBWI, 0>;
defm VPBROADCASTD : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
                                               avx512vl_i32_info, HasAVX512, 1>;
defm VPBROADCASTQ : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
                                               avx512vl_i64_info, HasAVX512, 1>, VEX_W1X;

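// Illustrative semantics of the defs above, e.g. for VPBROADCASTD:
//   vpbroadcastd %xmm0, %zmm1 {%k1} {z}
// replicates element 0 of %xmm0 into every dword lane enabled in %k1 and
// zeroes the remaining lanes.
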
multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
                                      SDPatternOperator OpNode,
                                      X86VectorVTInfo _Dst,
                                      X86VectorVTInfo _Src> {
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                            (_Dst.VT (OpNode addr:$src))>,
            Sched<[SchedWriteShuffle.YMM.Folded]>,
            AVX5128IBase, EVEX;
}

// This should be used for the AVX512DQ broadcast instructions. It disables
// the unmasked patterns so that we only use the DQ instructions when masking
// is requested.
multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
                                         SDPatternOperator OpNode,
                                         X86VectorVTInfo _Dst,
                                         X86VectorVTInfo _Src> {
  let hasSideEffects = 0, mayLoad = 1 in
  defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                                  (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                                  (null_frag),
                                  (_Dst.VT (OpNode addr:$src))>,
            Sched<[SchedWriteShuffle.YMM.Folded]>,
            AVX5128IBase, EVEX;
}
let Predicates = [HasBWI] in {
  def : Pat<(v32f16 (X86VBroadcastld16 addr:$src)),
            (VPBROADCASTWZrm addr:$src)>;

  def : Pat<(v32f16 (X86VBroadcast (v8f16 VR128X:$src))),
            (VPBROADCASTWZrr VR128X:$src)>;
  def : Pat<(v32f16 (X86VBroadcast (f16 FR16X:$src))),
            (VPBROADCASTWZrr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
}
let Predicates = [HasVLX, HasBWI] in {
  def : Pat<(v8f16 (X86VBroadcastld16 addr:$src)),
            (VPBROADCASTWZ128rm addr:$src)>;
  def : Pat<(v16f16 (X86VBroadcastld16 addr:$src)),
            (VPBROADCASTWZ256rm addr:$src)>;

  def : Pat<(v8f16 (X86VBroadcast (v8f16 VR128X:$src))),
            (VPBROADCASTWZ128rr VR128X:$src)>;
  def : Pat<(v16f16 (X86VBroadcast (v8f16 VR128X:$src))),
            (VPBROADCASTWZ256rr VR128X:$src)>;

  def : Pat<(v8f16 (X86VBroadcast (f16 FR16X:$src))),
            (VPBROADCASTWZ128rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
  def : Pat<(v16f16 (X86VBroadcast (f16 FR16X:$src))),
            (VPBROADCASTWZ256rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
}

//===----------------------------------------------------------------------===//
// AVX-512 BROADCAST SUBVECTORS
//

defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                         X86SubVBroadcastld128, v16i32_info, v4i32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                         X86SubVBroadcastld128, v16f32_info, v4f32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
                         X86SubVBroadcastld256, v8i64_info, v4i64x_info>, REX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
                         X86SubVBroadcastld256, v8f64_info, v4f64x_info>, REX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
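
// These repeat a 128- or 256-bit memory operand to fill the destination, e.g.
// (illustrative):
//   vbroadcasti32x4 (%rdi), %zmm0 ; zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]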

let Predicates = [HasAVX512] in {
def : Pat<(v8f64 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTF64X4rm addr:$src)>;
def : Pat<(v16f32 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTF64X4rm addr:$src)>;
def : Pat<(v32f16 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTF64X4rm addr:$src)>;
def : Pat<(v8i64 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v16i32 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTI64X4rm addr:$src)>;

def : Pat<(v8f64 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4rm addr:$src)>;
def : Pat<(v16f32 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4rm addr:$src)>;
def : Pat<(v32f16 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4rm addr:$src)>;
def : Pat<(v8i64 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v16i32 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4rm addr:$src)>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
                        (v16f32 immAllZerosV)),
          (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
                        VR512:$src0),
          (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
                        (v16i32 immAllZerosV)),
          (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
                        VR512:$src0),
          (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;

def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
                        (v8f64 immAllZerosV)),
          (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
                        VR512:$src0),
          (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
                        (v8i64 immAllZerosV)),
          (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
                        VR512:$src0),
          (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}

let Predicates = [HasVLX] in {
defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                             X86SubVBroadcastld128, v8i32x_info, v4i32x_info>,
                           EVEX_V256, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                             X86SubVBroadcastld128, v8f32x_info, v4f32x_info>,
                           EVEX_V256, EVEX_CD8<32, CD8VT4>;

def : Pat<(v4f64 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4Z256rm addr:$src)>;
def : Pat<(v8f32 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4Z256rm addr:$src)>;
def : Pat<(v16f16 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4Z256rm addr:$src)>;
def : Pat<(v4i64 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v8i32 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v16i16 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4Z256rm addr:$src)>;

// Patterns for selects of bitcasted operations.
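// (The DAG may create the subvector broadcast with wider elements than the
// select mask, e.g. a v4f64 broadcast masked per f32 element; matching through
// bc_v8f32 lets the 32x4 instruction with its per-dword writemask be reused.)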
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
                        (v8f32 immAllZerosV)),
          (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
                        VR256X:$src0),
          (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
                        (v8i32 immAllZerosV)),
          (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
                        VR256X:$src0),
          (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
}

let Predicates = [HasVLX, HasDQI] in {
defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                             X86SubVBroadcastld128, v4i64x_info, v2i64x_info>, VEX_W1X,
                           EVEX_V256, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                             X86SubVBroadcastld128, v4f64x_info, v2f64x_info>, VEX_W1X,
                           EVEX_V256, EVEX_CD8<64, CD8VT2>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect_mask VK4WM:$mask,
                        (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
                        (v4f64 immAllZerosV)),
          (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK4WM:$mask,
                        (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
                        VR256X:$src0),
          (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK4WM:$mask,
                        (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
                        (v4i64 immAllZerosV)),
          (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK4WM:$mask,
                        (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
                        VR256X:$src0),
          (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
}

let Predicates = [HasDQI] in {
defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                         X86SubVBroadcastld128, v8i64_info, v2i64x_info>, REX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
                         X86SubVBroadcastld256, v16i32_info, v8i32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                         X86SubVBroadcastld128, v8f64_info, v2f64x_info>, REX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
                         X86SubVBroadcastld256, v16f32_info, v8f32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT8>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
                        (v16f32 immAllZerosV)),
          (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
                        VR512:$src0),
          (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
                        (v16i32 immAllZerosV)),
          (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
                        VR512:$src0),
          (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;

def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
                        (v8f64 immAllZerosV)),
          (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
                        VR512:$src0),
          (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
                        (v8i64 immAllZerosV)),
          (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
                        VR512:$src0),
          (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}

multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
                                        AVX512VLVectorVTInfo _Dst,
                                        AVX512VLVectorVTInfo _Src> {
  let Predicates = [HasDQI] in
    defm Z : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
                                       WriteShuffle256Ld, _Dst.info512,
                                       _Src.info512, _Src.info128, 0, null_frag, null_frag>,
             EVEX_V512;
  let Predicates = [HasDQI, HasVLX] in
    defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
                                          WriteShuffle256Ld, _Dst.info256,
                                          _Src.info256, _Src.info128, 0, null_frag, null_frag>,
                EVEX_V256;
}

multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
                                         AVX512VLVectorVTInfo _Dst,
                                         AVX512VLVectorVTInfo _Src> :
  avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {

  let Predicates = [HasDQI, HasVLX] in
    defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle,
                                          WriteShuffleXLd, _Dst.info128,
                                          _Src.info128, _Src.info128, 0, null_frag, null_frag>,
                EVEX_V128;
}

defm VBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
                                                     avx512vl_i32_info, avx512vl_i64_info>;
defm VBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
                                                    avx512vl_f32_info, avx512vl_f64_info>;

//===----------------------------------------------------------------------===//
// AVX-512 BROADCAST MASK TO VECTOR REGISTER
//---
multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
                                  X86VectorVTInfo _, RegisterClass KRC> {
  def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                      [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>,
                      EVEX, Sched<[WriteShuffle]>;
}

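// VPBROADCASTMW2D/VPBROADCASTMB2Q (CDI) copy a mask register into vector
// elements: each dword/qword lane receives the zero-extended low 16/8 bits of
// the source k-register, e.g. (illustrative) vpbroadcastmb2q %k0, %zmm0.
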
multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
  let Predicates = [HasCDI] in
  defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
  let Predicates = [HasCDI, HasVLX] in {
    defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
    defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
  }
}

defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
                                             avx512vl_i32_info, VK16>;
defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
                                             avx512vl_i64_info, VK8>, REX_W;

//===----------------------------------------------------------------------===//
// -- VPERMI2 - 3 source operands form --
multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched,
                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
    hasSideEffects = 0 in {
  defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
             (ins _.RC:$src2, _.RC:$src3),
             OpcodeStr, "$src3, $src2", "$src2, $src3",
             (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
           EVEX_4V, AVX5128IBase, Sched<[sched]>;

  let mayLoad = 1 in
  defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
             (ins _.RC:$src2, _.MemOp:$src3),
             OpcodeStr, "$src3, $src2", "$src2, $src3",
             (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
                    (_.VT (_.LdFrag addr:$src3)))), 1>,
           EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
      hasSideEffects = 0, mayLoad = 1 in
  defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
              (ins _.RC:$src2, _.ScalarMemOp:$src3),
              OpcodeStr, !strconcat("${src3}", _.BroadcastStr, ", $src2"),
              !strconcat("$src2, ${src3}", _.BroadcastStr),
              (_.VT (X86VPermt2 _.RC:$src2,
                     IdxVT.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
            AVX5128IBase, EVEX_4V, EVEX_B,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               AVX512VLVectorVTInfo ShuffleMask> {
  defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
                           ShuffleMask.info512>,
             avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512,
                              ShuffleMask.info512>, EVEX_V512;
  let Predicates = [HasVLX] in {
    defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
                                 ShuffleMask.info128>,
                   avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128,
                                    ShuffleMask.info128>, EVEX_V128;
    defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
                                 ShuffleMask.info256>,
                   avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256,
                                    ShuffleMask.info256>, EVEX_V256;
  }
}

multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched,
                                  AVX512VLVectorVTInfo VTInfo,
                                  AVX512VLVectorVTInfo Idx,
                                  Predicate Prd> {
  let Predicates = [Prd] in
  defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
                           Idx.info512>, EVEX_V512;
  let Predicates = [Prd, HasVLX] in {
    defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
                                 Idx.info128>, EVEX_V128;
    defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
                                 Idx.info256>, EVEX_V256;
  }
}
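
// Illustrative semantics for the dword form with 16-element tables:
//   vpermi2d %zmm3, %zmm2, %zmm1
// computes zmm1[i] = (zmm1[i] & 16) ? zmm3[zmm1[i] & 15] : zmm2[zmm1[i] & 15],
// i.e. the tied index operand is overwritten with the gathered elements.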
defm VPERMI2D  : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256,
                   avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2Q  : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
                   avx512vl_i64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
defm VPERMI2W  : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256,
                   avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
                 REX_W, EVEX_CD8<16, CD8VF>;
defm VPERMI2B  : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256,
                   avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
                 EVEX_CD8<8, CD8VF>;
defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256,
                   avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
                   avx512vl_f64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;

// Extra patterns to deal with extra bitcasts due to passthru and index being
// different types on the fp versions.
multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
                                  X86VectorVTInfo IdxVT,
                                  X86VectorVTInfo CastVT> {
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                                (X86VPermt2 (_.VT _.RC:$src2),
                                            (IdxVT.VT (bitconvert
                                                       (CastVT.VT _.RC:$src1))),
                                            _.RC:$src3),
                                (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3)>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                                (X86VPermt2 _.RC:$src2,
                                            (IdxVT.VT (bitconvert
                                                       (CastVT.VT _.RC:$src1))),
                                            (_.LdFrag addr:$src3)),
                                (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3)>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                                (X86VPermt2 _.RC:$src2,
                                            (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
                                            (_.BroadcastLdFrag addr:$src3)),
                                (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3)>;
}

// TODO: Should we add more casts? The vXi64 case is common due to ABI.
defm : avx512_perm_i_lowering<"VPERMI2PS", v16f32_info, v16i32_info, v8i64_info>;
defm : avx512_perm_i_lowering<"VPERMI2PS256", v8f32x_info, v8i32x_info, v4i64x_info>;
defm : avx512_perm_i_lowering<"VPERMI2PS128", v4f32x_info, v4i32x_info, v2i64x_info>;

// VPERMT2
multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched,
                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
             (ins IdxVT.RC:$src2, _.RC:$src3),
             OpcodeStr, "$src3, $src2", "$src2, $src3",
             (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
           EVEX_4V, AVX5128IBase, Sched<[sched]>;

  defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
             (ins IdxVT.RC:$src2, _.MemOp:$src3),
             OpcodeStr, "$src3, $src2", "$src2, $src3",
             (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
                    (_.LdFrag addr:$src3))), 1>,
           EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
  defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
              (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
              OpcodeStr, !strconcat("${src3}", _.BroadcastStr, ", $src2"),
              !strconcat("$src2, ${src3}", _.BroadcastStr),
              (_.VT (X86VPermt2 _.RC:$src1,
                     IdxVT.RC:$src2, (_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
            AVX5128IBase, EVEX_4V, EVEX_B,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               AVX512VLVectorVTInfo ShuffleMask> {
  defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
                           ShuffleMask.info512>,
             avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512,
                              ShuffleMask.info512>, EVEX_V512;
  let Predicates = [HasVLX] in {
    defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
                                 ShuffleMask.info128>,
                   avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128,
                                    ShuffleMask.info128>, EVEX_V128;
    defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
                                 ShuffleMask.info256>,
                   avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256,
                                    ShuffleMask.info256>, EVEX_V256;
  }
}

multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched,
                                  AVX512VLVectorVTInfo VTInfo,
                                  AVX512VLVectorVTInfo Idx, Predicate Prd> {
  let Predicates = [Prd] in
  defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
                           Idx.info512>, EVEX_V512;
  let Predicates = [Prd, HasVLX] in {
    defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
                                 Idx.info128>, EVEX_V128;
    defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
                                 Idx.info256>, EVEX_V256;
  }
}

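// VPERMT2* uses the same two-table element select as VPERMI2* (defined above),
// but the operand tied to $dst is the first data table ($src1) rather than the
// index vector, which is why the index appears as $src2 in the ins lists.
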
defm VPERMT2D : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256,
                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMT2Q : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
                  avx512vl_i64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
defm VPERMT2W : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256,
                  avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
                REX_W, EVEX_CD8<16, CD8VF>;
defm VPERMT2B : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256,
                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
                EVEX_CD8<8, CD8VF>;
defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256,
                  avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
                  avx512vl_f64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;

//===----------------------------------------------------------------------===//
// AVX-512 - BLEND using mask
//
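// With AVX-512 the blend condition is a k-register rather than a vector, e.g.
// (illustrative) vblendmps %zmm2, %zmm1, %zmm0 {%k1} picks zmm2 elements where
// the k1 bit is set and zmm1 elements elsewhere. Note that the defs below have
// no ISel patterns attached; selection of vselect-by-mask happens through
// separate patterns elsewhere, so these mainly provide encodings and asm forms.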

multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.RC:$src2),
                    !strconcat(OpcodeStr,
                               "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>,
                    EVEX_4V, Sched<[sched]>;
  def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
                     (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
                     !strconcat(OpcodeStr,
                                "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
                     []>, EVEX_4V, EVEX_K, Sched<[sched]>;
  def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
                      (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
                      !strconcat(OpcodeStr,
                                 "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
                      []>, EVEX_4V, EVEX_KZ, Sched<[sched]>;
  let mayLoad = 1 in {
  def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.MemOp:$src2),
                    !strconcat(OpcodeStr,
                               "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
                    []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
                     (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
                     !strconcat(OpcodeStr,
                                "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
                     []>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
                     Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
                      (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
                      !strconcat(OpcodeStr,
                                 "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
                      []>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
  }
}
multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in {
  def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
                      (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
                      !strconcat(OpcodeStr,
                                 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                                 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
                      EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;

  def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
                       (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
                       !strconcat(OpcodeStr,
                                  "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
                                  "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
                       EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
                       Sched<[sched.Folded, sched.ReadAfterFold]>;

  def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
                     (ins _.RC:$src1, _.ScalarMemOp:$src2),
                     !strconcat(OpcodeStr,
                                "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
                                "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
                     EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
                     Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
                        AVX512VLVectorVTInfo VTInfo> {
  defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
           WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                EVEX_V128;
  }
}

multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
                        AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasBWI] in
  defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [HasBWI, HasVLX] in {
    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                EVEX_V128;
  }
}

defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend,
                              avx512vl_f32_info>;
defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
                              avx512vl_f64_info>, REX_W;
defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend,
                              avx512vl_i32_info>;
defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
                              avx512vl_i64_info>, REX_W;
defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
                              avx512vl_i8_info>;
defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
                              avx512vl_i16_info>, REX_W;

//===----------------------------------------------------------------------===//
// Compare Instructions
//===----------------------------------------------------------------------===//

// avx512_cmp_scalar - AVX512 CMPSS and CMPSD

multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
                             PatFrag OpNode_su, PatFrag OpNodeSAE_su,
                             X86FoldableSchedWrite sched> {
  defm rr_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                  (outs _.KRC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                  "vcmp"#_.Suffix,
                  "$cc, $src2, $src1", "$src1, $src2, $cc",
                  (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
                  (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                             timm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
  let mayLoad = 1 in
  defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                  (outs _.KRC:$dst),
                  (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc),
                  "vcmp"#_.Suffix,
                  "$cc, $src2, $src1", "$src1, $src2, $cc",
                  (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
                          timm:$cc),
                  (OpNode_su (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
                             timm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;

  let Uses = [MXCSR] in
  defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                   (outs _.KRC:$dst),
                   (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                   "vcmp"#_.Suffix,
                   "$cc, {sae}, $src2, $src1", "$src1, $src2, {sae}, $cc",
                   (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                              timm:$cc),
                   (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                 timm:$cc)>,
                   EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;

  let isCodeGenOnly = 1 in {
    let isCommutable = 1 in
    def rr : AVX512Ii8<0xC2, MRMSrcReg,
                       (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, u8imm:$cc),
                       !strconcat("vcmp", _.Suffix,
                                  "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
                       [(set _.KRC:$dst, (OpNode _.FRC:$src1,
                                                 _.FRC:$src2,
                                                 timm:$cc))]>,
                       EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
    def rm : AVX512Ii8<0xC2, MRMSrcMem,
                       (outs _.KRC:$dst),
                       (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
                       !strconcat("vcmp", _.Suffix,
                                  "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
                       [(set _.KRC:$dst, (OpNode _.FRC:$src1,
                                                 (_.ScalarLdFrag addr:$src2),
                                                 timm:$cc))]>,
                       EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
                       Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  }
}

def X86cmpms_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                          (X86cmpms node:$src1, node:$src2, node:$cc), [{
  return N->hasOneUse();
}]>;
def X86cmpmsSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                             (X86cmpmsSAE node:$src1, node:$src2, node:$cc), [{
  return N->hasOneUse();
}]>;

let Predicates = [HasAVX512] in {
  let ExeDomain = SSEPackedSingle in
  defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsSAE,
                                   X86cmpms_su, X86cmpmsSAE_su,
                                   SchedWriteFCmp.Scl>, AVX512XSIi8Base;
  let ExeDomain = SSEPackedDouble in
  defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsSAE,
                                   X86cmpms_su, X86cmpmsSAE_su,
                                   SchedWriteFCmp.Scl>, AVX512XDIi8Base, REX_W;
}
let Predicates = [HasFP16], ExeDomain = SSEPackedSingle in
  defm VCMPSHZ : avx512_cmp_scalar<f16x_info, X86cmpms, X86cmpmsSAE,
                                   X86cmpms_su, X86cmpmsSAE_su,
                                   SchedWriteFCmp.Scl>, AVX512XSIi8Base, TA;

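// AVX-512 integer compares write their result directly into a mask register,
// e.g. (illustrative): vpcmpgtd %zmm1, %zmm0, %k0 {%k1} sets bit i of k0 when
// zmm0[i] > zmm1[i] (signed) and lane i is enabled in k1, clearing other bits.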
multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr,
                              X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, bit IsCommutable> {
  let isCommutable = IsCommutable, hasSideEffects = 0 in
  def rr : AVX512BI<opc, MRMSrcReg,
                    (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
                    !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    []>, EVEX_4V, Sched<[sched]>;
  let mayLoad = 1, hasSideEffects = 0 in
  def rm : AVX512BI<opc, MRMSrcMem,
                    (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
                    !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    []>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  let isCommutable = IsCommutable, hasSideEffects = 0 in
  def rrk : AVX512BI<opc, MRMSrcReg,
                     (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
                     !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
                                "$dst {${mask}}, $src1, $src2}"),
                     []>, EVEX_4V, EVEX_K, Sched<[sched]>;
  let mayLoad = 1, hasSideEffects = 0 in
  def rmk : AVX512BI<opc, MRMSrcMem,
                     (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
                     !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
                                "$dst {${mask}}, $src1, $src2}"),
                     []>, EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  bit IsCommutable> :
           avx512_icmp_packed<opc, OpcodeStr, sched, _, IsCommutable> {
  let mayLoad = 1, hasSideEffects = 0 in {
  def rmb : AVX512BI<opc, MRMSrcMem,
                     (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
                     !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
                                "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
                     []>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmbk : AVX512BI<opc, MRMSrcMem,
                      (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
                                              _.ScalarMemOp:$src2),
                      !strconcat(OpcodeStr,
                                 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                                 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
                      []>, EVEX_4V, EVEX_K, EVEX_B,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr,
                                 X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo VTInfo, Predicate prd,
                                 bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_packed<opc, OpcodeStr, sched.ZMM,
                              VTInfo.info512, IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed<opc, OpcodeStr, sched.YMM,
                                   VTInfo.info256, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_icmp_packed<opc, OpcodeStr, sched.XMM,
                                   VTInfo.info128, IsCommutable>, EVEX_V128;
  }
}

multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
                                     X86SchedWriteWidths sched,
                                     AVX512VLVectorVTInfo VTInfo,
                                     Predicate prd, bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.ZMM,
                                  VTInfo.info512, IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.YMM,
                                       VTInfo.info256, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.XMM,
                                       VTInfo.info128, IsCommutable>, EVEX_V128;
  }
}

// This fragment treats X86cmpm as commutable to help match loads in both
// operands for PCMPEQ.
def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>;
def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
                         (setcc node:$src1, node:$src2, SETGT)>;

// AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
// increase the pattern complexity the way an immediate would.
let AddedComplexity = 2 in {
// FIXME: Is there a better scheduler class for VPCMP?
defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb",
                  SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
                EVEX_CD8<8, CD8VF>, WIG;

defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw",
                  SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
                EVEX_CD8<16, CD8VF>, WIG;

defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd",
                  SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
                EVEX_CD8<32, CD8VF>;

defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq",
                  SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
                T8PD, REX_W, EVEX_CD8<64, CD8VF>;

defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb",
                  SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
                EVEX_CD8<8, CD8VF>, WIG;

defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw",
                  SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
                EVEX_CD8<16, CD8VF>, WIG;

defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd",
                  SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
                EVEX_CD8<32, CD8VF>;

defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq",
                  SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
                T8PD, REX_W, EVEX_CD8<64, CD8VF>;
}

def X86pcmpm_imm : SDNodeXForm<setcc, [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  uint8_t SSECC = X86::getVPCMPImmForCond(CC);
  return getI8Imm(SSECC, SDLoc(N));
}]>;

// Swapped operand version of the above.
def X86pcmpm_imm_commute : SDNodeXForm<setcc, [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  uint8_t SSECC = X86::getVPCMPImmForCond(CC);
  SSECC = X86::getSwappedVPCMPImm(SSECC);
  return getI8Imm(SSECC, SDLoc(N));
}]>;

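// For reference, the VPCMP immediate encodes the predicate as 0=EQ, 1=LT,
// 2=LE, 3=FALSE, 4=NE, 5=NLT (>=), 6=NLE (>), 7=TRUE (signed or unsigned per
// opcode); getSwappedVPCMPImm picks the predicate that gives the same result
// with the operands exchanged, e.g. LT becomes NLE.
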
multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
                          PatFrag Frag_su,
                          X86FoldableSchedWrite sched,
                          X86VectorVTInfo _, string Name> {
  let isCommutable = 1 in
  def rri : AVX512AIi8<opc, MRMSrcReg,
              (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
              !strconcat("vpcmp", Suffix,
                         "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
              [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
                                                 (_.VT _.RC:$src2),
                                                 cond)))]>,
              EVEX_4V, Sched<[sched]>;
  def rmi : AVX512AIi8<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
              !strconcat("vpcmp", Suffix,
                         "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
              [(set _.KRC:$dst, (_.KVT
                                 (Frag:$cc
                                  (_.VT _.RC:$src1),
                                  (_.VT (_.LdFrag addr:$src2)),
                                  cond)))]>,
              EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  let isCommutable = 1 in
  def rrik : AVX512AIi8<opc, MRMSrcReg,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
                                       u8imm:$cc),
               !strconcat("vpcmp", Suffix,
                          "\t{$cc, $src2, $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, $src2, $cc}"),
               [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                      (_.KVT (Frag_su:$cc (_.VT _.RC:$src1),
                                                          (_.VT _.RC:$src2),
                                                          cond))))]>,
               EVEX_4V, EVEX_K, Sched<[sched]>;
  def rmik : AVX512AIi8<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
                                       u8imm:$cc),
               !strconcat("vpcmp", Suffix,
                          "\t{$cc, $src2, $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, $src2, $cc}"),
               [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                      (_.KVT
                                       (Frag_su:$cc
                                        (_.VT _.RC:$src1),
                                        (_.VT (_.LdFrag addr:$src2)),
                                        cond))))]>,
               EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;

  def : Pat<(_.KVT (Frag:$cc (_.LdFrag addr:$src2),
                             (_.VT _.RC:$src1), cond)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmi")
             _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>;

  def : Pat<(and _.KRCWM:$mask,
                 (_.KVT (Frag_su:$cc (_.LdFrag addr:$src2),
                                     (_.VT _.RC:$src1), cond))),
            (!cast<Instruction>(Name#_.ZSuffix#"rmik")
             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
             (X86pcmpm_imm_commute $cc))>;
}

multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
                              PatFrag Frag_su, X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, string Name> :
           avx512_icmp_cc<opc, Suffix, Frag, Frag_su, sched, _, Name> {
  def rmib : AVX512AIi8<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
                                       u8imm:$cc),
               !strconcat("vpcmp", Suffix,
                          "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
                          "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
               [(set _.KRC:$dst, (_.KVT (Frag:$cc
                                         (_.VT _.RC:$src1),
                                         (_.BroadcastLdFrag addr:$src2),
                                         cond)))]>,
               EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmibk : AVX512AIi8<opc, MRMSrcMem,
                (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
                                        _.ScalarMemOp:$src2, u8imm:$cc),
                !strconcat("vpcmp", Suffix,
                           "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                           "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
                [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                       (_.KVT (Frag_su:$cc
                                               (_.VT _.RC:$src1),
                                               (_.BroadcastLdFrag addr:$src2),
                                               cond))))]>,
                EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;

  def : Pat<(_.KVT (Frag:$cc (_.BroadcastLdFrag addr:$src2),
                             (_.VT _.RC:$src1), cond)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmib")
             _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>;

  def : Pat<(and _.KRCWM:$mask,
                 (_.KVT (Frag_su:$cc (_.BroadcastLdFrag addr:$src2),
                                     (_.VT _.RC:$src1), cond))),
            (!cast<Instruction>(Name#_.ZSuffix#"rmibk")
             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
             (X86pcmpm_imm_commute $cc))>;
}

multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
                             PatFrag Frag_su, X86SchedWriteWidths sched,
                             AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
                          sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
                               sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
                               sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
  }
}

multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
                                 PatFrag Frag_su, X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
                              sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
                                   sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
                                   sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
  }
}

def X86pcmpm : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                       (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return !ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;

def X86pcmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                          (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;

def X86pcmpum : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                        (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;

def X86pcmpum_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                           (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;

// FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_su,
                                SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
              EVEX_CD8<8, CD8VF>;
defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_su,
                                 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
               EVEX_CD8<8, CD8VF>;

defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_su,
                                SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
              REX_W, EVEX_CD8<16, CD8VF>;
defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_su,
                                 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
               REX_W, EVEX_CD8<16, CD8VF>;

defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_su,
                                    SchedWriteVecALU, avx512vl_i32_info,
                                    HasAVX512>, EVEX_CD8<32, CD8VF>;
defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_su,
                                     SchedWriteVecALU, avx512vl_i32_info,
                                     HasAVX512>, EVEX_CD8<32, CD8VF>;

defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_su,
                                    SchedWriteVecALU, avx512vl_i64_info,
                                    HasAVX512>, REX_W, EVEX_CD8<64, CD8VF>;
defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_su,
                                     SchedWriteVecALU, avx512vl_i64_info,
                                     HasAVX512>, REX_W, EVEX_CD8<64, CD8VF>;

def X86cmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                         (X86cmpm node:$src1, node:$src2, node:$cc), [{
  return N->hasOneUse();
}]>;

def X86cmpm_imm_commute : SDNodeXForm<timm, [{
  uint8_t Imm = X86::getSwappedVCMPImm(N->getZExtValue() & 0x1f);
  return getI8Imm(Imm, SDLoc(N));
}]>;

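// VCMP's AVX-512 immediate covers the full 5-bit predicate range (0x00-0x1f),
// hence the masking above before the swap; getSwappedVCMPImm returns the
// predicate that yields the same result with the two sources exchanged.
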
multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
                              string Name> {
let Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                "vcmp"#_.Suffix,
                "$cc, $src2, $src1", "$src1, $src2, $cc",
                (X86any_cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
                (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
                1>, Sched<[sched]>;

  defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
                "vcmp"#_.Suffix,
                "$cc, $src2, $src1", "$src1, $src2, $cc",
                (X86any_cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
                             timm:$cc),
                (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
                            timm:$cc)>,
                Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                (outs _.KRC:$dst),
                (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
                "vcmp"#_.Suffix,
                "$cc, ${src2}"#_.BroadcastStr#", $src1",
                "$src1, ${src2}"#_.BroadcastStr#", $cc",
                (X86any_cmpm (_.VT _.RC:$src1),
                             (_.VT (_.BroadcastLdFrag addr:$src2)),
                             timm:$cc),
                (X86cmpm_su (_.VT _.RC:$src1),
                            (_.VT (_.BroadcastLdFrag addr:$src2)),
                            timm:$cc)>,
                EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }

  // Patterns for selecting with loads in other operand.
  def : Pat<(X86any_cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
                         timm:$cc),
            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
             (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2),
                                            (_.VT _.RC:$src1),
                                            timm:$cc)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
             _.RC:$src1, addr:$src2,
             (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(X86any_cmpm (_.BroadcastLdFrag addr:$src2),
                         (_.VT _.RC:$src1), timm:$cc),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
             (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.BroadcastLdFrag addr:$src2),
                                            (_.VT _.RC:$src1),
                                            timm:$cc)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
             _.RC:$src1, addr:$src2,
             (X86cmpm_imm_commute timm:$cc))>;

  // Patterns for mask intrinsics.
  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc,
                      (_.KVT immAllOnesV)),
            (!cast<Instruction>(Name#_.ZSuffix#"rri") _.RC:$src1, _.RC:$src2, timm:$cc)>;

  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#"rrik") _.KRCWM:$mask, _.RC:$src1,
             _.RC:$src2, timm:$cc)>;

  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
                      (_.KVT immAllOnesV)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, timm:$cc)>;

  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
                      _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask, _.RC:$src1,
             addr:$src2, timm:$cc)>;

  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
                      (_.KVT immAllOnesV)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, timm:$cc)>;

  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
                      _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, _.RC:$src1,
             addr:$src2, timm:$cc)>;

  // Patterns for mask intrinsics with loads in other operand.
  def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
                      (_.KVT immAllOnesV)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
             (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
                      _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
             _.RC:$src1, addr:$src2,
             (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
                      (_.KVT immAllOnesV)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
             (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
                      _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
             _.RC:$src1, addr:$src2,
             (X86cmpm_imm_commute timm:$cc))>;
}

multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  // Comparison code form (VCMP[EQ/LT/LE/...]).
  let Uses = [MXCSR] in
  defm rrib : AVX512_maskable_custom_cmp<0xC2, MRMSrcReg, (outs _.KRC:$dst),
                 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2, u8imm:$cc),
                 "vcmp"#_.Suffix,
                 "$cc, {sae}, $src2, $src1",
                 "$src1, $src2, {sae}, $cc",
                 [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
                                    (_.VT _.RC:$src2), timm:$cc, (_.KVT immAllOnesV)))],
                 [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
                                    (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask))]>,
                 EVEX_B, Sched<[sched]>;
}

multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
                       Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in {
    defm Z : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
             avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512;
  }
  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
    defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
  }
}

defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
              AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, REX_W;
defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
              AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VCMPPH : avx512_vcmp<SchedWriteFCmp, avx512vl_f16_info, HasFP16>,
              AVX512PSIi8Base, EVEX_4V, EVEX_CD8<16, CD8VF>, TA;

// Patterns to select fp compares with load as first operand.
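// (The predicate is commuted with X86cmpm_imm_commute so the register operand
// can stay in $src1 while the load folds into $src2.)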

// Patterns to select fp compares with load as first operand.
let Predicates = [HasAVX512] in {
  def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1, timm:$cc)),
            (VCMPSDZrm FR64X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1, timm:$cc)),
            (VCMPSSZrm FR32X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
}

let Predicates = [HasFP16] in {
  def : Pat<(v1i1 (X86cmpms (loadf16 addr:$src2), FR16X:$src1, timm:$cc)),
            (VCMPSHZrm FR16X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
}

// ----------------------------------------------------------------
// FPClass

def X86Vfpclasss_su : PatFrag<(ops node:$src1, node:$src2),
                              (X86Vfpclasss node:$src1, node:$src2), [{
  return N->hasOneUse();
}]>;

def X86Vfpclass_su : PatFrag<(ops node:$src1, node:$src2),
                             (X86Vfpclass node:$src1, node:$src2), [{
  return N->hasOneUse();
}]>;

// Handle the scalar fpclass instruction: mask = op(reg_scalar, imm)
//                                        op(mem_scalar, imm)
multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                 Predicate prd> {
  let Predicates = [prd], ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
    def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                    (ins _.RC:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1),
                                     (i32 timm:$src2)))]>,
                    Sched<[sched]>;
    def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
                     OpcodeStr#_.Suffix#
                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst,(and _.KRCWM:$mask,
                                      (X86Vfpclasss_su (_.VT _.RC:$src1),
                                      (i32 timm:$src2))))]>,
                     EVEX_K, Sched<[sched]>;
    def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix#
                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,
                          (X86Vfpclasss (_.ScalarIntMemFrags addr:$src1),
                                        (i32 timm:$src2)))]>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
    def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
                     OpcodeStr#_.Suffix#
                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst,(and _.KRCWM:$mask,
                                      (X86Vfpclasss_su (_.ScalarIntMemFrags addr:$src1),
                                      (i32 timm:$src2))))]>,
                     EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Handle the vector fpclass instruction: mask = fpclass(reg_vec, imm)
//                                        fpclass(mem_vec, imm)
//                                        fpclass(broadcast(eltVt), imm)
multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                 string mem>{
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
    def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                    (ins _.RC:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1),
                                     (i32 timm:$src2)))]>,
                    Sched<[sched]>;
    def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
                     OpcodeStr#_.Suffix#
                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst,(and _.KRCWM:$mask,
                                      (X86Vfpclass_su (_.VT _.RC:$src1),
                                      (i32 timm:$src2))))]>,
                     EVEX_K, Sched<[sched]>;
    def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.MemOp:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix#"{"#mem#"}"#
                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,(X86Vfpclass
                                     (_.VT (_.LdFrag addr:$src1)),
                                     (i32 timm:$src2)))]>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
    def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
                     OpcodeStr#_.Suffix#"{"#mem#"}"#
                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su
                                       (_.VT (_.LdFrag addr:$src1)),
                                       (i32 timm:$src2))))]>,
                     EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
    def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                     (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                     OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
                     _.BroadcastStr#", $dst|$dst, ${src1}"
                     #_.BroadcastStr#", $src2}",
                     [(set _.KRC:$dst,(X86Vfpclass
                                      (_.VT (_.BroadcastLdFrag addr:$src1)),
                                      (i32 timm:$src2)))]>,
                     EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
    def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                      (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
                      OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
                      _.BroadcastStr#", $dst {${mask}}|$dst {${mask}}, ${src1}"#
                      _.BroadcastStr#", $src2}",
                      [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su
                                       (_.VT (_.BroadcastLdFrag addr:$src1)),
                                       (i32 timm:$src2))))]>,
                      EVEX_B, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }

  // Allow registers or broadcast with the x, y, z suffix we use to disambiguate
  // the memory form.
  def : InstAlias<OpcodeStr#_.Suffix#mem#
                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  (!cast<Instruction>(NAME#"rr")
                   _.KRC:$dst, _.RC:$src1, i32u8imm:$src2), 0, "att">;
  def : InstAlias<OpcodeStr#_.Suffix#mem#
                  "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                  (!cast<Instruction>(NAME#"rrk")
                   _.KRC:$dst, _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 0, "att">;
  def : InstAlias<OpcodeStr#_.Suffix#mem#
                  "\t{$src2, ${src1}"#_.BroadcastStr#", $dst|$dst, ${src1}"#
                  _.BroadcastStr#", $src2}",
                  (!cast<Instruction>(NAME#"rmb")
                   _.KRC:$dst, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
  def : InstAlias<OpcodeStr#_.Suffix#mem#
                  "\t{$src2, ${src1}"#_.BroadcastStr#", $dst {${mask}}|"
                  "$dst {${mask}}, ${src1}"#_.BroadcastStr#", $src2}",
                  (!cast<Instruction>(NAME#"rmbk")
                   _.KRC:$dst, _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
}

multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
                                     bits<8> opc, X86SchedWriteWidths sched,
                                     Predicate prd>{
  let Predicates = [prd] in {
    defm Z    : avx512_vector_fpclass<opc, OpcodeStr, sched.ZMM,
                                      _.info512, "z">, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, sched.XMM,
                                      _.info128, "x">, EVEX_V128;
    defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, sched.YMM,
                                      _.info256, "y">, EVEX_V256;
  }
}
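
// The fpclass immediate is a bitmask of categories to test; as a guide (per
// the SDM): bit 0 QNaN, bit 1 +0, bit 2 -0, bit 3 +Inf, bit 4 -Inf, bit 5
// denormal, bit 6 finite negative, bit 7 SNaN. E.g.
//   vfpclassps k1, zmm0, 0x81
// sets a mask bit for every element that is any kind of NaN.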
multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
                                 bits<8> opcScalar, X86SchedWriteWidths sched> {
  defm PH : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f16_info, opcVec,
                                      sched, HasFP16>,
                                      EVEX_CD8<16, CD8VF>, AVX512PSIi8Base, TA;
  defm SHZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
                                   sched.Scl, f16x_info, HasFP16>,
                                   EVEX_CD8<16, CD8VT1>, AVX512PSIi8Base, TA;
  defm PS : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f32_info, opcVec,
                                      sched, HasDQI>,
                                      EVEX_CD8<32, CD8VF>, AVX512AIi8Base;
  defm PD : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f64_info, opcVec,
                                      sched, HasDQI>,
                                      EVEX_CD8<64, CD8VF>, AVX512AIi8Base, REX_W;
  defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
                                   sched.Scl, f32x_info, HasDQI>, VEX_LIG,
                                   EVEX_CD8<32, CD8VT1>, AVX512AIi8Base;
  defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
                                   sched.Scl, f64x_info, HasDQI>, VEX_LIG,
                                   EVEX_CD8<64, CD8VT1>, AVX512AIi8Base, REX_W;
}

defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp>, EVEX;

//-----------------------------------------------------------------
// Mask register copy, including
//   - copy between mask registers
//   - load/store mask registers
//   - copy from GPR to mask register and vice versa
//
multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
                           string OpcodeStr, RegisterClass KRC,
                           ValueType vvt, X86MemOperand x86memop> {
  let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
  def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
           Sched<[WriteMove]>;
  def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
             [(set KRC:$dst, (vvt (load addr:$src)))]>,
           Sched<[WriteLoad]>;
  def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
             [(store KRC:$src, addr:$dst)]>,
           Sched<[WriteStore]>;
}

multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
                               string OpcodeStr,
                               RegisterClass KRC, RegisterClass GRC> {
  let hasSideEffects = 0 in {
    def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
             Sched<[WriteMove]>;
    def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
             Sched<[WriteMove]>;
  }
}

let Predicates = [HasDQI] in
  defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
               VEX, PD;

let Predicates = [HasAVX512] in
  defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
               VEX, PS;

let Predicates = [HasBWI] in {
  defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1, i32mem>,
               VEX, PD, REX_W;
  defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
               VEX, XD;
  defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
               VEX, PS, REX_W;
  defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
               VEX, XD, REX_W;
}

// GR from/to mask register
def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;
def : Pat<(i8 (trunc (i16 (bitconvert (v16i1 VK16:$src))))),
          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_8bit)>;
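
// Without DQI there is no kmovb, so v8i1 round-trips through a 32-bit GPR:
// the 8-bit value is inserted into or extracted from the low byte, and the
// cross-class copy is left to the register allocator (kmovw, or kmovb when
// DQI is available).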
def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;

def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
          (KMOVWrk VK16:$src)>;
def : Pat<(i64 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
          (SUBREG_TO_REG (i64 0), (KMOVWrk VK16:$src), sub_32bit)>;
def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
          (COPY_TO_REGCLASS VK16:$src, GR32)>;
def : Pat<(i64 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
          (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK16:$src, GR32), sub_32bit)>;

def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
          (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
def : Pat<(i64 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
          (SUBREG_TO_REG (i64 0), (KMOVBrk VK8:$src), sub_32bit)>, Requires<[HasDQI]>;
def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
          (COPY_TO_REGCLASS VK8:$src, GR32)>;
def : Pat<(i64 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
          (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK8:$src, GR32), sub_32bit)>;

def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
          (COPY_TO_REGCLASS GR32:$src, VK32)>;
def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
          (COPY_TO_REGCLASS VK32:$src, GR32)>;
def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
          (COPY_TO_REGCLASS GR64:$src, VK64)>;
def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
          (COPY_TO_REGCLASS VK64:$src, GR64)>;

// Load/store kreg
let Predicates = [HasDQI] in {
  def : Pat<(v1i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
  def : Pat<(v2i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
  def : Pat<(v4i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
}

let Predicates = [HasAVX512] in {
  def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
            (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
  def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))),
            (KMOVWkm addr:$src)>;
}

def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
                         SDTypeProfile<1, 2, [SDTCisVT<0, i8>,
                                              SDTCVecEltisVT<1, i1>,
                                              SDTCisPtrTy<2>]>>;

let Predicates = [HasAVX512] in {
  multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
    def : Pat<(maskVT (scalar_to_vector GR32:$src)),
              (COPY_TO_REGCLASS GR32:$src, maskRC)>;

    def : Pat<(maskVT (scalar_to_vector GR8:$src)),
              (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;

    def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))),
              (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>;

    def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))),
              (i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>;
  }

  defm : operation_gpr_mask_copy_lowering<VK1,  v1i1>;
  defm : operation_gpr_mask_copy_lowering<VK2,  v2i1>;
  defm : operation_gpr_mask_copy_lowering<VK4,  v4i1>;
  defm : operation_gpr_mask_copy_lowering<VK8,  v8i1>;
  defm : operation_gpr_mask_copy_lowering<VK16, v16i1>;
  defm : operation_gpr_mask_copy_lowering<VK32, v32i1>;
  defm : operation_gpr_mask_copy_lowering<VK64, v64i1>;
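
  // A single mask bit inserted into a zeroed v16i1 is materialized in a GPR:
  // the bit is isolated with an AND against 1 and then moved to a mask
  // register, i.e. roughly "and eax, 1; kmovw k0, eax".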
  def : Pat<(insert_subvector (v16i1 immAllZerosV),
                              (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
            (KMOVWkr (AND32ri
                      (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
                      (i32 1)))>;
}

// Mask unary operation
// - KNOT
multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
                            RegisterClass KRC, SDPatternOperator OpNode,
                            X86FoldableSchedWrite sched, Predicate prd> {
  let Predicates = [prd] in
  def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
             [(set KRC:$dst, (OpNode KRC:$src))]>,
           Sched<[sched]>;
}

multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNode,
                                X86FoldableSchedWrite sched> {
  defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                            sched, HasDQI>, VEX, PD;
  defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                            sched, HasAVX512>, VEX, PS;
  defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                            sched, HasBWI>, VEX, PD, REX_W;
  defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                            sched, HasBWI>, VEX, PS, REX_W;
}

// TODO - do we need a X86SchedWriteWidths::KMASK type?
defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;

// KNL does not support KMOVB, so an 8-bit mask is promoted to 16-bit.
let Predicates = [HasAVX512, NoDQI] in
def : Pat<(vnot VK8:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;

def : Pat<(vnot VK4:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
def : Pat<(vnot VK2:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
def : Pat<(vnot VK1:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK1:$src, VK16)), VK1)>;

// Mask binary operation
// - KAND, KANDN, KOR, KXNOR, KXOR
multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
                             RegisterClass KRC, SDPatternOperator OpNode,
                             X86FoldableSchedWrite sched, Predicate prd,
                             bit IsCommutable> {
  let Predicates = [prd], isCommutable = IsCommutable in
  def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
             !strconcat(OpcodeStr,
                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>,
           Sched<[sched]>;
}

multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
                                 SDPatternOperator OpNode,
                                 X86FoldableSchedWrite sched, bit IsCommutable,
                                 Predicate prdW = HasAVX512> {
  defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                             sched, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
  defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                             sched, prdW, IsCommutable>, VEX_4V, VEX_L, PS;
  defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                             sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, REX_W, PD;
  defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                             sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, REX_W, PS;
}

// These nodes use 'vnot' instead of 'not' to support vectors.
def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;
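
// E.g. (vandn k1, k2) computes ~k1 & k2 and maps onto kandn, while
// (vxnor k1, k2) computes ~(k1 ^ k2) and maps onto kxnor.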
// TODO - do we need a X86SchedWriteWidths::KMASK type?
defm KAND  : avx512_mask_binop_all<0x41, "kand",  and,     SchedWriteVecLogic.XMM, 1>;
defm KOR   : avx512_mask_binop_all<0x45, "kor",   or,      SchedWriteVecLogic.XMM, 1>;
defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor,   SchedWriteVecLogic.XMM, 1>;
defm KXOR  : avx512_mask_binop_all<0x47, "kxor",  xor,     SchedWriteVecLogic.XMM, 1>;
defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn,   SchedWriteVecLogic.XMM, 0>;
defm KADD  : avx512_mask_binop_all<0x4A, "kadd",  X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;

multiclass avx512_binop_pat<SDPatternOperator VOpNode, Instruction Inst> {
  // With AVX512F, an 8-bit mask is promoted to a 16-bit mask; with the DQI
  // set this type is legal and the KxxxB instructions are used instead.
  let Predicates = [NoDQI] in
  def : Pat<(VOpNode VK8:$src1, VK8:$src2),
            (COPY_TO_REGCLASS
             (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
                   (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;

  // All types smaller than 8 bits require conversion anyway.
  def : Pat<(VOpNode VK1:$src1, VK1:$src2),
            (COPY_TO_REGCLASS (Inst
                               (COPY_TO_REGCLASS VK1:$src1, VK16),
                               (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
  def : Pat<(VOpNode VK2:$src1, VK2:$src2),
            (COPY_TO_REGCLASS (Inst
                               (COPY_TO_REGCLASS VK2:$src1, VK16),
                               (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
  def : Pat<(VOpNode VK4:$src1, VK4:$src2),
            (COPY_TO_REGCLASS (Inst
                               (COPY_TO_REGCLASS VK4:$src1, VK16),
                               (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
}

defm : avx512_binop_pat<and,   KANDWrr>;
defm : avx512_binop_pat<vandn, KANDNWrr>;
defm : avx512_binop_pat<or,    KORWrr>;
defm : avx512_binop_pat<vxnor, KXNORWrr>;
defm : avx512_binop_pat<xor,   KXORWrr>;

// Mask unpacking
multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst,
                             X86KVectorVTInfo Src, X86FoldableSchedWrite sched,
                             Predicate prd> {
  let Predicates = [prd] in {
    let hasSideEffects = 0 in
    def rr : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst),
               (ins Src.KRC:$src1, Src.KRC:$src2),
               "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
             VEX_4V, VEX_L, Sched<[sched]>;

    def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)),
              (!cast<Instruction>(NAME#rr) Src.KRC:$src2, Src.KRC:$src1)>;
  }
}

defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info,  WriteShuffle, HasAVX512>, PD;
defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, PS;
defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, PS, REX_W;

// Mask bit testing
multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                              SDNode OpNode, X86FoldableSchedWrite sched,
                              Predicate prd> {
  let Predicates = [prd], Defs = [EFLAGS] in
  def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
             [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>,
           Sched<[sched]>;
}

multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched,
                                Predicate prdW = HasAVX512> {
  defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8,  OpNode, sched, HasDQI>,
           VEX, PD;
  defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>,
           VEX, PS;
  defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>,
           VEX, PS, REX_W;
  defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>,
           VEX, PD, REX_W;
}
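
// kortest sets ZF when the OR of the two masks is all zeros and CF when it is
// all ones; ktest does the same for AND/ANDN. A common idiom is:
//   vpcmpeqd k1, zmm0, zmm1
//   kortestw k1, k1
//   jz .no_match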

// TODO - do we need a X86SchedWriteWidths::KMASK type?
defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>;
defm KTEST   : avx512_mask_testop_w<0x99, "ktest",   X86ktest,   SchedWriteVecLogic.XMM, HasDQI>;

// Mask shift
multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                               SDNode OpNode, X86FoldableSchedWrite sched> {
  let Predicates = [HasAVX512] in
  def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
               !strconcat(OpcodeStr,
                          "\t{$imm, $src, $dst|$dst, $src, $imm}"),
               [(set KRC:$dst, (OpNode KRC:$src, (i8 timm:$imm)))]>,
             Sched<[sched]>;
}

multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
                                 SDNode OpNode, X86FoldableSchedWrite sched> {
  defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                               sched>, VEX, TAPD, REX_W;
  let Predicates = [HasDQI] in
  defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                               sched>, VEX, TAPD;
  let Predicates = [HasBWI] in {
    defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                                 sched>, VEX, TAPD, REX_W;
    defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                                 sched>, VEX, TAPD;
  }
}

defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>;
defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;

// Patterns for comparing 128/256-bit integer vectors using a 512-bit instruction.
multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
                                                 string InstStr,
                                                 X86VectorVTInfo Narrow,
                                                 X86VectorVTInfo Wide> {
def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
                                (Narrow.VT Narrow.RC:$src2), cond)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrri")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
            (X86pcmpm_imm $cc)), Narrow.KRC)>;

def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
                                                    (Narrow.VT Narrow.RC:$src2),
                                                    cond)))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
                             (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
                             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
                             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
                             (X86pcmpm_imm $cc)), Narrow.KRC)>;
}
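
// For example, a v8i32 signed compare of ymm values without VLX becomes a
// VPCMPDZrri on the widened zmm registers (the upper elements hold garbage,
// but their mask bits are never read), and the v8i1 result is the low bits of
// the k register, extracted with COPY_TO_REGCLASS.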

multiclass axv512_icmp_packed_cc_rmb_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
                                                     string InstStr,
                                                     X86VectorVTInfo Narrow,
                                                     X86VectorVTInfo Wide> {
// Broadcast load.
def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
                                (Narrow.BroadcastLdFrag addr:$src2), cond)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrmib")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>;

def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (Narrow.KVT
                            (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
                                         (Narrow.BroadcastLdFrag addr:$src2),
                                         cond)))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
                             (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
                             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
                             addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>;

// Commuted with broadcast load.
def : Pat<(Narrow.KVT (Frag:$cc (Narrow.BroadcastLdFrag addr:$src2),
                                (Narrow.VT Narrow.RC:$src1),
                                cond)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrmib")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>;

def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (Narrow.KVT
                            (Frag_su:$cc (Narrow.BroadcastLdFrag addr:$src2),
                                         (Narrow.VT Narrow.RC:$src1),
                                         cond)))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
                             (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
                             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
                             addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>;
}

// Same as above, but for fp types which don't use PatFrags.
multiclass axv512_cmp_packed_cc_no_vlx_lowering<string InstStr,
                                                X86VectorVTInfo Narrow,
                                                X86VectorVTInfo Wide> {
def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
                               (Narrow.VT Narrow.RC:$src2), timm:$cc)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrri")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
            timm:$cc), Narrow.KRC)>;

def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
                                       (Narrow.VT Narrow.RC:$src2), timm:$cc))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
                             (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
                             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
                             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
                             timm:$cc), Narrow.KRC)>;

// Broadcast load.
def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
                               (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrmbi")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            addr:$src2, timm:$cc), Narrow.KRC)>;

def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
                                       (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
                             (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
                             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
                             addr:$src2, timm:$cc), Narrow.KRC)>;
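
// Commuting the compare swaps which operand the predicate applies to, so the
// immediate is rewritten with X86cmpm_imm_commute/X86pcmpm_imm_commute (e.g.
// LT becomes GT) and the broadcast load can stay in the memory-operand
// position.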
// Commuted with broadcast load.
def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
                               (Narrow.VT Narrow.RC:$src1), timm:$cc)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrmbi")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;

def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (X86cmpm_su (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
                                       (Narrow.VT Narrow.RC:$src1), timm:$cc))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
                             (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
                             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
                             addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
}

let Predicates = [HasAVX512, NoVLX] in {
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;

  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;

  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;

  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v8f32x_info, v16f32_info>;
  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v4f32x_info, v16f32_info>;
  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v4f64x_info, v8f64_info>;
  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>;
}

let Predicates = [HasBWI, NoVLX] in {
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v16i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v16i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v16i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v16i16x_info, v32i16_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v8i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v8i16x_info, v32i16_info>;
}

// Mask setting all 0s or 1s
multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, SDPatternOperator Val> {
  let Predicates = [HasAVX512] in
  let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
      SchedRW = [WriteZero] in
  def NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
                [(set KRC:$dst, (VT Val))]>;
}

multiclass avx512_mask_setop_w<SDPatternOperator Val> {
  defm W : avx512_mask_setop<VK16, v16i1, Val>;
  defm D : avx512_mask_setop<VK32, v32i1, Val>;
  defm Q : avx512_mask_setop<VK64, v64i1, Val>;
}

defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
defm KSET1 : avx512_mask_setop_w<immAllOnesV>;

// With AVX-512 only, an 8-bit mask is promoted to a 16-bit mask.
let Predicates = [HasAVX512] in {
  def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
  def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
  def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
  def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
  def : Pat<(v8i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK8)>;
  def : Pat<(v4i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK4)>;
  def : Pat<(v2i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK2)>;
  def : Pat<(v1i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK1)>;
}

// Patterns for kmask insert_subvector/extract_subvector to/from index=0
multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
                                             RegisterClass RC, ValueType VT> {
  def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
            (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;

  def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
            (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
}
defm : operation_subvector_mask_lowering<VK1, v1i1, VK2,  v2i1>;
defm : operation_subvector_mask_lowering<VK1, v1i1, VK4,  v4i1>;
defm : operation_subvector_mask_lowering<VK1, v1i1, VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK1, v1i1, VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK1, v1i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK1, v1i1, VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK2, v2i1, VK4,  v4i1>;
defm : operation_subvector_mask_lowering<VK2, v2i1, VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK2, v2i1, VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK2, v2i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK2, v2i1, VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK4, v4i1, VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK4, v4i1, VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK4, v4i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK4, v4i1, VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK8, v8i1, VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK8, v8i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK8, v8i1, VK64, v64i1>;
defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;

//===----------------------------------------------------------------------===//
// AVX-512 - Aligned and unaligned load and store
//

multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
                       X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
                       X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
                       bit NoRMPattern = 0,
                       SDPatternOperator SelectOprr = vselect> {
  let hasSideEffects = 0 in {
    let isMoveReg = 1 in
    def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
                      _.ExeDomain>, EVEX, Sched<[Sched.RR]>,
                      EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
    def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
                        (ins _.KRCWM:$mask, _.RC:$src),
                        !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                                   "${dst} {${mask}} {z}, $src}"),
                        [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
                                                (_.VT _.RC:$src),
                                                _.ImmAllZerosV)))], _.ExeDomain>,
                        EVEX, EVEX_KZ, Sched<[Sched.RR]>;

    let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
    def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                      !if(NoRMPattern, [],
                          [(set _.RC:$dst,
                            (_.VT (ld_frag addr:$src)))]),
                      _.ExeDomain>, EVEX, Sched<[Sched.RM]>,
                      EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;

    let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
      def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
                         (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
                         !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
                                    "${dst} {${mask}}, $src1}"),
                         [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
                                                 (_.VT _.RC:$src1),
                                                 (_.VT _.RC:$src0))))], _.ExeDomain>,
                         EVEX, EVEX_K, Sched<[Sched.RR]>;
      def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
                         (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
                         !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
                                    "${dst} {${mask}}, $src1}"),
                         [(set _.RC:$dst, (_.VT
                                           (vselect_mask _.KRCWM:$mask,
                                            (_.VT (ld_frag addr:$src1)),
                                            (_.VT _.RC:$src0))))], _.ExeDomain>,
                         EVEX, EVEX_K, Sched<[Sched.RM]>;
    }
    def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
                        (ins _.KRCWM:$mask, _.MemOp:$src),
                        OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
                        "${dst} {${mask}} {z}, $src}",
                        [(set _.RC:$dst, (_.VT (vselect_mask _.KRCWM:$mask,
                                                (_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))],
                        _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
  }
  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
            (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;

  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;

  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
            (!cast<Instruction>(Name#_.ZSuffix#rmk) _.RC:$src0,
             _.KRCWM:$mask, addr:$ptr)>;
}
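
// Each instantiation thus provides plain, merge-masked and zero-masked forms;
// e.g. the 512-bit aligned f32 load matches assembly such as:
//   vmovaps zmm0, [rdi]
//   vmovaps zmm0 {k1}, [rdi]      ; merge: unselected lanes keep $src0
//   vmovaps zmm0 {k1} {z}, [rdi]  ; zero: unselected lanes are cleared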

multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo _, Predicate prd,
                                 X86SchedWriteMoveLSWidths Sched,
                                 string EVEX2VEXOvrd, bit NoRMPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
                       _.info512.AlignedLdFrag, masked_load_aligned,
                       Sched.ZMM, "", NoRMPattern>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
                            _.info256.AlignedLdFrag, masked_load_aligned,
                            Sched.YMM, EVEX2VEXOvrd#"Y", NoRMPattern>, EVEX_V256;
    defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
                            _.info128.AlignedLdFrag, masked_load_aligned,
                            Sched.XMM, EVEX2VEXOvrd, NoRMPattern>, EVEX_V128;
  }
}

multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
                          AVX512VLVectorVTInfo _, Predicate prd,
                          X86SchedWriteMoveLSWidths Sched,
                          string EVEX2VEXOvrd, bit NoRMPattern = 0,
                          SDPatternOperator SelectOprr = vselect> {
  let Predicates = [prd] in
  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag,
                       masked_load, Sched.ZMM, "",
                       NoRMPattern, SelectOprr>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag,
                            masked_load, Sched.YMM, EVEX2VEXOvrd#"Y",
                            NoRMPattern, SelectOprr>, EVEX_V256;
    defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag,
                            masked_load, Sched.XMM, EVEX2VEXOvrd,
                            NoRMPattern, SelectOprr>, EVEX_V128;
  }
}

multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
                        X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
                        X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
                        bit NoMRPattern = 0> {
  let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
    let isMoveReg = 1 in
    def rr_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
                          OpcodeStr # "\t{$src, $dst|$dst, $src}",
                          [], _.ExeDomain>, EVEX,
                          Sched<[Sched.RR]>,
                          EVEX2VEXOverride<EVEX2VEXOvrd#"rr_REV">;
    def rrk_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
                           (ins _.KRCWM:$mask, _.RC:$src),
                           OpcodeStr # "\t{$src, ${dst} {${mask}}|"#
                           "${dst} {${mask}}, $src}",
                           [], _.ExeDomain>, EVEX, EVEX_K,
                           Sched<[Sched.RR]>;
    def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
                            (ins _.KRCWM:$mask, _.RC:$src),
                            OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
                            "${dst} {${mask}} {z}, $src}",
                            [], _.ExeDomain>, EVEX, EVEX_KZ,
                            Sched<[Sched.RR]>;
  }

  let hasSideEffects = 0, mayStore = 1 in
  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    !if(NoMRPattern, [],
                        [(st_frag (_.VT _.RC:$src), addr:$dst)]),
                    _.ExeDomain>, EVEX, Sched<[Sched.MR]>,
                    EVEX2VEXOverride<EVEX2VEXOvrd#"mr">;
  def mrk : AVX512PI<opc, MRMDestMem, (outs),
                     (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
                     OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
                     [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>;

  def : Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask),
            (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
             _.KRCWM:$mask, _.RC:$src)>;

  def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV")
                   _.RC:$dst, _.RC:$src), 0>;
  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV")
                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV")
                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
}

multiclass avx512_store_vl<bits<8> opc, string OpcodeStr,
                           AVX512VLVectorVTInfo _, Predicate prd,
                           X86SchedWriteMoveLSWidths Sched,
                           string EVEX2VEXOvrd, bit NoMRPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store,
                        masked_store, Sched.ZMM, "",
                        NoMRPattern>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store,
                             masked_store, Sched.YMM,
                             EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store,
                             masked_store, Sched.XMM, EVEX2VEXOvrd,
                             NoMRPattern>, EVEX_V128;
  }
}

multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _, Predicate prd,
                                  X86SchedWriteMoveLSWidths Sched,
                                  string EVEX2VEXOvrd, bit NoMRPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore,
                        masked_store_aligned, Sched.ZMM, "",
                        NoMRPattern>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore,
                             masked_store_aligned, Sched.YMM,
                             EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore,
                             masked_store_aligned, Sched.XMM, EVEX2VEXOvrd,
                             NoMRPattern>, EVEX_V128;
  }
}

defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
                                     HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
               avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
               PS, EVEX_CD8<32, CD8VF>;

defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
                                     HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
               avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
               PD, REX_W, EVEX_CD8<64, CD8VF>;

defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
                              SchedWriteFMoveLS, "VMOVUPS", 0, null_frag>,
               avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
                               SchedWriteFMoveLS, "VMOVUPS">,
               PS, EVEX_CD8<32, CD8VF>;

defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
                              SchedWriteFMoveLS, "VMOVUPD", 0, null_frag>,
               avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
                               SchedWriteFMoveLS, "VMOVUPD">,
               PD, REX_W, EVEX_CD8<64, CD8VF>;

defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
                                       HasAVX512, SchedWriteVecMoveLS,
                                       "VMOVDQA", 1>,
                 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
                                        HasAVX512, SchedWriteVecMoveLS,
                                        "VMOVDQA", 1>,
                 PD, EVEX_CD8<32, CD8VF>;

defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
                                       HasAVX512, SchedWriteVecMoveLS,
                                       "VMOVDQA">,
                 avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
                                        HasAVX512, SchedWriteVecMoveLS,
                                        "VMOVDQA">,
                 PD, REX_W, EVEX_CD8<64, CD8VF>;

defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI,
                               SchedWriteVecMoveLS, "VMOVDQU", 1>,
                avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI,
                                SchedWriteVecMoveLS, "VMOVDQU", 1>,
                XD, EVEX_CD8<8, CD8VF>;

defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI,
                                SchedWriteVecMoveLS, "VMOVDQU", 1>,
                 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI,
                                 SchedWriteVecMoveLS, "VMOVDQU", 1>,
                 XD, REX_W, EVEX_CD8<16, CD8VF>;

defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
                                SchedWriteVecMoveLS, "VMOVDQU", 1, null_frag>,
                 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
                                 SchedWriteVecMoveLS, "VMOVDQU", 1>,
                 XS, EVEX_CD8<32, CD8VF>;

defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
                                SchedWriteVecMoveLS, "VMOVDQU", 0, null_frag>,
                 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
                                 SchedWriteVecMoveLS, "VMOVDQU">,
                 XS, REX_W, EVEX_CD8<64, CD8VF>;
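
// The variants instantiated with a trailing 1 set NoRMPattern/NoMRPattern, so
// plain (unmasked) loads and stores are matched by the remaining forms (e.g.
// vmovdqa64/vmovdqu64); the narrower-element encodings matter chiefly when a
// {k} write-mask is applied.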

// Special instructions to help with spilling when we don't have VLX. We need
// to load or store from a ZMM register instead. These are converted in
// expandPostRAPseudos.
let isReMaterializable = 1, canFoldAsLoad = 1,
    isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in {
def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
                            "", []>, Sched<[WriteFLoadX]>;
def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
                            "", []>, Sched<[WriteFLoadY]>;
def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
                            "", []>, Sched<[WriteFLoadX]>;
def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
                            "", []>, Sched<[WriteFLoadY]>;
}

let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
                            "", []>, Sched<[WriteFStoreX]>;
def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
                            "", []>, Sched<[WriteFStoreY]>;
def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
                            "", []>, Sched<[WriteFStoreX]>;
def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
                            "", []>, Sched<[WriteFStoreY]>;
}

def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV),
                          (v8i64 VR512:$src))),
          (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
                                            VK8), VR512:$src)>;

def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
                           (v16i32 VR512:$src))),
          (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;

// These patterns exist to prevent the above patterns from introducing a second
// mask inversion when one already exists.
def : Pat<(v8i64 (vselect (v8i1 (vnot VK8:$mask)),
                          (v8i64 immAllZerosV),
                          (v8i64 VR512:$src))),
          (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
def : Pat<(v16i32 (vselect (v16i1 (vnot VK16:$mask)),
                           (v16i32 immAllZerosV),
                           (v16i32 VR512:$src))),
          (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;

multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
                              X86VectorVTInfo Wide> {
  def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
                                Narrow.RC:$src1, Narrow.RC:$src0)),
            (EXTRACT_SUBREG
             (Wide.VT
              (!cast<Instruction>(InstrStr#"rrk")
               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
               (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
             Narrow.SubRegIdx)>;

  def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
                                Narrow.RC:$src1, Narrow.ImmAllZerosV)),
            (EXTRACT_SUBREG
             (Wide.VT
              (!cast<Instruction>(InstrStr#"rrkz")
               (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
             Narrow.SubRegIdx)>;
}

// Patterns for handling v8i1 selects of 256-bit vectors when VLX isn't
// available. Use a 512-bit operation and extract.
let Predicates = [HasAVX512, NoVLX] in {
  defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
  defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
  defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
  defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;

  defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>;
  defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
  defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>;
  defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
}

let Predicates = [HasBWI, NoVLX] in {
  defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
  defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;

  defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>;
  defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;

  defm : mask_move_lowering<"VMOVDQU16Z", v8f16x_info, v32f16_info>;
  defm : mask_move_lowering<"VMOVDQU16Z", v16f16x_info, v32f16_info>;

  defm : mask_move_lowering<"VMOVDQU16Z", v8bf16x_info, v32bf16_info>;
  defm : mask_move_lowering<"VMOVDQU16Z", v16bf16x_info, v32bf16_info>;
}

let Predicates = [HasAVX512] in {
  // 512-bit load.
  def : Pat<(alignedloadv16i32 addr:$src),
            (VMOVDQA64Zrm addr:$src)>;
  def : Pat<(alignedloadv32i16 addr:$src),
            (VMOVDQA64Zrm addr:$src)>;
  def : Pat<(alignedloadv32f16 addr:$src),
            (VMOVAPSZrm addr:$src)>;
  def : Pat<(alignedloadv32bf16 addr:$src),
            (VMOVAPSZrm addr:$src)>;
  def : Pat<(alignedloadv64i8 addr:$src),
            (VMOVDQA64Zrm addr:$src)>;
  def : Pat<(loadv16i32 addr:$src),
            (VMOVDQU64Zrm addr:$src)>;
  def : Pat<(loadv32i16 addr:$src),
            (VMOVDQU64Zrm addr:$src)>;
  def : Pat<(loadv32f16 addr:$src),
            (VMOVUPSZrm addr:$src)>;
  def : Pat<(loadv32bf16 addr:$src),
            (VMOVUPSZrm addr:$src)>;
  def : Pat<(loadv64i8 addr:$src),
            (VMOVDQU64Zrm addr:$src)>;

  // 512-bit store.
  def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(alignedstore (v32f16 VR512:$src), addr:$dst),
            (VMOVAPSZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignedstore (v32bf16 VR512:$src), addr:$dst),
            (VMOVAPSZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v16i32 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v32i16 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v32f16 VR512:$src), addr:$dst),
            (VMOVUPSZmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v32bf16 VR512:$src), addr:$dst),
            (VMOVUPSZmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v64i8 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
}

let Predicates = [HasVLX] in {
  // 128-bit load.
  def : Pat<(alignedloadv4i32 addr:$src),
            (VMOVDQA64Z128rm addr:$src)>;
  def : Pat<(alignedloadv8i16 addr:$src),
            (VMOVDQA64Z128rm addr:$src)>;
  def : Pat<(alignedloadv8f16 addr:$src),
            (VMOVAPSZ128rm addr:$src)>;
  def : Pat<(alignedloadv8bf16 addr:$src),
            (VMOVAPSZ128rm addr:$src)>;
  def : Pat<(alignedloadv16i8 addr:$src),
            (VMOVDQA64Z128rm addr:$src)>;
  def : Pat<(loadv4i32 addr:$src),
            (VMOVDQU64Z128rm addr:$src)>;
  def : Pat<(loadv8i16 addr:$src),
            (VMOVDQU64Z128rm addr:$src)>;
  def : Pat<(loadv8f16 addr:$src),
            (VMOVUPSZ128rm addr:$src)>;
  def : Pat<(loadv8bf16 addr:$src),
            (VMOVUPSZ128rm addr:$src)>;
  def : Pat<(loadv16i8 addr:$src),
            (VMOVDQU64Z128rm addr:$src)>;

  // 128-bit store.
  def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignedstore (v8f16 VR128X:$src), addr:$dst),
            (VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignedstore (v8bf16 VR128X:$src), addr:$dst),
            (VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v8f16 VR128X:$src), addr:$dst),
            (VMOVUPSZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v8bf16 VR128X:$src), addr:$dst),
            (VMOVUPSZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
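
  // Unmasked full-register moves are element-type agnostic, so all element
  // types share one opcode per domain (vmovdqa64/vmovdqu64 for integer,
  // vmovaps/vmovups for FP) instead of duplicating patterns per element width.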

  // 256-bit load.
  def : Pat<(alignedloadv8i32 addr:$src),
            (VMOVDQA64Z256rm addr:$src)>;
  def : Pat<(alignedloadv16i16 addr:$src),
            (VMOVDQA64Z256rm addr:$src)>;
  def : Pat<(alignedloadv16f16 addr:$src),
            (VMOVAPSZ256rm addr:$src)>;
  def : Pat<(alignedloadv16bf16 addr:$src),
            (VMOVAPSZ256rm addr:$src)>;
  def : Pat<(alignedloadv32i8 addr:$src),
            (VMOVDQA64Z256rm addr:$src)>;
  def : Pat<(loadv8i32 addr:$src),
            (VMOVDQU64Z256rm addr:$src)>;
  def : Pat<(loadv16i16 addr:$src),
            (VMOVDQU64Z256rm addr:$src)>;
  def : Pat<(loadv16f16 addr:$src),
            (VMOVUPSZ256rm addr:$src)>;
  def : Pat<(loadv16bf16 addr:$src),
            (VMOVUPSZ256rm addr:$src)>;
  def : Pat<(loadv32i8 addr:$src),
            (VMOVDQU64Z256rm addr:$src)>;

  // 256-bit store.
  def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v16f16 VR256X:$src), addr:$dst),
            (VMOVAPSZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v16bf16 VR256X:$src), addr:$dst),
            (VMOVAPSZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v16f16 VR256X:$src), addr:$dst),
            (VMOVUPSZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v16bf16 VR256X:$src), addr:$dst),
            (VMOVUPSZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
}

multiclass mask_move_lowering_f16_bf16<AVX512VLVectorVTInfo _> {
let Predicates = [HasBWI] in {
  def : Pat<(_.info512.VT (vselect VK32WM:$mask, (_.info512.VT VR512:$src1), (_.info512.VT VR512:$src0))),
            (VMOVDQU16Zrrk VR512:$src0, VK32WM:$mask, VR512:$src1)>;
  def : Pat<(_.info512.VT (vselect VK32WM:$mask, (_.info512.VT VR512:$src1), _.info512.ImmAllZerosV)),
            (VMOVDQU16Zrrkz VK32WM:$mask, VR512:$src1)>;
  def : Pat<(_.info512.VT (vselect VK32WM:$mask,
                           (_.info512.VT (_.info512.AlignedLdFrag addr:$src)), (_.info512.VT VR512:$src0))),
            (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
  def : Pat<(_.info512.VT (vselect VK32WM:$mask,
                           (_.info512.VT (_.info512.AlignedLdFrag addr:$src)), _.info512.ImmAllZerosV)),
            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
  def : Pat<(_.info512.VT (vselect VK32WM:$mask,
                           (_.info512.VT (_.info512.LdFrag addr:$src)), (_.info512.VT VR512:$src0))),
            (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
  def : Pat<(_.info512.VT (vselect VK32WM:$mask,
                           (_.info512.VT (_.info512.LdFrag addr:$src)), _.info512.ImmAllZerosV)),
            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
  def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, (_.info512.VT VR512:$src0))),
            (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
  def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, undef)),
            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
  def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, _.info512.ImmAllZerosV)),
            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;

  def : Pat<(masked_store (_.info512.VT VR512:$src), addr:$dst,
  def : Pat<(masked_store (_.info512.VT VR512:$src), addr:$dst, VK32WM:$mask),
            (VMOVDQU16Zmrk addr:$dst, VK32WM:$mask, VR512:$src)>;
}
let Predicates = [HasBWI, HasVLX] in {
  def : Pat<(_.info256.VT (vselect VK16WM:$mask, (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src0))),
            (VMOVDQU16Z256rrk VR256X:$src0, VK16WM:$mask, VR256X:$src1)>;
  def : Pat<(_.info256.VT (vselect VK16WM:$mask, (_.info256.VT VR256X:$src1), _.info256.ImmAllZerosV)),
            (VMOVDQU16Z256rrkz VK16WM:$mask, VR256X:$src1)>;
  def : Pat<(_.info256.VT (vselect VK16WM:$mask,
                           (_.info256.VT (_.info256.AlignedLdFrag addr:$src)), (_.info256.VT VR256X:$src0))),
            (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
  def : Pat<(_.info256.VT (vselect VK16WM:$mask,
                           (_.info256.VT (_.info256.AlignedLdFrag addr:$src)), _.info256.ImmAllZerosV)),
            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
  def : Pat<(_.info256.VT (vselect VK16WM:$mask,
                           (_.info256.VT (_.info256.LdFrag addr:$src)), (_.info256.VT VR256X:$src0))),
            (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
  def : Pat<(_.info256.VT (vselect VK16WM:$mask,
                           (_.info256.VT (_.info256.LdFrag addr:$src)), _.info256.ImmAllZerosV)),
            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
  def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, (_.info256.VT VR256X:$src0))),
            (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
  def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, undef)),
            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
  def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, _.info256.ImmAllZerosV)),
            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;

  def : Pat<(masked_store (_.info256.VT VR256X:$src), addr:$dst, VK16WM:$mask),
            (VMOVDQU16Z256mrk addr:$dst, VK16WM:$mask, VR256X:$src)>;

  def : Pat<(_.info128.VT (vselect VK8WM:$mask, (_.info128.VT VR128X:$src1), (_.info128.VT VR128X:$src0))),
            (VMOVDQU16Z128rrk VR128X:$src0, VK8WM:$mask, VR128X:$src1)>;
  def : Pat<(_.info128.VT (vselect VK8WM:$mask, (_.info128.VT VR128X:$src1), _.info128.ImmAllZerosV)),
            (VMOVDQU16Z128rrkz VK8WM:$mask, VR128X:$src1)>;
  def : Pat<(_.info128.VT (vselect VK8WM:$mask,
                           (_.info128.VT (_.info128.AlignedLdFrag addr:$src)), (_.info128.VT VR128X:$src0))),
            (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
  def : Pat<(_.info128.VT (vselect VK8WM:$mask,
                           (_.info128.VT (_.info128.AlignedLdFrag addr:$src)), _.info128.ImmAllZerosV)),
            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
  def : Pat<(_.info128.VT (vselect VK8WM:$mask,
                           (_.info128.VT (_.info128.LdFrag addr:$src)), (_.info128.VT VR128X:$src0))),
            (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
  def : Pat<(_.info128.VT (vselect VK8WM:$mask,
                           (_.info128.VT (_.info128.LdFrag addr:$src)), _.info128.ImmAllZerosV)),
            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
  def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, (_.info128.VT VR128X:$src0))),
            (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
  def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, undef)),
            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
  def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, _.info128.ImmAllZerosV)),
            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;

  def : Pat<(masked_store (_.info128.VT VR128X:$src), addr:$dst, VK8WM:$mask),
            (VMOVDQU16Z128mrk addr:$dst, VK8WM:$mask, VR128X:$src)>;
}
}

defm : mask_move_lowering_f16_bf16<avx512vl_f16_info>;
defm : mask_move_lowering_f16_bf16<avx512vl_bf16_info>;
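
// There is no dedicated f16/bf16 masked move, so the multiclass above reuses
// the 16-bit integer unaligned move, whose lane size matches. Illustrative
// example of what these patterns select: a masked f16 vector select such as
//   (v32f16 (vselect VK32WM:$mask, VR512:$src1, VR512:$src0))
// becomes VMOVDQU16Zrrk, i.e. "vmovdqu16 %zmm1, %zmm0 {%k1}".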

// Move Int Doubleword to Packed Double Int
//
let ExeDomain = SSEPackedInt in {
def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set VR128X:$dst,
                        (v4i32 (scalar_to_vector GR32:$src)))]>,
                      EVEX, Sched<[WriteVecMoveFromGpr]>;
def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set VR128X:$dst,
                        (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
                      EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(set VR128X:$dst,
                        (v2i64 (scalar_to_vector GR64:$src)))]>,
                      EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
                      (ins i64mem:$src),
                      "vmovq\t{$src, $dst|$dst, $src}", []>,
                      EVEX, REX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
let isCodeGenOnly = 1 in {
def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(set FR64X:$dst, (bitconvert GR64:$src))]>,
                      EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(set GR64:$dst, (bitconvert FR64X:$src))]>,
                      EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
}
} // ExeDomain = SSEPackedInt
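
// The isCodeGenOnly GR64<->FR64X forms above give instruction selection a
// register-to-register scalar bitcast that never touches memory.
// Illustrative example: (f64 (bitconvert GR64:$src)) selects to
// VMOV64toSDZrr, i.e. "vmovq %rax, %xmm0".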

// Move Int Doubleword to Single Scalar
//
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set FR32X:$dst, (bitconvert GR32:$src))]>,
                      EVEX, Sched<[WriteVecMoveFromGpr]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1

// Move doubleword from xmm register to r/m32
//
let ExeDomain = SSEPackedInt in {
def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
                                        (iPTR 0)))]>,
                      EVEX, Sched<[WriteVecMoveToGpr]>;
def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
                      (ins i32mem:$dst, VR128X:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(store (i32 (extractelt (v4i32 VR128X:$src),
                                    (iPTR 0))), addr:$dst)]>,
                      EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
} // ExeDomain = SSEPackedInt

// Move quadword from xmm1 register to r/m64
//
let ExeDomain = SSEPackedInt in {
def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
                       "vmovq\t{$src, $dst|$dst, $src}",
                       [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
                                         (iPTR 0)))]>,
                       PD, EVEX, REX_W, Sched<[WriteVecMoveToGpr]>,
                       Requires<[HasAVX512]>;

let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
                       "vmovq\t{$src, $dst|$dst, $src}", []>, PD,
                       EVEX, REX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecStore]>,
                       Requires<[HasAVX512, In64BitMode]>;

def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
                      (ins i64mem:$dst, VR128X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
                              addr:$dst)]>,
                      EVEX, PD, REX_W, EVEX_CD8<64, CD8VT1>,
                      Sched<[WriteVecStore]>, Requires<[HasAVX512]>;

let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
                             (ins VR128X:$src),
                             "vmovq\t{$src, $dst|$dst, $src}", []>,
                             EVEX, REX_W, Sched<[SchedWriteVecLogic.XMM]>;
} // ExeDomain = SSEPackedInt

def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
                (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;

let Predicates = [HasAVX512] in {
  def : Pat<(X86vextractstore64 (v2i64 VR128X:$src), addr:$dst),
            (VMOVPQI2QIZmr addr:$dst, VR128X:$src)>;
}

// Move Scalar Single to Double Int
//
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
                      (ins FR32X:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set GR32:$dst, (bitconvert FR32X:$src))]>,
                      EVEX, Sched<[WriteVecMoveToGpr]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1

// Move Quadword Int to Packed Quadword Int
//
let ExeDomain = SSEPackedInt in {
def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
                      (ins i64mem:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(set VR128X:$dst,
                        (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
                      EVEX, REX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
} // ExeDomain = SSEPackedInt

// Allow "vmovd" but print "vmovq".
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>;
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>;

// Conversions between masks and scalar fp.
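// There is no direct k-register <-> XMM move, so these bounce through a GPR.
// Illustrative example: (v32i1 (bitconvert FR32X:$src)) selects to
// VMOVSS2DIZrr followed by KMOVDkr, i.e. "vmovd %xmm0, %eax" then
// "kmovd %eax, %k0".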
def : Pat<(v32i1 (bitconvert FR32X:$src)),
          (KMOVDkr (VMOVSS2DIZrr FR32X:$src))>;
def : Pat<(f32 (bitconvert VK32:$src)),
          (VMOVDI2SSZrr (KMOVDrk VK32:$src))>;

def : Pat<(v64i1 (bitconvert FR64X:$src)),
          (KMOVQkr (VMOVSDto64Zrr FR64X:$src))>;
def : Pat<(f64 (bitconvert VK64:$src)),
          (VMOV64toSDZrr (KMOVQrk VK64:$src))>;

//===----------------------------------------------------------------------===//
// AVX-512  MOVSH, MOVSS, MOVSD
//===----------------------------------------------------------------------===//

multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
                              X86VectorVTInfo _, Predicate prd = HasAVX512> {
  let Predicates = !if (!eq (prd, HasFP16), [HasFP16], [prd, OptForSize]) in
  def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
             (ins _.RC:$src1, _.RC:$src2),
             !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
             _.ExeDomain>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
  let Predicates = [prd] in {
  def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
              !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
              "$dst {${mask}} {z}, $src1, $src2}"),
              [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
                                      (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                                      _.ImmAllZerosV)))],
              _.ExeDomain>, EVEX_4V, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>;
  let Constraints = "$src0 = $dst" in
  def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
              (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
              !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
              "$dst {${mask}}, $src1, $src2}"),
              [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
                                      (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                                      (_.VT _.RC:$src0))))],
              _.ExeDomain>, EVEX_4V, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
  let canFoldAsLoad = 1, isReMaterializable = 1 in {
  def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
             [(set _.RC:$dst, (_.VT (vzload_frag addr:$src)))],
             _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
  // _alt version uses FR32/FR64 register class.
  let isCodeGenOnly = 1 in
  def rm_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
                 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
                 [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
                 _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
  }
  let mayLoad = 1, hasSideEffects = 0 in {
  let Constraints = "$src0 = $dst" in
  def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
              (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
              !strconcat(asm, "\t{$src, $dst {${mask}}|",
              "$dst {${mask}}, $src}"),
              [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
  def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
               (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
               !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
               "$dst {${mask}} {z}, $src}"),
               [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
  }
  def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
             [(store _.FRC:$src, addr:$dst)], _.ExeDomain>,
             EVEX, Sched<[WriteFStore]>;
  let mayStore = 1, hasSideEffects = 0 in
  def mrk: AVX512PI<0x11, MRMDestMem, (outs),
              (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.RC:$src),
              !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
              [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>;
  }
}

defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>,
               VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;

defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>,
               VEX_LIG, XD, REX_W, EVEX_CD8<64, CD8VT1>;

defm VMOVSHZ : avx512_move_scalar<"vmovsh", X86Movsh, X86vzload16, f16x_info,
                                  HasFP16>,
               VEX_LIG, T_MAP5XS, EVEX_CD8<16, CD8VT1>;

multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
                                       PatLeaf ZeroFP, X86VectorVTInfo _> {

def : Pat<(_.VT (OpNode _.RC:$src0,
                 (_.VT (scalar_to_vector
                        (_.EltVT (X86selects VK1WM:$mask,
                                             (_.EltVT _.FRC:$src1),
                                             (_.EltVT _.FRC:$src2))))))),
          (!cast<Instruction>(InstrStr#rrk)
                 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)),
                 VK1WM:$mask,
                 (_.VT _.RC:$src0),
                 (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;

def : Pat<(_.VT (OpNode _.RC:$src0,
                 (_.VT (scalar_to_vector
                        (_.EltVT (X86selects VK1WM:$mask,
                                             (_.EltVT _.FRC:$src1),
                                             (_.EltVT ZeroFP))))))),
          (!cast<Instruction>(InstrStr#rrkz)
                 VK1WM:$mask,
                 (_.VT _.RC:$src0),
                 (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
}

multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
                                        dag Mask, RegisterClass MaskRC> {

def : Pat<(masked_store
             (_.info512.VT (insert_subvector undef,
                               (_.info128.VT _.info128.RC:$src),
                               (iPTR 0))), addr:$dst, Mask),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                      _.info128.RC:$src)>;

}

multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
                                               AVX512VLVectorVTInfo _,
                                               dag Mask, RegisterClass MaskRC,
                                               SubRegIndex subreg> {

def : Pat<(masked_store
             (_.info512.VT (insert_subvector undef,
                               (_.info128.VT _.info128.RC:$src),
                               (iPTR 0))), addr:$dst, Mask),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      _.info128.RC:$src)>;

}

// This matches the more recent codegen from clang that avoids emitting a
// 512-bit masked store directly. Codegen will widen a 128-bit masked store
// to 512 bits on AVX512F-only targets.
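// Illustrative example of the shape being matched: a masked store of one f32
// lane, e.g. a v4f32 llvm.masked.store with only bit 0 of the mask set, is
// widened by codegen to a v16f32 masked store of (insert_subvector undef,
// $src, 0) on AVX512F-only targets; the AVX512F pattern below peels that
// back off and selects "vmovss %xmm0, (%rdi) {%k1}".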
multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
                                                AVX512VLVectorVTInfo _,
                                                dag Mask512, dag Mask128,
                                                RegisterClass MaskRC,
                                                SubRegIndex subreg> {

// AVX512F pattern.
def : Pat<(masked_store
             (_.info512.VT (insert_subvector undef,
                               (_.info128.VT _.info128.RC:$src),
                               (iPTR 0))), addr:$dst, Mask512),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      _.info128.RC:$src)>;

// AVX512VL pattern.
def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      _.info128.RC:$src)>;
}

multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
                                       dag Mask, RegisterClass MaskRC> {

def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
                                        _.info512.ImmAllZerosV)),
                         (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                      addr:$srcAddr)>;

def : Pat<(_.info128.VT (extract_subvector
                (_.info512.VT (masked_load addr:$srcAddr, Mask,
                      (_.info512.VT (insert_subvector undef,
                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                            (iPTR 0))))),
                (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                      addr:$srcAddr)>;

}

multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
                                              AVX512VLVectorVTInfo _,
                                              dag Mask, RegisterClass MaskRC,
                                              SubRegIndex subreg> {

def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
                                        _.info512.ImmAllZerosV)),
                         (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

def : Pat<(_.info128.VT (extract_subvector
                (_.info512.VT (masked_load addr:$srcAddr, Mask,
                      (_.info512.VT (insert_subvector undef,
                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                            (iPTR 0))))),
                (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

}

// This matches the more recent codegen from clang that avoids emitting a
// 512-bit masked load directly. Codegen will widen a 128-bit masked load
// to 512 bits on AVX512F-only targets.
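// Illustrative example, mirroring the store case above: a v4f32 masked load
// with a zero passthru is widened to a v16f32 masked load on AVX512F-only
// targets; the AVX512F patterns below strip the extract_subvector again and
// select "vmovss (%rdi), %xmm0 {%k1} {z}".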
multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
                                               AVX512VLVectorVTInfo _,
                                               dag Mask512, dag Mask128,
                                               RegisterClass MaskRC,
                                               SubRegIndex subreg> {
// AVX512F patterns.
def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask512,
                                        _.info512.ImmAllZerosV)),
                         (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

def : Pat<(_.info128.VT (extract_subvector
                (_.info512.VT (masked_load addr:$srcAddr, Mask512,
                      (_.info512.VT (insert_subvector undef,
                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                            (iPTR 0))))),
                (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

// AVX512VL patterns.
def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
                         _.info128.ImmAllZerosV)),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
                         (_.info128.VT (X86vzmovl _.info128.RC:$src)))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;
}

defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;

defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;

let Predicates = [HasFP16] in {
defm : avx512_move_scalar_lowering<"VMOVSHZ", X86Movsh, fp16imm0, v8f16x_info>;
defm : avx512_store_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
defm : avx512_store_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
                   (v32i1 (insert_subvector
                           (v32i1 immAllZerosV),
                           (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                           (iPTR 0))),
                   (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                   GR8, sub_8bit>;

defm : avx512_load_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
defm : avx512_load_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
                   (v32i1 (insert_subvector
                           (v32i1 immAllZerosV),
                           (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                           (iPTR 0))),
                   (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                   GR8, sub_8bit>;

def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), (f16 FR16X:$src2))),
          (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrk
                                    (v8f16 (COPY_TO_REGCLASS FR16X:$src2, VR128X)),
                                    VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
                                    (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;

def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), fp16imm0)),
          (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrkz VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
                                    (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;
}

defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (insert_subvector
                           (v16i1 immAllZerosV),
                           (v4i1 (extract_subvector
                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                                  (iPTR 0))),
                           (iPTR 0))),
                   (v4i1 (extract_subvector
                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                          (iPTR 0))), GR8, sub_8bit>;
defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1
                    (extract_subvector
                     (v16i1
                      (insert_subvector
                       (v16i1 immAllZerosV),
                       (v2i1 (extract_subvector
                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                              (iPTR 0))),
                       (iPTR 0))),
                     (iPTR 0))),
                   (v2i1 (extract_subvector
                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                          (iPTR 0))), GR8, sub_8bit>;

defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;

defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (insert_subvector
                           (v16i1 immAllZerosV),
                           (v4i1 (extract_subvector
                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                                  (iPTR 0))),
                           (iPTR 0))),
                   (v4i1 (extract_subvector
                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                          (iPTR 0))), GR8, sub_8bit>;
defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1
                    (extract_subvector
                     (v16i1
                      (insert_subvector
                       (v16i1 immAllZerosV),
                       (v2i1 (extract_subvector
                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                              (iPTR 0))),
                       (iPTR 0))),
                     (iPTR 0))),
                   (v2i1 (extract_subvector
                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                          (iPTR 0))), GR8, sub_8bit>;

def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk
                                    (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
                                    VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
                                    (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;

def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
                                    (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;

def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), (f32 FR32X:$src0))),
          (COPY_TO_REGCLASS
           (v4f32 (VMOVSSZrmk (v4f32 (COPY_TO_REGCLASS FR32X:$src0, VR128X)),
                              VK1WM:$mask, addr:$src)),
           FR32X)>;
def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), fp32imm0)),
          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrmkz VK1WM:$mask, addr:$src)), FR32X)>;

def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk
                                    (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
                                    VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
                                    (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;

def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fp64imm0)),
          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
                                    (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;

def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0))),
          (COPY_TO_REGCLASS
           (v2f64 (VMOVSDZrmk (v2f64 (COPY_TO_REGCLASS FR64X:$src0, VR128X)),
                              VK1WM:$mask, addr:$src)),
           FR64X)>;
def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)),
          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrmkz VK1WM:$mask, addr:$src)), FR64X)>;


def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 VR128X:$src2))),
          (VMOVSSZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 VR128X:$src2))),
          (VMOVSDZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;

def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 immAllZerosV))),
          (VMOVSSZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 immAllZerosV))),
          (VMOVSDZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;

let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
  let Predicates = [HasFP16] in {
    def VMOVSHZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                              (ins VR128X:$src1, VR128X:$src2),
                              "vmovsh\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                              []>, T_MAP5XS, EVEX_4V, VEX_LIG,
                              Sched<[SchedWriteFShuffle.XMM]>;

    let Constraints = "$src0 = $dst" in
    def VMOVSHZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                               (ins f16x_info.RC:$src0, f16x_info.KRCWM:$mask,
                                    VR128X:$src1, VR128X:$src2),
                               "vmovsh\t{$src2, $src1, $dst {${mask}}|"#
                                 "$dst {${mask}}, $src1, $src2}",
                               []>, T_MAP5XS, EVEX_K, EVEX_4V, VEX_LIG,
                               Sched<[SchedWriteFShuffle.XMM]>;

    def VMOVSHZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                                (ins f16x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
                                "vmovsh\t{$src2, $src1, $dst {${mask}} {z}|"#
                                  "$dst {${mask}} {z}, $src1, $src2}",
                                []>, EVEX_KZ, T_MAP5XS, EVEX_4V, VEX_LIG,
                                Sched<[SchedWriteFShuffle.XMM]>;
  }
  def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            []>, XS, EVEX_4V, VEX_LIG,
                            Sched<[SchedWriteFShuffle.XMM]>;

  let Constraints = "$src0 = $dst" in
  def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                             (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
                                  VR128X:$src1, VR128X:$src2),
                             "vmovss\t{$src2, $src1, $dst {${mask}}|"#
                               "$dst {${mask}}, $src1, $src2}",
                             []>, EVEX_K, XS, EVEX_4V, VEX_LIG,
                             Sched<[SchedWriteFShuffle.XMM]>;

  def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                              (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
                              "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
                                "$dst {${mask}} {z}, $src1, $src2}",
                              []>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
                              Sched<[SchedWriteFShuffle.XMM]>;

  def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            []>, XD, EVEX_4V, VEX_LIG, REX_W,
                            Sched<[SchedWriteFShuffle.XMM]>;

  let Constraints = "$src0 = $dst" in
  def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                             (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
                                  VR128X:$src1, VR128X:$src2),
                             "vmovsd\t{$src2, $src1, $dst {${mask}}|"#
                               "$dst {${mask}}, $src1, $src2}",
                             []>, EVEX_K, XD, EVEX_4V, VEX_LIG,
                             REX_W, Sched<[SchedWriteFShuffle.XMM]>;

  def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                              (ins f64x_info.KRCWM:$mask, VR128X:$src1,
                                   VR128X:$src2),
                              "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
                                "$dst {${mask}} {z}, $src1, $src2}",
                              []>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
                              REX_W, Sched<[SchedWriteFShuffle.XMM]>;
}

def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VMOVSHZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}}|"#
                  "$dst {${mask}}, $src1, $src2}",
                (VMOVSHZrrk_REV VR128X:$dst, VK1WM:$mask,
                                VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}} {z}|"#
                  "$dst {${mask}} {z}, $src1, $src2}",
                (VMOVSHZrrkz_REV VR128X:$dst, VK1WM:$mask,
                                 VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
                  "$dst {${mask}}, $src1, $src2}",
                (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask,
                                VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
                  "$dst {${mask}} {z}, $src1, $src2}",
                (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask,
                                 VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
                  "$dst {${mask}}, $src1, $src2}",
                (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask,
                                VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
                  "$dst {${mask}} {z}, $src1, $src2}",
                (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask,
                                 VR128X:$src1, VR128X:$src2), 0>;

let Predicates = [HasAVX512, OptForSize] in {
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
            (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
            (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;

  // Move low f32 and clear high bits.
  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
              (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
              (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;

  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
              (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
              (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;
}

// Use 128-bit blends for OptForSpeed since BLENDs have better throughput
// than VMOVSS/SD. Unfortunately, this loses the ability to use XMM16-31.
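// Illustrative example: with %xmm1 zeroed, "vblendps $1, %xmm0, %xmm1, %xmm0"
// keeps only the low element of %xmm0, like vmovss but typically with better
// throughput. VBLENDPS/VPBLENDW are VEX-only encodings, which is why the
// EVEX-only registers XMM16-31 become unavailable here.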
let Predicates = [HasAVX512, OptForSpeed] in {
  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
                          (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)),
                          (i8 1))), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
                          (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
                          (i8 3))), sub_xmm)>;
}

let Predicates = [HasAVX512] in {
  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
            (VMOVSSZrm addr:$src)>;
  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
            (VMOVSDZrm addr:$src)>;

  // Represent the same patterns above but in the form they appear for
  // 256-bit types
  def : Pat<(v8f32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v4f64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;

  // Represent the same patterns above but in the form they appear for
  // 512-bit types
  def : Pat<(v16f32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v8f64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
}
let Predicates = [HasFP16] in {
  def : Pat<(v8f16 (X86vzmovl (v8f16 VR128X:$src))),
            (VMOVSHZrr (v8f16 (AVX512_128_SET0)), VR128X:$src)>;
  def : Pat<(v8i16 (X86vzmovl (v8i16 VR128X:$src))),
            (VMOVSHZrr (v8i16 (AVX512_128_SET0)), VR128X:$src)>;

  // FIXME: we need better canonicalization in dag combine
  def : Pat<(v16f16 (X86vzmovl (v16f16 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
              (v8f16 (EXTRACT_SUBREG (v16f16 VR256X:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v16i16 (X86vzmovl (v16i16 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v8i16 (VMOVSHZrr (v8i16 (AVX512_128_SET0)),
              (v8i16 (EXTRACT_SUBREG (v16i16 VR256X:$src), sub_xmm)))), sub_xmm)>;

  // FIXME: we need better canonicalization in dag combine
  def : Pat<(v32f16 (X86vzmovl (v32f16 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
              (v8f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v32i16 (X86vzmovl (v32i16 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v8i16 (VMOVSHZrr (v8i16 (AVX512_128_SET0)),
              (v8i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_xmm)))), sub_xmm)>;

  def : Pat<(v8f16 (X86vzload16 addr:$src)),
            (VMOVSHZrm addr:$src)>;

  def : Pat<(v16f16 (X86vzload16 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;

  def : Pat<(v32f16 (X86vzload16 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;
}

let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
                                  (ins VR128X:$src),
                                  "vmovq\t{$src, $dst|$dst, $src}",
                                  [(set VR128X:$dst, (v2i64 (X86vzmovl
                                                     (v2i64 VR128X:$src))))]>,
                                  EVEX, REX_W;
}

let Predicates = [HasAVX512] in {
  def : Pat<(v4i32 (scalar_to_vector (i32 (anyext GR8:$src)))),
            (VMOVDI2PDIZrr (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
                                               GR8:$src, sub_8bit)))>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
            (VMOVDI2PDIZrr GR32:$src)>;
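
  // vmovd/vmovq from a GPR zero all remaining lanes of the destination,
  // which is why the X86vzmovl wrappers around these patterns fold into the
  // plain moves. Illustrative example:
  //   (v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src))))
  // selects to VMOVDI2PDIZrr, i.e. "vmovd %eax, %xmm0".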

  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
            (VMOV64toPQIZrr GR64:$src)>;

  // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
  def : Pat<(v4i32 (X86vzload32 addr:$src)),
            (VMOVDI2PDIZrm addr:$src)>;
  def : Pat<(v8i32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
            (VMOVZPQILo2PQIZrr VR128X:$src)>;
  def : Pat<(v2i64 (X86vzload64 addr:$src)),
            (VMOVQI2PQIZrm addr:$src)>;
  def : Pat<(v4i64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;

  // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
  def : Pat<(v16i32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  def : Pat<(v8i64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;

  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2f64 (VMOVZPQILo2PQIZrr
                     (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))),
             sub_xmm)>;
  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2i64 (VMOVZPQILo2PQIZrr
                     (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))),
             sub_xmm)>;

  def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2f64 (VMOVZPQILo2PQIZrr
                     (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))),
             sub_xmm)>;
  def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2i64 (VMOVZPQILo2PQIZrr
                     (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))),
             sub_xmm)>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - Non-temporals
//===----------------------------------------------------------------------===//

def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
                            (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
                            [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>,
                            EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;

let Predicates = [HasVLX] in {
  def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
                                 (ins i256mem:$src),
                                 "vmovntdqa\t{$src, $dst|$dst, $src}",
                                 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>,
                                 EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;

  def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
                                 (ins i128mem:$src),
                                 "vmovntdqa\t{$src, $dst|$dst, $src}",
                                 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>,
                                 EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
}

multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                        X86SchedWriteMoveLS Sched,
                        PatFrag st_frag = alignednontemporalstore> {
  let SchedRW = [Sched.MR], AddedComplexity = 400 in
  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(st_frag (_.VT _.RC:$src), addr:$dst)],
                    _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
}

multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
                           AVX512VLVectorVTInfo VTInfo,
                           X86SchedWriteMoveLSWidths Sched> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256;
    defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128;
  }
}

defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
                                SchedWriteVecMoveLSNT>, PD;
defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
                                SchedWriteFMoveLSNT>, PD, REX_W;
defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
                                SchedWriteFMoveLSNT>, PS;

let Predicates = [HasAVX512], AddedComplexity = 400 in {
  def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;

  def : Pat<(v8f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v16f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v8i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v16i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v32i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v64i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
}

let Predicates = [HasVLX], AddedComplexity = 400 in {
  def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;

  def : Pat<(v4f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v8f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v4i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v8i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v16i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v32i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;

  def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;

  def : Pat<(v2f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v4f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v2i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v4i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v8i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v16i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
}
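
// Only alignednontemporalload is matched above: vmovntdqa requires an
// aligned operand, so unaligned non-temporal loads simply fall back to
// ordinary loads. Illustrative example:
//   (v8i32 (alignednontemporalload addr)) selects to VMOVNTDQAZ256rm,
// i.e. "vmovntdqa (%rdi), %ymm0".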

//===----------------------------------------------------------------------===//
// AVX-512 - Integer arithmetic
//
multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86VectorVTInfo _, X86FoldableSchedWrite sched,
                           bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                    "$src2, $src1", "$src1, $src2",
                    (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                    IsCommutable, IsCommutable>, AVX512BIBase, EVEX_4V,
                    Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2)))>,
                  AVX512BIBase, EVEX_4V,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
                            bit IsCommutable = 0> :
           avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                  "${src2}"#_.BroadcastStr#", $src1",
                  "$src1, ${src2}"#_.BroadcastStr,
                  (_.VT (OpNode _.RC:$src1,
                                (_.BroadcastLdFrag addr:$src2)))>,
                  AVX512BIBase, EVEX_4V, EVEX_B,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              AVX512VLVectorVTInfo VTInfo,
                              X86SchedWriteWidths sched, Predicate prd,
                              bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
                           IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
                                sched.YMM, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
                                sched.XMM, IsCommutable>, EVEX_V128;
  }
}

multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               AVX512VLVectorVTInfo VTInfo,
                               X86SchedWriteWidths sched, Predicate prd,
                               bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
                            IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
                                 sched.YMM, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
                                 sched.XMM, IsCommutable>, EVEX_V128;
  }
}

multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
                                  sched, prd, IsCommutable>,
                                  REX_W, EVEX_CD8<64, CD8VF>;
}

multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
                                  sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
}

multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
                                 sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
                                 WIG;
}
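
// The _vl_w/_d/_q wrappers above (and _vl_b below) stamp out the full EVEX
// opcode matrix for one lane type. Illustrative expansion (not an actual
// definition in this file): a use such as
//   defm VPADDB : avx512_binop_rm_vl_b<0xFC, "vpaddb", add, SchedWriteVecALU,
//                                      HasBWI, 1>;
// would yield VPADDBZ128rr/rm, VPADDBZ256rr/rm and VPADDBZrr/rm, each with
// masked rrk/rrkz (and rmk/rmkz) variants via AVX512_maskable.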

multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
                                 sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
                                 WIG;
}

multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd, bit IsCommutable = 0> {
  defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
                                IsCommutable>;

  defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
                                IsCommutable>;
}

multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd, bit IsCommutable = 0> {
  defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
                                IsCommutable>;

  defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
                                IsCommutable>;
}

multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched,
                                  bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
                                    sched, HasAVX512, IsCommutable>,
              avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
                                    sched, HasBWI, IsCommutable>;
}

multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            SDNode OpNode,X86VectorVTInfo _Src,
                            X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
                            bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
                            "$src2, $src1","$src1, $src2",
                            (_Dst.VT (OpNode
                                         (_Src.VT _Src.RC:$src1),
                                         (_Src.VT _Src.RC:$src2))),
                            IsCommutable>,
                            AVX512BIBase, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
                        "$src2, $src1", "$src1, $src2",
                        (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                         (_Src.LdFrag addr:$src2)))>,
                        AVX512BIBase, EVEX_4V,
                        Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                    (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
                    OpcodeStr,
                    "${src2}"#_Brdct.BroadcastStr#", $src1",
                    "$src1, ${src2}"#_Brdct.BroadcastStr,
                    (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
                                 (_Brdct.VT (_Brdct.BroadcastLdFrag addr:$src2)))))>,
                    AVX512BIBase, EVEX_4V, EVEX_B,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
}

defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
                                    SchedWriteVecALU, 1>;
defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
                                    SchedWriteVecALU, 0>;
defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat,
                                    SchedWriteVecALU, HasBWI, 0>;
defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
                                     SchedWriteVecALU, HasBWI, 1>;
defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat,
                                     SchedWriteVecALU, HasBWI, 0>;
defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
                                    SchedWritePMULLD, HasAVX512, 1>, T8PD;
defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
                                    SchedWriteVecIMul, HasBWI, 1>;
defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
                                    SchedWriteVecIMul, HasDQI, 1>, T8PD,
                                    NotEVEX2VEXConvertible;
defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
                                    HasBWI, 1>;
defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
                                     HasBWI, 1>;
defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
                                      SchedWriteVecIMul, HasBWI, 1>, T8PD;
defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", avgceilu,
                                   SchedWriteVecALU, HasBWI, 1>;
defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
                                    SchedWriteVecIMul, HasAVX512, 1>, T8PD;
defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
                                     SchedWriteVecIMul, HasAVX512, 1>;

multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
                            X86SchedWriteWidths sched,
                            AVX512VLVectorVTInfo _SrcVTInfo,
                            AVX512VLVectorVTInfo _DstVTInfo,
                            SDNode OpNode, Predicate prd, bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
                                 _SrcVTInfo.info512, _DstVTInfo.info512,
                                 v8i64_info, IsCommutable>,
                                 EVEX_V512, EVEX_CD8<64, CD8VF>, REX_W;
  let Predicates = [HasVLX, prd] in {
    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
                                      _SrcVTInfo.info256, _DstVTInfo.info256,
                                      v4i64x_info, IsCommutable>,
                                      EVEX_V256, EVEX_CD8<64, CD8VF>, REX_W;
    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
                                      _SrcVTInfo.info128, _DstVTInfo.info128,
                                      v2i64x_info, IsCommutable>,
                                      EVEX_V128, EVEX_CD8<64, CD8VF>, REX_W;
  }
}

defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
                                       avx512vl_i8_info, avx512vl_i8_info,
                                       X86multishift, HasVBMI, 0>, T8PD;

multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
                            X86FoldableSchedWrite sched> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
                            OpcodeStr,
                            "${src2}"#_Src.BroadcastStr#", $src1",
                            "$src1, ${src2}"#_Src.BroadcastStr,
                            (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
                                        (_Src.VT (_Src.BroadcastLdFrag addr:$src2)))))>,
                            EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
                            Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
                           SDNode OpNode,X86VectorVTInfo _Src,
                           X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
                           bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
                            "$src2, $src1","$src1, $src2",
                            (_Dst.VT (OpNode
                                         (_Src.VT _Src.RC:$src1),
                                         (_Src.VT _Src.RC:$src2))),
                            IsCommutable, IsCommutable>,
                            EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
                        "$src2, $src1", "$src1, $src2",
                        (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                         (_Src.LdFrag addr:$src2)))>,
                        EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
                        Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
                                v32i16_info, SchedWriteShuffle.ZMM>,
                avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
                                 v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
                                     v16i16x_info, SchedWriteShuffle.YMM>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
                                      v16i16x_info, SchedWriteShuffle.YMM>,
                                      EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
                                     v8i16x_info, SchedWriteShuffle.XMM>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
                                      v8i16x_info, SchedWriteShuffle.XMM>,
                                      EVEX_V128;
  }
}
multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
                                   SDNode OpNode> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info,
                                SchedWriteShuffle.ZMM>, EVEX_V512, WIG;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
                                     v32i8x_info, SchedWriteShuffle.YMM>,
                                     EVEX_V256, WIG;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
                                     v16i8x_info, SchedWriteShuffle.XMM>,
                                     EVEX_V128, WIG;
  }
}

multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
                         SDNode OpNode, AVX512VLVectorVTInfo _Src,
                         AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
                                _Dst.info512, SchedWriteVecIMul.ZMM,
                                IsCommutable>, EVEX_V512;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
                                     _Dst.info256, SchedWriteVecIMul.YMM,
                                     IsCommutable>, EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
                                     _Dst.info128, SchedWriteVecIMul.XMM,
                                     IsCommutable>, EVEX_V128;
  }
}

defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;

defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
                                avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, WIG;
defm VPMADDWD : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
                              avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, WIG;

defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

// PMULLQ: Use the 512-bit version to implement the 128/256-bit versions when
// VLX is unavailable.
let Predicates = [HasDQI, NoVLX] in {
  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG
                (VPMULLQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;
  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (VPMULLQZrmb
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    addr:$src2),
             sub_ymm)>;

  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG
                (VPMULLQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (VPMULLQZrmb
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    addr:$src2),
             sub_xmm)>;
}

multiclass avx512_min_max_lowering<string Instr, SDNode OpNode> {
  def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rr")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;
  def : Pat<(v4i64 (OpNode (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rmb")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    addr:$src2),
             sub_ymm)>;

  def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rr")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
  def : Pat<(v2i64 (OpNode (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rmb")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    addr:$src2),
             sub_xmm)>;
}

let Predicates = [HasAVX512, NoVLX] in {
  defm : avx512_min_max_lowering<"VPMAXUQZ", umax>;
  defm : avx512_min_max_lowering<"VPMINUQZ", umin>;
  defm : avx512_min_max_lowering<"VPMAXSQZ", smax>;
  defm : avx512_min_max_lowering<"VPMINSQZ", smin>;
}
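
// Illustrative example of the NoVLX trick above: a v2i64 smax with only
// AVX512F widens both operands into ZMM via INSERT_SUBREG of IMPLICIT_DEF,
// runs VPMAXSQZrr across all eight lanes, and extracts the low 128 bits;
// whatever the instruction computes in the undefined upper lanes is simply
// discarded.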
//===----------------------------------------------------------------------===//
// AVX-512 Logical Instructions
//===----------------------------------------------------------------------===//

defm VPAND  : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
                                    SchedWriteVecLogic, HasAVX512, 1>;
defm VPOR   : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
                                    SchedWriteVecLogic, HasAVX512, 1>;
defm VPXOR  : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
                                    SchedWriteVecLogic, HasAVX512, 1>;
defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
                                    SchedWriteVecLogic, HasAVX512>;
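// There are no byte/word forms of the EVEX logic instructions, but bitwise
// operations are element-size agnostic, so the patterns below simply select
// v16i8/v8i16 (and wider) logic onto the "q" flavor; only the masked forms
// care about element size, which the vselect lowering further down handles.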
let Predicates = [HasVLX] in {
  def : Pat<(v16i8 (and VR128X:$src1, VR128X:$src2)),
            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (and VR128X:$src1, VR128X:$src2)),
            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v16i8 (or VR128X:$src1, VR128X:$src2)),
            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (or VR128X:$src1, VR128X:$src2)),
            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v16i8 (xor VR128X:$src1, VR128X:$src2)),
            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (xor VR128X:$src1, VR128X:$src2)),
            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v16i8 (X86andnp VR128X:$src1, VR128X:$src2)),
            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (X86andnp VR128X:$src1, VR128X:$src2)),
            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(and VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(and VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(or VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPORQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(or VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPORQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(xor VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(xor VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)),
            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)),
            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(v32i8 (or VR256X:$src1, VR256X:$src2)),
            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (or VR256X:$src1, VR256X:$src2)),
            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(v32i8 (xor VR256X:$src1, VR256X:$src2)),
            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (xor VR256X:$src1, VR256X:$src2)),
            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(v32i8 (X86andnp VR256X:$src1, VR256X:$src2)),
            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (X86andnp VR256X:$src1, VR256X:$src2)),
            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(and VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(and VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;

  def : Pat<(or VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPORQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(or VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPORQZ256rm VR256X:$src1, addr:$src2)>;

  def : Pat<(xor VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(xor VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
}

let Predicates = [HasAVX512] in {
  def : Pat<(v64i8 (and VR512:$src1, VR512:$src2)),
            (VPANDQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (and VR512:$src1, VR512:$src2)),
            (VPANDQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(v64i8 (or VR512:$src1, VR512:$src2)),
            (VPORQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (or VR512:$src1, VR512:$src2)),
            (VPORQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(v64i8 (xor VR512:$src1, VR512:$src2)),
            (VPXORQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (xor VR512:$src1, VR512:$src2)),
            (VPXORQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(v64i8 (X86andnp VR512:$src1, VR512:$src2)),
            (VPANDNQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (X86andnp VR512:$src1, VR512:$src2)),
            (VPANDNQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(and VR512:$src1, (loadv64i8 addr:$src2)),
            (VPANDQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(and VR512:$src1, (loadv32i16 addr:$src2)),
            (VPANDQZrm VR512:$src1, addr:$src2)>;

  def : Pat<(or VR512:$src1, (loadv64i8 addr:$src2)),
            (VPORQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(or VR512:$src1, (loadv32i16 addr:$src2)),
            (VPORQZrm VR512:$src1, addr:$src2)>;

  def : Pat<(xor VR512:$src1, (loadv64i8 addr:$src2)),
            (VPXORQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(xor VR512:$src1, (loadv32i16 addr:$src2)),
            (VPXORQZrm VR512:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR512:$src1, (loadv64i8 addr:$src2)),
            (VPANDNQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)),
            (VPANDNQZrm VR512:$src1, addr:$src2)>;
}

// Patterns to catch vselect with a different type than the logic op.
multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode,
                                   X86VectorVTInfo _,
                                   X86VectorVTInfo IntInfo> {
  // Masked register-register logical operations.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, _.RC:$src2)>;

  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
             _.RC:$src2)>;

  // Masked register-memory logical operations.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
                                            (load addr:$src2)))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
                                            (load addr:$src2)))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
             addr:$src2)>;
}

multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode,
                                         X86VectorVTInfo _,
                                         X86VectorVTInfo IntInfo> {
  // Register-broadcast logical operations.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert
                    (IntInfo.VT (OpNode _.RC:$src1,
                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert
                    (IntInfo.VT (OpNode _.RC:$src1,
                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rmbkz) _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
}

multiclass avx512_logical_lowering_sizes<string InstrStr, SDNode OpNode,
                                         AVX512VLVectorVTInfo SelectInfo,
                                         AVX512VLVectorVTInfo IntInfo> {
let Predicates = [HasVLX] in {
  defm : avx512_logical_lowering<InstrStr#"Z128", OpNode, SelectInfo.info128,
                                 IntInfo.info128>;
  defm : avx512_logical_lowering<InstrStr#"Z256", OpNode, SelectInfo.info256,
                                 IntInfo.info256>;
}
let Predicates = [HasAVX512] in {
  defm : avx512_logical_lowering<InstrStr#"Z", OpNode, SelectInfo.info512,
                                 IntInfo.info512>;
}
}

multiclass avx512_logical_lowering_sizes_bcast<string InstrStr, SDNode OpNode,
                                               AVX512VLVectorVTInfo SelectInfo,
                                               AVX512VLVectorVTInfo IntInfo> {
let Predicates = [HasVLX] in {
  defm : avx512_logical_lowering_bcast<InstrStr#"Z128", OpNode,
                                       SelectInfo.info128, IntInfo.info128>;
  defm : avx512_logical_lowering_bcast<InstrStr#"Z256", OpNode,
                                       SelectInfo.info256, IntInfo.info256>;
}
let Predicates = [HasAVX512] in {
  defm : avx512_logical_lowering_bcast<InstrStr#"Z", OpNode,
                                       SelectInfo.info512, IntInfo.info512>;
}
}

multiclass avx512_logical_lowering_types<string InstrStr, SDNode OpNode> {
  // i64 vselect with i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i8_info>;

  // i32 vselect with i64/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i8_info>;

  // f32 vselect with i64/i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i8_info>;

  // f64 vselect with i64/i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i8_info>;

  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"D", OpNode,
                                             avx512vl_f32_info,
                                             avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"Q", OpNode,
                                             avx512vl_f64_info,
                                             avx512vl_i64_info>;
}

defm : avx512_logical_lowering_types<"VPAND", and>;
defm : avx512_logical_lowering_types<"VPOR", or>;
defm : avx512_logical_lowering_types<"VPXOR", xor>;
defm : avx512_logical_lowering_types<"VPANDN", X86andnp>;
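// For example, a 64-bit-masked select wrapped around an i32 logic op,
//   (v2i64 (vselect_mask VK2WM:$mask, (bitconvert (v4i32 (and X, Y))), $src0)),
// is matched onto VPANDQZ128rrk: the logic op itself is type-agnostic, so
// only the select's element size has to pick the D/Q instruction flavor.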
//===----------------------------------------------------------------------===//
// AVX-512 FP arithmetic
//===----------------------------------------------------------------------===//

multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                            SDPatternOperator OpNode, SDNode VecNode,
                            X86FoldableSchedWrite sched, bit IsCommutable> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
                           Sched<[sched]>;

  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1,
                                          (_.ScalarIntMemFrags addr:$src2)))>,
                           Sched<[sched.Folded, sched.ReadAfterFold]>;
  let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.FRC:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
              Sched<[sched]> {
    let isCommutable = IsCommutable;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.ScalarMemOp:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                                 (_.ScalarLdFrag addr:$src2)))]>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
  }
}

multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                  SDNode VecNode, X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
                          "$rc, $src2, $src1", "$src1, $src2, $rc",
                          (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                   (i32 timm:$rc))>,
                          EVEX_B, EVEX_RC, Sched<[sched]>;
}
multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                SDNode OpNode, SDNode VecNode, SDNode SaeNode,
                                X86FoldableSchedWrite sched, bit IsCommutable,
                                string EVEX2VexOvrd> {
  let ExeDomain = _.ExeDomain in {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
                           Sched<[sched]>, SIMD_EXC;

  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1,
                                          (_.ScalarIntMemFrags addr:$src2)))>,
                           Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;

  let isCodeGenOnly = 1, Predicates = [HasAVX512],
      Uses = [MXCSR], mayRaiseFPException = 1 in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.FRC:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
              Sched<[sched]>,
              EVEX2VEXOverride<EVEX2VexOvrd#"rr"> {
    let isCommutable = IsCommutable;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.ScalarMemOp:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                                 (_.ScalarLdFrag addr:$src2)))]>,
              Sched<[sched.Folded, sched.ReadAfterFold]>,
              EVEX2VEXOverride<EVEX2VexOvrd#"rm">;
  }

  let Uses = [MXCSR] in
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                           (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
                           EVEX_B, Sched<[sched]>;
  }
}

multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                                SDNode VecNode, SDNode RndNode,
                                X86SchedWriteSizes sched, bit IsCommutable> {
  defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
                              sched.PS.Scl, IsCommutable>,
             avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode,
                                    sched.PS.Scl>,
             XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
                              sched.PD.Scl, IsCommutable>,
             avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode,
                                    sched.PD.Scl>,
             XD, REX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
  let Predicates = [HasFP16] in
    defm SHZ : avx512_fp_scalar<opc, OpcodeStr#"sh", f16x_info, OpNode,
                                VecNode, sched.PH.Scl, IsCommutable>,
               avx512_fp_scalar_round<opc, OpcodeStr#"sh", f16x_info, RndNode,
                                      sched.PH.Scl>,
               T_MAP5XS, EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>;
}

multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode VecNode, SDNode SaeNode,
                              X86SchedWriteSizes sched, bit IsCommutable> {
  defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
                                  VecNode, SaeNode, sched.PS.Scl, IsCommutable,
                                  NAME#"SS">,
             XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
                                  VecNode, SaeNode, sched.PD.Scl, IsCommutable,
                                  NAME#"SD">,
             XD, REX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
  let Predicates = [HasFP16] in {
    defm SHZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sh", f16x_info, OpNode,
                                    VecNode, SaeNode, sched.PH.Scl, IsCommutable,
                                    NAME#"SH">,
               T_MAP5XS, EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>,
               NotEVEX2VEXConvertible;
  }
}
defm VADD : avx512_binop_s_round<0x58, "vadd", any_fadd, X86fadds, X86faddRnds,
                                 SchedWriteFAddSizes, 1>;
defm VMUL : avx512_binop_s_round<0x59, "vmul", any_fmul, X86fmuls, X86fmulRnds,
                                 SchedWriteFMulSizes, 1>;
defm VSUB : avx512_binop_s_round<0x5C, "vsub", any_fsub, X86fsubs, X86fsubRnds,
                                 SchedWriteFAddSizes, 0>;
defm VDIV : avx512_binop_s_round<0x5E, "vdiv", any_fdiv, X86fdivs, X86fdivRnds,
                                 SchedWriteFDivSizes, 0>;
defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminSAEs,
                               SchedWriteFCmpSizes, 0>;
defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs,
                               SchedWriteFCmpSizes, 0>;
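// The rrb_Int variants expose the EVEX static-rounding and suppress-all-
// exceptions encodings on the scalar ops, e.g. (AT&T syntax):
//   vaddsd {rd-sae}, %xmm2, %xmm1, %xmm0   (static round toward -inf)
//   vminsd {sae}, %xmm2, %xmm1, %xmm0      (no FP exceptions reported)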
// MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
// X86fminc and X86fmaxc instead of X86fmin and X86fmax.
multiclass avx512_commutable_binop_s<bits<8> opc, string OpcodeStr,
                                     X86VectorVTInfo _, SDNode OpNode,
                                     X86FoldableSchedWrite sched,
                                     string EVEX2VEXOvrd> {
  let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.FRC:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
              Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr"> {
    let isCommutable = 1;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.ScalarMemOp:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                                 (_.ScalarLdFrag addr:$src2)))]>,
              Sched<[sched.Folded, sched.ReadAfterFold]>,
              EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
  }
}
defm VMINCSSZ : avx512_commutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
                                          SchedWriteFCmp.Scl, "VMINCSS">, XS,
                                          EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;

defm VMINCSDZ : avx512_commutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
                                          SchedWriteFCmp.Scl, "VMINCSD">, XD,
                                          REX_W, EVEX_4V, VEX_LIG,
                                          EVEX_CD8<64, CD8VT1>, SIMD_EXC;

defm VMAXCSSZ : avx512_commutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
                                          SchedWriteFCmp.Scl, "VMAXCSS">, XS,
                                          EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;

defm VMAXCSDZ : avx512_commutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
                                          SchedWriteFCmp.Scl, "VMAXCSD">, XD,
                                          REX_W, EVEX_4V, VEX_LIG,
                                          EVEX_CD8<64, CD8VT1>, SIMD_EXC;

defm VMINCSHZ : avx512_commutable_binop_s<0x5D, "vminsh", f16x_info, X86fminc,
                                          SchedWriteFCmp.Scl, "VMINCSH">, T_MAP5XS,
                                          EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC,
                                          NotEVEX2VEXConvertible;
defm VMAXCSHZ : avx512_commutable_binop_s<0x5F, "vmaxsh", f16x_info, X86fmaxc,
                                          SchedWriteFCmp.Scl, "VMAXCSH">, T_MAP5XS,
                                          EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC,
                                          NotEVEX2VEXConvertible;
multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDPatternOperator MaskOpNode,
                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
                            bit IsCommutable,
                            bit IsKCommutable = IsCommutable,
                            string suffix = _.Suffix,
                            string ClobberConstraint = "",
                            bit MayRaiseFPException = 1> {
  let ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR], mayRaiseFPException = MayRaiseFPException in {
  defm rr: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                 (ins _.RC:$src1, _.RC:$src2), OpcodeStr#suffix,
                                 "$src2, $src1", "$src1, $src2",
                                 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                                 (_.VT (MaskOpNode _.RC:$src1, _.RC:$src2)),
                                 ClobberConstraint, IsCommutable, IsKCommutable,
                                 IsKCommutable>, EVEX_4V, Sched<[sched]>;
  let mayLoad = 1 in {
    defm rm: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#suffix,
                                   "$src2, $src1", "$src1, $src2",
                                   (OpNode _.RC:$src1, (_.LdFrag addr:$src2)),
                                   (MaskOpNode _.RC:$src1, (_.LdFrag addr:$src2)),
                                   ClobberConstraint>,
                                   EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
    defm rmb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#suffix,
                                    "${src2}"#_.BroadcastStr#", $src1",
                                    "$src1, ${src2}"#_.BroadcastStr,
                                    (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
                                    (MaskOpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
                                    ClobberConstraint>,
                                    EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
  }
}

multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
                                  SDPatternOperator OpNodeRnd,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  string suffix = _.Suffix,
                                  string ClobberConstraint = ""> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr#suffix,
                            "$rc, $src2, $src1", "$src1, $src2, $rc",
                            (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc))),
                            0, 0, 0, vselect_mask, ClobberConstraint>,
                            EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
}

multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNodeSAE,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                            (_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>,
                            EVEX_4V, EVEX_B, Sched<[sched]>;
}

multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                             SDPatternOperator MaskOpNode,
                             Predicate prd, X86SchedWriteSizes sched,
                             bit IsCommutable = 0,
                             bit IsPD128Commutable = IsCommutable> {
  let Predicates = [prd] in {
  defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
                              sched.PS.ZMM, IsCommutable>, EVEX_V512, PS,
                              EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f64_info,
                              sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, REX_W,
                              EVEX_CD8<64, CD8VF>;
  }

  // Define only if AVX512VL feature is present.
  let Predicates = [prd, HasVLX] in {
    defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
                                   sched.PS.XMM, IsCommutable>, EVEX_V128, PS,
                                   EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
                                   sched.PS.YMM, IsCommutable>, EVEX_V256, PS,
                                   EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v2f64x_info,
                                   sched.PD.XMM, IsPD128Commutable,
                                   IsCommutable>, EVEX_V128, PD, REX_W,
                                   EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f64x_info,
                                   sched.PD.YMM, IsCommutable>, EVEX_V256, PD, REX_W,
                                   EVEX_CD8<64, CD8VF>;
  }
}

multiclass avx512_fp_binop_ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                              SDPatternOperator MaskOpNode,
                              X86SchedWriteSizes sched, bit IsCommutable = 0> {
  let Predicates = [HasFP16] in {
    defm PHZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v32f16_info,
                                sched.PH.ZMM, IsCommutable>, EVEX_V512, T_MAP5PS,
                                EVEX_CD8<16, CD8VF>;
  }
  let Predicates = [HasVLX, HasFP16] in {
    defm PHZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f16x_info,
                                   sched.PH.XMM, IsCommutable>, EVEX_V128, T_MAP5PS,
                                   EVEX_CD8<16, CD8VF>;
    defm PHZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f16x_info,
                                   sched.PH.YMM, IsCommutable>, EVEX_V256, T_MAP5PS,
                                   EVEX_CD8<16, CD8VF>;
  }
}

let Uses = [MXCSR] in
multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
                                   X86SchedWriteSizes sched> {
  let Predicates = [HasFP16] in {
    defm PHZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
                                      v32f16_info>,
                                      EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
  }
  defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
                                    v16f32_info>,
                                    EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
                                    v8f64_info>,
                                    EVEX_V512, PD, REX_W, EVEX_CD8<64, CD8VF>;
}

let Uses = [MXCSR] in
multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
                                 X86SchedWriteSizes sched> {
  let Predicates = [HasFP16] in {
    defm PHZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
                                    v32f16_info>,
                                    EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
  }
  defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
                                  v16f32_info>,
                                  EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
                                  v8f64_info>,
                                  EVEX_V512, PD, REX_W, EVEX_CD8<64, CD8VF>;
}

defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, fadd, HasAVX512,
                              SchedWriteFAddSizes, 1>,
            avx512_fp_binop_ph<0x58, "vadd", any_fadd, fadd, SchedWriteFAddSizes, 1>,
            avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
defm VMUL : avx512_fp_binop_p<0x59, "vmul", any_fmul, fmul, HasAVX512,
                              SchedWriteFMulSizes, 1>,
            avx512_fp_binop_ph<0x59, "vmul", any_fmul, fmul, SchedWriteFMulSizes, 1>,
            avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
defm VSUB : avx512_fp_binop_p<0x5C, "vsub", any_fsub, fsub, HasAVX512,
                              SchedWriteFAddSizes>,
            avx512_fp_binop_ph<0x5C, "vsub", any_fsub, fsub, SchedWriteFAddSizes>,
            avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", any_fdiv, fdiv, HasAVX512,
                              SchedWriteFDivSizes>,
            avx512_fp_binop_ph<0x5E, "vdiv", any_fdiv, fdiv, SchedWriteFDivSizes>,
            avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, X86fmin, HasAVX512,
                              SchedWriteFCmpSizes, 0>,
            avx512_fp_binop_ph<0x5D, "vmin", X86fmin, X86fmin, SchedWriteFCmpSizes, 0>,
            avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>;
defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, X86fmax, HasAVX512,
                              SchedWriteFCmpSizes, 0>,
            avx512_fp_binop_ph<0x5F, "vmax", X86fmax, X86fmax, SchedWriteFCmpSizes, 0>,
            avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>;
let isCodeGenOnly = 1 in {
  defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, X86fminc, HasAVX512,
                                 SchedWriteFCmpSizes, 1>,
               avx512_fp_binop_ph<0x5D, "vmin", X86fminc, X86fminc,
                                  SchedWriteFCmpSizes, 1>;
  defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, X86fmaxc, HasAVX512,
                                 SchedWriteFCmpSizes, 1>,
               avx512_fp_binop_ph<0x5F, "vmax", X86fmaxc, X86fmaxc,
                                  SchedWriteFCmpSizes, 1>;
}
let Uses = []<Register>, mayRaiseFPException = 0 in {
defm VAND  : avx512_fp_binop_p<0x54, "vand", null_frag, null_frag, HasDQI,
                               SchedWriteFLogicSizes, 1>;
defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, null_frag, HasDQI,
                               SchedWriteFLogicSizes, 0>;
defm VOR   : avx512_fp_binop_p<0x56, "vor", null_frag, null_frag, HasDQI,
                               SchedWriteFLogicSizes, 1>;
defm VXOR  : avx512_fp_binop_p<0x57, "vxor", null_frag, null_frag, HasDQI,
                               SchedWriteFLogicSizes, 1>;
}
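// Bitwise FP logic neither reads MXCSR nor raises FP exceptions, so the
// Uses/mayRaiseFPException defaults inherited from avx512_fp_packed are
// cleared above, and null_frag attaches no selection patterns here; FP
// bitwise logic is generally selected elsewhere (e.g. via the integer
// VPAND/VPOR/VPXOR/VPANDN forms).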
multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
                           EVEX_4V, Sched<[sched]>;
  defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix,
                           "$src2, $src1", "$src1, $src2",
                           (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
                           EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix,
                            "${src2}"#_.BroadcastStr#", $src1",
                            "$src1, ${src2}"#_.BroadcastStr,
                            (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
                            EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
                                  "$src2, $src1", "$src1, $src2",
                                  (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
                                  Sched<[sched]>;
  defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                  (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr#_.Suffix,
                                  "$src2, $src1", "$src1, $src2",
                                  (OpNode _.RC:$src1, (_.ScalarIntMemFrags addr:$src2))>,
                                  Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
                                X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in {
    defm PHZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v32f16_info>,
               avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v32f16_info>,
               EVEX_V512, T_MAP6PD, EVEX_CD8<16, CD8VF>;
    defm SHZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f16x_info>,
               avx512_fp_scalar_round<opcScaler, OpcodeStr#"sh", f16x_info, X86scalefsRnd, sched.Scl>,
               EVEX_4V, T_MAP6PD, EVEX_CD8<16, CD8VT1>;
  }
  defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>,
             avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>,
             EVEX_V512, EVEX_CD8<32, CD8VF>, T8PD;
  defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>,
             avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>,
             EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>, T8PD;
  defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>,
             avx512_fp_scalar_round<opcScaler, OpcodeStr#"ss", f32x_info,
                                    X86scalefsRnd, sched.Scl>,
             EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, T8PD;
  defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>,
             avx512_fp_scalar_round<opcScaler, OpcodeStr#"sd", f64x_info,
                                    X86scalefsRnd, sched.Scl>,
             EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>, REX_W, T8PD;

  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>,
                  EVEX_V128, EVEX_CD8<32, CD8VF>, T8PD;
    defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>,
                  EVEX_V256, EVEX_CD8<32, CD8VF>, T8PD;
    defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>,
                  EVEX_V128, REX_W, EVEX_CD8<64, CD8VF>, T8PD;
    defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>,
                  EVEX_V256, REX_W, EVEX_CD8<64, CD8VF>, T8PD;
  }

  let Predicates = [HasFP16, HasVLX] in {
    defm PHZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v8f16x_info>,
                  EVEX_V128, EVEX_CD8<16, CD8VF>, T_MAP6PD;
    defm PHZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v16f16x_info>,
                  EVEX_V256, EVEX_CD8<16, CD8VF>, T_MAP6PD;
  }
}
defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef",
                                    SchedWriteFAdd>, NotEVEX2VEXConvertible;
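// VSCALEF computes dst[i] = src1[i] * 2^floor(src2[i]) elementwise - roughly
// a vector ldexp, leaving the NaN/overflow special cases to the ISA
// definition. The rounding variants additionally take a static rounding mode.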
//===----------------------------------------------------------------------===//
// AVX-512 VPTESTM instructions
//===----------------------------------------------------------------------===//

multiclass avx512_vptest<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  // NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG.
  // There are just too many permutations due to commutability and bitcasts.
  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
                                (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                                "$src2, $src1", "$src1, $src2",
                                (null_frag), (null_frag), 1>,
                                EVEX_4V, Sched<[sched]>;
  let mayLoad = 1 in
  defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
                                (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                                "$src2, $src1", "$src1, $src2",
                                (null_frag), (null_frag)>,
                                EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                                Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
  defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
                                 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                                 "${src2}"#_.BroadcastStr#", $src1",
                                 "$src1, ${src2}"#_.BroadcastStr,
                                 (null_frag), (null_frag)>,
                                 EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                                 Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr,
                                  X86SchedWriteWidths sched,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512>,
           avx512_vptest_mb<opc, OpcodeStr, sched.ZMM, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256>,
                avx512_vptest_mb<opc, OpcodeStr, sched.YMM, _.info256>, EVEX_V256;
    defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128>,
                avx512_vptest_mb<opc, OpcodeStr, sched.XMM, _.info128>, EVEX_V128;
  }
}

multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr,
                            X86SchedWriteWidths sched> {
  defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", sched,
                                  avx512vl_i32_info>;
  defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", sched,
                                  avx512vl_i64_info>, REX_W;
}

multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in {
    defm WZ: avx512_vptest<opc, OpcodeStr#"w", sched.ZMM,
                           v32i16_info>, EVEX_V512, REX_W;
    defm BZ: avx512_vptest<opc, OpcodeStr#"b", sched.ZMM,
                           v64i8_info>, EVEX_V512;
  }

  let Predicates = [HasVLX, HasBWI] in {
    defm WZ256: avx512_vptest<opc, OpcodeStr#"w", sched.YMM,
                              v16i16x_info>, EVEX_V256, REX_W;
    defm WZ128: avx512_vptest<opc, OpcodeStr#"w", sched.XMM,
                              v8i16x_info>, EVEX_V128, REX_W;
    defm BZ256: avx512_vptest<opc, OpcodeStr#"b", sched.YMM,
                              v32i8x_info>, EVEX_V256;
    defm BZ128: avx512_vptest<opc, OpcodeStr#"b", sched.XMM,
                              v16i8x_info>, EVEX_V128;
  }
}

multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
                                   X86SchedWriteWidths sched> :
  avx512_vptest_wb<opc_wb, OpcodeStr, sched>,
  avx512_vptest_dq<opc_dq, OpcodeStr, sched>;

defm VPTESTM  : avx512_vptest_all_forms<0x26, 0x27, "vptestm",
                                        SchedWriteVecLogic>, T8PD;
defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm",
                                        SchedWriteVecLogic>, T8XS;
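// VPTESTM sets mask bit i to ((src1[i] & src2[i]) != 0); VPTESTNM performs
// the same test against == 0. Both share the 0x26/0x27 opcodes and differ
// only in the mandatory prefix (T8PD vs. T8XS).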
//===----------------------------------------------------------------------===//
// AVX-512 Shift instructions
//===----------------------------------------------------------------------===//

multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
                            string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_.VT (OpNode _.RC:$src1, (i8 timm:$src2)))>,
                            Sched<[sched]>;
  defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
                            (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_.VT (OpNode (_.VT (_.LdFrag addr:$src1)),
                                          (i8 timm:$src2)))>,
                            Sched<[sched.Folded]>;
  }
}

multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
                             string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
                             (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
                             "$src2, ${src1}"#_.BroadcastStr, "${src1}"#_.BroadcastStr#", $src2",
                             (_.VT (OpNode (_.BroadcastLdFrag addr:$src1), (i8 timm:$src2)))>,
                             EVEX_B, Sched<[sched.Folded]>;
}
multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, ValueType SrcVT,
                            X86VectorVTInfo _> {
  // src2 is always 128-bit
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>,
                            AVX512BIBase, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>,
                            AVX512BIBase,
                            EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched, ValueType SrcVT,
                              AVX512VLVectorVTInfo VTInfo,
                              Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT,
                            VTInfo.info512>, EVEX_V512,
                            EVEX_CD8<VTInfo.info512.EltSize, CD8VQ>;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT,
                                 VTInfo.info256>, EVEX_V256,
                                 EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
    defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT,
                                 VTInfo.info128>, EVEX_V128,
                                 EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
  }
}

multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
                              string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched,
                              bit NotEVEX2VEXConvertibleQ = 0> {
  defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32,
                              avx512vl_i32_info, HasAVX512>;
  let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
  defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
                              avx512vl_i64_info, HasAVX512>, REX_W;
  defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
                              avx512vl_i16_info, HasBWI>;
}

multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched,
                                  AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in
  defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched.ZMM, VTInfo.info512>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM,
                               VTInfo.info512>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                                sched.YMM, VTInfo.info256>,
               avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM,
                                 VTInfo.info256>, EVEX_V256;
    defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                                sched.XMM, VTInfo.info128>,
               avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM,
                                 VTInfo.info128>, EVEX_V128;
  }
}

multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM,
                              string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
  defm WZ:    avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                               sched.ZMM, v32i16_info>, EVEX_V512, WIG;
  let Predicates = [HasVLX, HasBWI] in {
    defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                                 sched.YMM, v16i16x_info>, EVEX_V256, WIG;
    defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                                 sched.XMM, v8i16x_info>, EVEX_V128, WIG;
  }
}

multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
                               Format ImmFormR, Format ImmFormM,
                               string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched,
                               bit NotEVEX2VEXConvertibleQ = 0> {
  defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
                                 sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
  let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
  defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
                                 sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, REX_W;
}
defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
                                 SchedWriteVecShiftImm>,
             avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;

defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
                                 SchedWriteVecShiftImm>,
             avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;

defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
                                 SchedWriteVecShiftImm, 1>,
             avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;

defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;

defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl,
                                SchedWriteVecShift>;
defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra,
                                SchedWriteVecShift, 1>;
defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
                                SchedWriteVecShift>;
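// AVX/AVX2 provide no 64-bit arithmetic shifts, so the "q" forms of VPSRA
// above are EVEX-only and are marked not EVEX-to-VEX convertible (the
// NotEVEX2VEXConvertibleQ = 1 argument) to keep the compression pass away
// from them.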
// Use the 512-bit VPSRA/VPSRAI versions to implement v2i64/v4i64 in case of
// NoVLX.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                VR128X:$src2)), sub_ymm)>;

  def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                VR128X:$src2)), sub_xmm)>;

  def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                timm:$src2)), sub_ymm)>;

  def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                timm:$src2)), sub_xmm)>;
}

//===-------------------------------------------------------------------===//
// Variable Bit Shifts
//===-------------------------------------------------------------------===//

multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>,
                            AVX5128IBase, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_.VT (OpNode _.RC:$src1,
                                          (_.VT (_.LdFrag addr:$src2))))>,
                            AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                            Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                             (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                             "${src2}"#_.BroadcastStr#", $src1",
                             "$src1, ${src2}"#_.BroadcastStr,
                             (_.VT (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))>,
                             AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                             Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
    defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
  }
}

multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
                                  SDNode OpNode, X86SchedWriteWidths sched> {
  defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched,
                                  avx512vl_i32_info>;
  defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched,
                                  avx512vl_i64_info>, REX_W;
}
// Use the 512-bit version to implement the 128/256-bit operations in case of
// NoVLX.
multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
                                     SDNode OpNode, list<Predicate> p> {
  let Predicates = p in {
    def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
                                    (_.info256.VT _.info256.RC:$src2))),
              (EXTRACT_SUBREG
                (!cast<Instruction>(OpcodeStr#"Zrr")
                  (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                  (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
                sub_ymm)>;

    def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
                                    (_.info128.VT _.info128.RC:$src2))),
              (EXTRACT_SUBREG
                (!cast<Instruction>(OpcodeStr#"Zrr")
                  (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                  (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
                sub_xmm)>;
  }
}
multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
                              SDNode OpNode, X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
  defm WZ: avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>,
           EVEX_V512, REX_W;
  let Predicates = [HasVLX, HasBWI] in {
    defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>,
                EVEX_V256, REX_W;
    defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>,
                EVEX_V128, REX_W;
  }
}

defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", X86vshlv, SchedWriteVarVecShift>,
              avx512_var_shift_w<0x12, "vpsllvw", X86vshlv, SchedWriteVarVecShift>;

defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", X86vsrav, SchedWriteVarVecShift>,
              avx512_var_shift_w<0x11, "vpsravw", X86vsrav, SchedWriteVarVecShift>;

defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecShift>,
              avx512_var_shift_w<0x10, "vpsrlvw", X86vsrlv, SchedWriteVarVecShift>;

defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;

defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX]>;
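// Only these four need the widening lowering: AVX2 already provides 128/256-bit
// vpsllvd/q and vpsrlvd/q, but it has no variable arithmetic shift on qwords
// and no variable word shifts at all.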
// Use the 512-bit VPROL/VPROLI versions to implement v2i64/v4i64 + v4i32/v8i32
// in case of NoVLX.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
              sub_xmm)>;
  def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
              sub_ymm)>;

  def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
              sub_xmm)>;
  def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
              sub_ymm)>;

  def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                timm:$src2)), sub_xmm)>;
  def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPROLQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                timm:$src2)), sub_ymm)>;

  def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                timm:$src2)), sub_xmm)>;
  def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPROLDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                timm:$src2)), sub_ymm)>;
}

// Use the 512-bit VPROR/VPRORI versions to implement v2i64/v4i64 + v4i32/v8i32
// in case of NoVLX.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
              sub_xmm)>;
  def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORVQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
              sub_ymm)>;

  def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
              sub_xmm)>;
  def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORVDZrr
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
              sub_ymm)>;

  def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                timm:$src2)), sub_xmm)>;
  def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPRORQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                timm:$src2)), sub_ymm)>;

  def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                timm:$src2)), sub_xmm)>;
  def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v16i32
              (VPRORDZri
                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                timm:$src2)), sub_ymm)>;
}

//===-------------------------------------------------------------------===//
// 1-src variable permutation VPERMW/D/Q
//===-------------------------------------------------------------------===//

multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in
  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256;
}

multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
                                  string OpcodeStr, SDNode OpNode,
                                  X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in
  defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched, VTInfo.info512>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                               sched, VTInfo.info512>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in
  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched, VTInfo.info256>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                               sched, VTInfo.info256>, EVEX_V256;
}

multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
                           Predicate prd, SDNode OpNode,
                           X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
  let Predicates = [prd] in
  defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
             EVEX_V512;
  let Predicates = [HasVLX, prd] in {
    defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
               EVEX_V256;
    defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>,
               EVEX_V128;
  }
}

defm VPERMW : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
                              WriteVarShuffle256, avx512vl_i16_info>, REX_W;
defm VPERMB : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
                              WriteVarShuffle256, avx512vl_i8_info>;

defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
                                    WriteVarShuffle256, avx512vl_i32_info>;
defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
                                    WriteVarShuffle256, avx512vl_i64_info>, REX_W;
defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
                                     WriteFVarShuffle256, avx512vl_f32_info>;
defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
                                     WriteFVarShuffle256, avx512vl_f64_info>, REX_W;

defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
                                     X86VPermi, WriteShuffle256, avx512vl_i64_info>,
                                     EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, REX_W;
defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
                                      X86VPermi, WriteFShuffle256, avx512vl_f64_info>,
                                      EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, REX_W;
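// VPERMD/Q/PS/PD are lane-crossing permutes, so the ISA defines them only for
// 256/512-bit vectors (hence no Z128 form in avx512_vperm_dq_sizes); VPERMQ
// and VPERMPD additionally come in the immediate-control forms above, reusing
// the shift-immediate multiclasses.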
//===----------------------------------------------------------------------===//
// AVX-512 - VPERMIL
//===----------------------------------------------------------------------===//

multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _,
                             X86VectorVTInfo Ctrl> {
  defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (OpNode _.RC:$src1,
                                         (Ctrl.VT Ctrl.RC:$src2)))>,
                           T8PD, EVEX_4V, Sched<[sched]>;
  defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (OpNode
                                    _.RC:$src1,
                                    (Ctrl.VT (Ctrl.LdFrag addr:$src2))))>,
                           T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                           Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                            "${src2}"#_.BroadcastStr#", $src1",
                            "$src1, ${src2}"#_.BroadcastStr,
                            (_.VT (OpNode
                                     _.RC:$src1,
                                     (Ctrl.VT (Ctrl.BroadcastLdFrag addr:$src2))))>,
                            T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
                            Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
                                    X86SchedWriteWidths sched,
                                    AVX512VLVectorVTInfo _,
                                    AVX512VLVectorVTInfo Ctrl> {
  let Predicates = [HasAVX512] in {
    defm Z    : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM,
                                  _.info512, Ctrl.info512>, EVEX_V512;
  }
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM,
                                  _.info128, Ctrl.info128>, EVEX_V128;
    defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM,
                                  _.info256, Ctrl.info256>, EVEX_V256;
  }
}
multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
                         AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
  defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
                                      _, Ctrl>;
  defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
                                    X86VPermilpi, SchedWriteFShuffle, _>,
             EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
}

let ExeDomain = SSEPackedSingle in
defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
                               avx512vl_i32_info>;
let ExeDomain = SSEPackedDouble in
defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
                               avx512vl_i64_info>, VEX_W1X;

//===----------------------------------------------------------------------===//
// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
//===----------------------------------------------------------------------===//

defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
                                      X86PShufd, SchedWriteShuffle, avx512vl_i32_info>,
                                      EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
                                  X86PShufhw, SchedWriteShuffle>,
                                  EVEX, AVX512XSIi8Base;
defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
                                  X86PShuflw, SchedWriteShuffle>,
                                  EVEX, AVX512XDIi8Base;

//===----------------------------------------------------------------------===//
// AVX-512 - VPSHUFB
//===----------------------------------------------------------------------===//

multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
  defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>,
          EVEX_V512;

  let Predicates = [HasVLX, HasBWI] in {
  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>,
             EVEX_V256;
  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>,
             EVEX_V128;
  }
}

defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
                                  SchedWriteVarShuffle>, WIG;

//===----------------------------------------------------------------------===//
// Move Low to High and High to Low packed FP Instructions
//===----------------------------------------------------------------------===//

def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>,
                            Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V;
let isCommutable = 1 in
def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
                            Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V;

//===----------------------------------------------------------------------===//
// VMOVHPS/PD VMOVLPS Instructions
// All patterns were taken from the SSE implementation.
//===----------------------------------------------------------------------===//

multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
                                  SDPatternOperator OpNode,
                                  X86VectorVTInfo _> {
  let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in
  def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
                  (ins _.RC:$src1, f64mem:$src2),
                  !strconcat(OpcodeStr,
                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set _.RC:$dst,
                        (OpNode _.RC:$src1,
                                (_.VT (bitconvert
                                       (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>,
                  Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX_4V;
}

// No patterns for MOVLPS/MOVHPS, as the Movlhps node should only be created in
// SSE1; the MOVLPS pattern is even more complex.
defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag,
                                          v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
                                          v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, REX_W;
defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag,
                                          v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd,
                                          v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, REX_W;

let Predicates = [HasAVX512] in {
  // VMOVHPD patterns
  def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))),
            (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;

  // VMOVLPD patterns
  def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload64 addr:$src2))),
            (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
}

let SchedRW = [WriteFStore] in {
let mayStore = 1, hasSideEffects = 0 in
def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
                              (ins f64mem:$dst, VR128X:$src),
                              "vmovhps\t{$src, $dst|$dst, $src}",
                              []>, EVEX, EVEX_CD8<32, CD8VT2>;
def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
                              (ins f64mem:$dst, VR128X:$src),
                              "vmovhpd\t{$src, $dst|$dst, $src}",
                              [(store (f64 (extractelt
                                            (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
                                            (iPTR 0))), addr:$dst)]>,
                              EVEX, EVEX_CD8<64, CD8VT1>, REX_W;
let mayStore = 1, hasSideEffects = 0 in
def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
                              (ins f64mem:$dst, VR128X:$src),
                              "vmovlps\t{$src, $dst|$dst, $src}",
                              []>, EVEX, EVEX_CD8<32, CD8VT2>;
def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
                              (ins f64mem:$dst, VR128X:$src),
                              "vmovlpd\t{$src, $dst|$dst, $src}",
                              [(store (f64 (extractelt (v2f64 VR128X:$src),
                                            (iPTR 0))), addr:$dst)]>,
                              EVEX, EVEX_CD8<64, CD8VT1>, REX_W;
} // SchedRW

let Predicates = [HasAVX512] in {
  // VMOVHPD patterns
  def : Pat<(store (f64 (extractelt
                         (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
                         (iPTR 0))), addr:$dst),
            (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
}
//===----------------------------------------------------------------------===//
// FMA - Fused Multiply Operations
//
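// For reference (illustrative, not normative): with mnemonic suffix ABC the
// FMA forms below compute dst = srcA * srcB +/- srcC, i.e.
//   213: dst = src2 * src1 + src3
//   231: dst = src2 * src3 + src1
//   132: dst = src1 * src3 + src2
// The memory operand is always $src3, which is why all three orders exist.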

multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
                               X86VectorVTInfo _> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)),
          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
          EVEX_4V, Sched<[sched]>;

  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))),
          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
          EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold,
                          sched.ReadAfterFold]>;

  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.ScalarMemOp:$src3),
           OpcodeStr, !strconcat("${src3}", _.BroadcastStr, ", $src2"),
           !strconcat("$src2, ${src3}", _.BroadcastStr),
           (OpNode _.RC:$src2,
                   _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src3))),
           (MaskOpNode _.RC:$src2,
                       _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>,
           EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
                                   sched.ReadAfterFold]>;
  }
}

multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR] in
  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
           (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))),
           (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>,
           EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
}

multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                                   SDNode MaskOpNode, SDNode OpNodeRnd,
                                   X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _,
                                   Predicate prd = HasAVX512> {
  let Predicates = [prd] in {
    defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                 sched.ZMM, _.info512>,
             avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                   _.info512>,
             EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, prd] in {
    defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.YMM, _.info256>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.XMM, _.info128>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}

multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                              SDNode MaskOpNode, SDNode OpNodeRnd> {
  defm PH : avx512_fma3p_213_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f16_info, HasFP16>, T_MAP6PD;
  defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f32_info>, T8PD;
  defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f64_info>, T8PD, REX_W;
}

defm VFMADD213    : avx512_fma3p_213_f<0xA8, "vfmadd213", any_fma,
                                       fma, X86FmaddRnd>;
defm VFMSUB213    : avx512_fma3p_213_f<0xAA, "vfmsub213", X86any_Fmsub,
                                       X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub,
                                       X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd,
                                       X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD213   : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86any_Fnmadd,
                                       X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB213   : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86any_Fnmsub,
                                       X86Fnmsub, X86FnmsubRnd>;


multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
                               X86VectorVTInfo _> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (null_frag),
          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
          EVEX_4V, Sched<[sched]>;

  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)),
          (_.VT (MaskOpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
          EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold,
                          sched.ReadAfterFold]>;

  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.ScalarMemOp:$src3),
           OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
           "$src2, ${src3}"#_.BroadcastStr,
           (_.VT (OpNode _.RC:$src2,
                         (_.VT (_.BroadcastLdFrag addr:$src3)),
                         _.RC:$src1)),
           (_.VT (MaskOpNode _.RC:$src2,
                             (_.VT (_.BroadcastLdFrag addr:$src3)),
                             _.RC:$src1)), 1, 0>, EVEX_4V, EVEX_B,
           Sched<[sched.Folded, sched.ReadAfterFold,
                  sched.ReadAfterFold]>;
  }
}

multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR] in
  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
           (null_frag),
           (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))),
           1, 1>, EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
}

multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                                   SDNode MaskOpNode, SDNode OpNodeRnd,
                                   X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _,
                                   Predicate prd = HasAVX512> {
  let Predicates = [prd] in {
    defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                 sched.ZMM, _.info512>,
             avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                   _.info512>,
             EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, prd] in {
    defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.YMM, _.info256>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.XMM, _.info128>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}

multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                              SDNode MaskOpNode, SDNode OpNodeRnd> {
  defm PH : avx512_fma3p_231_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f16_info, HasFP16>, T_MAP6PD;
  defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode,
                                    MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f32_info>, T8PD;
  defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f64_info>, T8PD, REX_W;
}

defm VFMADD231    : avx512_fma3p_231_f<0xB8, "vfmadd231", any_fma,
                                       fma, X86FmaddRnd>;
defm VFMSUB231    : avx512_fma3p_231_f<0xBA, "vfmsub231", X86any_Fmsub,
                                       X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub,
                                       X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd,
                                       X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD231   : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86any_Fnmadd,
                                       X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB231   : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86any_Fnmsub,
                                       X86Fnmsub, X86FnmsubRnd>;

multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
                               X86VectorVTInfo _> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (null_frag),
          (_.VT (MaskOpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1>,
          EVEX_4V, Sched<[sched]>;

  // The pattern is in 312 order so that the load is in a different place from
  // the 213 and 231 patterns; this helps tablegen's duplicate pattern
  // detection.
  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)),
          (_.VT (MaskOpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
          EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold,
                          sched.ReadAfterFold]>;

  // The pattern is in 312 order so that the load is in a different place from
  // the 213 and 231 patterns; this helps tablegen's duplicate pattern
  // detection.
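  // (Since FP multiplication commutes, the 312 form (mem3 * src1) + src2
  // computes the same value as the canonical 132 form (src1 * mem3) + src2;
  // only the textual pattern differs.)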
  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.ScalarMemOp:$src3),
           OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
           "$src2, ${src3}"#_.BroadcastStr,
           (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
                         _.RC:$src1, _.RC:$src2)),
           (_.VT (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
                             _.RC:$src1, _.RC:$src2)), 1, 0>,
           EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
                                   sched.ReadAfterFold]>;
  }
}

multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR] in
  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
           (null_frag),
           (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))),
           1, 1>, EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
}

multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                                   SDNode MaskOpNode, SDNode OpNodeRnd,
                                   X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _,
                                   Predicate prd = HasAVX512> {
  let Predicates = [prd] in {
    defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                 sched.ZMM, _.info512>,
             avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                   _.info512>,
             EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, prd] in {
    defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.YMM, _.info256>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.XMM, _.info128>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}

multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                              SDNode MaskOpNode, SDNode OpNodeRnd> {
  defm PH : avx512_fma3p_132_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f16_info, HasFP16>, T_MAP6PD;
  defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f32_info>, T8PD;
  defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f64_info>, T8PD, REX_W;
}

defm VFMADD132    : avx512_fma3p_132_f<0x98, "vfmadd132", any_fma,
                                       fma, X86FmaddRnd>;
defm VFMSUB132    : avx512_fma3p_132_f<0x9A, "vfmsub132", X86any_Fmsub,
                                       X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub,
                                       X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd,
                                       X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD132   : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86any_Fnmadd,
                                       X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB132   : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86any_Fnmsub,
                                       X86Fnmsub, X86FnmsubRnd>;

// Scalar FMA
multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                               dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
  defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
              (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
              "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
              EVEX_4V, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;

  let mayLoad = 1 in
  defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
              (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
              "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
              EVEX_4V, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold,
                              SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;

  let Uses = [MXCSR] in
  defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
               (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
               OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
               EVEX_4V, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;

  let isCodeGenOnly = 1, isCommutable = 1 in {
    def r : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
                   (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
                   !strconcat(OpcodeStr,
                              "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                   !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>, EVEX_4V, SIMD_EXC;
    def m : AVX512<opc, MRMSrcMem, (outs _.FRC:$dst),
                   (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
                   !strconcat(OpcodeStr,
                              "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                   [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold,
                                    SchedWriteFMA.Scl.ReadAfterFold]>, EVEX_4V, SIMD_EXC;

    let Uses = [MXCSR] in
    def rb : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
                    (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
                    !strconcat(OpcodeStr,
                               "\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"),
                    !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
                    Sched<[SchedWriteFMA.Scl]>, EVEX_4V;
  } // isCodeGenOnly = 1
} // Constraints = "$src1 = $dst"
}

multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
                            string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd,
                            X86VectorVTInfo _, string SUFF> {
  let ExeDomain = _.ExeDomain in {
  defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
                // Operands for the intrinsic form are in 123 order to preserve
                // passthru semantics.
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
                                           _.FRC:$src3))),
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
                                           (_.ScalarLdFrag addr:$src3)))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
                                           _.FRC:$src3, (i32 timm:$rc)))), 0>;

  defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
                                           _.FRC:$src1))),
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
                                           (_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
                                           _.FRC:$src1, (i32 timm:$rc)))), 1>;

  // One pattern is in 312 order so that the load is in a different place from
  // the 213 and 231 patterns; this helps tablegen's duplicate pattern
  // detection.
  defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
                                           _.FRC:$src2))),
                (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
                                           _.FRC:$src1, _.FRC:$src2))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
                                           _.FRC:$src2, (i32 timm:$rc)))), 1>;
  }
}

multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
                        string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd> {
  let Predicates = [HasAVX512] in {
    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
                                 OpNodeRnd, f32x_info, "SS">,
                                 EVEX_CD8<32, CD8VT1>, VEX_LIG, T8PD;
    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
                                 OpNodeRnd, f64x_info, "SD">,
                                 EVEX_CD8<64, CD8VT1>, VEX_LIG, REX_W, T8PD;
  }
  let Predicates = [HasFP16] in {
    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
                                 OpNodeRnd, f16x_info, "SH">,
                                 EVEX_CD8<16, CD8VT1>, VEX_LIG, T_MAP6PD;
  }
}

defm VFMADD  : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", any_fma, X86FmaddRnd>;
defm VFMSUB  : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86any_Fmsub, X86FmsubRnd>;
defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86any_Fnmadd, X86FnmaddRnd>;
defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86any_Fnmsub, X86FnmsubRnd>;

multiclass avx512_scalar_fma_patterns<SDPatternOperator Op, SDNode MaskedOp,
                                      SDNode RndOp, string Prefix,
                                      string Suffix, SDNode Move,
                                      X86VectorVTInfo _, PatLeaf ZeroFP,
                                      Predicate prd = HasAVX512> {
  let Predicates = [prd] in {
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    _.FRC:$src3))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2, _.FRC:$src3,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    (_.ScalarLdFrag addr:$src3)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zm_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))),
              (!cast<I>(Prefix#"132"#Suffix#"Zm_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp _.FRC:$src2,
                  (_.EltVT (extractelt (_.VT
                                        VR128X:$src1), (iPTR 0))),
                  _.FRC:$src3),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp _.FRC:$src2,
                  (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                  (_.ScalarLdFrag addr:$src3)),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                  (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp _.FRC:$src2, _.FRC:$src3,
                  (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
                  (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp _.FRC:$src2,
                  (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                  _.FRC:$src3),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp _.FRC:$src2, _.FRC:$src3,
                  (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp _.FRC:$src2,
                  (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                  (_.ScalarLdFrag addr:$src3)),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                  _.FRC:$src2, (_.ScalarLdFrag
                                addr:$src3)),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
                  (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    // Patterns with rounding mode.
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (RndOp _.FRC:$src2,
                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                       _.FRC:$src3, (i32 timm:$rc)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (RndOp _.FRC:$src2, _.FRC:$src3,
                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                       (i32 timm:$rc)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (RndOp _.FRC:$src2,
                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                        _.FRC:$src3, (i32 timm:$rc)),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (RndOp _.FRC:$src2, _.FRC:$src3,
                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                        (i32 timm:$rc)),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (RndOp _.FRC:$src2,
                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                        _.FRC:$src3, (i32 timm:$rc)),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (RndOp _.FRC:$src2, _.FRC:$src3,
                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                        (i32 timm:$rc)),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
  }
}
defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD", "SH",
                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB", "SH",
                                  X86Movsh, v8f16x_info,
                                  fp16imm0, HasFP16>;
defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SH",
                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SH",
                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;

defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
                                  "SS", X86Movss, v4f32x_info, fp32imm0>;

defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;

//===----------------------------------------------------------------------===//
// AVX-512 Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
//===----------------------------------------------------------------------===//
let Constraints = "$src1 = $dst" in {
multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  // NOTE: The SDNode has the multiply operands first with the add last.
  // This enables commuted load patterns to be autogenerated by tablegen.
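  // Illustrative per-element semantics (unsigned, qword lanes), for reference:
  //   VPMADD52LUQ: dst[i] += lo52( lo52(src2[i]) * lo52(src3[i]) )
  //   VPMADD52HUQ: dst[i] += hi52( lo52(src2[i]) * lo52(src3[i]) )
  // i.e. a 52x52->104-bit unsigned multiply whose low/high half is added into
  // the 64-bit accumulator.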
  let ExeDomain = _.ExeDomain in {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
          T8PD, EVEX_4V, Sched<[sched]>;

  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
          T8PD, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold,
                                sched.ReadAfterFold]>;

  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.ScalarMemOp:$src3),
           OpcodeStr, !strconcat("${src3}", _.BroadcastStr, ", $src2"),
           !strconcat("$src2, ${src3}", _.BroadcastStr),
           (OpNode _.RC:$src2,
                   (_.VT (_.BroadcastLdFrag addr:$src3)),
                   _.RC:$src1)>,
           T8PD, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
                                         sched.ReadAfterFold]>;
  }
}
} // Constraints = "$src1 = $dst"

multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasIFMA] in {
    defm Z : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
             EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasIFMA] in {
    defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}

defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
                                         SchedWriteVecIMul, avx512vl_i64_info>,
                                         REX_W;
defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
                                         SchedWriteVecIMul, avx512vl_i64_info>,
                                         REX_W;

//===----------------------------------------------------------------------===//
// AVX-512 Scalar convert from signed integer to float/double
//===----------------------------------------------------------------------===//

multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched,
                         RegisterClass SrcRC, X86VectorVTInfo DstVT,
                         X86MemOperand x86memop, PatFrag ld_frag, string asm,
                         string mem, list<Register> _Uses = [MXCSR],
                         bit _mayRaiseFPException = 1> {
let ExeDomain = DstVT.ExeDomain, Uses = _Uses,
    mayRaiseFPException = _mayRaiseFPException in {
  let hasSideEffects = 0, isCodeGenOnly = 1 in {
    def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
                (ins DstVT.FRC:$src1, SrcRC:$src),
                !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
                EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
    let mayLoad = 1 in
    def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
                (ins DstVT.FRC:$src1, x86memop:$src),
                asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
                EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  } // hasSideEffects = 0
  def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
                  (ins DstVT.RC:$src1, SrcRC:$src2),
                  !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set DstVT.RC:$dst,
                        (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2))]>,
                  EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;

  def rm_Int : SI<opc, MRMSrcMem, (outs
                  DstVT.RC:$dst),
                  (ins DstVT.RC:$src1, x86memop:$src2),
                  asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set DstVT.RC:$dst,
                        (OpNode (DstVT.VT DstVT.RC:$src1),
                                (ld_frag addr:$src2)))]>,
                  EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
  def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  (!cast<Instruction>(NAME#"rr_Int") DstVT.RC:$dst,
                   DstVT.RC:$src1, SrcRC:$src2), 0, "att">;
}

multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
                               X86FoldableSchedWrite sched, RegisterClass SrcRC,
                               X86VectorVTInfo DstVT, string asm,
                               string mem> {
  let ExeDomain = DstVT.ExeDomain, Uses = [MXCSR] in
  def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
                   (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
                   !strconcat(asm,
                              "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
                   [(set DstVT.RC:$dst,
                         (OpNode (DstVT.VT DstVT.RC:$src1),
                                 SrcRC:$src2,
                                 (i32 timm:$rc)))]>,
                   EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
  def : InstAlias<"v"#asm#mem#"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}",
                  (!cast<Instruction>(NAME#"rrb_Int") DstVT.RC:$dst,
                   DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 0, "att">;
}

multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, SDNode OpNodeRnd,
                                X86FoldableSchedWrite sched,
                                RegisterClass SrcRC, X86VectorVTInfo DstVT,
                                X86MemOperand x86memop, PatFrag ld_frag,
                                string asm, string mem> {
  defm NAME : avx512_vcvtsi_round<opc, OpNodeRnd, sched, SrcRC, DstVT, asm, mem>,
              avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop,
                            ld_frag, asm, mem>, VEX_LIG;
}

let Predicates = [HasAVX512] in {
defm VCVTSI2SSZ  : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
                                        WriteCvtI2SS, GR32,
                                        v4f32x_info, i32mem, loadi32, "cvtsi2ss", "l">,
                                        XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
                                        WriteCvtI2SS, GR64,
                                        v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">,
                                        XS, REX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSI2SDZ  : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32,
                                 v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l", [], 0>,
                                 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
                                        WriteCvtI2SD, GR64,
                                        v2f64x_info, i64mem, loadi64, "cvtsi2sd", "q">,
                                        XD, REX_W, EVEX_CD8<64, CD8VT1>;

def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;

def : Pat<(f32 (any_sint_to_fp (loadi32 addr:$src))),
          (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (any_sint_to_fp (loadi64 addr:$src))),
          (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (any_sint_to_fp (loadi32 addr:$src))),
          (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (any_sint_to_fp (loadi64 addr:$src))),
          (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;

def : Pat<(f32 (any_sint_to_fp GR32:$src)),
          (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f32 (any_sint_to_fp GR64:$src)),
          (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f64 (any_sint_to_fp GR32:$src)),
          (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (any_sint_to_fp GR64:$src)),
          (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;

defm VCVTUSI2SSZ   : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
                                          WriteCvtI2SS, GR32,
                                          v4f32x_info, i32mem, loadi32,
                                          "cvtusi2ss", "l">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
                                          WriteCvtI2SS, GR64,
                                          v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">,
                                          XS, REX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTUSI2SDZ   : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info,
                                   i32mem, loadi32, "cvtusi2sd", "l", [], 0>,
                                   XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
                                          WriteCvtI2SD, GR64,
                                          v2f64x_info, i64mem, loadi64, "cvtusi2sd", "q">,
                                          XD, REX_W, EVEX_CD8<64, CD8VT1>;

def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTUSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;

def : Pat<(f32 (any_uint_to_fp (loadi32 addr:$src))),
          (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (any_uint_to_fp (loadi64 addr:$src))),
          (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (any_uint_to_fp (loadi32 addr:$src))),
          (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (any_uint_to_fp (loadi64 addr:$src))),
          (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;

def : Pat<(f32 (any_uint_to_fp GR32:$src)),
          (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f32 (any_uint_to_fp GR64:$src)),
          (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f64 (any_uint_to_fp GR32:$src)),
          (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (any_uint_to_fp GR64:$src)),
          (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
}

//===----------------------------------------------------------------------===//
// AVX-512 Scalar convert from float/double to integer
//===----------------------------------------------------------------------===//

multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
                                  X86VectorVTInfo DstVT, SDNode OpNode,
                                  SDNode OpNodeRnd,
                                  X86FoldableSchedWrite sched, string asm,
                                  string aliasStr, Predicate prd = HasAVX512> {
  let Predicates = [prd], ExeDomain = SrcVT.ExeDomain in {
    def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
                    !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                    [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
                    EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
    let Uses = [MXCSR] in
    def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
                     !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
                     [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 timm:$rc)))]>,
                     EVEX, VEX_LIG, EVEX_B, EVEX_RC,
                     Sched<[sched]>;
    def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
                    !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                    [(set DstVT.RC:$dst, (OpNode
                          (SrcVT.ScalarIntMemFrags addr:$src)))]>,
                    EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  } // Predicates = [prd]

  def :
InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}", 7660 (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">; 7661 def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}", 7662 (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">; 7663 def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}", 7664 (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst, 7665 SrcVT.IntScalarMemOp:$src), 0, "att">; 7666} 7667 7668// Convert float/double to signed/unsigned int 32/64 7669defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,X86cvts2si, 7670 X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">, 7671 XS, EVEX_CD8<32, CD8VT1>; 7672defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si, 7673 X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">, 7674 XS, REX_W, EVEX_CD8<32, CD8VT1>; 7675defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, X86cvts2usi, 7676 X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">, 7677 XS, EVEX_CD8<32, CD8VT1>; 7678defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, X86cvts2usi, 7679 X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">, 7680 XS, REX_W, EVEX_CD8<32, CD8VT1>; 7681defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si, 7682 X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">, 7683 XD, EVEX_CD8<64, CD8VT1>; 7684defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si, 7685 X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">, 7686 XD, REX_W, EVEX_CD8<64, CD8VT1>; 7687defm VCVTSD2USIZ: avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, X86cvts2usi, 7688 X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">, 7689 XD, EVEX_CD8<64, CD8VT1>; 7690defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2usi, 7691 X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">, 7692 XD, REX_W, EVEX_CD8<64, CD8VT1>; 7693 7694multiclass avx512_cvt_s<bits<8> opc, string asm, X86VectorVTInfo SrcVT, 7695 X86VectorVTInfo DstVT, SDNode OpNode, 7696 X86FoldableSchedWrite sched> { 7697 let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in { 7698 let isCodeGenOnly = 1 in { 7699 def rr : AVX512<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.FRC:$src), 7700 !strconcat(asm,"\t{$src, $dst|$dst, $src}"), 7701 [(set DstVT.RC:$dst, (OpNode SrcVT.FRC:$src))]>, 7702 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC; 7703 def rm : AVX512<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.ScalarMemOp:$src), 7704 !strconcat(asm,"\t{$src, $dst|$dst, $src}"), 7705 [(set DstVT.RC:$dst, (OpNode (SrcVT.ScalarLdFrag addr:$src)))]>, 7706 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 7707 } 7708 } // Predicates = [HasAVX512] 7709} 7710 7711defm VCVTSS2SIZ: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i32x_info, 7712 lrint, WriteCvtSS2I>, XS, EVEX_CD8<32, CD8VT1>; 7713defm VCVTSS2SI64Z: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i64x_info, 7714 llrint, WriteCvtSS2I>, REX_W, XS, EVEX_CD8<32, CD8VT1>; 7715defm VCVTSD2SIZ: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i32x_info, 7716 lrint, WriteCvtSD2I>, XD, EVEX_CD8<64, CD8VT1>; 7717defm VCVTSD2SI64Z: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i64x_info, 7718 llrint, WriteCvtSD2I>, REX_W, XD, EVEX_CD8<64, CD8VT1>; 7719 7720let Predicates = [HasAVX512] in { 7721 def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64Zrr FR32:$src)>; 7722 def : Pat<(i64 (lrint (loadf32 addr:$src))), (VCVTSS2SI64Zrm addr:$src)>; 7723 7724 def : Pat<(i64 (lrint 

// Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang,
// which would otherwise produce unnecessary vmovs{s,d} instructions.
let Predicates = [HasAVX512] in {
def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))),
          (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))),
          (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))),
          (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))),
          (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))),
          (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))),
          (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))),
          (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))),
          (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR64:$src)))))),
          (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi64 addr:$src))))))),
          (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR32:$src)))))),
          (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi32 addr:$src))))))),
          (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR64:$src)))))),
          (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi64 addr:$src))))))),
          (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR32:$src)))))),
          (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi32 addr:$src))))))),
          (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
} // Predicates = [HasAVX512]

// Convert float/double to signed/unsigned int 32/64 with truncation
multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
                            X86VectorVTInfo _DstRC, SDPatternOperator OpNode,
                            SDNode
                                   OpNodeInt, SDNode OpNodeSAE,
                            X86FoldableSchedWrite sched, string aliasStr,
                            Predicate prd = HasAVX512> {
let Predicates = [prd], ExeDomain = _SrcRC.ExeDomain in {
  let isCodeGenOnly = 1 in {
  def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
                  !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                  [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
                  EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
  def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
                  !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                  [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
                  EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  }

  def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
                      !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                      [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
                      EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
  let Uses = [MXCSR] in
  def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
                       !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
                       [(set _DstRC.RC:$dst, (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>,
                       EVEX, VEX_LIG, EVEX_B, Sched<[sched]>;
  def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
                      (ins _SrcRC.IntScalarMemOp:$src),
                      !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                      [(set _DstRC.RC:$dst,
                            (OpNodeInt (_SrcRC.ScalarIntMemFrags addr:$src)))]>,
                      EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
} // Predicates = [prd]

  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
  def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
                  (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
                   _SrcRC.IntScalarMemOp:$src), 0, "att">;
}
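
// Note (for reference): the vcvtt* forms truncate (round toward zero), which
// is why they lower any_fp_to_sint/any_fp_to_uint directly. Truncation fixes
// the rounding behavior, so there is no $rc variant here, only an {sae} form
// that suppresses exceptions.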

defm VCVTTSS2SIZ:    avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
                                      any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
                                      "{l}">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2SI64Z:  avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
                                      any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
                                      "{q}">, REX_W, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2SIZ:    avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
                                      any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
                                      "{l}">, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2SI64Z:  avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
                                      any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
                                      "{q}">, REX_W, XD, EVEX_CD8<64, CD8VT1>;

defm VCVTTSS2USIZ:   avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
                                      any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
                                      "{l}">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
                                      any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
                                      "{q}">, XS, REX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2USIZ:   avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
                                      any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
                                      "{l}">, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
                                      any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
                                      "{q}">, XD, REX_W, EVEX_CD8<64, CD8VT1>;

//===----------------------------------------------------------------------===//
// AVX-512 Convert from float to double and back
//===----------------------------------------------------------------------===//

let Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                X86VectorVTInfo _Src, SDNode OpNode,
                                X86FoldableSchedWrite sched> {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
                "$src2, $src1", "$src1, $src2",
                (_.VT (OpNode (_.VT _.RC:$src1),
                              (_Src.VT _Src.RC:$src2)))>,
                EVEX_4V, VEX_LIG, Sched<[sched]>;
  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
                "$src2, $src1", "$src1, $src2",
                (_.VT (OpNode (_.VT _.RC:$src1),
                              (_Src.ScalarIntMemFrags addr:$src2)))>,
                EVEX_4V, VEX_LIG,
                Sched<[sched.Folded, sched.ReadAfterFold]>;

  let isCodeGenOnly = 1, hasSideEffects = 0 in {
    def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
               (ins _.FRC:$src1, _Src.FRC:$src2),
               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               EVEX_4V, VEX_LIG, Sched<[sched]>;
    let mayLoad = 1 in
    def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
               (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               EVEX_4V, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Scalar Conversion with SAE - suppress all exceptions
multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                    X86VectorVTInfo _Src, SDNode OpNodeSAE,
                                    X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                 (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
                 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                 (_.VT (OpNodeSAE (_.VT _.RC:$src1),
                                  (_Src.VT _Src.RC:$src2)))>,
                 EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
}

// Scalar Conversion with rounding control (RC)
multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                   X86VectorVTInfo _Src, SDNode OpNodeRnd,
                                   X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                 (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
                 "$rc, $src2, $src1", "$src1, $src2, $rc",
                 (_.VT (OpNodeRnd (_.VT _.RC:$src1),
                                  (_Src.VT _Src.RC:$src2), (i32 timm:$rc)))>,
                 EVEX_4V, VEX_LIG, Sched<[sched]>,
                 EVEX_B, EVEX_RC;
}
multiclass avx512_cvt_fp_scalar_trunc<bits<8> opc, string OpcodeStr,
                                      SDNode OpNode, SDNode OpNodeRnd,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _src, X86VectorVTInfo _dst,
                                      Predicate prd = HasAVX512> {
  let Predicates = [prd], ExeDomain = SSEPackedSingle in {
    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
             avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
                                     OpNodeRnd, sched>, EVEX_CD8<_src.EltSize, CD8VT1>;
  }
}
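
// Note (for reference): the widening conversions below pair the plain scalar
// form with an {sae} variant rather than a rounding-control one, since
// extending (e.g. f32->f64 or f16->f32) is exact and never rounds; the
// narrowing conversions take an explicit $rc instead.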
multiclass avx512_cvt_fp_scalar_extend<bits<8> opc, string OpcodeStr,
                                       SDNode OpNode, SDNode OpNodeSAE,
                                       X86FoldableSchedWrite sched,
                                       X86VectorVTInfo _src, X86VectorVTInfo _dst,
                                       Predicate prd = HasAVX512> {
  let Predicates = [prd], ExeDomain = SSEPackedSingle in {
    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
             avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE, sched>,
             EVEX_CD8<_src.EltSize, CD8VT1>;
  }
}

defm VCVTSD2SS : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2ss", X86frounds,
                                            X86froundsRnd, WriteCvtSD2SS, f64x_info,
                                            f32x_info>, XD, REX_W;
defm VCVTSS2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtss2sd", X86fpexts,
                                             X86fpextsSAE, WriteCvtSS2SD, f32x_info,
                                             f64x_info>, XS;
defm VCVTSD2SH : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2sh", X86frounds,
                                            X86froundsRnd, WriteCvtSD2SS, f64x_info,
                                            f16x_info, HasFP16>, T_MAP5XD, REX_W;
defm VCVTSH2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtsh2sd", X86fpexts,
                                             X86fpextsSAE, WriteCvtSS2SD, f16x_info,
                                             f64x_info, HasFP16>, T_MAP5XS;
defm VCVTSS2SH : avx512_cvt_fp_scalar_trunc<0x1D, "vcvtss2sh", X86frounds,
                                            X86froundsRnd, WriteCvtSD2SS, f32x_info,
                                            f16x_info, HasFP16>, T_MAP5PS;
defm VCVTSH2SS : avx512_cvt_fp_scalar_extend<0x13, "vcvtsh2ss", X86fpexts,
                                             X86fpextsSAE, WriteCvtSS2SD, f16x_info,
                                             f32x_info, HasFP16>, T_MAP6PS;

def : Pat<(f64 (any_fpextend FR32X:$src)),
          (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
          Requires<[HasAVX512]>;
def : Pat<(f64 (any_fpextend (loadf32 addr:$src))),
          (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
          Requires<[HasAVX512, OptForSize]>;

def : Pat<(f32 (any_fpround FR64X:$src)),
          (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
          Requires<[HasAVX512]>;

def : Pat<(f32 (any_fpextend FR16X:$src)),
          (VCVTSH2SSZrr (f32 (IMPLICIT_DEF)), FR16X:$src)>,
          Requires<[HasFP16]>;
def : Pat<(f32 (any_fpextend (loadf16 addr:$src))),
          (VCVTSH2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>,
          Requires<[HasFP16, OptForSize]>;

def : Pat<(f64 (any_fpextend FR16X:$src)),
          (VCVTSH2SDZrr (f64 (IMPLICIT_DEF)), FR16X:$src)>,
          Requires<[HasFP16]>;
def : Pat<(f64 (any_fpextend (loadf16 addr:$src))),
          (VCVTSH2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
          Requires<[HasFP16, OptForSize]>;

def : Pat<(f16 (any_fpround FR32X:$src)),
          (VCVTSS2SHZrr (f16 (IMPLICIT_DEF)), FR32X:$src)>,
          Requires<[HasFP16]>;
def : Pat<(f16 (any_fpround FR64X:$src)),
          (VCVTSD2SHZrr (f16 (IMPLICIT_DEF)), FR64X:$src)>,
          Requires<[HasFP16]>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector
                     (f32 (any_fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
          (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
          Requires<[HasAVX512]>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector
                     (f64 (any_fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
          (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
          Requires<[HasAVX512]>;
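
// The two X86Movss/X86Movsd patterns above let isel pick the intrinsic
// (_Int) forms when a conversion result is merged into the low element of an
// existing vector, matching the blend-with-destination semantics of the
// underlying instructions.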

//===----------------------------------------------------------------------===//
// AVX-512 Vector convert from signed/unsigned integer to float/double
//         and from float/double to signed/unsigned integer
//===----------------------------------------------------------------------===//

multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86VectorVTInfo _Src, SDPatternOperator OpNode,
                          SDPatternOperator MaskOpNode,
                          X86FoldableSchedWrite sched,
                          string Broadcast = _.BroadcastStr,
                          string Alias = "", X86MemOperand MemOp = _Src.MemOp,
                          RegisterClass MaskRC = _.KRCWM,
                          dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src)))),
                          dag MaskLdDAG = (_.VT (MaskOpNode (_Src.VT (_Src.LdFrag addr:$src))))> {
let Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rr : AVX512_maskable_cvt<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _Src.RC:$src),
                         (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
                         (ins MaskRC:$mask, _Src.RC:$src),
                         OpcodeStr, "$src", "$src",
                         (_.VT (OpNode (_Src.VT _Src.RC:$src))),
                         (vselect_mask MaskRC:$mask,
                                       (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
                                       _.RC:$src0),
                         (vselect_mask MaskRC:$mask,
                                       (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
                                       _.ImmAllZerosV)>,
                         EVEX, Sched<[sched]>;

  defm rm : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins MemOp:$src),
                         (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
                         (ins MaskRC:$mask, MemOp:$src),
                         OpcodeStr#Alias, "$src", "$src",
                         LdDAG,
                         (vselect_mask MaskRC:$mask, MaskLdDAG, _.RC:$src0),
                         (vselect_mask MaskRC:$mask, MaskLdDAG, _.ImmAllZerosV)>,
                         EVEX, Sched<[sched.Folded]>;

  defm rmb : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _Src.ScalarMemOp:$src),
                         (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
                         (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
                         OpcodeStr,
                         "${src}"#Broadcast, "${src}"#Broadcast,
                         (_.VT (OpNode (_Src.VT
                                        (_Src.BroadcastLdFrag addr:$src)))),
                         (vselect_mask MaskRC:$mask,
                                       (_.VT
                                        (MaskOpNode
                                         (_Src.VT
                                          (_Src.BroadcastLdFrag addr:$src)))),
                                       _.RC:$src0),
                         (vselect_mask MaskRC:$mask,
                                       (_.VT
                                        (MaskOpNode
                                         (_Src.VT
                                          (_Src.BroadcastLdFrag addr:$src)))),
                                       _.ImmAllZerosV)>,
                         EVEX, EVEX_B, Sched<[sched.Folded]>;
  }
}

// Conversion with SAE - suppress all exceptions
multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                              X86VectorVTInfo _Src, SDNode OpNodeSAE,
                              X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in
  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                             (ins _Src.RC:$src), OpcodeStr,
                             "{sae}, $src", "$src, {sae}",
                             (_.VT (OpNodeSAE (_Src.VT _Src.RC:$src)))>,
                             EVEX, EVEX_B, Sched<[sched]>;
}

// Conversion with rounding control (RC)
multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                             X86VectorVTInfo _Src, SDPatternOperator OpNodeRnd,
                             X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in
  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                             (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
                             "$rc, $src", "$src, $rc",
                             (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 timm:$rc)))>,
                             EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
}
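
// Each avx512_vcvt_fp instantiation provides three encodings: a plain
// register form (rr), a full-width memory form (rm), and an embedded
// broadcast form (rmb). Masking is expressed with vselect_mask over
// MaskOpNode so that OpNode can be a strict-FP-aware "any" node while the
// masked patterns still match the non-strict node.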

// Similar to avx512_vcvt_fp, but uses an extload for the memory form.
multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                X86VectorVTInfo _Src, SDPatternOperator OpNode,
                                SDNode MaskOpNode,
                                X86FoldableSchedWrite sched,
                                string Broadcast = _.BroadcastStr,
                                string Alias = "", X86MemOperand MemOp = _Src.MemOp,
                                RegisterClass MaskRC = _.KRCWM>
  : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, MaskOpNode, sched, Broadcast,
                   Alias, MemOp, MaskRC,
                   (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src)),
                   (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;

// Extend [Float to Double, Half to Float]
multiclass avx512_cvt_extend<bits<8> opc, string OpcodeStr,
                             AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
                             X86SchedWriteWidths sched, Predicate prd = HasAVX512> {
  let Predicates = [prd] in {
    defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info512, _src.info256,
                                  any_fpextend, fpextend, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, _dst.info512, _src.info256,
                                X86vfpextSAE, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info128, _src.info128,
                                     X86any_vfpext, X86vfpext, sched.XMM,
                                     _dst.info128.BroadcastStr,
                                     "", f64mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info256, _src.info128,
                                     any_fpextend, fpextend, sched.YMM>, EVEX_V256;
  }
}

// Truncate [Double to Float, Float to Half]
multiclass avx512_cvt_trunc<bits<8> opc, string OpcodeStr,
                            AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
                            X86SchedWriteWidths sched, Predicate prd = HasAVX512,
                            PatFrag bcast128 = _src.info128.BroadcastLdFrag,
                            PatFrag loadVT128 = _src.info128.LdFrag,
                            RegisterClass maskRC128 = _src.info128.KRCWM> {
  let Predicates = [prd] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512,
                            X86any_vfpround, X86vfpround, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
                               X86vfproundRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128,
                               null_frag, null_frag, sched.XMM,
                               _src.info128.BroadcastStr, "{x}",
                               f128mem, maskRC128>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256,
                               X86any_vfpround, X86vfpround,
                               sched.YMM, _src.info256.BroadcastStr, "{y}">, EVEX_V256;

    // Special patterns to allow use of X86vmfpround for masking. Instruction
    // patterns have been disabled with null_frag.
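    // (The 128-bit form produces fewer result elements than its destination
    // type holds, so the generic vselect_mask patterns do not apply; the
    // hand-written patterns below use the dedicated X86vmfpround node, which
    // carries the passthru operand itself.)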
    def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT VR128X:$src))),
              (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
    def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
                            maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, maskRC128:$mask, VR128X:$src)>;
    def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
                            maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rrkz") maskRC128:$mask, VR128X:$src)>;

    def : Pat<(_dst.info128.VT (X86any_vfpround (loadVT128 addr:$src))),
              (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
    def : Pat<(X86vmfpround (loadVT128 addr:$src), (_dst.info128.VT VR128X:$src0),
                            maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
    def : Pat<(X86vmfpround (loadVT128 addr:$src), _dst.info128.ImmAllZerosV,
                            maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rmkz") maskRC128:$mask, addr:$src)>;

    def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT (bcast128 addr:$src)))),
              (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
    def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
                            (_dst.info128.VT VR128X:$src0), maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
    def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
                            _dst.info128.ImmAllZerosV, maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rmbkz") maskRC128:$mask, addr:$src)>;
  }

  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;

  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
}

defm VCVTPD2PS : avx512_cvt_trunc<0x5A, "vcvtpd2ps",
                                  avx512vl_f32_info, avx512vl_f64_info, SchedWriteCvtPD2PS>,
                                  REX_W, PD, EVEX_CD8<64, CD8VF>;
defm VCVTPS2PD : avx512_cvt_extend<0x5A, "vcvtps2pd",
                                   avx512vl_f64_info, avx512vl_f32_info, SchedWriteCvtPS2PD>,
                                   PS, EVEX_CD8<32, CD8VH>;
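
// The "x"/"y" aliases emitted by avx512_cvt_trunc let AT&T-syntax assembly
// name the source width explicitly, e.g. for VCVTPD2PS (illustrative):
//   vcvtpd2psx (%rax), %xmm0    # 128-bit memory source
//   vcvtpd2psy (%rax), %xmm0    # 256-bit memory source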
"$dst {${mask}}, $src}", 8313 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst, 8314 VK4WM:$mask, VR256X:$src), 0, "att">; 8315 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|" 8316 "$dst {${mask}} {z}, $src}", 8317 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst, 8318 VK4WM:$mask, VR256X:$src), 0, "att">; 8319 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}", 8320 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, 8321 i64mem:$src), 0, "att">; 8322 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|" 8323 "$dst {${mask}}, ${src}{1to4}}", 8324 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst, 8325 VK4WM:$mask, i64mem:$src), 0, "att">; 8326 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|" 8327 "$dst {${mask}} {z}, ${src}{1to4}}", 8328 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst, 8329 VK4WM:$mask, i64mem:$src), 0, "att">; 8330 8331 def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}", 8332 (!cast<Instruction>(NAME # "Zrr") VR128X:$dst, 8333 VR512:$src), 0, "att">; 8334 def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|" 8335 "$dst {${mask}}, $src}", 8336 (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst, 8337 VK8WM:$mask, VR512:$src), 0, "att">; 8338 def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|" 8339 "$dst {${mask}} {z}, $src}", 8340 (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst, 8341 VK8WM:$mask, VR512:$src), 0, "att">; 8342 def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}", 8343 (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst, 8344 i64mem:$src), 0, "att">; 8345 def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|" 8346 "$dst {${mask}}, ${src}{1to8}}", 8347 (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst, 8348 VK8WM:$mask, i64mem:$src), 0, "att">; 8349 def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|" 8350 "$dst {${mask}} {z}, ${src}{1to8}}", 8351 (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst, 8352 VK8WM:$mask, i64mem:$src), 0, "att">; 8353} 8354 8355defm VCVTPS2PHX : avx512_cvt_trunc<0x1D, "vcvtps2phx", avx512vl_f16_info, 8356 avx512vl_f32_info, SchedWriteCvtPD2PS, 8357 HasFP16>, T_MAP5PD, EVEX_CD8<32, CD8VF>; 8358defm VCVTPH2PSX : avx512_cvt_extend<0x13, "vcvtph2psx", avx512vl_f32_info, 8359 avx512vl_f16_info, SchedWriteCvtPS2PD, 8360 HasFP16>, T_MAP6PD, EVEX_CD8<16, CD8VH>; 8361defm VCVTPD2PH : avx512_cvtpd2ph<0x5A, "vcvtpd2ph", SchedWriteCvtPD2PS>, 8362 REX_W, T_MAP5PD, EVEX_CD8<64, CD8VF>; 8363defm VCVTPH2PD : avx512_cvtph2pd<0x5A, "vcvtph2pd", SchedWriteCvtPS2PD>, 8364 T_MAP5PS, EVEX_CD8<16, CD8VQ>; 8365 8366let Predicates = [HasFP16, HasVLX] in { 8367 // Special patterns to allow use of X86vmfpround for masking. Instruction 8368 // patterns have been disabled with null_frag. 
  def : Pat<(v8f16 (X86any_vfpround (v4f64 VR256X:$src))),
            (VCVTPD2PHZ256rr VR256X:$src)>;
  def : Pat<(v8f16 (X86vmfpround (v4f64 VR256X:$src), (v8f16 VR128X:$src0),
                                 VK4WM:$mask)),
            (VCVTPD2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
  def : Pat<(X86vmfpround (v4f64 VR256X:$src), v8f16x_info.ImmAllZerosV,
                          VK4WM:$mask),
            (VCVTPD2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;

  def : Pat<(v8f16 (X86any_vfpround (loadv4f64 addr:$src))),
            (VCVTPD2PHZ256rm addr:$src)>;
  def : Pat<(X86vmfpround (loadv4f64 addr:$src), (v8f16 VR128X:$src0),
                          VK4WM:$mask),
            (VCVTPD2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86vmfpround (loadv4f64 addr:$src), v8f16x_info.ImmAllZerosV,
                          VK4WM:$mask),
            (VCVTPD2PHZ256rmkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_vfpround (v4f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTPD2PHZ256rmb addr:$src)>;
  def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
                          (v8f16 VR128X:$src0), VK4WM:$mask),
            (VCVTPD2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
                          v8f16x_info.ImmAllZerosV, VK4WM:$mask),
            (VCVTPD2PHZ256rmbkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_vfpround (v2f64 VR128X:$src))),
            (VCVTPD2PHZ128rr VR128X:$src)>;
  def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v8f16 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTPD2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86vmfpround (v2f64 VR128X:$src), v8f16x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTPD2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v8f16 (X86any_vfpround (loadv2f64 addr:$src))),
            (VCVTPD2PHZ128rm addr:$src)>;
  def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v8f16 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTPD2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86vmfpround (loadv2f64 addr:$src), v8f16x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTPD2PHZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_vfpround (v2f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTPD2PHZ128rmb addr:$src)>;
  def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
                          (v8f16 VR128X:$src0), VK2WM:$mask),
            (VCVTPD2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
                          v8f16x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTPD2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
}
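
// Note: vcvtdq2pd / vcvtudq2pd widen i32 to f64, which is always exact, so
// the multiclass below overrides the MXCSR use and mayRaiseFPException flag
// that avx512_vcvt_fp sets by default.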
// Convert Signed/Unsigned Doubleword to Double
let Uses = []<Register>, mayRaiseFPException = 0 in
multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           SDNode MaskOpNode, SDPatternOperator OpNode128,
                           SDNode MaskOpNode128,
                           X86SchedWriteWidths sched> {
  // No rounding in this op
  let Predicates = [HasAVX512] in
  defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
                          MaskOpNode, sched.ZMM>, EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
                               OpNode128, MaskOpNode128, sched.XMM, "{1to2}",
                               "", i64mem, VK2WM,
                               (v2f64 (OpNode128 (bc_v4i32
                                (v2i64
                                 (scalar_to_vector (loadi64 addr:$src)))))),
                               (v2f64 (MaskOpNode128 (bc_v4i32
                                (v2i64
                                 (scalar_to_vector (loadi64 addr:$src))))))>,
                               EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

// Convert Signed/Unsigned Doubleword to Float
multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           SDNode MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
                          MaskOpNode, sched.ZMM>,
           avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
                             OpNodeRnd, sched.ZMM>, EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
                               MaskOpNode, sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

// Convert Float to Signed/Unsigned Doubleword with truncation
multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDNode MaskOpNode,
                            SDNode OpNodeSAE, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
                                OpNodeSAE, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
                               MaskOpNode, sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

// Convert Float to Signed/Unsigned Doubleword
multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
                               MaskOpNode, sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

// Convert Double to Signed/Unsigned Doubleword with truncation
multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDNode MaskOpNode, SDNode OpNodeSAE,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
                                OpNodeSAE, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    // We need "x"/"y" suffixes to distinguish the 128- and 256-bit memory
    // forms of these instructions in the asm parser, since they have the
    // same dest type - 'v4i32x_info'. The broadcast string is specified
    // explicitly for the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
                               VK2WM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
                               MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
  }

  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                  VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                  f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;

  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                  VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                  f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
}

// Convert Double to Signed/Unsigned Doubleword
multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    // We need "x"/"y" suffixes to distinguish the 128- and 256-bit memory
    // forms of these instructions in the asm parser, since they have the
    // same dest type - 'v4i32x_info'. The broadcast string is specified
    // explicitly for the same reason.
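    // As with avx512_cvt_trunc, the 128-bit form disables the generic
    // patterns with null_frag; the masked selections are supplied by the
    // X86mcvtp2Int patterns near the end of this section.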
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
                               VK2WM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
                               MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
  }

  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                  f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;

  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                  f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
}

// Convert Double to Signed/Unsigned Quadword
multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
                               MaskOpNode, sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

// Convert Double to Signed/Unsigned Quadword with truncation
multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDNode MaskOpNode, SDNode OpNodeRnd,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
                               MaskOpNode, sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

// Convert Signed/Unsigned Quadword to Double
multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           SDNode MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
                               MaskOpNode, sched.XMM>, EVEX_V128, NotEVEX2VEXConvertible;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256, NotEVEX2VEXConvertible;
  }
}

// Convert Float to Signed/Unsigned Quadword
multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // The broadcast string is specified explicitly, since we take only 2
    // elements from the v4f32x_info source.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
                               MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
                               (v2i64 (OpNode (bc_v4f32
                                (v2f64
                                 (scalar_to_vector (loadf64 addr:$src)))))),
                               (v2i64 (MaskOpNode (bc_v4f32
                                (v2f64
                                 (scalar_to_vector (loadf64 addr:$src))))))>,
                               EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

// Convert Float to Signed/Unsigned Quadword with truncation
multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDNode MaskOpNode, SDNode OpNodeRnd,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // The broadcast string is specified explicitly, since we take only 2
    // elements from the v4f32x_info source.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
                               MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
                               (v2i64 (OpNode (bc_v4f32
                                (v2f64
                                 (scalar_to_vector (loadf64 addr:$src)))))),
                               (v2i64 (MaskOpNode (bc_v4f32
                                (v2f64
                                 (scalar_to_vector (loadf64 addr:$src))))))>,
                               EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

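// For the Z128 forms of vcvt(t)ps2qq above, the memory pattern loads a single
// f64 and bitcasts it to v4f32: only the low two f32 elements are consumed,
// so a 64-bit scalar load is sufficient and avoids reading memory beyond it.
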
// Convert Signed/Unsigned Quadword to Float
// Also Convert Signed/Unsigned Doubleword to Half
multiclass avx512_cvtqq2ps_dq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                                 SDPatternOperator MaskOpNode, SDPatternOperator OpNode128,
                                 SDPatternOperator OpNode128M, SDPatternOperator OpNodeRnd,
                                 AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
                                 X86SchedWriteWidths sched, Predicate prd = HasDQI> {
  let Predicates = [prd] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    // We need "x"/"y" suffixes to distinguish the 128- and 256-bit memory
    // forms of these instructions in the asm parser, since both write a
    // 128-bit destination register. The broadcast string is specified
    // explicitly for the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128, null_frag,
                               null_frag, sched.XMM, _src.info128.BroadcastStr,
                               "{x}", i128mem, _src.info128.KRCWM>,
                               EVEX_V128, NotEVEX2VEXConvertible;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256, OpNode,
                               MaskOpNode, sched.YMM, _src.info256.BroadcastStr,
                               "{y}">, EVEX_V256,
                               NotEVEX2VEXConvertible;

    // Special patterns to allow use of X86VM[SU]intToFP for masking. Instruction
    // patterns have been disabled with null_frag.
    def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT VR128X:$src))),
              (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
    def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
                          _src.info128.KRCWM:$mask),
              (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, _src.info128.KRCWM:$mask, VR128X:$src)>;
    def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
                          _src.info128.KRCWM:$mask),
              (!cast<Instruction>(NAME # "Z128rrkz") _src.info128.KRCWM:$mask, VR128X:$src)>;

    def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.LdFrag addr:$src))),
              (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
    def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), (_dst.info128.VT VR128X:$src0),
                          _src.info128.KRCWM:$mask),
              (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
    def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), _dst.info128.ImmAllZerosV,
                          _src.info128.KRCWM:$mask),
              (!cast<Instruction>(NAME # "Z128rmkz") _src.info128.KRCWM:$mask, addr:$src)>;

    def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT (X86VBroadcastld64 addr:$src)))),
              (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
    def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
                          (_dst.info128.VT VR128X:$src0), _src.info128.KRCWM:$mask),
              (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
    def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
                          _dst.info128.ImmAllZerosV, _src.info128.KRCWM:$mask),
              (!cast<Instruction>(NAME # "Z128rmbkz") _src.info128.KRCWM:$mask, addr:$src)>;
  }
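
  // AT&T-syntax aliases providing the explicit "x"/"y"-suffixed spellings
  // for the 128-bit and 256-bit register and broadcast forms.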
"Z128rrk") VR128X:$dst, 8803 VK2WM:$mask, VR128X:$src), 0, "att">; 8804 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}", 8805 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst, 8806 VK2WM:$mask, VR128X:$src), 0, "att">; 8807 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}", 8808 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, 8809 i64mem:$src), 0, "att">; 8810 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|" 8811 "$dst {${mask}}, ${src}{1to2}}", 8812 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst, 8813 VK2WM:$mask, i64mem:$src), 0, "att">; 8814 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|" 8815 "$dst {${mask}} {z}, ${src}{1to2}}", 8816 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst, 8817 VK2WM:$mask, i64mem:$src), 0, "att">; 8818 8819 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}", 8820 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, 8821 VR256X:$src), 0, "att">; 8822 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|" 8823 "$dst {${mask}}, $src}", 8824 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst, 8825 VK4WM:$mask, VR256X:$src), 0, "att">; 8826 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|" 8827 "$dst {${mask}} {z}, $src}", 8828 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst, 8829 VK4WM:$mask, VR256X:$src), 0, "att">; 8830 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}", 8831 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, 8832 i64mem:$src), 0, "att">; 8833 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|" 8834 "$dst {${mask}}, ${src}{1to4}}", 8835 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst, 8836 VK4WM:$mask, i64mem:$src), 0, "att">; 8837 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|" 8838 "$dst {${mask}} {z}, ${src}{1to4}}", 8839 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst, 8840 VK4WM:$mask, i64mem:$src), 0, "att">; 8841} 8842 8843defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", any_sint_to_fp, sint_to_fp, 8844 X86any_VSintToFP, X86VSintToFP, 8845 SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>; 8846 8847defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", any_sint_to_fp, sint_to_fp, 8848 X86VSintToFpRnd, SchedWriteCvtDQ2PS>, 8849 PS, EVEX_CD8<32, CD8VF>; 8850 8851defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86any_cvttp2si, 8852 X86cvttp2si, X86cvttp2siSAE, 8853 SchedWriteCvtPS2DQ>, XS, EVEX_CD8<32, CD8VF>; 8854 8855defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86any_cvttp2si, 8856 X86cvttp2si, X86cvttp2siSAE, 8857 SchedWriteCvtPD2DQ>, 8858 PD, REX_W, EVEX_CD8<64, CD8VF>; 8859 8860defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86any_cvttp2ui, 8861 X86cvttp2ui, X86cvttp2uiSAE, 8862 SchedWriteCvtPS2DQ>, PS, EVEX_CD8<32, CD8VF>; 8863 8864defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86any_cvttp2ui, 8865 X86cvttp2ui, X86cvttp2uiSAE, 8866 SchedWriteCvtPD2DQ>, 8867 PS, REX_W, EVEX_CD8<64, CD8VF>; 8868 8869defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", any_uint_to_fp, 8870 uint_to_fp, X86any_VUintToFP, X86VUintToFP, 8871 SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>; 8872 8873defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", any_uint_to_fp, 8874 uint_to_fp, X86VUintToFpRnd, 8875 SchedWriteCvtDQ2PS>, XD, EVEX_CD8<32, CD8VF>; 8876 8877defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int, X86cvtp2Int, 8878 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD, 8879 EVEX_CD8<32, CD8VF>; 8880 8881defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, 
"vcvtpd2dq", X86cvtp2Int, X86cvtp2Int, 8882 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, XD, 8883 REX_W, EVEX_CD8<64, CD8VF>; 8884 8885defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt, X86cvtp2UInt, 8886 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, 8887 PS, EVEX_CD8<32, CD8VF>; 8888 8889defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt, X86cvtp2UInt, 8890 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, REX_W, 8891 PS, EVEX_CD8<64, CD8VF>; 8892 8893defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int, X86cvtp2Int, 8894 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, REX_W, 8895 PD, EVEX_CD8<64, CD8VF>; 8896 8897defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int, X86cvtp2Int, 8898 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD, 8899 EVEX_CD8<32, CD8VH>; 8900 8901defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt, X86cvtp2UInt, 8902 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, REX_W, 8903 PD, EVEX_CD8<64, CD8VF>; 8904 8905defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt, X86cvtp2UInt, 8906 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD, 8907 EVEX_CD8<32, CD8VH>; 8908 8909defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86any_cvttp2si, 8910 X86cvttp2si, X86cvttp2siSAE, 8911 SchedWriteCvtPD2DQ>, REX_W, 8912 PD, EVEX_CD8<64, CD8VF>; 8913 8914defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86any_cvttp2si, 8915 X86cvttp2si, X86cvttp2siSAE, 8916 SchedWriteCvtPS2DQ>, PD, 8917 EVEX_CD8<32, CD8VH>; 8918 8919defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86any_cvttp2ui, 8920 X86cvttp2ui, X86cvttp2uiSAE, 8921 SchedWriteCvtPD2DQ>, REX_W, 8922 PD, EVEX_CD8<64, CD8VF>; 8923 8924defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86any_cvttp2ui, 8925 X86cvttp2ui, X86cvttp2uiSAE, 8926 SchedWriteCvtPS2DQ>, PD, 8927 EVEX_CD8<32, CD8VH>; 8928 8929defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", any_sint_to_fp, 8930 sint_to_fp, X86VSintToFpRnd, 8931 SchedWriteCvtDQ2PD>, REX_W, XS, EVEX_CD8<64, CD8VF>; 8932 8933defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", any_uint_to_fp, 8934 uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PD>, 8935 REX_W, XS, EVEX_CD8<64, CD8VF>; 8936 8937defm VCVTDQ2PH : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtdq2ph", any_sint_to_fp, sint_to_fp, 8938 X86any_VSintToFP, X86VMSintToFP, 8939 X86VSintToFpRnd, avx512vl_f16_info, avx512vl_i32_info, 8940 SchedWriteCvtDQ2PS, HasFP16>, 8941 T_MAP5PS, EVEX_CD8<32, CD8VF>; 8942 8943defm VCVTUDQ2PH : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtudq2ph", any_uint_to_fp, uint_to_fp, 8944 X86any_VUintToFP, X86VMUintToFP, 8945 X86VUintToFpRnd, avx512vl_f16_info, avx512vl_i32_info, 8946 SchedWriteCvtDQ2PS, HasFP16>, T_MAP5XD, 8947 EVEX_CD8<32, CD8VF>; 8948 8949defm VCVTQQ2PS : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtqq2ps", any_sint_to_fp, sint_to_fp, 8950 X86any_VSintToFP, X86VMSintToFP, 8951 X86VSintToFpRnd, avx512vl_f32_info, avx512vl_i64_info, 8952 SchedWriteCvtDQ2PS>, REX_W, PS, 8953 EVEX_CD8<64, CD8VF>; 8954 8955defm VCVTUQQ2PS : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtuqq2ps", any_uint_to_fp, uint_to_fp, 8956 X86any_VUintToFP, X86VMUintToFP, 8957 X86VUintToFpRnd, avx512vl_f32_info, avx512vl_i64_info, 8958 SchedWriteCvtDQ2PS>, REX_W, XD, 8959 EVEX_CD8<64, CD8VF>; 8960 8961let Predicates = [HasVLX] in { 8962 // Special patterns to allow use of X86mcvtp2Int for masking. Instruction 8963 // patterns have been disabled with null_frag. 
  def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))),
            (VCVTPD2DQZ128rr VR128X:$src)>;
  def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))),
            (VCVTPD2DQZ128rm addr:$src)>;
  def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTPD2DQZ128rmb addr:$src)>;
  def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
                          (v4i32 VR128X:$src0), VK2WM:$mask),
            (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;

  // Special patterns to allow use of X86mcvttp2si for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v4i32 (X86any_cvttp2si (v2f64 VR128X:$src))),
            (VCVTTPD2DQZ128rr VR128X:$src)>;
  def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v4i32 (X86any_cvttp2si (loadv2f64 addr:$src))),
            (VCVTTPD2DQZ128rm addr:$src)>;
  def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v4i32 (X86any_cvttp2si (v2f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTTPD2DQZ128rmb addr:$src)>;
  def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
                          (v4i32 VR128X:$src0), VK2WM:$mask),
            (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;

  // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src))),
            (VCVTPD2UDQZ128rr VR128X:$src)>;
  def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v4i32 (X86cvtp2UInt (loadv2f64 addr:$src))),
            (VCVTPD2UDQZ128rm addr:$src)>;
  def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTPD2UDQZ128rmb addr:$src)>;
  def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
                           (v4i32 VR128X:$src0), VK2WM:$mask),
            (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
                           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;

  // Special patterns to allow use of X86mcvttp2ui for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 VR128X:$src))),
            (VCVTTPD2UDQZ128rr VR128X:$src)>;
  def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v4i32 (X86any_cvttp2ui (loadv2f64 addr:$src))),
            (VCVTTPD2UDQZ128rm addr:$src)>;
  def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTTPD2UDQZ128rmb addr:$src)>;
  def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
                          (v4i32 VR128X:$src0), VK2WM:$mask),
            (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
}

let Predicates = [HasDQI, HasVLX] in {
  def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
            (VCVTPS2QQZ128rm addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                                 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                                 VR128X:$src0)),
            (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                                 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                                 v2i64x_info.ImmAllZerosV)),
            (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
            (VCVTPS2UQQZ128rm addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                                 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                                 VR128X:$src0)),
            (VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                                 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                                 v2i64x_info.ImmAllZerosV)),
            (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v2i64 (X86any_cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
            (VCVTTPS2QQZ128rm addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                                 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                                 VR128X:$src0)),
            (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                                 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                                 v2i64x_info.ImmAllZerosV)),
            (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v2i64 (X86any_cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
            (VCVTTPS2UQQZ128rm addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                                 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                                 VR128X:$src0)),
            (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                                 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                                 v2i64x_info.ImmAllZerosV)),
            (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
}

let Predicates = [HasVLX] in {
  def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
            (VCVTDQ2PDZ128rm addr:$src)>;
  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
                                 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
                                 VR128X:$src0)),
            (VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
                                 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
                                 v2f64x_info.ImmAllZerosV)),
            (VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v2f64 (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
            (VCVTUDQ2PDZ128rm addr:$src)>;
  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
                                 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
                                 VR128X:$src0)),
            (VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
                                 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
                                 v2f64x_info.ImmAllZerosV)),
            (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
}

//===----------------------------------------------------------------------===//
// Half precision conversion instructions
//===----------------------------------------------------------------------===//

let Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                           X86MemOperand x86memop, dag ld_dag,
                           X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable_split<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
                                  (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
                                  (X86any_cvtph2ps (_src.VT _src.RC:$src)),
                                  (X86cvtph2ps (_src.VT _src.RC:$src))>,
                                  T8PD, Sched<[sched]>;
  defm rm : AVX512_maskable_split<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
                                  (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
                                  (X86any_cvtph2ps (_src.VT ld_dag)),
                                  (X86cvtph2ps (_src.VT ld_dag))>,
                                  T8PD, Sched<[sched.Folded]>;
}
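
// The ld_dag parameter lets each width choose its own load shape: the 512-
// and 256-bit forms below load the full source memory operand, while the
// 128-bit form uses a zero-extending 64-bit load, since it only converts the
// low four f16 elements.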
"vcvtph2ps", 9174 "{sae}, $src", "$src, {sae}", 9175 (X86cvtph2psSAE (_src.VT _src.RC:$src))>, 9176 T8PD, EVEX_B, Sched<[sched]>; 9177} 9178 9179let Predicates = [HasAVX512] in 9180 defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, 9181 (load addr:$src), WriteCvtPH2PSZ>, 9182 avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>, 9183 EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>; 9184 9185let Predicates = [HasVLX] in { 9186 defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem, 9187 (load addr:$src), WriteCvtPH2PSY>, EVEX, EVEX_V256, 9188 EVEX_CD8<32, CD8VH>; 9189 defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem, 9190 (bitconvert (v2i64 (X86vzload64 addr:$src))), 9191 WriteCvtPH2PS>, EVEX, EVEX_V128, 9192 EVEX_CD8<32, CD8VH>; 9193 9194 // Pattern match vcvtph2ps of a scalar i64 load. 9195 def : Pat<(v4f32 (X86any_cvtph2ps (v8i16 (bitconvert 9196 (v2i64 (scalar_to_vector (loadi64 addr:$src))))))), 9197 (VCVTPH2PSZ128rm addr:$src)>; 9198} 9199 9200multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src, 9201 X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> { 9202let ExeDomain = GenericDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { 9203 def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst), 9204 (ins _src.RC:$src1, i32u8imm:$src2), 9205 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", 9206 [(set _dest.RC:$dst, 9207 (X86any_cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>, 9208 Sched<[RR]>; 9209 let Constraints = "$src0 = $dst" in 9210 def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst), 9211 (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), 9212 "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", 9213 [(set _dest.RC:$dst, 9214 (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2), 9215 _dest.RC:$src0, _src.KRCWM:$mask))]>, 9216 Sched<[RR]>, EVEX_K; 9217 def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst), 9218 (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), 9219 "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}", 9220 [(set _dest.RC:$dst, 9221 (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2), 9222 _dest.ImmAllZerosV, _src.KRCWM:$mask))]>, 9223 Sched<[RR]>, EVEX_KZ; 9224 let hasSideEffects = 0, mayStore = 1 in { 9225 def mr : AVX512AIi8<0x1D, MRMDestMem, (outs), 9226 (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2), 9227 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 9228 Sched<[MR]>; 9229 def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs), 9230 (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), 9231 "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>, 9232 EVEX_K, Sched<[MR]>; 9233 } 9234} 9235} 9236 9237multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src, 9238 SchedWrite Sched> { 9239 let hasSideEffects = 0, Uses = [MXCSR] in { 9240 def rrb : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst), 9241 (ins _src.RC:$src1, i32u8imm:$src2), 9242 "vcvtps2ph\t{$src2, {sae}, $src1, $dst|$dst, $src1, {sae}, $src2}", 9243 [(set _dest.RC:$dst, 9244 (X86cvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>, 9245 EVEX_B, Sched<[Sched]>; 9246 let Constraints = "$src0 = $dst" in 9247 def rrbk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst), 9248 (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), 9249 "vcvtps2ph\t{$src2, {sae}, $src1, $dst {${mask}}|$dst {${mask}}, $src1, {sae}, $src2}", 9250 [(set 
multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                           X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
let ExeDomain = GenericDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
                      (ins _src.RC:$src1, i32u8imm:$src2),
                      "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set _dest.RC:$dst,
                            (X86any_cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
                      Sched<[RR]>;
  let Constraints = "$src0 = $dst" in
  def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
                       (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
                       "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                       [(set _dest.RC:$dst,
                             (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
                                           _dest.RC:$src0, _src.KRCWM:$mask))]>,
                       Sched<[RR]>, EVEX_K;
  def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
                        (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
                        "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}",
                        [(set _dest.RC:$dst,
                              (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
                                            _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
                        Sched<[RR]>, EVEX_KZ;
  let hasSideEffects = 0, mayStore = 1 in {
    def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
                        (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
                        "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                        Sched<[MR]>;
    def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
                         (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
                         "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
                         EVEX_K, Sched<[MR]>;
  }
}
}

multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                               SchedWrite Sched> {
  let hasSideEffects = 0, Uses = [MXCSR] in {
    def rrb : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
                         (ins _src.RC:$src1, i32u8imm:$src2),
                         "vcvtps2ph\t{$src2, {sae}, $src1, $dst|$dst, $src1, {sae}, $src2}",
                         [(set _dest.RC:$dst,
                               (X86cvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
                         EVEX_B, Sched<[Sched]>;
    let Constraints = "$src0 = $dst" in
    def rrbk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
                          (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
                          "vcvtps2ph\t{$src2, {sae}, $src1, $dst {${mask}}|$dst {${mask}}, $src1, {sae}, $src2}",
                          [(set _dest.RC:$dst,
                                (X86mcvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2),
                                                 _dest.RC:$src0, _src.KRCWM:$mask))]>,
                          EVEX_B, Sched<[Sched]>, EVEX_K;
    def rrbkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
                           (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
                           "vcvtps2ph\t{$src2, {sae}, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, {sae}, $src2}",
                           [(set _dest.RC:$dst,
                                 (X86mcvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2),
                                                  _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
                           EVEX_B, Sched<[Sched]>, EVEX_KZ;
  }
}

let Predicates = [HasAVX512] in {
  defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
                                    WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
                    avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
                    EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;

  def : Pat<(store (v16i16 (X86any_cvtps2ph VR512:$src1, timm:$src2)), addr:$dst),
            (VCVTPS2PHZmr addr:$dst, VR512:$src1, timm:$src2)>;
}

let Predicates = [HasVLX] in {
  defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
                                       WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
                       EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
  defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
                                       WriteCvtPS2PH, WriteCvtPS2PHSt>,
                       EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;

  def : Pat<(store (f64 (extractelt
                         (bc_v2f64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
                         (iPTR 0))), addr:$dst),
            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
  def : Pat<(store (i64 (extractelt
                         (bc_v2i64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
                         (iPTR 0))), addr:$dst),
            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
  def : Pat<(store (v8i16 (X86any_cvtps2ph VR256X:$src1, timm:$src2)), addr:$dst),
            (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, timm:$src2)>;
}

// Unordered/Ordered scalar fp compare with SAE and set EFLAGS
multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
                              string OpcodeStr, Domain d,
                              X86FoldableSchedWrite sched = WriteFComX> {
  let ExeDomain = d, hasSideEffects = 0, Uses = [MXCSR] in
  def rrb : AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
                   !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
                   EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
}

let Defs = [EFLAGS], Predicates = [HasAVX512] in {
  defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", SSEPackedSingle>,
                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
  defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", SSEPackedDouble>,
                   AVX512PDIi8Base, REX_W, EVEX_CD8<64, CD8VT1>;
  defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", SSEPackedSingle>,
                  AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
  defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", SSEPackedDouble>,
                  AVX512PDIi8Base, REX_W, EVEX_CD8<64, CD8VT1>;
}
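// For illustration: EVEX_B on the rrb records above encodes the {sae}
// (suppress-all-exceptions) modifier, so the register-register compare, e.g.
//   vucomiss {sae}, %xmm1, %xmm0   (AT&T syntax)
// raises no floating-point exceptions and leaves the MXCSR status flags
// untouched.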
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
  defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86any_fcmp, f32, f32mem, loadf32,
                                 "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
                                 EVEX_CD8<32, CD8VT1>;
  defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86any_fcmp, f64, f64mem, loadf64,
                                 "ucomisd", SSEPackedDouble>, PD, EVEX,
                                 VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
  defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, X86strict_fcmps, f32, f32mem, loadf32,
                                "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
                                EVEX_CD8<32, CD8VT1>;
  defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, X86strict_fcmps, f64, f64mem, loadf64,
                                "comisd", SSEPackedDouble>, PD, EVEX,
                                VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
  let isCodeGenOnly = 1 in {
    defm VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
                                       sse_load_f32, "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
                                       EVEX_CD8<32, CD8VT1>;
    defm VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
                                       sse_load_f64, "ucomisd", SSEPackedDouble>, PD, EVEX,
                                       VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;

    defm VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
                                      sse_load_f32, "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
                                      EVEX_CD8<32, CD8VT1>;
    defm VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
                                      sse_load_f64, "comisd", SSEPackedDouble>, PD, EVEX,
                                      VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
  }
}

let Defs = [EFLAGS], Predicates = [HasFP16] in {
  defm VUCOMISHZ : avx512_ord_cmp_sae<0x2E, v8f16x_info, "vucomish",
                                      SSEPackedSingle>, AVX512PSIi8Base, T_MAP5PS,
                                      EVEX_CD8<16, CD8VT1>;
  defm VCOMISHZ : avx512_ord_cmp_sae<0x2F, v8f16x_info, "vcomish",
                                     SSEPackedSingle>, AVX512PSIi8Base, T_MAP5PS,
                                     EVEX_CD8<16, CD8VT1>;
  defm VUCOMISHZ : sse12_ord_cmp<0x2E, FR16X, X86any_fcmp, f16, f16mem, loadf16,
                                 "ucomish", SSEPackedSingle>, T_MAP5PS, EVEX,
                                 VEX_LIG, EVEX_CD8<16, CD8VT1>;
  defm VCOMISHZ : sse12_ord_cmp<0x2F, FR16X, X86strict_fcmps, f16, f16mem, loadf16,
                                "comish", SSEPackedSingle>, T_MAP5PS, EVEX,
                                VEX_LIG, EVEX_CD8<16, CD8VT1>;
  let isCodeGenOnly = 1 in {
    defm VUCOMISHZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v8f16, shmem,
                                       sse_load_f16, "ucomish", SSEPackedSingle>,
                                       T_MAP5PS, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;

    defm VCOMISHZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v8f16, shmem,
                                      sse_load_f16, "comish", SSEPackedSingle>,
                                      T_MAP5PS, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
  }
}

/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd, rcpsh, rsqrtsh
multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _,
                         Predicate prd = HasAVX512> {
  let Predicates = [prd], ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                                   "$src2, $src1", "$src1, $src2",
                                   (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
                                   EVEX_4V, VEX_LIG, Sched<[sched]>;
  defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                   (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                                   "$src2, $src1", "$src1, $src2",
                                   (OpNode (_.VT _.RC:$src1),
                                           (_.ScalarIntMemFrags addr:$src2))>, EVEX_4V, VEX_LIG,
                                   Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}

defm VRCPSHZ : avx512_fp14_s<0x4D, "vrcpsh", X86rcp14s, SchedWriteFRcp.Scl,
                             f16x_info, HasFP16>, EVEX_CD8<16, CD8VT1>,
                             T_MAP6PD;
defm VRSQRTSHZ : avx512_fp14_s<0x4F, "vrsqrtsh", X86rsqrt14s,
                               SchedWriteFRsqrt.Scl, f16x_info, HasFP16>,
                               EVEX_CD8<16, CD8VT1>, T_MAP6PD;
let Uses = [MXCSR] in {
defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
                               f32x_info>, EVEX_CD8<32, CD8VT1>,
                               T8PD;
defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
                               f64x_info>, REX_W, EVEX_CD8<64, CD8VT1>,
                               T8PD;
defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
                                 SchedWriteFRsqrt.Scl, f32x_info>,
                                 EVEX_CD8<32, CD8VT1>, T8PD;
defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
                                 SchedWriteFRsqrt.Scl, f64x_info>, REX_W,
                                 EVEX_CD8<64, CD8VT1>, T8PD;
}
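// For illustration: each scalar defm above expands through
// AVX512_maskable_scalar into unmasked and masked records, e.g.
// VRCP14SSZrr / VRCP14SSZrrk / VRCP14SSZrrkz plus the corresponding rm forms,
// whose memory operand is matched by the ScalarIntMemFrags fragments
// (sse_load_f32 / sse_load_f64) from X86VectorVTInfo.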
/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
                          (_.VT (OpNode _.RC:$src))>, EVEX, T8PD,
                          Sched<[sched]>;
  defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                          (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                          (OpNode (_.VT
                                   (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD,
                          Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                           (ins _.ScalarMemOp:$src), OpcodeStr,
                           "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
                           (OpNode (_.VT
                                    (_.BroadcastLdFrag addr:$src)))>,
                           EVEX, T8PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched> {
  let Uses = [MXCSR] in {
  defm 14PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"), OpNode, sched.ZMM,
                             v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm 14PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"), OpNode, sched.ZMM,
                             v8f64_info>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>;
  }
  let Predicates = [HasFP16] in
  defm PHZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"), OpNode, sched.ZMM,
                           v32f16_info>, EVEX_V512, T_MAP6PD, EVEX_CD8<16, CD8VF>;

  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX], Uses = [MXCSR] in {
    defm 14PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"),
                                  OpNode, sched.XMM, v4f32x_info>,
                                  EVEX_V128, EVEX_CD8<32, CD8VF>;
    defm 14PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"),
                                  OpNode, sched.YMM, v8f32x_info>,
                                  EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm 14PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
                                  OpNode, sched.XMM, v2f64x_info>,
                                  EVEX_V128, REX_W, EVEX_CD8<64, CD8VF>;
    defm 14PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
                                  OpNode, sched.YMM, v4f64x_info>,
                                  EVEX_V256, REX_W, EVEX_CD8<64, CD8VF>;
  }
  let Predicates = [HasFP16, HasVLX] in {
    defm PHZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
                                OpNode, sched.XMM, v8f16x_info>,
                                EVEX_V128, T_MAP6PD, EVEX_CD8<16, CD8VF>;
    defm PHZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
                                OpNode, sched.YMM, v16f16x_info>,
                                EVEX_V256, T_MAP6PD, EVEX_CD8<16, CD8VF>;
  }
}

defm VRSQRT : avx512_fp14_p_vl_all<0x4E, "vrsqrt", X86rsqrt14, SchedWriteFRsqrt>;
defm VRCP : avx512_fp14_p_vl_all<0x4C, "vrcp", X86rcp14, SchedWriteFRcp>;
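// For illustration: the two defms above instantiate the whole width grid,
// producing e.g. VRCP14PSZ, VRCP14PSZ256, VRCP14PSZ128 and the VRCP14PD*
// forms and, when FP16 is available, VRCPPHZ* - note that the FP16 mnemonics
// drop the "14" (vrcpph / vrsqrtph).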
/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         SDNode OpNode, SDNode OpNodeSAE,
                         X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
  defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                                  "$src2, $src1", "$src1, $src2",
                                  (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
                                  Sched<[sched]>, SIMD_EXC;

  defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                                   "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                                   (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
                                   EVEX_B, Sched<[sched]>;

  defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                  (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                                  "$src2, $src1", "$src1, $src2",
                                  (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2))>,
                                  Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  }
}

multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                        SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
  defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE,
                           sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG, T8PD, EVEX_4V;
  defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE,
                           sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, REX_W, T8PD, EVEX_4V;
}

multiclass avx512_vgetexpsh<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
  let Predicates = [HasFP16] in
  defm SHZ : avx512_fp28_s<opc, OpcodeStr#"sh", f16x_info, OpNode, OpNodeSAE, sched>,
                           EVEX_CD8<16, CD8VT1>, T_MAP6PD, EVEX_4V;
}

let Predicates = [HasERI] in {
  defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, X86rcp28SAEs,
                             SchedWriteFRcp.Scl>;
  defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, X86rsqrt28SAEs,
                               SchedWriteFRsqrt.Scl>;
}

defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
                            SchedWriteFRnd.Scl>,
               avx512_vgetexpsh<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
                                SchedWriteFRnd.Scl>;

/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         SDNode OpNode, X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src), OpcodeStr, "$src", "$src",
                           (OpNode (_.VT _.RC:$src))>,
                           Sched<[sched]>;

  defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                           (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                           (OpNode (_.VT
                                    (bitconvert (_.LdFrag addr:$src))))>,
                           Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                            (ins _.ScalarMemOp:$src), OpcodeStr,
                            "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
                            (OpNode (_.VT
                                     (_.BroadcastLdFrag addr:$src)))>,
                            EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                             SDNode OpNode, X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src), OpcodeStr,
                            "{sae}, $src", "$src, {sae}",
                            (OpNode (_.VT _.RC:$src))>,
                            EVEX_B, Sched<[sched]>;
}

multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
                      SDNode OpNodeSAE, X86SchedWriteWidths sched> {
  defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
             avx512_fp28_p_sae<opc, OpcodeStr#"ps", v16f32_info, OpNodeSAE, sched.ZMM>,
             T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
             avx512_fp28_p_sae<opc, OpcodeStr#"pd", v8f64_info, OpNodeSAE, sched.ZMM>,
             T8PD, EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>;
}

multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode, X86SchedWriteWidths sched> {
  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode,
                                sched.XMM>,
                                EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode,
                                sched.YMM>,
                                EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode,
                                sched.XMM>,
                                EVEX_V128, REX_W, T8PD, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode,
                                sched.YMM>,
                                EVEX_V256, REX_W, T8PD, EVEX_CD8<64, CD8VF>;
  }
}

multiclass avx512_vgetexp_fp16<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               SDNode OpNodeSAE, X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in
  defm PHZ : avx512_fp28_p<opc, OpcodeStr#"ph", v32f16_info, OpNode, sched.ZMM>,
             avx512_fp28_p_sae<opc, OpcodeStr#"ph", v32f16_info, OpNodeSAE, sched.ZMM>,
             T_MAP6PD, EVEX_V512, EVEX_CD8<16, CD8VF>;
  let Predicates = [HasFP16, HasVLX] in {
    defm PHZ128 : avx512_fp28_p<opc, OpcodeStr#"ph", v8f16x_info, OpNode, sched.XMM>,
                                EVEX_V128, T_MAP6PD, EVEX_CD8<16, CD8VF>;
    defm PHZ256 : avx512_fp28_p<opc, OpcodeStr#"ph", v16f16x_info, OpNode, sched.YMM>,
                                EVEX_V256, T_MAP6PD, EVEX_CD8<16, CD8VF>;
  }
}
let Predicates = [HasERI] in {
  defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, X86rsqrt28SAE,
                             SchedWriteFRsqrt>, EVEX;
  defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, X86rcp28SAE,
                           SchedWriteFRcp>, EVEX;
  defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, X86exp2SAE,
                          SchedWriteFAdd>, EVEX;
}
defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
                          SchedWriteFRnd>,
               avx512_vgetexp_fp16<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
                                   SchedWriteFRnd>,
               avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexp,
                                        SchedWriteFRnd>, EVEX;

multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
                                    X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
                           (_.VT (X86fsqrtRnd _.RC:$src, (i32 timm:$rc)))>,
                           EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
}

multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm r: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                (ins _.RC:$src), OpcodeStr, "$src", "$src",
                                (_.VT (any_fsqrt _.RC:$src)),
                                (_.VT (fsqrt _.RC:$src))>, EVEX,
                                Sched<[sched]>;
  defm m: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                                (any_fsqrt (_.VT (_.LdFrag addr:$src))),
                                (fsqrt (_.VT (_.LdFrag addr:$src)))>, EVEX,
                                Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm mb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                 (ins _.ScalarMemOp:$src), OpcodeStr,
                                 "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
                                 (any_fsqrt (_.VT (_.BroadcastLdFrag addr:$src))),
                                 (fsqrt (_.VT (_.BroadcastLdFrag addr:$src)))>,
                                 EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
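// For illustration: the mb form above matches an embedded-broadcast load.
// For v8f64 (BroadcastStr "{1to8}") it assembles, in AT&T syntax, as e.g.
//   vsqrtpd (%rax){1to8}, %zmm0
// folding a single f64 load that is broadcast to all eight lanes.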
let Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
                                  X86SchedWriteSizes sched> {
  let Predicates = [HasFP16] in
  defm PHZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
                                sched.PH.ZMM, v32f16_info>,
                                EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
  let Predicates = [HasFP16, HasVLX] in {
    defm PHZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
                                     sched.PH.XMM, v8f16x_info>,
                                     EVEX_V128, T_MAP5PS, EVEX_CD8<16, CD8VF>;
    defm PHZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
                                     sched.PH.YMM, v16f16x_info>,
                                     EVEX_V256, T_MAP5PS, EVEX_CD8<16, CD8VF>;
  }
  defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
                                sched.PS.ZMM, v16f32_info>,
                                EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
                                sched.PD.ZMM, v8f64_info>,
                                EVEX_V512, REX_W, PD, EVEX_CD8<64, CD8VF>;
  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
                                     sched.PS.XMM, v4f32x_info>,
                                     EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
                                     sched.PS.YMM, v8f32x_info>,
                                     EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
                                     sched.PD.XMM, v2f64x_info>,
                                     EVEX_V128, REX_W, PD, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
                                     sched.PD.YMM, v4f64x_info>,
                                     EVEX_V256, REX_W, PD, EVEX_CD8<64, CD8VF>;
  }
}

let Uses = [MXCSR] in
multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
                                        X86SchedWriteSizes sched> {
  let Predicates = [HasFP16] in
  defm PHZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ph"),
                                      sched.PH.ZMM, v32f16_info>,
                                      EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
  defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
                                      sched.PS.ZMM, v16f32_info>,
                                      EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"),
                                      sched.PD.ZMM, v8f64_info>,
                                      EVEX_V512, REX_W, PD, EVEX_CD8<64, CD8VF>;
}

multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, string Name, Predicate prd = HasAVX512> {
  let ExeDomain = _.ExeDomain, Predicates = [prd] in {
  defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                      (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                                      "$src2, $src1", "$src1, $src2",
                                      (X86fsqrts (_.VT _.RC:$src1),
                                                 (_.VT _.RC:$src2))>,
                                      Sched<[sched]>, SIMD_EXC;
  defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                      (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                                      "$src2, $src1", "$src1, $src2",
                                      (X86fsqrts (_.VT _.RC:$src1),
                                                 (_.ScalarIntMemFrags addr:$src2))>,
                                      Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  let Uses = [MXCSR] in
  defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                       (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
                                       "$rc, $src2, $src1", "$src1, $src2, $rc",
                                       (X86fsqrtRnds (_.VT _.RC:$src1),
                                                     (_.VT _.RC:$src2),
                                                     (i32 timm:$rc))>,
                                       EVEX_B, EVEX_RC, Sched<[sched]>;

  let isCodeGenOnly = 1, hasSideEffects = 0 in {
    def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.FRC:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
              Sched<[sched]>, SIMD_EXC;
    let mayLoad = 1 in
    def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.ScalarMemOp:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
              Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  }
  }

  let Predicates = [prd] in {
    def : Pat<(_.EltVT (any_fsqrt _.FRC:$src)),
              (!cast<Instruction>(Name#Zr)
                  (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
  }

  let Predicates = [prd, OptForSize] in {
    def : Pat<(_.EltVT (any_fsqrt (load addr:$src))),
              (!cast<Instruction>(Name#Zm)
                  (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
  }
}

multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
                                  X86SchedWriteSizes sched> {
  defm SHZ : avx512_sqrt_scalar<opc, OpcodeStr#"sh", sched.PH.Scl, f16x_info, NAME#"SH", HasFP16>,
             EVEX_CD8<16, CD8VT1>, EVEX_4V, T_MAP5XS;
  defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
             EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;
  defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
             EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, REX_W;
}

defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
             avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;

defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG;
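// Note on the OptForSize-guarded load pattern in avx512_sqrt_scalar above:
// the folded scalar form leaves the upper destination elements unchanged
// (hence the IMPLICIT_DEF pass-through operand), so the load is presumably
// folded only when optimizing for size, to avoid the false dependency it
// would otherwise introduce on the destination register.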
multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
                                      "$src3, $src2, $src1", "$src1, $src2, $src3",
                                      (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                                          (i32 timm:$src3)))>,
                                      Sched<[sched]>, SIMD_EXC;

  let Uses = [MXCSR] in
  defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                       (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
                                       "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
                                       (_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                                              (i32 timm:$src3)))>, EVEX_B,
                                       Sched<[sched]>;

  defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                      (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
                                      OpcodeStr,
                                      "$src3, $src2, $src1", "$src1, $src2, $src3",
                                      (_.VT (X86RndScales _.RC:$src1,
                                             (_.ScalarIntMemFrags addr:$src2), (i32 timm:$src3)))>,
                                      Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;

  let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
    def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
              OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
              []>, Sched<[sched]>, SIMD_EXC;

    let mayLoad = 1 in
    def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
              OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
              []>, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  }
  }

  let Predicates = [HasAVX512] in {
    def : Pat<(X86any_VRndScale _.FRC:$src1, timm:$src2),
              (_.EltVT (!cast<Instruction>(NAME#r) (_.EltVT (IMPLICIT_DEF)),
                        _.FRC:$src1, timm:$src2))>;
  }

  let Predicates = [HasAVX512, OptForSize] in {
    def : Pat<(X86any_VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2),
              (_.EltVT (!cast<Instruction>(NAME#m) (_.EltVT (IMPLICIT_DEF)),
                        addr:$src1, timm:$src2))>;
  }
}

let Predicates = [HasFP16] in
defm VRNDSCALESHZ : avx512_rndscale_scalar<0x0A, "vrndscalesh",
                                           SchedWriteFRnd.Scl, f16x_info>,
                                           AVX512PSIi8Base, TA, EVEX_4V,
                                           EVEX_CD8<16, CD8VT1>;

defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
                                           SchedWriteFRnd.Scl, f32x_info>,
                                           AVX512AIi8Base, EVEX_4V, VEX_LIG,
                                           EVEX_CD8<32, CD8VT1>;

defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd",
                                           SchedWriteFRnd.Scl, f64x_info>,
                                           REX_W, AVX512AIi8Base, EVEX_4V, VEX_LIG,
                                           EVEX_CD8<64, CD8VT1>;

multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
                                dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
                                dag OutMask, Predicate BasePredicate> {
  let Predicates = [BasePredicate] in {
    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
                                 (OpNode (extractelt _.VT:$src2, (iPTR 0))),
                                 (extractelt _.VT:$dst, (iPTR 0))))),
              (!cast<Instruction>("V"#OpcPrefix#r_Intk)
                  _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>;

    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
                                 (OpNode (extractelt _.VT:$src2, (iPTR 0))),
                                 ZeroFP))),
              (!cast<Instruction>("V"#OpcPrefix#r_Intkz)
                  OutMask, _.VT:$src2, _.VT:$src1)>;
  }
}

defm : avx512_masked_scalar<fsqrt, "SQRTSHZ", X86Movsh,
                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v8f16x_info,
                            fp16imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasFP16>;
defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss,
                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info,
                            fp32imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
                            fp64imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;


//-------------------------------------------------
// Integer truncate and extend operations
//-------------------------------------------------

// PatFrags that contain a select and a truncate op. They take operands in the
// same order as X86vmtrunc, X86vmtruncs, X86vmtruncus. This allows us to pass
// either to the multiclasses.
def select_trunc : PatFrag<(ops node:$src, node:$src0, node:$mask),
                           (vselect_mask node:$mask,
                                         (trunc node:$src), node:$src0)>;
def select_truncs : PatFrag<(ops node:$src, node:$src0, node:$mask),
                            (vselect_mask node:$mask,
                                          (X86vtruncs node:$src), node:$src0)>;
def select_truncus : PatFrag<(ops node:$src, node:$src0, node:$mask),
                             (vselect_mask node:$mask,
                                           (X86vtruncus node:$src), node:$src0)>;
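// For illustration: because the PatFrags above use the same (src, src0, mask)
// operand order as the masked-truncate nodes, either shape can be passed as
// the MaskNode parameter of the truncate multiclasses below, e.g.
//   (select_trunc (v8i64 VR512:$src), (v8i16 VR128X:$src0), VK8WM:$mask)
//   (X86vmtrunc   (v8i64 VR512:$src), (v8i16 VR128X:$src0), VK8WM:$mask)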
multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               SDPatternOperator MaskNode,
                               X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
                               X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
  let ExeDomain = DestInfo.ExeDomain in {
  def rr : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
                      (ins SrcInfo.RC:$src),
                      OpcodeStr # "\t{$src, $dst|$dst, $src}",
                      [(set DestInfo.RC:$dst,
                            (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))))]>,
                      EVEX, Sched<[sched]>;
  let Constraints = "$src0 = $dst" in
  def rrk : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
                       (ins DestInfo.RC:$src0, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
                       OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                       [(set DestInfo.RC:$dst,
                             (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
                                       (DestInfo.VT DestInfo.RC:$src0),
                                       SrcInfo.KRCWM:$mask))]>,
                       EVEX, EVEX_K, Sched<[sched]>;
  def rrkz : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
                        (ins SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
                        OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                        [(set DestInfo.RC:$dst,
                              (DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
                                            DestInfo.ImmAllZerosV, SrcInfo.KRCWM:$mask)))]>,
                        EVEX, EVEX_KZ, Sched<[sched]>;
  }

  let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
    def mr : AVX512XS8I<opc, MRMDestMem, (outs),
                        (ins x86memop:$dst, SrcInfo.RC:$src),
                        OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
                        EVEX, Sched<[sched.Folded]>;

    def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
                         (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
                         OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>,
                         EVEX, EVEX_K, Sched<[sched.Folded]>;
  } // mayStore = 1, hasSideEffects = 0
}

multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
                                    PatFrag truncFrag, PatFrag mtruncFrag,
                                    string Name> {

  def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
            (!cast<Instruction>(Name#SrcInfo.ZSuffix#mr)
                addr:$dst, SrcInfo.RC:$src)>;

  def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst,
                        SrcInfo.KRCWM:$mask),
            (!cast<Instruction>(Name#SrcInfo.ZSuffix#mrk)
                addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
}

multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
                        SDNode OpNode256, SDNode OpNode512,
                        SDPatternOperator MaskNode128,
                        SDPatternOperator MaskNode256,
                        SDPatternOperator MaskNode512,
                        X86SchedWriteWidths sched,
                        AVX512VLVectorVTInfo VTSrcInfo,
                        X86VectorVTInfo DestInfoZ128,
                        X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
                        X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
                        X86MemOperand x86memopZ, PatFrag truncFrag,
                        PatFrag mtruncFrag, Predicate prd = HasAVX512> {

  let Predicates = [HasVLX, prd] in {
    defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched.XMM,
                                   VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
               avx512_trunc_mr_lowering<VTSrcInfo.info128, truncFrag,
                                        mtruncFrag, NAME>, EVEX_V128;

    defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched.YMM,
                                   VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
               avx512_trunc_mr_lowering<VTSrcInfo.info256, truncFrag,
                                        mtruncFrag, NAME>, EVEX_V256;
  }
  let Predicates = [prd] in
    defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched.ZMM,
                                VTSrcInfo.info512, DestInfoZ, x86memopZ>,
            avx512_trunc_mr_lowering<VTSrcInfo.info512, truncFrag,
                                     mtruncFrag, NAME>, EVEX_V512;
}

multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr,
                           X86SchedWriteWidths sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode,
                          InVecMaskNode, InVecMaskNode, InVecMaskNode, sched,
                          avx512vl_i64_info, v16i8x_info, v16i8x_info,
                          v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
}

multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86SchedWriteWidths sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
                          InVecMaskNode, InVecMaskNode, MaskNode, sched,
                          avx512vl_i64_info, v8i16x_info, v8i16x_info,
                          v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
}

multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86SchedWriteWidths sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
                          InVecMaskNode, MaskNode, MaskNode, sched,
                          avx512vl_i64_info, v4i32x_info, v4i32x_info,
                          v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
}

multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86SchedWriteWidths sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
                          InVecMaskNode, InVecMaskNode, MaskNode, sched,
                          avx512vl_i32_info, v16i8x_info, v16i8x_info,
                          v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
}

multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86SchedWriteWidths sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
                          InVecMaskNode, MaskNode, MaskNode, sched,
                          avx512vl_i32_info, v8i16x_info, v8i16x_info,
                          v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
}

multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86SchedWriteWidths sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
                          InVecMaskNode, MaskNode, MaskNode, sched,
                          avx512vl_i16_info, v16i8x_info, v16i8x_info,
                          v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
}

defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb",
                               SchedWriteVecTruncate, truncstorevi8,
                               masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb",
                                SchedWriteVecTruncate, truncstore_s_vi8,
                                masked_truncstore_s_vi8, X86vtruncs,
                                X86vmtruncs>;
defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb",
                                 SchedWriteVecTruncate, truncstore_us_vi8,
                                 masked_truncstore_us_vi8, X86vtruncus, X86vmtruncus>;

defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc,
                               SchedWriteVecTruncate, truncstorevi16,
                               masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, select_truncs,
                                SchedWriteVecTruncate, truncstore_s_vi16,
                                masked_truncstore_s_vi16, X86vtruncs,
                                X86vmtruncs>;
defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
                                 select_truncus, SchedWriteVecTruncate,
                                 truncstore_us_vi16, masked_truncstore_us_vi16,
                                 X86vtruncus, X86vmtruncus>;

defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc,
                               SchedWriteVecTruncate, truncstorevi32,
                               masked_truncstorevi32, X86vtrunc, X86vmtrunc>;
defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, select_truncs,
                                SchedWriteVecTruncate, truncstore_s_vi32,
                                masked_truncstore_s_vi32, X86vtruncs,
                                X86vmtruncs>;
defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
                                 select_truncus, SchedWriteVecTruncate,
                                 truncstore_us_vi32, masked_truncstore_us_vi32,
                                 X86vtruncus, X86vmtruncus>;

defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc,
                               SchedWriteVecTruncate, truncstorevi8,
                               masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs,
                                SchedWriteVecTruncate, truncstore_s_vi8,
                                masked_truncstore_s_vi8, X86vtruncs,
                                X86vmtruncs>;
defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus,
                                 select_truncus, SchedWriteVecTruncate,
                                 truncstore_us_vi8, masked_truncstore_us_vi8,
                                 X86vtruncus, X86vmtruncus>;

defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc,
                               SchedWriteVecTruncate, truncstorevi16,
                               masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs,
                                SchedWriteVecTruncate, truncstore_s_vi16,
                                masked_truncstore_s_vi16, X86vtruncs,
                                X86vmtruncs>;
defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
                                 select_truncus, SchedWriteVecTruncate,
                                 truncstore_us_vi16, masked_truncstore_us_vi16,
                                 X86vtruncus, X86vmtruncus>;

defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc,
                               SchedWriteVecTruncate, truncstorevi8,
                               masked_truncstorevi8, X86vtrunc,
                               X86vmtrunc>;
defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs,
                                SchedWriteVecTruncate, truncstore_s_vi8,
                                masked_truncstore_s_vi8, X86vtruncs,
                                X86vmtruncs>;
defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
                                 select_truncus, SchedWriteVecTruncate,
                                 truncstore_us_vi8, masked_truncstore_us_vi8,
                                 X86vtruncus, X86vmtruncus>;

let Predicates = [HasAVX512, NoVLX] in {
def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
         (v8i16 (EXTRACT_SUBREG
                 (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                              VR256X:$src, sub_ymm)))), sub_xmm))>;
def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
         (v4i32 (EXTRACT_SUBREG
                 (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                            VR256X:$src, sub_ymm)))), sub_xmm))>;
}

let Predicates = [HasBWI, NoVLX] in {
def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
         (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
                                             VR256X:$src, sub_ymm))), sub_xmm))>;
}

// Without BWI we can't use vXi16/vXi8 vselect so we have to use vmtrunc nodes.
multiclass mtrunc_lowering<string InstrName, SDNode OpNode,
                           X86VectorVTInfo DestInfo,
                           X86VectorVTInfo SrcInfo> {
  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
                                 DestInfo.RC:$src0,
                                 SrcInfo.KRCWM:$mask)),
            (!cast<Instruction>(InstrName#"rrk") DestInfo.RC:$src0,
                                                 SrcInfo.KRCWM:$mask,
                                                 SrcInfo.RC:$src)>;

  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
                                 DestInfo.ImmAllZerosV,
                                 SrcInfo.KRCWM:$mask)),
            (!cast<Instruction>(InstrName#"rrkz") SrcInfo.KRCWM:$mask,
                                                  SrcInfo.RC:$src)>;
}

let Predicates = [HasVLX] in {
defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>;
defm : mtrunc_lowering<"VPMOVSDWZ256", X86vmtruncs, v8i16x_info, v8i32x_info>;
defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>;
}

let Predicates = [HasAVX512] in {
defm : mtrunc_lowering<"VPMOVDWZ", X86vmtrunc, v16i16x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVSDWZ", X86vmtruncs, v16i16x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>;

defm : mtrunc_lowering<"VPMOVDBZ", X86vmtrunc, v16i8x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVSDBZ", X86vmtruncs, v16i8x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>;

defm : mtrunc_lowering<"VPMOVQWZ", X86vmtrunc, v8i16x_info, v8i64_info>;
defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>;
defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>;
}
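// For illustration: without BWI a masked v8i64->v8i16 truncate cannot be
// expressed as a v8i16 vselect_mask, so it reaches isel as an X86vmtrunc node
// and the mtrunc_lowering patterns above select it directly into
// VPMOVQWZrrk / VPMOVQWZrrkz.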
multiclass avx512_pmovx_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
                               X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
                               X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode> {
  let ExeDomain = DestInfo.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
                            (ins SrcInfo.RC:$src), OpcodeStr, "$src", "$src",
                            (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
                            EVEX, Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                            (ins x86memop:$src), OpcodeStr, "$src", "$src",
                            (DestInfo.VT (LdFrag addr:$src))>,
                            EVEX, Sched<[sched.Folded]>;
  }
}

multiclass avx512_pmovx_bw<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode InVecNode, string ExtTy,
                           X86SchedWriteWidths sched,
                           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasBWI] in {
    defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v8i16x_info,
                                   v16i8x_info, i64mem, LdFrag, InVecNode>,
                                   EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, WIG;

    defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v16i16x_info,
                                   v16i8x_info, i128mem, LdFrag, OpNode>,
                                   EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, WIG;
  }
  let Predicates = [HasBWI] in {
    defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v32i16_info,
                                 v32i8x_info, i256mem, LdFrag, OpNode>,
                                 EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, WIG;
  }
}

multiclass avx512_pmovx_bd<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode InVecNode, string ExtTy,
                           X86SchedWriteWidths sched,
                           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v4i32x_info,
                                   v16i8x_info, i32mem, LdFrag, InVecNode>,
                                   EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, WIG;

    defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v8i32x_info,
                                   v16i8x_info, i64mem, LdFrag, InVecNode>,
                                   EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v16i32_info,
                                 v16i8x_info, i128mem, LdFrag, OpNode>,
                                 EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, WIG;
  }
}

multiclass avx512_pmovx_bq<bits<8> opc, string OpcodeStr,
                           SDNode InVecNode, string ExtTy,
                           X86SchedWriteWidths sched,
                           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
                                   v16i8x_info, i16mem, LdFrag, InVecNode>,
                                   EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, WIG;

    defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
                                   v16i8x_info, i32mem, LdFrag, InVecNode>,
                                   EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
                                 v16i8x_info, i64mem, LdFrag, InVecNode>,
                                 EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, WIG;
  }
}

multiclass avx512_pmovx_wd<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode InVecNode, string ExtTy,
                           X86SchedWriteWidths sched,
                           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v4i32x_info,
                                   v8i16x_info, i64mem, LdFrag, InVecNode>,
                                   EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, WIG;

    defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v8i32x_info,
                                   v8i16x_info, i128mem, LdFrag, OpNode>,
                                   EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v16i32_info,
                                 v16i16x_info, i256mem, LdFrag, OpNode>,
                                 EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, WIG;
  }
}

multiclass avx512_pmovx_wq<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode InVecNode, string ExtTy,
                           X86SchedWriteWidths sched,
                           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
                                   v8i16x_info, i32mem, LdFrag, InVecNode>,
                                   EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, WIG;

    defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
                                   v8i16x_info, i64mem, LdFrag, InVecNode>,
                                   EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
                                 v8i16x_info, i128mem, LdFrag, OpNode>,
                                 EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, WIG;
  }
}

multiclass avx512_pmovx_dq<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode InVecNode, string ExtTy,
                           X86SchedWriteWidths sched,
                           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {

  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
                                   v4i32x_info, i64mem, LdFrag, InVecNode>,
                                   EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;

    defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
                                   v4i32x_info, i128mem, LdFrag, OpNode>,
                                   EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
  }
  let Predicates = [HasAVX512] in {
    defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
                                 v8i32x_info, i256mem, LdFrag, OpNode>,
                                 EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
  }
}

defm VPMOVZXBW : avx512_pmovx_bw<0x30, "vpmovzxbw", zext, zext_invec, "z", SchedWriteVecExtend>;
defm VPMOVZXBD : avx512_pmovx_bd<0x31, "vpmovzxbd", zext, zext_invec, "z", SchedWriteVecExtend>;
defm VPMOVZXBQ : avx512_pmovx_bq<0x32, "vpmovzxbq", zext_invec, "z", SchedWriteVecExtend>;
defm VPMOVZXWD : avx512_pmovx_wd<0x33, "vpmovzxwd", zext, zext_invec, "z", SchedWriteVecExtend>;
defm VPMOVZXWQ : avx512_pmovx_wq<0x34, "vpmovzxwq", zext, zext_invec, "z", SchedWriteVecExtend>;
defm VPMOVZXDQ : avx512_pmovx_dq<0x35, "vpmovzxdq", zext, zext_invec, "z", SchedWriteVecExtend>;

defm VPMOVSXBW: avx512_pmovx_bw<0x20, "vpmovsxbw", sext, sext_invec, "s", SchedWriteVecExtend>;
defm VPMOVSXBD: avx512_pmovx_bd<0x21, "vpmovsxbd", sext, sext_invec, "s", SchedWriteVecExtend>;
defm VPMOVSXBQ: avx512_pmovx_bq<0x22, "vpmovsxbq", sext_invec, "s", SchedWriteVecExtend>;
defm VPMOVSXWD: avx512_pmovx_wd<0x23, "vpmovsxwd", sext, sext_invec, "s", SchedWriteVecExtend>;
defm VPMOVSXWQ: avx512_pmovx_wq<0x24, "vpmovsxwq", sext, sext_invec, "s", SchedWriteVecExtend>;
defm VPMOVSXDQ: avx512_pmovx_dq<0x25, "vpmovsxdq", sext, sext_invec, "s", SchedWriteVecExtend>;


// Patterns that we also need any extend versions of. aext_vector_inreg
// is currently legalized to zext_vector_inreg.
multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
  // 256-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
    def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
              (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
  }

  let Predicates = [HasVLX] in {
    def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
              (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;

    def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
              (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
  }

  // 512-bit patterns
  let Predicates = [HasBWI] in {
    def : Pat<(v32i16 (ExtOp (loadv32i8 addr:$src))),
              (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
  }
  let Predicates = [HasAVX512] in {
    def : Pat<(v16i32 (ExtOp (loadv16i8 addr:$src))),
              (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
    def : Pat<(v16i32 (ExtOp (loadv16i16 addr:$src))),
              (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;

    def : Pat<(v8i64 (ExtOp (loadv8i16 addr:$src))),
              (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;

    def : Pat<(v8i64 (ExtOp (loadv8i32 addr:$src))),
              (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
  }
}

multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
                                 SDNode InVecOp> :
    AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
  // 128-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
    def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
              (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
    def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
              (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
    def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
              (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  }
  let Predicates = [HasVLX] in {
    def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
              (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
    def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
              (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;

    def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
              (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;

    def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
              (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
    def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
              (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
    def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
              (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;

    def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
              (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
    def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
              (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;

    def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
              (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
    def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
              (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
    def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
              (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  }
  let Predicates = [HasVLX] in {
    def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
              (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
    def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
              (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
    def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
              (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;

    def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
              (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
    def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
              (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;

    def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
              (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
    def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
              (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
    def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
              (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  }
  // 512-bit patterns
  let Predicates = [HasAVX512] in {
    def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
              (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
    def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
              (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
    def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
              (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  }
}

defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;

// Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge
// ext+trunc aggressively, making it impossible to legalize the DAG to this
// pattern directly.
let Predicates = [HasAVX512, NoBWI] in {
def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
def: Pat<(v16i8 (trunc (loadv16i16 addr:$src))),
         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
}
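// For illustration: the two patterns above implement the v16i16 -> v16i8
// truncate as a zero-extend to v16i32 followed by a dword-to-byte truncate,
// i.e. roughly
//   vpmovzxwd %ymm0, %zmm0
//   vpmovdb   %zmm0, %xmm0
// which stays legal with AVX512F alone.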
//===----------------------------------------------------------------------===//
// GATHER - SCATTER Operations

// FIXME: Improve scheduling of gather/scatter instructions.
multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
  let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
      ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
  def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
                    (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
                    !strconcat(OpcodeStr#_.Suffix,
                               "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
                    []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
                    Sched<[WriteLoad, WriteVecMaskedGatherWriteback]>;
}

multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
                              AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512,
                                    vy512xmem>, EVEX_V512, REX_W;
  defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info512,
                                    vz512mem>, EVEX_V512, REX_W;
let Predicates = [HasVLX] in {
  defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
                                       vx256xmem>, EVEX_V256, REX_W;
  defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info256,
                                       vy256xmem>, EVEX_V256, REX_W;
  defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
                                       vx128xmem>, EVEX_V128, REX_W;
  defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
                                       vx128xmem>, EVEX_V128, REX_W;
}
}

multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
                              AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512, vz512mem>,
                                    EVEX_V512;
  defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info256, vz256mem>,
                                    EVEX_V512;
let Predicates = [HasVLX] in {
  defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
                                       vy256xmem>, EVEX_V256;
  defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info128,
                                       vy128xmem>, EVEX_V256;
  defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
                                       vx128xmem>, EVEX_V128;
  defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
                                       vx64xmem, VK2WM>, EVEX_V128;
}
}


defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
               avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;

defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
                avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
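// For illustration: the two defms above expand across the D/Q index width and
// element type grid, e.g. VGATHERDPSZrm, VGATHERQPSZrm, VGATHERDPDZ128rm and
// VPGATHERDQZ128rm; every form is EVEX_K-masked and writes the updated mask
// back through the tied $mask_wb operand declared in avx512_gather.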
multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
                               AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512,
                                     vy512xmem>, EVEX_V512, REX_W;
  defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info512,
                                     vz512mem>, EVEX_V512, REX_W;
let Predicates = [HasVLX] in {
  defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
                                        vx256xmem>, EVEX_V256, REX_W;
  defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info256,
                                        vy256xmem>, EVEX_V256, REX_W;
  defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
                                        vx128xmem>, EVEX_V128, REX_W;
  defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
                                        vx128xmem>, EVEX_V128, REX_W;
}
}

multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
                               AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512, vz512mem>,
                      EVEX_V512;
  defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info256, vz256mem>,
                      EVEX_V512;
let Predicates = [HasVLX] in {
  defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
                                        vy256xmem>, EVEX_V256;
  defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
                                        vy128xmem>, EVEX_V256;
  defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
                                        vx128xmem>, EVEX_V128;
  defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
                                        vx64xmem, VK2WM>, EVEX_V128;
}
}

defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
                avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;

defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
                 avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;

// prefetch
multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
                                          RegisterClass KRC, X86MemOperand memop> {
  let Predicates = [HasPFI], mayLoad = 1, mayStore = 1 in
  def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
             !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
             EVEX, EVEX_K, Sched<[WriteLoad]>;
}
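// The prefetch forms take no destination register; the ModRM.reg field
// (MRM1m/MRM2m for gather hint 0/1, MRM5m/MRM6m for scatter hint 0/1 in the
// defms below) selects which prefetch variant is encoded.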
defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
                    VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
                    VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
                    VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
                    VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
                    VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
                    VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
                    VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
                    VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
                     VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
                     VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
                     VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
                     VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;

multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr, SchedWrite Sched> {
def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
                  !strconcat(OpcodeStr#Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
                  [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
                  EVEX, Sched<[Sched]>;
}

multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
                                 string OpcodeStr, Predicate prd> {
let Predicates = [prd] in
  defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr, WriteVecMoveZ>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr, WriteVecMoveY>, EVEX_V256;
    defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr, WriteVecMoveX>, EVEX_V128;
  }
}

defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2", HasBWI>;
defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI>, REX_W;
defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI>, REX_W;

multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr> {
    def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
                        !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                        [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
                        EVEX, Sched<[WriteMove]>;
}
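// vpmov*2m copies each element's sign bit into the mask register; this is
// modeled above as "0 > x" (X86pcmpgtm with an all-zeros LHS), which is true
// exactly when the element's sign bit is set.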
// Use the 512-bit version to implement the 128/256-bit variants when VLX is
// not available.
multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
                                           X86VectorVTInfo _,
                                           string Name> {

  def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
            (_.KVT (COPY_TO_REGCLASS
                     (!cast<Instruction>(Name#"Zrr")
                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                      _.RC:$src, _.SubRegIdx)),
                   _.KRC))>;
}

multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
                                         AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
    defm Z : convert_vector_to_mask_common<opc, VTInfo.info512, OpcodeStr>,
             EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
                EVEX_V256;
    defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
                EVEX_V128;
  }
  let Predicates = [prd, NoVLX] in {
    defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
    defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
  }
}

defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
                                              avx512vl_i8_info, HasBWI>;
defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
                                              avx512vl_i16_info, HasBWI>, REX_W;
defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
                                              avx512vl_i32_info, HasDQI>;
defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
                                              avx512vl_i64_info, HasDQI>, REX_W;
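// For example, with DQI but no VLX, a v8i32-to-v8i1 conversion is matched by
// widening the source into a zmm (upper lanes IMPLICIT_DEF), running the
// 512-bit instruction, and copying the VK16 result to VK8; the extra mask
// bits correspond to undef lanes and are never read through the v8i1 type.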
// Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
// is available, but BWI is not. We can't handle this in lowering because
// a target independent DAG combine likes to combine sext and trunc.
let Predicates = [HasDQI, NoBWI] in {
  def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
            (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
  def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
            (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
}

let Predicates = [HasDQI, NoBWI, HasVLX] in {
  def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
            (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - COMPRESS and EXPAND
//

multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
                                        string OpcodeStr, X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
              (null_frag)>, AVX5128IBase,
              Sched<[sched]>;

  let mayStore = 1, hasSideEffects = 0 in
  def mr : AVX5128I<opc, MRMDestMem, (outs),
              (ins _.MemOp:$dst, _.RC:$src),
              OpcodeStr # "\t{$src, $dst|$dst, $src}",
              []>, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[sched.Folded]>;

  def mrk : AVX5128I<opc, MRMDestMem, (outs),
              (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
              OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
              []>,
              EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[sched.Folded]>;
}

multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
  def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#mrk)
                            addr:$dst, _.KRCWM:$mask, _.RC:$src)>;

  def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#rrk)
                            _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
  def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#rrkz)
                            _.KRCWM:$mask, _.RC:$src)>;
}

multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched,
                                 AVX512VLVectorVTInfo VTInfo,
                                 Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in
  defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>,
           compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [Pred, HasVLX] in {
    defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>,
                compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>,
                compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
  }
}
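// Compress packs the mask-selected elements of the source contiguously into
// the low lanes of the destination (the memory form stores only the selected
// elements); e.g. a 0b1010 mask over {a,b,c,d} yields {b,d} in the low lanes.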
// FIXME: Is there a better scheduler class for VPCOMPRESS?
defm VPCOMPRESSD : compress_by_elt_width<0x8B, "vpcompressd", WriteVarShuffle256,
                                         avx512vl_i32_info>, EVEX;
defm VPCOMPRESSQ : compress_by_elt_width<0x8B, "vpcompressq", WriteVarShuffle256,
                                         avx512vl_i64_info>, EVEX, REX_W;
defm VCOMPRESSPS : compress_by_elt_width<0x8A, "vcompressps", WriteVarShuffle256,
                                         avx512vl_f32_info>, EVEX;
defm VCOMPRESSPD : compress_by_elt_width<0x8A, "vcompresspd", WriteVarShuffle256,
                                         avx512vl_f64_info>, EVEX, REX_W;

// expand
multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
                               string OpcodeStr, X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
              (null_frag)>, AVX5128IBase,
              Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
              (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
              (null_frag)>,
              AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
            (!cast<Instruction>(Name#_.ZSuffix#rmkz)
                                _.KRCWM:$mask, addr:$src)>;

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmkz)
                                _.KRCWM:$mask, addr:$src)>;

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
                                     (_.VT _.RC:$src0))),
            (!cast<Instruction>(Name#_.ZSuffix#rmk)
                                _.RC:$src0, _.KRCWM:$mask, addr:$src)>;

  def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#rrk)
                                _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
  def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#rrkz)
                                _.KRCWM:$mask, _.RC:$src)>;
}

multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in
  defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>,
           expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [Pred, HasVLX] in {
    defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>,
                expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>,
                expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
  }
}
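// Expand is the inverse of compress: consecutive low elements of the source
// are distributed to the mask-selected lanes, with unselected lanes taking
// the zero/passthru value; e.g. a 0b1010 mask with source {b,d,...} produces
// {z,b,z,d}.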
// FIXME: Is there a better scheduler class for VPEXPAND?
defm VPEXPANDD : expand_by_elt_width<0x89, "vpexpandd", WriteVarShuffle256,
                                     avx512vl_i32_info>, EVEX;
defm VPEXPANDQ : expand_by_elt_width<0x89, "vpexpandq", WriteVarShuffle256,
                                     avx512vl_i64_info>, EVEX, REX_W;
defm VEXPANDPS : expand_by_elt_width<0x88, "vexpandps", WriteVarShuffle256,
                                     avx512vl_f32_info>, EVEX;
defm VEXPANDPD : expand_by_elt_width<0x88, "vexpandpd", WriteVarShuffle256,
                                     avx512vl_f64_info>, EVEX, REX_W;

// Handle the instruction forms reg_vec1 = op(reg_vec, imm),
//                              reg_vec1 = op(mem_vec, imm) and
//                              reg_vec1 = op(broadcast(eltVt), imm).
// All instructions are created with FROUND_CURRENT.
multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr,
                                      SDPatternOperator OpNode,
                                      SDPatternOperator MaskOpNode,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
    defm rri : AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
                      (OpNode (_.VT _.RC:$src1), (i32 timm:$src2)),
                      (MaskOpNode (_.VT _.RC:$src1), (i32 timm:$src2))>,
                      Sched<[sched]>;
    defm rmi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.MemOp:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
                    (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                            (i32 timm:$src2)),
                    (MaskOpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                (i32 timm:$src2))>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
    defm rmbi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                      OpcodeStr#_.Suffix, "$src2, ${src1}"#_.BroadcastStr,
                      "${src1}"#_.BroadcastStr#", $src2",
                      (OpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
                              (i32 timm:$src2)),
                      (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
                                  (i32 timm:$src2))>, EVEX_B,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Handle the instruction form reg_vec1 = op(reg_vec2, imm), {sae}.
multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                          SDNode OpNode, X86FoldableSchedWrite sched,
                                          X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr#_.Suffix, "$src2, {sae}, $src1",
                      "$src1, {sae}, $src2",
                      (OpNode (_.VT _.RC:$src1),
                              (i32 timm:$src2))>,
                      EVEX_B, Sched<[sched]>;
}

multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
                AVX512VLVectorVTInfo _, bits<8> opc, SDPatternOperator OpNode,
                SDPatternOperator MaskOpNode, SDNode OpNodeSAE, X86SchedWriteWidths sched,
                Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
                                        sched.ZMM, _.info512>,
             avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE,
                                            sched.ZMM, _.info512>, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
                                           sched.XMM, _.info128>, EVEX_V128;
    defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
                                           sched.YMM, _.info256>, EVEX_V256;
  }
}
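// The {sae} (suppress-all-exceptions) variants are only instantiated for the
// 512-bit form above: EVEX.b on a register-register encoding (EVEX_B) means
// SAE rather than broadcast, and the node still reads MXCSR but cannot raise
// FP exceptions.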
// Handle the instruction forms reg_vec1 = op(reg_vec2, reg_vec3, imm),
//                              reg_vec1 = op(reg_vec2, mem_vec, imm) and
//                              reg_vec1 = op(reg_vec2, broadcast(eltVt), imm).
// All instructions are created with FROUND_CURRENT.
multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
    defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 timm:$src3))>,
                      Sched<[sched]>;
    defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (bitconvert (_.LdFrag addr:$src2))),
                            (i32 timm:$src3))>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
    defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
                      "$src1, ${src2}"#_.BroadcastStr#", $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT (_.BroadcastLdFrag addr:$src2)),
                              (i32 timm:$src3))>, EVEX_B,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Handle the instruction forms reg_vec1 = op(reg_vec2, reg_vec3, imm) and
//                              reg_vec1 = op(reg_vec2, mem_vec, imm).
multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo,
                              X86VectorVTInfo SrcInfo> {
  let ExeDomain = DestInfo.ExeDomain in {
    defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
                    (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                    (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                                         (SrcInfo.VT SrcInfo.RC:$src2),
                                         (i8 timm:$src3)))>,
                    Sched<[sched]>;
    defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                    (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                    (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                                         (SrcInfo.VT (bitconvert
                                                      (SrcInfo.LdFrag addr:$src2))),
                                         (i8 timm:$src3)))>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Handle the instruction forms reg_vec1 = op(reg_vec2, reg_vec3, imm),
//                              reg_vec1 = op(reg_vec2, mem_vec, imm) and
//                              reg_vec1 = op(reg_vec2, broadcast(eltVt), imm).
multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, X86VectorVTInfo _> :
      avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _> {

  let ExeDomain = _.ExeDomain in
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                    OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
                    "$src1, ${src2}"#_.BroadcastStr#", $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (_.BroadcastLdFrag addr:$src2)),
                            (i8 timm:$src3))>, EVEX_B,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
}
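// In assembly, the rmbi broadcast forms above print the {1toN} modifier; an
// illustrative AT&T-syntax instance would be
// "vrndscaleps $7, (%rax){1to16}, %zmm0", which loads one f32 and replicates
// it across all 16 lanes before the operation.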
// Handle the scalar instruction forms reg_vec1 = op(reg_vec2, reg_vec3, imm) and
//                                     reg_vec1 = op(reg_vec2, mem_scalar, imm).
multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
    defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 timm:$src3))>,
                      Sched<[sched]>;
    defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.ScalarIntMemFrags addr:$src2),
                              (i32 timm:$src3))>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Handle the instruction form reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}.
multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode, X86FoldableSchedWrite sched,
                                    X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, {sae}, $src2, $src1",
                      "$src1, $src2, {sae}, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 timm:$src3))>,
                      EVEX_B, Sched<[sched]>;
}

// Handle the scalar instruction form reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}.
multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                    X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, {sae}, $src2, $src1",
                      "$src1, $src2, {sae}, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 timm:$src3))>,
                      EVEX_B, Sched<[sched]>;
}

multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
                AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
                SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
             avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, sched.ZMM, _.info512>,
             EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                EVEX_V128;
    defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                EVEX_V256;
  }
}

multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
                    X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo,
                    AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
  let Predicates = [Pred] in {
    defm Z : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512,
                                SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
  }
  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128,
                                   SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
    defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256,
                                   SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
  }
}
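// EVEX_4V on the forms above marks the extra source register encoded in
// EVEX.vvvv (the $src1 operand of these three-operand variants).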
multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
                                  bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched,
                                  Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in {
    defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
             EVEX_V512;
  }
  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                EVEX_V128;
    defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                EVEX_V256;
  }
}

multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
                X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
                SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
             avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeSAE, sched.XMM, _>;
  }
}

multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
                bits<8> opcPs, bits<8> opcPd, SDPatternOperator OpNode,
                SDPatternOperator MaskOpNode, SDNode OpNodeSAE,
                X86SchedWriteWidths sched, Predicate prd> {
  defm PH : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f16_info,
                opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, HasFP16>,
                AVX512PSIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
  defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
                opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
                AVX512AIi8Base, EVEX, EVEX_CD8<32, CD8VF>;
  defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
                opcPd, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
                AVX512AIi8Base, EVEX, EVEX_CD8<64, CD8VF>, REX_W;
}

defm VREDUCE   : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
                              X86VReduce, X86VReduce, X86VReduceSAE,
                              SchedWriteFRnd, HasDQI>;
defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
                              X86any_VRndScale, X86VRndScale, X86VRndScaleSAE,
                              SchedWriteFRnd, HasAVX512>;
defm VGETMANT  : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
                              X86VGetMant, X86VGetMant, X86VGetMantSAE,
                              SchedWriteFRnd, HasAVX512>;

defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
                                                0x50, X86VRange, X86VRangeSAE,
                                                SchedWriteFAdd, HasDQI>,
                    AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, REX_W;
defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
                                                0x50, X86VRange, X86VRangeSAE,
                                                SchedWriteFAdd, HasDQI>,
                    AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;

defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
                   f64x_info, 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
                   AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, REX_W;
defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
                   0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
                   AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
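// For the vrange* instructions above, imm8[1:0] is generally described as
// selecting the operation (min, max, absolute min, absolute max) and
// imm8[3:2] the sign source; see the SDM for the authoritative table.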
defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
                    0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
                    AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, REX_W;
defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
                    0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
                    AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VREDUCESH: avx512_common_fp_sae_scalar_imm<"vreducesh", f16x_info,
                    0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasFP16>,
                    AVX512PSIi8Base, TA, VEX_LIG, EVEX_4V, EVEX_CD8<16, CD8VT1>;

defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
                    0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
                    AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, REX_W;
defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
                    0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
                    AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VGETMANTSH: avx512_common_fp_sae_scalar_imm<"vgetmantsh", f16x_info,
                    0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasFP16>,
                    AVX512PSIi8Base, TA, VEX_LIG, EVEX_4V, EVEX_CD8<16, CD8VT1>;

multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
                                          X86FoldableSchedWrite sched,
                                          X86VectorVTInfo _,
                                          X86VectorVTInfo CastInfo,
                                          string EVEX2VEXOvrd> {
  let ExeDomain = _.ExeDomain in {
    defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                    (_.VT (bitconvert
                           (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
                                                    (i8 timm:$src3)))))>,
                    Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
    defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                    (_.VT
                     (bitconvert
                      (CastInfo.VT (X86Shuf128 _.RC:$src1,
                                               (CastInfo.LdFrag addr:$src2),
                                               (i8 timm:$src3)))))>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>,
                    EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
    defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                    OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
                    "$src1, ${src2}"#_.BroadcastStr#", $src3",
                    (_.VT
                     (bitconvert
                      (CastInfo.VT
                       (X86Shuf128 _.RC:$src1,
                                   (_.BroadcastLdFrag addr:$src2),
                                   (i8 timm:$src3)))))>, EVEX_B,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched,
                                   AVX512VLVectorVTInfo _,
                                   AVX512VLVectorVTInfo CastInfo, bits<8> opc,
                                   string EVEX2VEXOvrd> {
  let Predicates = [HasAVX512] in
    defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
                                            _.info512, CastInfo.info512, "">, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in
    defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
                                               _.info256, CastInfo.info256,
                                               EVEX2VEXOvrd>, EVEX_V256;
}

defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
                      avx512vl_f32_info, avx512vl_f64_info, 0x23, "VPERM2F128">,
                      AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
                      avx512vl_f64_info, avx512vl_f64_info, 0x23, "VPERM2F128">,
                      AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, REX_W;
defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
                      avx512vl_i32_info, avx512vl_i64_info, 0x43, "VPERM2I128">,
                      AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
                      avx512vl_i64_info, avx512vl_i64_info, 0x43, "VPERM2I128">,
                      AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, REX_W;
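// The CastInfo indirection above lets the 32-bit-element shuffles reuse the
// 64-bit-element X86Shuf128 node (the shuffle only moves whole 128-bit
// blocks) while masking still happens at 32-bit granularity; hence the
// bitconverts around the shuffle in the patterns.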
multiclass avx512_valign<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  // NOTE: EVEX2VEXOverride changed back to Unset for 256-bit at the
  // instantiation of this class.
  let ExeDomain = _.ExeDomain in {
    defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                    (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 timm:$src3)))>,
                    Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">;
    defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                    (_.VT (X86VAlign _.RC:$src1,
                                     (bitconvert (_.LdFrag addr:$src2)),
                                     (i8 timm:$src3)))>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>,
                    EVEX2VEXOverride<"VPALIGNRrmi">;

    defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                    OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
                    "$src1, ${src2}"#_.BroadcastStr#", $src3",
                    (X86VAlign _.RC:$src1,
                               (_.VT (_.BroadcastLdFrag addr:$src2)),
                               (i8 timm:$src3))>, EVEX_B,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched,
                                AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>,
             AVX512AIi8Base, EVEX_4V, EVEX_V512;
  }
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>,
                AVX512AIi8Base, EVEX_4V, EVEX_V128;
    // We can't really override the 256-bit version so change it back to unset.
    let EVEX2VEXOverride = ? in
    defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
                AVX512AIi8Base, EVEX_4V, EVEX_V256;
  }
}

defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle,
                                   avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle,
                                   avx512vl_i64_info>, EVEX_CD8<64, CD8VF>,
                                   REX_W;

defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
                                         SchedWriteShuffle, avx512vl_i8_info,
                                         avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;
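// A valignq by N elements equals a valignd by 2*N elements, and within a
// single 128-bit vector a valignd/valignq by N equals a vpalignr by 4*N/8*N
// bytes; the SDNodeXForms below rescale the immediate accordingly.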
// Fragments to help convert valignq into masked valignd. Or valignq/valignd
// into vpalignr.
def ValignqImm32XForm : SDNodeXForm<timm, [{
  return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
}]>;
def ValignqImm8XForm : SDNodeXForm<timm, [{
  return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
}]>;
def ValigndImm8XForm : SDNodeXForm<timm, [{
  return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
}]>;

multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
                                        X86VectorVTInfo From, X86VectorVTInfo To,
                                        SDNodeXForm ImmXForm> {
  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                    (bitconvert
                     (From.VT (OpNode From.RC:$src1, From.RC:$src2,
                                      timm:$src3))),
                    To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
                                                  To.RC:$src1, To.RC:$src2,
                                                  (ImmXForm timm:$src3))>;

  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                    (bitconvert
                     (From.VT (OpNode From.RC:$src1, From.RC:$src2,
                                      timm:$src3))),
                    To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
                                                   To.RC:$src1, To.RC:$src2,
                                                   (ImmXForm timm:$src3))>;

  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                    (bitconvert
                     (From.VT (OpNode From.RC:$src1,
                                      (From.LdFrag addr:$src2),
                                      timm:$src3))),
                    To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
                                                  To.RC:$src1, addr:$src2,
                                                  (ImmXForm timm:$src3))>;

  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                    (bitconvert
                     (From.VT (OpNode From.RC:$src1,
                                      (From.LdFrag addr:$src2),
                                      timm:$src3))),
                    To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
                                                   To.RC:$src1, addr:$src2,
                                                   (ImmXForm timm:$src3))>;
}

multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
                                           X86VectorVTInfo From,
                                           X86VectorVTInfo To,
                                           SDNodeXForm ImmXForm> :
      avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
  def : Pat<(From.VT (OpNode From.RC:$src1,
                             (bitconvert (To.VT (To.BroadcastLdFrag addr:$src2))),
                             timm:$src3)),
            (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
                                                  (ImmXForm timm:$src3))>;

  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                    (bitconvert
                     (From.VT (OpNode From.RC:$src1,
                                      (bitconvert
                                       (To.VT (To.BroadcastLdFrag addr:$src2))),
                                      timm:$src3))),
                    To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
                                                   To.RC:$src1, addr:$src2,
                                                   (ImmXForm timm:$src3))>;

  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                    (bitconvert
                     (From.VT (OpNode From.RC:$src1,
                                      (bitconvert
                                       (To.VT (To.BroadcastLdFrag addr:$src2))),
                                      timm:$src3))),
                    To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
                                                    To.RC:$src1, addr:$src2,
                                                    (ImmXForm timm:$src3))>;
}

let Predicates = [HasAVX512] in {
  // For 512-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
                                         v16i32_info, ValignqImm32XForm>;
}
let Predicates = [HasVLX] in {
  // For 128-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
                                         v4i32x_info, ValignqImm32XForm>;
  // For 256-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
                                         v8i32x_info, ValignqImm32XForm>;
}

let Predicates = [HasVLX, HasBWI] in {
  // We can turn 128 and 256 bit VALIGND/VALIGNQ into VPALIGNR.
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
                                      v16i8x_info, ValignqImm8XForm>;
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
                                      v16i8x_info, ValigndImm8XForm>;
}

defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
                    SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>,
                    EVEX_CD8<8, CD8VF>, NotEVEX2VEXConvertible;

multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                    (ins _.RC:$src1), OpcodeStr,
                    "$src1", "$src1",
                    (_.VT (OpNode (_.VT _.RC:$src1)))>, EVEX, AVX5128IBase,
                    Sched<[sched]>;

    defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.MemOp:$src1), OpcodeStr,
                  "$src1", "$src1",
                  (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1)))))>,
                  EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
                  Sched<[sched.Folded]>;
  }
}

multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> :
      avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.ScalarMemOp:$src1), OpcodeStr,
                  "${src1}"#_.BroadcastStr,
                  "${src1}"#_.BroadcastStr,
                  (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src1))))>,
                  EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
                  Sched<[sched.Folded]>;
}

multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched,
                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
             EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
                EVEX_V128;
  }
}

multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo,
                               Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
             EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
                EVEX_V128;
  }
}
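// Note the split below: the D/Q helpers use the broadcast-capable rmb forms,
// while the B/W helpers cannot, since EVEX embedded broadcast is only defined
// for 32- and 64-bit elements.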
multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd> {
  defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched,
                               avx512vl_i64_info, prd>, REX_W;
  defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched,
                               avx512vl_i32_info, prd>;
}

multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd> {
  defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched,
                              avx512vl_i16_info, prd>, WIG;
  defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched,
                              avx512vl_i8_info, prd>, WIG;
}

multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched> {
  defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched,
                                    HasAVX512>,
              avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched,
                                    HasBWI>;
}

defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
                                    SchedWriteVecALU>;

// VPABS: use the 512-bit version to implement the 128/256-bit variants when
// VLX is not available.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v4i64 (abs VR256X:$src)),
            (EXTRACT_SUBREG
                (VPABSQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
                sub_ymm)>;
  def : Pat<(v2i64 (abs VR128X:$src)),
            (EXTRACT_SUBREG
                (VPABSQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
                sub_xmm)>;
}

// Use the 512-bit version to implement the 128/256-bit variants.
multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
                                 AVX512VLVectorVTInfo _, Predicate prd> {
  let Predicates = [prd, NoVLX] in {
    def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))),
              (EXTRACT_SUBREG
                (!cast<Instruction>(InstrStr # "Zrr")
                  (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)),
                                 _.info256.RC:$src1,
                                 _.info256.SubRegIdx)),
                _.info256.SubRegIdx)>;

    def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1))),
              (EXTRACT_SUBREG
                (!cast<Instruction>(InstrStr # "Zrr")
                  (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)),
                                 _.info128.RC:$src1,
                                 _.info128.SubRegIdx)),
                _.info128.SubRegIdx)>;
  }
}

defm VPLZCNT : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
                                     SchedWriteVecIMul, HasCDI>;

// FIXME: Is there a better scheduler class for VPCONFLICT?
defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
                                        SchedWriteVecALU, HasCDI>;

// VPLZCNT: use the 512-bit version to implement the 128/256-bit variants when
// VLX is not available.
defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;
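// vpconflictd/q above gives each destination element one bit per
// less-significant source element that compares equal to it, which is what
// makes scatter-collision detection possible with CDI.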
//===---------------------------------------------------------------------===//
// Counts number of ones - VPOPCNTD and VPOPCNTQ
//===---------------------------------------------------------------------===//

// FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
                                     SchedWriteVecALU, HasVPOPCNTDQ>;

defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;

//===---------------------------------------------------------------------===//
// Replicate Single FP - MOVSHDUP and MOVSLDUP
//===---------------------------------------------------------------------===//

multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86SchedWriteWidths sched> {
  defm NAME: avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
                                avx512vl_f32_info, HasAVX512>, XS;
}

defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
                                  SchedWriteFShuffle>;
defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
                                  SchedWriteFShuffle>;

//===----------------------------------------------------------------------===//
// AVX-512 - MOVDDUP
//===----------------------------------------------------------------------===//

multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src), OpcodeStr, "$src", "$src",
                   (_.VT (X86VBroadcast (_.VT _.RC:$src)))>, EVEX,
                   Sched<[sched]>;
    defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
                  (_.VT (_.BroadcastLdFrag addr:$src))>,
                  EVEX, EVEX_CD8<_.EltSize, CD8VH>,
                  Sched<[sched.Folded]>;
  }
}

multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr,
                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
  defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM,
                           VTInfo.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM,
                                VTInfo.info256>, EVEX_V256;
    defm Z128 : avx512_movddup_128<opc, OpcodeStr, sched.XMM,
                                   VTInfo.info128>, EVEX_V128;
  }
}

multiclass avx512_movddup<bits<8> opc, string OpcodeStr,
                          X86SchedWriteWidths sched> {
  defm NAME: avx512_movddup_common<opc, OpcodeStr, sched,
                                   avx512vl_f64_info>, XD, REX_W;
}

defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", SchedWriteFShuffle>;

let Predicates = [HasVLX] in {
def : Pat<(v2f64 (X86VBroadcast f64:$src)),
          (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;

def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
                        (v2f64 VR128X:$src0)),
          (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
                           (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
                        immAllZerosV),
          (VMOVDDUPZ128rrkz VK2WM:$mask,
                            (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
}
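// On 128 bits, movddup of a register is just a broadcast of element 0, which
// is why the Z128 variant above matches X86VBroadcast rather than X86Movddup,
// and its memory form folds a 64-bit broadcast load (CD8VH: the compressed
// disp8 scales by half a vector).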
//===----------------------------------------------------------------------===//
// AVX-512 - Unpack Instructions
//===----------------------------------------------------------------------===//

let Uses = []<Register>, mayRaiseFPException = 0 in {
defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, X86Unpckh, HasAVX512,
                                 SchedWriteFShuffleSizes, 0, 1>;
defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, X86Unpckl, HasAVX512,
                                 SchedWriteFShuffleSizes>;
}

defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
                                       SchedWriteShuffle, HasBWI>;

defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
                                       SchedWriteShuffle, HasAVX512>;
defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
                                       SchedWriteShuffle, HasAVX512>;
defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
                                        SchedWriteShuffle, HasAVX512>;
defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
                                        SchedWriteShuffle, HasAVX512>;

//===----------------------------------------------------------------------===//
// AVX-512 - Extract & Insert Integer Instructions
//===----------------------------------------------------------------------===//

multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   X86VectorVTInfo _> {
  def mr : AVX512Ii8<opc, MRMDestMem, (outs),
              (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), timm:$src2))),
                      addr:$dst)]>,
              EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
}

multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
                       (ins _.RC:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set GR32orGR64:$dst,
                             (X86pextrb (_.VT _.RC:$src1), timm:$src2))]>,
                       EVEX, TAPD, Sched<[WriteVecExtract]>;

    defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
  }
}

multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
                       (ins _.RC:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set GR32orGR64:$dst,
                             (X86pextrw (_.VT _.RC:$src1), timm:$src2))]>,
                       EVEX, PD, Sched<[WriteVecExtract]>;

    let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
    def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
                           (ins _.RC:$src1, u8imm:$src2),
                           OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                           EVEX, TAPD, Sched<[WriteVecExtract]>;

    defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
  }
}
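// vpextrw has two register encodings: the legacy 0xC5 form (MRMSrcReg) and
// the SSE4.1-style 0x15 form (MRMDestReg, kept above as rr_REV for the
// disassembler only); only the 0x15 opcode has a memory form.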
multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
                                 RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
                       (ins _.RC:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set GRC:$dst,
                             (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
                       EVEX, TAPD, Sched<[WriteVecExtract]>;

    def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
                       (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(store (extractelt (_.VT _.RC:$src1),
                                           imm:$src2), addr:$dst)]>,
                       EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
                       Sched<[WriteVecExtractSt]>;
  }
}

defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, WIG;
defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, WIG;
defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, REX_W;

multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86VectorVTInfo _, PatFrag LdFrag,
                               SDPatternOperator immoperator> {
  def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
      (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
      OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set _.RC:$dst,
          (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), immoperator:$src3)))]>,
      EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
      Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}

multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86VectorVTInfo _, PatFrag LdFrag> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
        (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set _.RC:$dst,
            (OpNode _.RC:$src1, GR32orGR64:$src2, timm:$src3))]>, EVEX_4V,
        Sched<[WriteVecInsert]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag, timm>;
  }
}

multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
                                X86VectorVTInfo _, RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
        (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set _.RC:$dst,
            (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
        EVEX_4V, TAPD, Sched<[WriteVecInsert]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
                                    _.ScalarLdFrag, imm>, TAPD;
  }
}

defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
                                     extloadi8>, TAPD, WIG;
defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
                                     extloadi16>, PD, WIG;
defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, REX_W;

let Predicates = [HasAVX512, NoBWI] in {
  def : Pat<(X86pinsrb VR128:$src1,
                       (i32 (anyext (i8 (bitconvert v8i1:$src2)))),
                       timm:$src3),
            (VPINSRBrr VR128:$src1, (i32 (COPY_TO_REGCLASS VK8:$src2, GR32)),
                       timm:$src3)>;
}

let Predicates = [HasBWI] in {
  def : Pat<(X86pinsrb VR128:$src1, (i32 (anyext (i8 GR8:$src2))), timm:$src3),
            (VPINSRBZrr VR128:$src1, (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
                                                    GR8:$src2, sub_8bit),
                        timm:$src3)>;
  def : Pat<(X86pinsrb VR128:$src1,
                       (i32 (anyext (i8 (bitconvert v8i1:$src2)))),
                       timm:$src3),
            (VPINSRBZrr VR128:$src1, (i32 (COPY_TO_REGCLASS VK8:$src2, GR32)),
                        timm:$src3)>;
}
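// Without BWI there is no EVEX-encoded vpinsrb, so the mask-to-GPR insert
// pattern above falls back to the VEX-encoded VPINSRBrr.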
// Always select FP16 instructions if available.
let Predicates = [HasBWI], AddedComplexity = -10 in {
  def : Pat<(f16 (load addr:$src)),
            (COPY_TO_REGCLASS (VPINSRWZrm (v8i16 (IMPLICIT_DEF)), addr:$src, 0),
                              FR16X)>;
  def : Pat<(store f16:$src, addr:$dst),
            (VPEXTRWZmr addr:$dst, (v8i16 (COPY_TO_REGCLASS FR16:$src, VR128)), 0)>;
  def : Pat<(i16 (bitconvert f16:$src)),
            (EXTRACT_SUBREG
              (VPEXTRWZrr (v8i16 (COPY_TO_REGCLASS FR16X:$src, VR128X)), 0),
              sub_16bit)>;
  def : Pat<(f16 (bitconvert i16:$src)),
            (COPY_TO_REGCLASS
              (VPINSRWZrr (v8i16 (IMPLICIT_DEF)),
                          (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit),
                          0),
              FR16X)>;
}

//===----------------------------------------------------------------------===//
// VSHUFPS - VSHUFPD Operations
//===----------------------------------------------------------------------===//

multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_FP> {
  defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
                                    SchedWriteFShuffle>,
             EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
             AVX512AIi8Base, EVEX_4V;
}

defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_f32_info>, PS;
defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_f64_info>, PD, REX_W;

//===----------------------------------------------------------------------===//
// AVX-512 - Byte shift Left/Right
//===----------------------------------------------------------------------===//

multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
                               Format MRMm, string OpcodeStr,
                               X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  def ri : AVX512<opc, MRMr,
             (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 timm:$src2))))]>,
             Sched<[sched]>;
  def mi : AVX512<opc, MRMm,
             (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.RC:$dst,(_.VT (OpNode
                                    (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                    (i8 timm:$src2))))]>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
                                   Format MRMm, string OpcodeStr,
                                   X86SchedWriteWidths sched, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                 sched.ZMM, v64i8_info>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    sched.YMM, v32i8x_info>, EVEX_V256;
    defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    sched.XMM, v16i8x_info>, EVEX_V128;
  }
}
defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
                                       SchedWriteShuffle, HasBWI>,
                                       AVX512PDIi8Base, EVEX_4V, WIG;
defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
                                       SchedWriteShuffle, HasBWI>,
                                       AVX512PDIi8Base, EVEX_4V, WIG;
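// vpslldq/vpsrldq shift bytes within each 128-bit lane independently; the
// YMM/ZMM forms perform two/four separate 16-byte shifts, which is what
// X86vshldq/X86vshrdq model.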
_dst.RC:$dst,(_dst.VT 11818 (OpNode (_src.VT _src.RC:$src1), 11819 (_src.VT _src.RC:$src2))))]>, 11820 Sched<[sched]>; 11821 def rm : AVX512BI<opc, MRMSrcMem, 11822 (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2), 11823 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 11824 [(set _dst.RC:$dst,(_dst.VT 11825 (OpNode (_src.VT _src.RC:$src1), 11826 (_src.VT (bitconvert 11827 (_src.LdFrag addr:$src2))))))]>, 11828 Sched<[sched.Folded, sched.ReadAfterFold]>; 11829} 11830 11831multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode, 11832 string OpcodeStr, X86SchedWriteWidths sched, 11833 Predicate prd> { 11834 let Predicates = [prd] in 11835 defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM, 11836 v8i64_info, v64i8_info>, EVEX_V512; 11837 let Predicates = [prd, HasVLX] in { 11838 defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM, 11839 v4i64x_info, v32i8x_info>, EVEX_V256; 11840 defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM, 11841 v2i64x_info, v16i8x_info>, EVEX_V128; 11842 } 11843} 11844 11845defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw", 11846 SchedWritePSADBW, HasBWI>, EVEX_4V, WIG; 11847 11848// Transforms to swizzle an immediate to enable better matching when 11849// memory operand isn't in the right place. 11850def VPTERNLOG321_imm8 : SDNodeXForm<timm, [{ 11851 // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2. 11852 uint8_t Imm = N->getZExtValue(); 11853 // Swap bits 1/4 and 3/6. 11854 uint8_t NewImm = Imm & 0xa5; 11855 if (Imm & 0x02) NewImm |= 0x10; 11856 if (Imm & 0x10) NewImm |= 0x02; 11857 if (Imm & 0x08) NewImm |= 0x40; 11858 if (Imm & 0x40) NewImm |= 0x08; 11859 return getI8Imm(NewImm, SDLoc(N)); 11860}]>; 11861def VPTERNLOG213_imm8 : SDNodeXForm<timm, [{ 11862 // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2. 11863 uint8_t Imm = N->getZExtValue(); 11864 // Swap bits 2/4 and 3/5. 11865 uint8_t NewImm = Imm & 0xc3; 11866 if (Imm & 0x04) NewImm |= 0x10; 11867 if (Imm & 0x10) NewImm |= 0x04; 11868 if (Imm & 0x08) NewImm |= 0x20; 11869 if (Imm & 0x20) NewImm |= 0x08; 11870 return getI8Imm(NewImm, SDLoc(N)); 11871}]>; 11872def VPTERNLOG132_imm8 : SDNodeXForm<timm, [{ 11873 // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2. 11874 uint8_t Imm = N->getZExtValue(); 11875 // Swap bits 1/2 and 5/6. 11876 uint8_t NewImm = Imm & 0x99; 11877 if (Imm & 0x02) NewImm |= 0x04; 11878 if (Imm & 0x04) NewImm |= 0x02; 11879 if (Imm & 0x20) NewImm |= 0x40; 11880 if (Imm & 0x40) NewImm |= 0x20; 11881 return getI8Imm(NewImm, SDLoc(N)); 11882}]>; 11883def VPTERNLOG231_imm8 : SDNodeXForm<timm, [{ 11884 // Convert a VPTERNLOG immediate by moving operand 1 to the end. 11885 uint8_t Imm = N->getZExtValue(); 11886 // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5 11887 uint8_t NewImm = Imm & 0x81; 11888 if (Imm & 0x02) NewImm |= 0x04; 11889 if (Imm & 0x04) NewImm |= 0x10; 11890 if (Imm & 0x08) NewImm |= 0x40; 11891 if (Imm & 0x10) NewImm |= 0x02; 11892 if (Imm & 0x20) NewImm |= 0x08; 11893 if (Imm & 0x40) NewImm |= 0x20; 11894 return getI8Imm(NewImm, SDLoc(N)); 11895}]>; 11896def VPTERNLOG312_imm8 : SDNodeXForm<timm, [{ 11897 // Convert a VPTERNLOG immediate by moving operand 2 to the beginning. 
11898 uint8_t Imm = N->getZExtValue(); 11899 // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3 11900 uint8_t NewImm = Imm & 0x81; 11901 if (Imm & 0x02) NewImm |= 0x10; 11902 if (Imm & 0x04) NewImm |= 0x02; 11903 if (Imm & 0x08) NewImm |= 0x20; 11904 if (Imm & 0x10) NewImm |= 0x04; 11905 if (Imm & 0x20) NewImm |= 0x40; 11906 if (Imm & 0x40) NewImm |= 0x08; 11907 return getI8Imm(NewImm, SDLoc(N)); 11908}]>; 11909 11910multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode, 11911 X86FoldableSchedWrite sched, X86VectorVTInfo _, 11912 string Name>{ 11913 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in { 11914 defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 11915 (ins _.RC:$src2, _.RC:$src3, u8imm:$src4), 11916 OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4", 11917 (OpNode (_.VT _.RC:$src1), 11918 (_.VT _.RC:$src2), 11919 (_.VT _.RC:$src3), 11920 (i8 timm:$src4)), 1, 1>, 11921 AVX512AIi8Base, EVEX_4V, Sched<[sched]>; 11922 defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 11923 (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4), 11924 OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4", 11925 (OpNode (_.VT _.RC:$src1), 11926 (_.VT _.RC:$src2), 11927 (_.VT (bitconvert (_.LdFrag addr:$src3))), 11928 (i8 timm:$src4)), 1, 0>, 11929 AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, 11930 Sched<[sched.Folded, sched.ReadAfterFold]>; 11931 defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 11932 (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4), 11933 OpcodeStr, "$src4, ${src3}"#_.BroadcastStr#", $src2", 11934 "$src2, ${src3}"#_.BroadcastStr#", $src4", 11935 (OpNode (_.VT _.RC:$src1), 11936 (_.VT _.RC:$src2), 11937 (_.VT (_.BroadcastLdFrag addr:$src3)), 11938 (i8 timm:$src4)), 1, 0>, EVEX_B, 11939 AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, 11940 Sched<[sched.Folded, sched.ReadAfterFold]>; 11941 }// Constraints = "$src1 = $dst" 11942 11943 // Additional patterns for matching passthru operand in other positions. 11944 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11945 (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), 11946 _.RC:$src1)), 11947 (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask, 11948 _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 timm:$src4))>; 11949 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11950 (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 timm:$src4)), 11951 _.RC:$src1)), 11952 (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask, 11953 _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 timm:$src4))>; 11954 11955 // Additional patterns for matching zero masking with loads in other 11956 // positions. 11957 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11958 (OpNode (bitconvert (_.LdFrag addr:$src3)), 11959 _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), 11960 _.ImmAllZerosV)), 11961 (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask, 11962 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>; 11963 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11964 (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)), 11965 _.RC:$src2, (i8 timm:$src4)), 11966 _.ImmAllZerosV)), 11967 (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask, 11968 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>; 11969 11970 // Additional patterns for matching masked loads with different 11971 // operand orders. 
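  // A worked example of the remapping (illustrative, using the convention
  // that imm8 bit (src1<<2 | src2<<1 | src3) holds the truth-table entry):
  // imm8 0xCA computes "src1 ? src2 : src3". When a folded load forces the
  // sources into a different order, the immediate must be rewritten so the
  // same boolean function is computed; e.g. swapping the first and third
  // sources while rewriting 0xCA to 0xD8 via VPTERNLOG321_imm8 preserves
  // the result.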
11972 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11973 (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)), 11974 _.RC:$src2, (i8 timm:$src4)), 11975 _.RC:$src1)), 11976 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, 11977 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>; 11978 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11979 (OpNode (bitconvert (_.LdFrag addr:$src3)), 11980 _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), 11981 _.RC:$src1)), 11982 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, 11983 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>; 11984 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11985 (OpNode _.RC:$src2, _.RC:$src1, 11986 (bitconvert (_.LdFrag addr:$src3)), (i8 timm:$src4)), 11987 _.RC:$src1)), 11988 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, 11989 _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>; 11990 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11991 (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)), 11992 _.RC:$src1, (i8 timm:$src4)), 11993 _.RC:$src1)), 11994 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, 11995 _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>; 11996 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11997 (OpNode (bitconvert (_.LdFrag addr:$src3)), 11998 _.RC:$src1, _.RC:$src2, (i8 timm:$src4)), 11999 _.RC:$src1)), 12000 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, 12001 _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>; 12002 12003 // Additional patterns for matching zero masking with broadcasts in other 12004 // positions. 12005 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 12006 (OpNode (_.BroadcastLdFrag addr:$src3), 12007 _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), 12008 _.ImmAllZerosV)), 12009 (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1, 12010 _.KRCWM:$mask, _.RC:$src2, addr:$src3, 12011 (VPTERNLOG321_imm8 timm:$src4))>; 12012 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 12013 (OpNode _.RC:$src1, 12014 (_.BroadcastLdFrag addr:$src3), 12015 _.RC:$src2, (i8 timm:$src4)), 12016 _.ImmAllZerosV)), 12017 (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1, 12018 _.KRCWM:$mask, _.RC:$src2, addr:$src3, 12019 (VPTERNLOG132_imm8 timm:$src4))>; 12020 12021 // Additional patterns for matching masked broadcasts with different 12022 // operand orders. 
12023 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 12024 (OpNode _.RC:$src1, (_.BroadcastLdFrag addr:$src3), 12025 _.RC:$src2, (i8 timm:$src4)), 12026 _.RC:$src1)), 12027 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, 12028 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>; 12029 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 12030 (OpNode (_.BroadcastLdFrag addr:$src3), 12031 _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), 12032 _.RC:$src1)), 12033 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, 12034 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>; 12035 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 12036 (OpNode _.RC:$src2, _.RC:$src1, 12037 (_.BroadcastLdFrag addr:$src3), 12038 (i8 timm:$src4)), _.RC:$src1)), 12039 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, 12040 _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>; 12041 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 12042 (OpNode _.RC:$src2, 12043 (_.BroadcastLdFrag addr:$src3), 12044 _.RC:$src1, (i8 timm:$src4)), 12045 _.RC:$src1)), 12046 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, 12047 _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>; 12048 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 12049 (OpNode (_.BroadcastLdFrag addr:$src3), 12050 _.RC:$src1, _.RC:$src2, (i8 timm:$src4)), 12051 _.RC:$src1)), 12052 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, 12053 _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>; 12054} 12055 12056multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched, 12057 AVX512VLVectorVTInfo _> { 12058 let Predicates = [HasAVX512] in 12059 defm Z : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM, 12060 _.info512, NAME>, EVEX_V512; 12061 let Predicates = [HasAVX512, HasVLX] in { 12062 defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM, 12063 _.info128, NAME>, EVEX_V128; 12064 defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM, 12065 _.info256, NAME>, EVEX_V256; 12066 } 12067} 12068 12069defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU, 12070 avx512vl_i32_info>; 12071defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU, 12072 avx512vl_i64_info>, REX_W; 12073 12074// Patterns to implement vnot using vpternlog instead of creating all ones 12075// using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen 12076// so that the result is only dependent on src0. But we use the same source 12077// for all operands to prevent a false dependency. 12078// TODO: We should maybe have a more generalized algorithm for folding to 12079// vpternlog. 
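// As a sanity check on the immediate (a sketch, using the same table-index
// convention as the transforms above, with the first source as the high
// index bit): 15 = 0x0F sets exactly the four table entries whose src0 bit
// is 0, so the computed function is ~src0 independent of the other sources.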
12080let Predicates = [HasAVX512] in { 12081 def : Pat<(v64i8 (vnot VR512:$src)), 12082 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>; 12083 def : Pat<(v32i16 (vnot VR512:$src)), 12084 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>; 12085 def : Pat<(v16i32 (vnot VR512:$src)), 12086 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>; 12087 def : Pat<(v8i64 (vnot VR512:$src)), 12088 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>; 12089} 12090 12091let Predicates = [HasAVX512, NoVLX] in { 12092 def : Pat<(v16i8 (vnot VR128X:$src)), 12093 (EXTRACT_SUBREG 12094 (VPTERNLOGQZrri 12095 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 12096 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 12097 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 12098 (i8 15)), sub_xmm)>; 12099 def : Pat<(v8i16 (vnot VR128X:$src)), 12100 (EXTRACT_SUBREG 12101 (VPTERNLOGQZrri 12102 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 12103 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 12104 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 12105 (i8 15)), sub_xmm)>; 12106 def : Pat<(v4i32 (vnot VR128X:$src)), 12107 (EXTRACT_SUBREG 12108 (VPTERNLOGQZrri 12109 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 12110 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 12111 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 12112 (i8 15)), sub_xmm)>; 12113 def : Pat<(v2i64 (vnot VR128X:$src)), 12114 (EXTRACT_SUBREG 12115 (VPTERNLOGQZrri 12116 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 12117 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 12118 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 12119 (i8 15)), sub_xmm)>; 12120 12121 def : Pat<(v32i8 (vnot VR256X:$src)), 12122 (EXTRACT_SUBREG 12123 (VPTERNLOGQZrri 12124 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 12125 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 12126 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 12127 (i8 15)), sub_ymm)>; 12128 def : Pat<(v16i16 (vnot VR256X:$src)), 12129 (EXTRACT_SUBREG 12130 (VPTERNLOGQZrri 12131 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 12132 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 12133 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 12134 (i8 15)), sub_ymm)>; 12135 def : Pat<(v8i32 (vnot VR256X:$src)), 12136 (EXTRACT_SUBREG 12137 (VPTERNLOGQZrri 12138 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 12139 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 12140 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 12141 (i8 15)), sub_ymm)>; 12142 def : Pat<(v4i64 (vnot VR256X:$src)), 12143 (EXTRACT_SUBREG 12144 (VPTERNLOGQZrri 12145 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 12146 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 12147 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 12148 (i8 15)), sub_ymm)>; 12149} 12150 12151let Predicates = [HasVLX] in { 12152 def : Pat<(v16i8 (vnot VR128X:$src)), 12153 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>; 12154 def : Pat<(v8i16 (vnot VR128X:$src)), 12155 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>; 12156 def : Pat<(v4i32 (vnot VR128X:$src)), 12157 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>; 12158 def : Pat<(v2i64 (vnot 
VR128X:$src)), 12159 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>; 12160 12161 def : Pat<(v32i8 (vnot VR256X:$src)), 12162 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>; 12163 def : Pat<(v16i16 (vnot VR256X:$src)), 12164 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>; 12165 def : Pat<(v8i32 (vnot VR256X:$src)), 12166 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>; 12167 def : Pat<(v4i64 (vnot VR256X:$src)), 12168 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>; 12169} 12170 12171//===----------------------------------------------------------------------===// 12172// AVX-512 - FixupImm 12173//===----------------------------------------------------------------------===// 12174 12175multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, 12176 X86FoldableSchedWrite sched, X86VectorVTInfo _, 12177 X86VectorVTInfo TblVT>{ 12178 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, 12179 Uses = [MXCSR], mayRaiseFPException = 1 in { 12180 defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 12181 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4), 12182 OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4", 12183 (X86VFixupimm (_.VT _.RC:$src1), 12184 (_.VT _.RC:$src2), 12185 (TblVT.VT _.RC:$src3), 12186 (i32 timm:$src4))>, Sched<[sched]>; 12187 defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 12188 (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4), 12189 OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4", 12190 (X86VFixupimm (_.VT _.RC:$src1), 12191 (_.VT _.RC:$src2), 12192 (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))), 12193 (i32 timm:$src4))>, 12194 Sched<[sched.Folded, sched.ReadAfterFold]>; 12195 defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 12196 (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4), 12197 OpcodeStr#_.Suffix, "$src4, ${src3}"#_.BroadcastStr#", $src2", 12198 "$src2, ${src3}"#_.BroadcastStr#", $src4", 12199 (X86VFixupimm (_.VT _.RC:$src1), 12200 (_.VT _.RC:$src2), 12201 (TblVT.VT (TblVT.BroadcastLdFrag addr:$src3)), 12202 (i32 timm:$src4))>, 12203 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 12204 } // Constraints = "$src1 = $dst" 12205} 12206 12207multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr, 12208 X86FoldableSchedWrite sched, 12209 X86VectorVTInfo _, X86VectorVTInfo TblVT> 12210 : avx512_fixupimm_packed<opc, OpcodeStr, sched, _, TblVT> { 12211let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, Uses = [MXCSR] in { 12212 defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 12213 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4), 12214 OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2", 12215 "$src2, $src3, {sae}, $src4", 12216 (X86VFixupimmSAE (_.VT _.RC:$src1), 12217 (_.VT _.RC:$src2), 12218 (TblVT.VT _.RC:$src3), 12219 (i32 timm:$src4))>, 12220 EVEX_B, Sched<[sched]>; 12221 } 12222} 12223 12224multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, 12225 X86FoldableSchedWrite sched, X86VectorVTInfo _, 12226 X86VectorVTInfo _src3VT> { 12227 let Constraints = "$src1 = $dst" , Predicates = [HasAVX512], 12228 ExeDomain = _.ExeDomain in { 12229 defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 12230 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4), 12231 OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4", 12232 (X86VFixupimms (_.VT _.RC:$src1), 12233 (_.VT _.RC:$src2), 12234 (_src3VT.VT 
_src3VT.RC:$src3), 12235 (i32 timm:$src4))>, Sched<[sched]>, SIMD_EXC; 12236 let Uses = [MXCSR] in 12237 defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 12238 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4), 12239 OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2", 12240 "$src2, $src3, {sae}, $src4", 12241 (X86VFixupimmSAEs (_.VT _.RC:$src1), 12242 (_.VT _.RC:$src2), 12243 (_src3VT.VT _src3VT.RC:$src3), 12244 (i32 timm:$src4))>, 12245 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 12246 defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 12247 (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4), 12248 OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4", 12249 (X86VFixupimms (_.VT _.RC:$src1), 12250 (_.VT _.RC:$src2), 12251 (_src3VT.VT (scalar_to_vector 12252 (_src3VT.ScalarLdFrag addr:$src3))), 12253 (i32 timm:$src4))>, 12254 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 12255 } 12256} 12257 12258multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched, 12259 AVX512VLVectorVTInfo _Vec, 12260 AVX512VLVectorVTInfo _Tbl> { 12261 let Predicates = [HasAVX512] in 12262 defm Z : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM, 12263 _Vec.info512, _Tbl.info512>, AVX512AIi8Base, 12264 EVEX_4V, EVEX_V512; 12265 let Predicates = [HasAVX512, HasVLX] in { 12266 defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.XMM, 12267 _Vec.info128, _Tbl.info128>, AVX512AIi8Base, 12268 EVEX_4V, EVEX_V128; 12269 defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.YMM, 12270 _Vec.info256, _Tbl.info256>, AVX512AIi8Base, 12271 EVEX_4V, EVEX_V256; 12272 } 12273} 12274 12275defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm", 12276 SchedWriteFAdd.Scl, f32x_info, v4i32x_info>, 12277 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; 12278defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm", 12279 SchedWriteFAdd.Scl, f64x_info, v2i64x_info>, 12280 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, REX_W; 12281defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info, 12282 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; 12283defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info, 12284 avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, REX_W; 12285 12286// Patterns used to select SSE scalar fp arithmetic instructions from 12287// either: 12288// 12289// (1) a scalar fp operation followed by a blend 12290// 12291// The effect is that the backend no longer emits unnecessary vector 12292// insert instructions immediately after SSE scalar fp instructions 12293// like addss or mulss. 12294// 12295// For example, given the following code: 12296// __m128 foo(__m128 A, __m128 B) { 12297// A[0] += B[0]; 12298// return A; 12299// } 12300// 12301// Previously we generated: 12302// addss %xmm0, %xmm1 12303// movss %xmm1, %xmm0 12304// 12305// We now generate: 12306// addss %xmm1, %xmm0 12307// 12308// (2) a vector packed single/double fp operation followed by a vector insert 12309// 12310// The effect is that the backend converts the packed fp instruction 12311// followed by a vector insert into a single SSE scalar fp instruction. 
//
// For example, given the following code:
//   __m128 foo(__m128 A, __m128 B) {
//     __m128 C = A + B;
//     return (__m128) {C[0], A[1], A[2], A[3]};
//   }
//
// Previously we generated:
//   addps %xmm0, %xmm1
//   movss %xmm1, %xmm0
//
// We now generate:
//   addss %xmm1, %xmm0

// TODO: Some canonicalization in lowering would simplify the number of
// patterns we have to try to match.
multiclass AVX512_scalar_math_fp_patterns<SDPatternOperator Op, SDNode MaskedOp,
                                          string OpcPrefix, SDNode MoveNode,
                                          X86VectorVTInfo _, PatLeaf ZeroFP> {
  let Predicates = [HasAVX512] in {
    // extracted scalar math op with insert via movss
    def : Pat<(MoveNode
               (_.VT VR128X:$dst),
               (_.VT (scalar_to_vector
                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
                          _.FRC:$src)))),
              (!cast<Instruction>("V"#OpcPrefix#"Zrr_Int") _.VT:$dst,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;
    def : Pat<(MoveNode
               (_.VT VR128X:$dst),
               (_.VT (scalar_to_vector
                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
                          (_.ScalarLdFrag addr:$src))))),
              (!cast<Instruction>("V"#OpcPrefix#"Zrm_Int") _.VT:$dst, addr:$src)>;

    // extracted masked scalar math op with insert via movss
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                                 (MaskedOp (_.EltVT
                                            (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                           _.FRC:$src2),
                                 _.FRC:$src0))),
              (!cast<Instruction>("V"#OpcPrefix#"Zrr_Intk")
               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
               VK1WM:$mask, _.VT:$src1,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                                 (MaskedOp (_.EltVT
                                            (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                           (_.ScalarLdFrag addr:$src2)),
                                 _.FRC:$src0))),
              (!cast<Instruction>("V"#OpcPrefix#"Zrm_Intk")
               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
               VK1WM:$mask, _.VT:$src1, addr:$src2)>;

    // extracted masked scalar math op with zero masking
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                                 (MaskedOp (_.EltVT
                                            (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                           _.FRC:$src2), (_.EltVT ZeroFP)))),
              (!cast<I>("V"#OpcPrefix#"Zrr_Intkz")
               VK1WM:$mask, _.VT:$src1,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                                 (MaskedOp (_.EltVT
                                            (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                           (_.ScalarLdFrag addr:$src2)),
                                 (_.EltVT ZeroFP)))),
              (!cast<I>("V"#OpcPrefix#"Zrm_Intkz") VK1WM:$mask, _.VT:$src1, addr:$src2)>;
  }
}

defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;

defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
defm :
AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>; 12398defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>; 12399 12400defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSH", X86Movsh, v8f16x_info, fp16imm0>; 12401defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSH", X86Movsh, v8f16x_info, fp16imm0>; 12402defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSH", X86Movsh, v8f16x_info, fp16imm0>; 12403defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSH", X86Movsh, v8f16x_info, fp16imm0>; 12404 12405multiclass AVX512_scalar_unary_math_patterns<SDPatternOperator OpNode, string OpcPrefix, 12406 SDNode Move, X86VectorVTInfo _> { 12407 let Predicates = [HasAVX512] in { 12408 def : Pat<(_.VT (Move _.VT:$dst, 12409 (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))), 12410 (!cast<Instruction>("V"#OpcPrefix#"Zr_Int") _.VT:$dst, _.VT:$src)>; 12411 } 12412} 12413 12414defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSS", X86Movss, v4f32x_info>; 12415defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSD", X86Movsd, v2f64x_info>; 12416defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSH", X86Movsh, v8f16x_info>; 12417 12418//===----------------------------------------------------------------------===// 12419// AES instructions 12420//===----------------------------------------------------------------------===// 12421 12422multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> { 12423 let Predicates = [HasVLX, HasVAES] in { 12424 defm Z128 : AESI_binop_rm_int<Op, OpStr, 12425 !cast<Intrinsic>(IntPrefix), 12426 loadv2i64, 0, VR128X, i128mem>, 12427 EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, WIG; 12428 defm Z256 : AESI_binop_rm_int<Op, OpStr, 12429 !cast<Intrinsic>(IntPrefix#"_256"), 12430 loadv4i64, 0, VR256X, i256mem>, 12431 EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, WIG; 12432 } 12433 let Predicates = [HasAVX512, HasVAES] in 12434 defm Z : AESI_binop_rm_int<Op, OpStr, 12435 !cast<Intrinsic>(IntPrefix#"_512"), 12436 loadv8i64, 0, VR512, i512mem>, 12437 EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, WIG; 12438} 12439 12440defm VAESENC : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">; 12441defm VAESENCLAST : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">; 12442defm VAESDEC : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">; 12443defm VAESDECLAST : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">; 12444 12445//===----------------------------------------------------------------------===// 12446// PCLMUL instructions - Carry less multiplication 12447//===----------------------------------------------------------------------===// 12448 12449let Predicates = [HasAVX512, HasVPCLMULQDQ] in 12450defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>, 12451 EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, WIG; 12452 12453let Predicates = [HasVLX, HasVPCLMULQDQ] in { 12454defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>, 12455 EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, WIG; 12456 12457defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64, 12458 int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256, 12459 EVEX_CD8<64, CD8VF>, WIG; 12460} 12461 12462// Aliases 12463defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>; 12464defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>; 12465defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>; 12466 
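// The alias mnemonics expanded above follow the long-standing PCLMUL naming
// scheme in which the qword selectors are folded into the mnemonic; for
// example, "vpclmulhqhqdq" is expected to correspond to vpclmulqdq with
// imm8 = 0x11 (the high qwords of both sources). The authoritative encodings
// live in the shared vpclmulqdq_aliases multiclass.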
12467//===----------------------------------------------------------------------===// 12468// VBMI2 12469//===----------------------------------------------------------------------===// 12470 12471multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode, 12472 X86FoldableSchedWrite sched, X86VectorVTInfo VTI> { 12473 let Constraints = "$src1 = $dst", 12474 ExeDomain = VTI.ExeDomain in { 12475 defm r: AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst), 12476 (ins VTI.RC:$src2, VTI.RC:$src3), OpStr, 12477 "$src3, $src2", "$src2, $src3", 12478 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>, 12479 T8PD, EVEX_4V, Sched<[sched]>; 12480 defm m: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst), 12481 (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr, 12482 "$src3, $src2", "$src2, $src3", 12483 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, 12484 (VTI.VT (VTI.LdFrag addr:$src3))))>, 12485 T8PD, EVEX_4V, 12486 Sched<[sched.Folded, sched.ReadAfterFold]>; 12487 } 12488} 12489 12490multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode, 12491 X86FoldableSchedWrite sched, X86VectorVTInfo VTI> 12492 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> { 12493 let Constraints = "$src1 = $dst", 12494 ExeDomain = VTI.ExeDomain in 12495 defm mb: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst), 12496 (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr, 12497 "${src3}"#VTI.BroadcastStr#", $src2", 12498 "$src2, ${src3}"#VTI.BroadcastStr, 12499 (OpNode VTI.RC:$src1, VTI.RC:$src2, 12500 (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>, 12501 T8PD, EVEX_4V, EVEX_B, 12502 Sched<[sched.Folded, sched.ReadAfterFold]>; 12503} 12504 12505multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode, 12506 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> { 12507 let Predicates = [HasVBMI2] in 12508 defm Z : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>, 12509 EVEX_V512; 12510 let Predicates = [HasVBMI2, HasVLX] in { 12511 defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>, 12512 EVEX_V256; 12513 defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>, 12514 EVEX_V128; 12515 } 12516} 12517 12518multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode, 12519 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> { 12520 let Predicates = [HasVBMI2] in 12521 defm Z : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>, 12522 EVEX_V512; 12523 let Predicates = [HasVBMI2, HasVLX] in { 12524 defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>, 12525 EVEX_V256; 12526 defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>, 12527 EVEX_V128; 12528 } 12529} 12530multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix, 12531 SDNode OpNode, X86SchedWriteWidths sched> { 12532 defm W : VBMI2_shift_var_rm_common<wOp, Prefix#"w", OpNode, sched, 12533 avx512vl_i16_info>, REX_W, EVEX_CD8<16, CD8VF>; 12534 defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix#"d", OpNode, sched, 12535 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; 12536 defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix#"q", OpNode, sched, 12537 avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>; 12538} 12539 12540multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix, 12541 SDNode OpNode, X86SchedWriteWidths sched> { 12542 defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix#"w", sched, 12543 avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>, 12544 REX_W, 
EVEX_CD8<16, CD8VF>; 12545 defm D : avx512_common_3Op_imm8<Prefix#"d", avx512vl_i32_info, dqOp, 12546 OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; 12547 defm Q : avx512_common_3Op_imm8<Prefix#"q", avx512vl_i64_info, dqOp, OpNode, 12548 sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, REX_W; 12549} 12550 12551// Concat & Shift 12552defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>; 12553defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>; 12554defm VPSHLD : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>; 12555defm VPSHRD : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>; 12556 12557// Compress 12558defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256, 12559 avx512vl_i8_info, HasVBMI2>, EVEX; 12560defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256, 12561 avx512vl_i16_info, HasVBMI2>, EVEX, REX_W; 12562// Expand 12563defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256, 12564 avx512vl_i8_info, HasVBMI2>, EVEX; 12565defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256, 12566 avx512vl_i16_info, HasVBMI2>, EVEX, REX_W; 12567 12568//===----------------------------------------------------------------------===// 12569// VNNI 12570//===----------------------------------------------------------------------===// 12571 12572let Constraints = "$src1 = $dst" in 12573multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode, 12574 X86FoldableSchedWrite sched, X86VectorVTInfo VTI, 12575 bit IsCommutable> { 12576 let ExeDomain = VTI.ExeDomain in { 12577 defm r : AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst), 12578 (ins VTI.RC:$src2, VTI.RC:$src3), OpStr, 12579 "$src3, $src2", "$src2, $src3", 12580 (VTI.VT (OpNode VTI.RC:$src1, 12581 VTI.RC:$src2, VTI.RC:$src3)), 12582 IsCommutable, IsCommutable>, 12583 EVEX_4V, T8PD, Sched<[sched]>; 12584 defm m : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst), 12585 (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr, 12586 "$src3, $src2", "$src2, $src3", 12587 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, 12588 (VTI.VT (VTI.LdFrag addr:$src3))))>, 12589 EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD, 12590 Sched<[sched.Folded, sched.ReadAfterFold, 12591 sched.ReadAfterFold]>; 12592 defm mb : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst), 12593 (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), 12594 OpStr, "${src3}"#VTI.BroadcastStr#", $src2", 12595 "$src2, ${src3}"#VTI.BroadcastStr, 12596 (OpNode VTI.RC:$src1, VTI.RC:$src2, 12597 (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>, 12598 EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B, 12599 T8PD, Sched<[sched.Folded, sched.ReadAfterFold, 12600 sched.ReadAfterFold]>; 12601 } 12602} 12603 12604multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode, 12605 X86SchedWriteWidths sched, bit IsCommutable> { 12606 let Predicates = [HasVNNI] in 12607 defm Z : VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info, 12608 IsCommutable>, EVEX_V512; 12609 let Predicates = [HasVNNI, HasVLX] in { 12610 defm Z256 : VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info, 12611 IsCommutable>, EVEX_V256; 12612 defm Z128 : VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info, 12613 IsCommutable>, EVEX_V128; 12614 } 12615} 12616 12617// FIXME: Is there a better scheduler class for VPDP? 
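// As a reminder of the semantics: the vpdpbusd forms multiply unsigned bytes
// from one source with signed bytes from the other and accumulate four
// products per i32 lane, which is why the BUSD forms below are not
// commutable; the vpdpwssd forms multiply signed words and are marked
// commutable. The trailing-"s" variants saturate the accumulation instead
// of wrapping.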
12618defm VPDPBUSD : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul, 0>; 12619defm VPDPBUSDS : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul, 0>; 12620defm VPDPWSSD : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul, 1>; 12621defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul, 1>; 12622 12623// Patterns to match VPDPWSSD from existing instructions/intrinsics. 12624let Predicates = [HasVNNI] in { 12625 def : Pat<(v16i32 (add VR512:$src1, 12626 (X86vpmaddwd_su VR512:$src2, VR512:$src3))), 12627 (VPDPWSSDZr VR512:$src1, VR512:$src2, VR512:$src3)>; 12628 def : Pat<(v16i32 (add VR512:$src1, 12629 (X86vpmaddwd_su VR512:$src2, (load addr:$src3)))), 12630 (VPDPWSSDZm VR512:$src1, VR512:$src2, addr:$src3)>; 12631} 12632let Predicates = [HasVNNI,HasVLX] in { 12633 def : Pat<(v8i32 (add VR256X:$src1, 12634 (X86vpmaddwd_su VR256X:$src2, VR256X:$src3))), 12635 (VPDPWSSDZ256r VR256X:$src1, VR256X:$src2, VR256X:$src3)>; 12636 def : Pat<(v8i32 (add VR256X:$src1, 12637 (X86vpmaddwd_su VR256X:$src2, (load addr:$src3)))), 12638 (VPDPWSSDZ256m VR256X:$src1, VR256X:$src2, addr:$src3)>; 12639 def : Pat<(v4i32 (add VR128X:$src1, 12640 (X86vpmaddwd_su VR128X:$src2, VR128X:$src3))), 12641 (VPDPWSSDZ128r VR128X:$src1, VR128X:$src2, VR128X:$src3)>; 12642 def : Pat<(v4i32 (add VR128X:$src1, 12643 (X86vpmaddwd_su VR128X:$src2, (load addr:$src3)))), 12644 (VPDPWSSDZ128m VR128X:$src1, VR128X:$src2, addr:$src3)>; 12645} 12646 12647//===----------------------------------------------------------------------===// 12648// Bit Algorithms 12649//===----------------------------------------------------------------------===// 12650 12651// FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW? 12652defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU, 12653 avx512vl_i8_info, HasBITALG>; 12654defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU, 12655 avx512vl_i16_info, HasBITALG>, REX_W; 12656 12657defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>; 12658defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>; 12659 12660def X86Vpshufbitqmb_su : PatFrag<(ops node:$src1, node:$src2), 12661 (X86Vpshufbitqmb node:$src1, node:$src2), [{ 12662 return N->hasOneUse(); 12663}]>; 12664 12665multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> { 12666 defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst), 12667 (ins VTI.RC:$src1, VTI.RC:$src2), 12668 "vpshufbitqmb", 12669 "$src2, $src1", "$src1, $src2", 12670 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1), 12671 (VTI.VT VTI.RC:$src2)), 12672 (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1), 12673 (VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD, 12674 Sched<[sched]>; 12675 defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst), 12676 (ins VTI.RC:$src1, VTI.MemOp:$src2), 12677 "vpshufbitqmb", 12678 "$src2, $src1", "$src1, $src2", 12679 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1), 12680 (VTI.VT (VTI.LdFrag addr:$src2))), 12681 (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1), 12682 (VTI.VT (VTI.LdFrag addr:$src2)))>, 12683 EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD, 12684 Sched<[sched.Folded, sched.ReadAfterFold]>; 12685} 12686 12687multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> { 12688 let Predicates = [HasBITALG] in 12689 defm Z : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512; 12690 let Predicates = [HasBITALG, HasVLX] in { 12691 defm Z256 : 
VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256; 12692 defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128; 12693 } 12694} 12695 12696// FIXME: Is there a better scheduler class for VPSHUFBITQMB? 12697defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>; 12698 12699//===----------------------------------------------------------------------===// 12700// GFNI 12701//===----------------------------------------------------------------------===// 12702 12703multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode, 12704 X86SchedWriteWidths sched> { 12705 let Predicates = [HasGFNI, HasAVX512] in 12706 defm Z : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>, 12707 EVEX_V512; 12708 let Predicates = [HasGFNI, HasVLX] in { 12709 defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>, 12710 EVEX_V256; 12711 defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>, 12712 EVEX_V128; 12713 } 12714} 12715 12716defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb, 12717 SchedWriteVecALU>, 12718 EVEX_CD8<8, CD8VF>, T8PD; 12719 12720multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode, 12721 X86FoldableSchedWrite sched, X86VectorVTInfo VTI, 12722 X86VectorVTInfo BcstVTI> 12723 : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> { 12724 let ExeDomain = VTI.ExeDomain in 12725 defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst), 12726 (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3), 12727 OpStr, "$src3, ${src2}"#BcstVTI.BroadcastStr#", $src1", 12728 "$src1, ${src2}"#BcstVTI.BroadcastStr#", $src3", 12729 (OpNode (VTI.VT VTI.RC:$src1), 12730 (bitconvert (BcstVTI.VT (X86VBroadcastld64 addr:$src2))), 12731 (i8 timm:$src3))>, EVEX_B, 12732 Sched<[sched.Folded, sched.ReadAfterFold]>; 12733} 12734 12735multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode, 12736 X86SchedWriteWidths sched> { 12737 let Predicates = [HasGFNI, HasAVX512] in 12738 defm Z : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM, 12739 v64i8_info, v8i64_info>, EVEX_V512; 12740 let Predicates = [HasGFNI, HasVLX] in { 12741 defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM, 12742 v32i8x_info, v4i64x_info>, EVEX_V256; 12743 defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM, 12744 v16i8x_info, v2i64x_info>, EVEX_V128; 12745 } 12746} 12747 12748defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb", 12749 X86GF2P8affineinvqb, SchedWriteVecIMul>, 12750 EVEX_4V, EVEX_CD8<8, CD8VF>, REX_W, AVX512AIi8Base; 12751defm VGF2P8AFFINEQB : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb", 12752 X86GF2P8affineqb, SchedWriteVecIMul>, 12753 EVEX_4V, EVEX_CD8<8, CD8VF>, REX_W, AVX512AIi8Base; 12754 12755 12756//===----------------------------------------------------------------------===// 12757// AVX5124FMAPS 12758//===----------------------------------------------------------------------===// 12759 12760let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle, 12761 Constraints = "$src1 = $dst", Uses = [MXCSR], mayRaiseFPException = 1 in { 12762defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info, 12763 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3), 12764 "v4fmaddps", "$src3, $src2", "$src2, $src3", 12765 []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>, 12766 Sched<[SchedWriteFMA.ZMM.Folded]>; 12767 12768defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, 
MRMSrcMem, v16f32_info, 12769 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3), 12770 "v4fnmaddps", "$src3, $src2", "$src2, $src3", 12771 []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>, 12772 Sched<[SchedWriteFMA.ZMM.Folded]>; 12773 12774defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info, 12775 (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3), 12776 "v4fmaddss", "$src3, $src2", "$src2, $src3", 12777 []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>, 12778 Sched<[SchedWriteFMA.Scl.Folded]>; 12779 12780defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info, 12781 (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3), 12782 "v4fnmaddss", "$src3, $src2", "$src2, $src3", 12783 []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>, 12784 Sched<[SchedWriteFMA.Scl.Folded]>; 12785} 12786 12787//===----------------------------------------------------------------------===// 12788// AVX5124VNNIW 12789//===----------------------------------------------------------------------===// 12790 12791let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt, 12792 Constraints = "$src1 = $dst" in { 12793defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info, 12794 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3), 12795 "vp4dpwssd", "$src3, $src2", "$src2, $src3", 12796 []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>, 12797 Sched<[SchedWriteFMA.ZMM.Folded]>; 12798 12799defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info, 12800 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3), 12801 "vp4dpwssds", "$src3, $src2", "$src2, $src3", 12802 []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>, 12803 Sched<[SchedWriteFMA.ZMM.Folded]>; 12804} 12805 12806let hasSideEffects = 0 in { 12807 let mayStore = 1, SchedRW = [WriteFStoreX] in 12808 def MASKPAIR16STORE : PseudoI<(outs), (ins anymem:$dst, VK16PAIR:$src), []>; 12809 let mayLoad = 1, SchedRW = [WriteFLoadX] in 12810 def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src), []>; 12811} 12812 12813//===----------------------------------------------------------------------===// 12814// VP2INTERSECT 12815//===----------------------------------------------------------------------===// 12816 12817multiclass avx512_vp2intersect_modes<X86FoldableSchedWrite sched, X86VectorVTInfo _> { 12818 def rr : I<0x68, MRMSrcReg, 12819 (outs _.KRPC:$dst), 12820 (ins _.RC:$src1, _.RC:$src2), 12821 !strconcat("vp2intersect", _.Suffix, 12822 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 12823 [(set _.KRPC:$dst, (X86vp2intersect 12824 _.RC:$src1, (_.VT _.RC:$src2)))]>, 12825 EVEX_4V, T8XD, Sched<[sched]>; 12826 12827 def rm : I<0x68, MRMSrcMem, 12828 (outs _.KRPC:$dst), 12829 (ins _.RC:$src1, _.MemOp:$src2), 12830 !strconcat("vp2intersect", _.Suffix, 12831 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 12832 [(set _.KRPC:$dst, (X86vp2intersect 12833 _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>, 12834 EVEX_4V, T8XD, EVEX_CD8<_.EltSize, CD8VF>, 12835 Sched<[sched.Folded, sched.ReadAfterFold]>; 12836 12837 def rmb : I<0x68, MRMSrcMem, 12838 (outs _.KRPC:$dst), 12839 (ins _.RC:$src1, _.ScalarMemOp:$src2), 12840 !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr, 12841 ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"), 12842 [(set _.KRPC:$dst, (X86vp2intersect 12843 _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))]>, 12844 EVEX_4V, T8XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, 12845 Sched<[sched.Folded, sched.ReadAfterFold]>; 12846} 12847 
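// Note: vp2intersect writes an even/odd pair of mask registers, which is why
// the modes above define _.KRPC (a mask-register pair operand) as the
// destination instead of the usual single writemask register class.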
12848multiclass avx512_vp2intersect<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> { 12849 let Predicates = [HasAVX512, HasVP2INTERSECT] in 12850 defm Z : avx512_vp2intersect_modes<sched.ZMM, _.info512>, EVEX_V512; 12851 12852 let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in { 12853 defm Z256 : avx512_vp2intersect_modes<sched.YMM, _.info256>, EVEX_V256; 12854 defm Z128 : avx512_vp2intersect_modes<sched.XMM, _.info128>, EVEX_V128; 12855 } 12856} 12857 12858defm VP2INTERSECTD : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i32_info>; 12859defm VP2INTERSECTQ : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i64_info>, REX_W; 12860 12861multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr, 12862 X86SchedWriteWidths sched, 12863 AVX512VLVectorVTInfo _SrcVTInfo, 12864 AVX512VLVectorVTInfo _DstVTInfo, 12865 SDNode OpNode, Predicate prd, 12866 bit IsCommutable = 0> { 12867 let Predicates = [prd] in 12868 defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode, 12869 _SrcVTInfo.info512, _DstVTInfo.info512, 12870 _SrcVTInfo.info512, IsCommutable>, 12871 EVEX_V512, EVEX_CD8<32, CD8VF>; 12872 let Predicates = [HasVLX, prd] in { 12873 defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode, 12874 _SrcVTInfo.info256, _DstVTInfo.info256, 12875 _SrcVTInfo.info256, IsCommutable>, 12876 EVEX_V256, EVEX_CD8<32, CD8VF>; 12877 defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode, 12878 _SrcVTInfo.info128, _DstVTInfo.info128, 12879 _SrcVTInfo.info128, IsCommutable>, 12880 EVEX_V128, EVEX_CD8<32, CD8VF>; 12881 } 12882} 12883 12884let ExeDomain = SSEPackedSingle in 12885defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16", 12886 SchedWriteCvtPD2PS, //FIXME: Should be SchedWriteCvtPS2BF 12887 avx512vl_f32_info, avx512vl_bf16_info, 12888 X86cvtne2ps2bf16, HasBF16, 0>, T8XD; 12889 12890// Truncate Float to BFloat16 12891multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr, 12892 X86SchedWriteWidths sched> { 12893 let ExeDomain = SSEPackedSingle in { 12894 let Predicates = [HasBF16], Uses = []<Register>, mayRaiseFPException = 0 in { 12895 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16bf16x_info, v16f32_info, 12896 X86cvtneps2bf16, X86cvtneps2bf16, sched.ZMM>, EVEX_V512; 12897 } 12898 let Predicates = [HasBF16, HasVLX] in { 12899 let Uses = []<Register>, mayRaiseFPException = 0 in { 12900 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8bf16x_info, v4f32x_info, 12901 null_frag, null_frag, sched.XMM, "{1to4}", "{x}", f128mem, 12902 VK4WM>, EVEX_V128; 12903 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8bf16x_info, v8f32x_info, 12904 X86cvtneps2bf16, X86cvtneps2bf16, 12905 sched.YMM, "{1to8}", "{y}">, EVEX_V256; 12906 } 12907 } // Predicates = [HasBF16, HasVLX] 12908 } // ExeDomain = SSEPackedSingle 12909 12910 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}", 12911 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, 12912 VR128X:$src), 0>; 12913 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}", 12914 (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, 12915 f128mem:$src), 0, "intel">; 12916 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}", 12917 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, 12918 VR256X:$src), 0>; 12919 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}", 12920 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, 12921 f256mem:$src), 0, "intel">; 12922} 12923 12924defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16", 12925 SchedWriteCvtPD2PS>, T8XS, 12926 EVEX_CD8<32, CD8VF>; 12927 12928let Predicates = [HasBF16, 
                 HasVLX] in {
  // Special patterns to allow use of X86mcvtneps2bf16 for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v8bf16 (X86cvtneps2bf16 (v4f32 VR128X:$src))),
            (VCVTNEPS2BF16Z128rr VR128X:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), (v8bf16 VR128X:$src0),
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), v8bf16x_info.ImmAllZerosV,
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rrkz VK4WM:$mask, VR128X:$src)>;

  def : Pat<(v8bf16 (X86cvtneps2bf16 (loadv4f32 addr:$src))),
            (VCVTNEPS2BF16Z128rm addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), (v8bf16 VR128X:$src0),
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), v8bf16x_info.ImmAllZerosV,
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v8bf16 (X86cvtneps2bf16 (v4f32
                                      (X86VBroadcastld32 addr:$src)))),
            (VCVTNEPS2BF16Z128rmb addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
                              (v8bf16 VR128X:$src0), VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
                              v8bf16x_info.ImmAllZerosV, VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v8bf16 (int_x86_vcvtneps2bf16128 (v4f32 VR128X:$src))),
            (VCVTNEPS2BF16Z128rr VR128X:$src)>;
  def : Pat<(v8bf16 (int_x86_vcvtneps2bf16128 (loadv4f32 addr:$src))),
            (VCVTNEPS2BF16Z128rm addr:$src)>;

  def : Pat<(v8bf16 (int_x86_vcvtneps2bf16256 (v8f32 VR256X:$src))),
            (VCVTNEPS2BF16Z256rr VR256X:$src)>;
  def : Pat<(v8bf16 (int_x86_vcvtneps2bf16256 (loadv8f32 addr:$src))),
            (VCVTNEPS2BF16Z256rm addr:$src)>;

  def : Pat<(v8bf16 (X86VBroadcastld16 addr:$src)),
            (VPBROADCASTWZ128rm addr:$src)>;
  def : Pat<(v16bf16 (X86VBroadcastld16 addr:$src)),
            (VPBROADCASTWZ256rm addr:$src)>;

  def : Pat<(v8bf16 (X86VBroadcast (v8bf16 VR128X:$src))),
            (VPBROADCASTWZ128rr VR128X:$src)>;
  def : Pat<(v16bf16 (X86VBroadcast (v8bf16 VR128X:$src))),
            (VPBROADCASTWZ256rr VR128X:$src)>;

  def : Pat<(v8bf16 (X86vfpround (v8f32 VR256X:$src))),
            (VCVTNEPS2BF16Z256rr VR256X:$src)>;
  def : Pat<(v8bf16 (X86vfpround (loadv8f32 addr:$src))),
            (VCVTNEPS2BF16Z256rm addr:$src)>;

  // TODO: No scalar broadcast, since we don't support a legal scalar bf16
  // type yet.
}

let Predicates = [HasBF16] in {
  def : Pat<(v32bf16 (X86VBroadcastld16 addr:$src)),
            (VPBROADCASTWZrm addr:$src)>;

  def : Pat<(v32bf16 (X86VBroadcast (v8bf16 VR128X:$src))),
            (VPBROADCASTWZrr VR128X:$src)>;

  def : Pat<(v16bf16 (X86vfpround (v16f32 VR512:$src))),
            (VCVTNEPS2BF16Zrr VR512:$src)>;
  def : Pat<(v16bf16 (X86vfpround (loadv16f32 addr:$src))),
            (VCVTNEPS2BF16Zrm addr:$src)>;
  // TODO: No scalar broadcast, since we don't support a legal scalar bf16
  // type yet.
12999} 13000 13001let Constraints = "$src1 = $dst" in { 13002multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, 13003 X86FoldableSchedWrite sched, 13004 X86VectorVTInfo _, X86VectorVTInfo src_v> { 13005 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 13006 (ins src_v.RC:$src2, src_v.RC:$src3), 13007 OpcodeStr, "$src3, $src2", "$src2, $src3", 13008 (_.VT (OpNode _.RC:$src1, src_v.RC:$src2, src_v.RC:$src3))>, 13009 EVEX_4V, Sched<[sched]>; 13010 13011 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 13012 (ins src_v.RC:$src2, src_v.MemOp:$src3), 13013 OpcodeStr, "$src3, $src2", "$src2, $src3", 13014 (_.VT (OpNode _.RC:$src1, src_v.RC:$src2, 13015 (src_v.LdFrag addr:$src3)))>, EVEX_4V, 13016 Sched<[sched.Folded, sched.ReadAfterFold]>; 13017 13018 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 13019 (ins src_v.RC:$src2, f32mem:$src3), 13020 OpcodeStr, 13021 !strconcat("${src3}", _.BroadcastStr,", $src2"), 13022 !strconcat("$src2, ${src3}", _.BroadcastStr), 13023 (_.VT (OpNode _.RC:$src1, src_v.RC:$src2, 13024 (src_v.VT (src_v.BroadcastLdFrag addr:$src3))))>, 13025 EVEX_B, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; 13026 13027} 13028} // Constraints = "$src1 = $dst" 13029 13030multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, 13031 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _, 13032 AVX512VLVectorVTInfo src_v, Predicate prd> { 13033 let Predicates = [prd] in { 13034 defm Z : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512, 13035 src_v.info512>, EVEX_V512; 13036 } 13037 let Predicates = [HasVLX, prd] in { 13038 defm Z256 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256, 13039 src_v.info256>, EVEX_V256; 13040 defm Z128 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128, 13041 src_v.info128>, EVEX_V128; 13042 } 13043} 13044 13045let ExeDomain = SSEPackedSingle in 13046defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps, SchedWriteFMA, 13047 avx512vl_f32_info, avx512vl_bf16_info, 13048 HasBF16>, T8XS, EVEX_CD8<32, CD8VF>; 13049 13050//===----------------------------------------------------------------------===// 13051// AVX512FP16 13052//===----------------------------------------------------------------------===// 13053 13054let Predicates = [HasFP16] in { 13055// Move word ( r/m16) to Packed word 13056def VMOVW2SHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src), 13057 "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, Sched<[WriteVecMoveFromGpr]>; 13058def VMOVWrm : AVX512<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i16mem:$src), 13059 "vmovw\t{$src, $dst|$dst, $src}", 13060 [(set VR128X:$dst, 13061 (v8i16 (scalar_to_vector (loadi16 addr:$src))))]>, 13062 T_MAP5PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFLoad]>; 13063 13064def : Pat<(f16 (bitconvert GR16:$src)), 13065 (f16 (COPY_TO_REGCLASS 13066 (VMOVW2SHrr 13067 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), 13068 FR16X))>; 13069def : Pat<(v8i16 (scalar_to_vector (i16 GR16:$src))), 13070 (VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>; 13071def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (and GR32:$src, 0xffff)))), 13072 (VMOVW2SHrr GR32:$src)>; 13073// FIXME: We should really find a way to improve these patterns. 
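// The vzmovl patterns below are spelled out per vector width (v8i32, v16i32)
// because there is no single pattern that matches "zero-extend a 16-bit
// scalar into an arbitrarily wide vector"; each one funnels into the same
// 128-bit VMOVW forms and reinterprets the result with SUBREG_TO_REG.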
def : Pat<(v8i32 (X86vzmovl
                  (insert_subvector undef,
                                    (v4i32 (scalar_to_vector
                                            (and GR32:$src, 0xffff))),
                                    (iPTR 0)))),
          (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;
def : Pat<(v16i32 (X86vzmovl
                   (insert_subvector undef,
                                     (v4i32 (scalar_to_vector
                                             (and GR32:$src, 0xffff))),
                                     (iPTR 0)))),
          (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;

def : Pat<(v8i16 (X86vzmovl (scalar_to_vector (i16 GR16:$src)))),
          (VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>;

// AVX 128-bit movw instructions write zeros in the high bits of the wider
// destination register.
def : Pat<(v8i16 (X86vzload16 addr:$src)),
          (VMOVWrm addr:$src)>;
def : Pat<(v16i16 (X86vzload16 addr:$src)),
          (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>;

// Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
def : Pat<(v32i16 (X86vzload16 addr:$src)),
          (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>;

def : Pat<(v4i32 (scalar_to_vector (i32 (extloadi16 addr:$src)))),
          (VMOVWrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (i32 (zextloadi16 addr:$src))))),
          (VMOVWrm addr:$src)>;
def : Pat<(v8i32 (X86vzmovl
                  (insert_subvector undef,
                                    (v4i32 (scalar_to_vector
                                            (i32 (zextloadi16 addr:$src)))),
                                    (iPTR 0)))),
          (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>;
def : Pat<(v16i32 (X86vzmovl
                   (insert_subvector undef,
                                     (v4i32 (scalar_to_vector
                                             (i32 (zextloadi16 addr:$src)))),
                                     (iPTR 0)))),
          (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>;

// Move word from xmm register to r/m16.
def VMOVSH2Wrr : AVX512<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
                   "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, Sched<[WriteVecMoveToGpr]>;
def VMOVWmr : AVX512<0x7E, MRMDestMem, (outs),
                   (ins i16mem:$dst, VR128X:$src),
                   "vmovw\t{$src, $dst|$dst, $src}",
                   [(store (i16 (extractelt (v8i16 VR128X:$src),
                                 (iPTR 0))), addr:$dst)]>,
                   T_MAP5PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFStore]>;

def : Pat<(i16 (bitconvert FR16X:$src)),
          (i16 (EXTRACT_SUBREG
                (VMOVSH2Wrr (COPY_TO_REGCLASS FR16X:$src, VR128X)),
                sub_16bit))>;
def : Pat<(i16 (extractelt (v8i16 VR128X:$src), (iPTR 0))),
          (i16 (EXTRACT_SUBREG (VMOVSH2Wrr VR128X:$src), sub_16bit))>;

// Allow "vmovw" to use GR64.
let hasSideEffects = 0 in {
  def VMOVW64toSHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
                     "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
  def VMOVSHtoW64rr : AVX512<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
                     "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, REX_W, Sched<[WriteVecMoveToGpr]>;
}
}
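// For illustration (registers are arbitrary): the definitions above provide
//   vmovw (%rax), %xmm0   # load 16 bits, zeroing the upper elements
//   vmovw %xmm0, (%rax)   # store the low 16 bits
//   vmovw %eax, %xmm0     # GR32 to vector register
// i.e. roughly the FP16 counterpart of vmovd for 32-bit values.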
// Convert 16-bit float to i16/u16.
multiclass avx512_cvtph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                          SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                          AVX512VLVectorVTInfo _Dst,
                          AVX512VLVectorVTInfo _Src,
                          X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512,
                            OpNode, MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, _Dst.info512, _Src.info512,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasFP16, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128,
                               OpNode, MaskOpNode, sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256,
                               OpNode, MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

// Convert 16-bit float to i16/u16 with truncation.
multiclass avx512_cvttph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                           AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512,
                            OpNode, MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, _Dst.info512, _Src.info512,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasFP16, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128,
                               OpNode, MaskOpNode, sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256,
                               OpNode, MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

defm VCVTPH2UW : avx512_cvtph2w<0x7D, "vcvtph2uw", X86cvtp2UInt, X86cvtp2UInt,
                                X86cvtp2UIntRnd, avx512vl_i16_info,
                                avx512vl_f16_info, SchedWriteCvtPD2DQ>,
                                T_MAP5PS, EVEX_CD8<16, CD8VF>;
defm VCVTUW2PH : avx512_cvtph2w<0x7D, "vcvtuw2ph", any_uint_to_fp, uint_to_fp,
                                X86VUintToFpRnd, avx512vl_f16_info,
                                avx512vl_i16_info, SchedWriteCvtPD2DQ>,
                                T_MAP5XD, EVEX_CD8<16, CD8VF>;
defm VCVTTPH2W : avx512_cvttph2w<0x7C, "vcvttph2w", X86any_cvttp2si,
                                 X86cvttp2si, X86cvttp2siSAE,
                                 avx512vl_i16_info, avx512vl_f16_info,
                                 SchedWriteCvtPD2DQ>, T_MAP5PD, EVEX_CD8<16, CD8VF>;
defm VCVTTPH2UW : avx512_cvttph2w<0x7C, "vcvttph2uw", X86any_cvttp2ui,
                                  X86cvttp2ui, X86cvttp2uiSAE,
                                  avx512vl_i16_info, avx512vl_f16_info,
                                  SchedWriteCvtPD2DQ>, T_MAP5PS, EVEX_CD8<16, CD8VF>;
defm VCVTPH2W : avx512_cvtph2w<0x7D, "vcvtph2w", X86cvtp2Int, X86cvtp2Int,
                               X86cvtp2IntRnd, avx512vl_i16_info,
                               avx512vl_f16_info, SchedWriteCvtPD2DQ>,
                               T_MAP5PD, EVEX_CD8<16, CD8VF>;
defm VCVTW2PH : avx512_cvtph2w<0x7D, "vcvtw2ph", any_sint_to_fp, sint_to_fp,
                               X86VSintToFpRnd, avx512vl_f16_info,
                               avx512vl_i16_info, SchedWriteCvtPD2DQ>,
                               T_MAP5XS, EVEX_CD8<16, CD8VF>;

// Convert Half to Signed/Unsigned Doubleword.
multiclass avx512_cvtph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f16x_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasFP16, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode,
                               MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}
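// For illustration (hypothetical operands): because the 128-bit form above
// only consumes the low four f16 elements, its full-vector memory operand is
// 64 bits (f64mem) and the broadcast decoration narrows to {1to4}, e.g.
//   vcvtph2dq (%rax){1to4}, %xmm0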
// Convert Half to Signed/Unsigned Doubleword with truncation.
multiclass avx512_cvttph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f16x_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasFP16, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode,
                               MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

defm VCVTPH2DQ : avx512_cvtph2dq<0x5B, "vcvtph2dq", X86cvtp2Int, X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD,
                                 EVEX_CD8<16, CD8VH>;
defm VCVTPH2UDQ : avx512_cvtph2dq<0x79, "vcvtph2udq", X86cvtp2UInt, X86cvtp2UInt,
                                  X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5PS,
                                  EVEX_CD8<16, CD8VH>;

defm VCVTTPH2DQ : avx512_cvttph2dq<0x5B, "vcvttph2dq", X86any_cvttp2si,
                                   X86cvttp2si, X86cvttp2siSAE,
                                   SchedWriteCvtPS2DQ>, T_MAP5XS,
                                   EVEX_CD8<16, CD8VH>;

defm VCVTTPH2UDQ : avx512_cvttph2dq<0x78, "vcvttph2udq", X86any_cvttp2ui,
                                    X86cvttp2ui, X86cvttp2uiSAE,
                                    SchedWriteCvtPS2DQ>, T_MAP5PS,
                                    EVEX_CD8<16, CD8VH>;

// Convert Half to Signed/Unsigned Quadword.
multiclass avx512_cvtph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f16x_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasFP16, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from the v8f16x_info source.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode,
                               MaskOpNode, sched.XMM, "{1to2}", "", f32mem>,
                               EVEX_V128;
    // Explicitly specified broadcast string, since we take only 4 elements
    // from the v8f16x_info source.
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode,
                               MaskOpNode, sched.YMM, "{1to4}", "", f64mem>,
                               EVEX_V256;
  }
}

// Convert Half to Signed/Unsigned Quadword with truncation.
multiclass avx512_cvttph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f16x_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasFP16, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from the v8f16x_info source.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode,
                               MaskOpNode, sched.XMM, "{1to2}", "", f32mem>, EVEX_V128;
    // Explicitly specified broadcast string, since we take only 4 elements
    // from the v8f16x_info source.
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode,
                               MaskOpNode, sched.YMM, "{1to4}", "", f64mem>, EVEX_V256;
  }
}
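// For illustration (hypothetical operands): the 128-bit quadword forms read
// only two f16 elements, so the broadcast decoration is {1to2} and the
// full-vector memory operand shrinks to 32 bits (f32mem), e.g.
//   vcvtph2qq (%rax){1to2}, %xmm0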
defm VCVTPH2QQ : avx512_cvtph2qq<0x7B, "vcvtph2qq", X86cvtp2Int, X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD,
                                 EVEX_CD8<16, CD8VQ>;

defm VCVTPH2UQQ : avx512_cvtph2qq<0x79, "vcvtph2uqq", X86cvtp2UInt, X86cvtp2UInt,
                                  X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD,
                                  EVEX_CD8<16, CD8VQ>;

defm VCVTTPH2QQ : avx512_cvttph2qq<0x7A, "vcvttph2qq", X86any_cvttp2si,
                                   X86cvttp2si, X86cvttp2siSAE,
                                   SchedWriteCvtPS2DQ>, T_MAP5PD,
                                   EVEX_CD8<16, CD8VQ>;

defm VCVTTPH2UQQ : avx512_cvttph2qq<0x78, "vcvttph2uqq", X86any_cvttp2ui,
                                    X86cvttp2ui, X86cvttp2uiSAE,
                                    SchedWriteCvtPS2DQ>, T_MAP5PD,
                                    EVEX_CD8<16, CD8VQ>;

// Convert Signed/Unsigned Quadword to Half.
multiclass avx512_cvtqq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  // We need "x"/"y"/"z" suffixes in order to distinguish between the 128-,
  // 256- and 512-bit memory forms of these instructions in the asm parser:
  // they all have the same destination type, 'v8f16x_info'. We also specify
  // the broadcast string explicitly for the same reason.
  let Predicates = [HasFP16] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8i64_info, OpNode,
                            MaskOpNode, sched.ZMM, "{1to8}", "{z}">,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8i64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasFP16, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2i64x_info,
                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}",
                               i128mem, VK2WM>,
                               EVEX_V128, NotEVEX2VEXConvertible;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4i64x_info,
                               null_frag, null_frag, sched.YMM, "{1to4}", "{y}",
                               i256mem, VK4WM>,
                               EVEX_V256, NotEVEX2VEXConvertible;
  }

  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                   VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                   VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                   VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                   i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                   VK2WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                   VK2WM:$mask, i64mem:$src), 0, "att">;
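  // For illustration (hypothetical operands): a register source already fixes
  // the width, but a bare memory operand does not, so the suffixed forms let
  // the parser resolve, e.g.
  //   vcvtqq2phx (%rax), %xmm0   # 128-bit memory source
  //   vcvtqq2phy (%rax), %xmm0   # 256-bit memory source
  //   vcvtqq2phz (%rax), %xmm0   # 512-bit memory source
  // All three write a v8f16 result in %xmm0.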
  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                   VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
                  "$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                   VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                   VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                   i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                   VK4WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                   VK4WM:$mask, i64mem:$src), 0, "att">;

  def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Zrr") VR128X:$dst,
                   VR512:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|"
                  "$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst,
                   VK8WM:$mask, VR512:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst,
                   VK8WM:$mask, VR512:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}",
                  (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst,
                   i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to8}}",
                  (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst,
                   VK8WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to8}}",
                  (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst,
                   VK8WM:$mask, i64mem:$src), 0, "att">;
}

defm VCVTQQ2PH : avx512_cvtqq2ph<0x5B, "vcvtqq2ph", any_sint_to_fp, sint_to_fp,
                                 X86VSintToFpRnd, SchedWriteCvtDQ2PS>, REX_W, T_MAP5PS,
                                 EVEX_CD8<64, CD8VF>;

defm VCVTUQQ2PH : avx512_cvtqq2ph<0x7A, "vcvtuqq2ph", any_uint_to_fp, uint_to_fp,
                                  X86VUintToFpRnd, SchedWriteCvtDQ2PS>, REX_W, T_MAP5XD,
                                  EVEX_CD8<64, CD8VF>;

// Convert half to signed/unsigned int 32/64.
defm VCVTSH2SIZ: avx512_cvt_s_int_round<0x2D, f16x_info, i32x_info, X86cvts2si,
                   X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{l}", HasFP16>,
                   T_MAP5XS, EVEX_CD8<16, CD8VT1>;
defm VCVTSH2SI64Z: avx512_cvt_s_int_round<0x2D, f16x_info, i64x_info, X86cvts2si,
                   X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{q}", HasFP16>,
                   T_MAP5XS, REX_W, EVEX_CD8<16, CD8VT1>;
defm VCVTSH2USIZ: avx512_cvt_s_int_round<0x79, f16x_info, i32x_info, X86cvts2usi,
                   X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{l}", HasFP16>,
                   T_MAP5XS, EVEX_CD8<16, CD8VT1>;
defm VCVTSH2USI64Z: avx512_cvt_s_int_round<0x79, f16x_info, i64x_info, X86cvts2usi,
                   X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{q}", HasFP16>,
                   T_MAP5XS, REX_W, EVEX_CD8<16, CD8VT1>;

defm VCVTTSH2SIZ: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i32x_info,
                    any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
                    "{l}", HasFP16>, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
defm VCVTTSH2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i64x_info,
                    any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
                    "{q}", HasFP16>, REX_W, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
defm VCVTTSH2USIZ: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i32x_info,
                    any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
                    "{l}", HasFP16>, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
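// For illustration (hypothetical operands): the scalar conversions above give
//   vcvtsh2si %xmm0, %eax          # round per the current MXCSR mode
//   vcvttsh2si %xmm0, %eax         # truncate toward zero
//   vcvttsh2si {sae}, %xmm0, %rax  # 64-bit form with suppress-all-exceptions
// The "{l}"/"{q}" arguments select the AT&T mnemonic suffix for the GPR width.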
defm VCVTTSH2USI64Z: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i64x_info,
                    any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
                    "{q}", HasFP16>, T_MAP5XS, REX_W, EVEX_CD8<16, CD8VT1>;

let Predicates = [HasFP16] in {
  defm VCVTSI2SHZ : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR32,
                                         v8f16x_info, i32mem, loadi32, "cvtsi2sh", "l">,
                                         T_MAP5XS, EVEX_CD8<32, CD8VT1>;
  defm VCVTSI642SHZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR64,
                                          v8f16x_info, i64mem, loadi64, "cvtsi2sh", "q">,
                                          T_MAP5XS, REX_W, EVEX_CD8<64, CD8VT1>;
  defm VCVTUSI2SHZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR32,
                                          v8f16x_info, i32mem, loadi32,
                                          "cvtusi2sh", "l">, T_MAP5XS, EVEX_CD8<32, CD8VT1>;
  defm VCVTUSI642SHZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR64,
                                            v8f16x_info, i64mem, loadi64, "cvtusi2sh", "q">,
                                            T_MAP5XS, REX_W, EVEX_CD8<64, CD8VT1>;
  def : InstAlias<"vcvtsi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
                  (VCVTSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;

  def : InstAlias<"vcvtusi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
                  (VCVTUSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;

  def : Pat<(f16 (any_sint_to_fp (loadi32 addr:$src))),
            (VCVTSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f16 (any_sint_to_fp (loadi64 addr:$src))),
            (VCVTSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;

  def : Pat<(f16 (any_sint_to_fp GR32:$src)),
            (VCVTSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
  def : Pat<(f16 (any_sint_to_fp GR64:$src)),
            (VCVTSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;

  def : Pat<(f16 (any_uint_to_fp (loadi32 addr:$src))),
            (VCVTUSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f16 (any_uint_to_fp (loadi64 addr:$src))),
            (VCVTUSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;

  def : Pat<(f16 (any_uint_to_fp GR32:$src)),
            (VCVTUSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
  def : Pat<(f16 (any_uint_to_fp GR64:$src)),
            (VCVTUSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;

  // Patterns used for matching vcvtsi2sh intrinsic sequences from clang
  // which produce unnecessary vmovsh instructions.
  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR64:$src)))))),
            (VCVTSI642SHZrr_Int VR128X:$dst, GR64:$src)>;

  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi64 addr:$src))))))),
            (VCVTSI642SHZrm_Int VR128X:$dst, addr:$src)>;

  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR32:$src)))))),
            (VCVTSI2SHZrr_Int VR128X:$dst, GR32:$src)>;

  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi32 addr:$src))))))),
            (VCVTSI2SHZrm_Int VR128X:$dst, addr:$src)>;

  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR64:$src)))))),
            (VCVTUSI642SHZrr_Int VR128X:$dst, GR64:$src)>;

  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi64 addr:$src))))))),
            (VCVTUSI642SHZrm_Int VR128X:$dst, addr:$src)>;
  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR32:$src)))))),
            (VCVTUSI2SHZrr_Int VR128X:$dst, GR32:$src)>;

  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi32 addr:$src))))))),
            (VCVTUSI2SHZrm_Int VR128X:$dst, addr:$src)>;
} // Predicates = [HasFP16]

let Predicates = [HasFP16, HasVLX] in {
  // Special patterns to allow use of X86VMSintToFP for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v8f16 (X86any_VSintToFP (v4i64 VR256X:$src))),
            (VCVTQQ2PHZ256rr VR256X:$src)>;
  def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
                           VK4WM:$mask),
            (VCVTQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
  def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
                           VK4WM:$mask),
            (VCVTQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;

  def : Pat<(v8f16 (X86any_VSintToFP (loadv4i64 addr:$src))),
            (VCVTQQ2PHZ256rm addr:$src)>;
  def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
                           VK4WM:$mask),
            (VCVTQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
                           VK4WM:$mask),
            (VCVTQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_VSintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
            (VCVTQQ2PHZ256rmb addr:$src)>;
  def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
                           (v8f16 VR128X:$src0), VK4WM:$mask),
            (VCVTQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
                           v8f16x_info.ImmAllZerosV, VK4WM:$mask),
            (VCVTQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_VSintToFP (v2i64 VR128X:$src))),
            (VCVTQQ2PHZ128rr VR128X:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v8f16 (X86any_VSintToFP (loadv2i64 addr:$src))),
            (VCVTQQ2PHZ128rm addr:$src)>;
  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
            (VCVTQQ2PHZ128rmb addr:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
                           (v8f16 VR128X:$src0), VK2WM:$mask),
            (VCVTQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
                           v8f16x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;

  // Special patterns to allow use of X86VMUintToFP for masking. Instruction
  // patterns have been disabled with null_frag.
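  // For illustration (hypothetical operands): the masked selections below
  // render as
  //   vcvtuqq2ph %ymm1, %xmm0 {%k1}       # merge masking, 256-bit source
  //   vcvtuqq2ph %xmm1, %xmm0 {%k1} {z}   # zero masking, 128-bit source
  // mirroring the signed patterns above.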
  def : Pat<(v8f16 (X86any_VUintToFP (v4i64 VR256X:$src))),
            (VCVTUQQ2PHZ256rr VR256X:$src)>;
  def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
                           VK4WM:$mask),
            (VCVTUQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
  def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
                           VK4WM:$mask),
            (VCVTUQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;

  def : Pat<(v8f16 (X86any_VUintToFP (loadv4i64 addr:$src))),
            (VCVTUQQ2PHZ256rm addr:$src)>;
  def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
                           VK4WM:$mask),
            (VCVTUQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
                           VK4WM:$mask),
            (VCVTUQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_VUintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
            (VCVTUQQ2PHZ256rmb addr:$src)>;
  def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
                           (v8f16 VR128X:$src0), VK4WM:$mask),
            (VCVTUQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
                           v8f16x_info.ImmAllZerosV, VK4WM:$mask),
            (VCVTUQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_VUintToFP (v2i64 VR128X:$src))),
            (VCVTUQQ2PHZ128rr VR128X:$src)>;
  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTUQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTUQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v8f16 (X86any_VUintToFP (loadv2i64 addr:$src))),
            (VCVTUQQ2PHZ128rm addr:$src)>;
  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTUQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTUQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
            (VCVTUQQ2PHZ128rmb addr:$src)>;
  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
                           (v8f16 VR128X:$src0), VK2WM:$mask),
            (VCVTUQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
                           v8f16x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTUQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
}
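// The complex FP16 multiply-add definitions below combine "$src1 = $dst" (the
// accumulator is tied to the destination) with "@earlyclobber $dst", so the
// register allocator never assigns $dst to the same register as $src2 or
// $src3; only the tied accumulator may overlap the destination.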
let Constraints = "@earlyclobber $dst, $src1 = $dst" in {
  multiclass avx512_cfmaop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86VectorVTInfo _, bit IsCommutable> {
    defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3),
                      OpcodeStr, "$src3, $src2", "$src2, $src3",
                      (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)),
                      IsCommutable>, EVEX_4V;

    defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.MemOp:$src3),
                      OpcodeStr, "$src3, $src2", "$src2, $src3",
                      (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3),
                             _.RC:$src1))>, EVEX_4V;

    defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.ScalarMemOp:$src3),
                      OpcodeStr,
                      !strconcat("${src3}", _.BroadcastStr, ", $src2"),
                      !strconcat("$src2, ${src3}", _.BroadcastStr),
                      (_.VT (OpNode _.RC:$src2,
                             (_.VT (_.BroadcastLdFrag addr:$src3)),
                             _.RC:$src1))>, EVEX_B, EVEX_4V;
  }
} // Constraints = "@earlyclobber $dst, $src1 = $dst"

multiclass avx512_cfmaop_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86VectorVTInfo _> {
  let Constraints = "@earlyclobber $dst, $src1 = $dst" in
  defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                    (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
                    OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
                    (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc)))>,
                    EVEX_4V, EVEX_B, EVEX_RC;
}

multiclass avx512_cfmaop_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                SDNode OpNodeRnd, bit IsCommutable> {
  let Predicates = [HasFP16] in {
    defm Z : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v16f32_info, IsCommutable>,
             avx512_cfmaop_round<opc, OpcodeStr, OpNodeRnd, v16f32_info>,
             EVEX_V512, Sched<[WriteFMAZ]>;
  }
  let Predicates = [HasVLX, HasFP16] in {
    defm Z256 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v8f32x_info, IsCommutable>, EVEX_V256, Sched<[WriteFMAY]>;
    defm Z128 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v4f32x_info, IsCommutable>, EVEX_V128, Sched<[WriteFMAX]>;
  }
}

multiclass avx512_cfmulop_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 SDNode MaskOpNode, SDNode OpNodeRnd, bit IsCommutable> {
  let Predicates = [HasFP16] in {
    defm Z : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
                              WriteFMAZ, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>,
             avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, WriteFMAZ, v16f32_info,
                                    "", "@earlyclobber $dst">, EVEX_V512;
  }
  let Predicates = [HasVLX, HasFP16] in {
    defm Z256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
                                 WriteFMAY, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V256;
    defm Z128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
                                 WriteFMAX, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V128;
  }
}

let Uses = [MXCSR] in {
  defm VFMADDCPH : avx512_cfmaop_common<0x56, "vfmaddcph", x86vfmaddc, x86vfmaddcRnd, 1>,
                     T_MAP6XS, EVEX_CD8<32, CD8VF>;
  defm VFCMADDCPH : avx512_cfmaop_common<0x56, "vfcmaddcph", x86vfcmaddc, x86vfcmaddcRnd, 0>,
                     T_MAP6XD, EVEX_CD8<32, CD8VF>;

  defm VFMULCPH : avx512_cfmulop_common<0xD6, "vfmulcph", x86vfmulc, x86vfmulc,
                                        x86vfmulcRnd, 1>, T_MAP6XS, EVEX_CD8<32, CD8VF>;
  defm VFCMULCPH : avx512_cfmulop_common<0xD6, "vfcmulcph", x86vfcmulc,
                                         x86vfcmulc, x86vfcmulcRnd, 0>, T_MAP6XD, EVEX_CD8<32, CD8VF>;
}
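// For illustration (hypothetical operands): the packed complex ops treat each
// even/odd f16 pair as one complex value, e.g.
//   vfmulcph %zmm2, %zmm1, %zmm0             # zmm0 = zmm1 * zmm2, complex
//   vfcmaddcph (%rax){1to16}, %zmm1, %zmm0   # conjugate multiply-add
// Note the broadcast granule is 32 bits: one (real, imag) f16 pair.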
multiclass avx512_cfmaop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
                                   bit IsCommutable> {
  let Predicates = [HasFP16], Constraints = "@earlyclobber $dst, $src1 = $dst" in {
    defm r : AVX512_maskable_3src<opc, MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
                        (ins VR128X:$src2, VR128X:$src3), OpcodeStr,
                        "$src3, $src2", "$src2, $src3",
                        (v4f32 (OpNode VR128X:$src2, VR128X:$src3, VR128X:$src1)), IsCommutable>,
                        Sched<[WriteFMAX]>;
    defm m : AVX512_maskable_3src<opc, MRMSrcMem, v4f32x_info, (outs VR128X:$dst),
                        (ins VR128X:$src2, ssmem:$src3), OpcodeStr,
                        "$src3, $src2", "$src2, $src3",
                        (v4f32 (OpNode VR128X:$src2, (sse_load_f32 addr:$src3), VR128X:$src1))>,
                        Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
    defm rb : AVX512_maskable_3src<opc, MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
                        (ins VR128X:$src2, VR128X:$src3, AVX512RC:$rc), OpcodeStr,
                        "$rc, $src3, $src2", "$src2, $src3, $rc",
                        (v4f32 (OpNodeRnd VR128X:$src2, VR128X:$src3, VR128X:$src1, (i32 timm:$rc)))>,
                        EVEX_B, EVEX_RC, Sched<[WriteFMAX]>;
  }
}

multiclass avx512_cfmbinop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                     SDNode OpNodeRnd, bit IsCommutable> {
  let Predicates = [HasFP16] in {
    defm rr : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
                        (ins VR128X:$src1, VR128X:$src2), OpcodeStr,
                        "$src2, $src1", "$src1, $src2",
                        (v4f32 (OpNode VR128X:$src1, VR128X:$src2)),
                        IsCommutable, IsCommutable, IsCommutable,
                        X86selects, "@earlyclobber $dst">, Sched<[WriteFMAX]>;
    defm rm : AVX512_maskable<opc, MRMSrcMem, f32x_info, (outs VR128X:$dst),
                        (ins VR128X:$src1, ssmem:$src2), OpcodeStr,
                        "$src2, $src1", "$src1, $src2",
                        (v4f32 (OpNode VR128X:$src1, (sse_load_f32 addr:$src2))),
                        0, 0, 0, X86selects, "@earlyclobber $dst">,
                        Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
    defm rrb : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
                        (ins VR128X:$src1, VR128X:$src2, AVX512RC:$rc), OpcodeStr,
                        "$rc, $src2, $src1", "$src1, $src2, $rc",
                        (OpNodeRnd (v4f32 VR128X:$src1), (v4f32 VR128X:$src2), (i32 timm:$rc)),
                        0, 0, 0, X86selects, "@earlyclobber $dst">,
                        EVEX_B, EVEX_RC, Sched<[WriteFMAX]>;
  }
}

let Uses = [MXCSR] in {
  defm VFMADDCSHZ : avx512_cfmaop_sh_common<0x57, "vfmaddcsh", x86vfmaddcSh, x86vfmaddcShRnd, 1>,
                      T_MAP6XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX_4V;
  defm VFCMADDCSHZ : avx512_cfmaop_sh_common<0x57, "vfcmaddcsh", x86vfcmaddcSh, x86vfcmaddcShRnd, 0>,
                      T_MAP6XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX_4V;

  defm VFMULCSHZ : avx512_cfmbinop_sh_common<0xD7, "vfmulcsh", x86vfmulcSh, x86vfmulcShRnd, 1>,
                     T_MAP6XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX_4V;
  defm VFCMULCSHZ : avx512_cfmbinop_sh_common<0xD7, "vfcmulcsh", x86vfcmulcSh, x86vfcmulcShRnd, 0>,
                     T_MAP6XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX_4V;
}
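// For illustration (hypothetical operands): the scalar complex forms operate
// on a single (real, imag) f16 pair held in the low 32 bits, e.g.
//   vfmulcsh %xmm2, %xmm1, %xmm0
//   vfmaddcsh {rn-sae}, %xmm2, %xmm1, %xmm0   # static-rounding variant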