//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 AVX512 instruction set, defining the
// instructions, and properties of the instructions which are needed for code
// generation, machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//

// This multiclass generates the masking variants from the non-masking
// variant. It only provides the assembly pieces for the masking variants.
// It assumes custom ISel patterns for masking which can be provided as
// template arguments.
//
// Three records are produced: NAME (unmasked), NAME#k (merge-masking, EVEX_K)
// and NAME#kz (zero-masking, EVEX_KZ). MaskingConstraint is the tie for the
// merge-masking pass-through operand (e.g. "$src0 = $dst"); ClobberConstraint,
// when non-empty, is applied to the unmasked and zero-masking variants and
// merged into the masking variant's constraint string below.
multiclass AVX512_maskable_custom<bits<8> O, Format F,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  list<dag> MaskingPattern,
                                  list<dag> ZeroMaskingPattern,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable,
                                  string ClobberConstraint = ""> {
  let isCommutable = IsCommutable, Constraints = ClobberConstraint in
  def NAME: AVX512<O, F, Outs, Ins,
                   OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                 "$dst, "#IntelSrcAsm#"}",
                   Pattern>;

  // Prefer over VMOV*rrk Pat<>
  let isCommutable = IsKCommutable in
  def NAME#k: AVX512<O, F, Outs, MaskingIns,
                     OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                   "$dst {${mask}}, "#IntelSrcAsm#"}",
                     MaskingPattern>,
              EVEX_K {
    // In case of the 3src subclass this is overridden with a let.
    // Combine the clobber and masking constraints, dropping whichever is
    // empty; join with ", " when both are present.
    string Constraints = !if(!eq(ClobberConstraint, ""), MaskingConstraint,
                             !if(!eq(MaskingConstraint, ""), ClobberConstraint,
                                 !strconcat(ClobberConstraint, ", ", MaskingConstraint)));
  }

  // Zero mask does not add any restrictions to commute operands transformation.
  // So, it is Ok to use IsCommutable instead of IsKCommutable.
  let isCommutable = IsKZCommutable, // Prefer over VMOV*rrkz Pat<>
      Constraints = ClobberConstraint in
  def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
                      OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
                                    "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
                      ZeroMaskingPattern>,
               EVEX_KZ;
}


// Common base class of AVX512_maskable and AVX512_maskable_3src.
// Select is the mask-merge node used to build the zero-masking pattern
// (vselect_mask for vector ops; X86selects_mask is passed by the scalar
// variants elsewhere in this file).
multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS,
                                  SDPatternOperator Select = vselect_mask,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable,
                                  string ClobberConstraint = ""> :
  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst, MaskingRHS)],
                         [(set _.RC:$dst,
                               (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
                         MaskingConstraint, IsCommutable,
                         IsKCommutable, IsKZCommutable, ClobberConstraint>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
// This version uses a separate dag for non-masking and masking.
// RHS drives the unmasked pattern only; MaskRHS feeds both the merge-masking
// and zero-masking patterns (wrapped in vselect_mask against $src0 or the
// all-zeros vector respectively). The merge-masking pass-through operand
// $src0 is tied to $dst.
multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
                                 dag Outs, dag Ins, string OpcodeStr,
                                 string AttSrcAsm, string IntelSrcAsm,
                                 dag RHS, dag MaskRHS,
                                 string ClobberConstraint = "",
                                 bit IsCommutable = 0, bit IsKCommutable = 0,
                                 bit IsKZCommutable = IsCommutable> :
  AVX512_maskable_custom<O, F, Outs, Ins,
                         !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                         !con((ins _.KRCWM:$mask), Ins),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst,
                               (vselect_mask _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
                         [(set _.RC:$dst,
                               (vselect_mask _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
                         "$src0 = $dst", IsCommutable, IsKCommutable,
                         IsKZCommutable, ClobberConstraint>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS,
                           bit IsCommutable = 0, bit IsKCommutable = 0,
                           bit IsKZCommutable = IsCommutable,
                           SDPatternOperator Select = vselect_mask,
                           string ClobberConstraint = ""> :
  AVX512_maskable_common<O, F, _, Outs, Ins,
                         !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                         !con((ins _.KRCWM:$mask), Ins),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                         (Select _.KRCWM:$mask, RHS, _.RC:$src0),
                         Select, "$src0 = $dst", IsCommutable, IsKCommutable,
                         IsKZCommutable, ClobberConstraint>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the scalar instruction.
// Scalar variant: selects elements with X86selects_mask and disables all
// commutability (the 0, 0, 0 arguments below).
multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins, string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS> :
  AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
                  RHS, 0, 0, 0, X86selects_mask>;

// Similar to AVX512_maskable but in this case one of the source operands
// ($src1) is already tied to $dst so we just use that for the preserved
// vector elements. NOTE that the NonTiedIns (the ins dag) should exclude
// $src1.
// When MaskOnly is set the unmasked pattern is suppressed (null_frag).
// No MaskingConstraint is passed here ("") — the $src1 = $dst tie is
// expected to come from a let at the instantiation site; see the note in
// AVX512_maskable_custom.
multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
                                dag Outs, dag NonTiedIns, string OpcodeStr,
                                string AttSrcAsm, string IntelSrcAsm,
                                dag RHS,
                                bit IsCommutable = 0,
                                bit IsKCommutable = 0,
                                SDPatternOperator Select = vselect_mask,
                                bit MaskOnly = 0> :
  AVX512_maskable_common<O, F, _, Outs,
                         !con((ins _.RC:$src1), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm,
                         !if(MaskOnly, (null_frag), RHS),
                         (Select _.KRCWM:$mask, RHS, _.RC:$src1),
                         Select, "", IsCommutable, IsKCommutable>;

// Similar to AVX512_maskable_3src but in this case the input VT for the tied
// operand differs from the output VT. This requires a bitconvert on
// the preserved vector going into the vselect.
// NOTE: The unmasked pattern is disabled.
multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
                                     X86VectorVTInfo InVT,
                                     dag Outs, dag NonTiedIns, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS, bit IsCommutable = 0> :
  AVX512_maskable_common<O, F, OutVT, Outs,
                         !con((ins InVT.RC:$src1), NonTiedIns),
                         !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                         !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
                         (vselect_mask InVT.KRCWM:$mask, RHS,
                          (bitconvert InVT.RC:$src1)),
                         vselect_mask, "", IsCommutable>;

// Scalar flavor of AVX512_maskable_3src: merges/zeroes with X86selects_mask.
multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                       dag Outs, dag NonTiedIns, string OpcodeStr,
                                       string AttSrcAsm, string IntelSrcAsm,
                                       dag RHS,
                                       bit IsCommutable = 0,
                                       bit IsKCommutable = 0,
                                       bit MaskOnly = 0> :
  AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
                       IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
                       X86selects_mask, MaskOnly>;

// Assembly-only masking variants: only the unmasked Pattern is provided; the
// k/kz records get empty pattern lists.
multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern> :
  AVX512_maskable_custom<O, F, Outs, Ins,
                         !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                         !con((ins _.KRCWM:$mask), Ins),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                         "$src0 = $dst">;

// Assembly-only variant of AVX512_maskable_3src (tied $src1, no masking
// patterns, no masking constraint).
multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                       dag Outs, dag NonTiedIns,
                                       string OpcodeStr,
                                       string AttSrcAsm, string IntelSrcAsm,
                                       list<dag> Pattern> :
  AVX512_maskable_custom<O, F, Outs,
                         !con((ins _.RC:$src1), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                         "">;

// Instruction with mask that puts result in mask register,
// like "compare" and "vptest"
// Generates the unmasked (NAME) and merge-masking (NAME#k, EVEX_K) records
// for instructions whose destination is a mask register; there is no
// zero-masking form for these.
multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
                                      dag Outs,
                                      dag Ins, dag MaskingIns,
                                      string OpcodeStr,
                                      string AttSrcAsm, string IntelSrcAsm,
                                      list<dag> Pattern,
                                      list<dag> MaskingPattern,
                                      bit IsCommutable = 0> {
  let isCommutable = IsCommutable in {
    def NAME: AVX512<O, F, Outs, Ins,
                     OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                   "$dst, "#IntelSrcAsm#"}",
                     Pattern>;

    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
                       MaskingPattern>, EVEX_K;
  }
}

// Wraps the RHS dags in (set _.KRC:$dst, ...) — the result lands in the
// mask register class of the vector type info.
multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                                      dag Outs,
                                      dag Ins, dag MaskingIns,
                                      string OpcodeStr,
                                      string AttSrcAsm, string IntelSrcAsm,
                                      dag RHS, dag MaskingRHS,
                                      bit IsCommutable = 0> :
  AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
                             AttSrcAsm, IntelSrcAsm,
                             [(set _.KRC:$dst, RHS)],
                             [(set _.KRC:$dst, MaskingRHS)], IsCommutable>;

// The masked pattern ANDs the write-mask with RHS_su (the "single use"
// form of the comparison node).
multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                               dag Outs, dag Ins, string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, dag RHS_su, bit IsCommutable = 0> :
  AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
                             !con((ins _.KRCWM:$mask), Ins),
                             OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                             (and _.KRCWM:$mask, RHS_su), IsCommutable>;

// Used by conversion instructions.
// Caller supplies fully-formed RHS dags for all three variants (unmasked,
// merge-masking, zero-masking); the merge pass-through $src0 is tied to $dst.
multiclass AVX512_maskable_cvt<bits<8> O, Format F, X86VectorVTInfo _,
                               dag Outs,
                               dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                               string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, dag MaskingRHS, dag ZeroMaskingRHS> :
  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst, MaskingRHS)],
                         [(set _.RC:$dst, ZeroMaskingRHS)],
                         "$src0 = $dst">;

// FMA-style: $src1 is both a source and the merge-masking pass-through, and a
// separate MaskingRHS dag is selected against $src1 / all-zeros. No masking
// constraint is passed ("") — see the 3src note in AVX512_maskable_custom.
multiclass AVX512_maskable_fma<bits<8> O, Format F, X86VectorVTInfo _,
                               dag Outs, dag NonTiedIns, string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, dag MaskingRHS, bit IsCommutable,
                               bit IsKCommutable> :
  AVX512_maskable_custom<O, F, Outs,
                         !con((ins _.RC:$src1), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst,
                               (vselect_mask _.KRCWM:$mask, MaskingRHS, _.RC:$src1))],
                         [(set _.RC:$dst,
                               (vselect_mask _.KRCWM:$mask, MaskingRHS, _.ImmAllZerosV))],
                         "", IsCommutable, IsKCommutable>;

// Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
// swizzled by ExecutionDomainFix to pxor.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
               [(set VR512:$dst, (v16i32 immAllZerosV))]>;
def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
               [(set VR512:$dst, (v16i32 immAllOnesV))]>;
}

// Fold the other 512-bit all-zeros vector types to the same pseudo; the
// canonical pattern above is written in terms of v16i32.
let Predicates = [HasAVX512] in {
def : Pat<(v64i8 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v32i16 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v32f16 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
}

// Alias instructions that allow VPTERNLOG to be used with a mask to create
// a mix of all ones and all zeros elements. This is done this way to force
// the same register to be used as input for all three sources.
let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK16WM:$mask), "",
                                [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
                                                           (v16i32 immAllOnesV),
                                                           (v16i32 immAllZerosV)))]>;
def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK8WM:$mask), "",
                                [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
                                                           (v8i64 immAllOnesV),
                                                           (v8i64 immAllZerosV)))]>;
}

// 128-/256-bit zero pseudos (EVEX register classes), analogous to
// AVX512_512_SET0 above.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
               [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
               [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
}

// Fold the remaining 128-/256-bit all-zeros vector types to the pseudos.
let Predicates = [HasAVX512] in {
def : Pat<(v8i16 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v16i8 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v2i64 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v8f16 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v4f32 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v2f64 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v32i8 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v16i16 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v4i64 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v16f16 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v8f32 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v4f64 immAllZerosV), (AVX512_256_SET0)>;
}

// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
  def AVX512_FsFLD0SH : I<0, Pseudo, (outs FR16X:$dst), (ins), "",
                          [(set FR16X:$dst, fp16imm0)]>;
  def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
                          [(set FR32X:$dst, fp32imm0)]>;
  def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
                          [(set FR64X:$dst, fp64imm0)]>;
  def AVX512_FsFLD0F128 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
                            [(set VR128X:$dst, fp128imm0)]>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - VECTOR INSERT
//

// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one.
// Defines the rr (register) and rm (memory) forms of a VINSERT instruction,
// each with the full masked/unmasked variant set via AVX512_maskable_split.
// vinsert_insert drives the unmasked pattern, vinsert_for_mask the masked
// ones; either may be null_frag.
multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
                                  X86VectorVTInfo To,
                                  SDPatternOperator vinsert_insert,
                                  SDPatternOperator vinsert_for_mask,
                                  X86FoldableSchedWrite sched> {
  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                                         (From.VT From.RC:$src2),
                                         (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                                           (From.VT From.RC:$src2),
                                           (iPTR imm))>,
                   AVX512AIi8Base, EVEX, VVVV, Sched<[sched]>;
    let mayLoad = 1 in
    defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                                         (From.VT (From.LdFrag addr:$src2)),
                                         (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                                           (From.VT (From.LdFrag addr:$src2)),
                                           (iPTR imm))>, AVX512AIi8Base, EVEX, VVVV,
                   EVEX_CD8<From.EltSize, From.CD8TupleForm>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Passes the same pattern operator for masked and unmasked ops.
multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
                            X86VectorVTInfo To,
                            SDPatternOperator vinsert_insert,
                            X86FoldableSchedWrite sched> :
  vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>;

// Maps a subvector-insert node on alternative element types onto an existing
// VINSERT instruction (named by InstrStr, "rr"/"rm" suffixed). The XForm
// converts the matched insert index into the instruction immediate.
multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                                     X86VectorVTInfo To, PatFrag vinsert_insert,
                                     SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> {
  let Predicates = p in {
    def : Pat<(vinsert_insert:$ins
                   (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                     To.RC:$src1, From.RC:$src2,
                     (INSERT_get_vinsert_imm To.RC:$ins)))>;

    def : Pat<(vinsert_insert:$ins
                  (To.VT To.RC:$src1),
                  (From.VT (From.LdFrag addr:$src2)),
                  (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rm")
                  To.RC:$src1, addr:$src2,
                  (INSERT_get_vinsert_imm To.RC:$ins)))>;
  }
}

// Instantiates the 32x4/64x4 (AVX512/VLX) and 64x2/32x8 (DQI) VINSERT
// variants for one element-type pair; NAME is VINSERTF or VINSERTI below.
multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
                            ValueType EltVT64, int Opcode256,
                            X86FoldableSchedWrite sched> {

  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
                                   X86VectorVTInfo< 4, EltVT32, VR128X>,
                                   X86VectorVTInfo< 8, EltVT32, VR256X>,
                                   vinsert128_insert, sched>, EVEX_V256;

  defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 vinsert128_insert, sched>, EVEX_V512;

  defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 vinsert256_insert, sched>, REX_W, EVEX_V512;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
                                   X86VectorVTInfo< 2, EltVT64, VR128X>,
                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
                                   null_frag, vinsert128_insert, sched>,
                                   EVEX_V256, REX_W;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 null_frag, vinsert128_insert, sched>,
                                 REX_W, EVEX_V512;

    defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 null_frag, vinsert256_insert, sched>,
                                 EVEX_V512;
  }
}

// FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>;
defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>;

// Codegen pattern with the alternative types,
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;

defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;

defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;

// Codegen pattern with the alternative types insert VEC128 into VEC256
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v8f16x_info, v16f16x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v8bf16x_info, v16bf16x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
// Codegen pattern with the alternative types insert VEC128 into VEC512
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v8f16x_info, v32f16_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v8bf16x_info, v32bf16_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
// Codegen pattern with the alternative types insert VEC256 into VEC512
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v16f16x_info, v32f16_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v16bf16x_info, v32bf16_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;


// Maps a masked subvector-insert seen through a bitcast (Cast is the type the
// vselect_mask operates on) onto the k/kz forms of an existing VINSERT
// instruction. Four patterns: rrk, rmk (merge) and rrkz, rmkz (zero).
multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                 X86VectorVTInfo To, X86VectorVTInfo Cast,
                                 PatFrag vinsert_insert,
                                 SDNodeXForm INSERT_get_vinsert_imm,
                                 list<Predicate> p> {
let Predicates = p in {
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT From.RC:$src2),
                                                 (iPTR imm))),
                           Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rrk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT
                                                  (bitconvert
                                                   (From.LdFrag addr:$src2))),
                                                 (iPTR imm))),
                           Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rmk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;

  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT From.RC:$src2),
                                                 (iPTR imm))),
                           Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rrkz")
             Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT (From.LdFrag addr:$src2)),
                                                 (iPTR imm))),
                           Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rmkz")
             Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
}
}

defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
                             v8f32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
                             v4f64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4i32x_info, v8i32x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v8i16x_info, v16i16x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v16i8x_info, v32i8x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
                             v16f32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
                             v8f64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
                             v16f32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
                             v8f64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

// vinsertps - insert f32 to XMM
let ExeDomain = SSEPackedSingle in {
let isCommutable = 1 in
def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
      (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, timm:$src3))]>,
      EVEX, VVVV, Sched<[SchedWriteFShuffle.XMM]>;
def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
      (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1,
                          (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                          timm:$src3))]>,
      EVEX, VVVV, EVEX_CD8<32, CD8VT1>,
      Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
}

//===----------------------------------------------------------------------===//
// AVX-512 VECTOR EXTRACT
//---

// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one.
// Defines the rr (register destination), mr (store to memory) and mrk
// (masked store, assembly-only) forms of a VEXTRACT instruction.
// vextract_extract drives the unmasked patterns, vextract_for_mask the
// masked rr variants; either may be null_frag.
multiclass vextract_for_size_split<int Opcode,
                                   X86VectorVTInfo From, X86VectorVTInfo To,
                                   SDPatternOperator vextract_extract,
                                   SDPatternOperator vextract_for_mask,
                                   SchedWrite SchedRR, SchedWrite SchedMR> {

  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
                (ins From.RC:$src1, u8imm:$idx),
                "vextract" # To.EltTypeName # "x" # To.NumElts,
                "$idx, $src1", "$src1, $idx",
                (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
                (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
                AVX512AIi8Base, EVEX, Sched<[SchedRR]>;

    def mr : AVX512AIi8<Opcode, MRMDestMem, (outs),
                    (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
                    "vextract" # To.EltTypeName # "x" # To.NumElts #
                        "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
                    [(store (To.VT (vextract_extract:$idx
                                    (From.VT From.RC:$src1), (iPTR imm))),
                             addr:$dst)]>, EVEX,
                    Sched<[SchedMR]>;

    // Masked store form has no ISel pattern; selected manually elsewhere.
    let mayStore = 1, hasSideEffects = 0 in
    def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
                         (ins To.MemOp:$dst, To.KRCWM:$mask,
                              From.RC:$src1, u8imm:$idx),
                         "vextract" # To.EltTypeName # "x" # To.NumElts #
                              "\t{$idx, $src1, $dst {${mask}}|"
                              "$dst {${mask}}, $src1, $idx}", []>,
                         EVEX_K, EVEX, Sched<[SchedMR]>;
  }
}

// Passes the same pattern operator for masked and unmasked ops.
multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
                             X86VectorVTInfo To,
                             SDPatternOperator vextract_extract,
                             SchedWrite SchedRR, SchedWrite SchedMR> :
  vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract, SchedRR, SchedMR>;

// Codegen pattern for the alternative types
// Maps a subvector-extract (and its store form) on alternative element types
// onto an existing VEXTRACT instruction (named by InstrStr, "rr"/"mr"
// suffixed); the XForm converts the extract index into the immediate.
multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                X86VectorVTInfo To, PatFrag vextract_extract,
                SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
  let Predicates = p in {
    def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                         From.RC:$src1,
                         (EXTRACT_get_vextract_imm To.RC:$ext)))>;
    def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
                             (iPTR imm))), addr:$dst),
              (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1,
               (EXTRACT_get_vextract_imm To.RC:$ext))>;
  }
}

// Instantiates the 32x4/64x4 (AVX512/VLX) and 64x2/32x8 (DQI) VEXTRACT
// variants for one element-type pair; NAME is VEXTRACTF or VEXTRACTI below.
multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
                             ValueType EltVT64, int Opcode256,
                             SchedWrite SchedRR, SchedWrite SchedMR> {
  let Predicates = [HasAVX512] in {
    defm NAME # "32x4Z" : vextract_for_size<Opcode128,
                                   X86VectorVTInfo<16, EltVT32, VR512>,
                                   X86VectorVTInfo< 4, EltVT32, VR128X>,
                                   vextract128_extract, SchedRR, SchedMR>,
                                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
    defm NAME # "64x4Z" : vextract_for_size<Opcode256,
                                   X86VectorVTInfo< 8, EltVT64, VR512>,
                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
                                   vextract256_extract, SchedRR, SchedMR>,
                                       REX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
  }
  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 vextract128_extract, SchedRR, SchedMR>,
                                     EVEX_V256, EVEX_CD8<32, CD8VT4>;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
                                     EVEX_V256, EVEX_CD8<64, CD8VT2>, REX_W;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vextract_for_size_split<Opcode128,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
                                     REX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
    defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 null_frag, vextract256_extract, SchedRR, SchedMR>,
                                     EVEX_V512, EVEX_CD8<32, CD8VT8>;
  }
}

// TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, WriteFShuffle256, WriteFStore>;
defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, WriteShuffle256, WriteVecStore>;

// extract_subvector codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.
// Reuse the 32x4/64x4 instructions for element types they were not defined
// for (64-bit elements via 32x4, and the i16/i8/f16/bf16 types), so unmasked
// extracts never need the DQI-only encodings.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen pattern with the alternative types extract VEC128 from VEC256
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v16f16x_info, v8f16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v16bf16x_info, v8bf16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen pattern with the alternative types extract VEC128 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v32f16_info, v8f16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v32bf16_info, v8bf16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;

// Codegen pattern with the alternative types extract VEC256 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v32f16_info, v16f16x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v32bf16_info, v16bf16x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;


// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
// Without VLX, first narrow ZMM to its low YMM (a plain subregister copy)
// and extract lane 1 with the AVX1/2 VEXTRACT*128, which can later be
// compressed from EVEX to VEX encoding.
let Predicates = [NoVLX, HasEVEX512] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI128rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF128rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI128rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF128rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI128rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))),
          (v8f16 (VEXTRACTF128rr
                  (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI128rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}

// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
// With VLX, use the 256-bit-source VEXTRACT*32x4Z256 after narrowing ZMM to
// YMM, again to keep the encoding EVEX->VEX compressible.
let Predicates = [HasVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI32x4Z256rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF32x4Z256rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI32x4Z256rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF32x4Z256rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI32x4Z256rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))),
          (v8f16 (VEXTRACTF32x4Z256rr
                  (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI32x4Z256rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}


// Additional patterns for handling a bitcast between the vselect and the
// extract_subvector.
// Matches a masked extract whose result is observed through a bitcast:
// vselect_mask(mask, bitconvert(extract), src0/zero) in the Cast type is
// mapped to the masked (rrk) or zero-masked (rrkz) form of an existing
// vextract instruction. Cast is the type the select operates in; To is the
// type actually produced by the extract.
multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                  X86VectorVTInfo To, X86VectorVTInfo Cast,
                                  PatFrag vextract_extract,
                                  SDNodeXForm EXTRACT_get_vextract_imm,
                                  list<Predicate> p> {
let Predicates = p in {
  // Merge-masking form.
  def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
                                   (bitconvert
                                    (To.VT (vextract_extract:$ext
                                            (From.VT From.RC:$src), (iPTR imm)))),
                                   To.RC:$src0)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
                      Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;

  // Zero-masking form.
  def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
                                   (bitconvert
                                    (To.VT (vextract_extract:$ext
                                            (From.VT From.RC:$src), (iPTR imm)))),
                                   Cast.ImmAllZerosV)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
                      Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
}
}

defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
                              v8f32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
                              v4f64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;

// vextractps - extract 32 bits from XMM
def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32orGR64:$dst),
                      (ins VR128X:$src1, u8imm:$src2),
                      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set GR32orGR64:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
                      EVEX, WIG, Sched<[WriteVecExtract]>;

def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
                      (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
                      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
                              addr:$dst)]>,
                      EVEX, WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;

//===---------------------------------------------------------------------===//
// AVX-512 BROADCAST
//---
// broadcast with a scalar argument.
// Patterns selecting a broadcast of a scalar FP register (FRC) onto the
// corresponding vector broadcast instruction; the scalar is first viewed as
// a full XMM via COPY_TO_REGCLASS. Covers plain, merge-masked and
// zero-masked forms.
multiclass avx512_broadcast_scalar<string Name, X86VectorVTInfo DestInfo,
                                   X86VectorVTInfo SrcInfo> {
  def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rr)
             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
                          (X86VBroadcast SrcInfo.FRC:$src),
                          DestInfo.RC:$src0)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rrk)
             DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
                          (X86VBroadcast SrcInfo.FRC:$src),
                          DestInfo.ImmAllZerosV)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rrkz)
             DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
}

// Split version to allow mask and broadcast node to be different types. This
// helps support the 32x2 broadcasts.
// Generates rr/rrk/rrkz and rm/rmk/rmkz. The unmasked forms use the
// overridable UnmaskedOp/UnmaskedBcastOp (null_frag disables them); the
// masked forms always use X86VBroadcast / SrcInfo.BroadcastLdFrag.
multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
                                     SchedWrite SchedRR, SchedWrite SchedRM,
                                     X86VectorVTInfo MaskInfo,
                                     X86VectorVTInfo DestInfo,
                                     X86VectorVTInfo SrcInfo,
                                     bit IsConvertibleToThreeAddress,
                                     SDPatternOperator UnmaskedOp = X86VBroadcast,
                                     SDPatternOperator UnmaskedBcastOp = SrcInfo.BroadcastLdFrag> {
  let hasSideEffects = 0 in
  def rr : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), (ins SrcInfo.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set MaskInfo.RC:$dst,
                      (MaskInfo.VT
                       (bitconvert
                        (DestInfo.VT
                         (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))],
                    DestInfo.ExeDomain>, T8, PD, EVEX, Sched<[SchedRR]>;
  def rrkz : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
                      (ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                                 "${dst} {${mask}} {z}, $src}"),
                      [(set MaskInfo.RC:$dst,
                        (vselect_mask MaskInfo.KRCWM:$mask,
                         (MaskInfo.VT
                          (bitconvert
                           (DestInfo.VT
                            (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
                         MaskInfo.ImmAllZerosV))],
                      DestInfo.ExeDomain>, T8, PD, EVEX, EVEX_KZ, Sched<[SchedRR]>;
  let Constraints = "$src0 = $dst" in
  def rrk : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
                     (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
                          SrcInfo.RC:$src),
                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
                                "${dst} {${mask}}, $src}"),
                     [(set MaskInfo.RC:$dst,
                       (vselect_mask MaskInfo.KRCWM:$mask,
                        (MaskInfo.VT
                         (bitconvert
                          (DestInfo.VT
                           (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
                        MaskInfo.RC:$src0))],
                     DestInfo.ExeDomain>, T8, PD, EVEX, EVEX_K, Sched<[SchedRR]>;

  let hasSideEffects = 0, mayLoad = 1 in
  def rm : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
                    (ins SrcInfo.ScalarMemOp:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set MaskInfo.RC:$dst,
                      (MaskInfo.VT
                       (bitconvert
                        (DestInfo.VT
                         (UnmaskedBcastOp addr:$src)))))],
                    DestInfo.ExeDomain>, T8, PD, EVEX,
                    EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;

  def rmkz : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
                      (ins MaskInfo.KRCWM:$mask, SrcInfo.ScalarMemOp:$src),
                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                                 "${dst} {${mask}} {z}, $src}"),
                      [(set MaskInfo.RC:$dst,
                        (vselect_mask MaskInfo.KRCWM:$mask,
                         (MaskInfo.VT
                          (bitconvert
                           (DestInfo.VT
                            (SrcInfo.BroadcastLdFrag addr:$src)))),
                         MaskInfo.ImmAllZerosV))],
                      DestInfo.ExeDomain>, T8, PD, EVEX, EVEX_KZ,
                      EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;

  let Constraints = "$src0 = $dst",
      isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
  def rmk : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
                     (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
                          SrcInfo.ScalarMemOp:$src),
                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
                                "${dst} {${mask}}, $src}"),
                     [(set MaskInfo.RC:$dst,
                       (vselect_mask MaskInfo.KRCWM:$mask,
                        (MaskInfo.VT
                         (bitconvert
                          (DestInfo.VT
                           (SrcInfo.BroadcastLdFrag addr:$src)))),
                        MaskInfo.RC:$src0))],
                     DestInfo.ExeDomain>, T8, PD, EVEX, EVEX_K,
                     EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
}

// Helper class to force mask and broadcast result to same type.
multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
                               SchedWrite SchedRR, SchedWrite SchedRM,
                               X86VectorVTInfo DestInfo,
                               X86VectorVTInfo SrcInfo,
                               bit IsConvertibleToThreeAddress> :
  avx512_broadcast_rm_split<opc, OpcodeStr, SchedRR, SchedRM,
                            DestInfo, DestInfo, SrcInfo,
                            IsConvertibleToThreeAddress>;

// VBROADCASTSD: 512- and 256-bit forms only (no 128-bit register form).
multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                 WriteFShuffle256Ld, _.info512, _.info128, 1>,
             avx512_broadcast_scalar<NAME, _.info512, _.info128>,
             EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                    WriteFShuffle256Ld, _.info256, _.info128, 1>,
                avx512_broadcast_scalar<NAME, _.info256, _.info128>,
                EVEX_V256;
  }
}

// VBROADCASTSS: 512-, 256- and 128-bit forms.
multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                 WriteFShuffle256Ld, _.info512, _.info128, 1>,
             avx512_broadcast_scalar<NAME, _.info512, _.info128>,
             EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                    WriteFShuffle256Ld, _.info256, _.info128, 1>,
                avx512_broadcast_scalar<NAME, _.info256, _.info128>,
                EVEX_V256;
    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                    WriteFShuffle256Ld, _.info128, _.info128, 1>,
                avx512_broadcast_scalar<NAME, _.info128, _.info128>,
                EVEX_V128;
  }
}
defm VBROADCASTSS : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
                                           avx512vl_f32_info>;
defm VBROADCASTSD : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
                                           avx512vl_f64_info>, REX_W;

// Broadcast of a 32/64-bit GPR into a vector (vpbroadcastd/q GPR form).
multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
                                    X86VectorVTInfo _, SDPatternOperator OpNode,
                                    RegisterClass SrcRC> {
  // Fold with a mask even if it has multiple uses since it is cheap.
  let ExeDomain = _.ExeDomain in
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins SrcRC:$src),
                            "vpbroadcast"#_.Suffix, "$src", "$src",
                            (_.VT (OpNode SrcRC:$src)), /*IsCommutable*/0,
                            /*IsKCommutable*/0, /*IsKZCommutable*/0, vselect>,
                            T8, PD, EVEX, Sched<[SchedRR]>;
}

// Broadcast of an 8/16-bit GPR: the instruction takes a GR32 operand, so
// the instruction defs carry no patterns and explicit Pats insert the
// subregister into an undef GR32 first.
multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
                                      X86VectorVTInfo _, SDPatternOperator OpNode,
                                      RegisterClass SrcRC, SubRegIndex Subreg> {
  let hasSideEffects = 0, ExeDomain = _.ExeDomain in
  defm rr : AVX512_maskable_custom<opc, MRMSrcReg,
                                   (outs _.RC:$dst), (ins GR32:$src),
                                   !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
                                   !con((ins _.KRCWM:$mask), (ins GR32:$src)),
                                   "vpbroadcast"#_.Suffix, "$src", "$src", [], [], [],
                                   "$src0 = $dst">, T8, PD, EVEX, Sched<[SchedRR]>;

  def : Pat <(_.VT (OpNode SrcRC:$src)),
             (!cast<Instruction>(Name#rr)
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  // Fold with a mask even if it has multiple uses since it is cheap.
  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
             (!cast<Instruction>(Name#rrk) _.RC:$src0, _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
             (!cast<Instruction>(Name#rrkz) _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
}

// Instantiates the byte/word GPR broadcast at all three vector lengths.
multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
                                         AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
                                         RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
                                        OpNode, SrcRC, Subreg>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
                                           _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
    defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
                                           _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
  }
}

// Instantiates the dword/qword GPR broadcast at all three vector lengths.
multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
                                       SDPatternOperator OpNode,
                                       RegisterClass SrcRC, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
                                      SrcRC>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
                                         SrcRC>, EVEX_V256;
    defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
                                         SrcRC>, EVEX_V128;
  }
}

defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
                       avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
                       avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
                       HasBWI>;
// Note: D and Q forms share opcode 0x7C; Q is distinguished by REX_W.
defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
                                                 X86VBroadcast, GR32, HasAVX512>;
defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
                                                 X86VBroadcast, GR64, HasAVX512>, REX_W;

// Element broadcast from XMM register / scalar memory at all vector lengths.
multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
                                      AVX512VLVectorVTInfo _, Predicate prd,
                                      bit IsConvertibleToThreeAddress> {
  let Predicates = [prd] in {
    defm Z :   avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
                                   WriteShuffle256Ld, _.info512, _.info128,
                                   IsConvertibleToThreeAddress>,
                                   EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
                                    WriteShuffle256Ld, _.info256, _.info128,
                                    IsConvertibleToThreeAddress>,
                                    EVEX_V256;
    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle,
                                    WriteShuffleXLd, _.info128, _.info128,
                                    IsConvertibleToThreeAddress>,
                                    EVEX_V128;
  }
}

defm VPBROADCASTB  : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
                                                avx512vl_i8_info, HasBWI, 0>;
defm VPBROADCASTW  : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
                                                avx512vl_i16_info, HasBWI, 0>;
defm VPBROADCASTD  : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
                                                avx512vl_i32_info, HasAVX512, 1>;
defm VPBROADCASTQ  : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
                                                avx512vl_i64_info, HasAVX512, 1>, REX_W;

// Subvector broadcast from memory (vbroadcasti32x4 etc.), with unmasked,
// merge-masked and zero-masked forms.
multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
                                      SDPatternOperator OpNode,
                                      X86VectorVTInfo _Dst,
                                      X86VectorVTInfo _Src> {
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                            (_Dst.VT (OpNode addr:$src))>,
                            Sched<[SchedWriteShuffle.YMM.Folded]>,
                            AVX5128IBase, EVEX;
}

// This should be used for the AVX512DQ broadcast instructions. It disables
// the unmasked patterns so that we only use the DQ instructions when masking
// is requested.
// DQ-only subvector broadcast: the unmasked pattern is null_frag so these
// encodings are only selected for masked operations (unmasked cases reuse
// the 32x4/64x4 instructions defined above).
multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
                                         SDPatternOperator OpNode,
                                         X86VectorVTInfo _Dst,
                                         X86VectorVTInfo _Src> {
  let hasSideEffects = 0, mayLoad = 1 in
  defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                           (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                           (null_frag),
                           (_Dst.VT (OpNode addr:$src))>,
                           Sched<[SchedWriteShuffle.YMM.Folded]>,
                           AVX5128IBase, EVEX;
}

// f16 broadcasts reuse the integer vpbroadcastw instructions.
let Predicates = [HasBWI] in {
  def : Pat<(v32f16 (X86VBroadcastld16 addr:$src)),
            (VPBROADCASTWZrm addr:$src)>;

  def : Pat<(v32f16 (X86VBroadcast (v8f16 VR128X:$src))),
            (VPBROADCASTWZrr VR128X:$src)>;
  def : Pat<(v32f16 (X86VBroadcast (f16 FR16X:$src))),
            (VPBROADCASTWZrr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
}
let Predicates = [HasVLX, HasBWI] in {
  def : Pat<(v8f16 (X86VBroadcastld16 addr:$src)),
            (VPBROADCASTWZ128rm addr:$src)>;
  def : Pat<(v16f16 (X86VBroadcastld16 addr:$src)),
            (VPBROADCASTWZ256rm addr:$src)>;

  def : Pat<(v8f16 (X86VBroadcast (v8f16 VR128X:$src))),
            (VPBROADCASTWZ128rr VR128X:$src)>;
  def : Pat<(v16f16 (X86VBroadcast (v8f16 VR128X:$src))),
            (VPBROADCASTWZ256rr VR128X:$src)>;

  def : Pat<(v8f16 (X86VBroadcast (f16 FR16X:$src))),
            (VPBROADCASTWZ128rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
  def : Pat<(v16f16 (X86VBroadcast (f16 FR16X:$src))),
            (VPBROADCASTWZ256rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
}

//===----------------------------------------------------------------------===//
// AVX-512 BROADCAST SUBVECTORS
//

defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                       X86SubVBroadcastld128, v16i32_info, v4i32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                       X86SubVBroadcastld128, v16f32_info, v4f32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
1346defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4", 1347 X86SubVBroadcastld256, v8i64_info, v4i64x_info>, REX_W, 1348 EVEX_V512, EVEX_CD8<64, CD8VT4>; 1349defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4", 1350 X86SubVBroadcastld256, v8f64_info, v4f64x_info>, REX_W, 1351 EVEX_V512, EVEX_CD8<64, CD8VT4>; 1352 1353let Predicates = [HasAVX512] in { 1354def : Pat<(v8f64 (X86SubVBroadcastld256 addr:$src)), 1355 (VBROADCASTF64X4rm addr:$src)>; 1356def : Pat<(v16f32 (X86SubVBroadcastld256 addr:$src)), 1357 (VBROADCASTF64X4rm addr:$src)>; 1358def : Pat<(v32f16 (X86SubVBroadcastld256 addr:$src)), 1359 (VBROADCASTF64X4rm addr:$src)>; 1360def : Pat<(v8i64 (X86SubVBroadcastld256 addr:$src)), 1361 (VBROADCASTI64X4rm addr:$src)>; 1362def : Pat<(v16i32 (X86SubVBroadcastld256 addr:$src)), 1363 (VBROADCASTI64X4rm addr:$src)>; 1364def : Pat<(v32i16 (X86SubVBroadcastld256 addr:$src)), 1365 (VBROADCASTI64X4rm addr:$src)>; 1366def : Pat<(v64i8 (X86SubVBroadcastld256 addr:$src)), 1367 (VBROADCASTI64X4rm addr:$src)>; 1368 1369def : Pat<(v8f64 (X86SubVBroadcastld128 addr:$src)), 1370 (VBROADCASTF32X4rm addr:$src)>; 1371def : Pat<(v16f32 (X86SubVBroadcastld128 addr:$src)), 1372 (VBROADCASTF32X4rm addr:$src)>; 1373def : Pat<(v32f16 (X86SubVBroadcastld128 addr:$src)), 1374 (VBROADCASTF32X4rm addr:$src)>; 1375def : Pat<(v8i64 (X86SubVBroadcastld128 addr:$src)), 1376 (VBROADCASTI32X4rm addr:$src)>; 1377def : Pat<(v16i32 (X86SubVBroadcastld128 addr:$src)), 1378 (VBROADCASTI32X4rm addr:$src)>; 1379def : Pat<(v32i16 (X86SubVBroadcastld128 addr:$src)), 1380 (VBROADCASTI32X4rm addr:$src)>; 1381def : Pat<(v64i8 (X86SubVBroadcastld128 addr:$src)), 1382 (VBROADCASTI32X4rm addr:$src)>; 1383 1384// Patterns for selects of bitcasted operations. 
1385def : Pat<(vselect_mask VK16WM:$mask, 1386 (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))), 1387 (v16f32 immAllZerosV)), 1388 (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>; 1389def : Pat<(vselect_mask VK16WM:$mask, 1390 (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))), 1391 VR512:$src0), 1392 (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>; 1393def : Pat<(vselect_mask VK16WM:$mask, 1394 (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))), 1395 (v16i32 immAllZerosV)), 1396 (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>; 1397def : Pat<(vselect_mask VK16WM:$mask, 1398 (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))), 1399 VR512:$src0), 1400 (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>; 1401 1402def : Pat<(vselect_mask VK8WM:$mask, 1403 (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))), 1404 (v8f64 immAllZerosV)), 1405 (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>; 1406def : Pat<(vselect_mask VK8WM:$mask, 1407 (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))), 1408 VR512:$src0), 1409 (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>; 1410def : Pat<(vselect_mask VK8WM:$mask, 1411 (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))), 1412 (v8i64 immAllZerosV)), 1413 (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>; 1414def : Pat<(vselect_mask VK8WM:$mask, 1415 (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))), 1416 VR512:$src0), 1417 (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>; 1418} 1419 1420let Predicates = [HasVLX] in { 1421defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4", 1422 X86SubVBroadcastld128, v8i32x_info, v4i32x_info>, 1423 EVEX_V256, EVEX_CD8<32, CD8VT4>; 1424defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4", 1425 X86SubVBroadcastld128, v8f32x_info, v4f32x_info>, 1426 EVEX_V256, EVEX_CD8<32, CD8VT4>; 1427 1428def : Pat<(v4f64 (X86SubVBroadcastld128 addr:$src)), 1429 (VBROADCASTF32X4Z256rm addr:$src)>; 1430def : Pat<(v8f32 
(X86SubVBroadcastld128 addr:$src)), 1431 (VBROADCASTF32X4Z256rm addr:$src)>; 1432def : Pat<(v16f16 (X86SubVBroadcastld128 addr:$src)), 1433 (VBROADCASTF32X4Z256rm addr:$src)>; 1434def : Pat<(v4i64 (X86SubVBroadcastld128 addr:$src)), 1435 (VBROADCASTI32X4Z256rm addr:$src)>; 1436def : Pat<(v8i32 (X86SubVBroadcastld128 addr:$src)), 1437 (VBROADCASTI32X4Z256rm addr:$src)>; 1438def : Pat<(v16i16 (X86SubVBroadcastld128 addr:$src)), 1439 (VBROADCASTI32X4Z256rm addr:$src)>; 1440def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)), 1441 (VBROADCASTI32X4Z256rm addr:$src)>; 1442 1443// Patterns for selects of bitcasted operations. 1444def : Pat<(vselect_mask VK8WM:$mask, 1445 (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))), 1446 (v8f32 immAllZerosV)), 1447 (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>; 1448def : Pat<(vselect_mask VK8WM:$mask, 1449 (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))), 1450 VR256X:$src0), 1451 (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>; 1452def : Pat<(vselect_mask VK8WM:$mask, 1453 (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))), 1454 (v8i32 immAllZerosV)), 1455 (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>; 1456def : Pat<(vselect_mask VK8WM:$mask, 1457 (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))), 1458 VR256X:$src0), 1459 (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>; 1460} 1461 1462let Predicates = [HasBF16] in { 1463 def : Pat<(v32bf16 (X86SubVBroadcastld256 addr:$src)), 1464 (VBROADCASTF64X4rm addr:$src)>; 1465 def : Pat<(v32bf16 (X86SubVBroadcastld128 addr:$src)), 1466 (VBROADCASTF32X4rm addr:$src)>; 1467} 1468 1469let Predicates = [HasBF16, HasVLX] in 1470 def : Pat<(v16bf16 (X86SubVBroadcastld128 addr:$src)), 1471 (VBROADCASTF32X4Z256rm addr:$src)>; 1472 1473let Predicates = [HasVLX, HasDQI] in { 1474defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2", 1475 X86SubVBroadcastld128, v4i64x_info, v2i64x_info>, 1476 EVEX_V256, EVEX_CD8<64, CD8VT2>, 
REX_W;
defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                           X86SubVBroadcastld128, v4f64x_info, v2f64x_info>,
                           EVEX_V256, EVEX_CD8<64, CD8VT2>, REX_W;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect_mask VK4WM:$mask,
          (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
          (v4f64 immAllZerosV)),
          (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK4WM:$mask,
          (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
          VR256X:$src0),
          (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK4WM:$mask,
          (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
          (v4i64 immAllZerosV)),
          (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK4WM:$mask,
          (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
          VR256X:$src0),
          (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
}

// 512-bit AVX512DQ-only subvector broadcasts: 64x2 from a 128-bit load and
// 32x8 from a 256-bit load.
let Predicates = [HasDQI] in {
defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                       X86SubVBroadcastld128, v8i64_info, v2i64x_info>, REX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
                       X86SubVBroadcastld256, v16i32_info, v8i32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                       X86SubVBroadcastld128, v8f64_info, v2f64x_info>, REX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
                       X86SubVBroadcastld256, v16f32_info, v8f32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT8>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect_mask VK16WM:$mask,
          (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
          (v16f32 immAllZerosV)),
          (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
          (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
          VR512:$src0),
          (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
          (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
          (v16i32 immAllZerosV)),
          (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
          (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
          VR512:$src0),
          (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;

def : Pat<(vselect_mask VK8WM:$mask,
          (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
          (v8f64 immAllZerosV)),
          (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
          (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
          VR512:$src0),
          (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
          (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
          (v8i64 immAllZerosV)),
          (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
          (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
          VR512:$src0),
          (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}

// 32x2 broadcasts (AVX512DQ): _Src is the 64-bit-element view that supplies
// the 2 x 32-bit element group being replicated.  This base multiclass only
// provides the 512-bit and 256-bit variants.
multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
                                        AVX512VLVectorVTInfo _Dst,
                                        AVX512VLVectorVTInfo _Src> {
  let Predicates = [HasDQI] in
  defm Z : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
                                     WriteShuffle256Ld, _Dst.info512,
                                     _Src.info512, _Src.info128, 0, null_frag, null_frag>,
                                     EVEX_V512;
  let Predicates = [HasDQI, HasVLX] in
  defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
                                        WriteShuffle256Ld,
_Dst.info256, 1561 _Src.info256, _Src.info128, 0, null_frag, null_frag>, 1562 EVEX_V256; 1563} 1564 1565multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr, 1566 AVX512VLVectorVTInfo _Dst, 1567 AVX512VLVectorVTInfo _Src> : 1568 avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> { 1569 1570 let Predicates = [HasDQI, HasVLX] in 1571 defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle, 1572 WriteShuffleXLd, _Dst.info128, 1573 _Src.info128, _Src.info128, 0, null_frag, null_frag>, 1574 EVEX_V128; 1575} 1576 1577defm VBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2", 1578 avx512vl_i32_info, avx512vl_i64_info>; 1579defm VBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2", 1580 avx512vl_f32_info, avx512vl_f64_info>; 1581 1582//===----------------------------------------------------------------------===// 1583// AVX-512 BROADCAST MASK TO VECTOR REGISTER 1584//--- 1585multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr, 1586 X86VectorVTInfo _, RegisterClass KRC> { 1587 def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src), 1588 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 1589 [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>, 1590 EVEX, Sched<[WriteShuffle]>; 1591} 1592 1593multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr, 1594 AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> { 1595 let Predicates = [HasCDI] in 1596 defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512; 1597 let Predicates = [HasCDI, HasVLX] in { 1598 defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256; 1599 defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128; 1600 } 1601} 1602 1603defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d", 1604 avx512vl_i32_info, VK16>; 1605defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q", 1606 avx512vl_i64_info, VK8>, REX_W; 

//===----------------------------------------------------------------------===//
// -- VPERMI2 - 3 source operands form --
// The index operand ($src1) is tied to the destination ("$src1 = $dst"), so
// VPERMI2 overwrites its index register with the permuted result.
multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched,
                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
    hasSideEffects = 0 in {
  // Register-register form.
  defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
          EVEX, VVVV, AVX5128IBase, Sched<[sched]>;

  // Register-memory form: $src3 is loaded with the full-width load fragment.
  let mayLoad = 1 in
  defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
            (ins _.RC:$src2, _.MemOp:$src3),
            OpcodeStr, "$src3, $src2", "$src2, $src3",
            (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
                   (_.VT (_.LdFrag addr:$src3)))), 1>,
            EVEX, VVVV, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Broadcast-memory (embedded broadcast, EVEX.b) form of VPERMI2.
multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
      hasSideEffects = 0, mayLoad = 1 in
  defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
            (ins _.RC:$src2, _.ScalarMemOp:$src3),
            OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
            !strconcat("$src2, ${src3}", _.BroadcastStr ),
            (_.VT (X86VPermt2 _.RC:$src2,
                   IdxVT.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
            AVX5128IBase, EVEX, VVVV, EVEX_B,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// 512-bit always; 128/256-bit only with AVX512VL.  Broadcast forms are
// included for each width.
multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               AVX512VLVectorVTInfo ShuffleMask> {
  defm NAME#Z: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
                             ShuffleMask.info512>,
               avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512,
                                ShuffleMask.info512>, EVEX_V512;
  let Predicates = [HasVLX] in {
    defm NAME#Z128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
                                  ShuffleMask.info128>,
                    avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128,
                                     ShuffleMask.info128>, EVEX_V128;
    defm NAME#Z256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
                                  ShuffleMask.info256>,
                    avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256,
                                     ShuffleMask.info256>, EVEX_V256;
  }
}

// Byte/word element variant: gated on an extra predicate (BWI/VBMI) and has
// no broadcast form (no 8/16-bit embedded broadcasts).
multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched,
                                  AVX512VLVectorVTInfo VTInfo,
                                  AVX512VLVectorVTInfo Idx,
                                  Predicate Prd> {
  let Predicates = [Prd] in
  defm NAME#Z: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
                             Idx.info512>, EVEX_V512;
  let Predicates = [Prd, HasVLX] in {
    defm NAME#Z128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
                                  Idx.info128>, EVEX_V128;
    defm NAME#Z256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
                                  Idx.info256>, EVEX_V256;
  }
}

defm VPERMI2D : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256,
                avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2Q : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
                avx512vl_i64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
defm VPERMI2W : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256,
                avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
                REX_W, EVEX_CD8<16, CD8VF>;
defm VPERMI2B : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256,
                avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
                EVEX_CD8<8, CD8VF>;
defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256,
                 avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
                 avx512vl_f64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;

// Extra patterns to deal with extra bitcasts due to passthru and index being
// different types on the fp versions.
multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
                                  X86VectorVTInfo IdxVT,
                                  X86VectorVTInfo CastVT> {
  // Masked select whose passthru is the (bitcast) index operand maps onto
  // the merge-masked rrk/rmk/rmbk forms, since those preserve $src1.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (X86VPermt2 (_.VT _.RC:$src2),
                               (IdxVT.VT (bitconvert
                                          (CastVT.VT _.RC:$src1))),
                               _.RC:$src3),
                   (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
                                                _.RC:$src2, _.RC:$src3)>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (X86VPermt2 _.RC:$src2,
                               (IdxVT.VT (bitconvert
                                          (CastVT.VT _.RC:$src1))),
                               (_.LdFrag addr:$src3)),
                   (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
                                                _.RC:$src2, addr:$src3)>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (X86VPermt2 _.RC:$src2,
                               (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
                               (_.BroadcastLdFrag addr:$src3)),
                   (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
                                                 _.RC:$src2, addr:$src3)>;
}

// TODO: Should we add more casts? The vXi64 case is common due to ABI.
defm : avx512_perm_i_lowering<"VPERMI2PSZ", v16f32_info, v16i32_info, v8i64_info>;
defm : avx512_perm_i_lowering<"VPERMI2PSZ256", v8f32x_info, v8i32x_info, v4i64x_info>;
defm : avx512_perm_i_lowering<"VPERMI2PSZ128", v4f32x_info, v4i32x_info, v2i64x_info>;

// VPERMT2
// Unlike VPERMI2, the first table operand ($src1) is tied to the
// destination; the index register ($src2) is preserved.
multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched,
                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins IdxVT.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
          EVEX, VVVV, AVX5128IBase, Sched<[sched]>;

  defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
            (ins IdxVT.RC:$src2, _.MemOp:$src3),
            OpcodeStr, "$src3, $src2", "$src2, $src3",
            (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
                   (_.LdFrag addr:$src3))), 1>,
            EVEX, VVVV, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
// Broadcast-memory (embedded broadcast, EVEX.b) form of VPERMT2.
multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
  defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
            (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
            OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
            !strconcat("$src2, ${src3}", _.BroadcastStr ),
            (_.VT (X86VPermt2 _.RC:$src1,
                   IdxVT.RC:$src2,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
            AVX5128IBase, EVEX, VVVV, EVEX_B,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// 512-bit always; 128/256-bit only with AVX512VL; broadcast forms included.
multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               AVX512VLVectorVTInfo ShuffleMask> {
  defm NAME#Z: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
                             ShuffleMask.info512>,
               avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512,
                                ShuffleMask.info512>, EVEX_V512;
  let Predicates = [HasVLX] in {
    defm NAME#Z128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
                                  ShuffleMask.info128>,
                    avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128,
                                     ShuffleMask.info128>, EVEX_V128;
    defm NAME#Z256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
                                  ShuffleMask.info256>,
                    avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256,
                                     ShuffleMask.info256>, EVEX_V256;
  }
}

// Byte/word variant: extra predicate (BWI/VBMI), no broadcast forms.
multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched,
                                  AVX512VLVectorVTInfo VTInfo,
                                  AVX512VLVectorVTInfo Idx, Predicate Prd> {
  let Predicates = [Prd] in
  defm NAME#Z: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
                             Idx.info512>, EVEX_V512;
  let Predicates = [Prd, HasVLX] in {
    defm NAME#Z128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
                                  Idx.info128>, EVEX_V128;
    defm NAME#Z256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
                                  Idx.info256>, EVEX_V256;
  }
}

defm VPERMT2D : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256,
                avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMT2Q : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
                avx512vl_i64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
defm VPERMT2W : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256,
                avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
                REX_W, EVEX_CD8<16, CD8VF>;
defm VPERMT2B : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256,
                avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
                EVEX_CD8<8, CD8VF>;
defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256,
                 avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
                 avx512vl_f64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;

//===----------------------------------------------------------------------===//
// AVX-512 - BLEND using mask
//

// Assembly-only definitions: all patterns are empty ([]); instruction
// selection of blends is handled elsewhere (not by these defs).
multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
             (ins _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>,
             EVEX, VVVV, Sched<[sched]>;
  def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
             []>, EVEX, VVVV, EVEX_K, Sched<[sched]>;
  def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
             []>, EVEX, VVVV, EVEX_KZ, Sched<[sched]>;
  let mayLoad = 1 in {
  def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
             (ins _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
             []>, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
             []>, EVEX, VVVV, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
             []>, EVEX, VVVV, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
  }
}
// Embedded-broadcast (EVEX.b) memory forms of the blends; also asm-only.
multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in {
  def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
              (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
              !strconcat(OpcodeStr,
              "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
              "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
              EVEX, VVVV, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;

  def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
              (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
              !strconcat(OpcodeStr,
              "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
              "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
              EVEX, VVVV, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;

  def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
              (ins _.RC:$src1, _.ScalarMemOp:$src2),
              !strconcat(OpcodeStr,
              "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
              "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
              EVEX, VVVV, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// dword/qword-element blends: include broadcast forms at all widths.
multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
                        AVX512VLVectorVTInfo VTInfo> {
  defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
           WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                EVEX_V128;
  }
}

// byte/word-element blends: require AVX512BW; no broadcast forms.
multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
                        AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasBWI] in
  defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [HasBWI, HasVLX] in {
    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                EVEX_V128;
  }
}

defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend,
                              avx512vl_f32_info>;
defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
                              avx512vl_f64_info>, REX_W;
defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend,
                              avx512vl_i32_info>;
defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
                              avx512vl_i64_info>, REX_W;
defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
                              avx512vl_i8_info>;
defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
                              avx512vl_i16_info>, REX_W;

//===----------------------------------------------------------------------===//
// Compare Instructions
//===----------------------------------------------------------------------===//

// avx512_cmp_scalar - AVX512 CMPSS and CMPSD

// Scalar FP compare into a mask register.  OpNode/OpNodeSAE are the normal
// and suppress-all-exceptions DAG nodes; the *_su PatFrags are the variants
// used under a mask.
multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
                             PatFrag OpNode_su, PatFrag OpNodeSAE_su,
                             X86FoldableSchedWrite sched> {
  defm rr_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                      (outs _.KRC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                      "vcmp"#_.Suffix,
                      "$cc, $src2, $src1", "$src1, $src2, $cc",
                      (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
                      (OpNode_su (_.VT _.RC:$src1), (_.VT
_.RC:$src2),
                      timm:$cc)>, EVEX, VVVV, VEX_LIG, Sched<[sched]>, SIMD_EXC;
  let mayLoad = 1 in
  defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                    (outs _.KRC:$dst),
                    (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc),
                    "vcmp"#_.Suffix,
                    "$cc, $src2, $src1", "$src1, $src2, $cc",
                    (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
                            timm:$cc),
                    (OpNode_su (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
                               timm:$cc)>, EVEX, VVVV, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;

  // {sae} form: reads MXCSR but suppresses exception reporting (no SIMD_EXC).
  let Uses = [MXCSR] in
  defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                     (outs _.KRC:$dst),
                     (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                     "vcmp"#_.Suffix,
                     "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc",
                     (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                timm:$cc),
                     (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                   timm:$cc)>,
                     EVEX, VVVV, VEX_LIG, EVEX_B, Sched<[sched]>;

  // CodeGen-only forms operating on the scalar register class (FRC).
  let isCodeGenOnly = 1 in {
    let isCommutable = 1 in
    def rr : AVX512Ii8<0xC2, MRMSrcReg,
                (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, u8imm:$cc),
                !strconcat("vcmp", _.Suffix,
                           "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
                [(set _.KRC:$dst, (OpNode _.FRC:$src1,
                                          _.FRC:$src2,
                                          timm:$cc))]>,
                EVEX, VVVV, VEX_LIG, Sched<[sched]>, SIMD_EXC;
    def rm : AVX512Ii8<0xC2, MRMSrcMem,
              (outs _.KRC:$dst),
              (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
              !strconcat("vcmp", _.Suffix,
                         "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
              [(set _.KRC:$dst, (OpNode _.FRC:$src1,
                                        (_.ScalarLdFrag addr:$src2),
                                        timm:$cc))]>,
              EVEX, VVVV, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  }
}

let Predicates = [HasAVX512] in {
  let ExeDomain = SSEPackedSingle in
  defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsSAE,
                                   X86cmpms_su, X86cmpmsSAE_su,
                                   SchedWriteFCmp.Scl>, AVX512XSIi8Base;
  let ExeDomain = SSEPackedDouble in
  defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsSAE,
                                   X86cmpms_su, X86cmpmsSAE_su,
                                   SchedWriteFCmp.Scl>, AVX512XDIi8Base, REX_W;
}
let Predicates = [HasFP16], ExeDomain = SSEPackedSingle in
  defm VCMPSHZ : avx512_cmp_scalar<f16x_info, X86cmpms, X86cmpmsSAE,
                                   X86cmpms_su, X86cmpmsSAE_su,
                                   SchedWriteFCmp.Scl>, AVX512XSIi8Base, TA;

// Packed integer equality/greater-than compares into a mask register.
// Asm-only here (empty patterns); rr/rm plus merge-masked rrk/rmk forms.
multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr,
                              X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, bit IsCommutable> {
  let isCommutable = IsCommutable, hasSideEffects = 0 in
  def rr : AVX512BI<opc, MRMSrcReg,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             []>, EVEX, VVVV, Sched<[sched]>;
  let mayLoad = 1, hasSideEffects = 0 in
  def rm : AVX512BI<opc, MRMSrcMem,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             []>, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
  let isCommutable = IsCommutable, hasSideEffects = 0 in
  def rrk : AVX512BI<opc, MRMSrcReg,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, $src2}"),
              []>, EVEX, VVVV, EVEX_K, Sched<[sched]>;
  let mayLoad = 1, hasSideEffects = 0 in
  def rmk : AVX512BI<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, $src2}"),
              []>, EVEX, VVVV, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Adds the embedded-broadcast (EVEX.b) memory forms on top of
// avx512_icmp_packed (only valid for 32/64-bit elements).
multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  bit IsCommutable> :
           avx512_icmp_packed<opc, OpcodeStr, sched, _, IsCommutable> {
  let mayLoad = 1, hasSideEffects = 0 in {
  def rmb : AVX512BI<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
              !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
                         "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
              []>, EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmbk : AVX512BI<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
                                       _.ScalarMemOp:$src2),
               !strconcat(OpcodeStr,
                          "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
               []>, EVEX, VVVV, EVEX_K, EVEX_B,
               Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Width expansion: 512-bit under prd, 256/128-bit under prd + AVX512VL.
multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr,
                                 X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo VTInfo, Predicate prd,
                                 bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_packed<opc, OpcodeStr, sched.ZMM,
                              VTInfo.info512, IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed<opc, OpcodeStr, sched.YMM,
                                   VTInfo.info256, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_icmp_packed<opc, OpcodeStr, sched.XMM,
                                   VTInfo.info128, IsCommutable>, EVEX_V128;
  }
}

multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
                                     X86SchedWriteWidths sched,
                                     AVX512VLVectorVTInfo VTInfo,
                                     Predicate prd, bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.ZMM,
                                  VTInfo.info512, IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.YMM,
                                       VTInfo.info256, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.XMM,
                                       VTInfo.info128, IsCommutable>, EVEX_V128;
  }
}

// AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
// increase the pattern complexity the way an immediate would.
let AddedComplexity = 2 in {
// FIXME: Is there a better scheduler class for VPCMP?
defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb",
                      SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
                EVEX_CD8<8, CD8VF>, WIG;

defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw",
                      SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
                EVEX_CD8<16, CD8VF>, WIG;

defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd",
                      SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
                EVEX_CD8<32, CD8VF>;

defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq",
                      SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
                T8, REX_W, EVEX_CD8<64, CD8VF>;

defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb",
                      SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
                EVEX_CD8<8, CD8VF>, WIG;

defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw",
                      SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
                EVEX_CD8<16, CD8VF>, WIG;

defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd",
                      SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
                EVEX_CD8<32, CD8VF>;

defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq",
                      SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
                T8, REX_W, EVEX_CD8<64, CD8VF>;
}

// VPCMP/VPCMPU with an explicit condition-code immediate.  Frag/Frag_su
// match setcc-style DAGs; the extra Pats below commute the operands when the
// load is on the LHS, rewriting the immediate via X86pcmpm_imm_commute.
multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
                          PatFrag Frag_su,
                          X86FoldableSchedWrite sched,
                          X86VectorVTInfo _, string Name> {
  let isCommutable = 1 in
  def rri : AVX512AIi8<opc, MRMSrcReg,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
             !strconcat("vpcmp", Suffix,
                        "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
             [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
                                                (_.VT _.RC:$src2),
                                                cond)))]>,
             EVEX, VVVV, Sched<[sched]>;
  def rmi : AVX512AIi8<opc, MRMSrcMem,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
             !strconcat("vpcmp", Suffix,
                        "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
             [(set _.KRC:$dst, (_.KVT
                                (Frag:$cc
                                 (_.VT _.RC:$src1),
                                 (_.VT (_.LdFrag addr:$src2)),
                                 cond)))]>,
             EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
  let isCommutable = 1 in
  def rrik : AVX512AIi8<opc, MRMSrcReg,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
                                      u8imm:$cc),
              !strconcat("vpcmp", Suffix,
                         "\t{$cc, $src2, $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, $src2, $cc}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                     (_.KVT (Frag_su:$cc (_.VT _.RC:$src1),
                                                         (_.VT _.RC:$src2),
                                                         cond))))]>,
              EVEX, VVVV, EVEX_K, Sched<[sched]>;
  def rmik : AVX512AIi8<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
                                      u8imm:$cc),
              !strconcat("vpcmp", Suffix,
                         "\t{$cc, $src2, $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, $src2, $cc}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                     (_.KVT
                                      (Frag_su:$cc
                                       (_.VT _.RC:$src1),
                                       (_.VT (_.LdFrag addr:$src2)),
                                       cond))))]>,
              EVEX, VVVV, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;

  // Commuted-load patterns: load on the LHS, condition immediate rewritten.
  def : Pat<(_.KVT (Frag:$cc (_.LdFrag addr:$src2),
                             (_.VT _.RC:$src1), cond)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmi")
             _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>;

  def : Pat<(and _.KRCWM:$mask,
                 (_.KVT (Frag_su:$cc (_.LdFrag addr:$src2),
                                     (_.VT _.RC:$src1), cond))),
            (!cast<Instruction>(Name#_.ZSuffix#"rmik")
             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
             (X86pcmpm_imm_commute $cc))>;
}

// Adds broadcast-memory forms plus their commuted patterns.
multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
                              PatFrag Frag_su, X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, string Name> :
           avx512_icmp_cc<opc, Suffix, Frag, Frag_su, sched, _, Name> {
  def rmib : AVX512AIi8<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
                                      u8imm:$cc),
              !strconcat("vpcmp", Suffix,
                         "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
                         "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
              [(set _.KRC:$dst, (_.KVT (Frag:$cc
                                        (_.VT _.RC:$src1),
                                        (_.BroadcastLdFrag addr:$src2),
                                        cond)))]>,
              EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmibk : AVX512AIi8<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
                                       _.ScalarMemOp:$src2, u8imm:$cc),
               !strconcat("vpcmp", Suffix,
                          "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
               [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                      (_.KVT (Frag_su:$cc
                                              (_.VT _.RC:$src1),
                                              (_.BroadcastLdFrag addr:$src2),
                                              cond))))]>,
               EVEX, VVVV, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;

  def : Pat<(_.KVT (Frag:$cc (_.BroadcastLdFrag addr:$src2),
                    (_.VT _.RC:$src1), cond)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmib")
             _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>;

  def : Pat<(and _.KRCWM:$mask,
                 (_.KVT (Frag_su:$cc (_.BroadcastLdFrag addr:$src2),
                                     (_.VT _.RC:$src1), cond))),
            (!cast<Instruction>(Name#_.ZSuffix#"rmibk")
             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
             (X86pcmpm_imm_commute $cc))>;
}

multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
                             PatFrag Frag_su, X86SchedWriteWidths sched,
                             AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
                          sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
                               sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
                               sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
  }
}

multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
                                 PatFrag Frag_su, X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
                              sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
                                   sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
                                   sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_su,
                                SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
              EVEX_CD8<8, CD8VF>;
defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_su,
                                 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
               EVEX_CD8<8, CD8VF>;

defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_su,
                                SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
              REX_W, EVEX_CD8<16, CD8VF>;
defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_su,
                                 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
               REX_W, EVEX_CD8<16, CD8VF>;

defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_su,
                                    SchedWriteVecALU, avx512vl_i32_info,
                                    HasAVX512>, EVEX_CD8<32, CD8VF>;
defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_su,
                                     SchedWriteVecALU, avx512vl_i32_info,
                                     HasAVX512>, EVEX_CD8<32, CD8VF>;

defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_su,
                                    SchedWriteVecALU, avx512vl_i64_info,
                                    HasAVX512>, REX_W, EVEX_CD8<64, CD8VF>;
defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_su,
                                     SchedWriteVecALU, avx512vl_i64_info,
                                     HasAVX512>, REX_W,
EVEX_CD8<64, CD8VF>; 2288 2289multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _, 2290 string Name> { 2291let Uses = [MXCSR], mayRaiseFPException = 1 in { 2292 defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, 2293 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,u8imm:$cc), 2294 "vcmp"#_.Suffix, 2295 "$cc, $src2, $src1", "$src1, $src2, $cc", 2296 (X86any_cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc), 2297 (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc), 2298 1>, Sched<[sched]>; 2299 2300 defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, 2301 (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc), 2302 "vcmp"#_.Suffix, 2303 "$cc, $src2, $src1", "$src1, $src2, $cc", 2304 (X86any_cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), 2305 timm:$cc), 2306 (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), 2307 timm:$cc)>, 2308 Sched<[sched.Folded, sched.ReadAfterFold]>; 2309 2310 defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, 2311 (outs _.KRC:$dst), 2312 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc), 2313 "vcmp"#_.Suffix, 2314 "$cc, ${src2}"#_.BroadcastStr#", $src1", 2315 "$src1, ${src2}"#_.BroadcastStr#", $cc", 2316 (X86any_cmpm (_.VT _.RC:$src1), 2317 (_.VT (_.BroadcastLdFrag addr:$src2)), 2318 timm:$cc), 2319 (X86cmpm_su (_.VT _.RC:$src1), 2320 (_.VT (_.BroadcastLdFrag addr:$src2)), 2321 timm:$cc)>, 2322 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 2323 } 2324 2325 // Patterns for selecting with loads in other operand. 
2326 def : Pat<(X86any_cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1), 2327 timm:$cc), 2328 (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, 2329 (X86cmpm_imm_commute timm:$cc))>; 2330 2331 def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2), 2332 (_.VT _.RC:$src1), 2333 timm:$cc)), 2334 (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask, 2335 _.RC:$src1, addr:$src2, 2336 (X86cmpm_imm_commute timm:$cc))>; 2337 2338 def : Pat<(X86any_cmpm (_.BroadcastLdFrag addr:$src2), 2339 (_.VT _.RC:$src1), timm:$cc), 2340 (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, 2341 (X86cmpm_imm_commute timm:$cc))>; 2342 2343 def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.BroadcastLdFrag addr:$src2), 2344 (_.VT _.RC:$src1), 2345 timm:$cc)), 2346 (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, 2347 _.RC:$src1, addr:$src2, 2348 (X86cmpm_imm_commute timm:$cc))>; 2349 2350 // Patterns for mask intrinsics. 2351 def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc, 2352 (_.KVT immAllOnesV)), 2353 (!cast<Instruction>(Name#_.ZSuffix#"rri") _.RC:$src1, _.RC:$src2, timm:$cc)>; 2354 2355 def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask), 2356 (!cast<Instruction>(Name#_.ZSuffix#"rrik") _.KRCWM:$mask, _.RC:$src1, 2357 _.RC:$src2, timm:$cc)>; 2358 2359 def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc, 2360 (_.KVT immAllOnesV)), 2361 (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, timm:$cc)>; 2362 2363 def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc, 2364 _.KRCWM:$mask), 2365 (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask, _.RC:$src1, 2366 addr:$src2, timm:$cc)>; 2367 2368 def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc, 2369 (_.KVT immAllOnesV)), 2370 (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, timm:$cc)>; 2371 2372 def : Pat<(X86cmpmm (_.VT 
_.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc, 2373 _.KRCWM:$mask), 2374 (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, _.RC:$src1, 2375 addr:$src2, timm:$cc)>; 2376 2377 // Patterns for mask intrinsics with loads in other operand. 2378 def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc, 2379 (_.KVT immAllOnesV)), 2380 (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, 2381 (X86cmpm_imm_commute timm:$cc))>; 2382 2383 def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc, 2384 _.KRCWM:$mask), 2385 (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask, 2386 _.RC:$src1, addr:$src2, 2387 (X86cmpm_imm_commute timm:$cc))>; 2388 2389 def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc, 2390 (_.KVT immAllOnesV)), 2391 (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, 2392 (X86cmpm_imm_commute timm:$cc))>; 2393 2394 def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc, 2395 _.KRCWM:$mask), 2396 (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, 2397 _.RC:$src1, addr:$src2, 2398 (X86cmpm_imm_commute timm:$cc))>; 2399} 2400 2401multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> { 2402 // comparison code form (VCMP[EQ/LT/LE/...] 
2403 let Uses = [MXCSR] in 2404 defm rrib : AVX512_maskable_custom_cmp<0xC2, MRMSrcReg, (outs _.KRC:$dst), 2405 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), 2406 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2, u8imm:$cc), 2407 "vcmp"#_.Suffix, 2408 "$cc, {sae}, $src2, $src1", 2409 "$src1, $src2, {sae}, $cc", 2410 [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1), 2411 (_.VT _.RC:$src2), timm:$cc, (_.KVT immAllOnesV)))], 2412 [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1), 2413 (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask))]>, 2414 EVEX_B, Sched<[sched]>; 2415} 2416 2417multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _, 2418 Predicate Pred = HasAVX512> { 2419 let Predicates = [Pred] in { 2420 defm Z : avx512_vcmp_common<sched.ZMM, _.info512, NAME>, 2421 avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512; 2422 2423 } 2424 let Predicates = [Pred,HasVLX] in { 2425 defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128; 2426 defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256; 2427 } 2428} 2429 2430defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>, 2431 AVX512PDIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W; 2432defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>, 2433 AVX512PSIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>; 2434defm VCMPPH : avx512_vcmp<SchedWriteFCmp, avx512vl_f16_info, HasFP16>, 2435 AVX512PSIi8Base, EVEX, VVVV, EVEX_CD8<16, CD8VF>, TA; 2436 2437// Patterns to select fp compares with load as first operand. 
// Fold a load in the FIRST operand of a scalar FP compare by commuting the
// immediate (the instruction can only fold its second source from memory).
let Predicates = [HasAVX512] in {
  def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1, timm:$cc)),
            (VCMPSDZrm FR64X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1, timm:$cc)),
            (VCMPSSZrm FR32X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
}

let Predicates = [HasFP16] in {
  def : Pat<(v1i1 (X86cmpms (loadf16 addr:$src2), FR16X:$src1, timm:$cc)),
            (VCMPSHZrm FR16X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
}

// ----------------------------------------------------------------
// FPClass

//handle fpclass instruction  mask = op(reg_scalar,imm)
//                            op(mem_scalar,imm)
// Scalar VFPCLASS: produces a 1-bit mask from a scalar source and an 8-bit
// class-test immediate. Provides reg (rr/rrk) and mem (rm/rmk) forms, each
// with a merge-masked (EVEX_K) variant.
multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                 Predicate prd> {
  let Predicates = [prd], ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
    def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                    (ins _.RC:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1),
                                                   (i32 timm:$src2)))]>,
                    Sched<[sched]>;
    def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
                     OpcodeStr#_.Suffix#
                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst,(and _.KRCWM:$mask,
                                           (X86Vfpclasss_su (_.VT _.RC:$src1),
                                                            (i32 timm:$src2))))]>,
                     EVEX_K, Sched<[sched]>;
    def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix#
                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,
                          (X86Vfpclasss (_.ScalarIntMemFrags addr:$src1),
                                        (i32 timm:$src2)))]>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
    def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
                     OpcodeStr#_.Suffix#
                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst,(and _.KRCWM:$mask,
                                           (X86Vfpclasss_su (_.ScalarIntMemFrags addr:$src1),
                                                            (i32 timm:$src2))))]>,
                     EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

//handle fpclass instruction mask = fpclass(reg_vec, reg_vec, imm)
//                                  fpclass(reg_vec, mem_vec, imm)
//                                  fpclass(reg_vec, broadcast(eltVt), imm)
// Vector VFPCLASS: reg, full-vector mem and broadcast-mem forms, each with a
// merge-masked variant. `mem` is the x/y/z suffix used to disambiguate the
// memory form in assembly (see the InstAliases below).
multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                 string mem>{
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
    def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                    (ins _.RC:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1),
                                                  (i32 timm:$src2)))]>,
                    Sched<[sched]>;
    def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
                     OpcodeStr#_.Suffix#
                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst,(and _.KRCWM:$mask,
                                           (X86Vfpclass_su (_.VT _.RC:$src1),
                                                           (i32 timm:$src2))))]>,
                     EVEX_K, Sched<[sched]>;
    def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.MemOp:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix#"{"#mem#"}"#
                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,(X86Vfpclass
                                      (_.VT (_.LdFrag addr:$src1)),
                                      (i32 timm:$src2)))]>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
    def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
                     OpcodeStr#_.Suffix#"{"#mem#"}"#
                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su
                                        (_.VT (_.LdFrag addr:$src1)),
                                        (i32 timm:$src2))))]>,
                     EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
    def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                     (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                     OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
                     _.BroadcastStr#", $dst|$dst, ${src1}"
                     #_.BroadcastStr#", $src2}",
                     [(set _.KRC:$dst,(X86Vfpclass
                                       (_.VT (_.BroadcastLdFrag addr:$src1)),
                                       (i32 timm:$src2)))]>,
                     EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
    def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                      (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
                      OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
                      _.BroadcastStr#", $dst {${mask}}|$dst {${mask}}, ${src1}"#
                      _.BroadcastStr#", $src2}",
                      [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su
                                        (_.VT (_.BroadcastLdFrag addr:$src1)),
                                        (i32 timm:$src2))))]>,
                      EVEX_B, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }

  // Allow registers or broadcast with the x, y, z suffix we use to disambiguate
  // the memory form.
  def : InstAlias<OpcodeStr#_.Suffix#mem#
                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  (!cast<Instruction>(NAME#"rr")
                   _.KRC:$dst, _.RC:$src1, i32u8imm:$src2), 0, "att">;
  def : InstAlias<OpcodeStr#_.Suffix#mem#
                  "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                  (!cast<Instruction>(NAME#"rrk")
                   _.KRC:$dst, _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 0, "att">;
  def : InstAlias<OpcodeStr#_.Suffix#mem#
                  "\t{$src2, ${src1}"#_.BroadcastStr#", $dst|$dst, ${src1}"#
                  _.BroadcastStr#", $src2}",
                  (!cast<Instruction>(NAME#"rmb")
                   _.KRC:$dst, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
  def : InstAlias<OpcodeStr#_.Suffix#mem#
                  "\t{$src2, ${src1}"#_.BroadcastStr#", $dst {${mask}}|"
                  "$dst {${mask}}, ${src1}"#_.BroadcastStr#", $src2}",
                  (!cast<Instruction>(NAME#"rmbk")
                   _.KRC:$dst, _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
}

// Instantiate the vector form at 512 bits, plus 128/256-bit variants under VLX.
multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
                                     bits<8> opc, X86SchedWriteWidths sched,
                                     Predicate prd>{
  let Predicates = [prd] in {
    defm Z : avx512_vector_fpclass<opc, OpcodeStr, sched.ZMM,
                                   _.info512, "z">, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, sched.XMM,
                                      _.info128, "x">, EVEX_V128;
    defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, sched.YMM,
                                      _.info256, "y">, EVEX_V256;
  }
}

// All element widths: PH/SH under FP16, PS/PD/SS/SD under DQI.
multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
                                 bits<8> opcScalar, X86SchedWriteWidths sched> {
  defm PH : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f16_info, opcVec,
                                      sched, HasFP16>,
                                      EVEX_CD8<16, CD8VF>, AVX512PSIi8Base, TA;
  defm SHZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
                                   sched.Scl, f16x_info, HasFP16>,
                                   EVEX_CD8<16, CD8VT1>, AVX512PSIi8Base, TA;
  defm PS : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f32_info, opcVec,
                                      sched, HasDQI>,
                                      EVEX_CD8<32, CD8VF>, AVX512AIi8Base;
  defm PD : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f64_info, opcVec,
                                      sched, HasDQI>,
                                      EVEX_CD8<64, CD8VF>, AVX512AIi8Base, REX_W;
  defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
                                   sched.Scl, f32x_info, HasDQI>, VEX_LIG,
                                   EVEX_CD8<32, CD8VT1>, AVX512AIi8Base;
  defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
                                   sched.Scl, f64x_info, HasDQI>, VEX_LIG,
                                   EVEX_CD8<64, CD8VT1>, AVX512AIi8Base, REX_W;
}

defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp>, EVEX;

//-----------------------------------------------------------------
// Mask register copy, including
// - copy between mask registers
// - load/store mask registers
// - copy from GPR to mask register and vice versa
//
// Defines kk (mask-to-mask), km (load) and mk (store) forms. The non-empty
// Suffix ("_EVEX") marks the EVEX-promoted encodings used with extended GPRs.
multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
                           string OpcodeStr, RegisterClass KRC, ValueType vvt,
                           X86MemOperand x86memop, string Suffix = ""> {
  let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove],
      explicitOpPrefix = !if(!eq(Suffix, ""), NoExplicitOpPrefix, ExplicitEVEX) in
  def kk#Suffix : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
                    Sched<[WriteMove]>;
  def km#Suffix : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set KRC:$dst, (vvt (load addr:$src)))]>,
                    Sched<[WriteLoad]>;
  def mk#Suffix : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(store KRC:$src, addr:$dst)]>,
                    Sched<[WriteStore]>;
}

// GPR<->mask moves (kr and rk forms). No patterns: lowering is done with
// COPY_TO_REGCLASS patterns below.
multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
                               string OpcodeStr, RegisterClass KRC,
                               RegisterClass GRC, string Suffix = ""> {
  let hasSideEffects = 0 in {
    def kr#Suffix : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
                      Sched<[WriteMove]>;
    def rk#Suffix : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
                      Sched<[WriteMove]>;
  }
}

let Predicates = [HasDQI, NoEGPR] in
  defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
               VEX, TB, PD;
let Predicates = [HasDQI, HasEGPR, In64BitMode] in
  defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem, "_EVEX">,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32, "_EVEX">,
               EVEX, TB, PD;

let Predicates = [HasAVX512, NoEGPR] in
  defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
               VEX, TB;
let Predicates = [HasAVX512, HasEGPR, In64BitMode] in
  defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem, "_EVEX">,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32, "_EVEX">,
               EVEX, TB;

let Predicates = [HasBWI, NoEGPR] in {
  defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1, i32mem>,
               VEX, TB, PD, REX_W;
  defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
               VEX, TB, XD;
  defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
               VEX, TB, REX_W;
  defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
               VEX, TB, XD, REX_W;
}
let Predicates = [HasBWI, HasEGPR, In64BitMode] in {
  defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1, i32mem, "_EVEX">,
               EVEX, TB, PD, REX_W;
  defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32, "_EVEX">,
               EVEX, TB, XD;
  defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem, "_EVEX">,
               EVEX, TB, REX_W;
  defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64, "_EVEX">,
               EVEX, TB, XD, REX_W;
}

// GR from/to mask register
def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;
def : Pat<(i8 (trunc (i16 (bitconvert (v16i1 VK16:$src))))),
          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_8bit)>;

def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;

// zext of a mask-to-GPR move uses KMOV*rk, which zeroes the upper bits;
// anyext can use a plain register-class copy.
def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
          (KMOVWrk VK16:$src)>;
def : Pat<(i64 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
          (SUBREG_TO_REG (i64 0), (KMOVWrk VK16:$src), sub_32bit)>;
def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
          (COPY_TO_REGCLASS VK16:$src, GR32)>;
def : Pat<(i64 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
          (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK16:$src, GR32), sub_32bit)>;

def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
          (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
def : Pat<(i64 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
          (SUBREG_TO_REG (i64 0), (KMOVBrk VK8:$src), sub_32bit)>, Requires<[HasDQI]>;
def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
          (COPY_TO_REGCLASS VK8:$src, GR32)>;
def : Pat<(i64 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
          (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK8:$src, GR32), sub_32bit)>;

def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
          (COPY_TO_REGCLASS GR32:$src, VK32)>;
def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
          (COPY_TO_REGCLASS VK32:$src, GR32)>;
def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
          (COPY_TO_REGCLASS GR64:$src, VK64)>;
def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
          (COPY_TO_REGCLASS VK64:$src, GR64)>;

// Load/store kreg
let Predicates = [HasDQI] in {
  def : Pat<(v1i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
  def : Pat<(v2i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
  def : Pat<(v4i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
}

let Predicates = [HasAVX512] in {
  // Without DQI there is no KMOVB, so go through a zero-extending GPR load.
  def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
            (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
  def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))),
            (KMOVWkm addr:$src)>;
}

// Extract of mask element 0 as an i8 value.
def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
                         SDTypeProfile<1, 2, [SDTCisVT<0, i8>,
                                              SDTCVecEltisVT<1, i1>,
                                              SDTCisPtrTy<2>]>>;

let Predicates = [HasAVX512] in {
  // Lower GPR<->mask copies of each mask width via register-class copies.
  multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
    def : Pat<(maskVT (scalar_to_vector GR32:$src)),
              (COPY_TO_REGCLASS GR32:$src, maskRC)>;

    def : Pat<(maskVT (scalar_to_vector GR8:$src)),
              (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;

    def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))),
              (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>;

    def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))),
              (i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>;
  }

  defm : operation_gpr_mask_copy_lowering<VK1,  v1i1>;
  defm : operation_gpr_mask_copy_lowering<VK2,  v2i1>;
  defm : operation_gpr_mask_copy_lowering<VK4,  v4i1>;
  defm : operation_gpr_mask_copy_lowering<VK8,  v8i1>;
  defm : operation_gpr_mask_copy_lowering<VK16, v16i1>;
  defm : operation_gpr_mask_copy_lowering<VK32, v32i1>;
  defm : operation_gpr_mask_copy_lowering<VK64, v64i1>;

  // Insert a single bit into a zero vector: mask to bit 0 with AND, then move.
  def : Pat<(insert_subvector (v16i1 immAllZerosV),
                              (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
            (KMOVWkr (AND32ri
                      (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
                      (i32 1)))>;
}

// Mask unary operation
// - KNOT
multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
                            RegisterClass KRC, SDPatternOperator OpNode,
                            X86FoldableSchedWrite sched, Predicate prd> {
  let Predicates = [prd] in
  def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
             [(set KRC:$dst, (OpNode KRC:$src))]>,
             Sched<[sched]>;
}

// b/w/d/q variants; b requires DQI, d/q require BWI.
multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNode,
                                X86FoldableSchedWrite sched> {
  defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                            sched, HasDQI>, VEX, TB, PD;
  defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                            sched, HasAVX512>, VEX, TB;
  defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                            sched, HasBWI>, VEX, TB, PD, REX_W;
  defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                            sched, HasBWI>, VEX, TB, REX_W;
}

// TODO - do we need a X86SchedWriteWidths::KMASK type?
defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;

// KNL does not support KMOVB, 8-bit mask is promoted to 16-bit
let Predicates = [HasAVX512, NoDQI] in
def : Pat<(vnot VK8:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;

// Sub-byte masks are legalized through VK16: widen, KNOTW, narrow back.
def : Pat<(vnot VK4:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
def : Pat<(vnot VK2:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
// Fixed: the final copy targeted VK2 while the pattern result is v1i1; use
// VK1 for consistency with the VK2/VK4 patterns above. (VK1 and VK2 cover
// the same physical k-registers, so emitted code is unchanged.)
def : Pat<(vnot VK1:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK1:$src, VK16)), VK1)>;

// Mask binary operation
// - KAND, KANDN, KOR, KXNOR, KXOR
multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
                             RegisterClass KRC, SDPatternOperator OpNode,
                             X86FoldableSchedWrite sched, Predicate prd,
                             bit IsCommutable> {
  let Predicates = [prd], isCommutable = IsCommutable in
  def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
             !strconcat(OpcodeStr,
                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>,
             Sched<[sched]>;
}

// b/w/d/q variants; b requires DQI, d/q require BWI, w's predicate is
// overridable (KADD uses HasDQI for the w form too).
multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
                                 SDPatternOperator OpNode,
                                 X86FoldableSchedWrite sched, bit IsCommutable,
                                 Predicate prdW = HasAVX512> {
  defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                             sched, HasDQI, IsCommutable>, VEX, VVVV, VEX_L, TB, PD;
  defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                             sched, prdW, IsCommutable>, VEX, VVVV, VEX_L, TB;
  defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                             sched, HasBWI, IsCommutable>, VEX, VVVV, VEX_L, REX_W, TB, PD;
  defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                             sched, HasBWI, IsCommutable>, VEX, VVVV, VEX_L, REX_W, TB;
}

// TODO - do we need a X86SchedWriteWidths::KMASK type?
defm KAND  : avx512_mask_binop_all<0x41, "kand",  and,     SchedWriteVecLogic.XMM, 1>;
defm KOR   : avx512_mask_binop_all<0x45, "kor",   or,      SchedWriteVecLogic.XMM, 1>;
defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor,   SchedWriteVecLogic.XMM, 1>;
defm KXOR  : avx512_mask_binop_all<0x47, "kxor",  xor,     SchedWriteVecLogic.XMM, 1>;
defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn,   SchedWriteVecLogic.XMM, 0>;
defm KADD  : avx512_mask_binop_all<0x4A, "kadd",  X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;

// Lower narrow-mask binary ops by widening both operands to VK16, applying
// the 16-bit instruction, and narrowing the result.
multiclass avx512_binop_pat<SDPatternOperator VOpNode,
                            Instruction Inst> {
  // With AVX512F, 8-bit mask is promoted to 16-bit mask,
  // for the DQI set, this type is legal and KxxxB instruction is used
  let Predicates = [NoDQI] in
  def : Pat<(VOpNode VK8:$src1, VK8:$src2),
            (COPY_TO_REGCLASS
              (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
                    (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;

  // All types smaller than 8 bits require conversion anyway
  def : Pat<(VOpNode VK1:$src1, VK1:$src2),
            (COPY_TO_REGCLASS (Inst
                               (COPY_TO_REGCLASS VK1:$src1, VK16),
                               (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
  def : Pat<(VOpNode VK2:$src1, VK2:$src2),
            (COPY_TO_REGCLASS (Inst
                               (COPY_TO_REGCLASS VK2:$src1, VK16),
                               (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
  def : Pat<(VOpNode VK4:$src1, VK4:$src2),
            (COPY_TO_REGCLASS (Inst
                               (COPY_TO_REGCLASS VK4:$src1, VK16),
                               (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
}

defm : avx512_binop_pat<and,   KANDWrr>;
defm : avx512_binop_pat<vandn, KANDNWrr>;
defm : avx512_binop_pat<or,    KORWrr>;
defm : avx512_binop_pat<vxnor, KXNORWrr>;
defm : avx512_binop_pat<xor,   KXORWrr>;

// Mask unpacking
multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst,
                             X86KVectorVTInfo Src, X86FoldableSchedWrite sched,
                             Predicate prd> {
  let Predicates = [prd] in {
    let hasSideEffects = 0 in
    def rr : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst),
               (ins Src.KRC:$src1, Src.KRC:$src2),
               "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               VEX, VVVV, VEX_L, Sched<[sched]>;

    // KUNPCK places $src1 in the high half, so the concat operands are
    // passed to the instruction in swapped order.
    def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)),
              (!cast<Instruction>(NAME#rr) Src.KRC:$src2, Src.KRC:$src1)>;
  }
}

defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info,  WriteShuffle, HasAVX512>, TB, PD;
defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, TB;
defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, TB, REX_W;

// Mask bit testing
multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                              SDNode OpNode, X86FoldableSchedWrite sched,
                              Predicate prd> {
  let Predicates = [prd], Defs = [EFLAGS] in
  def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
             [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>,
             Sched<[sched]>;
}

// b/w/d/q variants; b requires DQI, d/q require BWI.
multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched,
                                Predicate prdW = HasAVX512> {
  defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>,
                              VEX, TB, PD;
  defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>,
                              VEX, TB;
  defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>,
                              VEX, TB, REX_W;
  defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>,
                              VEX, TB, PD, REX_W;
}

// TODO - do we need a X86SchedWriteWidths::KMASK type?
defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>;
defm KTEST   : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>;

// Mask shift
// Immediate shift of a mask register: KRC:$dst = OpNode(KRC:$src, imm8).
multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                               SDNode OpNode, X86FoldableSchedWrite sched> {
  let Predicates = [HasAVX512] in
  def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
               !strconcat(OpcodeStr,
                          "\t{$imm, $src, $dst|$dst, $src, $imm}"),
               [(set KRC:$dst, (OpNode KRC:$src, (i8 timm:$imm)))]>,
               Sched<[sched]>;
}

// b/w use opc1, d/q use opc2; b requires DQI, d/q require BWI.
multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
                                 SDNode OpNode, X86FoldableSchedWrite sched> {
  defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                               sched>, VEX, TA, PD, REX_W;
  let Predicates = [HasDQI] in
  defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                               sched>, VEX, TA, PD;
  let Predicates = [HasBWI] in {
    defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                                 sched>, VEX, TA, PD, REX_W;
    defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                                 sched>, VEX, TA, PD;
  }
}

defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>;
defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;

// Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
// Lowers a condition-code compare of narrow (128/256-bit) integer vectors to
// the 512-bit VPCMP* instruction when VLX is unavailable: both sources are
// widened by INSERT_SUBREG into IMPLICIT_DEF wide registers, the 512-bit
// compare runs, and the wide mask result is copied back to the narrow mask
// class.  The extra result lanes correspond to undef input lanes and are
// never read through the narrow KRC.
// Frag matches the plain compare; Frag_su is the variant used when the
// result feeds a mask `and` (merged into the masked "k" instruction).
// NOTE(review): "_su" presumably restricts matching to single-use compare
// nodes so folding into the masked form is safe — confirm against the
// PatFrag definitions elsewhere in the target.
multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
                                                 string InstStr,
                                                 X86VectorVTInfo Narrow,
                                                 X86VectorVTInfo Wide> {
// Unmasked compare -> wide Zrri.
def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
                                (Narrow.VT Narrow.RC:$src2), cond)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrri")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
            (X86pcmpm_imm $cc)), Narrow.KRC)>;

// Compare ANDed with a mask -> wide masked Zrrik; the narrow mask is widened
// through a plain COPY_TO_REGCLASS.
def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
                                                    (Narrow.VT Narrow.RC:$src2),
                                                    cond)))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
                             (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
                             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
                             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
                             (X86pcmpm_imm $cc)), Narrow.KRC)>;
}

// Same no-VLX widening for the broadcast-from-memory (Zrmib*) forms.  The
// "commuted" patterns match the broadcast as the FIRST operand and therefore
// remap the condition code with X86pcmpm_imm_commute so the swapped operand
// order still compares correctly.
multiclass axv512_icmp_packed_cc_rmb_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
                                                     string InstStr,
                                                     X86VectorVTInfo Narrow,
                                                     X86VectorVTInfo Wide> {
// Broadcast load.
def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
                                (Narrow.BroadcastLdFrag addr:$src2), cond)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrmib")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>;

def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (Narrow.KVT
                            (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
                                         (Narrow.BroadcastLdFrag addr:$src2),
                                         cond)))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
                             (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
                             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
                             addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>;

// Commuted with broadcast load.
def : Pat<(Narrow.KVT (Frag:$cc (Narrow.BroadcastLdFrag addr:$src2),
                                (Narrow.VT Narrow.RC:$src1),
                                cond)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrmib")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>;

def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (Narrow.KVT
                            (Frag_su:$cc (Narrow.BroadcastLdFrag addr:$src2),
                                         (Narrow.VT Narrow.RC:$src1),
                                         cond)))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
                             (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
                             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
                             addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>;
}

// Same as above, but for fp types which don't use PatFrags.
// The FP compare node (X86cmpm/X86cmpm_su) carries the condition as a
// timm:$cc operand directly, and commuted broadcast patterns remap it with
// X86cmpm_imm_commute.
multiclass axv512_cmp_packed_cc_no_vlx_lowering<string InstStr,
                                                X86VectorVTInfo Narrow,
                                                X86VectorVTInfo Wide> {
def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
                               (Narrow.VT Narrow.RC:$src2), timm:$cc)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrri")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
            timm:$cc), Narrow.KRC)>;

def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
                                       (Narrow.VT Narrow.RC:$src2), timm:$cc))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
                             (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
                             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
                             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
                             timm:$cc), Narrow.KRC)>;

// Broadcast load.
def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
                               (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrmbi")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            addr:$src2, timm:$cc), Narrow.KRC)>;

def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
                                       (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
                             (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
                             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
                             addr:$src2, timm:$cc), Narrow.KRC)>;

// Commuted with broadcast load.
def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
                               (Narrow.VT Narrow.RC:$src1), timm:$cc)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrmbi")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;

def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (X86cmpm_su (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
                                       (Narrow.VT Narrow.RC:$src1), timm:$cc))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
                             (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
                             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
                             addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
}

// Instantiate the no-VLX lowerings for 32/64-bit integer elements (AVX512F)
// and for float/double.  Each narrow type maps onto the 512-bit info of the
// same element type.
let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;

  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;

  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;

  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v8f32x_info, v16f32_info>;
  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v4f32x_info, v16f32_info>;
  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v4f64x_info, v8f64_info>;
  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>;
}

// 8/16-bit element compares need BWI for the 512-bit instruction.
let Predicates = [HasBWI, NoVLX, HasEVEX512] in {
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v16i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v16i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v16i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v16i16x_info, v32i16_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v8i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v8i16x_info, v32i16_info>;
}

// Mask setting all 0s or 1s
// Pseudo that materializes an all-zeros or all-ones mask; expanded later
// (rematerializable, as cheap as a move, no real encoding).
multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, SDPatternOperator Val> {
  let Predicates = [HasAVX512] in
  let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
      SchedRW = [WriteZero] in
  def NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
                [(set KRC:$dst, (VT Val))]>;
}

// W/D/Q width variants of the set-constant mask pseudo.
multiclass avx512_mask_setop_w<SDPatternOperator Val> {
  defm W : avx512_mask_setop<VK16, v16i1, Val>;
  defm D : avx512_mask_setop<VK32, v32i1, Val>;
  defm Q : avx512_mask_setop<VK64, v64i1, Val>;
}

defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
defm KSET1 : avx512_mask_setop_w<immAllOnesV>;

// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
// Sub-16-bit mask constants reuse the 16-bit KSET0W/KSET1W pseudo and narrow
// the result with COPY_TO_REGCLASS (8-bit and smaller masks are promoted to
// 16-bit masks on plain AVX-512).
let Predicates = [HasAVX512] in {
  def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
  def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
  def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
  def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
  def : Pat<(v8i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK8)>;
  def : Pat<(v4i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK4)>;
  def : Pat<(v2i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK2)>;
  def : Pat<(v1i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK1)>;
}

// Patterns for kmask insert_subvector/extract_subvector to/from index=0
// At index 0 both directions are pure register-class changes, so they lower
// to COPY_TO_REGCLASS with no real instruction.
multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
                                             RegisterClass RC, ValueType VT> {
  def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
            (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;

  def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
            (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
}
// All (smaller, larger) mask-width pairs.
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK2,  v2i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK4,  v4i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK4,  v4i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;

//===----------------------------------------------------------------------===//
// AVX-512 - Aligned and unaligned load and store
//

// One vector load at a fixed width: defines the rr move, the masked
// merge/zero register forms (rrk/rrkz), the memory forms (rm/rmk/rmkz), and
// masked_load selection patterns.
//   ld_frag     - plain (or aligned) load fragment for the rm pattern.
//   mload       - masked-load fragment for the trailing patterns.
//   NoRMPattern - suppress the rm load pattern (used when another width's
//                 instruction is preferred for selection).
//   SelectOprr  - select node for the register forms; null_frag disables the
//                 rr-select patterns.
multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
                       X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
                       X86SchedWriteMoveLS Sched, bit NoRMPattern = 0,
                       SDPatternOperator SelectOprr = vselect> {
  let hasSideEffects = 0 in {
  let isMoveReg = 1 in
  def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
                    _.ExeDomain>, EVEX, Sched<[Sched.RR]>;
  def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
                      (ins _.KRCWM:$mask, _.RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                                 "${dst} {${mask}} {z}, $src}"),
                      [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
                                              (_.VT _.RC:$src),
                                              _.ImmAllZerosV)))], _.ExeDomain>,
                      EVEX, EVEX_KZ, Sched<[Sched.RR]>;

  let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
  def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    !if(NoRMPattern, [],
                        [(set _.RC:$dst,
                          (_.VT (ld_frag addr:$src)))]),
                    _.ExeDomain>, EVEX, Sched<[Sched.RM]>;

  // Merge-masking forms tie the pass-through operand to $dst.
  let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
    def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
                       (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
                       !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
                                  "${dst} {${mask}}, $src1}"),
                       [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
                                               (_.VT _.RC:$src1),
                                               (_.VT _.RC:$src0))))], _.ExeDomain>,
                       EVEX, EVEX_K, Sched<[Sched.RR]>;
    def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
                       (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
                       !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
                                  "${dst} {${mask}}, $src1}"),
                       [(set _.RC:$dst, (_.VT
                           (vselect_mask _.KRCWM:$mask,
                            (_.VT (ld_frag addr:$src1)),
                            (_.VT _.RC:$src0))))], _.ExeDomain>,
                       EVEX, EVEX_K, Sched<[Sched.RM]>;
  }
  def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
                      (ins _.KRCWM:$mask, _.MemOp:$src),
                      OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
                      "${dst} {${mask}} {z}, $src}",
                      [(set _.RC:$dst, (_.VT (vselect_mask _.KRCWM:$mask,
                        (_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))],
                      _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
  }
  // masked_load with undef or zero pass-through -> zero-masking load; with a
  // register pass-through -> merge-masking load.
  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
            (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;

  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;

  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
            (!cast<Instruction>(Name#_.ZSuffix#rmk) _.RC:$src0,
             _.KRCWM:$mask, addr:$ptr)>;
}

// Aligned-load instantiation across the three EVEX vector widths; the
// 256/128-bit forms additionally require VLX.
multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo _, Predicate prd,
                                 X86SchedWriteMoveLSWidths Sched,
                                 bit NoRMPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
                       _.info512.AlignedLdFrag, masked_load_aligned,
                       Sched.ZMM, NoRMPattern>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
  defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
                          _.info256.AlignedLdFrag, masked_load_aligned,
                          Sched.YMM, NoRMPattern>, EVEX_V256;
  defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
                          _.info128.AlignedLdFrag, masked_load_aligned,
                          Sched.XMM, NoRMPattern>, EVEX_V128;
  }
}

// Unaligned-load instantiation across the three EVEX vector widths.
multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
                          AVX512VLVectorVTInfo _, Predicate prd,
                          X86SchedWriteMoveLSWidths Sched,
                          bit NoRMPattern = 0,
                          SDPatternOperator SelectOprr = vselect> {
  let Predicates = [prd] in
  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag,
                       masked_load, Sched.ZMM, NoRMPattern, SelectOprr>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
  defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag,
                          masked_load, Sched.YMM, NoRMPattern, SelectOprr>, EVEX_V256;
  defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag,
                          masked_load, Sched.XMM, NoRMPattern, SelectOprr>, EVEX_V128;
  }
}

// One vector store at a fixed width: the codegen-only reversed register moves
// (*_REV, used for disassembly and the ".s" aliases), the mr/mrk memory
// forms, a masked_store pattern, and the ".s" InstAliases that force the
// store-encoded register forms.
multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
                        X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
                        X86SchedWriteMoveLS Sched, bit NoMRPattern = 0> {
  let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
  let isMoveReg = 1 in
  def rr_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
                        OpcodeStr # "\t{$src, $dst|$dst, $src}",
                        [], _.ExeDomain>, EVEX,
                        Sched<[Sched.RR]>;
  def rrk_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
                         (ins _.KRCWM:$mask, _.RC:$src),
                         OpcodeStr # "\t{$src, ${dst} {${mask}}|"#
                         "${dst} {${mask}}, $src}",
                         [], _.ExeDomain>, EVEX, EVEX_K,
                         Sched<[Sched.RR]>;
  def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
                          (ins _.KRCWM:$mask, _.RC:$src),
                          OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
                          "${dst} {${mask}} {z}, $src}",
                          [], _.ExeDomain>, EVEX, EVEX_KZ,
                          Sched<[Sched.RR]>;
  }

  let hasSideEffects = 0, mayStore = 1 in
  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    !if(NoMRPattern, [],
                        [(st_frag (_.VT _.RC:$src), addr:$dst)]),
                    _.ExeDomain>, EVEX, Sched<[Sched.MR]>;
  def mrk : AVX512PI<opc, MRMDestMem, (outs),
                     (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
                     OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
                     [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>;

  def: Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask),
           (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
            _.KRCWM:$mask, _.RC:$src)>;

  def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV")
                   _.RC:$dst, _.RC:$src), 0>;
  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV")
                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV")
                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
}

// Unaligned-store instantiation across the three EVEX vector widths.
multiclass avx512_store_vl<bits<8> opc, string OpcodeStr,
                           AVX512VLVectorVTInfo _, Predicate prd,
                           X86SchedWriteMoveLSWidths Sched,
                           bit NoMRPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store,
                        masked_store, Sched.ZMM, NoMRPattern>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store,
                             masked_store, Sched.YMM, NoMRPattern>, EVEX_V256;
    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store,
                             masked_store, Sched.XMM, NoMRPattern>, EVEX_V128;
  }
}

// Aligned-store instantiation across the three EVEX vector widths.
multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _, Predicate prd,
                                  X86SchedWriteMoveLSWidths Sched,
                                  bit NoMRPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore,
                        masked_store_aligned, Sched.ZMM, NoMRPattern>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore,
                             masked_store_aligned, Sched.YMM, NoMRPattern>, EVEX_V256;
    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore,
                             masked_store_aligned, Sched.XMM, NoMRPattern>, EVEX_V128;
  }
}

defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
                                     HasAVX512, SchedWriteFMoveLS>,
               avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
                                      HasAVX512, SchedWriteFMoveLS>,
               TB, EVEX_CD8<32, CD8VF>;

defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
                                     HasAVX512, SchedWriteFMoveLS>,
               avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
                                      HasAVX512, SchedWriteFMoveLS>,
               TB, PD, REX_W, EVEX_CD8<64, CD8VF>;

defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
                              SchedWriteFMoveLS, 0, null_frag>,
               avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
                               SchedWriteFMoveLS>,
               TB, EVEX_CD8<32, CD8VF>;

defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
                              SchedWriteFMoveLS, 0, null_frag>,
               avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
                               SchedWriteFMoveLS>,
               TB, PD, REX_W, EVEX_CD8<64, CD8VF>;

defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
                                       HasAVX512, SchedWriteVecMoveLS, 1>,
                 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
                                        HasAVX512, SchedWriteVecMoveLS, 1>,
                 TB, PD, EVEX_CD8<32, CD8VF>;

defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
                                       HasAVX512, SchedWriteVecMoveLS>,
                 avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
                                        HasAVX512, SchedWriteVecMoveLS>,
                 TB, PD, REX_W, EVEX_CD8<64, CD8VF>;

defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI,
                               SchedWriteVecMoveLS, 1>,
                avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI,
                                SchedWriteVecMoveLS, 1>,
                TB, XD, EVEX_CD8<8, CD8VF>;

defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI,
                                SchedWriteVecMoveLS, 1>,
                 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI,
                                 SchedWriteVecMoveLS, 1>,
                 TB, XD, REX_W, EVEX_CD8<16, CD8VF>;

defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
                                SchedWriteVecMoveLS, 1, null_frag>,
                 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
                                 SchedWriteVecMoveLS, 1>,
                 TB, XS, EVEX_CD8<32, CD8VF>;

defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
                                SchedWriteVecMoveLS, 0, null_frag>,
                 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
                                 SchedWriteVecMoveLS>,
                 TB, XS, REX_W, EVEX_CD8<64, CD8VF>;

// Special instructions to help with spilling when we don't have VLX. We need
// to load or store from a ZMM register instead. These are converted in
// expandPostRAPseudos.
// Spill-helper pseudos for the no-VLX case: 128/256-bit loads that will be
// rewritten (in expandPostRAPseudos, per the comment above) to operate on the
// containing ZMM register.
let isReMaterializable = 1, canFoldAsLoad = 1,
    isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in {
def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
                            "", []>, Sched<[WriteFLoadX]>;
def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
                            "", []>, Sched<[WriteFLoadY]>;
def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
                            "", []>, Sched<[WriteFLoadX]>;
def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
                            "", []>, Sched<[WriteFLoadY]>;
}

// Matching store pseudos.
let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
                            "", []>, Sched<[WriteFStoreX]>;
def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
                            "", []>, Sched<[WriteFStoreY]>;
def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
                            "", []>, Sched<[WriteFStoreX]>;
def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
                            "", []>, Sched<[WriteFStoreY]>;
}

// vselect(mask, 0, x) has the mask polarity inverted relative to a
// zero-masking move, so invert the mask with KNOT before the Zrrkz move.
// The v8i64 case round-trips the 8-bit mask through VK16 because KNOTW
// operates on 16-bit masks.
def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV),
                          (v8i64 VR512:$src))),
          (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
                                            VK8), VR512:$src)>;

def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
                           (v16i32 VR512:$src))),
          (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;

// These patterns exist to prevent the above patterns from introducing a second
// mask inversion when one already exists.
// If the select mask is already a vnot, fold the inversion instead of
// emitting another KNOT (see the comment preceding these patterns).
def : Pat<(v8i64 (vselect (v8i1 (vnot VK8:$mask)),
                          (v8i64 immAllZerosV),
                          (v8i64 VR512:$src))),
          (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
// NOTE(review): the input binds the mask as VK16 but the output names it as
// VK16WM (unlike the v8i64 pattern above, which uses VK8 on both sides) —
// presumably harmless since only the operand name is used for the result,
// but confirm the class annotation is intended.
def : Pat<(v16i32 (vselect (v16i1 (vnot VK16:$mask)),
                           (v16i32 immAllZerosV),
                           (v16i32 VR512:$src))),
          (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;

// Lowers a masked select on a narrow vector to the corresponding 512-bit
// masked move when VLX is unavailable: widen both vector operands into
// IMPLICIT_DEF wide registers, widen the mask via COPY_TO_REGCLASS, perform
// the wide rrk/rrkz move, and extract the narrow subregister of the result.
multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
                              X86VectorVTInfo Wide> {
 // Merge-masking select.
 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
                               Narrow.RC:$src1, Narrow.RC:$src0)),
           (EXTRACT_SUBREG
            (Wide.VT
             (!cast<Instruction>(InstrStr#"rrk")
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
              (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
            Narrow.SubRegIdx)>;

 // Zero-masking select.
 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
                               Narrow.RC:$src1, Narrow.ImmAllZerosV)),
           (EXTRACT_SUBREG
            (Wide.VT
             (!cast<Instruction>(InstrStr#"rrkz")
              (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
            Narrow.SubRegIdx)>;
}

// Patterns for handling v8i1 selects of 256-bit vectors when VLX isn't
// available. Use a 512-bit operation and extract.
// Instantiate the no-VLX masked-select lowering for 32/64-bit elements.
let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
  defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
  defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
  defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
  defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;

  defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>;
  defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
  defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>;
  defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
}

// 8/16-bit elements (and f16/bf16, which share the 16-bit move) need BWI.
let Predicates = [HasBWI, NoVLX, HasEVEX512] in {
  defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
  defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;

  defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>;
  defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;

  defm : mask_move_lowering<"VMOVDQU16Z", v8f16x_info, v32f16_info>;
  defm : mask_move_lowering<"VMOVDQU16Z", v16f16x_info, v32f16_info>;

  defm : mask_move_lowering<"VMOVDQU16Z", v8bf16x_info, v32bf16_info>;
  defm : mask_move_lowering<"VMOVDQU16Z", v16bf16x_info, v32bf16_info>;
}

// Select 512-bit loads/stores for element types that have no dedicated move
// instruction: integer types reuse the 64-bit-element moves, f16/bf16 reuse
// the PS moves.
let Predicates = [HasAVX512] in {
  // 512-bit load.
  def : Pat<(alignedloadv16i32 addr:$src),
            (VMOVDQA64Zrm addr:$src)>;
  def : Pat<(alignedloadv32i16 addr:$src),
            (VMOVDQA64Zrm addr:$src)>;
  def : Pat<(alignedloadv32f16 addr:$src),
            (VMOVAPSZrm addr:$src)>;
  def : Pat<(alignedloadv32bf16 addr:$src),
            (VMOVAPSZrm addr:$src)>;
  def : Pat<(alignedloadv64i8 addr:$src),
            (VMOVDQA64Zrm addr:$src)>;
  def : Pat<(loadv16i32 addr:$src),
            (VMOVDQU64Zrm addr:$src)>;
  def : Pat<(loadv32i16 addr:$src),
            (VMOVDQU64Zrm addr:$src)>;
  def : Pat<(loadv32f16 addr:$src),
            (VMOVUPSZrm addr:$src)>;
  def : Pat<(loadv32bf16 addr:$src),
            (VMOVUPSZrm addr:$src)>;
  def : Pat<(loadv64i8 addr:$src),
            (VMOVDQU64Zrm addr:$src)>;

  // 512-bit store.
  def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(alignedstore (v32f16 VR512:$src), addr:$dst),
            (VMOVAPSZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignedstore (v32bf16 VR512:$src), addr:$dst),
            (VMOVAPSZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v16i32 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v32i16 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v32f16 VR512:$src), addr:$dst),
            (VMOVUPSZmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v32bf16 VR512:$src), addr:$dst),
            (VMOVUPSZmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v64i8 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
}

// Same idea for the 128/256-bit widths, which require VLX.
let Predicates = [HasVLX] in {
  // 128-bit load.
  def : Pat<(alignedloadv4i32 addr:$src),
            (VMOVDQA64Z128rm addr:$src)>;
  def : Pat<(alignedloadv8i16 addr:$src),
            (VMOVDQA64Z128rm addr:$src)>;
  def : Pat<(alignedloadv8f16 addr:$src),
            (VMOVAPSZ128rm addr:$src)>;
  def : Pat<(alignedloadv8bf16 addr:$src),
            (VMOVAPSZ128rm addr:$src)>;
  def : Pat<(alignedloadv16i8 addr:$src),
            (VMOVDQA64Z128rm addr:$src)>;
  def : Pat<(loadv4i32 addr:$src),
            (VMOVDQU64Z128rm addr:$src)>;
  def : Pat<(loadv8i16 addr:$src),
            (VMOVDQU64Z128rm addr:$src)>;
  def : Pat<(loadv8f16 addr:$src),
            (VMOVUPSZ128rm addr:$src)>;
  def : Pat<(loadv8bf16 addr:$src),
            (VMOVUPSZ128rm addr:$src)>;
  def : Pat<(loadv16i8 addr:$src),
            (VMOVDQU64Z128rm addr:$src)>;

  // 128-bit store.
  def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignedstore (v8f16 VR128X:$src), addr:$dst),
            (VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignedstore (v8bf16 VR128X:$src), addr:$dst),
            (VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v8f16 VR128X:$src), addr:$dst),
            (VMOVUPSZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v8bf16 VR128X:$src), addr:$dst),
            (VMOVUPSZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;

  // 256-bit load.
  def : Pat<(alignedloadv8i32 addr:$src),
            (VMOVDQA64Z256rm addr:$src)>;
  def : Pat<(alignedloadv16i16 addr:$src),
            (VMOVDQA64Z256rm addr:$src)>;
  def : Pat<(alignedloadv16f16 addr:$src),
            (VMOVAPSZ256rm addr:$src)>;
  def : Pat<(alignedloadv16bf16 addr:$src),
            (VMOVAPSZ256rm addr:$src)>;
  def : Pat<(alignedloadv32i8 addr:$src),
            (VMOVDQA64Z256rm addr:$src)>;
  def : Pat<(loadv8i32 addr:$src),
            (VMOVDQU64Z256rm addr:$src)>;
  def : Pat<(loadv16i16 addr:$src),
            (VMOVDQU64Z256rm addr:$src)>;
  def : Pat<(loadv16f16 addr:$src),
            (VMOVUPSZ256rm addr:$src)>;
  def : Pat<(loadv16bf16 addr:$src),
            (VMOVUPSZ256rm addr:$src)>;
  def : Pat<(loadv32i8 addr:$src),
            (VMOVDQU64Z256rm addr:$src)>;

  // 256-bit store.
  def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v16f16 VR256X:$src), addr:$dst),
            (VMOVAPSZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v16bf16 VR256X:$src), addr:$dst),
            (VMOVAPSZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v16f16 VR256X:$src), addr:$dst),
            (VMOVUPSZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v16bf16 VR256X:$src), addr:$dst),
            (VMOVUPSZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
}

// Masked select / masked load / masked store patterns for f16 and bf16
// vectors, which have no dedicated moves: everything is routed through the
// VMOVDQU16 instructions (aligned loads also use the unaligned instruction,
// which accepts any alignment).
multiclass mask_move_lowering_f16_bf16<AVX512VLVectorVTInfo _> {
let Predicates = [HasBWI] in {
  def : Pat<(_.info512.VT (vselect VK32WM:$mask, (_.info512.VT VR512:$src1), (_.info512.VT VR512:$src0))),
            (VMOVDQU16Zrrk VR512:$src0, VK32WM:$mask, VR512:$src1)>;
  def : Pat<(_.info512.VT (vselect VK32WM:$mask, (_.info512.VT VR512:$src1), _.info512.ImmAllZerosV)),
            (VMOVDQU16Zrrkz VK32WM:$mask, VR512:$src1)>;
  def : Pat<(_.info512.VT (vselect VK32WM:$mask,
                           (_.info512.VT (_.info512.AlignedLdFrag addr:$src)), (_.info512.VT VR512:$src0))),
            (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
  def : Pat<(_.info512.VT (vselect VK32WM:$mask,
                           (_.info512.VT (_.info512.AlignedLdFrag addr:$src)), _.info512.ImmAllZerosV)),
            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
  def : Pat<(_.info512.VT (vselect VK32WM:$mask,
                           (_.info512.VT (_.info512.LdFrag addr:$src)), (_.info512.VT VR512:$src0))),
            (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
  def : Pat<(_.info512.VT (vselect VK32WM:$mask,
                           (_.info512.VT (_.info512.LdFrag addr:$src)), _.info512.ImmAllZerosV)),
            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
  def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, (_.info512.VT VR512:$src0))),
            (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
  def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, undef)),
            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
  def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, _.info512.ImmAllZerosV)),
            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;

  def : Pat<(masked_store (_.info512.VT VR512:$src), addr:$dst, VK32WM:$mask),
            (VMOVDQU16Zmrk addr:$dst, VK32WM:$mask, VR512:$src)>;
}
let Predicates = [HasBWI, HasVLX] in {
  def : Pat<(_.info256.VT (vselect VK16WM:$mask, (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src0))),
            (VMOVDQU16Z256rrk VR256X:$src0, VK16WM:$mask, VR256X:$src1)>;
  def : Pat<(_.info256.VT (vselect VK16WM:$mask, (_.info256.VT VR256X:$src1), _.info256.ImmAllZerosV)),
            (VMOVDQU16Z256rrkz VK16WM:$mask, VR256X:$src1)>;
  def : Pat<(_.info256.VT (vselect VK16WM:$mask,
                           (_.info256.VT (_.info256.AlignedLdFrag addr:$src)), (_.info256.VT VR256X:$src0))),
            (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
  def : Pat<(_.info256.VT (vselect VK16WM:$mask,
                           (_.info256.VT (_.info256.AlignedLdFrag addr:$src)), _.info256.ImmAllZerosV)),
            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
  def : Pat<(_.info256.VT (vselect VK16WM:$mask,
                           (_.info256.VT (_.info256.LdFrag addr:$src)), (_.info256.VT VR256X:$src0))),
            (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
  def : Pat<(_.info256.VT (vselect VK16WM:$mask,
                           (_.info256.VT (_.info256.LdFrag addr:$src)), _.info256.ImmAllZerosV)),
            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
  def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, (_.info256.VT VR256X:$src0))),
            (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
  def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, undef)),
            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
  def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, _.info256.ImmAllZerosV)),
            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;

  def : Pat<(masked_store (_.info256.VT VR256X:$src), addr:$dst, VK16WM:$mask),
            (VMOVDQU16Z256mrk addr:$dst, VK16WM:$mask, VR256X:$src)>;

  def : Pat<(_.info128.VT (vselect VK8WM:$mask, (_.info128.VT VR128X:$src1), (_.info128.VT VR128X:$src0))),
            (VMOVDQU16Z128rrk VR128X:$src0, VK8WM:$mask, VR128X:$src1)>;
  def : Pat<(_.info128.VT (vselect VK8WM:$mask, (_.info128.VT VR128X:$src1), _.info128.ImmAllZerosV)),
            (VMOVDQU16Z128rrkz VK8WM:$mask, VR128X:$src1)>;
  def : Pat<(_.info128.VT (vselect VK8WM:$mask,
                           (_.info128.VT (_.info128.AlignedLdFrag addr:$src)), (_.info128.VT VR128X:$src0))),
            (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
  def : Pat<(_.info128.VT (vselect VK8WM:$mask,
                           (_.info128.VT (_.info128.AlignedLdFrag addr:$src)), _.info128.ImmAllZerosV)),
            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
  def : Pat<(_.info128.VT (vselect VK8WM:$mask,
                           (_.info128.VT (_.info128.LdFrag addr:$src)), (_.info128.VT VR128X:$src0))),
            (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
  def : Pat<(_.info128.VT (vselect VK8WM:$mask,
                           (_.info128.VT (_.info128.LdFrag addr:$src)), _.info128.ImmAllZerosV)),
            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
  def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, (_.info128.VT VR128X:$src0))),
            (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
  def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, undef)),
            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
  def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, _.info128.ImmAllZerosV)),
            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;

  def : Pat<(masked_store (_.info128.VT VR128X:$src), addr:$dst, VK8WM:$mask),
            (VMOVDQU16Z128mrk addr:$dst, VK8WM:$mask, VR128X:$src)>;
}
}

defm : mask_move_lowering_f16_bf16<avx512vl_f16_info>;
defm : mask_move_lowering_f16_bf16<avx512vl_bf16_info>;

// Move Int Doubleword to Packed Double Int
//
let ExeDomain = SSEPackedInt in {
def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
                             "vmovd\t{$src, $dst|$dst, $src}",
                             [(set VR128X:$dst,
                               (v4i32 (scalar_to_vector GR32:$src)))]>,
                             EVEX, Sched<[WriteVecMoveFromGpr]>;
def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
                             "vmovd\t{$src, $dst|$dst, $src}",
                             [(set VR128X:$dst,
                               (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
                             EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
                              "vmovq\t{$src, $dst|$dst, $src}",
                              [(set VR128X:$dst,
                                (v2i64 (scalar_to_vector GR64:$src)))]>,
EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>; 3774let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in 3775def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), 3776 (ins i64mem:$src), 3777 "vmovq\t{$src, $dst|$dst, $src}", []>, 3778 EVEX, REX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>; 3779let isCodeGenOnly = 1 in { 3780def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src), 3781 "vmovq\t{$src, $dst|$dst, $src}", 3782 [(set FR64X:$dst, (bitconvert GR64:$src))]>, 3783 EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>; 3784def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src), 3785 "vmovq\t{$src, $dst|$dst, $src}", 3786 [(set GR64:$dst, (bitconvert FR64X:$src))]>, 3787 EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>; 3788} 3789} // ExeDomain = SSEPackedInt 3790 3791// Move Int Doubleword to Single Scalar 3792// 3793let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in { 3794def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src), 3795 "vmovd\t{$src, $dst|$dst, $src}", 3796 [(set FR32X:$dst, (bitconvert GR32:$src))]>, 3797 EVEX, Sched<[WriteVecMoveFromGpr]>; 3798} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1 3799 3800// Move doubleword from xmm register to r/m32 3801// 3802let ExeDomain = SSEPackedInt in { 3803def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src), 3804 "vmovd\t{$src, $dst|$dst, $src}", 3805 [(set GR32:$dst, (extractelt (v4i32 VR128X:$src), 3806 (iPTR 0)))]>, 3807 EVEX, Sched<[WriteVecMoveToGpr]>; 3808def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs), 3809 (ins i32mem:$dst, VR128X:$src), 3810 "vmovd\t{$src, $dst|$dst, $src}", 3811 [(store (i32 (extractelt (v4i32 VR128X:$src), 3812 (iPTR 0))), addr:$dst)]>, 3813 EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>; 3814} // ExeDomain = SSEPackedInt 3815 3816// Move quadword from xmm1 register to r/m64 3817// 3818let ExeDomain = SSEPackedInt in { 3819def VMOVPQIto64Zrr : 
I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src), 3820 "vmovq\t{$src, $dst|$dst, $src}", 3821 [(set GR64:$dst, (extractelt (v2i64 VR128X:$src), 3822 (iPTR 0)))]>, 3823 TB, PD, EVEX, REX_W, Sched<[WriteVecMoveToGpr]>, 3824 Requires<[HasAVX512]>; 3825 3826let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in 3827def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src), 3828 "vmovq\t{$src, $dst|$dst, $src}", []>, TB, PD, 3829 EVEX, REX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecStore]>, 3830 Requires<[HasAVX512, In64BitMode]>; 3831 3832def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs), 3833 (ins i64mem:$dst, VR128X:$src), 3834 "vmovq\t{$src, $dst|$dst, $src}", 3835 [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)), 3836 addr:$dst)]>, 3837 EVEX, TB, PD, REX_W, EVEX_CD8<64, CD8VT1>, 3838 Sched<[WriteVecStore]>, Requires<[HasAVX512]>; 3839 3840let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in 3841def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst), 3842 (ins VR128X:$src), 3843 "vmovq\t{$src, $dst|$dst, $src}", []>, 3844 EVEX, REX_W, Sched<[SchedWriteVecLogic.XMM]>; 3845} // ExeDomain = SSEPackedInt 3846 3847def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}", 3848 (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>; 3849 3850let Predicates = [HasAVX512] in { 3851 def : Pat<(X86vextractstore64 (v2i64 VR128X:$src), addr:$dst), 3852 (VMOVPQI2QIZmr addr:$dst, VR128X:$src)>; 3853} 3854 3855// Move Scalar Single to Double Int 3856// 3857let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in { 3858def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), 3859 (ins FR32X:$src), 3860 "vmovd\t{$src, $dst|$dst, $src}", 3861 [(set GR32:$dst, (bitconvert FR32X:$src))]>, 3862 EVEX, Sched<[WriteVecMoveToGpr]>; 3863} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1 3864 3865// Move Quadword Int to Packed Quadword Int 3866// 3867let ExeDomain = SSEPackedInt in { 3868def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, 
(outs VR128X:$dst), 3869 (ins i64mem:$src), 3870 "vmovq\t{$src, $dst|$dst, $src}", 3871 [(set VR128X:$dst, 3872 (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, 3873 EVEX, REX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>; 3874} // ExeDomain = SSEPackedInt 3875 3876// Allow "vmovd" but print "vmovq". 3877def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}", 3878 (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>; 3879def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}", 3880 (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>; 3881 3882// Conversions between masks and scalar fp. 3883def : Pat<(v32i1 (bitconvert FR32X:$src)), 3884 (KMOVDkr (VMOVSS2DIZrr FR32X:$src))>; 3885def : Pat<(f32 (bitconvert VK32:$src)), 3886 (VMOVDI2SSZrr (KMOVDrk VK32:$src))>; 3887 3888def : Pat<(v64i1 (bitconvert FR64X:$src)), 3889 (KMOVQkr (VMOVSDto64Zrr FR64X:$src))>; 3890def : Pat<(f64 (bitconvert VK64:$src)), 3891 (VMOV64toSDZrr (KMOVQrk VK64:$src))>; 3892 3893//===----------------------------------------------------------------------===// 3894// AVX-512 MOVSH, MOVSS, MOVSD 3895//===----------------------------------------------------------------------===// 3896 3897multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag, 3898 X86VectorVTInfo _, Predicate prd = HasAVX512> { 3899 let Predicates = !if (!eq (prd, HasFP16), [HasFP16], [prd, OptForSize]) in 3900 def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst), 3901 (ins _.RC:$src1, _.RC:$src2), 3902 !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 3903 [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))], 3904 _.ExeDomain>, EVEX, VVVV, Sched<[SchedWriteFShuffle.XMM]>; 3905 let Predicates = [prd] in { 3906 def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst), 3907 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), 3908 !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|", 3909 "$dst {${mask}} {z}, $src1, $src2}"), 3910 [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask, 3911 (_.VT (OpNode _.RC:$src1, _.RC:$src2)), 3912 
_.ImmAllZerosV)))], 3913 _.ExeDomain>, EVEX, VVVV, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>; 3914 let Constraints = "$src0 = $dst" in 3915 def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst), 3916 (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), 3917 !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|", 3918 "$dst {${mask}}, $src1, $src2}"), 3919 [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask, 3920 (_.VT (OpNode _.RC:$src1, _.RC:$src2)), 3921 (_.VT _.RC:$src0))))], 3922 _.ExeDomain>, EVEX, VVVV, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>; 3923 let canFoldAsLoad = 1, isReMaterializable = 1 in { 3924 def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src), 3925 !strconcat(asm, "\t{$src, $dst|$dst, $src}"), 3926 [(set _.RC:$dst, (_.VT (vzload_frag addr:$src)))], 3927 _.ExeDomain>, EVEX, Sched<[WriteFLoad]>; 3928 // _alt version uses FR32/FR64 register class. 3929 let isCodeGenOnly = 1 in 3930 def rm_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src), 3931 !strconcat(asm, "\t{$src, $dst|$dst, $src}"), 3932 [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))], 3933 _.ExeDomain>, EVEX, Sched<[WriteFLoad]>; 3934 } 3935 let mayLoad = 1, hasSideEffects = 0 in { 3936 let Constraints = "$src0 = $dst" in 3937 def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), 3938 (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src), 3939 !strconcat(asm, "\t{$src, $dst {${mask}}|", 3940 "$dst {${mask}}, $src}"), 3941 [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>; 3942 def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), 3943 (ins _.KRCWM:$mask, _.ScalarMemOp:$src), 3944 !strconcat(asm, "\t{$src, $dst {${mask}} {z}|", 3945 "$dst {${mask}} {z}, $src}"), 3946 [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>; 3947 } 3948 def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src), 3949 !strconcat(asm, "\t{$src, $dst|$dst, $src}"), 3950 [(store _.FRC:$src, addr:$dst)], _.ExeDomain>, 3951 EVEX, Sched<[WriteFStore]>; 
3952 let mayStore = 1, hasSideEffects = 0 in 3953 def mrk: AVX512PI<0x11, MRMDestMem, (outs), 3954 (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.RC:$src), 3955 !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"), 3956 [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>; 3957 } 3958} 3959 3960defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>, 3961 VEX_LIG, TB, XS, EVEX_CD8<32, CD8VT1>; 3962 3963defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>, 3964 VEX_LIG, TB, XD, REX_W, EVEX_CD8<64, CD8VT1>; 3965 3966defm VMOVSHZ : avx512_move_scalar<"vmovsh", X86Movsh, X86vzload16, f16x_info, 3967 HasFP16>, 3968 VEX_LIG, T_MAP5, XS, EVEX_CD8<16, CD8VT1>; 3969 3970multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode, 3971 PatLeaf ZeroFP, X86VectorVTInfo _> { 3972 3973def : Pat<(_.VT (OpNode _.RC:$src0, 3974 (_.VT (scalar_to_vector 3975 (_.EltVT (X86selects VK1WM:$mask, 3976 (_.EltVT _.FRC:$src1), 3977 (_.EltVT _.FRC:$src2))))))), 3978 (!cast<Instruction>(InstrStr#rrk) 3979 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)), 3980 VK1WM:$mask, 3981 (_.VT _.RC:$src0), 3982 (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>; 3983 3984def : Pat<(_.VT (OpNode _.RC:$src0, 3985 (_.VT (scalar_to_vector 3986 (_.EltVT (X86selects VK1WM:$mask, 3987 (_.EltVT _.FRC:$src1), 3988 (_.EltVT ZeroFP))))))), 3989 (!cast<Instruction>(InstrStr#rrkz) 3990 VK1WM:$mask, 3991 (_.VT _.RC:$src0), 3992 (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>; 3993} 3994 3995multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _, 3996 dag Mask, RegisterClass MaskRC> { 3997 3998def : Pat<(masked_store 3999 (_.info512.VT (insert_subvector undef, 4000 (_.info128.VT _.info128.RC:$src), 4001 (iPTR 0))), addr:$dst, Mask), 4002 (!cast<Instruction>(InstrStr#mrk) addr:$dst, 4003 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM), 4004 _.info128.RC:$src)>; 4005 4006} 4007 4008multiclass avx512_store_scalar_lowering_subreg<string InstrStr, 4009 
AVX512VLVectorVTInfo _, 4010 dag Mask, RegisterClass MaskRC, 4011 SubRegIndex subreg> { 4012 4013def : Pat<(masked_store 4014 (_.info512.VT (insert_subvector undef, 4015 (_.info128.VT _.info128.RC:$src), 4016 (iPTR 0))), addr:$dst, Mask), 4017 (!cast<Instruction>(InstrStr#mrk) addr:$dst, 4018 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4019 _.info128.RC:$src)>; 4020 4021} 4022 4023// This matches the more recent codegen from clang that avoids emitting a 512 4024// bit masked store directly. Codegen will widen 128-bit masked store to 512 4025// bits on AVX512F only targets. 4026multiclass avx512_store_scalar_lowering_subreg2<string InstrStr, 4027 AVX512VLVectorVTInfo _, 4028 dag Mask512, dag Mask128, 4029 RegisterClass MaskRC, 4030 SubRegIndex subreg> { 4031 4032// AVX512F pattern. 4033def : Pat<(masked_store 4034 (_.info512.VT (insert_subvector undef, 4035 (_.info128.VT _.info128.RC:$src), 4036 (iPTR 0))), addr:$dst, Mask512), 4037 (!cast<Instruction>(InstrStr#mrk) addr:$dst, 4038 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4039 _.info128.RC:$src)>; 4040 4041// AVX512VL pattern. 
4042def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128), 4043 (!cast<Instruction>(InstrStr#mrk) addr:$dst, 4044 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4045 _.info128.RC:$src)>; 4046} 4047 4048multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _, 4049 dag Mask, RegisterClass MaskRC> { 4050 4051def : Pat<(_.info128.VT (extract_subvector 4052 (_.info512.VT (masked_load addr:$srcAddr, Mask, 4053 _.info512.ImmAllZerosV)), 4054 (iPTR 0))), 4055 (!cast<Instruction>(InstrStr#rmkz) 4056 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM), 4057 addr:$srcAddr)>; 4058 4059def : Pat<(_.info128.VT (extract_subvector 4060 (_.info512.VT (masked_load addr:$srcAddr, Mask, 4061 (_.info512.VT (insert_subvector undef, 4062 (_.info128.VT (X86vzmovl _.info128.RC:$src)), 4063 (iPTR 0))))), 4064 (iPTR 0))), 4065 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src, 4066 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM), 4067 addr:$srcAddr)>; 4068 4069} 4070 4071multiclass avx512_load_scalar_lowering_subreg<string InstrStr, 4072 AVX512VLVectorVTInfo _, 4073 dag Mask, RegisterClass MaskRC, 4074 SubRegIndex subreg> { 4075 4076def : Pat<(_.info128.VT (extract_subvector 4077 (_.info512.VT (masked_load addr:$srcAddr, Mask, 4078 _.info512.ImmAllZerosV)), 4079 (iPTR 0))), 4080 (!cast<Instruction>(InstrStr#rmkz) 4081 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4082 addr:$srcAddr)>; 4083 4084def : Pat<(_.info128.VT (extract_subvector 4085 (_.info512.VT (masked_load addr:$srcAddr, Mask, 4086 (_.info512.VT (insert_subvector undef, 4087 (_.info128.VT (X86vzmovl _.info128.RC:$src)), 4088 (iPTR 0))))), 4089 (iPTR 0))), 4090 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src, 4091 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4092 addr:$srcAddr)>; 4093 4094} 4095 4096// This matches the more recent codegen from clang that avoids emitting a 512 4097// 
// bit masked load directly. Codegen will widen 128-bit masked load to 512
// bits on AVX512F only targets.
multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
                                               AVX512VLVectorVTInfo _,
                                               dag Mask512, dag Mask128,
                                               RegisterClass MaskRC,
                                               SubRegIndex subreg> {
// AVX512F patterns.
def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask512,
                                        _.info512.ImmAllZerosV)),
                         (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask512,
                                        (_.info512.VT (insert_subvector undef,
                                         (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                                         (iPTR 0))))),
                         (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

// AVX512Vl patterns.
def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
                         _.info128.ImmAllZerosV)),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
                         (_.info128.VT (X86vzmovl _.info128.RC:$src)))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;
}

// Instantiations of the scalar move/load/store lowering multiclasses for
// f32 (VMOVSS) and f64 (VMOVSD). Each Mask dag mirrors a shape the
// legalizer/clang codegen actually produces for a one-element mask.
defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;

defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;

// FP16 (VMOVSH) instantiations and scalar-select patterns.
let Predicates = [HasFP16] in {
defm : avx512_move_scalar_lowering<"VMOVSHZ", X86Movsh, fp16imm0, v8f16x_info>;
defm : avx512_store_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
defm : avx512_store_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
                   (v32i1 (insert_subvector
                           (v32i1 immAllZerosV),
                           (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                           (iPTR 0))),
                   (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                   GR8, sub_8bit>;

defm : avx512_load_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
defm : avx512_load_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
                   (v32i1 (insert_subvector
                           (v32i1 immAllZerosV),
                           (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                           (iPTR 0))),
                   (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                   GR8, sub_8bit>;

// Scalar f16 selects: route through the masked VMOVSH register forms by
// bouncing the FR16X operands through VR128X.
def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), (f16 FR16X:$src2))),
          (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrk
                                    (v8f16 (COPY_TO_REGCLASS FR16X:$src2, VR128X)),
                                    VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
                                    (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;

def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), fp16imm0)),
          (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrkz VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
                                    (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;
}

defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (insert_subvector
                           (v16i1 immAllZerosV),
                           (v4i1 (extract_subvector
                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                                  (iPTR 0))),
                           (iPTR 0))),
                   (v4i1 (extract_subvector
                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                          (iPTR 0))), GR8, sub_8bit>;
defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1
                    (extract_subvector
                     (v16i1
                      (insert_subvector
                       (v16i1 immAllZerosV),
                       (v2i1 (extract_subvector
                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                              (iPTR 0))),
                       (iPTR 0))),
                     (iPTR 0))),
                   (v2i1 (extract_subvector
                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                          (iPTR 0))), GR8, sub_8bit>;

defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;

defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (insert_subvector
                           (v16i1 immAllZerosV),
                           (v4i1 (extract_subvector
                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                                  (iPTR 0))),
                           (iPTR 0))),
                   (v4i1 (extract_subvector
                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                          (iPTR 0))), GR8, sub_8bit>;
defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1
                    (extract_subvector
                     (v16i1
                      (insert_subvector
                       (v16i1 immAllZerosV),
                       (v2i1 (extract_subvector
                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                              (iPTR 0))),
                       (iPTR 0))),
                     (iPTR 0))),
                   (v2i1 (extract_subvector
                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                          (iPTR 0))), GR8, sub_8bit>;

// Scalar f32/f64 selects: same VR128X bounce as the f16 patterns above.
def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk
                                    (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
                                    VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
                                    (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;

def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
                                    (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;

def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), (f32 FR32X:$src0))),
          (COPY_TO_REGCLASS
           (v4f32 (VMOVSSZrmk (v4f32 (COPY_TO_REGCLASS FR32X:$src0, VR128X)),
                              VK1WM:$mask, addr:$src)),
           FR32X)>;
def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), fp32imm0)),
          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrmkz VK1WM:$mask, addr:$src)), FR32X)>;

def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk
                                    (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
                                    VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
                                    (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;

def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fp64imm0)),
          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
                                    (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;

def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0))),
          (COPY_TO_REGCLASS
           (v2f64 (VMOVSDZrmk (v2f64 (COPY_TO_REGCLASS FR64X:$src0, VR128X)),
                              VK1WM:$mask, addr:$src)),
           FR64X)>;
def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)),
          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrmkz VK1WM:$mask, addr:$src)), FR64X)>;


// Full-vector selects where only the low element differs also map onto the
// masked scalar moves; $src1 is used for both vector operands.
def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 VR128X:$src2))),
          (VMOVSSZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 VR128X:$src2))),
          (VMOVSDZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;

def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 immAllZerosV))),
          (VMOVSSZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 immAllZerosV))),
          (VMOVSDZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;

// Assembly-only "_REV" (store-form 0x11) encodings of the register moves,
// kept so the disassembler can round-trip both encodings.
let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
  let Predicates = [HasFP16] in {
    def VMOVSHZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                              (ins VR128X:$src1, VR128X:$src2),
                              "vmovsh\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                              []>, T_MAP5, XS, EVEX, VVVV, VEX_LIG,
                              Sched<[SchedWriteFShuffle.XMM]>;

    let Constraints = "$src0 = $dst" in
    def VMOVSHZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                               (ins f16x_info.RC:$src0, f16x_info.KRCWM:$mask,
                                    VR128X:$src1, VR128X:$src2),
                               "vmovsh\t{$src2, $src1, $dst {${mask}}|"#
                               "$dst {${mask}}, $src1, $src2}",
                               []>, T_MAP5, XS, EVEX_K, EVEX, VVVV, VEX_LIG,
                               Sched<[SchedWriteFShuffle.XMM]>;

    def VMOVSHZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                                (ins f16x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
                                "vmovsh\t{$src2, $src1, $dst {${mask}} {z}|"#
                                "$dst {${mask}} {z}, $src1, $src2}",
                                []>, EVEX_KZ, T_MAP5, XS, EVEX, VVVV, VEX_LIG,
                                Sched<[SchedWriteFShuffle.XMM]>;
  }
  def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            []>, TB, XS, EVEX, VVVV, VEX_LIG,
                            Sched<[SchedWriteFShuffle.XMM]>;

  let Constraints = "$src0 = $dst" in
  def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                             (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
                                  VR128X:$src1, VR128X:$src2),
                             "vmovss\t{$src2, $src1, $dst {${mask}}|"#
                             "$dst {${mask}}, $src1, $src2}",
                             []>, EVEX_K, TB, XS, EVEX, VVVV, VEX_LIG,
                             Sched<[SchedWriteFShuffle.XMM]>;

  def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                              (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
                              "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
                              "$dst {${mask}} {z}, $src1, $src2}",
                              []>, EVEX_KZ, TB, XS, EVEX, VVVV, VEX_LIG,
                              Sched<[SchedWriteFShuffle.XMM]>;

  def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            []>, TB, XD, EVEX, VVVV, VEX_LIG, REX_W,
                            Sched<[SchedWriteFShuffle.XMM]>;

  let Constraints = "$src0 = $dst" in
  def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                             (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
                                  VR128X:$src1, VR128X:$src2),
                             "vmovsd\t{$src2, $src1, $dst {${mask}}|"#
                             "$dst {${mask}}, $src1, $src2}",
                             []>, EVEX_K, TB, XD, EVEX, VVVV, VEX_LIG,
                             REX_W, Sched<[SchedWriteFShuffle.XMM]>;

  def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                              (ins f64x_info.KRCWM:$mask, VR128X:$src1,
                                   VR128X:$src2),
                              "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
                              "$dst {${mask}} {z}, $src1, $src2}",
                              []>, EVEX_KZ, TB, XD, EVEX, VVVV, VEX_LIG,
                              REX_W, Sched<[SchedWriteFShuffle.XMM]>;
}

// ".s" assembler aliases select the store-form (_REV) encodings explicitly.
def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VMOVSHZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}}|"#
                "$dst {${mask}}, $src1, $src2}",
                (VMOVSHZrrk_REV VR128X:$dst, VK1WM:$mask,
                 VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}} {z}|"#
                "$dst {${mask}} {z}, $src1, $src2}",
                (VMOVSHZrrkz_REV VR128X:$dst, VK1WM:$mask,
                 VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
                "$dst {${mask}}, $src1, $src2}",
                (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask,
                 VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
                "$dst {${mask}} {z}, $src1, $src2}",
                (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask,
                 VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
                "$dst {${mask}}, $src1, $src2}",
                (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask,
                 VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
                "$dst {${mask}} {z}, $src1, $src2}",
                (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask,
                 VR128X:$src1, VR128X:$src2), 0>;

// Under OptForSize, clear-upper-elements (vzmovl) uses the compact
// VMOVSS encoding against a zeroed register.
let Predicates = [HasAVX512, OptForSize] in {
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
            (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
            (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;

  // Move low f32 and clear high bits.
  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
              (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
              (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;

  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
              (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
              (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;
}

// Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
// VMOVSS/SD. Unfortunately, loses the ability to use XMM16-31.
let Predicates = [HasAVX512, OptForSpeed] in {
  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
                    (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)),
                    (i8 1))), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
                    (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
                    (i8 3))), sub_xmm)>;
}

let Predicates = [HasAVX512] in {
  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
            (VMOVSSZrm addr:$src)>;
  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
            (VMOVSDZrm addr:$src)>;

  // Represent the same patterns above but in the form they appear for
  // 256-bit types
  def : Pat<(v8f32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v4f64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;

  // Represent the same patterns above but in the form they appear for
  // 512-bit types
  def : Pat<(v16f32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v8f64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
}
// FP16 vzmovl/vzload lowering via VMOVSH.
let Predicates = [HasFP16] in {
  def : Pat<(v8f16 (X86vzmovl (v8f16 VR128X:$src))),
            (VMOVSHZrr (v8f16 (AVX512_128_SET0)), VR128X:$src)>;
  def : Pat<(v8i16 (X86vzmovl (v8i16 VR128X:$src))),
            (VMOVSHZrr (v8i16 (AVX512_128_SET0)), VR128X:$src)>;

  // FIXME we need better canonicalization in dag combine
  def : Pat<(v16f16 (X86vzmovl (v16f16 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
              (v8f16 (EXTRACT_SUBREG (v16f16 VR256X:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v16i16 (X86vzmovl (v16i16 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v8i16 (VMOVSHZrr (v8i16 (AVX512_128_SET0)),
              (v8i16 (EXTRACT_SUBREG (v16i16 VR256X:$src), sub_xmm)))), sub_xmm)>;

  // FIXME we need better canonicalization in dag combine
  def : Pat<(v32f16 (X86vzmovl (v32f16 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
              (v8f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v32i16 (X86vzmovl (v32i16 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v8i16 (VMOVSHZrr (v8i16 (AVX512_128_SET0)),
              (v8i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_xmm)))), sub_xmm)>;

  def : Pat<(v8f16 (X86vzload16 addr:$src)),
            (VMOVSHZrm addr:$src)>;

  def : Pat<(v16f16 (X86vzload16 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;

  def : Pat<(v32f16 (X86vzload16 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;
}

// Move low quadword and zero the upper one (vmovq xmm, xmm).
let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
                                  (ins VR128X:$src),
                                  "vmovq\t{$src, $dst|$dst, $src}",
                                  [(set VR128X:$dst, (v2i64 (X86vzmovl
                                                     (v2i64 VR128X:$src))))]>,
                                  EVEX, REX_W;
}

let Predicates = [HasAVX512] in {
  def : Pat<(v4i32 (scalar_to_vector (i32 (anyext GR8:$src)))),
            (VMOVDI2PDIZrr (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
                                               GR8:$src, sub_8bit)))>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
            (VMOVDI2PDIZrr GR32:$src)>;

  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
            (VMOV64toPQIZrr GR64:$src)>;

  // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
  def : Pat<(v4i32 (X86vzload32 addr:$src)),
            (VMOVDI2PDIZrm addr:$src)>;
  def : Pat<(v8i32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
            (VMOVZPQILo2PQIZrr VR128X:$src)>;
  def : Pat<(v2i64 (X86vzload64 addr:$src)),
            (VMOVQI2PQIZrm addr:$src)>;
  def : Pat<(v4i64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;

  // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
  def : Pat<(v16i32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  def : Pat<(v8i64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;

  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2f64 (VMOVZPQILo2PQIZrr
                     (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))),
             sub_xmm)>;
  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2i64 (VMOVZPQILo2PQIZrr
                     (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))),
             sub_xmm)>;

  def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2f64 (VMOVZPQILo2PQIZrr
                     (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))),
             sub_xmm)>;
  def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2i64 (VMOVZPQILo2PQIZrr
                     (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))),
             sub_xmm)>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - Non-temporals
//===----------------------------------------------------------------------===//

def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
                            (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
                            [], SSEPackedInt>,
Sched<[SchedWriteVecMoveLS.ZMM.RM]>, 4553 EVEX, T8, PD, EVEX_V512, EVEX_CD8<64, CD8VF>; 4554 4555let Predicates = [HasVLX] in { 4556 def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst), 4557 (ins i256mem:$src), 4558 "vmovntdqa\t{$src, $dst|$dst, $src}", 4559 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>, 4560 EVEX, T8, PD, EVEX_V256, EVEX_CD8<64, CD8VF>; 4561 4562 def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst), 4563 (ins i128mem:$src), 4564 "vmovntdqa\t{$src, $dst|$dst, $src}", 4565 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>, 4566 EVEX, T8, PD, EVEX_V128, EVEX_CD8<64, CD8VF>; 4567} 4568 4569multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 4570 X86SchedWriteMoveLS Sched, 4571 PatFrag st_frag = alignednontemporalstore> { 4572 let SchedRW = [Sched.MR], AddedComplexity = 400 in 4573 def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src), 4574 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 4575 [(st_frag (_.VT _.RC:$src), addr:$dst)], 4576 _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>; 4577} 4578 4579multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr, 4580 AVX512VLVectorVTInfo VTInfo, 4581 X86SchedWriteMoveLSWidths Sched> { 4582 let Predicates = [HasAVX512] in 4583 defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512; 4584 4585 let Predicates = [HasAVX512, HasVLX] in { 4586 defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256; 4587 defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128; 4588 } 4589} 4590 4591defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info, 4592 SchedWriteVecMoveLSNT>, TB, PD; 4593defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info, 4594 SchedWriteFMoveLSNT>, TB, PD, REX_W; 4595defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info, 4596 SchedWriteFMoveLSNT>, TB; 4597 4598let Predicates = [HasAVX512], AddedComplexity = 400 in 
{ 4599 def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst), 4600 (VMOVNTDQZmr addr:$dst, VR512:$src)>; 4601 def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst), 4602 (VMOVNTDQZmr addr:$dst, VR512:$src)>; 4603 def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst), 4604 (VMOVNTDQZmr addr:$dst, VR512:$src)>; 4605 4606 def : Pat<(v8f64 (alignednontemporalload addr:$src)), 4607 (VMOVNTDQAZrm addr:$src)>; 4608 def : Pat<(v16f32 (alignednontemporalload addr:$src)), 4609 (VMOVNTDQAZrm addr:$src)>; 4610 def : Pat<(v8i64 (alignednontemporalload addr:$src)), 4611 (VMOVNTDQAZrm addr:$src)>; 4612 def : Pat<(v16i32 (alignednontemporalload addr:$src)), 4613 (VMOVNTDQAZrm addr:$src)>; 4614 def : Pat<(v32i16 (alignednontemporalload addr:$src)), 4615 (VMOVNTDQAZrm addr:$src)>; 4616 def : Pat<(v64i8 (alignednontemporalload addr:$src)), 4617 (VMOVNTDQAZrm addr:$src)>; 4618} 4619 4620let Predicates = [HasVLX], AddedComplexity = 400 in { 4621 def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst), 4622 (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>; 4623 def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst), 4624 (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>; 4625 def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst), 4626 (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>; 4627 4628 def : Pat<(v4f64 (alignednontemporalload addr:$src)), 4629 (VMOVNTDQAZ256rm addr:$src)>; 4630 def : Pat<(v8f32 (alignednontemporalload addr:$src)), 4631 (VMOVNTDQAZ256rm addr:$src)>; 4632 def : Pat<(v4i64 (alignednontemporalload addr:$src)), 4633 (VMOVNTDQAZ256rm addr:$src)>; 4634 def : Pat<(v8i32 (alignednontemporalload addr:$src)), 4635 (VMOVNTDQAZ256rm addr:$src)>; 4636 def : Pat<(v16i16 (alignednontemporalload addr:$src)), 4637 (VMOVNTDQAZ256rm addr:$src)>; 4638 def : Pat<(v32i8 (alignednontemporalload addr:$src)), 4639 (VMOVNTDQAZ256rm addr:$src)>; 4640 4641 def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst), 4642 
(VMOVNTDQZ128mr addr:$dst, VR128X:$src)>; 4643 def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst), 4644 (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>; 4645 def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst), 4646 (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>; 4647 4648 def : Pat<(v2f64 (alignednontemporalload addr:$src)), 4649 (VMOVNTDQAZ128rm addr:$src)>; 4650 def : Pat<(v4f32 (alignednontemporalload addr:$src)), 4651 (VMOVNTDQAZ128rm addr:$src)>; 4652 def : Pat<(v2i64 (alignednontemporalload addr:$src)), 4653 (VMOVNTDQAZ128rm addr:$src)>; 4654 def : Pat<(v4i32 (alignednontemporalload addr:$src)), 4655 (VMOVNTDQAZ128rm addr:$src)>; 4656 def : Pat<(v8i16 (alignednontemporalload addr:$src)), 4657 (VMOVNTDQAZ128rm addr:$src)>; 4658 def : Pat<(v16i8 (alignednontemporalload addr:$src)), 4659 (VMOVNTDQAZ128rm addr:$src)>; 4660} 4661 4662//===----------------------------------------------------------------------===// 4663// AVX-512 - Integer arithmetic 4664// 4665multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, 4666 X86VectorVTInfo _, X86FoldableSchedWrite sched, 4667 bit IsCommutable = 0> { 4668 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 4669 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 4670 "$src2, $src1", "$src1, $src2", 4671 (_.VT (OpNode _.RC:$src1, _.RC:$src2)), 4672 IsCommutable, IsCommutable>, AVX512BIBase, EVEX, VVVV, 4673 Sched<[sched]>; 4674 4675 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 4676 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr, 4677 "$src2, $src1", "$src1, $src2", 4678 (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2)))>, 4679 AVX512BIBase, EVEX, VVVV, 4680 Sched<[sched.Folded, sched.ReadAfterFold]>; 4681} 4682 4683multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode, 4684 X86VectorVTInfo _, X86FoldableSchedWrite sched, 4685 bit IsCommutable = 0> : 4686 avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> { 4687 defm rmb : 
AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 4688 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr, 4689 "${src2}"#_.BroadcastStr#", $src1", 4690 "$src1, ${src2}"#_.BroadcastStr, 4691 (_.VT (OpNode _.RC:$src1, 4692 (_.BroadcastLdFrag addr:$src2)))>, 4693 AVX512BIBase, EVEX, VVVV, EVEX_B, 4694 Sched<[sched.Folded, sched.ReadAfterFold]>; 4695} 4696 4697multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode, 4698 AVX512VLVectorVTInfo VTInfo, 4699 X86SchedWriteWidths sched, Predicate prd, 4700 bit IsCommutable = 0> { 4701 let Predicates = [prd] in 4702 defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM, 4703 IsCommutable>, EVEX_V512; 4704 4705 let Predicates = [prd, HasVLX] in { 4706 defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256, 4707 sched.YMM, IsCommutable>, EVEX_V256; 4708 defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128, 4709 sched.XMM, IsCommutable>, EVEX_V128; 4710 } 4711} 4712 4713multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode, 4714 AVX512VLVectorVTInfo VTInfo, 4715 X86SchedWriteWidths sched, Predicate prd, 4716 bit IsCommutable = 0> { 4717 let Predicates = [prd] in 4718 defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM, 4719 IsCommutable>, EVEX_V512; 4720 4721 let Predicates = [prd, HasVLX] in { 4722 defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256, 4723 sched.YMM, IsCommutable>, EVEX_V256; 4724 defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128, 4725 sched.XMM, IsCommutable>, EVEX_V128; 4726 } 4727} 4728 4729multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode, 4730 X86SchedWriteWidths sched, Predicate prd, 4731 bit IsCommutable = 0> { 4732 defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info, 4733 sched, prd, IsCommutable>, 4734 REX_W, EVEX_CD8<64, CD8VF>; 4735} 4736 4737multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, 
SDNode OpNode, 4738 X86SchedWriteWidths sched, Predicate prd, 4739 bit IsCommutable = 0> { 4740 defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info, 4741 sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>; 4742} 4743 4744multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode, 4745 X86SchedWriteWidths sched, Predicate prd, 4746 bit IsCommutable = 0> { 4747 defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info, 4748 sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>, 4749 WIG; 4750} 4751 4752multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode, 4753 X86SchedWriteWidths sched, Predicate prd, 4754 bit IsCommutable = 0> { 4755 defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info, 4756 sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>, 4757 WIG; 4758} 4759 4760multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr, 4761 SDNode OpNode, X86SchedWriteWidths sched, 4762 Predicate prd, bit IsCommutable = 0> { 4763 defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd, 4764 IsCommutable>; 4765 4766 defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd, 4767 IsCommutable>; 4768} 4769 4770multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr, 4771 SDNode OpNode, X86SchedWriteWidths sched, 4772 Predicate prd, bit IsCommutable = 0> { 4773 defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd, 4774 IsCommutable>; 4775 4776 defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd, 4777 IsCommutable>; 4778} 4779 4780multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w, 4781 bits<8> opc_d, bits<8> opc_q, 4782 string OpcodeStr, SDNode OpNode, 4783 X86SchedWriteWidths sched, 4784 bit IsCommutable = 0> { 4785 defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, 4786 sched, HasAVX512, IsCommutable>, 4787 avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, 
4788 sched, HasBWI, IsCommutable>; 4789} 4790 4791multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, 4792 X86FoldableSchedWrite sched, 4793 SDNode OpNode,X86VectorVTInfo _Src, 4794 X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct, 4795 bit IsCommutable = 0> { 4796 defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst), 4797 (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr, 4798 "$src2, $src1","$src1, $src2", 4799 (_Dst.VT (OpNode 4800 (_Src.VT _Src.RC:$src1), 4801 (_Src.VT _Src.RC:$src2))), 4802 IsCommutable>, 4803 AVX512BIBase, EVEX, VVVV, Sched<[sched]>; 4804 defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst), 4805 (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr, 4806 "$src2, $src1", "$src1, $src2", 4807 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), 4808 (_Src.LdFrag addr:$src2)))>, 4809 AVX512BIBase, EVEX, VVVV, 4810 Sched<[sched.Folded, sched.ReadAfterFold]>; 4811 4812 defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst), 4813 (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2), 4814 OpcodeStr, 4815 "${src2}"#_Brdct.BroadcastStr#", $src1", 4816 "$src1, ${src2}"#_Brdct.BroadcastStr, 4817 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert 4818 (_Brdct.VT (_Brdct.BroadcastLdFrag addr:$src2)))))>, 4819 AVX512BIBase, EVEX, VVVV, EVEX_B, 4820 Sched<[sched.Folded, sched.ReadAfterFold]>; 4821} 4822 4823defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add, 4824 SchedWriteVecALU, 1>; 4825defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub, 4826 SchedWriteVecALU, 0>; 4827defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat, 4828 SchedWriteVecALU, HasBWI, 1>; 4829defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat, 4830 SchedWriteVecALU, HasBWI, 0>; 4831defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat, 4832 SchedWriteVecALU, HasBWI, 1>; 4833defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat, 4834 SchedWriteVecALU, HasBWI, 0>; 
// Multiplies. VPMULLQ requires DQI; word-sized multiplies require BWI.
defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
                                    SchedWritePMULLD, HasAVX512, 1>, T8;
defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
                                    SchedWriteVecIMul, HasBWI, 1>;
defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
                                    SchedWriteVecIMul, HasDQI, 1>, T8;
defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
                                    HasBWI, 1>;
defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
                                     HasBWI, 1>;
defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
                                      SchedWriteVecIMul, HasBWI, 1>, T8;
defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", avgceilu,
                                   SchedWriteVecALU, HasBWI, 1>;
defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
                                    SchedWriteVecIMul, HasAVX512, 1>, T8;
defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
                                     SchedWriteVecIMul, HasAVX512, 1>;

// Instantiate avx512_binop_rm2 at all widths; the broadcast operand is always
// qword-typed (v8i64/v4i64/v2i64).
multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
                            X86SchedWriteWidths sched,
                            AVX512VLVectorVTInfo _SrcVTInfo,
                            AVX512VLVectorVTInfo _DstVTInfo,
                            SDNode OpNode, Predicate prd, bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
                                 _SrcVTInfo.info512, _DstVTInfo.info512,
                                 v8i64_info, IsCommutable>,
                                 EVEX_V512, EVEX_CD8<64, CD8VF>, REX_W;
  let Predicates = [HasVLX, prd] in {
    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
                                      _SrcVTInfo.info256, _DstVTInfo.info256,
                                      v4i64x_info, IsCommutable>,
                                      EVEX_V256, EVEX_CD8<64, CD8VF>, REX_W;
    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
                                      _SrcVTInfo.info128, _DstVTInfo.info128,
                                      v2i64x_info, IsCommutable>,
                                      EVEX_V128, EVEX_CD8<64, CD8VF>, REX_W;
  }
}

defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
                                       avx512vl_i8_info, avx512vl_i8_info,
                                       X86multishift, HasVBMI, 0>, T8;

// Pack/madd helpers: ops whose source and destination element types differ.
// The broadcast form here uses the *source* type's broadcast fragment.
multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
                            X86FoldableSchedWrite sched> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                             (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
                             OpcodeStr,
                             "${src2}"#_Src.BroadcastStr#", $src1",
                             "$src1, ${src2}"#_Src.BroadcastStr,
                             (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
                                      (_Src.VT (_Src.BroadcastLdFrag addr:$src2)))))>,
                             EVEX, VVVV, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
                             Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
                           SDNode OpNode,X86VectorVTInfo _Src,
                           X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
                           bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
                            "$src2, $src1","$src1, $src2",
                            (_Dst.VT (OpNode
                                      (_Src.VT _Src.RC:$src1),
                                      (_Src.VT _Src.RC:$src2))),
                            IsCommutable, IsCommutable>,
                            EVEX_CD8<_Src.EltSize, CD8VF>, EVEX, VVVV, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                             (_Src.LdFrag addr:$src2)))>,
                            EVEX, VVVV, EVEX_CD8<_Src.EltSize, CD8VF>,
                            Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// dword -> word packs get a broadcast form; word -> byte packs do not.
multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
                                v32i16_info, SchedWriteShuffle.ZMM>,
                avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
                                 v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
                                     v16i16x_info, SchedWriteShuffle.YMM>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
                                      v16i16x_info, SchedWriteShuffle.YMM>,
                                      EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
                                     v8i16x_info, SchedWriteShuffle.XMM>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
                                      v8i16x_info, SchedWriteShuffle.XMM>,
                                      EVEX_V128;
  }
}
multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
                                   SDNode OpNode> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info,
                                SchedWriteShuffle.ZMM>, EVEX_V512, WIG;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
                                     v32i8x_info, SchedWriteShuffle.YMM>,
                                     EVEX_V256, WIG;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
                                     v16i8x_info, SchedWriteShuffle.XMM>,
                                     EVEX_V128, WIG;
  }
}

multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
                         SDNode OpNode, AVX512VLVectorVTInfo _Src,
                         AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
                                _Dst.info512, SchedWriteVecIMul.ZMM,
                                IsCommutable>, EVEX_V512;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
                                     _Dst.info256, SchedWriteVecIMul.YMM,
                                     IsCommutable>, EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
                                     _Dst.info128, SchedWriteVecIMul.XMM,
                                     IsCommutable>, EVEX_V128;
  }
}

defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;

defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
                                avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8, WIG;
defm VPMADDWD : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
                              avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, WIG;

// Integer min/max for every element size.
defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
                                    SchedWriteVecALU, HasBWI, 1>, T8;
defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8;
defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8;

defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
                                    SchedWriteVecALU, HasBWI, 1>, T8;
defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8;
defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8;

defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
                                    SchedWriteVecALU, HasBWI, 1>, T8;
defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8;
defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8;

defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
                                    SchedWriteVecALU, HasBWI, 1>, T8;
defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8;
defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8;

// PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX, HasEVEX512.
let Predicates = [HasDQI, NoVLX, HasEVEX512] in {
  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG
                (VPMULLQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;
  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (VPMULLQZrmb
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    addr:$src2),
             sub_ymm)>;

  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG
                (VPMULLQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (VPMULLQZrmb
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    addr:$src2),
             sub_xmm)>;
}

// Lower 128/256-bit i64 min/max through the 512-bit instruction by widening
// the operands into a v8i64 register and extracting the low subregister.
multiclass avx512_min_max_lowering<string Instr, SDNode OpNode> {
  def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rr")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;
  def : Pat<(v4i64 (OpNode (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rmb")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    addr:$src2),
             sub_ymm)>;

  def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rr")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
  def : Pat<(v2i64 (OpNode (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rmb")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    addr:$src2),
             sub_xmm)>;
}

let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
  defm : avx512_min_max_lowering<"VPMAXUQZ", umax>;
  defm : avx512_min_max_lowering<"VPMINUQZ", umin>;
  defm : avx512_min_max_lowering<"VPMAXSQZ", smax>;
  defm : avx512_min_max_lowering<"VPMINSQZ", smin>;
}

//===----------------------------------------------------------------------===//
// AVX-512  Logical Instructions
//===----------------------------------------------------------------------===//

defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
                                   SchedWriteVecLogic, HasAVX512, 1>;
defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
                                  SchedWriteVecLogic, HasAVX512, 1>;
defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
                                   SchedWriteVecLogic, HasAVX512, 1>;
defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
                                    SchedWriteVecLogic, HasAVX512>;

// Only dword/qword forms of the logic ops are defined above; bitwise ops are
// element-size agnostic, so reuse the qword instructions for byte/word vectors.
let Predicates = [HasVLX] in {
  def : Pat<(v16i8 (and VR128X:$src1, VR128X:$src2)),
            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (and VR128X:$src1, VR128X:$src2)),
            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v16i8 (or VR128X:$src1, VR128X:$src2)),
            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (or VR128X:$src1, VR128X:$src2)),
            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v16i8 (xor VR128X:$src1, VR128X:$src2)),
            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (xor VR128X:$src1, VR128X:$src2)),
            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v16i8 (X86andnp VR128X:$src1, VR128X:$src2)),
            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (X86andnp VR128X:$src1, VR128X:$src2)),
            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(and VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(and VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(or VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPORQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(or VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPORQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(xor VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(xor VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)),
            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)),
            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(v32i8 (or VR256X:$src1, VR256X:$src2)),
            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (or VR256X:$src1, VR256X:$src2)),
            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(v32i8 (xor VR256X:$src1, VR256X:$src2)),
            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (xor VR256X:$src1, VR256X:$src2)),
            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(v32i8 (X86andnp VR256X:$src1, VR256X:$src2)),
            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (X86andnp VR256X:$src1, VR256X:$src2)),
            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(and VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(and VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;

  def : Pat<(or VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPORQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(or VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPORQZ256rm VR256X:$src1, addr:$src2)>;

  def : Pat<(xor VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(xor VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
}

let Predicates = [HasAVX512] in {
  def : Pat<(v64i8 (and VR512:$src1, VR512:$src2)),
            (VPANDQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (and VR512:$src1, VR512:$src2)),
            (VPANDQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(v64i8 (or VR512:$src1, VR512:$src2)),
            (VPORQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (or VR512:$src1, VR512:$src2)),
            (VPORQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(v64i8 (xor VR512:$src1, VR512:$src2)),
            (VPXORQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (xor VR512:$src1, VR512:$src2)),
            (VPXORQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(v64i8 (X86andnp VR512:$src1, VR512:$src2)),
            (VPANDNQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (X86andnp VR512:$src1, VR512:$src2)),
            (VPANDNQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(and VR512:$src1, (loadv64i8 addr:$src2)),
            (VPANDQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(and VR512:$src1, (loadv32i16 addr:$src2)),
            (VPANDQZrm VR512:$src1, addr:$src2)>;

  def : Pat<(or VR512:$src1, (loadv64i8 addr:$src2)),
            (VPORQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(or VR512:$src1, (loadv32i16 addr:$src2)),
            (VPORQZrm VR512:$src1, addr:$src2)>;

  def : Pat<(xor VR512:$src1, (loadv64i8 addr:$src2)),
            (VPXORQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(xor VR512:$src1, (loadv32i16 addr:$src2)),
            (VPXORQZrm VR512:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR512:$src1, (loadv64i8 addr:$src2)),
            (VPANDNQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)),
            (VPANDNQZrm VR512:$src1, addr:$src2)>;
}

// Patterns to catch vselect with different type than logic op.
multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode,
                                   X86VectorVTInfo _,
                                   X86VectorVTInfo IntInfo> {
  // Masked register-register logical operations.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, _.RC:$src2)>;

  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
             _.RC:$src2)>;

  // Masked register-memory logical operations.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
                                            (load addr:$src2)))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
                                            (load addr:$src2)))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
             addr:$src2)>;
}

multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode,
                                         X86VectorVTInfo _,
                                         X86VectorVTInfo IntInfo> {
  // Register-broadcast logical operations.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert
                    (IntInfo.VT (OpNode _.RC:$src1,
                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert
                    (IntInfo.VT (OpNode _.RC:$src1,
                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rmbkz) _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
}

multiclass avx512_logical_lowering_sizes<string InstrStr, SDNode OpNode,
                                         AVX512VLVectorVTInfo SelectInfo,
                                         AVX512VLVectorVTInfo IntInfo> {
let Predicates = [HasVLX] in {
  defm : avx512_logical_lowering<InstrStr#"Z128", OpNode, SelectInfo.info128,
                                 IntInfo.info128>;
  defm : avx512_logical_lowering<InstrStr#"Z256", OpNode, SelectInfo.info256,
                                 IntInfo.info256>;
}
let Predicates = [HasAVX512] in {
  defm : avx512_logical_lowering<InstrStr#"Z", OpNode, SelectInfo.info512,
                                 IntInfo.info512>;
}
}

multiclass avx512_logical_lowering_sizes_bcast<string InstrStr, SDNode OpNode,
                                               AVX512VLVectorVTInfo SelectInfo,
                                               AVX512VLVectorVTInfo IntInfo> {
let Predicates = [HasVLX] in {
  defm : avx512_logical_lowering_bcast<InstrStr#"Z128", OpNode,
                                       SelectInfo.info128, IntInfo.info128>;
  defm : avx512_logical_lowering_bcast<InstrStr#"Z256", OpNode,
                                       SelectInfo.info256, IntInfo.info256>;
}
let Predicates = [HasAVX512] in {
  defm : avx512_logical_lowering_bcast<InstrStr#"Z", OpNode,
                                       SelectInfo.info512, IntInfo.info512>;
}
}

multiclass avx512_logical_lowering_types<string InstrStr, SDNode OpNode> {
  // i64 vselect with i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i8_info>;

  // i32 vselect with i64/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i8_info>;

  // f32 vselect with i64/i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i8_info>;

  // f64 vselect with i64/i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i8_info>;

  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"D", OpNode,
                                             avx512vl_f32_info,
                                             avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"Q", OpNode,
                                             avx512vl_f64_info,
                                             avx512vl_i64_info>;
}

defm : avx512_logical_lowering_types<"VPAND", and>;
defm : avx512_logical_lowering_types<"VPOR", or>;
defm : avx512_logical_lowering_types<"VPXOR", xor>;
defm : avx512_logical_lowering_types<"VPANDN", X86andnp>;
//===----------------------------------------------------------------------===//
// AVX-512  FP arithmetic
//===----------------------------------------------------------------------===//

// Scalar FP binop: maskable intrinsic forms (rr_Int/rm_Int, operating on the
// whole XMM register) plus isCodeGenOnly FRC-register forms used when only
// the scalar element is live. VecNode drives the intrinsic patterns, OpNode
// the scalar FRC patterns.
multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                            SDPatternOperator OpNode, SDNode VecNode,
                            X86FoldableSchedWrite sched, bit IsCommutable> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
                           Sched<[sched]>;

  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1,
                                          (_.ScalarIntMemFrags addr:$src2)))>,
                           Sched<[sched.Folded, sched.ReadAfterFold]>;
  let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.FRC:$src2),
                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
                          Sched<[sched]> {
    let isCommutable = IsCommutable;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                          [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                                         (_.ScalarLdFrag addr:$src2)))]>,
                          Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
  }
}

// Static-rounding (embedded RC) register-register form of a scalar FP binop.
multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                                  SDNode VecNode, X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
                          "$rc, $src2, $src1", "$src1, $src2, $rc",
                          (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                          (i32 timm:$rc))>,
                          EVEX_B, EVEX_RC, Sched<[sched]>;
}

// Scalar FP binop with an additional {sae} (suppress-all-exceptions) form.
multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                                SDNode OpNode, SDNode VecNode, SDNode SaeNode,
                                X86FoldableSchedWrite sched, bit IsCommutable> {
  let ExeDomain = _.ExeDomain in {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
                           Sched<[sched]>, SIMD_EXC;

  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1,
                                          (_.ScalarIntMemFrags addr:$src2)))>,
                           Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;

  let isCodeGenOnly = 1, Predicates = [HasAVX512],
      Uses = [MXCSR], mayRaiseFPException = 1 in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.FRC:$src2),
                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
                          Sched<[sched]> {
    let isCommutable = IsCommutable;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                          [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                                         (_.ScalarLdFrag addr:$src2)))]>,
                          Sched<[sched.Folded, sched.ReadAfterFold]>;
  }

  let Uses = [MXCSR] in
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                            (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
                            EVEX_B, Sched<[sched]>;
  }
}

// SS/SD (and FP16 SH) scalar binops with embedded-rounding variants.
multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNode,
                                SDNode VecNode, SDNode RndNode,
                                X86SchedWriteSizes sched, bit IsCommutable> {
  defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
                              sched.PS.Scl, IsCommutable>,
             avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode,
                                    sched.PS.Scl>,
             TB, XS, EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
                              sched.PD.Scl, IsCommutable>,
             avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode,
                                    sched.PD.Scl>,
             TB, XD, REX_W, EVEX, VVVV, VEX_LIG, EVEX_CD8<64, CD8VT1>;
  let Predicates = [HasFP16] in
    defm SHZ : avx512_fp_scalar<opc, OpcodeStr#"sh", f16x_info, OpNode,
                                VecNode, sched.PH.Scl, IsCommutable>,
               avx512_fp_scalar_round<opc, OpcodeStr#"sh", f16x_info, RndNode,
                                      sched.PH.Scl>,
               T_MAP5, XS, EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>;
}

// SS/SD (and FP16 SH) scalar binops with {sae} variants (min/max family).
multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode VecNode, SDNode SaeNode,
                              X86SchedWriteSizes sched, bit IsCommutable> {
  defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
                                  VecNode, SaeNode, sched.PS.Scl, IsCommutable>,
                                  TB, XS, EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
                                  VecNode, SaeNode, sched.PD.Scl, IsCommutable>,
                                  TB, XD, REX_W, EVEX, VVVV, VEX_LIG, EVEX_CD8<64, CD8VT1>;
  let Predicates = [HasFP16] in {
    defm SHZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sh", f16x_info, OpNode,
                                    VecNode, SaeNode, sched.PH.Scl, IsCommutable>,
                                    T_MAP5, XS, EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>;
  }
}
defm VADD : avx512_binop_s_round<0x58, "vadd", any_fadd, X86fadds, X86faddRnds,
                                 SchedWriteFAddSizes, 1>;
defm VMUL : avx512_binop_s_round<0x59, "vmul", any_fmul, X86fmuls, X86fmulRnds,
                                 SchedWriteFMulSizes, 1>;
defm VSUB : avx512_binop_s_round<0x5C, "vsub", any_fsub, X86fsubs, X86fsubRnds,
                                 SchedWriteFAddSizes, 0>;
defm VDIV : avx512_binop_s_round<0x5E, "vdiv", any_fdiv, X86fdivs, X86fdivRnds,
                                 SchedWriteFDivSizes, 0>;
defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminSAEs,
                               SchedWriteFCmpSizes, 0>;
defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs,
                               SchedWriteFCmpSizes, 0>;

// MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
// X86fminc and X86fmaxc instead of X86fmin and X86fmax
multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
                                    X86VectorVTInfo _, SDNode OpNode,
                                    X86FoldableSchedWrite sched> {
  let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.FRC:$src2),
                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
                          Sched<[sched]> {
    let isCommutable = 1;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                          [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                                         (_.ScalarLdFrag addr:$src2)))]>,
                          Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
                                         SchedWriteFCmp.Scl>, TB, XS,
                                         EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;

defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
                                         SchedWriteFCmp.Scl>, TB, XD,
                                         REX_W, EVEX, VVVV, VEX_LIG,
                                         EVEX_CD8<64, CD8VT1>, SIMD_EXC;

defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
                                         SchedWriteFCmp.Scl>, TB, XS,
                                         EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;

defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, 
                                         X86fmaxc,
                                         SchedWriteFCmp.Scl>, TB, XD,
                                         REX_W, EVEX, VVVV, VEX_LIG,
                                         EVEX_CD8<64, CD8VT1>, SIMD_EXC;

defm VMINCSHZ : avx512_comutable_binop_s<0x5D, "vminsh", f16x_info, X86fminc,
                                         SchedWriteFCmp.Scl>, T_MAP5, XS,
                                         EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC;

defm VMAXCSHZ : avx512_comutable_binop_s<0x5F, "vmaxsh", f16x_info, X86fmaxc,
                                         SchedWriteFCmp.Scl>, T_MAP5, XS,
                                         EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC;

// Packed FP binop: rr, rm and broadcast (rmb) forms. OpNode selects the
// unmasked patterns and MaskOpNode the masked ones (split so the two can be
// different operators, e.g. strict-FP vs. plain nodes).
multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDPatternOperator MaskOpNode,
                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
                            bit IsCommutable,
                            bit IsKCommutable = IsCommutable,
                            string suffix = _.Suffix,
                            string ClobberConstraint = "",
                            bit MayRaiseFPException = 1> {
  let ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR], mayRaiseFPException = MayRaiseFPException in {
  defm rr: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#suffix,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                  (_.VT (MaskOpNode _.RC:$src1, _.RC:$src2)), ClobberConstraint,
                  IsCommutable, IsKCommutable, IsKCommutable>, EVEX, VVVV, Sched<[sched]>;
  let mayLoad = 1 in {
    defm rm: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#suffix,
                    "$src2, $src1", "$src1, $src2",
                    (OpNode _.RC:$src1, (_.LdFrag addr:$src2)),
                    (MaskOpNode _.RC:$src1, (_.LdFrag addr:$src2)),
                    ClobberConstraint>, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
    defm rmb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
                     (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#suffix,
                     "${src2}"#_.BroadcastStr#", $src1",
                     "$src1, ${src2}"#_.BroadcastStr,
                     (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
                     (MaskOpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
                     ClobberConstraint>, EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
    }
  }
}

// Packed FP binop with embedded rounding control (512-bit only).
multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
                                  SDPatternOperator OpNodeRnd,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  string suffix = _.Suffix,
                                  string ClobberConstraint = ""> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr#suffix,
                  "$rc, $src2, $src1", "$src1, $src2, $rc",
                  (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc))),
                  0, 0, 0, vselect_mask, ClobberConstraint>,
                  EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>;
}

// Packed FP binop with {sae} (512-bit only).
multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNodeSAE,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
                  "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                  (_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>,
                  EVEX, VVVV, EVEX_B, Sched<[sched]>;
}

// PS/PD packed binops at 512-bit (and 128/256-bit under VLX).
multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                             SDPatternOperator MaskOpNode,
                             Predicate prd, X86SchedWriteSizes sched,
                             bit IsCommutable = 0,
                             bit IsPD128Commutable = IsCommutable> {
  let Predicates = [prd] in {
  defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
                              sched.PS.ZMM, IsCommutable>, EVEX_V512, TB,
                              EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f64_info,
                              sched.PD.ZMM, IsCommutable>, EVEX_V512, TB, PD, REX_W,
                              EVEX_CD8<64, CD8VF>;
  }

  // Define only if AVX512VL feature is present.
  let Predicates = [prd, HasVLX] in {
    defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
                                   sched.PS.XMM, IsCommutable>, EVEX_V128, TB,
                                   EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
                                   sched.PS.YMM, IsCommutable>, EVEX_V256, TB,
                                   EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v2f64x_info,
                                   sched.PD.XMM, IsPD128Commutable,
                                   IsCommutable>, EVEX_V128, TB, PD, REX_W,
                                   EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f64x_info,
                                   sched.PD.YMM, IsCommutable>, EVEX_V256, TB, PD, REX_W,
                                   EVEX_CD8<64, CD8VF>;
  }
}

// FP16 (PH) packed binops; gated on HasFP16 / HasVLX.
multiclass avx512_fp_binop_ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                              SDPatternOperator MaskOpNode,
                              X86SchedWriteSizes sched, bit IsCommutable = 0> {
  let Predicates = [HasFP16] in {
    defm PHZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v32f16_info,
                                sched.PH.ZMM, IsCommutable>, EVEX_V512, T_MAP5,
                                EVEX_CD8<16, CD8VF>;
  }
  let Predicates = [HasVLX, HasFP16] in {
    defm PHZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f16x_info,
                                   sched.PH.XMM, IsCommutable>, EVEX_V128, T_MAP5,
                                   EVEX_CD8<16, CD8VF>;
    defm PHZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f16x_info,
                                   sched.PH.YMM, IsCommutable>, EVEX_V256, T_MAP5,
                                   EVEX_CD8<16, CD8VF>;
  }
}

let Uses = [MXCSR] in
multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
                                   X86SchedWriteSizes sched> {
  let Predicates = [HasFP16] in {
    defm PHZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
                                      v32f16_info>,
                                      EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>;
  }
  defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
                                    v16f32_info>,
                                    EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
                                    v8f64_info>,
                                    EVEX_V512, TB, PD, REX_W,EVEX_CD8<64, CD8VF>;
}

let Uses = [MXCSR] in
multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
                                 X86SchedWriteSizes sched> {
  let Predicates = [HasFP16] in {
    defm PHZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
                                    v32f16_info>,
                                    EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>;
  }
  defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
                                  v16f32_info>,
                                  EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
                                  v8f64_info>,
                                  EVEX_V512, TB, PD, REX_W,EVEX_CD8<64, CD8VF>;
}

defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, fadd, HasAVX512,
                              SchedWriteFAddSizes, 1>,
            avx512_fp_binop_ph<0x58, "vadd", any_fadd, fadd, SchedWriteFAddSizes, 1>,
            avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
defm VMUL : avx512_fp_binop_p<0x59, "vmul", any_fmul, fmul, HasAVX512,
                              SchedWriteFMulSizes, 1>,
            avx512_fp_binop_ph<0x59, "vmul", any_fmul, fmul, SchedWriteFMulSizes, 1>,
            avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
defm VSUB : avx512_fp_binop_p<0x5C, "vsub", any_fsub, fsub, HasAVX512,
                              SchedWriteFAddSizes>,
            avx512_fp_binop_ph<0x5C, "vsub", any_fsub, fsub, SchedWriteFAddSizes>,
            avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", any_fdiv, fdiv, HasAVX512,
                              SchedWriteFDivSizes>,
            avx512_fp_binop_ph<0x5E, "vdiv", any_fdiv, fdiv, SchedWriteFDivSizes>,
            avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, X86fmin, HasAVX512,
                              SchedWriteFCmpSizes, 0>,
            avx512_fp_binop_ph<0x5D, "vmin", X86fmin, X86fmin, 
                               SchedWriteFCmpSizes, 0>,
            avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>;
defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, X86fmax, HasAVX512,
                              SchedWriteFCmpSizes, 0>,
            avx512_fp_binop_ph<0x5F, "vmax", X86fmax, X86fmax, SchedWriteFCmpSizes, 0>,
            avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>;
// Commutable (fast-math) min/max variants; selection-only, never assembled.
let isCodeGenOnly = 1 in {
  defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, X86fminc, HasAVX512,
                                 SchedWriteFCmpSizes, 1>,
               avx512_fp_binop_ph<0x5D, "vmin", X86fminc, X86fminc,
                                  SchedWriteFCmpSizes, 1>;
  defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, X86fmaxc, HasAVX512,
                                 SchedWriteFCmpSizes, 1>,
               avx512_fp_binop_ph<0x5F, "vmax", X86fmaxc, X86fmaxc,
                                  SchedWriteFCmpSizes, 1>;
}
// FP logic ops never touch MXCSR and cannot raise FP exceptions.
let Uses = []<Register>, mayRaiseFPException = 0 in {
defm VAND  : avx512_fp_binop_p<0x54, "vand", null_frag, null_frag, HasDQI,
                               SchedWriteFLogicSizes, 1>;
defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, null_frag, HasDQI,
                               SchedWriteFLogicSizes, 0>;
defm VOR   : avx512_fp_binop_p<0x56, "vor", null_frag, null_frag, HasDQI,
                               SchedWriteFLogicSizes, 1>;
defm VXOR  : avx512_fp_binop_p<0x57, "vxor", null_frag, null_frag, HasDQI,
                               SchedWriteFLogicSizes, 1>;
}

// Packed VSCALEF-style op: rr, rm and broadcast forms.
multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
                  EVEX, VVVV, Sched<[sched]>;
  defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
                  EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix,
                   "${src2}"#_.BroadcastStr#", $src1",
                   "$src1, ${src2}"#_.BroadcastStr,
                   (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
                   EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Scalar VSCALEF-style op (intrinsic forms only).
multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
                           Sched<[sched]>;
  defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr#_.Suffix,
                         "$src2, $src1", "$src1, $src2",
                         (OpNode _.RC:$src1, (_.ScalarIntMemFrags addr:$src2))>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// All packed/scalar VSCALEF forms across PH/PS/PD and SH/SS/SD, with
// rounding variants at 512-bit and VLX-gated 128/256-bit packed forms.
multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
                                X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in {
    defm PHZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v32f16_info>,
               avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v32f16_info>,
               EVEX_V512, T_MAP6, PD, EVEX_CD8<16, CD8VF>;
    defm SHZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f16x_info>,
               avx512_fp_scalar_round<opcScaler, OpcodeStr#"sh", f16x_info, X86scalefsRnd, sched.Scl>,
               EVEX, VVVV, T_MAP6, PD, EVEX_CD8<16, CD8VT1>;
  }
  defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>,
             avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>,
             EVEX_V512, EVEX_CD8<32, CD8VF>, T8, PD;
  defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>,
             avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>,
             EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>, T8, PD;
  defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>,
             avx512_fp_scalar_round<opcScaler, OpcodeStr#"ss", f32x_info,
                                    X86scalefsRnd, sched.Scl>,
             EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>, T8, PD;
  defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>,
             avx512_fp_scalar_round<opcScaler, OpcodeStr#"sd", f64x_info,
                                    X86scalefsRnd, sched.Scl>,
             EVEX, VVVV, VEX_LIG, EVEX_CD8<64, CD8VT1>, REX_W, T8, PD;

  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>,
                                     EVEX_V128, EVEX_CD8<32, CD8VF>, T8, PD;
    defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>,
                                     EVEX_V256, EVEX_CD8<32, CD8VF>, T8, PD;
    defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>,
                                     EVEX_V128, REX_W, EVEX_CD8<64, CD8VF>, T8, PD;
    defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>,
                                     EVEX_V256, REX_W, EVEX_CD8<64, CD8VF>, T8, PD;
  }

  let Predicates = [HasFP16, HasVLX] in {
    defm PHZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v8f16x_info>,
                                     EVEX_V128, EVEX_CD8<16, CD8VF>, T_MAP6, PD;
    defm PHZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v16f16x_info>,
                                     EVEX_V256, EVEX_CD8<16, CD8VF>, T_MAP6, PD;
  }
}
defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", SchedWriteFAdd>;

//===----------------------------------------------------------------------===//
// AVX-512  VPTESTM instructions
//===----------------------------------------------------------------------===//

multiclass avx512_vptest<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  // NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG.
  // There are just too many permutations due to commutability and bitcasts.
  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
                      (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                      "$src2, $src1", "$src1, $src2",
                      (null_frag), (null_frag), 1>,
                      EVEX, VVVV, Sched<[sched]>;
  let mayLoad = 1 in
  defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
                       (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                       "$src2, $src1", "$src1, $src2",
                       (null_frag), (null_frag)>,
                       EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
                       Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Broadcast-memory form of vptest (D/Q element sizes only).
multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
  defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                    "${src2}"#_.BroadcastStr#", $src1",
                    "$src1, ${src2}"#_.BroadcastStr,
                    (null_frag), (null_frag)>,
                    EVEX_B, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr,
                                  X86SchedWriteWidths sched,
                                  AVX512VLVectorVTInfo _> {
  let Predicates  = [HasAVX512] in
  defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512>,
           avx512_vptest_mb<opc, OpcodeStr, sched.ZMM, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
  defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256>,
              avx512_vptest_mb<opc, OpcodeStr, sched.YMM, _.info256>, EVEX_V256;
  defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128>,
              avx512_vptest_mb<opc, OpcodeStr, sched.XMM, _.info128>, EVEX_V128;
  }
}

multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr,
                            X86SchedWriteWidths sched> {
  defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", sched,
                                 avx512vl_i32_info>;
  defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", sched,
                                 avx512vl_i64_info>, REX_W;
}

// Byte/word vptest forms; no broadcast variants exist for B/W elements.
multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in {
  defm WZ:    avx512_vptest<opc, OpcodeStr#"w", sched.ZMM,
                            v32i16_info>, EVEX_V512, REX_W;
  defm BZ:    avx512_vptest<opc, OpcodeStr#"b", sched.ZMM,
                            v64i8_info>, EVEX_V512;
  }

  let Predicates = [HasVLX, HasBWI] in {
  defm WZ256: avx512_vptest<opc, OpcodeStr#"w", sched.YMM,
                            v16i16x_info>, EVEX_V256, REX_W;
  defm WZ128: avx512_vptest<opc, OpcodeStr#"w", sched.XMM,
                            v8i16x_info>, EVEX_V128, REX_W;
  defm BZ256: avx512_vptest<opc, OpcodeStr#"b", sched.YMM,
                            v32i8x_info>, EVEX_V256;
  defm BZ128: avx512_vptest<opc, OpcodeStr#"b", sched.XMM,
                            v16i8x_info>, EVEX_V128;
  }
}

multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
                                   X86SchedWriteWidths sched> :
  avx512_vptest_wb<opc_wb, OpcodeStr, sched>,
  avx512_vptest_dq<opc_dq, OpcodeStr, sched>;

defm VPTESTM   : avx512_vptest_all_forms<0x26, 0x27, "vptestm",
                                         SchedWriteVecLogic>, T8, PD;
defm VPTESTNM  : avx512_vptest_all_forms<0x26, 0x27, "vptestnm",
                                         SchedWriteVecLogic>, T8, XS;

//===----------------------------------------------------------------------===//
// AVX-512  Shift instructions
//===----------------------------------------------------------------------===//

// Shift by immediate: register (ri) and full-memory (mi) forms.
multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
                            string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (i8 timm:$src2)))>,
                   Sched<[sched]>;
  defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
                   (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode (_.VT (_.LdFrag addr:$src1)),
                          (i8 timm:$src2)))>,
                   Sched<[sched.Folded]>;
  }
}

// Shift by immediate, broadcast-memory source (mbi).
multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
                             string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
                   (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
                   "$src2, ${src1}"#_.BroadcastStr, "${src1}"#_.BroadcastStr#", $src2",
                   (_.VT (OpNode (_.BroadcastLdFrag addr:$src1), (i8 timm:$src2)))>,
                   EVEX_B, Sched<[sched.Folded]>;
}

// Uniform shift by the low element of an XMM register / 128-bit memory.
multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, ValueType SrcVT,
                            X86VectorVTInfo _> {
   // src2 is always 128-bit
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>,
                   AVX512BIBase, EVEX, VVVV, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>,
                   AVX512BIBase,
                   EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched, ValueType SrcVT,
                              AVX512VLVectorVTInfo VTInfo,
                              Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT,
                            VTInfo.info512>, EVEX_V512,
                            EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
  let Predicates = [prd, HasVLX] in {
  defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT,
                               VTInfo.info256>, EVEX_V256,
                               EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
  defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT,
                               VTInfo.info128>, EVEX_V128,
                               EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
  }
}

multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
                              string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched> {
  defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32,
                              avx512vl_i32_info, HasAVX512>;
  defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
                              avx512vl_i64_info, HasAVX512>, REX_W;
  defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
                              avx512vl_i16_info, HasBWI>;
}

multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched,
                                  AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in
  defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched.ZMM, VTInfo.info512>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM,
                               VTInfo.info512>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched.YMM, VTInfo.info256>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM,
                               VTInfo.info256>, EVEX_V256;
  defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched.XMM, VTInfo.info128>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM,
                               VTInfo.info128>, EVEX_V128;
  }
}

multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM,
                              string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
  defm WZ:    avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                               sched.ZMM, v32i16_info>, EVEX_V512, WIG;
  let Predicates = [HasVLX, HasBWI] in {
  defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                               sched.YMM, v16i16x_info>, EVEX_V256, WIG;
  defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                               sched.XMM, v8i16x_info>, EVEX_V128, WIG;
  }
}

multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
                               Format ImmFormR, Format ImmFormM,
                               string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched> {
  defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
                                 sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
  defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
                                 sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, REX_W;
}

defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
                                 SchedWriteVecShiftImm>,
             avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV;

defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
                                 SchedWriteVecShiftImm>,
             avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV;

defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
                                 SchedWriteVecShiftImm>,
             avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV;

defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV;
defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV;

defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl,
                                SchedWriteVecShift>;
defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra,
                                SchedWriteVecShift>;
defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
                                SchedWriteVecShift>;

// Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX.
// The source is widened into a ZMM register via INSERT_SUBREG (upper
// elements are undef, which is fine for a per-element shift), shifted with
// the Z-form instruction and the low subregister extracted back out.
let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
  def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                 VR128X:$src2)), sub_ymm)>;

  def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                 VR128X:$src2)), sub_xmm)>;

  def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                timm:$src2)), sub_ymm)>;

  def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                timm:$src2)), sub_xmm)>;
}

//===-------------------------------------------------------------------===//
// Variable Bit Shifts
//===-------------------------------------------------------------------===//

multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                   "$src2, 
$src1", "$src1, $src2", 6083 (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>, 6084 AVX5128IBase, EVEX, VVVV, Sched<[sched]>; 6085 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 6086 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr, 6087 "$src2, $src1", "$src1, $src2", 6088 (_.VT (OpNode _.RC:$src1, 6089 (_.VT (_.LdFrag addr:$src2))))>, 6090 AVX5128IBase, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>, 6091 Sched<[sched.Folded, sched.ReadAfterFold]>; 6092 } 6093} 6094 6095multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode, 6096 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 6097 let ExeDomain = _.ExeDomain in 6098 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 6099 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr, 6100 "${src2}"#_.BroadcastStr#", $src1", 6101 "$src1, ${src2}"#_.BroadcastStr, 6102 (_.VT (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))>, 6103 AVX5128IBase, EVEX_B, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>, 6104 Sched<[sched.Folded, sched.ReadAfterFold]>; 6105} 6106 6107multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, 6108 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> { 6109 let Predicates = [HasAVX512] in 6110 defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, 6111 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512; 6112 6113 let Predicates = [HasAVX512, HasVLX] in { 6114 defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, 6115 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256; 6116 defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, 6117 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128; 6118 } 6119} 6120 6121multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr, 6122 SDNode OpNode, X86SchedWriteWidths sched> { 6123 defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched, 6124 
avx512vl_i32_info>; 6125 defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched, 6126 avx512vl_i64_info>, REX_W; 6127} 6128 6129// Use 512bit version to implement 128/256 bit in case NoVLX. 6130multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr, 6131 SDNode OpNode, list<Predicate> p> { 6132 let Predicates = p in { 6133 def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1), 6134 (_.info256.VT _.info256.RC:$src2))), 6135 (EXTRACT_SUBREG 6136 (!cast<Instruction>(OpcodeStr#"Zrr") 6137 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm), 6138 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)), 6139 sub_ymm)>; 6140 6141 def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1), 6142 (_.info128.VT _.info128.RC:$src2))), 6143 (EXTRACT_SUBREG 6144 (!cast<Instruction>(OpcodeStr#"Zrr") 6145 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm), 6146 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)), 6147 sub_xmm)>; 6148 } 6149} 6150multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr, 6151 SDNode OpNode, X86SchedWriteWidths sched> { 6152 let Predicates = [HasBWI] in 6153 defm WZ: avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>, 6154 EVEX_V512, REX_W; 6155 let Predicates = [HasVLX, HasBWI] in { 6156 6157 defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>, 6158 EVEX_V256, REX_W; 6159 defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>, 6160 EVEX_V128, REX_W; 6161 } 6162} 6163 6164defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", X86vshlv, SchedWriteVarVecShift>, 6165 avx512_var_shift_w<0x12, "vpsllvw", X86vshlv, SchedWriteVarVecShift>; 6166 6167defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", X86vsrav, SchedWriteVarVecShift>, 6168 avx512_var_shift_w<0x11, "vpsravw", X86vsrav, SchedWriteVarVecShift>; 6169 6170defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, 
SchedWriteVarVecShift>, 6171 avx512_var_shift_w<0x10, "vpsrlvw", X86vsrlv, SchedWriteVarVecShift>; 6172 6173defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>; 6174defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>; 6175 6176defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX, HasEVEX512]>; 6177defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX, HasEVEX512]>; 6178defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX, HasEVEX512]>; 6179defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX, HasEVEX512]>; 6180 6181 6182// Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX. 6183let Predicates = [HasAVX512, NoVLX, HasEVEX512] in { 6184 def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))), 6185 (EXTRACT_SUBREG (v8i64 6186 (VPROLVQZrr 6187 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6188 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))), 6189 sub_xmm)>; 6190 def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))), 6191 (EXTRACT_SUBREG (v8i64 6192 (VPROLVQZrr 6193 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6194 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))), 6195 sub_ymm)>; 6196 6197 def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))), 6198 (EXTRACT_SUBREG (v16i32 6199 (VPROLVDZrr 6200 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6201 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))), 6202 sub_xmm)>; 6203 def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), 6204 (EXTRACT_SUBREG (v16i32 6205 (VPROLVDZrr 6206 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6207 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))), 6208 sub_ymm)>; 6209 6210 def : 
Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 timm:$src2))), 6211 (EXTRACT_SUBREG (v8i64 6212 (VPROLQZri 6213 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6214 timm:$src2)), sub_xmm)>; 6215 def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 timm:$src2))), 6216 (EXTRACT_SUBREG (v8i64 6217 (VPROLQZri 6218 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6219 timm:$src2)), sub_ymm)>; 6220 6221 def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 timm:$src2))), 6222 (EXTRACT_SUBREG (v16i32 6223 (VPROLDZri 6224 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6225 timm:$src2)), sub_xmm)>; 6226 def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 timm:$src2))), 6227 (EXTRACT_SUBREG (v16i32 6228 (VPROLDZri 6229 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6230 timm:$src2)), sub_ymm)>; 6231} 6232 6233// Use 512bit VPROR/VPRORI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX. 6234let Predicates = [HasAVX512, NoVLX, HasEVEX512] in { 6235 def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))), 6236 (EXTRACT_SUBREG (v8i64 6237 (VPRORVQZrr 6238 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6239 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))), 6240 sub_xmm)>; 6241 def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))), 6242 (EXTRACT_SUBREG (v8i64 6243 (VPRORVQZrr 6244 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6245 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))), 6246 sub_ymm)>; 6247 6248 def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))), 6249 (EXTRACT_SUBREG (v16i32 6250 (VPRORVDZrr 6251 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6252 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))), 6253 sub_xmm)>; 6254 def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), 6255 (EXTRACT_SUBREG (v16i32 6256 (VPRORVDZrr 6257 (v16i32 (INSERT_SUBREG 
(IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6258 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))), 6259 sub_ymm)>; 6260 6261 def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 timm:$src2))), 6262 (EXTRACT_SUBREG (v8i64 6263 (VPRORQZri 6264 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6265 timm:$src2)), sub_xmm)>; 6266 def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 timm:$src2))), 6267 (EXTRACT_SUBREG (v8i64 6268 (VPRORQZri 6269 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6270 timm:$src2)), sub_ymm)>; 6271 6272 def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 timm:$src2))), 6273 (EXTRACT_SUBREG (v16i32 6274 (VPRORDZri 6275 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6276 timm:$src2)), sub_xmm)>; 6277 def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 timm:$src2))), 6278 (EXTRACT_SUBREG (v16i32 6279 (VPRORDZri 6280 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6281 timm:$src2)), sub_ymm)>; 6282} 6283 6284//===-------------------------------------------------------------------===// 6285// 1-src variable permutation VPERMW/D/Q 6286//===-------------------------------------------------------------------===// 6287 6288multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, 6289 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> { 6290 let Predicates = [HasAVX512] in 6291 defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>, 6292 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512; 6293 6294 let Predicates = [HasAVX512, HasVLX] in 6295 defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>, 6296 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256; 6297} 6298 6299multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM, 6300 string OpcodeStr, SDNode OpNode, 6301 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> { 6302 let Predicates = 
[HasAVX512] in 6303 defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, 6304 sched, VTInfo.info512>, 6305 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, 6306 sched, VTInfo.info512>, EVEX_V512; 6307 let Predicates = [HasAVX512, HasVLX] in 6308 defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, 6309 sched, VTInfo.info256>, 6310 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, 6311 sched, VTInfo.info256>, EVEX_V256; 6312} 6313 6314multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr, 6315 Predicate prd, SDNode OpNode, 6316 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> { 6317 let Predicates = [prd] in 6318 defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>, 6319 EVEX_V512 ; 6320 let Predicates = [HasVLX, prd] in { 6321 defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>, 6322 EVEX_V256 ; 6323 defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>, 6324 EVEX_V128 ; 6325 } 6326} 6327 6328defm VPERMW : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv, 6329 WriteVarShuffle256, avx512vl_i16_info>, REX_W; 6330defm VPERMB : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv, 6331 WriteVarShuffle256, avx512vl_i8_info>; 6332 6333defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv, 6334 WriteVarShuffle256, avx512vl_i32_info>; 6335defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv, 6336 WriteVarShuffle256, avx512vl_i64_info>, REX_W; 6337defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv, 6338 WriteFVarShuffle256, avx512vl_f32_info>; 6339defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv, 6340 WriteFVarShuffle256, avx512vl_f64_info>, REX_W; 6341 6342defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq", 6343 X86VPermi, WriteShuffle256, avx512vl_i64_info>, 6344 EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, REX_W; 6345defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd", 6346 X86VPermi, 
WriteFShuffle256, avx512vl_f64_info>, 6347 EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, REX_W; 6348 6349//===----------------------------------------------------------------------===// 6350// AVX-512 - VPERMIL 6351//===----------------------------------------------------------------------===// 6352 6353multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode, 6354 X86FoldableSchedWrite sched, X86VectorVTInfo _, 6355 X86VectorVTInfo Ctrl> { 6356 defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst), 6357 (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr, 6358 "$src2, $src1", "$src1, $src2", 6359 (_.VT (OpNode _.RC:$src1, 6360 (Ctrl.VT Ctrl.RC:$src2)))>, 6361 T8, PD, EVEX, VVVV, Sched<[sched]>; 6362 defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst), 6363 (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr, 6364 "$src2, $src1", "$src1, $src2", 6365 (_.VT (OpNode 6366 _.RC:$src1, 6367 (Ctrl.VT (Ctrl.LdFrag addr:$src2))))>, 6368 T8, PD, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>, 6369 Sched<[sched.Folded, sched.ReadAfterFold]>; 6370 defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst), 6371 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr, 6372 "${src2}"#_.BroadcastStr#", $src1", 6373 "$src1, ${src2}"#_.BroadcastStr, 6374 (_.VT (OpNode 6375 _.RC:$src1, 6376 (Ctrl.VT (Ctrl.BroadcastLdFrag addr:$src2))))>, 6377 T8, PD, EVEX, VVVV, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, 6378 Sched<[sched.Folded, sched.ReadAfterFold]>; 6379} 6380 6381multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar, 6382 X86SchedWriteWidths sched, 6383 AVX512VLVectorVTInfo _, 6384 AVX512VLVectorVTInfo Ctrl> { 6385 let Predicates = [HasAVX512] in { 6386 defm Z : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM, 6387 _.info512, Ctrl.info512>, EVEX_V512; 6388 } 6389 let Predicates = [HasAVX512, HasVLX] in { 6390 defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM, 6391 _.info128, Ctrl.info128>, EVEX_V128; 6392 defm Z256 : 
avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM, 6393 _.info256, Ctrl.info256>, EVEX_V256; 6394 } 6395} 6396 6397multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar, 6398 AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{ 6399 defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle, 6400 _, Ctrl>; 6401 defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr, 6402 X86VPermilpi, SchedWriteFShuffle, _>, 6403 EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>; 6404} 6405 6406let ExeDomain = SSEPackedSingle in 6407defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info, 6408 avx512vl_i32_info>; 6409let ExeDomain = SSEPackedDouble in 6410defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info, 6411 avx512vl_i64_info>, REX_W; 6412 6413//===----------------------------------------------------------------------===// 6414// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW 6415//===----------------------------------------------------------------------===// 6416 6417defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd", 6418 X86PShufd, SchedWriteShuffle, avx512vl_i32_info>, 6419 EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>; 6420defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw", 6421 X86PShufhw, SchedWriteShuffle>, 6422 EVEX, AVX512XSIi8Base; 6423defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw", 6424 X86PShuflw, SchedWriteShuffle>, 6425 EVEX, AVX512XDIi8Base; 6426 6427//===----------------------------------------------------------------------===// 6428// AVX-512 - VPSHUFB 6429//===----------------------------------------------------------------------===// 6430 6431multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, 6432 X86SchedWriteWidths sched> { 6433 let Predicates = [HasBWI] in 6434 defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>, 6435 EVEX_V512; 6436 6437 
let Predicates = [HasVLX, HasBWI] in { 6438 defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>, 6439 EVEX_V256; 6440 defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>, 6441 EVEX_V128; 6442 } 6443} 6444 6445defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb, 6446 SchedWriteVarShuffle>, WIG; 6447 6448//===----------------------------------------------------------------------===// 6449// Move Low to High and High to Low packed FP Instructions 6450//===----------------------------------------------------------------------===// 6451 6452def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst), 6453 (ins VR128X:$src1, VR128X:$src2), 6454 "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", 6455 [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>, 6456 Sched<[SchedWriteFShuffle.XMM]>, EVEX, VVVV; 6457let isCommutable = 1 in 6458def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst), 6459 (ins VR128X:$src1, VR128X:$src2), 6460 "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}", 6461 [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>, 6462 Sched<[SchedWriteFShuffle.XMM]>, EVEX, VVVV; 6463 6464//===----------------------------------------------------------------------===// 6465// VMOVHPS/PD VMOVLPS Instructions 6466// All patterns was taken from SSS implementation. 
6467//===----------------------------------------------------------------------===// 6468 6469multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr, 6470 SDPatternOperator OpNode, 6471 X86VectorVTInfo _> { 6472 let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in 6473 def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst), 6474 (ins _.RC:$src1, f64mem:$src2), 6475 !strconcat(OpcodeStr, 6476 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 6477 [(set _.RC:$dst, 6478 (OpNode _.RC:$src1, 6479 (_.VT (bitconvert 6480 (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>, 6481 Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX, VVVV; 6482} 6483 6484// No patterns for MOVLPS/MOVHPS as the Movlhps node should only be created in 6485// SSE1. And MOVLPS pattern is even more complex. 6486defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag, 6487 v4f32x_info>, EVEX_CD8<32, CD8VT2>, TB; 6488defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl, 6489 v2f64x_info>, EVEX_CD8<64, CD8VT1>, TB, PD, REX_W; 6490defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag, 6491 v4f32x_info>, EVEX_CD8<32, CD8VT2>, TB; 6492defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd, 6493 v2f64x_info>, EVEX_CD8<64, CD8VT1>, TB, PD, REX_W; 6494 6495let Predicates = [HasAVX512] in { 6496 // VMOVHPD patterns 6497 def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))), 6498 (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>; 6499 6500 // VMOVLPD patterns 6501 def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload64 addr:$src2))), 6502 (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>; 6503} 6504 6505let SchedRW = [WriteFStore] in { 6506let mayStore = 1, hasSideEffects = 0 in 6507def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs), 6508 (ins f64mem:$dst, VR128X:$src), 6509 "vmovhps\t{$src, $dst|$dst, $src}", 6510 []>, EVEX, EVEX_CD8<32, CD8VT2>; 6511def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs), 
6512 (ins f64mem:$dst, VR128X:$src), 6513 "vmovhpd\t{$src, $dst|$dst, $src}", 6514 [(store (f64 (extractelt 6515 (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)), 6516 (iPTR 0))), addr:$dst)]>, 6517 EVEX, EVEX_CD8<64, CD8VT1>, REX_W; 6518let mayStore = 1, hasSideEffects = 0 in 6519def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs), 6520 (ins f64mem:$dst, VR128X:$src), 6521 "vmovlps\t{$src, $dst|$dst, $src}", 6522 []>, EVEX, EVEX_CD8<32, CD8VT2>; 6523def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs), 6524 (ins f64mem:$dst, VR128X:$src), 6525 "vmovlpd\t{$src, $dst|$dst, $src}", 6526 [(store (f64 (extractelt (v2f64 VR128X:$src), 6527 (iPTR 0))), addr:$dst)]>, 6528 EVEX, EVEX_CD8<64, CD8VT1>, REX_W; 6529} // SchedRW 6530 6531let Predicates = [HasAVX512] in { 6532 // VMOVHPD patterns 6533 def : Pat<(store (f64 (extractelt 6534 (v2f64 (X86VPermilpi VR128X:$src, (i8 1))), 6535 (iPTR 0))), addr:$dst), 6536 (VMOVHPDZ128mr addr:$dst, VR128X:$src)>; 6537} 6538//===----------------------------------------------------------------------===// 6539// FMA - Fused Multiply Operations 6540// 6541 6542multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 6543 SDNode MaskOpNode, X86FoldableSchedWrite sched, 6544 X86VectorVTInfo _> { 6545 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0, 6546 Uses = [MXCSR], mayRaiseFPException = 1 in { 6547 defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst), 6548 (ins _.RC:$src2, _.RC:$src3), 6549 OpcodeStr, "$src3, $src2", "$src2, $src3", 6550 (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 6551 (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>, 6552 EVEX, VVVV, Sched<[sched]>; 6553 6554 defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst), 6555 (ins _.RC:$src2, _.MemOp:$src3), 6556 OpcodeStr, "$src3, $src2", "$src2, $src3", 6557 (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 6558 (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, 
(_.LdFrag addr:$src3))), 1, 0>, 6559 EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold, 6560 sched.ReadAfterFold]>; 6561 6562 defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst), 6563 (ins _.RC:$src2, _.ScalarMemOp:$src3), 6564 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), 6565 !strconcat("$src2, ${src3}", _.BroadcastStr ), 6566 (OpNode _.RC:$src2, 6567 _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 6568 (MaskOpNode _.RC:$src2, 6569 _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>, 6570 EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold, 6571 sched.ReadAfterFold]>; 6572 } 6573} 6574 6575multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode, 6576 X86FoldableSchedWrite sched, 6577 X86VectorVTInfo _> { 6578 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0, 6579 Uses = [MXCSR] in 6580 defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst), 6581 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc), 6582 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", 6583 (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 6584 (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>, 6585 EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>; 6586} 6587 6588multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 6589 SDNode MaskOpNode, SDNode OpNodeRnd, 6590 X86SchedWriteWidths sched, 6591 AVX512VLVectorVTInfo _, 6592 Predicate prd = HasAVX512> { 6593 let Predicates = [prd] in { 6594 defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode, 6595 sched.ZMM, _.info512>, 6596 avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM, 6597 _.info512>, 6598 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; 6599 } 6600 let Predicates = [HasVLX, prd] in { 6601 defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode, 6602 sched.YMM, _.info256>, 6603 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; 6604 defm Z128 
: avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode, 6605 sched.XMM, _.info128>, 6606 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; 6607 } 6608} 6609 6610multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 6611 SDNode MaskOpNode, SDNode OpNodeRnd> { 6612 defm PH : avx512_fma3p_213_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode, 6613 OpNodeRnd, SchedWriteFMA, 6614 avx512vl_f16_info, HasFP16>, T_MAP6, PD; 6615 defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode, 6616 OpNodeRnd, SchedWriteFMA, 6617 avx512vl_f32_info>, T8, PD; 6618 defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode, 6619 OpNodeRnd, SchedWriteFMA, 6620 avx512vl_f64_info>, T8, PD, REX_W; 6621} 6622 6623defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", any_fma, 6624 fma, X86FmaddRnd>; 6625defm VFMSUB213 : avx512_fma3p_213_f<0xAA, "vfmsub213", X86any_Fmsub, 6626 X86Fmsub, X86FmsubRnd>; 6627defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, 6628 X86Fmaddsub, X86FmaddsubRnd>; 6629defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, 6630 X86Fmsubadd, X86FmsubaddRnd>; 6631defm VFNMADD213 : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86any_Fnmadd, 6632 X86Fnmadd, X86FnmaddRnd>; 6633defm VFNMSUB213 : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86any_Fnmsub, 6634 X86Fnmsub, X86FnmsubRnd>; 6635 6636 6637multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 6638 SDNode MaskOpNode, X86FoldableSchedWrite sched, 6639 X86VectorVTInfo _> { 6640 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0, 6641 Uses = [MXCSR], mayRaiseFPException = 1 in { 6642 defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst), 6643 (ins _.RC:$src2, _.RC:$src3), 6644 OpcodeStr, "$src3, $src2", "$src2, $src3", 6645 (null_frag), 6646 (_.VT (MaskOpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>, 6647 EVEX, VVVV, Sched<[sched]>; 6648 6649 defm m: 
AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)),
          (_.VT (MaskOpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
          EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold,
          sched.ReadAfterFold]>;

  // Embedded-broadcast form (EVEX_B): $src3 is a scalar memory operand
  // splatted across the vector via _.BroadcastLdFrag.
  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.ScalarMemOp:$src3),
          OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
          "$src2, ${src3}"#_.BroadcastStr,
          (_.VT (OpNode _.RC:$src2,
                        (_.VT (_.BroadcastLdFrag addr:$src3)),
                        _.RC:$src1)),
          (_.VT (MaskOpNode _.RC:$src2,
                            (_.VT (_.BroadcastLdFrag addr:$src3)),
                            _.RC:$src1)), 1, 0>, EVEX, VVVV, EVEX_B,
          Sched<[sched.Folded, sched.ReadAfterFold,
                 sched.ReadAfterFold]>;
  }
}

// Register-only, static-rounding-control variant of the 231-form packed FMA.
// Only the masked operand of AVX512_maskable_fma is given a pattern; the
// unmasked pattern slot is null_frag. $src1 is tied to $dst, and the
// explicit rounding mode reads MXCSR.
multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR] in
  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
          (null_frag),
          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))),
          1, 1>, EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>;
}

// Instantiates the 231-form packed FMA at all vector widths: ZMM (with the
// rounding-control variant) under prd, and YMM/XMM additionally under HasVLX.
multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                                   SDNode MaskOpNode, SDNode OpNodeRnd,
                                   X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _,
                                   Predicate prd = HasAVX512> {
  let Predicates = [prd] in {
    defm Z      : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                      sched.ZMM, _.info512>,
                  avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                        _.info512>,
                  EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, prd] in {
    defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.YMM, _.info256>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.XMM, _.info128>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}

// Instantiates a 231-form FMA opcode for each element type: PH (FP16,
// gated on HasFP16), PS, and PD.
multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                              SDNode MaskOpNode, SDNode OpNodeRnd > {
  defm PH : avx512_fma3p_231_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f16_info, HasFP16>, T_MAP6, PD;
  defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f32_info>, T8, PD;
  defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f64_info>, T8, PD, REX_W;
}

defm VFMADD231    : avx512_fma3p_231_f<0xB8, "vfmadd231", any_fma,
                                       fma, X86FmaddRnd>;
defm VFMSUB231    : avx512_fma3p_231_f<0xBA, "vfmsub231", X86any_Fmsub,
                                       X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub,
                                       X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd,
                                       X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD231   : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86any_Fnmadd,
                                       X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB231   : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86any_Fnmsub,
                                       X86Fnmsub, X86FnmsubRnd>;

// Register/memory/broadcast forms of the 132-form packed FMA. The register
// form has no unmasked pattern (null_frag); the memory forms use 312 operand
// order (see comments below).
multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
                               X86VectorVTInfo _> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (null_frag),
          (_.VT (MaskOpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1>,
          EVEX, VVVV, Sched<[sched]>;

  // Pattern is 312 order so that the load is in a different place from the
  // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)),
          (_.VT (MaskOpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
          EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold,
                             sched.ReadAfterFold]>;

  // Pattern is 312 order so that the load is in a different place from the
  // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.ScalarMemOp:$src3),
          OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
          "$src2, ${src3}"#_.BroadcastStr,
          (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
                        _.RC:$src1, _.RC:$src2)),
          (_.VT (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
                            _.RC:$src1, _.RC:$src2)), 1, 0>,
          EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
                                     sched.ReadAfterFold]>;
  }
}

// Register-only, static-rounding-control variant of the 132-form packed FMA
// (structure mirrors avx512_fma3_231_round above).
multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR] in
  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
          (null_frag),
          (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))),
          1, 1>, EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>;
}

// Instantiates the 132-form packed FMA at all vector widths (ZMM under prd,
// YMM/XMM additionally under HasVLX), mirroring avx512_fma3p_231_common.
multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                                   SDNode MaskOpNode, SDNode OpNodeRnd,
                                   X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _,
                                   Predicate prd = HasAVX512> {
  let Predicates = [prd] in {
    defm Z      : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                      sched.ZMM, _.info512>,
                  avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                        _.info512>,
                  EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, prd] in {
    defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.YMM, _.info256>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.XMM, _.info128>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}

// Instantiates a 132-form FMA opcode for each element type: PH (FP16,
// gated on HasFP16), PS, and PD.
multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                              SDNode MaskOpNode, SDNode OpNodeRnd > {
  defm PH : avx512_fma3p_132_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f16_info, HasFP16>, T_MAP6, PD;
  defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f32_info>, T8, PD;
  defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f64_info>, T8, PD, REX_W;
}

defm VFMADD132    : avx512_fma3p_132_f<0x98, "vfmadd132", any_fma,
                                       fma, X86FmaddRnd>;
defm VFMSUB132    : avx512_fma3p_132_f<0x9A, "vfmsub132", X86any_Fmsub,
                                       X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub,
                                       X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd,
                                       X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD132   : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86any_Fnmadd,
                                       X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB132   : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86any_Fnmsub,
                                       X86Fnmsub, X86FnmsubRnd>;

// Scalar FMA

// Common body for one scalar FMA opcode: the intrinsic (_Int) forms carry no
// ISel patterns here (null_frag; patterns are supplied separately by
// avx512_scalar_fma_patterns below), while the isCodeGenOnly FRC forms use
// the RHS_r/RHS_m/RHS_b dags passed in by the caller. MaskOnlyReg suppresses
// the register and rounding patterns (used by the 213 form, which only keeps
// its memory pattern — see avx512_fma3s_all).
multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                               dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
  defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
          EVEX, VVVV, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;

  let mayLoad = 1 in
  defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
          EVEX, VVVV, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold,
                             SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;

  // Static-rounding (EVEX_B + EVEX_RC) intrinsic form; reads MXCSR.
  let Uses = [MXCSR] in
  defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
          EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;

  let isCodeGenOnly = 1, isCommutable = 1 in {
    def r : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
                   (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
                   !strconcat(OpcodeStr,
                              "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                   !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>, EVEX, VVVV, SIMD_EXC;
    def m : AVX512<opc, MRMSrcMem, (outs _.FRC:$dst),
                   (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
                   !strconcat(OpcodeStr,
                              "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                   [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold,
                                    SchedWriteFMA.Scl.ReadAfterFold]>, EVEX, VVVV, SIMD_EXC;

    let Uses = [MXCSR] in
    def rb : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
                    (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
                    !strconcat(OpcodeStr,
                               "\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"),
                    !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
                    Sched<[SchedWriteFMA.Scl]>, EVEX, VVVV;
  }// isCodeGenOnly = 1
}// Constraints = "$src1 = $dst"
}

// Defines the 213/231/132 forms of one scalar FMA for a single element type,
// supplying the FRC-level patterns consumed by avx512_fma3s_common.
multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
                            string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd,
                            X86VectorVTInfo _, string SUFF> {
  let ExeDomain = _.ExeDomain in {
  defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
                // Operands for intrinsic are in 123 order to preserve passthu
                // semantics.
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
                                           _.FRC:$src3))),
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
                                           (_.ScalarLdFrag addr:$src3)))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
                                           _.FRC:$src3, (i32 timm:$rc)))), 0>;

  defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
                                           _.FRC:$src1))),
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
                                           (_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
                                           _.FRC:$src1, (i32 timm:$rc)))), 1>;

  // One pattern is 312 order so that the load is in a different place from the
  // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
  defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
                                           _.FRC:$src2))),
                (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
                                           _.FRC:$src1, _.FRC:$src2))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
                                           _.FRC:$src2, (i32 timm:$rc)))), 1>;
  }
}

// Top-level scalar FMA multiclass: SS/SD under HasAVX512, SH under HasFP16.
multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
                        string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd> {
  let Predicates = [HasAVX512] in {
    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
                                 OpNodeRnd, f32x_info, "SS">,
                                 EVEX_CD8<32, CD8VT1>, VEX_LIG, T8, PD;
    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
                                 OpNodeRnd, f64x_info, "SD">,
                                 EVEX_CD8<64, CD8VT1>, VEX_LIG, REX_W, T8, PD;
  }
  let Predicates = [HasFP16] in {
    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
                                 OpNodeRnd, f16x_info, "SH">,
                                 EVEX_CD8<16, CD8VT1>, VEX_LIG, T_MAP6, PD;
  }
}

defm VFMADD  : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", any_fma, X86FmaddRnd>;
defm VFMSUB  : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86any_Fmsub, X86FmsubRnd>;
defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86any_Fnmadd, X86FnmaddRnd>;
defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86any_Fnmsub, X86FnmsubRnd>;

// ISel patterns for the scalar FMA _Int instructions: match an FMA on
// element 0 of a vector, merged back into $src1 via Move (X86Movss/sd/sh).
// Covers unmasked, merge-masked (X86selects_mask with element 0 of $src1 as
// passthru), zero-masked (passthru ZeroFP), and static-rounding (RndOp)
// variants across the 213/231/132 instruction forms.
multiclass avx512_scalar_fma_patterns<SDPatternOperator Op, SDNode MaskedOp,
                                      SDNode RndOp, string Prefix,
                                      string Suffix, SDNode Move,
                                      X86VectorVTInfo _, PatLeaf ZeroFP,
                                      Predicate prd = HasAVX512> {
  let Predicates = [prd] in {
    // Unmasked register/memory forms.
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    _.FRC:$src3))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2, _.FRC:$src3,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    (_.ScalarLdFrag addr:$src3)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zm_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))),
              (!cast<I>(Prefix#"132"#Suffix#"Zm_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               addr:$src3)>;

    // Merge-masked forms: the select passthru is element 0 of $src1.
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp _.FRC:$src2,
                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                           _.FRC:$src3),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp _.FRC:$src2,
                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                           (_.ScalarLdFrag addr:$src3)),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                           (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp _.FRC:$src2, _.FRC:$src3,
                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    // Zero-masked forms: the select passthru is ZeroFP.
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp _.FRC:$src2,
                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                           _.FRC:$src3),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp _.FRC:$src2, _.FRC:$src3,
                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp _.FRC:$src2,
                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                           (_.ScalarLdFrag addr:$src3)),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                           _.FRC:$src2, (_.ScalarLdFrag addr:$src3)),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    // Patterns with rounding mode.
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (RndOp _.FRC:$src2,
                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                       _.FRC:$src3, (i32 timm:$rc)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (RndOp _.FRC:$src2, _.FRC:$src3,
                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                       (i32 timm:$rc)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (RndOp _.FRC:$src2,
                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                        _.FRC:$src3, (i32 timm:$rc)),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (RndOp _.FRC:$src2, _.FRC:$src3,
                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                        (i32 timm:$rc)),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (RndOp _.FRC:$src2,
                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                        _.FRC:$src3, (i32 timm:$rc)),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (RndOp _.FRC:$src2, _.FRC:$src3,
                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                        (i32 timm:$rc)),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
  }
}
defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD", "SH",
                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB", "SH",
                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SH",
                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SH",
                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;

defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
                                  "SS", X86Movss, v4f32x_info, fp32imm0>;

defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;

//===----------------------------------------------------------------------===//
// AVX-512 Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
//===----------------------------------------------------------------------===//
let Constraints = "$src1 = $dst" in {
// Register/memory/broadcast forms of one IFMA (vpmadd52) opcode.
multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  // NOTE: The SDNode have the multiply operands first with the add last.
  // This enables commuted load patterns to be autogenerated by tablegen.
  let ExeDomain = _.ExeDomain in {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
          T8, PD, EVEX, VVVV, Sched<[sched]>;

  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
          T8, PD, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold,
                                     sched.ReadAfterFold]>;

  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
            (ins _.RC:$src2, _.ScalarMemOp:$src3),
            OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
            !strconcat("$src2, ${src3}", _.BroadcastStr ),
            (OpNode _.RC:$src2,
                    (_.VT (_.BroadcastLdFrag addr:$src3)),
                    _.RC:$src1)>,
            T8, PD, EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
                                               sched.ReadAfterFold]>;
  }
}
} // Constraints = "$src1 = $dst"
7198multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode, 7199 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> { 7200 let Predicates = [HasIFMA] in { 7201 defm Z : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, 7202 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; 7203 } 7204 let Predicates = [HasVLX, HasIFMA] in { 7205 defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, 7206 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; 7207 defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, 7208 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; 7209 } 7210} 7211 7212defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l, 7213 SchedWriteVecIMul, avx512vl_i64_info>, 7214 REX_W; 7215defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h, 7216 SchedWriteVecIMul, avx512vl_i64_info>, 7217 REX_W; 7218 7219//===----------------------------------------------------------------------===// 7220// AVX-512 Scalar convert from sign integer to float/double 7221//===----------------------------------------------------------------------===// 7222 7223multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched, 7224 RegisterClass SrcRC, X86VectorVTInfo DstVT, 7225 X86MemOperand x86memop, PatFrag ld_frag, string asm, 7226 string mem, list<Register> _Uses = [MXCSR], 7227 bit _mayRaiseFPException = 1> { 7228let ExeDomain = DstVT.ExeDomain, Uses = _Uses, 7229 mayRaiseFPException = _mayRaiseFPException in { 7230 let hasSideEffects = 0, isCodeGenOnly = 1 in { 7231 def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst), 7232 (ins DstVT.FRC:$src1, SrcRC:$src), 7233 !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>, 7234 EVEX, VVVV, Sched<[sched, ReadDefault, ReadInt2Fpu]>; 7235 let mayLoad = 1 in 7236 def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst), 7237 (ins DstVT.FRC:$src1, x86memop:$src), 7238 asm#"{"#mem#"}\t{$src, $src1, 
$dst|$dst, $src1, $src}", []>, 7239 EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>; 7240 } // hasSideEffects = 0 7241 def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), 7242 (ins DstVT.RC:$src1, SrcRC:$src2), 7243 !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 7244 [(set DstVT.RC:$dst, 7245 (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2))]>, 7246 EVEX, VVVV, Sched<[sched, ReadDefault, ReadInt2Fpu]>; 7247 7248 def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), 7249 (ins DstVT.RC:$src1, x86memop:$src2), 7250 asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}", 7251 [(set DstVT.RC:$dst, 7252 (OpNode (DstVT.VT DstVT.RC:$src1), 7253 (ld_frag addr:$src2)))]>, 7254 EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>; 7255} 7256 def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 7257 (!cast<Instruction>(NAME#"rr_Int") DstVT.RC:$dst, 7258 DstVT.RC:$src1, SrcRC:$src2), 0, "att">; 7259} 7260 7261multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode, 7262 X86FoldableSchedWrite sched, RegisterClass SrcRC, 7263 X86VectorVTInfo DstVT, string asm, 7264 string mem> { 7265 let ExeDomain = DstVT.ExeDomain, Uses = [MXCSR] in 7266 def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), 7267 (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 7268 !strconcat(asm, 7269 "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"), 7270 [(set DstVT.RC:$dst, 7271 (OpNode (DstVT.VT DstVT.RC:$src1), 7272 SrcRC:$src2, 7273 (i32 timm:$rc)))]>, 7274 EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched, ReadDefault, ReadInt2Fpu]>; 7275 def : InstAlias<"v"#asm#mem#"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}", 7276 (!cast<Instruction>(NAME#"rrb_Int") DstVT.RC:$dst, 7277 DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 0, "att">; 7278} 7279 7280multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, SDNode OpNodeRnd, 7281 X86FoldableSchedWrite sched, 7282 RegisterClass SrcRC, X86VectorVTInfo DstVT, 7283 X86MemOperand x86memop, PatFrag ld_frag, 7284 
string asm, string mem> { 7285 defm NAME : avx512_vcvtsi_round<opc, OpNodeRnd, sched, SrcRC, DstVT, asm, mem>, 7286 avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop, 7287 ld_frag, asm, mem>, VEX_LIG; 7288} 7289 7290let Predicates = [HasAVX512] in { 7291defm VCVTSI2SSZ : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, 7292 WriteCvtI2SS, GR32, 7293 v4f32x_info, i32mem, loadi32, "cvtsi2ss", "l">, 7294 TB, XS, EVEX_CD8<32, CD8VT1>; 7295defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, 7296 WriteCvtI2SS, GR64, 7297 v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">, 7298 TB, XS, REX_W, EVEX_CD8<64, CD8VT1>; 7299defm VCVTSI2SDZ : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32, 7300 v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l", [], 0>, 7301 TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; 7302defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, 7303 WriteCvtI2SD, GR64, 7304 v2f64x_info, i64mem, loadi64, "cvtsi2sd", "q">, 7305 TB, XD, REX_W, EVEX_CD8<64, CD8VT1>; 7306 7307def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}", 7308 (VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">; 7309def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}", 7310 (VCVTSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">; 7311 7312def : Pat<(f32 (any_sint_to_fp (loadi32 addr:$src))), 7313 (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>; 7314def : Pat<(f32 (any_sint_to_fp (loadi64 addr:$src))), 7315 (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>; 7316def : Pat<(f64 (any_sint_to_fp (loadi32 addr:$src))), 7317 (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>; 7318def : Pat<(f64 (any_sint_to_fp (loadi64 addr:$src))), 7319 (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>; 7320 7321def : Pat<(f32 (any_sint_to_fp GR32:$src)), 7322 (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>; 7323def : Pat<(f32 (any_sint_to_fp GR64:$src)), 7324 (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>; 7325def : 
Pat<(f64 (any_sint_to_fp GR32:$src)), 7326 (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>; 7327def : Pat<(f64 (any_sint_to_fp GR64:$src)), 7328 (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>; 7329 7330defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, 7331 WriteCvtI2SS, GR32, 7332 v4f32x_info, i32mem, loadi32, 7333 "cvtusi2ss", "l">, TB, XS, EVEX_CD8<32, CD8VT1>; 7334defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, 7335 WriteCvtI2SS, GR64, 7336 v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">, 7337 TB, XS, REX_W, EVEX_CD8<64, CD8VT1>; 7338defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info, 7339 i32mem, loadi32, "cvtusi2sd", "l", [], 0>, 7340 TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; 7341defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, 7342 WriteCvtI2SD, GR64, 7343 v2f64x_info, i64mem, loadi64, "cvtusi2sd", "q">, 7344 TB, XD, REX_W, EVEX_CD8<64, CD8VT1>; 7345 7346def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}", 7347 (VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">; 7348def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}", 7349 (VCVTUSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">; 7350 7351def : Pat<(f32 (any_uint_to_fp (loadi32 addr:$src))), 7352 (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>; 7353def : Pat<(f32 (any_uint_to_fp (loadi64 addr:$src))), 7354 (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>; 7355def : Pat<(f64 (any_uint_to_fp (loadi32 addr:$src))), 7356 (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>; 7357def : Pat<(f64 (any_uint_to_fp (loadi64 addr:$src))), 7358 (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>; 7359 7360def : Pat<(f32 (any_uint_to_fp GR32:$src)), 7361 (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>; 7362def : Pat<(f32 (any_uint_to_fp GR64:$src)), 7363 (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>; 7364def : Pat<(f64 (any_uint_to_fp 
GR32:$src)), 7365 (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>; 7366def : Pat<(f64 (any_uint_to_fp GR64:$src)), 7367 (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>; 7368} 7369 7370//===----------------------------------------------------------------------===// 7371// AVX-512 Scalar convert from float/double to integer 7372//===----------------------------------------------------------------------===// 7373 7374multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT, 7375 X86VectorVTInfo DstVT, SDNode OpNode, 7376 SDNode OpNodeRnd, 7377 X86FoldableSchedWrite sched, string asm, 7378 string aliasStr, Predicate prd = HasAVX512> { 7379 let Predicates = [prd], ExeDomain = SrcVT.ExeDomain in { 7380 def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src), 7381 !strconcat(asm,"\t{$src, $dst|$dst, $src}"), 7382 [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>, 7383 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC; 7384 let Uses = [MXCSR] in 7385 def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc), 7386 !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"), 7387 [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 timm:$rc)))]>, 7388 EVEX, VEX_LIG, EVEX_B, EVEX_RC, 7389 Sched<[sched]>; 7390 def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src), 7391 !strconcat(asm,"\t{$src, $dst|$dst, $src}"), 7392 [(set DstVT.RC:$dst, (OpNode 7393 (SrcVT.ScalarIntMemFrags addr:$src)))]>, 7394 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 7395 } // Predicates = [prd] 7396 7397 def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}", 7398 (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">; 7399 def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}", 7400 (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">; 7401 def : InstAlias<"v" # asm # aliasStr # 
"\t{$src, $dst|$dst, $src}", 7402 (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst, 7403 SrcVT.IntScalarMemOp:$src), 0, "att">; 7404} 7405 7406// Convert float/double to signed/unsigned int 32/64 7407defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,X86cvts2si, 7408 X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">, 7409 TB, XS, EVEX_CD8<32, CD8VT1>; 7410defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si, 7411 X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">, 7412 TB, XS, REX_W, EVEX_CD8<32, CD8VT1>; 7413defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, X86cvts2usi, 7414 X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">, 7415 TB, XS, EVEX_CD8<32, CD8VT1>; 7416defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, X86cvts2usi, 7417 X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">, 7418 TB, XS, REX_W, EVEX_CD8<32, CD8VT1>; 7419defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si, 7420 X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">, 7421 TB, XD, EVEX_CD8<64, CD8VT1>; 7422defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si, 7423 X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">, 7424 TB, XD, REX_W, EVEX_CD8<64, CD8VT1>; 7425defm VCVTSD2USIZ: avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, X86cvts2usi, 7426 X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">, 7427 TB, XD, EVEX_CD8<64, CD8VT1>; 7428defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2usi, 7429 X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">, 7430 TB, XD, REX_W, EVEX_CD8<64, CD8VT1>; 7431 7432multiclass avx512_cvt_s<bits<8> opc, string asm, X86VectorVTInfo SrcVT, 7433 X86VectorVTInfo DstVT, SDNode OpNode, 7434 X86FoldableSchedWrite sched> { 7435 let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in { 7436 let isCodeGenOnly = 1 in { 7437 def rr : AVX512<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.FRC:$src), 7438 
              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
              [(set DstVT.RC:$dst, (OpNode SrcVT.FRC:$src))]>,
              EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
      def rm : AVX512<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.ScalarMemOp:$src),
              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
              [(set DstVT.RC:$dst, (OpNode (SrcVT.ScalarLdFrag addr:$src)))]>,
              EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
    }
  } // Predicates = [HasAVX512]
}

defm VCVTSS2SIZ: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i32x_info,
                         lrint, WriteCvtSS2I>, TB, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2SI64Z: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i64x_info,
                         llrint, WriteCvtSS2I>, REX_W, TB, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSD2SIZ: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i32x_info,
                         lrint, WriteCvtSD2I>, TB, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2SI64Z: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i64x_info,
                         llrint, WriteCvtSD2I>, REX_W, TB, XD, EVEX_CD8<64, CD8VT1>;

// lrint with an i64 result also maps onto the 64-bit cvt instructions.
let Predicates = [HasAVX512] in {
  def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64Zrr FR32:$src)>;
  def : Pat<(i64 (lrint (loadf32 addr:$src))), (VCVTSS2SI64Zrm addr:$src)>;

  def : Pat<(i64 (lrint FR64:$src)), (VCVTSD2SI64Zrr FR64:$src)>;
  def : Pat<(i64 (lrint (loadf64 addr:$src))), (VCVTSD2SI64Zrm addr:$src)>;
}

// Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
// which produce unnecessary vmovs{s,d} instructions
let Predicates = [HasAVX512] in {
def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))),
          (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))),
          (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))),
          (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))),
          (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))),
          (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))),
          (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))),
          (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))),
          (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR64:$src)))))),
          (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi64 addr:$src))))))),
          (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR32:$src)))))),
          (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi32 addr:$src))))))),
          (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR64:$src)))))),
          (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi64 addr:$src))))))),
          (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR32:$src)))))),
          (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi32 addr:$src))))))),
          (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
} // Predicates = [HasAVX512]

// Convert float/double to signed/unsigned int 32/64 with truncation
// Provides both the isCodeGenOnly FRC forms (rr/rm) and the vector _Int
// forms, including an SAE (suppress-all-exceptions) register variant.
multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
                            X86VectorVTInfo _DstRC, SDPatternOperator OpNode,
                            SDNode OpNodeInt, SDNode OpNodeSAE,
                            X86FoldableSchedWrite sched, string aliasStr,
                            Predicate prd = HasAVX512> {
let Predicates = [prd], ExeDomain = _SrcRC.ExeDomain in {
  let isCodeGenOnly = 1 in {
  def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
              [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
              EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
  def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
              [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
              EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  }

  def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
            !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
            [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
            EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
  let Uses = [MXCSR] in
  def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
            !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
            [(set _DstRC.RC:$dst, (OpNodeSAE
                                   (_SrcRC.VT _SrcRC.RC:$src)))]>,
            EVEX, VEX_LIG, EVEX_B, Sched<[sched]>;
  def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
              (ins _SrcRC.IntScalarMemOp:$src),
              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
              [(set _DstRC.RC:$dst,
                (OpNodeInt (_SrcRC.ScalarIntMemFrags addr:$src)))]>,
              EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
} // Predicates = [prd]

  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
          (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
  def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
          (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
          (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
                                          _SrcRC.IntScalarMemOp:$src), 0, "att">;
}

defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
                        "{l}">, TB, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
                        "{q}">, REX_W, TB, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
                        "{l}">, TB, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
                        "{q}">, REX_W, TB, XD, EVEX_CD8<64, CD8VT1>;

defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
                        "{l}">, TB, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
                        "{q}">, TB, XS,REX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
                        "{l}">, TB, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
                        "{q}">, TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;

//===----------------------------------------------------------------------===//
// AVX-512  Convert form float to double and back
//===----------------------------------------------------------------------===//

// Scalar fp -> fp conversion: masked vector (_Int) forms plus isCodeGenOnly
// FRC forms that share the same assembly.
let Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                X86VectorVTInfo _Src, SDNode OpNode,
                                X86FoldableSchedWrite sched> {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
                    "$src2, $src1", "$src1, $src2",
                    (_.VT (OpNode (_.VT _.RC:$src1),
                                  (_Src.VT _Src.RC:$src2)))>,
                    EVEX, VVVV, VEX_LIG, Sched<[sched]>;
  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
                    "$src2, $src1", "$src1, $src2",
                    (_.VT (OpNode (_.VT _.RC:$src1),
                                  (_Src.ScalarIntMemFrags addr:$src2)))>,
                    EVEX, VVVV, VEX_LIG,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;

  let isCodeGenOnly = 1, hasSideEffects = 0 in {
    def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
               (ins _.FRC:$src1, _Src.FRC:$src2),
               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               EVEX, VVVV, VEX_LIG, Sched<[sched]>;
    let mayLoad = 1 in
    def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
               (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               EVEX, VVVV, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Scalar Conversion with SAE - suppress all exceptions
multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                    X86VectorVTInfo _Src, SDNode OpNodeSAE,
                                    X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
                        "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                        (_.VT (OpNodeSAE (_.VT _.RC:$src1),
                                         (_Src.VT _Src.RC:$src2)))>,
                        EVEX, VVVV, VEX_LIG, EVEX_B, Sched<[sched]>;
}

// Scalar Conversion with rounding control (RC)
multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                   X86VectorVTInfo _Src, SDNode OpNodeRnd,
                                   X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
                        "$rc, $src2, $src1", "$src1, $src2, $rc",
                        (_.VT (OpNodeRnd (_.VT _.RC:$src1),
                                         (_Src.VT _Src.RC:$src2), (i32 timm:$rc)))>,
                        EVEX, VVVV, VEX_LIG, Sched<[sched]>,
                        EVEX_B, EVEX_RC;
}
// Narrowing conversions (e.g. sd -> ss) support static rounding control.
multiclass avx512_cvt_fp_scalar_trunc<bits<8> opc, string OpcodeStr,
                                      SDNode OpNode, SDNode OpNodeRnd,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _src, X86VectorVTInfo _dst,
                                      Predicate prd = HasAVX512> {
  let Predicates = [prd], ExeDomain = SSEPackedSingle in {
    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
             avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
                                     OpNodeRnd, sched>, EVEX_CD8<_src.EltSize, CD8VT1>;
  }
}

// Widening conversions (e.g. ss -> sd) are exact, so they get SAE instead.
multiclass avx512_cvt_fp_scalar_extend<bits<8> opc, string OpcodeStr,
                                       SDNode OpNode, SDNode OpNodeSAE,
                                       X86FoldableSchedWrite sched,
                                       X86VectorVTInfo _src, X86VectorVTInfo _dst,
                                       Predicate prd = HasAVX512> {
  let Predicates = [prd], ExeDomain = SSEPackedSingle in {
    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
             avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE, sched>,
             EVEX_CD8<_src.EltSize, CD8VT1>;
  }
}
defm VCVTSD2SS : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2ss", X86frounds,
                                         X86froundsRnd, WriteCvtSD2SS, f64x_info,
                                         f32x_info>, TB, XD, REX_W;
defm VCVTSS2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtss2sd", X86fpexts,
                                          X86fpextsSAE, WriteCvtSS2SD, f32x_info,
                                          f64x_info>, TB, XS;
defm VCVTSD2SH : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2sh", X86frounds,
                                         X86froundsRnd, WriteCvtSD2SS, f64x_info,
                                         f16x_info, HasFP16>, T_MAP5, XD, REX_W;
defm VCVTSH2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtsh2sd", X86fpexts,
                                          X86fpextsSAE, WriteCvtSS2SD, f16x_info,
                                          f64x_info, HasFP16>, T_MAP5, XS;
defm VCVTSS2SH : avx512_cvt_fp_scalar_trunc<0x1D, "vcvtss2sh", X86frounds,
                                         X86froundsRnd, WriteCvtSD2SS, f32x_info,
                                         f16x_info, HasFP16>, T_MAP5;
defm VCVTSH2SS : avx512_cvt_fp_scalar_extend<0x13, "vcvtsh2ss", X86fpexts,
                                          X86fpextsSAE, WriteCvtSS2SD, f16x_info,
                                          f32x_info, HasFP16>, T_MAP6;

def : Pat<(f64 (any_fpextend FR32X:$src)),
          (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
          Requires<[HasAVX512]>;
def : Pat<(f64 (any_fpextend (loadf32 addr:$src))),
          (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
          Requires<[HasAVX512, OptForSize]>;

def : Pat<(f32 (any_fpround FR64X:$src)),
          (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
          Requires<[HasAVX512]>;

def : Pat<(f32 (any_fpextend FR16X:$src)),
          (VCVTSH2SSZrr (f32 (IMPLICIT_DEF)), FR16X:$src)>,
          Requires<[HasFP16]>;
def : Pat<(f32 (any_fpextend (loadf16 addr:$src))),
          (VCVTSH2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>,
          Requires<[HasFP16, OptForSize]>;

def : Pat<(f64 (any_fpextend FR16X:$src)),
(VCVTSH2SDZrr (f64 (IMPLICIT_DEF)), FR16X:$src)>,
          Requires<[HasFP16]>;
def : Pat<(f64 (any_fpextend (loadf16 addr:$src))),
          (VCVTSH2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
          Requires<[HasFP16, OptForSize]>;

def : Pat<(f16 (any_fpround FR32X:$src)),
          (VCVTSS2SHZrr (f16 (IMPLICIT_DEF)), FR32X:$src)>,
          Requires<[HasFP16]>;
def : Pat<(f16 (any_fpround FR64X:$src)),
          (VCVTSD2SHZrr (f16 (IMPLICIT_DEF)), FR64X:$src)>,
          Requires<[HasFP16]>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector
                     (f32 (any_fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
          (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
          Requires<[HasAVX512]>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector
                     (f64 (any_fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
          (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
          Requires<[HasAVX512]>;

//===----------------------------------------------------------------------===//
// AVX-512  Vector convert from signed/unsigned integer to float/double
//          and from float/double to signed/unsigned integer
//===----------------------------------------------------------------------===//

// Packed conversion: full-register (rr), memory (rm) and broadcast (rmb)
// forms, each with merge-masking and zero-masking variants generated via
// AVX512_maskable_cvt. LdDAG/MaskLdDAG let callers override the memory-form
// pattern (e.g. for extending loads).
multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86VectorVTInfo _Src, SDPatternOperator OpNode, SDPatternOperator MaskOpNode,
                          X86FoldableSchedWrite sched,
                          string Broadcast = _.BroadcastStr,
                          string Alias = "", X86MemOperand MemOp = _Src.MemOp,
                          RegisterClass MaskRC = _.KRCWM,
                          dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src)))),
                          dag MaskLdDAG = (_.VT (MaskOpNode (_Src.VT (_Src.LdFrag addr:$src))))> {
let Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rr : AVX512_maskable_cvt<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _Src.RC:$src),
                         (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
                         (ins MaskRC:$mask, _Src.RC:$src),
                         OpcodeStr, "$src", "$src",
                         (_.VT (OpNode (_Src.VT _Src.RC:$src))),
                         (vselect_mask MaskRC:$mask,
                                       (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
                                       _.RC:$src0),
                         (vselect_mask MaskRC:$mask,
                                       (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
                                       _.ImmAllZerosV)>,
                         EVEX, Sched<[sched]>;

  defm rm : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins MemOp:$src),
                         (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
                         (ins MaskRC:$mask, MemOp:$src),
                         OpcodeStr#Alias, "$src", "$src",
                         LdDAG,
                         (vselect_mask MaskRC:$mask, MaskLdDAG, _.RC:$src0),
                         (vselect_mask MaskRC:$mask, MaskLdDAG, _.ImmAllZerosV)>,
                         EVEX, Sched<[sched.Folded]>;

  defm rmb : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _Src.ScalarMemOp:$src),
                         (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
                         (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
                         OpcodeStr,
                         "${src}"#Broadcast, "${src}"#Broadcast,
                         (_.VT (OpNode (_Src.VT
                                        (_Src.BroadcastLdFrag addr:$src))
                               )),
                         (vselect_mask MaskRC:$mask,
                                       (_.VT
                                        (MaskOpNode
                                         (_Src.VT
                                          (_Src.BroadcastLdFrag addr:$src)))),
                                       _.RC:$src0),
                         (vselect_mask MaskRC:$mask,
                                       (_.VT
                                        (MaskOpNode
                                         (_Src.VT
                                          (_Src.BroadcastLdFrag addr:$src)))),
                                       _.ImmAllZerosV)>,
                         EVEX, EVEX_B, Sched<[sched.Folded]>;
  }
}
// Conversion with SAE - suppress all exceptions
multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                              X86VectorVTInfo _Src, SDNode OpNodeSAE,
                              X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in
  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _Src.RC:$src), OpcodeStr,
                        "{sae}, $src", "$src, {sae}",
                        (_.VT (OpNodeSAE (_Src.VT _Src.RC:$src)))>,
                        EVEX, EVEX_B, Sched<[sched]>;
}

// Conversion with rounding control (RC)
multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                             X86VectorVTInfo _Src, SDPatternOperator OpNodeRnd,
                             X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in
  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
                        "$rc, $src", "$src, $rc",
                        (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 timm:$rc)))>,
                        EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
}

// Similar to avx512_vcvt_fp, but uses an extload for the memory form.
multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                X86VectorVTInfo _Src, SDPatternOperator OpNode,
                                SDNode MaskOpNode,
                                X86FoldableSchedWrite sched,
                                string Broadcast = _.BroadcastStr,
                                string Alias = "", X86MemOperand MemOp = _Src.MemOp,
                                RegisterClass MaskRC = _.KRCWM>
  : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, MaskOpNode, sched, Broadcast,
                   Alias, MemOp, MaskRC,
                   (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src)),
                   (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;

// Extend [Float to Double, Half to Float]
multiclass avx512_cvt_extend<bits<8> opc, string OpcodeStr,
                             AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
                             X86SchedWriteWidths sched, Predicate prd = HasAVX512> {
  let Predicates = [prd] in {
    defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info512, _src.info256,
                                  any_fpextend, fpextend, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, _dst.info512, _src.info256,
                                X86vfpextSAE, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info128, _src.info128,
                                     X86any_vfpext, X86vfpext, sched.XMM,
                                     _dst.info128.BroadcastStr,
                                     "", f64mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info256, _src.info128,
                                     any_fpextend, fpextend, sched.YMM>, EVEX_V256;
  }
}

// Truncate [Double to Float, Float to Half]
multiclass avx512_cvt_trunc<bits<8> opc, string OpcodeStr,
                            AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
                            X86SchedWriteWidths sched, Predicate prd = HasAVX512,
                            PatFrag bcast128 = _src.info128.BroadcastLdFrag,
                            PatFrag loadVT128 = _src.info128.LdFrag,
                            RegisterClass maskRC128 = _src.info128.KRCWM> {
  let Predicates = [prd] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512,
                            X86any_vfpround, X86vfpround, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
                               X86vfproundRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128,
                               null_frag, null_frag, sched.XMM,
                               _src.info128.BroadcastStr, "{x}",
                               f128mem, maskRC128>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256,
                               X86any_vfpround, X86vfpround,
                               sched.YMM, _src.info256.BroadcastStr, "{y}">, EVEX_V256;

    // Special patterns to allow use of X86vmfpround for masking. Instruction
    // patterns have been disabled with null_frag.
    // Unmasked / merge-masked / zero-masked register form.
    def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT VR128X:$src))),
              (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
    def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
                            maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, maskRC128:$mask, VR128X:$src)>;
    def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
                            maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rrkz") maskRC128:$mask, VR128X:$src)>;

    def : Pat<(_dst.info128.VT (X86any_vfpround (loadVT128 addr:$src))),
              (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
    def : Pat<(X86vmfpround (loadVT128 addr:$src), (_dst.info128.VT VR128X:$src0),
                            maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
    def : Pat<(X86vmfpround (loadVT128 addr:$src), _dst.info128.ImmAllZerosV,
                            maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rmkz") maskRC128:$mask, addr:$src)>;

    def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT (bcast128 addr:$src)))),
              (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
    def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
                            (_dst.info128.VT VR128X:$src0), maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
    def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
                            _dst.info128.ImmAllZerosV, maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rmbkz") maskRC128:$mask, addr:$src)>;
  }

  // AT&T aliases: the "x"/"y" suffix disambiguates the 128-/256-bit source
  // forms, which share the same 128-bit destination register class.
  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;

  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
}

defm VCVTPD2PS : avx512_cvt_trunc<0x5A, "vcvtpd2ps",
                                  avx512vl_f32_info, avx512vl_f64_info, SchedWriteCvtPD2PS>,
                                  REX_W, TB, PD, EVEX_CD8<64, CD8VF>;
defm VCVTPS2PD :
avx512_cvt_extend<0x5A, "vcvtps2pd",
                                  avx512vl_f64_info, avx512vl_f32_info, SchedWriteCvtPS2PD>,
                                  TB, EVEX_CD8<32, CD8VH>;

// Extend Half to Double
multiclass avx512_cvtph2pd<bits<8> opc, string OpcodeStr,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in {
    defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f16x_info,
                                  any_fpextend, fpextend, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f16x_info,
                                X86vfpextSAE, sched.ZMM>, EVEX_V512;
    def : Pat<(v8f64 (extloadv8f16 addr:$src)),
              (!cast<Instruction>(NAME # "Zrm") addr:$src)>;
  }
  let Predicates = [HasFP16, HasVLX] in {
    defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v8f16x_info,
                                     X86any_vfpext, X86vfpext, sched.XMM, "{1to2}", "",
                                     f32mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v8f16x_info,
                                     X86any_vfpext, X86vfpext, sched.YMM, "{1to4}", "",
                                     f64mem>, EVEX_V256;
  }
}

// Truncate Double to Half
multiclass avx512_cvtpd2ph<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8f64_info,
                            X86any_vfpround, X86vfpround, sched.ZMM, "{1to8}", "{z}">,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8f64_info,
                               X86vfproundRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasFP16, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2f64x_info, null_frag,
                               null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
                               VK2WM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4f64x_info, null_frag,
                               null_frag, sched.YMM, "{1to4}", "{y}", f256mem,
                               VK4WM>, EVEX_V256;
  }
  // AT&T aliases: "x"/"y"/"z" suffixes select the 128/256/512-bit source
  // width; all three share a 128-bit destination register.
  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                  VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                  i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, i64mem:$src), 0, "att">;

  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                  VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
                  "$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                  i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask, i64mem:$src), 0, "att">;

  def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Zrr") VR128X:$dst,
                  VR512:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|"
                  "$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst,
                  VK8WM:$mask, VR512:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst,
                  VK8WM:$mask, VR512:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}",
                  (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst,
                  i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to8}}",
                  (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst,
                  VK8WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to8}}",
                  (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst,
                  VK8WM:$mask, i64mem:$src), 0, "att">;
}

defm VCVTPS2PHX : avx512_cvt_trunc<0x1D, "vcvtps2phx", avx512vl_f16_info,
                                   avx512vl_f32_info, SchedWriteCvtPD2PS,
                                   HasFP16>, T_MAP5, PD, EVEX_CD8<32, CD8VF>;
defm VCVTPH2PSX : avx512_cvt_extend<0x13, "vcvtph2psx", avx512vl_f32_info,
                                    avx512vl_f16_info, SchedWriteCvtPS2PD,
                                    HasFP16>, T_MAP6, PD, EVEX_CD8<16, CD8VH>;
defm VCVTPD2PH : avx512_cvtpd2ph<0x5A, "vcvtpd2ph", SchedWriteCvtPD2PS>,
                                 REX_W, T_MAP5, PD, EVEX_CD8<64, CD8VF>;
defm VCVTPH2PD : avx512_cvtph2pd<0x5A, "vcvtph2pd", SchedWriteCvtPS2PD>,
                                 T_MAP5, EVEX_CD8<16, CD8VQ>;

let Predicates = [HasFP16, HasVLX] in {
  // Special patterns to allow use of X86vmfpround for masking. Instruction
  // patterns have been disabled with null_frag.
8107 def : Pat<(v8f16 (X86any_vfpround (v4f64 VR256X:$src))), 8108 (VCVTPD2PHZ256rr VR256X:$src)>; 8109 def : Pat<(v8f16 (X86vmfpround (v4f64 VR256X:$src), (v8f16 VR128X:$src0), 8110 VK4WM:$mask)), 8111 (VCVTPD2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>; 8112 def : Pat<(X86vmfpround (v4f64 VR256X:$src), v8f16x_info.ImmAllZerosV, 8113 VK4WM:$mask), 8114 (VCVTPD2PHZ256rrkz VK4WM:$mask, VR256X:$src)>; 8115 8116 def : Pat<(v8f16 (X86any_vfpround (loadv4f64 addr:$src))), 8117 (VCVTPD2PHZ256rm addr:$src)>; 8118 def : Pat<(X86vmfpround (loadv4f64 addr:$src), (v8f16 VR128X:$src0), 8119 VK4WM:$mask), 8120 (VCVTPD2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>; 8121 def : Pat<(X86vmfpround (loadv4f64 addr:$src), v8f16x_info.ImmAllZerosV, 8122 VK4WM:$mask), 8123 (VCVTPD2PHZ256rmkz VK4WM:$mask, addr:$src)>; 8124 8125 def : Pat<(v8f16 (X86any_vfpround (v4f64 (X86VBroadcastld64 addr:$src)))), 8126 (VCVTPD2PHZ256rmb addr:$src)>; 8127 def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)), 8128 (v8f16 VR128X:$src0), VK4WM:$mask), 8129 (VCVTPD2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>; 8130 def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)), 8131 v8f16x_info.ImmAllZerosV, VK4WM:$mask), 8132 (VCVTPD2PHZ256rmbkz VK4WM:$mask, addr:$src)>; 8133 8134 def : Pat<(v8f16 (X86any_vfpround (v2f64 VR128X:$src))), 8135 (VCVTPD2PHZ128rr VR128X:$src)>; 8136 def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v8f16 VR128X:$src0), 8137 VK2WM:$mask), 8138 (VCVTPD2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>; 8139 def : Pat<(X86vmfpround (v2f64 VR128X:$src), v8f16x_info.ImmAllZerosV, 8140 VK2WM:$mask), 8141 (VCVTPD2PHZ128rrkz VK2WM:$mask, VR128X:$src)>; 8142 8143 def : Pat<(v8f16 (X86any_vfpround (loadv2f64 addr:$src))), 8144 (VCVTPD2PHZ128rm addr:$src)>; 8145 def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v8f16 VR128X:$src0), 8146 VK2WM:$mask), 8147 (VCVTPD2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8148 def : Pat<(X86vmfpround (loadv2f64 addr:$src), 
v8f16x_info.ImmAllZerosV, 8149 VK2WM:$mask), 8150 (VCVTPD2PHZ128rmkz VK2WM:$mask, addr:$src)>; 8151 8152 def : Pat<(v8f16 (X86any_vfpround (v2f64 (X86VBroadcastld64 addr:$src)))), 8153 (VCVTPD2PHZ128rmb addr:$src)>; 8154 def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)), 8155 (v8f16 VR128X:$src0), VK2WM:$mask), 8156 (VCVTPD2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8157 def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)), 8158 v8f16x_info.ImmAllZerosV, VK2WM:$mask), 8159 (VCVTPD2PHZ128rmbkz VK2WM:$mask, addr:$src)>; 8160} 8161 8162// Convert Signed/Unsigned Doubleword to Double 8163let Uses = []<Register>, mayRaiseFPException = 0 in 8164multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 8165 SDNode MaskOpNode, SDPatternOperator OpNode128, 8166 SDNode MaskOpNode128, 8167 X86SchedWriteWidths sched> { 8168 // No rounding in this op 8169 let Predicates = [HasAVX512] in 8170 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode, 8171 MaskOpNode, sched.ZMM>, EVEX_V512; 8172 8173 let Predicates = [HasVLX] in { 8174 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info, 8175 OpNode128, MaskOpNode128, sched.XMM, "{1to2}", 8176 "", i64mem, VK2WM, 8177 (v2f64 (OpNode128 (bc_v4i32 8178 (v2i64 8179 (scalar_to_vector (loadi64 addr:$src)))))), 8180 (v2f64 (MaskOpNode128 (bc_v4i32 8181 (v2i64 8182 (scalar_to_vector (loadi64 addr:$src))))))>, 8183 EVEX_V128; 8184 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode, 8185 MaskOpNode, sched.YMM>, EVEX_V256; 8186 } 8187} 8188 8189// Convert Signed/Unsigned Doubleword to Float 8190multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 8191 SDNode MaskOpNode, SDNode OpNodeRnd, 8192 X86SchedWriteWidths sched> { 8193 let Predicates = [HasAVX512] in 8194 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode, 8195 MaskOpNode, sched.ZMM>, 8196 avx512_vcvt_fp_rc<opc, OpcodeStr, 
v16f32_info, v16i32_info, 8197 OpNodeRnd, sched.ZMM>, EVEX_V512; 8198 8199 let Predicates = [HasVLX] in { 8200 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode, 8201 MaskOpNode, sched.XMM>, EVEX_V128; 8202 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode, 8203 MaskOpNode, sched.YMM>, EVEX_V256; 8204 } 8205} 8206 8207// Convert Float to Signed/Unsigned Doubleword with truncation 8208multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 8209 SDNode MaskOpNode, 8210 SDNode OpNodeSAE, X86SchedWriteWidths sched> { 8211 let Predicates = [HasAVX512] in { 8212 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode, 8213 MaskOpNode, sched.ZMM>, 8214 avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info, 8215 OpNodeSAE, sched.ZMM>, EVEX_V512; 8216 } 8217 let Predicates = [HasVLX] in { 8218 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode, 8219 MaskOpNode, sched.XMM>, EVEX_V128; 8220 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode, 8221 MaskOpNode, sched.YMM>, EVEX_V256; 8222 } 8223} 8224 8225// Convert Float to Signed/Unsigned Doubleword 8226multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode, 8227 SDNode MaskOpNode, SDNode OpNodeRnd, 8228 X86SchedWriteWidths sched> { 8229 let Predicates = [HasAVX512] in { 8230 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode, 8231 MaskOpNode, sched.ZMM>, 8232 avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info, 8233 OpNodeRnd, sched.ZMM>, EVEX_V512; 8234 } 8235 let Predicates = [HasVLX] in { 8236 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode, 8237 MaskOpNode, sched.XMM>, EVEX_V128; 8238 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode, 8239 MaskOpNode, sched.YMM>, EVEX_V256; 8240 } 8241} 8242 8243// Convert Double to Signed/Unsigned Doubleword with truncation 8244multiclass 
avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 8245 SDNode MaskOpNode, SDNode OpNodeSAE, 8246 X86SchedWriteWidths sched> { 8247 let Predicates = [HasAVX512] in { 8248 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode, 8249 MaskOpNode, sched.ZMM>, 8250 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info, 8251 OpNodeSAE, sched.ZMM>, EVEX_V512; 8252 } 8253 let Predicates = [HasVLX] in { 8254 // we need "x"/"y" suffixes in order to distinguish between 128 and 256 8255 // memory forms of these instructions in Asm Parser. They have the same 8256 // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly 8257 // due to the same reason. 8258 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info, 8259 null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem, 8260 VK2WM>, EVEX_V128; 8261 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode, 8262 MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256; 8263 } 8264 8265 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}", 8266 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, 8267 VR128X:$src), 0, "att">; 8268 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", 8269 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst, 8270 VK2WM:$mask, VR128X:$src), 0, "att">; 8271 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}", 8272 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst, 8273 VK2WM:$mask, VR128X:$src), 0, "att">; 8274 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}", 8275 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, 8276 f64mem:$src), 0, "att">; 8277 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|" 8278 "$dst {${mask}}, ${src}{1to2}}", 8279 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst, 8280 VK2WM:$mask, f64mem:$src), 0, "att">; 8281 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|" 8282 "$dst {${mask}} 
{z}, ${src}{1to2}}", 8283                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst, 8284                  VK2WM:$mask, f64mem:$src), 0, "att">; 8285 8286  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}", 8287                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, 8288                  VR256X:$src), 0, "att">; 8289  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", 8290                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst, 8291                  VK4WM:$mask, VR256X:$src), 0, "att">; 8292  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}", 8293                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst, 8294                  VK4WM:$mask, VR256X:$src), 0, "att">; 8295  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}", 8296                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, 8297                  f64mem:$src), 0, "att">; 8298  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|" 8299                  "$dst {${mask}}, ${src}{1to4}}", 8300                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst, 8301                  VK4WM:$mask, f64mem:$src), 0, "att">; 8302  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|" 8303                  "$dst {${mask}} {z}, ${src}{1to4}}", 8304                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst, 8305                  VK4WM:$mask, f64mem:$src), 0, "att">; 8306} 8307 8308// Convert Double to Signed/Unsigned Doubleword 8309multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode, 8310                           SDNode MaskOpNode, SDNode OpNodeRnd, 8311                           X86SchedWriteWidths sched> { 8312  let Predicates = [HasAVX512] in { 8313    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode, 8314                            MaskOpNode, sched.ZMM>, 8315             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info, 8316                               OpNodeRnd, sched.ZMM>, EVEX_V512; 8317  } 8318  let Predicates = [HasVLX] in { 8319    // we need "x"/"y" suffixes in order to distinguish between 128 and 256 8320    // memory forms of these instructions in Asm Parser. They have the same 8321    // dest type - 'v4i32x_info'.
We also specify the broadcast string explicitly 8322 // due to the same reason. 8323 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info, 8324 null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem, 8325 VK2WM>, EVEX_V128; 8326 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode, 8327 MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256; 8328 } 8329 8330 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}", 8331 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">; 8332 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", 8333 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst, 8334 VK2WM:$mask, VR128X:$src), 0, "att">; 8335 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}", 8336 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst, 8337 VK2WM:$mask, VR128X:$src), 0, "att">; 8338 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}", 8339 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, 8340 f64mem:$src), 0, "att">; 8341 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|" 8342 "$dst {${mask}}, ${src}{1to2}}", 8343 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst, 8344 VK2WM:$mask, f64mem:$src), 0, "att">; 8345 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|" 8346 "$dst {${mask}} {z}, ${src}{1to2}}", 8347 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst, 8348 VK2WM:$mask, f64mem:$src), 0, "att">; 8349 8350 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}", 8351 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">; 8352 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", 8353 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst, 8354 VK4WM:$mask, VR256X:$src), 0, "att">; 8355 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}", 8356 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst, 8357 VK4WM:$mask, VR256X:$src), 0, "att">; 
8358  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}", 8359                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, 8360                  f64mem:$src), 0, "att">; 8361  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|" 8362                  "$dst {${mask}}, ${src}{1to4}}", 8363                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst, 8364                  VK4WM:$mask, f64mem:$src), 0, "att">; 8365  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|" 8366                  "$dst {${mask}} {z}, ${src}{1to4}}", 8367                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst, 8368                  VK4WM:$mask, f64mem:$src), 0, "att">; 8369} 8370 8371// Convert Double to Signed/Unsigned Quadword 8372multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode, 8373                           SDNode MaskOpNode, SDNode OpNodeRnd, 8374                           X86SchedWriteWidths sched> { 8375  let Predicates = [HasDQI] in { 8376    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode, 8377                            MaskOpNode, sched.ZMM>, 8378             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info, 8379                               OpNodeRnd, sched.ZMM>, EVEX_V512; 8380  } 8381  let Predicates = [HasDQI, HasVLX] in { 8382    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode, 8383                               MaskOpNode, sched.XMM>, EVEX_V128; 8384    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode, 8385                               MaskOpNode, sched.YMM>, EVEX_V256; 8386  } 8387} 8388 8389// Convert Double to Signed/Unsigned Quadword with truncation 8390multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 8391                            SDNode MaskOpNode, SDNode OpNodeRnd, 8392                            X86SchedWriteWidths sched> { 8393  let Predicates = [HasDQI] in { 8394    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode, 8395                            MaskOpNode, sched.ZMM>, 8396             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info, 8397                                OpNodeRnd, sched.ZMM>, EVEX_V512; 8398  } 8399  let Predicates = [HasDQI, HasVLX] in { 8400    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode, 8401                               MaskOpNode, sched.XMM>,
EVEX_V128; 8402    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode, 8403                               MaskOpNode, sched.YMM>, EVEX_V256; 8404  } 8405} 8406 8407// Convert Signed/Unsigned Quadword to Double 8408multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 8409                           SDNode MaskOpNode, SDNode OpNodeRnd, 8410                           X86SchedWriteWidths sched> { 8411  let Predicates = [HasDQI] in { 8412    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode, 8413                            MaskOpNode, sched.ZMM>, 8414             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info, 8415                               OpNodeRnd, sched.ZMM>, EVEX_V512; 8416  } 8417  let Predicates = [HasDQI, HasVLX] in { 8418    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode, 8419                               MaskOpNode, sched.XMM>, EVEX_V128; 8420    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode, 8421                               MaskOpNode, sched.YMM>, EVEX_V256; 8422  } 8423} 8424 8425// Convert Float to Signed/Unsigned Quadword 8426multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode, 8427                           SDNode MaskOpNode, SDNode OpNodeRnd, 8428                           X86SchedWriteWidths sched> { 8429  let Predicates = [HasDQI] in { 8430    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode, 8431                            MaskOpNode, sched.ZMM>, 8432             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info, 8433                               OpNodeRnd, sched.ZMM>, EVEX_V512; 8434  } 8435  let Predicates = [HasDQI, HasVLX] in { 8436    // Explicitly specified broadcast string, since we take only 2 elements 8437    // from v4f32x_info source 8438    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode, 8439                               MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM, 8440                               (v2i64 (OpNode (bc_v4f32 8441                                (v2f64 8442                                 (scalar_to_vector (loadf64 addr:$src)))))), 8443                               (v2i64 (MaskOpNode (bc_v4f32 8444                                (v2f64 8445                                 (scalar_to_vector (loadf64 addr:$src))))))>, 8446                               EVEX_V128; 8447    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode, 8448                               MaskOpNode, sched.YMM>, EVEX_V256;
8449  } 8450} 8451 8452// Convert Float to Signed/Unsigned Quadword with truncation 8453multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 8454                            SDNode MaskOpNode, SDNode OpNodeRnd, 8455                            X86SchedWriteWidths sched> { 8456  let Predicates = [HasDQI] in { 8457    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode, 8458                            MaskOpNode, sched.ZMM>, 8459             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info, 8460                                OpNodeRnd, sched.ZMM>, EVEX_V512; 8461  } 8462  let Predicates = [HasDQI, HasVLX] in { 8463    // Explicitly specified broadcast string, since we take only 2 elements 8464    // from v4f32x_info source 8465    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode, 8466                               MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM, 8467                               (v2i64 (OpNode (bc_v4f32 8468                                (v2f64 8469                                 (scalar_to_vector (loadf64 addr:$src)))))), 8470                               (v2i64 (MaskOpNode (bc_v4f32 8471                                (v2f64 8472                                 (scalar_to_vector (loadf64 addr:$src))))))>, 8473                               EVEX_V128; 8474    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode, 8475                               MaskOpNode, sched.YMM>, EVEX_V256; 8476  } 8477} 8478 8479// Convert Signed/Unsigned Quadword to Float 8480// Also Convert Signed/Unsigned Doubleword to Half 8481multiclass avx512_cvtqq2ps_dq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 8482                                 SDPatternOperator MaskOpNode, SDPatternOperator OpNode128, 8483                                 SDPatternOperator OpNode128M, SDPatternOperator OpNodeRnd, 8484                                 AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src, 8485                                 X86SchedWriteWidths sched, Predicate prd = HasDQI> { 8486  let Predicates = [prd] in { 8487    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512, OpNode, 8488                            MaskOpNode, sched.ZMM>, 8489             avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512, 8490                               OpNodeRnd, sched.ZMM>, EVEX_V512; 8491  } 8492  let Predicates = [prd, HasVLX] in { 8493    // we need "x"/"y" suffixes in order to distinguish between 128 and 256 8494    // memory forms of these instructions
in Asm Parser. They have the same 8495    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly 8496    // due to the same reason. 8497    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128, null_frag, 8498                               null_frag, sched.XMM, _src.info128.BroadcastStr, 8499                               "{x}", i128mem, _src.info128.KRCWM>, 8500                               EVEX_V128; 8501    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256, OpNode, 8502                               MaskOpNode, sched.YMM, _src.info256.BroadcastStr, 8503                               "{y}">, EVEX_V256; 8504 8505    // Special patterns to allow use of X86VM[SU]intToFP for masking. Instruction 8506    // patterns have been disabled with null_frag. 8507    def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT VR128X:$src))), 8508              (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>; 8509    def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0), 8510                          _src.info128.KRCWM:$mask), 8511              (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, _src.info128.KRCWM:$mask, VR128X:$src)>; 8512    def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV, 8513                          _src.info128.KRCWM:$mask), 8514              (!cast<Instruction>(NAME # "Z128rrkz") _src.info128.KRCWM:$mask, VR128X:$src)>; 8515 8516    def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.LdFrag addr:$src))), 8517              (!cast<Instruction>(NAME # "Z128rm") addr:$src)>; 8518    def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), (_dst.info128.VT VR128X:$src0), 8519                          _src.info128.KRCWM:$mask), 8520              (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>; 8521    def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), _dst.info128.ImmAllZerosV, 8522                          _src.info128.KRCWM:$mask), 8523              (!cast<Instruction>(NAME # "Z128rmkz") _src.info128.KRCWM:$mask, addr:$src)>; 8524 8525    def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT (X86VBroadcastld64 addr:$src)))), 8526              (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>; 8527    def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
8528 (_dst.info128.VT VR128X:$src0), _src.info128.KRCWM:$mask), 8529 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>; 8530 def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)), 8531 _dst.info128.ImmAllZerosV, _src.info128.KRCWM:$mask), 8532 (!cast<Instruction>(NAME # "Z128rmbkz") _src.info128.KRCWM:$mask, addr:$src)>; 8533 } 8534 8535 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}", 8536 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, 8537 VR128X:$src), 0, "att">; 8538 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", 8539 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst, 8540 VK2WM:$mask, VR128X:$src), 0, "att">; 8541 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}", 8542 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst, 8543 VK2WM:$mask, VR128X:$src), 0, "att">; 8544 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}", 8545 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, 8546 i64mem:$src), 0, "att">; 8547 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|" 8548 "$dst {${mask}}, ${src}{1to2}}", 8549 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst, 8550 VK2WM:$mask, i64mem:$src), 0, "att">; 8551 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|" 8552 "$dst {${mask}} {z}, ${src}{1to2}}", 8553 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst, 8554 VK2WM:$mask, i64mem:$src), 0, "att">; 8555 8556 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}", 8557 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, 8558 VR256X:$src), 0, "att">; 8559 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|" 8560 "$dst {${mask}}, $src}", 8561 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst, 8562 VK4WM:$mask, VR256X:$src), 0, "att">; 8563 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|" 8564 "$dst {${mask}} {z}, $src}", 8565 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst, 8566 VK4WM:$mask, 
VR256X:$src), 0, "att">; 8567 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}", 8568 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, 8569 i64mem:$src), 0, "att">; 8570 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|" 8571 "$dst {${mask}}, ${src}{1to4}}", 8572 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst, 8573 VK4WM:$mask, i64mem:$src), 0, "att">; 8574 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|" 8575 "$dst {${mask}} {z}, ${src}{1to4}}", 8576 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst, 8577 VK4WM:$mask, i64mem:$src), 0, "att">; 8578} 8579 8580defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", any_sint_to_fp, sint_to_fp, 8581 X86any_VSintToFP, X86VSintToFP, 8582 SchedWriteCvtDQ2PD>, TB, XS, EVEX_CD8<32, CD8VH>; 8583 8584defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", any_sint_to_fp, sint_to_fp, 8585 X86VSintToFpRnd, SchedWriteCvtDQ2PS>, 8586 TB, EVEX_CD8<32, CD8VF>; 8587 8588defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86any_cvttp2si, 8589 X86cvttp2si, X86cvttp2siSAE, 8590 SchedWriteCvtPS2DQ>, TB, XS, EVEX_CD8<32, CD8VF>; 8591 8592defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86any_cvttp2si, 8593 X86cvttp2si, X86cvttp2siSAE, 8594 SchedWriteCvtPD2DQ>, 8595 TB, PD, REX_W, EVEX_CD8<64, CD8VF>; 8596 8597defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86any_cvttp2ui, 8598 X86cvttp2ui, X86cvttp2uiSAE, 8599 SchedWriteCvtPS2DQ>, TB, EVEX_CD8<32, CD8VF>; 8600 8601defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86any_cvttp2ui, 8602 X86cvttp2ui, X86cvttp2uiSAE, 8603 SchedWriteCvtPD2DQ>, 8604 TB, REX_W, EVEX_CD8<64, CD8VF>; 8605 8606defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", any_uint_to_fp, 8607 uint_to_fp, X86any_VUintToFP, X86VUintToFP, 8608 SchedWriteCvtDQ2PD>, TB, XS, EVEX_CD8<32, CD8VH>; 8609 8610defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", any_uint_to_fp, 8611 uint_to_fp, X86VUintToFpRnd, 8612 SchedWriteCvtDQ2PS>, TB, XD, EVEX_CD8<32, 
CD8VF>; 8613 8614defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int, X86cvtp2Int, 8615 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, TB, PD, 8616 EVEX_CD8<32, CD8VF>; 8617 8618defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int, X86cvtp2Int, 8619 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, TB, XD, 8620 REX_W, EVEX_CD8<64, CD8VF>; 8621 8622defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt, X86cvtp2UInt, 8623 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, 8624 TB, EVEX_CD8<32, CD8VF>; 8625 8626defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt, X86cvtp2UInt, 8627 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, REX_W, 8628 TB, EVEX_CD8<64, CD8VF>; 8629 8630defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int, X86cvtp2Int, 8631 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, REX_W, 8632 TB, PD, EVEX_CD8<64, CD8VF>; 8633 8634defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int, X86cvtp2Int, 8635 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, TB, PD, 8636 EVEX_CD8<32, CD8VH>; 8637 8638defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt, X86cvtp2UInt, 8639 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, REX_W, 8640 TB, PD, EVEX_CD8<64, CD8VF>; 8641 8642defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt, X86cvtp2UInt, 8643 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, TB, PD, 8644 EVEX_CD8<32, CD8VH>; 8645 8646defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86any_cvttp2si, 8647 X86cvttp2si, X86cvttp2siSAE, 8648 SchedWriteCvtPD2DQ>, REX_W, 8649 TB, PD, EVEX_CD8<64, CD8VF>; 8650 8651defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86any_cvttp2si, 8652 X86cvttp2si, X86cvttp2siSAE, 8653 SchedWriteCvtPS2DQ>, TB, PD, 8654 EVEX_CD8<32, CD8VH>; 8655 8656defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86any_cvttp2ui, 8657 X86cvttp2ui, X86cvttp2uiSAE, 8658 SchedWriteCvtPD2DQ>, REX_W, 8659 TB, PD, EVEX_CD8<64, CD8VF>; 8660 8661defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86any_cvttp2ui, 8662 X86cvttp2ui, 
X86cvttp2uiSAE, 8663 SchedWriteCvtPS2DQ>, TB, PD, 8664 EVEX_CD8<32, CD8VH>; 8665 8666defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", any_sint_to_fp, 8667 sint_to_fp, X86VSintToFpRnd, 8668 SchedWriteCvtDQ2PD>, REX_W, TB, XS, EVEX_CD8<64, CD8VF>; 8669 8670defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", any_uint_to_fp, 8671 uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PD>, 8672 REX_W, TB, XS, EVEX_CD8<64, CD8VF>; 8673 8674defm VCVTDQ2PH : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtdq2ph", any_sint_to_fp, sint_to_fp, 8675 X86any_VSintToFP, X86VMSintToFP, 8676 X86VSintToFpRnd, avx512vl_f16_info, avx512vl_i32_info, 8677 SchedWriteCvtDQ2PS, HasFP16>, 8678 T_MAP5, EVEX_CD8<32, CD8VF>; 8679 8680defm VCVTUDQ2PH : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtudq2ph", any_uint_to_fp, uint_to_fp, 8681 X86any_VUintToFP, X86VMUintToFP, 8682 X86VUintToFpRnd, avx512vl_f16_info, avx512vl_i32_info, 8683 SchedWriteCvtDQ2PS, HasFP16>, T_MAP5, XD, 8684 EVEX_CD8<32, CD8VF>; 8685 8686defm VCVTQQ2PS : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtqq2ps", any_sint_to_fp, sint_to_fp, 8687 X86any_VSintToFP, X86VMSintToFP, 8688 X86VSintToFpRnd, avx512vl_f32_info, avx512vl_i64_info, 8689 SchedWriteCvtDQ2PS>, REX_W, TB, 8690 EVEX_CD8<64, CD8VF>; 8691 8692defm VCVTUQQ2PS : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtuqq2ps", any_uint_to_fp, uint_to_fp, 8693 X86any_VUintToFP, X86VMUintToFP, 8694 X86VUintToFpRnd, avx512vl_f32_info, avx512vl_i64_info, 8695 SchedWriteCvtDQ2PS>, REX_W, TB, XD, 8696 EVEX_CD8<64, CD8VF>; 8697 8698let Predicates = [HasVLX] in { 8699 // Special patterns to allow use of X86mcvtp2Int for masking. Instruction 8700 // patterns have been disabled with null_frag. 
8701 def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))), 8702 (VCVTPD2DQZ128rr VR128X:$src)>; 8703 def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), (v4i32 VR128X:$src0), 8704 VK2WM:$mask), 8705 (VCVTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>; 8706 def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV, 8707 VK2WM:$mask), 8708 (VCVTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>; 8709 8710 def : Pat<(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))), 8711 (VCVTPD2DQZ128rm addr:$src)>; 8712 def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), (v4i32 VR128X:$src0), 8713 VK2WM:$mask), 8714 (VCVTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8715 def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV, 8716 VK2WM:$mask), 8717 (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>; 8718 8719 def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)))), 8720 (VCVTPD2DQZ128rmb addr:$src)>; 8721 def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)), 8722 (v4i32 VR128X:$src0), VK2WM:$mask), 8723 (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8724 def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)), 8725 v4i32x_info.ImmAllZerosV, VK2WM:$mask), 8726 (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>; 8727 8728 // Special patterns to allow use of X86mcvttp2si for masking. Instruction 8729 // patterns have been disabled with null_frag. 
8730 def : Pat<(v4i32 (X86any_cvttp2si (v2f64 VR128X:$src))), 8731 (VCVTTPD2DQZ128rr VR128X:$src)>; 8732 def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0), 8733 VK2WM:$mask), 8734 (VCVTTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>; 8735 def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV, 8736 VK2WM:$mask), 8737 (VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>; 8738 8739 def : Pat<(v4i32 (X86any_cvttp2si (loadv2f64 addr:$src))), 8740 (VCVTTPD2DQZ128rm addr:$src)>; 8741 def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0), 8742 VK2WM:$mask), 8743 (VCVTTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8744 def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV, 8745 VK2WM:$mask), 8746 (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>; 8747 8748 def : Pat<(v4i32 (X86any_cvttp2si (v2f64 (X86VBroadcastld64 addr:$src)))), 8749 (VCVTTPD2DQZ128rmb addr:$src)>; 8750 def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)), 8751 (v4i32 VR128X:$src0), VK2WM:$mask), 8752 (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8753 def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)), 8754 v4i32x_info.ImmAllZerosV, VK2WM:$mask), 8755 (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>; 8756 8757 // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction 8758 // patterns have been disabled with null_frag. 
8759  def : Pat<(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src))), 8760            (VCVTPD2UDQZ128rr VR128X:$src)>; 8761  def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), (v4i32 VR128X:$src0), 8762                           VK2WM:$mask), 8763            (VCVTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>; 8764  def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV, 8765                           VK2WM:$mask), 8766            (VCVTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>; 8767 8768  def : Pat<(v4i32 (X86cvtp2UInt (loadv2f64 addr:$src))), 8769            (VCVTPD2UDQZ128rm addr:$src)>; 8770  def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), (v4i32 VR128X:$src0), 8771                           VK2WM:$mask), 8772            (VCVTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8773  def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV, 8774                           VK2WM:$mask), 8775            (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>; 8776 8777  def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)))), 8778            (VCVTPD2UDQZ128rmb addr:$src)>; 8779  def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)), 8780                           (v4i32 VR128X:$src0), VK2WM:$mask), 8781            (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8782  def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)), 8783                           v4i32x_info.ImmAllZerosV, VK2WM:$mask), 8784            (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>; 8785 8786  // Special patterns to allow use of X86mcvttp2ui for masking. Instruction 8787  // patterns have been disabled with null_frag.
8788 def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 VR128X:$src))), 8789 (VCVTTPD2UDQZ128rr VR128X:$src)>; 8790 def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0), 8791 VK2WM:$mask), 8792 (VCVTTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>; 8793 def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV, 8794 VK2WM:$mask), 8795 (VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>; 8796 8797 def : Pat<(v4i32 (X86any_cvttp2ui (loadv2f64 addr:$src))), 8798 (VCVTTPD2UDQZ128rm addr:$src)>; 8799 def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0), 8800 VK2WM:$mask), 8801 (VCVTTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8802 def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV, 8803 VK2WM:$mask), 8804 (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>; 8805 8806 def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)))), 8807 (VCVTTPD2UDQZ128rmb addr:$src)>; 8808 def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)), 8809 (v4i32 VR128X:$src0), VK2WM:$mask), 8810 (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8811 def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)), 8812 v4i32x_info.ImmAllZerosV, VK2WM:$mask), 8813 (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>; 8814} 8815 8816let Predicates = [HasDQI, HasVLX] in { 8817 def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))), 8818 (VCVTPS2QQZ128rm addr:$src)>; 8819 def : Pat<(v2i64 (vselect_mask VK2WM:$mask, 8820 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), 8821 VR128X:$src0)), 8822 (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8823 def : Pat<(v2i64 (vselect_mask VK2WM:$mask, 8824 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), 8825 v2i64x_info.ImmAllZerosV)), 8826 (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>; 8827 8828 def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))), 8829 (VCVTPS2UQQZ128rm addr:$src)>; 8830 def : Pat<(v2i64 
(vselect_mask VK2WM:$mask, 8831 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), 8832 VR128X:$src0)), 8833 (VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8834 def : Pat<(v2i64 (vselect_mask VK2WM:$mask, 8835 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), 8836 v2i64x_info.ImmAllZerosV)), 8837 (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>; 8838 8839 def : Pat<(v2i64 (X86any_cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))), 8840 (VCVTTPS2QQZ128rm addr:$src)>; 8841 def : Pat<(v2i64 (vselect_mask VK2WM:$mask, 8842 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), 8843 VR128X:$src0)), 8844 (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8845 def : Pat<(v2i64 (vselect_mask VK2WM:$mask, 8846 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), 8847 v2i64x_info.ImmAllZerosV)), 8848 (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>; 8849 8850 def : Pat<(v2i64 (X86any_cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))), 8851 (VCVTTPS2UQQZ128rm addr:$src)>; 8852 def : Pat<(v2i64 (vselect_mask VK2WM:$mask, 8853 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), 8854 VR128X:$src0)), 8855 (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8856 def : Pat<(v2i64 (vselect_mask VK2WM:$mask, 8857 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), 8858 v2i64x_info.ImmAllZerosV)), 8859 (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>; 8860} 8861 8862let Predicates = [HasVLX] in { 8863 def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))), 8864 (VCVTDQ2PDZ128rm addr:$src)>; 8865 def : Pat<(v2f64 (vselect_mask VK2WM:$mask, 8866 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))), 8867 VR128X:$src0)), 8868 (VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8869 def : Pat<(v2f64 (vselect_mask VK2WM:$mask, 8870 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))), 8871 v2f64x_info.ImmAllZerosV)), 8872 (VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>; 8873 8874 def : 
Pat<(v2f64 (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
            (VCVTUDQ2PDZ128rm addr:$src)>;
  // Merge-masked unsigned int->fp from a zero-extended 64-bit load.
  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
                    (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
                    VR128X:$src0)),
            (VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  // Zero-masked variant.
  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
                    (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
                    v2f64x_info.ImmAllZerosV)),
            (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
}

//===----------------------------------------------------------------------===//
// Half precision conversion instructions
//===----------------------------------------------------------------------===//

// vcvtph2ps: packed half->single conversion, register (rr) and memory (rm)
// forms. The ld_dag parameter supplies the load fragment so instantiations
// can use a full-width load or, for the 128-bit form, a zero-extending
// 64-bit load. AVX512_maskable_split lets the unmasked pattern use the
// strict-FP-capable node (X86any_cvtph2ps) while masked forms use the
// non-strict node.
let Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                           X86MemOperand x86memop, dag ld_dag,
                           X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable_split<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
                             (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
                             (X86any_cvtph2ps (_src.VT _src.RC:$src)),
                             (X86cvtph2ps (_src.VT _src.RC:$src))>,
                             T8, PD, Sched<[sched]>;
  defm rm : AVX512_maskable_split<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
                             (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
                             (X86any_cvtph2ps (_src.VT ld_dag)),
                             (X86cvtph2ps (_src.VT ld_dag))>,
                             T8, PD, Sched<[sched.Folded]>;
}

// Register-only {sae} (suppress-all-exceptions) form of vcvtph2ps (EVEX.b).
multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                               X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in
  defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
                             (ins _src.RC:$src), "vcvtph2ps",
                             "{sae}, $src", "$src, {sae}",
                             (X86cvtph2psSAE (_src.VT _src.RC:$src))>,
                             T8, PD, EVEX_B, Sched<[sched]>;
}

let Predicates = [HasAVX512] in
  defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem,
                                    (load addr:$src),
                                    WriteCvtPH2PSZ>,
                    avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
                    EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;

let Predicates = [HasVLX] in {
  defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
                                       (load addr:$src), WriteCvtPH2PSY>, EVEX, EVEX_V256,
                                       EVEX_CD8<32, CD8VH>;
  // The 128-bit form consumes only 64 bits of halves, hence the vzload64.
  defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
                                       (bitconvert (v2i64 (X86vzload64 addr:$src))),
                                       WriteCvtPH2PS>, EVEX, EVEX_V128,
                                       EVEX_CD8<32, CD8VH>;

  // Pattern match vcvtph2ps of a scalar i64 load.
  def : Pat<(v4f32 (X86any_cvtph2ps (v8i16 (bitconvert
                    (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
            (VCVTPH2PSZ128rm addr:$src)>;
}

// vcvtps2ph: packed single->half conversion with an immediate rounding-
// control operand ($src2). Register forms (rr/rrk/rrkz) carry ISel patterns;
// the store forms (mr/mrk) are pattern-less (hasSideEffects = 0, mayStore)
// and are matched by separate Pat<>s further down in this file.
multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                           X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
let ExeDomain = GenericDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
             (ins _src.RC:$src1, i32u8imm:$src2),
             "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
             [(set _dest.RC:$dst,
               (X86any_cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
             Sched<[RR]>;
  // Merge-masking form ties the pass-through operand to $dst.
  let Constraints = "$src0 = $dst" in
  def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
             (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
             "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
             [(set _dest.RC:$dst,
               (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
                             _dest.RC:$src0, _src.KRCWM:$mask))]>,
             Sched<[RR]>, EVEX_K;
  def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
             (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
             "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}",
             [(set _dest.RC:$dst,
               (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
                             _dest.ImmAllZerosV,
_src.KRCWM:$mask))]>,
             Sched<[RR]>, EVEX_KZ;
  // Store forms carry no patterns; matched by Pat<>s below.
  let hasSideEffects = 0, mayStore = 1 in {
    def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
               (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               Sched<[MR]>;
    def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
               (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
               "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
               EVEX_K, Sched<[MR]>;
  }
}
}

// Register-only {sae} forms of vcvtps2ph (plain, merge-masked, zero-masked).
multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                               SchedWrite Sched> {
  let hasSideEffects = 0, Uses = [MXCSR] in {
  def rrb : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
             (ins _src.RC:$src1, i32u8imm:$src2),
             "vcvtps2ph\t{$src2, {sae}, $src1, $dst|$dst, $src1, {sae}, $src2}",
             [(set _dest.RC:$dst,
               (X86cvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
             EVEX_B, Sched<[Sched]>;
  let Constraints = "$src0 = $dst" in
  def rrbk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
             (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
             "vcvtps2ph\t{$src2, {sae}, $src1, $dst {${mask}}|$dst {${mask}}, $src1, {sae}, $src2}",
             [(set _dest.RC:$dst,
               (X86mcvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2),
                                _dest.RC:$src0, _src.KRCWM:$mask))]>,
             EVEX_B, Sched<[Sched]>, EVEX_K;
  def rrbkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
             (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
             "vcvtps2ph\t{$src2, {sae}, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, {sae}, $src2}",
             [(set _dest.RC:$dst,
               (X86mcvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2),
                                _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
             EVEX_B, Sched<[Sched]>, EVEX_KZ;
}
}

let Predicates = [HasAVX512] in {
  defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
                                    WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
                    avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
                    EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;

  // Match a converted vector stored directly to memory onto the mr form.
  def : Pat<(store (v16i16 (X86any_cvtps2ph VR512:$src1, timm:$src2)), addr:$dst),
            (VCVTPS2PHZmr addr:$dst, VR512:$src1, timm:$src2)>;
}

let Predicates = [HasVLX] in {
  defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
                                       WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
                                       EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
  defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
                                       WriteCvtPS2PH, WriteCvtPS2PHSt>,
                                       EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;

  // The 128-bit result occupies only the low 64 bits; match an extract of
  // element 0 (viewed as f64 or i64) being stored onto the mr form.
  def : Pat<(store (f64 (extractelt
                         (bc_v2f64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
                         (iPTR 0))), addr:$dst),
            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
  def : Pat<(store (i64 (extractelt
                         (bc_v2i64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
                         (iPTR 0))), addr:$dst),
            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
  def : Pat<(store (v8i16 (X86any_cvtps2ph VR256X:$src1, timm:$src2)), addr:$dst),
            (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, timm:$src2)>;
}

// Unordered/Ordered scalar fp compare with Sae and set EFLAGS
// Assembler-only (no pattern): the {sae} compare forms are selected by name.
multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
                              string OpcodeStr, Domain d,
                              X86FoldableSchedWrite sched = WriteFComX> {
  let ExeDomain = d, hasSideEffects = 0, Uses = [MXCSR] in
  def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
                  !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
                  EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
}

let Defs = [EFLAGS], Predicates = [HasAVX512] in {
  defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", SSEPackedSingle>,
                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
  defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", SSEPackedDouble>,
                   AVX512PDIi8Base, REX_W, EVEX_CD8<64, CD8VT1>;
  defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", SSEPackedSingle>,
                  AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
  defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", SSEPackedDouble>,
                  AVX512PDIi8Base, REX_W, EVEX_CD8<64, CD8VT1>;
}

// EVEX-encoded scalar compares writing EFLAGS, reusing the SSE multiclasses.
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
  defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86any_fcmp, f32, f32mem, loadf32,
                                 "ucomiss", SSEPackedSingle>, TB, EVEX, VEX_LIG,
                                 EVEX_CD8<32, CD8VT1>;
  defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86any_fcmp, f64, f64mem, loadf64,
                                 "ucomisd", SSEPackedDouble>, TB, PD, EVEX,
                                 VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
  defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, X86strict_fcmps, f32, f32mem, loadf32,
                                "comiss", SSEPackedSingle>, TB, EVEX, VEX_LIG,
                                EVEX_CD8<32, CD8VT1>;
  defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, X86strict_fcmps, f64, f64mem, loadf64,
                                "comisd", SSEPackedDouble>, TB, PD, EVEX,
                                VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
  // Intrinsic (vector-operand) forms are isel-only.
  let isCodeGenOnly = 1 in {
    defm VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
                       sse_load_f32, "ucomiss", SSEPackedSingle>, TB, EVEX, VEX_LIG,
                       EVEX_CD8<32, CD8VT1>;
    defm VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
                       sse_load_f64, "ucomisd", SSEPackedDouble>, TB, PD, EVEX,
                       VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;

    defm VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
                      sse_load_f32, "comiss", SSEPackedSingle>, TB, EVEX, VEX_LIG,
                      EVEX_CD8<32, CD8VT1>;
    defm VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
                      sse_load_f64, "comisd", SSEPackedDouble>, TB, PD, EVEX,
                      VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
  }
}

// FP16 (vcomish/vucomish) equivalents of the scalar compares above.
let Defs = [EFLAGS], Predicates = [HasFP16] in {
  defm VUCOMISHZ : avx512_ord_cmp_sae<0x2E, v8f16x_info, "vucomish",
                                      SSEPackedSingle>, AVX512PSIi8Base, T_MAP5,
EVEX_CD8<16, CD8VT1>;
  defm VCOMISHZ : avx512_ord_cmp_sae<0x2F, v8f16x_info, "vcomish",
                                     SSEPackedSingle>, AVX512PSIi8Base, T_MAP5,
                                     EVEX_CD8<16, CD8VT1>;
  defm VUCOMISHZ : sse12_ord_cmp<0x2E, FR16X, X86any_fcmp, f16, f16mem, loadf16,
                                 "ucomish", SSEPackedSingle>, T_MAP5, EVEX,
                                 VEX_LIG, EVEX_CD8<16, CD8VT1>;
  defm VCOMISHZ : sse12_ord_cmp<0x2F, FR16X, X86strict_fcmps, f16, f16mem, loadf16,
                                "comish", SSEPackedSingle>, T_MAP5, EVEX,
                                VEX_LIG, EVEX_CD8<16, CD8VT1>;
  // Intrinsic (vector-operand) forms are isel-only.
  let isCodeGenOnly = 1 in {
    defm VUCOMISHZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v8f16, shmem,
                                       sse_load_f16, "ucomish", SSEPackedSingle>,
                                       T_MAP5, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;

    defm VCOMISHZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v8f16, shmem,
                                      sse_load_f16, "comish", SSEPackedSingle>,
                                      T_MAP5, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
  }
}

/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd, rcpsh, rsqrtsh
// Scalar approximation instructions: register/register and register/memory
// forms, both with the full merge/zero masking set via
// AVX512_maskable_scalar.
multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _,
                         Predicate prd = HasAVX512> {
  let Predicates = [prd], ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
                           EVEX, VVVV, VEX_LIG, Sched<[sched]>;
  defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (OpNode (_.VT _.RC:$src1),
                            (_.ScalarIntMemFrags addr:$src2))>, EVEX, VVVV, VEX_LIG,
                           Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}

defm VRCPSHZ : avx512_fp14_s<0x4D, "vrcpsh", X86rcp14s, SchedWriteFRcp.Scl,
                             f16x_info, HasFP16>, EVEX_CD8<16, CD8VT1>,
                             T_MAP6, PD;
defm VRSQRTSHZ : avx512_fp14_s<0x4F, "vrsqrtsh", X86rsqrt14s,
                               SchedWriteFRsqrt.Scl, f16x_info, HasFP16>,
                               EVEX_CD8<16, CD8VT1>, T_MAP6, PD;
let Uses = [MXCSR] in {
defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
                               f32x_info>, EVEX_CD8<32, CD8VT1>,
                               T8, PD;
defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
                               f64x_info>, REX_W, EVEX_CD8<64, CD8VT1>,
                               T8, PD;
defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
                                 SchedWriteFRsqrt.Scl, f32x_info>,
                                 EVEX_CD8<32, CD8VT1>, T8, PD;
defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
                                 SchedWriteFRsqrt.Scl, f64x_info>, REX_W,
                                 EVEX_CD8<64, CD8VT1>, T8, PD;
}

/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
// Packed approximation instructions: register, full-width memory, and
// broadcast-memory (EVEX_B) forms.
multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
                          (_.VT (OpNode _.RC:$src))>, EVEX, T8, PD,
                          Sched<[sched]>;
  defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                          (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                          (OpNode (_.VT
                           (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8, PD,
                          Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                           (ins _.ScalarMemOp:$src), OpcodeStr,
                           "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
                           (OpNode (_.VT
                            (_.BroadcastLdFrag addr:$src)))>,
                           EVEX, T8, PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Instantiates the packed approximation multiclass at all widths: 512-bit
// always, 128/256-bit under HasVLX, and FP16 variants under HasFP16.
multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched> {
  let Uses = [MXCSR] in {
  defm 14PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"), OpNode, sched.ZMM,
                             v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm 14PDZ : avx512_fp14_p<opc,
!strconcat(OpcodeStr, "14pd"), OpNode, sched.ZMM,
                             v8f64_info>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>;
  }
  let Predicates = [HasFP16] in
  defm PHZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"), OpNode, sched.ZMM,
                           v32f16_info>, EVEX_V512, T_MAP6, EVEX_CD8<16, CD8VF>;

  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX], Uses = [MXCSR] in {
    defm 14PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"),
                                  OpNode, sched.XMM, v4f32x_info>,
                                  EVEX_V128, EVEX_CD8<32, CD8VF>;
    defm 14PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"),
                                  OpNode, sched.YMM, v8f32x_info>,
                                  EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm 14PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
                                  OpNode, sched.XMM, v2f64x_info>,
                                  EVEX_V128, REX_W, EVEX_CD8<64, CD8VF>;
    defm 14PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
                                  OpNode, sched.YMM, v4f64x_info>,
                                  EVEX_V256, REX_W, EVEX_CD8<64, CD8VF>;
  }
  let Predicates = [HasFP16, HasVLX] in {
    defm PHZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
                                OpNode, sched.XMM, v8f16x_info>,
                                EVEX_V128, T_MAP6, EVEX_CD8<16, CD8VF>;
    defm PHZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
                                OpNode, sched.YMM, v16f16x_info>,
                                EVEX_V256, T_MAP6, EVEX_CD8<16, CD8VF>;
  }
}

defm VRSQRT : avx512_fp14_p_vl_all<0x4E, "vrsqrt", X86rsqrt14, SchedWriteFRsqrt>;
defm VRCP : avx512_fp14_p_vl_all<0x4C, "vrcp", X86rcp14, SchedWriteFRcp>;

/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
// Scalar ERI approximations: plain, {sae} (EVEX_B), and memory forms. Note
// the rb form carries no SIMD_EXC, consistent with {sae} suppressing FP
// exceptions.
multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         SDNode OpNode, SDNode OpNodeSAE,
                         X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
  defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
                           Sched<[sched]>, SIMD_EXC;

  defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                           (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
                           EVEX_B, Sched<[sched]>;

  defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2))>,
                           Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  }
}

// Instantiates the scalar ERI multiclass for f32 (ss) and f64 (sd).
multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                        SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
  defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE,
                           sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG, T8, PD, EVEX, VVVV;
  defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE,
                           sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, REX_W, T8, PD, EVEX, VVVV;
}

// FP16 scalar variant (vgetexpsh), gated on HasFP16.
multiclass avx512_vgetexpsh<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
  let Predicates = [HasFP16] in
  defm SHZ : avx512_fp28_s<opc, OpcodeStr#"sh", f16x_info, OpNode, OpNodeSAE, sched>,
             EVEX_CD8<16, CD8VT1>, T_MAP6, PD, EVEX, VVVV;
}

let Predicates = [HasERI] in {
  defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, X86rcp28SAEs,
                             SchedWriteFRcp.Scl>;
  defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, X86rsqrt28SAEs,
                               SchedWriteFRsqrt.Scl>;
}

defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
                            SchedWriteFRnd.Scl>,
               avx512_vgetexpsh<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
                                SchedWriteFRnd.Scl>;
/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd

multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         SDNode OpNode,
X86FoldableSchedWrite sched> {
  // Packed ERI approximation: register, memory, and broadcast-memory forms.
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src), OpcodeStr, "$src", "$src",
                           (OpNode (_.VT _.RC:$src))>,
                           Sched<[sched]>;

  defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                           (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                           (OpNode (_.VT
                            (bitconvert (_.LdFrag addr:$src))))>,
                           Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                            (ins _.ScalarMemOp:$src), OpcodeStr,
                            "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
                            (OpNode (_.VT
                             (_.BroadcastLdFrag addr:$src)))>,
                            EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
// Register-only {sae} (EVEX_B) form of a packed ERI instruction.
multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                             SDNode OpNode, X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src), OpcodeStr,
                            "{sae}, $src", "$src, {sae}",
                            (OpNode (_.VT _.RC:$src))>,
                            EVEX_B, Sched<[sched]>;
}

// 512-bit ps/pd instantiations (plain + {sae}) of the packed ERI forms.
multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
                      SDNode OpNodeSAE, X86SchedWriteWidths sched> {
  defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
             avx512_fp28_p_sae<opc, OpcodeStr#"ps", v16f32_info, OpNodeSAE, sched.ZMM>,
             T8, PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
             avx512_fp28_p_sae<opc, OpcodeStr#"pd", v8f64_info, OpNodeSAE, sched.ZMM>,
             T8, PD, EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>;
}

// 128/256-bit ps/pd instantiations, gated on HasVLX.
multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode, X86SchedWriteWidths sched> {
  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode,
                                sched.XMM>,
                                EVEX_V128, T8, PD, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode,
                                sched.YMM>,
                                EVEX_V256, T8, PD, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode,
                                sched.XMM>,
                                EVEX_V128, REX_W, T8, PD, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode,
                                sched.YMM>,
                                EVEX_V256, REX_W, T8, PD, EVEX_CD8<64, CD8VF>;
  }
}

// FP16 packed instantiations (vgetexpph), gated on HasFP16 (+ HasVLX for
// the 128/256-bit forms).
multiclass avx512_vgetexp_fp16<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               SDNode OpNodeSAE, X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in
  defm PHZ : avx512_fp28_p<opc, OpcodeStr#"ph", v32f16_info, OpNode, sched.ZMM>,
             avx512_fp28_p_sae<opc, OpcodeStr#"ph", v32f16_info, OpNodeSAE, sched.ZMM>,
             T_MAP6, PD, EVEX_V512, EVEX_CD8<16, CD8VF>;
  let Predicates = [HasFP16, HasVLX] in {
    defm PHZ128 : avx512_fp28_p<opc, OpcodeStr#"ph", v8f16x_info, OpNode, sched.XMM>,
                  EVEX_V128, T_MAP6, PD, EVEX_CD8<16, CD8VF>;
    defm PHZ256 : avx512_fp28_p<opc, OpcodeStr#"ph", v16f16x_info, OpNode, sched.YMM>,
                  EVEX_V256, T_MAP6, PD, EVEX_CD8<16, CD8VF>;
  }
}
let Predicates = [HasERI] in {
  defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, X86rsqrt28SAE,
                             SchedWriteFRsqrt>, EVEX;
  defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, X86rcp28SAE,
                           SchedWriteFRcp>, EVEX;
  defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, X86exp2SAE,
                          SchedWriteFAdd>, EVEX;
}
defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
                          SchedWriteFRnd>,
               avx512_vgetexp_fp16<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
                                   SchedWriteFRnd>,
               avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexp,
                                        SchedWriteFRnd>, EVEX;

multiclass avx512_sqrt_packed_round<bits<8> opc,
string OpcodeStr,
                                    X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  // Register-only form with an explicit static rounding-control operand ($rc).
  let ExeDomain = _.ExeDomain in
  defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
                           (_.VT (X86fsqrtRnd _.RC:$src, (i32 timm:$rc)))>,
                           EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
}

// Packed square root: register, full-width memory, and broadcast-memory
// forms. AVX512_maskable_split lets the unmasked pattern use the strict-FP
// node (any_fsqrt) while masked forms use the non-strict fsqrt node.
multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm r: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
                          (_.VT (any_fsqrt _.RC:$src)),
                          (_.VT (fsqrt _.RC:$src))>, EVEX,
                          Sched<[sched]>;
  defm m: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
                          (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                          (any_fsqrt (_.VT (_.LdFrag addr:$src))),
                          (fsqrt (_.VT (_.LdFrag addr:$src)))>, EVEX,
                          Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm mb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
                          (ins _.ScalarMemOp:$src), OpcodeStr,
                          "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
                          (any_fsqrt (_.VT (_.BroadcastLdFrag addr:$src))),
                          (fsqrt (_.VT (_.BroadcastLdFrag addr:$src)))>,
                          EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Instantiates packed vsqrt at every width: ph under HasFP16, ps/pd always
// at 512 bits, and 128/256-bit forms under HasVLX.
let Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
                                  X86SchedWriteSizes sched> {
  let Predicates = [HasFP16] in
  defm PHZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
                                sched.PH.ZMM, v32f16_info>,
                                EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>;
  let Predicates = [HasFP16, HasVLX] in {
    defm PHZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
                                     sched.PH.XMM, v8f16x_info>,
                                     EVEX_V128, T_MAP5, EVEX_CD8<16, CD8VF>;
    defm PHZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
                                     sched.PH.YMM, v16f16x_info>,
                                     EVEX_V256, T_MAP5, EVEX_CD8<16, CD8VF>;
  }
  defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
                                sched.PS.ZMM, v16f32_info>,
                                EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
                                sched.PD.ZMM, v8f64_info>,
                                EVEX_V512, REX_W, TB, PD, EVEX_CD8<64, CD8VF>;
  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
                                     sched.PS.XMM, v4f32x_info>,
                                     EVEX_V128, TB, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
                                     sched.PS.YMM, v8f32x_info>,
                                     EVEX_V256, TB, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
                                     sched.PD.XMM, v2f64x_info>,
                                     EVEX_V128, REX_W, TB, PD, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
                                     sched.PD.YMM, v4f64x_info>,
                                     EVEX_V256, REX_W, TB, PD, EVEX_CD8<64, CD8VF>;
  }
}

// Rounding-control variants are instantiated only at 512-bit width here.
let Uses = [MXCSR] in
multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
                                        X86SchedWriteSizes sched> {
  let Predicates = [HasFP16] in
  defm PHZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ph"),
                                      sched.PH.ZMM, v32f16_info>,
                                      EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>;
  defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
                                      sched.PS.ZMM, v16f32_info>,
                                      EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"),
                                      sched.PD.ZMM, v8f64_info>,
                                      EVEX_V512, REX_W, TB, PD, EVEX_CD8<64, CD8VF>;
}

// Scalar square root: intrinsic (_Int) masked forms, a register-only
// rounding-control form, and isel-only FRC-register forms plus patterns
// mapping plain scalar any_fsqrt onto them.
multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, string Name, Predicate prd = HasAVX512> {
  let ExeDomain = _.ExeDomain, Predicates = [prd] in {
  defm r_Int : AVX512_maskable_scalar<opc,
MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (X86fsqrts (_.VT _.RC:$src1),
                                    (_.VT _.RC:$src2))>,
                         Sched<[sched]>, SIMD_EXC;
  defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (X86fsqrts (_.VT _.RC:$src1),
                                    (_.ScalarIntMemFrags addr:$src2))>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  // Static rounding-control form (register only).
  let Uses = [MXCSR] in
  defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
                         "$rc, $src2, $src1", "$src1, $src2, $rc",
                         (X86fsqrtRnds (_.VT _.RC:$src1),
                                       (_.VT _.RC:$src2),
                                       (i32 timm:$rc))>,
                         EVEX_B, EVEX_RC, Sched<[sched]>;

  // isel-only FRC forms, pattern-less; matched by the Pat<>s below.
  let isCodeGenOnly = 1, hasSideEffects = 0 in {
    def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.FRC:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
              Sched<[sched]>, SIMD_EXC;
    let mayLoad = 1 in
    def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.ScalarMemOp:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
              Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  }
  }

  let Predicates = [prd] in {
    def : Pat<(_.EltVT (any_fsqrt _.FRC:$src)),
              (!cast<Instruction>(Name#Zr)
                  (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
  }

  // Only fold the load when optimizing for size.
  let Predicates = [prd, OptForSize] in {
    def : Pat<(_.EltVT (any_fsqrt (load addr:$src))),
              (!cast<Instruction>(Name#Zm)
                  (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
  }
}

// Instantiates scalar vsqrt for f16 (HasFP16), f32, and f64.
multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
                                  X86SchedWriteSizes sched> {
  defm SHZ : avx512_sqrt_scalar<opc, OpcodeStr#"sh", sched.PH.Scl, f16x_info, NAME#"SH", HasFP16>,
             EVEX_CD8<16, CD8VT1>, EVEX, VVVV, T_MAP5, XS;
  defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
             EVEX_CD8<32, CD8VT1>, EVEX, VVVV, TB, XS;
  defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
             EVEX_CD8<64, CD8VT1>, EVEX, VVVV, TB, XD, REX_W;
}

defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
             avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;

defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG;

// Scalar round-to-integral with an immediate scale/round operand ($src3):
// _Int masked forms (plain, {sae}, memory) plus isel-only FRC forms matched
// by the X86any_VRndScale patterns below.
multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
                         "$src3, $src2, $src1", "$src1, $src2, $src3",
                         (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                (i32 timm:$src3)))>,
                         Sched<[sched]>, SIMD_EXC;

  let Uses = [MXCSR] in
  defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
                         "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
                         (_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                (i32 timm:$src3)))>, EVEX_B,
                         Sched<[sched]>;

  defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
                         OpcodeStr,
                         "$src3, $src2, $src1", "$src1, $src2, $src3",
                         (_.VT (X86RndScales _.RC:$src1,
                                (_.ScalarIntMemFrags addr:$src2), (i32 timm:$src3)))>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;

  let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
    def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
              OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
              []>, Sched<[sched]>, SIMD_EXC;

    let mayLoad = 1 in
    def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
              OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
              []>, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  }
  }

  let Predicates = [HasAVX512] in {
    def : Pat<(X86any_VRndScale _.FRC:$src1, timm:$src2),
              (_.EltVT (!cast<Instruction>(NAME#r) (_.EltVT (IMPLICIT_DEF)),
               _.FRC:$src1, timm:$src2))>;
  }

  // Only fold the load when optimizing for size.
  let Predicates = [HasAVX512, OptForSize] in {
    def : Pat<(X86any_VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2),
              (_.EltVT (!cast<Instruction>(NAME#m) (_.EltVT (IMPLICIT_DEF)),
               addr:$src1, timm:$src2))>;
  }
}

let Predicates = [HasFP16] in
defm VRNDSCALESHZ : avx512_rndscale_scalar<0x0A, "vrndscalesh",
                                           SchedWriteFRnd.Scl, f16x_info>,
                                           AVX512PSIi8Base, TA, EVEX, VVVV,
                                           EVEX_CD8<16, CD8VT1>;

defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
                                           SchedWriteFRnd.Scl, f32x_info>,
                                           AVX512AIi8Base, EVEX, VVVV, VEX_LIG,
                                           EVEX_CD8<32, CD8VT1>;

defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd",
                                           SchedWriteFRnd.Scl, f64x_info>,
                                           REX_W, AVX512AIi8Base, EVEX, VVVV, VEX_LIG,
                                           EVEX_CD8<64, CD8VT1>;

// Lowers a masked scalar op expressed as Move + X86selects_mask onto the
// corresponding _Intk (merge) / _Intkz (zero) instruction.
multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
                                dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
                                dag OutMask, Predicate BasePredicate> {
  let Predicates = [BasePredicate] in {
    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
               (OpNode (extractelt _.VT:$src2, (iPTR 0))),
               (extractelt _.VT:$dst, (iPTR 0))))),
              (!cast<Instruction>("V"#OpcPrefix#r_Intk)
               _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>;

    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
               (OpNode (extractelt _.VT:$src2, (iPTR 0))),
               ZeroFP))),
              (!cast<Instruction>("V"#OpcPrefix#r_Intkz)
               OutMask, _.VT:$src2, _.VT:$src1)>;
  }
}

defm : avx512_masked_scalar<fsqrt, "SQRTSHZ", X86Movsh,
                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v8f16x_info,
                            fp16imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasFP16>;
defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss,
                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info,
                            fp32imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
                            fp64imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;


//-------------------------------------------------
// Integer truncate and extend operations
//-------------------------------------------------

// Truncating move: register forms (rr/rrk/rrkz) carry ISel patterns; the
// store forms (mr/mrk) are pattern-less and matched separately via
// avx512_trunc_mr_lowering.
multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               SDPatternOperator MaskNode,
                               X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
                               X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
  let ExeDomain = DestInfo.ExeDomain in {
  def rr : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
                      (ins SrcInfo.RC:$src),
                      OpcodeStr # "\t{$src, $dst|$dst, $src}",
                      [(set DestInfo.RC:$dst,
                        (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))))]>,
                      EVEX, Sched<[sched]>;
  let Constraints = "$src0 = $dst" in
  def rrk : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
                       (ins DestInfo.RC:$src0, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
                       OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                       [(set DestInfo.RC:$dst,
                         (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
                                   (DestInfo.VT DestInfo.RC:$src0),
                                   SrcInfo.KRCWM:$mask))]>,
                       EVEX, EVEX_K, Sched<[sched]>;
  def rrkz : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
                        (ins SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
                        OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                        [(set DestInfo.RC:$dst,
                          (DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
                                        DestInfo.ImmAllZerosV, SrcInfo.KRCWM:$mask)))]>,
                        EVEX, EVEX_KZ, Sched<[sched]>;
  }

  let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
    def mr : AVX512XS8I<opc, MRMDestMem, (outs),
                        (ins x86memop:$dst, SrcInfo.RC:$src),
                        OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
                        EVEX, Sched<[sched.Folded]>;

    def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
                         (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
                         OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>,
                         EVEX, EVEX_K, Sched<[sched.Folded]>;
  }//mayStore = 1, hasSideEffects = 0
}

// Matches (masked) truncating-store fragments onto the mr/mrk forms
// generated by avx512_trunc_common.
multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
                                    PatFrag truncFrag, PatFrag mtruncFrag,
                                    string Name> {

  def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
            (!cast<Instruction>(Name#SrcInfo.ZSuffix#mr)
              addr:$dst, SrcInfo.RC:$src)>;

  def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst,
                        SrcInfo.KRCWM:$mask),
            (!cast<Instruction>(Name#SrcInfo.ZSuffix#mrk)
              addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
}

// Instantiates truncating moves at 128/256 (HasVLX + prd) and 512-bit (prd)
// widths, with per-width op/mask nodes and memory operands.
multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
                        SDNode OpNode256, SDNode OpNode512,
                        SDPatternOperator MaskNode128,
                        SDPatternOperator MaskNode256,
                        SDPatternOperator MaskNode512,
                        X86SchedWriteWidths sched,
                        AVX512VLVectorVTInfo VTSrcInfo,
                        X86VectorVTInfo DestInfoZ128,
                        X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
                        X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
                        X86MemOperand x86memopZ, PatFrag truncFrag,
                        PatFrag mtruncFrag, Predicate prd = HasAVX512>{

  let Predicates = [HasVLX, prd] in {
    defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched.XMM,
                                   VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
avx512_trunc_mr_lowering<VTSrcInfo.info128, truncFrag, 9674 mtruncFrag, NAME>, EVEX_V128; 9675 9676 defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched.YMM, 9677 VTSrcInfo.info256, DestInfoZ256, x86memopZ256>, 9678 avx512_trunc_mr_lowering<VTSrcInfo.info256, truncFrag, 9679 mtruncFrag, NAME>, EVEX_V256; 9680 } 9681 let Predicates = [prd] in 9682 defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched.ZMM, 9683 VTSrcInfo.info512, DestInfoZ, x86memopZ>, 9684 avx512_trunc_mr_lowering<VTSrcInfo.info512, truncFrag, 9685 mtruncFrag, NAME>, EVEX_V512; 9686} 9687 9688multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, 9689 X86SchedWriteWidths sched, PatFrag StoreNode, 9690 PatFrag MaskedStoreNode, SDNode InVecNode, 9691 SDPatternOperator InVecMaskNode> { 9692 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode, 9693 InVecMaskNode, InVecMaskNode, InVecMaskNode, sched, 9694 avx512vl_i64_info, v16i8x_info, v16i8x_info, 9695 v16i8x_info, i16mem, i32mem, i64mem, StoreNode, 9696 MaskedStoreNode>, EVEX_CD8<8, CD8VO>; 9697} 9698 9699multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode, 9700 SDPatternOperator MaskNode, 9701 X86SchedWriteWidths sched, PatFrag StoreNode, 9702 PatFrag MaskedStoreNode, SDNode InVecNode, 9703 SDPatternOperator InVecMaskNode> { 9704 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode, 9705 InVecMaskNode, InVecMaskNode, MaskNode, sched, 9706 avx512vl_i64_info, v8i16x_info, v8i16x_info, 9707 v8i16x_info, i32mem, i64mem, i128mem, StoreNode, 9708 MaskedStoreNode>, EVEX_CD8<16, CD8VQ>; 9709} 9710 9711multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode, 9712 SDPatternOperator MaskNode, 9713 X86SchedWriteWidths sched, PatFrag StoreNode, 9714 PatFrag MaskedStoreNode, SDNode InVecNode, 9715 SDPatternOperator InVecMaskNode> { 9716 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, 9717 InVecMaskNode, MaskNode, MaskNode, 
sched, 9718 avx512vl_i64_info, v4i32x_info, v4i32x_info, 9719 v8i32x_info, i64mem, i128mem, i256mem, StoreNode, 9720 MaskedStoreNode>, EVEX_CD8<32, CD8VH>; 9721} 9722 9723multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode, 9724 SDPatternOperator MaskNode, 9725 X86SchedWriteWidths sched, PatFrag StoreNode, 9726 PatFrag MaskedStoreNode, SDNode InVecNode, 9727 SDPatternOperator InVecMaskNode> { 9728 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode, 9729 InVecMaskNode, InVecMaskNode, MaskNode, sched, 9730 avx512vl_i32_info, v16i8x_info, v16i8x_info, 9731 v16i8x_info, i32mem, i64mem, i128mem, StoreNode, 9732 MaskedStoreNode>, EVEX_CD8<8, CD8VQ>; 9733} 9734 9735multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode, 9736 SDPatternOperator MaskNode, 9737 X86SchedWriteWidths sched, PatFrag StoreNode, 9738 PatFrag MaskedStoreNode, SDNode InVecNode, 9739 SDPatternOperator InVecMaskNode> { 9740 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, 9741 InVecMaskNode, MaskNode, MaskNode, sched, 9742 avx512vl_i32_info, v8i16x_info, v8i16x_info, 9743 v16i16x_info, i64mem, i128mem, i256mem, StoreNode, 9744 MaskedStoreNode>, EVEX_CD8<16, CD8VH>; 9745} 9746 9747multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode, 9748 SDPatternOperator MaskNode, 9749 X86SchedWriteWidths sched, PatFrag StoreNode, 9750 PatFrag MaskedStoreNode, SDNode InVecNode, 9751 SDPatternOperator InVecMaskNode> { 9752 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, 9753 InVecMaskNode, MaskNode, MaskNode, sched, 9754 avx512vl_i16_info, v16i8x_info, v16i8x_info, 9755 v32i8x_info, i64mem, i128mem, i256mem, StoreNode, 9756 MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>; 9757} 9758 9759defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", 9760 SchedWriteVecTruncate, truncstorevi8, 9761 masked_truncstorevi8, X86vtrunc, X86vmtrunc>; 9762defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb", 9763 SchedWriteVecTruncate, 
truncstore_s_vi8, 9764 masked_truncstore_s_vi8, X86vtruncs, 9765 X86vmtruncs>; 9766defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb", 9767 SchedWriteVecTruncate, truncstore_us_vi8, 9768 masked_truncstore_us_vi8, X86vtruncus, X86vmtruncus>; 9769 9770defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc, 9771 SchedWriteVecTruncate, truncstorevi16, 9772 masked_truncstorevi16, X86vtrunc, X86vmtrunc>; 9773defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, select_truncs, 9774 SchedWriteVecTruncate, truncstore_s_vi16, 9775 masked_truncstore_s_vi16, X86vtruncs, 9776 X86vmtruncs>; 9777defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus, 9778 select_truncus, SchedWriteVecTruncate, 9779 truncstore_us_vi16, masked_truncstore_us_vi16, 9780 X86vtruncus, X86vmtruncus>; 9781 9782defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc, 9783 SchedWriteVecTruncate, truncstorevi32, 9784 masked_truncstorevi32, X86vtrunc, X86vmtrunc>; 9785defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, select_truncs, 9786 SchedWriteVecTruncate, truncstore_s_vi32, 9787 masked_truncstore_s_vi32, X86vtruncs, 9788 X86vmtruncs>; 9789defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus, 9790 select_truncus, SchedWriteVecTruncate, 9791 truncstore_us_vi32, masked_truncstore_us_vi32, 9792 X86vtruncus, X86vmtruncus>; 9793 9794defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc, 9795 SchedWriteVecTruncate, truncstorevi8, 9796 masked_truncstorevi8, X86vtrunc, X86vmtrunc>; 9797defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs, 9798 SchedWriteVecTruncate, truncstore_s_vi8, 9799 masked_truncstore_s_vi8, X86vtruncs, 9800 X86vmtruncs>; 9801defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus, 9802 select_truncus, SchedWriteVecTruncate, 9803 truncstore_us_vi8, masked_truncstore_us_vi8, 9804 X86vtruncus, X86vmtruncus>; 9805 9806defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc, 
9807 SchedWriteVecTruncate, truncstorevi16, 9808 masked_truncstorevi16, X86vtrunc, X86vmtrunc>; 9809defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs, 9810 SchedWriteVecTruncate, truncstore_s_vi16, 9811 masked_truncstore_s_vi16, X86vtruncs, 9812 X86vmtruncs>; 9813defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus, 9814 select_truncus, SchedWriteVecTruncate, 9815 truncstore_us_vi16, masked_truncstore_us_vi16, 9816 X86vtruncus, X86vmtruncus>; 9817 9818defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc, 9819 SchedWriteVecTruncate, truncstorevi8, 9820 masked_truncstorevi8, X86vtrunc, 9821 X86vmtrunc>; 9822defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs, 9823 SchedWriteVecTruncate, truncstore_s_vi8, 9824 masked_truncstore_s_vi8, X86vtruncs, 9825 X86vmtruncs>; 9826defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus, 9827 select_truncus, SchedWriteVecTruncate, 9828 truncstore_us_vi8, masked_truncstore_us_vi8, 9829 X86vtruncus, X86vmtruncus>; 9830 9831let Predicates = [HasAVX512, NoVLX, HasEVEX512] in { 9832def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))), 9833 (v8i16 (EXTRACT_SUBREG 9834 (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), 9835 VR256X:$src, sub_ymm)))), sub_xmm))>; 9836def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))), 9837 (v4i32 (EXTRACT_SUBREG 9838 (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), 9839 VR256X:$src, sub_ymm)))), sub_xmm))>; 9840} 9841 9842let Predicates = [HasBWI, NoVLX, HasEVEX512] in { 9843def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))), 9844 (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF), 9845 VR256X:$src, sub_ymm))), sub_xmm))>; 9846} 9847 9848// Without BWI we can't use vXi16/vXi8 vselect so we have to use vmtrunc nodes. 
// Select masked-truncate DAG nodes onto the merge-masked (rrk) and
// zero-masked (rrkz) truncating-move instructions.
multiclass mtrunc_lowering<string InstrName, SDNode OpNode,
                           X86VectorVTInfo DestInfo,
                           X86VectorVTInfo SrcInfo> {
  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
                                 DestInfo.RC:$src0,
                                 SrcInfo.KRCWM:$mask)),
            (!cast<Instruction>(InstrName#"rrk") DestInfo.RC:$src0,
                                                 SrcInfo.KRCWM:$mask,
                                                 SrcInfo.RC:$src)>;

  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
                                 DestInfo.ImmAllZerosV,
                                 SrcInfo.KRCWM:$mask)),
            (!cast<Instruction>(InstrName#"rrkz") SrcInfo.KRCWM:$mask,
                                                  SrcInfo.RC:$src)>;
}

let Predicates = [HasVLX] in {
defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>;
defm : mtrunc_lowering<"VPMOVSDWZ256", X86vmtruncs, v8i16x_info, v8i32x_info>;
defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>;
}

let Predicates = [HasAVX512] in {
defm : mtrunc_lowering<"VPMOVDWZ", X86vmtrunc, v16i16x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVSDWZ", X86vmtruncs, v16i16x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>;

defm : mtrunc_lowering<"VPMOVDBZ", X86vmtrunc, v16i8x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVSDBZ", X86vmtruncs, v16i8x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>;

defm : mtrunc_lowering<"VPMOVQWZ", X86vmtrunc, v8i16x_info, v8i64_info>;
defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>;
defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>;
}

// One sign/zero-extend width: maskable register and load-folding forms.
multiclass avx512_pmovx_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
                               X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
                               X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
  let ExeDomain = DestInfo.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
                            (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
                            (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
                          EVEX, Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                            (ins x86memop:$src), OpcodeStr ,"$src", "$src",
                            (DestInfo.VT (LdFrag addr:$src))>,
                          EVEX, Sched<[sched.Folded]>;
  }
}

// byte -> word extends. The 128-bit form only consumes the low half of the
// source, so it uses the "in-vector" node; wider forms use the full extend.
multiclass avx512_pmovx_bw<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode InVecNode, string ExtTy,
                           X86SchedWriteWidths sched,
                           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasBWI] in {
    defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v8i16x_info,
                                   v16i8x_info, i64mem, LdFrag, InVecNode>,
                                   EVEX_CD8<8, CD8VH>, T8, PD, EVEX_V128, WIG;

    defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v16i16x_info,
                                   v16i8x_info, i128mem, LdFrag, OpNode>,
                                   EVEX_CD8<8, CD8VH>, T8, PD, EVEX_V256, WIG;
  }
  let Predicates = [HasBWI] in {
    defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v32i16_info,
                                 v32i8x_info, i256mem, LdFrag, OpNode>,
                                 EVEX_CD8<8, CD8VH>, T8, PD, EVEX_V512, WIG;
  }
}

// byte -> dword extends.
multiclass avx512_pmovx_bd<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode InVecNode, string ExtTy,
                           X86SchedWriteWidths sched,
                           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v4i32x_info,
                                   v16i8x_info, i32mem, LdFrag, InVecNode>,
                                   EVEX_CD8<8, CD8VQ>, T8, PD, EVEX_V128, WIG;

    defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v8i32x_info,
                                   v16i8x_info, i64mem, LdFrag, InVecNode>,
                                   EVEX_CD8<8, CD8VQ>, T8, PD, EVEX_V256, WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v16i32_info,
                                 v16i8x_info, i128mem, LdFrag, OpNode>,
                                 EVEX_CD8<8, CD8VQ>, T8, PD, EVEX_V512, WIG;
  }
}

// byte -> qword extends; all widths consume a sub-register's worth of bytes,
// so every form uses the in-vector node.
multiclass avx512_pmovx_bq<bits<8> opc, string OpcodeStr,
                           SDNode InVecNode, string ExtTy,
                           X86SchedWriteWidths sched,
                           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
                                   v16i8x_info, i16mem, LdFrag, InVecNode>,
                                   EVEX_CD8<8, CD8VO>, T8, PD, EVEX_V128, WIG;

    defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
                                   v16i8x_info, i32mem, LdFrag, InVecNode>,
                                   EVEX_CD8<8, CD8VO>, T8, PD, EVEX_V256, WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
                                 v16i8x_info, i64mem, LdFrag, InVecNode>,
                                 EVEX_CD8<8, CD8VO>, T8, PD, EVEX_V512, WIG;
  }
}

// word -> dword extends.
multiclass avx512_pmovx_wd<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode InVecNode, string ExtTy,
                           X86SchedWriteWidths sched,
                           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v4i32x_info,
                                   v8i16x_info, i64mem, LdFrag, InVecNode>,
                                   EVEX_CD8<16, CD8VH>, T8, PD, EVEX_V128, WIG;

    defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v8i32x_info,
                                   v8i16x_info, i128mem, LdFrag, OpNode>,
                                   EVEX_CD8<16, CD8VH>, T8, PD, EVEX_V256, WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v16i32_info,
                                 v16i16x_info, i256mem, LdFrag, OpNode>,
                                 EVEX_CD8<16, CD8VH>, T8, PD, EVEX_V512, WIG;
  }
}

// word -> qword extends.
multiclass avx512_pmovx_wq<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode InVecNode, string ExtTy,
                           X86SchedWriteWidths sched,
                           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
                                   v8i16x_info, i32mem, LdFrag, InVecNode>,
                                   EVEX_CD8<16, CD8VQ>, T8, PD, EVEX_V128, WIG;

    defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
                                   v8i16x_info, i64mem, LdFrag, InVecNode>,
                                   EVEX_CD8<16, CD8VQ>, T8, PD, EVEX_V256, WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
                                 v8i16x_info, i128mem, LdFrag, OpNode>,
                                 EVEX_CD8<16, CD8VQ>, T8, PD, EVEX_V512, WIG;
  }
}

// dword -> qword extends.
multiclass avx512_pmovx_dq<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode InVecNode, string ExtTy,
                           X86SchedWriteWidths sched,
                           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {

  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
                                   v4i32x_info, i64mem, LdFrag, InVecNode>,
                                   EVEX_CD8<32, CD8VH>, T8, PD, EVEX_V128;

    defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
                                   v4i32x_info, i128mem, LdFrag, OpNode>,
                                   EVEX_CD8<32, CD8VH>, T8, PD, EVEX_V256;
  }
  let Predicates = [HasAVX512] in {
    defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
                                 v8i32x_info, i256mem, LdFrag, OpNode>,
                                 EVEX_CD8<32, CD8VH>, T8, PD, EVEX_V512;
  }
}

defm VPMOVZXBW : avx512_pmovx_bw<0x30, "vpmovzxbw", zext, zext_invec, "z", SchedWriteVecExtend>;
defm VPMOVZXBD : avx512_pmovx_bd<0x31, "vpmovzxbd", zext, zext_invec, "z", SchedWriteVecExtend>;
defm VPMOVZXBQ : avx512_pmovx_bq<0x32, "vpmovzxbq", zext_invec, "z", SchedWriteVecExtend>;
defm VPMOVZXWD : avx512_pmovx_wd<0x33, "vpmovzxwd", zext, zext_invec, "z", SchedWriteVecExtend>;
defm VPMOVZXWQ : avx512_pmovx_wq<0x34, "vpmovzxwq", zext, zext_invec, "z", SchedWriteVecExtend>;
defm VPMOVZXDQ : avx512_pmovx_dq<0x35, "vpmovzxdq", zext, zext_invec, "z", SchedWriteVecExtend>;

defm VPMOVSXBW: avx512_pmovx_bw<0x20, "vpmovsxbw", sext, sext_invec, "s", SchedWriteVecExtend>;
defm VPMOVSXBD: avx512_pmovx_bd<0x21, "vpmovsxbd", sext, sext_invec, "s", SchedWriteVecExtend>;
defm VPMOVSXBQ: avx512_pmovx_bq<0x22, "vpmovsxbq", sext_invec, "s", SchedWriteVecExtend>;
defm VPMOVSXWD: avx512_pmovx_wd<0x23, "vpmovsxwd", sext, sext_invec, "s", SchedWriteVecExtend>;
defm VPMOVSXWQ: avx512_pmovx_wq<0x24, "vpmovsxwq", sext, sext_invec, "s", SchedWriteVecExtend>;
defm VPMOVSXDQ: avx512_pmovx_dq<0x25, "vpmovsxdq", sext, sext_invec, "s", SchedWriteVecExtend>;


// Patterns that we also need any extend versions of. aext_vector_inreg
// is currently legalized to zext_vector_inreg.
multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
  // 256-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
    def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
              (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
  }

  let Predicates = [HasVLX] in {
    def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
              (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;

    def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
              (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
  }

  // 512-bit patterns
  let Predicates = [HasBWI] in {
    def : Pat<(v32i16 (ExtOp (loadv32i8 addr:$src))),
              (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
  }
  let Predicates = [HasAVX512] in {
    def : Pat<(v16i32 (ExtOp (loadv16i8 addr:$src))),
              (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
    def : Pat<(v16i32 (ExtOp (loadv16i16 addr:$src))),
              (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;

    def : Pat<(v8i64 (ExtOp (loadv8i16 addr:$src))),
              (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;

    def : Pat<(v8i64 (ExtOp (loadv8i32 addr:$src))),
              (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
  }
}

// Load-folding patterns for the in-vector extend nodes: the load is narrower
// than a full vector, so it appears as scalar_to_vector + bitcast. The
// scalar_to_vector node's type must match the loaded scalar: loadi32 -> v4i32,
// loadi64 -> v2i64, loadf64 -> v2f64.
multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
                                 SDNode InVecOp> :
    AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
  // 128-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  }
  let Predicates = [HasVLX] in {
  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;

  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  }
  let Predicates = [HasVLX] in {
  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  // FIX: an f64 load must be typed v2f64 (was v2i64, a scalar/vector type
  // mismatch), matching the equivalent BWZ128rm/WDZ128rm/DQZ128rm patterns.
  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;

  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;

  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  // FIX: same v2i64 -> v2f64 correction for the loadf64 form.
  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  }
  // 512-bit patterns
  let Predicates = [HasAVX512] in {
  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  }
}

defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;

// Without BWI we can't do a trunc from v16i16 to v16i8.
// DAG combine can merge ext+trunc aggressively making it impossible to
// legalize the DAG to this pattern directly.
let Predicates = [HasAVX512, NoBWI] in {
def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
def: Pat<(v16i8 (trunc (loadv16i16 addr:$src))),
         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
}

//===----------------------------------------------------------------------===//
// GATHER - SCATTER Operations

// FIXME: Improve scheduling of gather/scatter instructions.
// One gather form: $dst is both source (pass-through) and result, $mask is
// consumed and written back. MaskRC defaults to the data type's mask class
// but can be overridden (e.g. VK2WM for the 2-element QPS/QD forms).
multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
  let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
      ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
  def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
                    (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
                    !strconcat(OpcodeStr#_.Suffix,
                               "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
                    []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
                    Sched<[WriteLoad, WriteVecMaskedGatherWriteback]>;
}

// 64-bit-element gathers (PD/Q): dword- and qword-indexed variants.
multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
                              AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512,
                                    vy512xmem>, EVEX_V512, REX_W;
  defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info512,
                                    vz512mem>, EVEX_V512, REX_W;
let Predicates = [HasVLX] in {
  defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
                                       vx256xmem>, EVEX_V256, REX_W;
  defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info256,
                                       vy256xmem>, EVEX_V256, REX_W;
  defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
                                       vx128xmem>, EVEX_V128, REX_W;
  defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
                                       vx128xmem>, EVEX_V128, REX_W;
}
}

// 32-bit-element gathers (PS/D). The qword-indexed forms produce half-width
// results (e.g. zmm index -> ymm data), hence the narrower info types.
multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
                              AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512, vz512mem>,
                                    EVEX_V512;
  defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info256, vz256mem>,
                                    EVEX_V512;
let Predicates = [HasVLX] in {
  defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
                                       vy256xmem>, EVEX_V256;
  defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info128,
                                       vy128xmem>, EVEX_V256;
  defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
                                       vx128xmem>, EVEX_V128;
  defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
                                       vx64xmem, VK2WM>, EVEX_V128;
}
}


defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
               avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;

defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
                avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;

// One scatter form: stores $src to the gathered addresses; $mask is consumed
// and written back.
multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {

let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain,
    hasSideEffects = 0 in

  def mr : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
                    (ins memop:$dst, MaskRC:$mask, _.RC:$src),
                    !strconcat(OpcodeStr#_.Suffix,
                               "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
                    []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
                    Sched<[WriteStore]>;
}

// 64-bit-element scatters (PD/Q); mirrors avx512_gather_q_pd.
multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
                               AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512,
                                     vy512xmem>, EVEX_V512, REX_W;
  defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info512,
                                     vz512mem>, EVEX_V512, REX_W;
let Predicates = [HasVLX] in {
  defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
                                        vx256xmem>, EVEX_V256, REX_W;
  defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info256,
                                        vy256xmem>, EVEX_V256, REX_W;
  defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
                                        vx128xmem>, EVEX_V128, REX_W;
  defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
                                        vx128xmem>, EVEX_V128, REX_W;
}
}

// 32-bit-element scatters (PS/D); mirrors avx512_gather_d_ps.
multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
                              AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512, vz512mem>,
                                     EVEX_V512;
  defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info256, vz256mem>,
                                     EVEX_V512;
let Predicates = [HasVLX] in {
  defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
                                        vy256xmem>, EVEX_V256;
  defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
                                        vy128xmem>, EVEX_V256;
  defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
                                        vx128xmem>, EVEX_V128;
  defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
                                        vx64xmem, VK2WM>, EVEX_V128;
}
}

defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
                avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;

defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
                 avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;

// prefetch
// Gather/scatter prefetch: no outputs, only the mask + vector memory operand.
multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
                                          RegisterClass KRC, X86MemOperand memop> {
  let Predicates = [HasPFI], mayLoad = 1, mayStore = 1 in
  def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
                   !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
                   EVEX, EVEX_K, Sched<[WriteLoad]>;
}

defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
                    VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
                    VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
                    VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
                    VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
                    VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
                    VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
                    VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
                    VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
                     VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
                     VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
                     VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
                     VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;

// mask -> vector: all-ones/all-zeros elements via sign-extending the mask.
multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr, SchedWrite Sched> {
def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
                  !strconcat(OpcodeStr#Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
                  [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
                  EVEX, Sched<[Sched]>;
}

multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
                                 string OpcodeStr, Predicate prd> {
let Predicates = [prd] in
  defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr, WriteVecMoveZ>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr, WriteVecMoveY>, EVEX_V256;
    defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr, WriteVecMoveX>, EVEX_V128;
  }
}

defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , REX_W;
defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , REX_W;

// vector -> mask: match 0 > x (sign-bit test) into the vpmov*2m instruction.
multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
    def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
                        !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                        [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
                        EVEX, Sched<[WriteMove]>;
}

// Use 512bit version to implement 128/256 bit in case NoVLX.
multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
                                           X86VectorVTInfo _,
                                           string Name> {

  def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
            (_.KVT (COPY_TO_REGCLASS
                     (!cast<Instruction>(Name#"Zrr")
                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                      _.RC:$src, _.SubRegIdx)),
                   _.KRC))>;
}

multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
                                         AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
    defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
                                            EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
                                              EVEX_V256;
    defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
                                              EVEX_V128;
  }
  let Predicates = [prd, NoVLX, HasEVEX512] in {
    defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
    defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
  }
}

defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
                                              avx512vl_i8_info, HasBWI>;
defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
                                              avx512vl_i16_info, HasBWI>, REX_W;
defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
                                              avx512vl_i32_info, HasDQI>;
defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
                                              avx512vl_i64_info, HasDQI>, REX_W;

// Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
10401// is available, but BWI is not. We can't handle this in lowering because 10402// a target independent DAG combine likes to combine sext and trunc. 10403let Predicates = [HasDQI, NoBWI] in { 10404 def : Pat<(v16i8 (sext (v16i1 VK16:$src))), 10405 (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>; 10406 def : Pat<(v16i16 (sext (v16i1 VK16:$src))), 10407 (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>; 10408} 10409 10410let Predicates = [HasDQI, NoBWI, HasVLX] in { 10411 def : Pat<(v8i16 (sext (v8i1 VK8:$src))), 10412 (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>; 10413} 10414 10415//===----------------------------------------------------------------------===// 10416// AVX-512 - COMPRESS and EXPAND 10417// 10418 10419multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _, 10420 string OpcodeStr, X86FoldableSchedWrite sched> { 10421 defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst), 10422 (ins _.RC:$src1), OpcodeStr, "$src1", "$src1", 10423 (null_frag)>, AVX5128IBase, 10424 Sched<[sched]>; 10425 10426 let mayStore = 1, hasSideEffects = 0 in 10427 def mr : AVX5128I<opc, MRMDestMem, (outs), 10428 (ins _.MemOp:$dst, _.RC:$src), 10429 OpcodeStr # "\t{$src, $dst|$dst, $src}", 10430 []>, EVEX_CD8<_.EltSize, CD8VT1>, 10431 Sched<[sched.Folded]>; 10432 10433 def mrk : AVX5128I<opc, MRMDestMem, (outs), 10434 (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src), 10435 OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", 10436 []>, 10437 EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>, 10438 Sched<[sched.Folded]>; 10439} 10440 10441multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> { 10442 def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask), 10443 (!cast<Instruction>(Name#_.ZSuffix#mrk) 10444 addr:$dst, _.KRCWM:$mask, _.RC:$src)>; 10445 10446 def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask), 10447 (!cast<Instruction>(Name#_.ZSuffix#rrk) 10448 _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>; 10449 def 
: Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask), 10450 (!cast<Instruction>(Name#_.ZSuffix#rrkz) 10451 _.KRCWM:$mask, _.RC:$src)>; 10452} 10453 10454multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr, 10455 X86FoldableSchedWrite sched, 10456 AVX512VLVectorVTInfo VTInfo, 10457 Predicate Pred = HasAVX512> { 10458 let Predicates = [Pred] in 10459 defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>, 10460 compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512; 10461 10462 let Predicates = [Pred, HasVLX] in { 10463 defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>, 10464 compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256; 10465 defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>, 10466 compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128; 10467 } 10468} 10469 10470// FIXME: Is there a better scheduler class for VPCOMPRESS? 10471defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", WriteVarShuffle256, 10472 avx512vl_i32_info>, EVEX; 10473defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", WriteVarShuffle256, 10474 avx512vl_i64_info>, EVEX, REX_W; 10475defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", WriteVarShuffle256, 10476 avx512vl_f32_info>, EVEX; 10477defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", WriteVarShuffle256, 10478 avx512vl_f64_info>, EVEX, REX_W; 10479 10480// expand 10481multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _, 10482 string OpcodeStr, X86FoldableSchedWrite sched> { 10483 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 10484 (ins _.RC:$src1), OpcodeStr, "$src1", "$src1", 10485 (null_frag)>, AVX5128IBase, 10486 Sched<[sched]>; 10487 10488 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 10489 (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1", 10490 (null_frag)>, 10491 AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>, 10492 
Sched<[sched.Folded, sched.ReadAfterFold]>; 10493} 10494 10495multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> { 10496 10497 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)), 10498 (!cast<Instruction>(Name#_.ZSuffix#rmkz) 10499 _.KRCWM:$mask, addr:$src)>; 10500 10501 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)), 10502 (!cast<Instruction>(Name#_.ZSuffix#rmkz) 10503 _.KRCWM:$mask, addr:$src)>; 10504 10505 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, 10506 (_.VT _.RC:$src0))), 10507 (!cast<Instruction>(Name#_.ZSuffix#rmk) 10508 _.RC:$src0, _.KRCWM:$mask, addr:$src)>; 10509 10510 def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask), 10511 (!cast<Instruction>(Name#_.ZSuffix#rrk) 10512 _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>; 10513 def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask), 10514 (!cast<Instruction>(Name#_.ZSuffix#rrkz) 10515 _.KRCWM:$mask, _.RC:$src)>; 10516} 10517 10518multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr, 10519 X86FoldableSchedWrite sched, 10520 AVX512VLVectorVTInfo VTInfo, 10521 Predicate Pred = HasAVX512> { 10522 let Predicates = [Pred] in 10523 defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>, 10524 expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512; 10525 10526 let Predicates = [Pred, HasVLX] in { 10527 defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>, 10528 expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256; 10529 defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>, 10530 expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128; 10531 } 10532} 10533 10534// FIXME: Is there a better scheduler class for VPEXPAND? 
defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256,
                                      avx512vl_i32_info>, EVEX;
defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256,
                                      avx512vl_i64_info>, EVEX, REX_W;
defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256,
                                      avx512vl_f32_info>, EVEX;
defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256,
                                      avx512vl_f64_info>, EVEX, REX_W;

// Handle instruction reg_vec1 = op(reg_vec, imm)
//                    op(mem_vec, imm)
//                    op(broadcast(eltVt), imm)
// All instructions are created with FROUND_CURRENT.
multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr,
                                      SDPatternOperator OpNode,
                                      SDPatternOperator MaskOpNode,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rri : AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
                      (OpNode (_.VT _.RC:$src1), (i32 timm:$src2)),
                      (MaskOpNode (_.VT _.RC:$src1), (i32 timm:$src2))>,
                      Sched<[sched]>;
  defm rmi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.MemOp:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
                    (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                            (i32 timm:$src2)),
                    (MaskOpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                (i32 timm:$src2))>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmbi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix, "$src2, ${src1}"#_.BroadcastStr,
                    "${src1}"#_.BroadcastStr#", $src2",
                    (OpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
                            (i32 timm:$src2)),
                    (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
                                (i32 timm:$src2))>, EVEX_B,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Handle instruction reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}
multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                            SDNode OpNode, X86FoldableSchedWrite sched,
                            X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr#_.Suffix, "$src2, {sae}, $src1",
                      "$src1, {sae}, $src2",
                      (OpNode (_.VT _.RC:$src1),
                              (i32 timm:$src2))>,
                      EVEX_B, Sched<[sched]>;
}

multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
            AVX512VLVectorVTInfo _, bits<8> opc, SDPatternOperator OpNode,
            SDPatternOperator MaskOpNode, SDNode OpNodeSAE, X86SchedWriteWidths sched,
            Predicate prd>{
  let Predicates = [prd] in {
    defm Z    : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
                                           sched.ZMM, _.info512>,
                avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE,
                                               sched.ZMM, _.info512>, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
                                           sched.XMM, _.info128>, EVEX_V128;
    defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
                                           sched.YMM, _.info256>, EVEX_V256;
  }
}

// Handle instruction reg_vec1 = op(reg_vec2, reg_vec3, imm)
//                    op(reg_vec2, mem_vec, imm)
//                    op(reg_vec2, broadcast(eltVt), imm)
// All instructions are created with FROUND_CURRENT.
multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (OpNode (_.VT _.RC:$src1),
                          (_.VT _.RC:$src2),
                          (i32 timm:$src3))>,
                  Sched<[sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                (OpNode (_.VT _.RC:$src1),
                        (_.VT (bitconvert (_.LdFrag addr:$src2))),
                        (i32 timm:$src3))>,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                  OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
                  "$src1, ${src2}"#_.BroadcastStr#", $src3",
                  (OpNode (_.VT _.RC:$src1),
                          (_.VT (_.BroadcastLdFrag addr:$src2)),
                          (i32 timm:$src3))>, EVEX_B,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Handle instruction reg_vec1 = op(reg_vec2, reg_vec3, imm)
//                    op(reg_vec2, mem_vec, imm)
multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo,
                X86VectorVTInfo SrcInfo>{
  let ExeDomain = DestInfo.ExeDomain, ImmT = Imm8 in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
                  (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                                       (SrcInfo.VT SrcInfo.RC:$src2),
                                       (i8 timm:$src3)))>,
                  Sched<[sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                                     (SrcInfo.VT (bitconvert
                                                  (SrcInfo.LdFrag addr:$src2))),
                                     (i8 timm:$src3)))>,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Handle instruction reg_vec1 = op(reg_vec2, reg_vec3, imm)
//                    op(reg_vec2, mem_vec, imm)
//                    op(reg_vec2, broadcast(eltVt), imm)
multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, X86VectorVTInfo _>:
  avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{

  let ExeDomain = _.ExeDomain, ImmT = Imm8 in
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
                  "$src1, ${src2}"#_.BroadcastStr#", $src3",
                  (OpNode (_.VT _.RC:$src1),
                          (_.VT (_.BroadcastLdFrag addr:$src2)),
                          (i8 timm:$src3))>, EVEX_B,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Handle scalar instruction reg_vec1 = op(reg_vec2, reg_vec3, imm)
//                           op(reg_vec2, mem_scalar, imm)
multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (OpNode (_.VT _.RC:$src1),
                          (_.VT _.RC:$src2),
                          (i32 timm:$src3))>,
                  Sched<[sched]>;
  defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (OpNode (_.VT _.RC:$src1),
                          (_.ScalarIntMemFrags addr:$src2),
                          (i32 timm:$src3))>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Handle instruction reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}
multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode, X86FoldableSchedWrite sched,
                                    X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, {sae}, $src2, $src1",
                      "$src1, $src2, {sae}, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 timm:$src3))>,
                      EVEX_B, Sched<[sched]>;
}

// Handle scalar instruction reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}
multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                    X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, {sae}, $src2, $src1",
                      "$src1, $src2, {sae}, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 timm:$src3))>,
                      EVEX_B, Sched<[sched]>;
}

multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
            AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
            SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
  let Predicates = [prd] in {
    defm Z    : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
                avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, sched.ZMM, _.info512>,
                                  EVEX_V512;

  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                                  EVEX_V128;
    defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                                  EVEX_V256;
  }
}

multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
                   X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo,
                   AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
  let Predicates = [Pred] in {
    defm Z    : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512,
                                   SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX, VVVV;
  }
  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128,
                                   SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX, VVVV;
    defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256,
                                   SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX, VVVV;
  }
}

multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
                bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched,
                Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in {
    defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
                             EVEX_V512;
  }
  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                                EVEX_V128;
    defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                                EVEX_V256;
  }
}

multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
                  X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
                  SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
             avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeSAE, sched.XMM, _>;
  }
}

multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
                    bits<8> opcPs, bits<8> opcPd, SDPatternOperator OpNode,
                    SDPatternOperator MaskOpNode, SDNode OpNodeSAE,
                    X86SchedWriteWidths sched, Predicate prd>{
  defm PH : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f16_info,
                            opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, HasFP16>,
                            AVX512PSIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
  defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
                            opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
                            AVX512AIi8Base, EVEX, EVEX_CD8<32, CD8VF>;
  defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
                            opcPd, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
                            AVX512AIi8Base, EVEX, EVEX_CD8<64, CD8VF>, REX_W;
}

defm VREDUCE   : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
                              X86VReduce, X86VReduce, X86VReduceSAE,
                              SchedWriteFRnd, HasDQI>;
defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
                              X86any_VRndScale, X86VRndScale, X86VRndScaleSAE,
                              SchedWriteFRnd, HasAVX512>;
defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
                              X86VGetMant, X86VGetMant, X86VGetMantSAE,
                              SchedWriteFRnd, HasAVX512>;

defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
                                                0x50, X86VRange, X86VRangeSAE,
                                                SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
                                                0x50, X86VRange, X86VRangeSAE,
                                                SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;

defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
      f64x_info, 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W;
defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
      0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>;

defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W;
defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>;
defm VREDUCESH: avx512_common_fp_sae_scalar_imm<"vreducesh", f16x_info,
      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasFP16>,
      AVX512PSIi8Base, TA, VEX_LIG, EVEX, VVVV, EVEX_CD8<16, CD8VT1>;

defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
      AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W;
defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
      AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>;
defm VGETMANTSH: avx512_common_fp_sae_scalar_imm<"vgetmantsh", f16x_info,
      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasFP16>,
      AVX512PSIi8Base, TA, VEX_LIG, EVEX, VVVV, EVEX_CD8<16, CD8VT1>;

// 128-bit-lane shuffles. CastInfo supplies the type the X86Shuf128 node is
// matched at; results are bitconverted back to the instruction's own type.
multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
                                          X86FoldableSchedWrite sched,
                                          X86VectorVTInfo _,
                                          X86VectorVTInfo CastInfo> {
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (_.VT (bitconvert
                         (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
                                                  (i8 timm:$src3)))))>,
                  Sched<[sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                (_.VT
                 (bitconvert
                  (CastInfo.VT (X86Shuf128 _.RC:$src1,
                                           (CastInfo.LdFrag addr:$src2),
                                           (i8 timm:$src3)))))>,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                    OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
                    "$src1, ${src2}"#_.BroadcastStr#", $src3",
                    (_.VT
                     (bitconvert
                      (CastInfo.VT
                       (X86Shuf128 _.RC:$src1,
                                   (_.BroadcastLdFrag addr:$src2),
                                   (i8 timm:$src3)))))>, EVEX_B,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Only 512-bit and 256-bit forms exist: a 128-bit vector has a single lane,
// so there is no 128-bit variant.
multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched,
                                   AVX512VLVectorVTInfo _,
                                   AVX512VLVectorVTInfo CastInfo, bits<8> opc>{
  let Predicates = [HasAVX512] in
  defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
                                          _.info512, CastInfo.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in
  defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
                                             _.info256, CastInfo.info256>, EVEX_V256;
}

defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
      avx512vl_f32_info, avx512vl_f64_info, 0x23>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;
defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
      avx512vl_f64_info, avx512vl_f64_info, 0x23>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
      avx512vl_i32_info, avx512vl_i64_info, 0x43>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;
defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
      avx512vl_i64_info, avx512vl_i64_info, 0x43>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;

multiclass avx512_valign<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 timm:$src3)))>,
                  Sched<[sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                (_.VT (X86VAlign _.RC:$src1,
                                 (bitconvert (_.LdFrag addr:$src2)),
                                 (i8 timm:$src3)))>,
                Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                   OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
                   "$src1, ${src2}"#_.BroadcastStr#", $src3",
                   (X86VAlign _.RC:$src1,
                              (_.VT (_.BroadcastLdFrag addr:$src2)),
                              (i8 timm:$src3))>, EVEX_B,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched,
                                AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z    : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>,
                AVX512AIi8Base, EVEX, VVVV, EVEX_V512;
  }
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>,
                AVX512AIi8Base, EVEX, VVVV, EVEX_V128;
    // We can't really override the 256-bit version so change it back to unset.
    defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
                AVX512AIi8Base, EVEX, VVVV, EVEX_V256;
  }
}

defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle,
                                   avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle,
                                   avx512vl_i64_info>, EVEX_CD8<64, CD8VF>,
                                   REX_W;

defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
                                         SchedWriteShuffle, avx512vl_i8_info,
                                         avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;

// Fragments to help convert valignq into masked valignd. Or valignq/valignd
// into vpalignr. The immediate is rescaled to the narrower element/byte
// granularity: qword->dword x2, qword->byte x8, dword->byte x4.
def ValignqImm32XForm : SDNodeXForm<timm, [{
  return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
}]>;
def ValignqImm8XForm : SDNodeXForm<timm, [{
  return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
}]>;
def ValigndImm8XForm : SDNodeXForm<timm, [{
  return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
}]>;

multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
                                        X86VectorVTInfo From, X86VectorVTInfo To,
                                        SDNodeXForm ImmXForm> {
  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                                 (bitconvert
                                  (From.VT (OpNode From.RC:$src1, From.RC:$src2,
                                                   timm:$src3))),
                                 To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
                                                  To.RC:$src1, To.RC:$src2,
                                                  (ImmXForm timm:$src3))>;

  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                                 (bitconvert
                                  (From.VT (OpNode From.RC:$src1, From.RC:$src2,
                                                   timm:$src3))),
                                 To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
                                                   To.RC:$src1, To.RC:$src2,
                                                   (ImmXForm timm:$src3))>;

  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                                 (bitconvert
                                  (From.VT (OpNode From.RC:$src1,
                                                   (From.LdFrag addr:$src2),
                                                   timm:$src3))),
                                 To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
                                                  To.RC:$src1, addr:$src2,
                                                  (ImmXForm timm:$src3))>;

  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                                 (bitconvert
                                  (From.VT (OpNode From.RC:$src1,
                                                   (From.LdFrag addr:$src2),
                                                   timm:$src3))),
                                 To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
                                                   To.RC:$src1, addr:$src2,
                                                   (ImmXForm timm:$src3))>;
}

multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
                                           X86VectorVTInfo From,
                                           X86VectorVTInfo To,
                                           SDNodeXForm ImmXForm> :
      avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
  def : Pat<(From.VT (OpNode From.RC:$src1,
                      (bitconvert (To.VT (To.BroadcastLdFrag addr:$src2))),
                      timm:$src3)),
            (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
                                                  (ImmXForm timm:$src3))>;

  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                                 (bitconvert
                                  (From.VT (OpNode From.RC:$src1,
                                            (bitconvert
                                             (To.VT (To.BroadcastLdFrag addr:$src2))),
                                            timm:$src3))),
                                 To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
                                                   To.RC:$src1, addr:$src2,
                                                   (ImmXForm timm:$src3))>;

  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                                 (bitconvert
                                  (From.VT (OpNode From.RC:$src1,
                                            (bitconvert
                                             (To.VT (To.BroadcastLdFrag addr:$src2))),
                                            timm:$src3))),
                                 To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
                                                    To.RC:$src1, addr:$src2,
                                                    (ImmXForm timm:$src3))>;
}

let Predicates = [HasAVX512] in {
  // For 512-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
                                         v16i32_info, ValignqImm32XForm>;
}

let Predicates = [HasVLX] in {
  // For 128-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
                                         v4i32x_info, ValignqImm32XForm>;
  // For 256-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
                                         v8i32x_info, ValignqImm32XForm>;
}

let Predicates = [HasVLX, HasBWI] in {
  // We can turn 128 and 256 bit VALIGND/VALIGNQ into VPALIGNR.
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
                                      v16i8x_info, ValignqImm8XForm>;
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
                                      v16i8x_info, ValigndImm8XForm>;
}

defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
                SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>,
                EVEX_CD8<8, CD8VF>;

// Unary vector op, register and full-vector memory forms.
multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                    (ins _.RC:$src1), OpcodeStr,
                    "$src1", "$src1",
                    (_.VT (OpNode (_.VT _.RC:$src1)))>, EVEX, AVX5128IBase,
                    Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.MemOp:$src1), OpcodeStr,
                  "$src1", "$src1",
                  (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1)))))>,
            EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
            Sched<[sched.Folded]>;
  }
}

// Adds the broadcast-from-scalar-memory form to avx512_unary_rm.
multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> :
           avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.ScalarMemOp:$src1), OpcodeStr,
                  "${src1}"#_.BroadcastStr,
                  "${src1}"#_.BroadcastStr,
                  (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src1))))>,
             EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded]>;
}

multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched,
                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
                EVEX_V128;
  }
}

multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo,
                               Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
                EVEX_V128;
  }
}

// Dword/qword element sizes get broadcast forms (rmb); qword uses REX.W.
multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd> {
  defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched,
                               avx512vl_i64_info, prd>, REX_W;
  defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched,
                               avx512vl_i32_info, prd>;
}
// Declares the byte ("b") and word ("w") element variants of a unary
// operation. No broadcast form: byte/word broadcasts do not exist in EVEX.
multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd> {
  defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched,
                              avx512vl_i16_info, prd>, WIG;
  defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched,
                              avx512vl_i8_info, prd>, WIG;
}

// Declares all four element sizes; b/w require BWI, d/q only AVX512F.
multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched> {
  defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched,
                                    HasAVX512>,
              avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched,
                                    HasBWI>;
}

defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
                                    SchedWriteVecALU>;

// VPABS: Use 512bit version to implement 128/256 bit in case NoVLX.
// Widen the source into a ZMM register, run the 512-bit VPABSQ, then
// extract the low subregister again.
let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
  def : Pat<(v4i64 (abs VR256X:$src)),
            (EXTRACT_SUBREG
                (VPABSQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
             sub_ymm)>;
  def : Pat<(v2i64 (abs VR128X:$src)),
            (EXTRACT_SUBREG
                (VPABSQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
             sub_xmm)>;
}

// Use 512bit version to implement 128/256 bit.
// Reusable form of the widening trick above: match the 256/128-bit unary op
// and select the 512-bit instruction InstrStr#"Zrr" via subregister
// insert/extract. Only active when VLX is unavailable.
multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
                                 AVX512VLVectorVTInfo _, Predicate prd> {
  let Predicates = [prd, NoVLX, HasEVEX512] in {
    def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))),
              (EXTRACT_SUBREG
                (!cast<Instruction>(InstrStr # "Zrr")
                  (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
                                 _.info256.RC:$src1,
                                 _.info256.SubRegIdx)),
              _.info256.SubRegIdx)>;

    def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1))),
              (EXTRACT_SUBREG
                (!cast<Instruction>(InstrStr # "Zrr")
                  (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
                                 _.info128.RC:$src1,
                                 _.info128.SubRegIdx)),
              _.info128.SubRegIdx)>;
  }
}

defm VPLZCNT    : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
                                        SchedWriteVecIMul, HasCDI>;

// FIXME: Is there a better scheduler class for VPCONFLICT?
defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
                                        SchedWriteVecALU, HasCDI>;

// VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;

//===---------------------------------------------------------------------===//
// Counts number of ones - VPOPCNTD and VPOPCNTQ
//===---------------------------------------------------------------------===//

// FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
                                     SchedWriteVecALU, HasVPOPCNTDQ>;

// Without VLX, implement the 128/256-bit popcounts with the 512-bit form.
defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;

//===---------------------------------------------------------------------===//
// Replicate Single FP - MOVSHDUP and MOVSLDUP
//===---------------------------------------------------------------------===//

multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86SchedWriteWidths sched> {
  defm NAME: avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
                                avx512vl_f32_info, HasAVX512>, TB, XS;
}

defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
                                  SchedWriteFShuffle>;
defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
                                  SchedWriteFShuffle>;

//===----------------------------------------------------------------------===//
// AVX-512 - MOVDDUP
//===----------------------------------------------------------------------===//

// 128-bit MOVDDUP is a broadcast of the low f64, so it gets its own
// register/scalar-memory forms instead of reusing avx512_unary_rm.
multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src), OpcodeStr, "$src", "$src",
                            (_.VT (X86VBroadcast (_.VT _.RC:$src)))>, EVEX,
                            Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                            (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
                            (_.VT (_.BroadcastLdFrag addr:$src))>,
                            EVEX, EVEX_CD8<_.EltSize, CD8VH>,
                            Sched<[sched.Folded]>;
  }
}

multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr,
                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
  defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM,
                           VTInfo.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM,
                                VTInfo.info256>, EVEX_V256;
    defm Z128 : avx512_movddup_128<opc, OpcodeStr, sched.XMM,
                                   VTInfo.info128>, EVEX_V128;
  }
}

multiclass avx512_movddup<bits<8> opc, string OpcodeStr,
                          X86SchedWriteWidths sched> {
  defm NAME: avx512_movddup_common<opc, OpcodeStr, sched,
                                   avx512vl_f64_info>, TB, XD, REX_W;
}

defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", SchedWriteFShuffle>;

// Select VMOVDDUP for a broadcast of a scalar f64 held in an FR64X register,
// including the masked and zero-masked forms.
let Predicates = [HasVLX] in {
def : Pat<(v2f64 (X86VBroadcast f64:$src)),
          (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;

def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
                        (v2f64 VR128X:$src0)),
          (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
                           (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
                        immAllZerosV),
          (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - Unpack Instructions
//===----------------------------------------------------------------------===//

// The FP unpacks never raise FP exceptions; clear the defaults from the
// FP binop helper.
let Uses = []<Register>, mayRaiseFPException = 0 in {
defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, X86Unpckh, HasAVX512,
                                 SchedWriteFShuffleSizes, 0, 1>;
defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, X86Unpckl, HasAVX512,
                                 SchedWriteFShuffleSizes>;
}

defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd",
                                       X86Unpckl,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
                                       SchedWriteShuffle, HasBWI>;

defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
                                       SchedWriteShuffle, HasAVX512>;
defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
                                       SchedWriteShuffle, HasAVX512>;
defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
                                        SchedWriteShuffle, HasAVX512>;
defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
                                        SchedWriteShuffle, HasAVX512>;

//===----------------------------------------------------------------------===//
// AVX-512 - Extract & Insert Integer Instructions
//===----------------------------------------------------------------------===//

// Extract-to-memory form shared by the byte and word extracts: the extracted
// element is truncated and stored.
multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   X86VectorVTInfo _> {
  def mr : AVX512Ii8<opc, MRMDestMem, (outs),
                     (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
                     OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), timm:$src2))),
                             addr:$dst)]>,
                     EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
}

multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
                       (ins _.RC:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set GR32orGR64:$dst,
                             (X86pextrb (_.VT _.RC:$src1), timm:$src2))]>,
                       EVEX, TA, PD, Sched<[WriteVecExtract]>;

    defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TA, PD;
  }
}

multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
                       (ins _.RC:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set GR32orGR64:$dst,
                             (X86pextrw (_.VT _.RC:$src1), timm:$src2))]>,
                       EVEX, TB, PD, Sched<[WriteVecExtract]>;

    // Disassembly-only alternate encoding (0x15, MRMDestReg).
    let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
    def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
                           (ins _.RC:$src1, u8imm:$src2),
                           OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                           EVEX, TA, PD, Sched<[WriteVecExtract]>;

    defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TA, PD;
  }
}

multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
                                 RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
                       (ins _.RC:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set GRC:$dst,
                             (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
                       EVEX, TA, PD, Sched<[WriteVecExtract]>;

    def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
                       (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(store (extractelt (_.VT _.RC:$src1),
                                           imm:$src2),addr:$dst)]>,
                       EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TA, PD,
                       Sched<[WriteVecExtractSt]>;
  }
}

defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, WIG;
defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, WIG;
defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, REX_W;

// Insert-from-memory form shared by all element sizes.
multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86VectorVTInfo _, PatFrag LdFrag,
                               SDPatternOperator immoperator> {
  def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
                     (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                     OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                     [(set _.RC:$dst,
                           (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), immoperator:$src3)))]>,
                     EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}

multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86VectorVTInfo _, PatFrag LdFrag> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
                       (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
                       OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                       [(set _.RC:$dst,
                             (OpNode _.RC:$src1, GR32orGR64:$src2, timm:$src3))]>, EVEX, VVVV,
                       Sched<[WriteVecInsert]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag, timm>;
  }
}

multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
                                X86VectorVTInfo _, RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
                       (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
                       OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                       [(set _.RC:$dst,
                             (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
                       EVEX, VVVV, TA, PD, Sched<[WriteVecInsert]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
                                    _.ScalarLdFrag, imm>, TA, PD;
  }
}

defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
                                     extloadi8>, TA, PD, WIG;
defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
                                     extloadi16>, TB, PD, WIG;
defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, REX_W;

// Insert of a v8i1 mask register bitcast to i8: copy the mask to a GPR first.
let Predicates = [HasAVX512, NoBWI] in {
  def : Pat<(X86pinsrb VR128:$src1,
                       (i32 (anyext (i8 (bitconvert v8i1:$src2)))),
                       timm:$src3),
            (VPINSRBrr VR128:$src1, (i32 (COPY_TO_REGCLASS VK8:$src2, GR32)),
                       timm:$src3)>;
}

let Predicates = [HasBWI] in {
  def : Pat<(X86pinsrb VR128:$src1, (i32 (anyext (i8 GR8:$src2))), timm:$src3),
            (VPINSRBZrr VR128:$src1, (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
                                                    GR8:$src2, sub_8bit), timm:$src3)>;
  def : Pat<(X86pinsrb VR128:$src1,
                       (i32 (anyext (i8 (bitconvert v8i1:$src2)))),
                       timm:$src3),
            (VPINSRBZrr VR128:$src1, (i32 (COPY_TO_REGCLASS VK8:$src2, GR32)),
                        timm:$src3)>;
}

// Always select FP16 instructions if available.
let Predicates = [HasBWI], AddedComplexity = -10 in {
  def : Pat<(f16 (load addr:$src)), (COPY_TO_REGCLASS (VPINSRWZrm (v8i16 (IMPLICIT_DEF)), addr:$src, 0), FR16X)>;
  def : Pat<(store f16:$src, addr:$dst), (VPEXTRWZmr addr:$dst, (v8i16 (COPY_TO_REGCLASS FR16:$src, VR128)), 0)>;
  def : Pat<(i16 (bitconvert f16:$src)), (EXTRACT_SUBREG (VPEXTRWZrr (v8i16 (COPY_TO_REGCLASS FR16X:$src, VR128X)), 0), sub_16bit)>;
  def : Pat<(f16 (bitconvert i16:$src)), (COPY_TO_REGCLASS (VPINSRWZrr (v8i16 (IMPLICIT_DEF)), (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit), 0), FR16X)>;
}

//===----------------------------------------------------------------------===//
// VSHUFPS - VSHUFPD Operations
//===----------------------------------------------------------------------===//

multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_FP>{
  defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
                                    SchedWriteFShuffle>,
             EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
             TA, EVEX, VVVV;
}

defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_f32_info>, TB;
defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_f64_info>, TB, PD, REX_W;

//===----------------------------------------------------------------------===//
// AVX-512 - Byte
// shift Left/Right
//===----------------------------------------------------------------------===//

multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
                               Format MRMm, string OpcodeStr,
                               X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  def ri : AVX512<opc, MRMr,
                  (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
                  !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 timm:$src2))))]>,
                  Sched<[sched]>;
  def mi : AVX512<opc, MRMm,
                  (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
                  !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set _.RC:$dst,(_.VT (OpNode
                                         (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                         (i8 timm:$src2))))]>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
                                   Format MRMm, string OpcodeStr,
                                   X86SchedWriteWidths sched, Predicate prd>{
  let Predicates = [prd] in
    defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                 sched.ZMM, v64i8_info>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    sched.YMM, v32i8x_info>, EVEX_V256;
    defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    sched.XMM, v16i8x_info>, EVEX_V128;
  }
}
// Both byte shifts share opcode 0x73; the /7 vs /3 ModRM reg field selects
// left vs right.
defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
                                       SchedWriteShuffle, HasBWI>,
                                       AVX512PDIi8Base, EVEX, VVVV, WIG;
defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
                                       SchedWriteShuffle, HasBWI>,
                                       AVX512PDIi8Base, EVEX, VVVV, WIG;

// PSADBW: byte sources, qword accumulator destination, hence the separate
// _dst/_src type infos.
multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
                                string OpcodeStr, X86FoldableSchedWrite sched,
                                X86VectorVTInfo _dst, X86VectorVTInfo _src> {
  let isCommutable = 1 in
  def rr : AVX512BI<opc, MRMSrcReg,
                    (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
                    !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set _dst.RC:$dst,(_dst.VT
                                        (OpNode (_src.VT _src.RC:$src1),
                                                (_src.VT _src.RC:$src2))))]>,
                    Sched<[sched]>;
  def rm : AVX512BI<opc, MRMSrcMem,
                    (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
                    !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set _dst.RC:$dst,(_dst.VT
                                        (OpNode (_src.VT _src.RC:$src1),
                                                (_src.VT (bitconvert
                                                          (_src.LdFrag addr:$src2))))))]>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
                                    string OpcodeStr, X86SchedWriteWidths sched,
                                    Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM,
                                  v8i64_info, v64i8_info>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM,
                                     v4i64x_info, v32i8x_info>, EVEX_V256;
    defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM,
                                     v2i64x_info, v16i8x_info>, EVEX_V128;
  }
}

defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
                                        SchedWritePSADBW, HasBWI>, EVEX, VVVV, WIG;

// Transforms to swizzle an immediate to enable better matching when
// memory operand isn't in the right place.
def VPTERNLOG321_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 1/4 and 3/6.
  uint8_t NewImm = Imm & 0xa5;
  if (Imm & 0x02) NewImm |= 0x10;
  if (Imm & 0x10) NewImm |= 0x02;
  if (Imm & 0x08) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG213_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1.
  // (Bits 2/4 and 3/5 are the truth-table entries where the first two
  // operand selectors differ; entries 0,1,6,7 — mask 0xc3 — are unchanged.)
  uint8_t Imm = N->getZExtValue();
  // Swap bits 2/4 and 3/5.
  uint8_t NewImm = Imm & 0xc3;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x20) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG132_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 1/2 and 5/6.
  uint8_t NewImm = Imm & 0x99;
  if (Imm & 0x02) NewImm |= 0x04;
  if (Imm & 0x04) NewImm |= 0x02;
  if (Imm & 0x20) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x20;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG231_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by moving operand 1 to the end.
  uint8_t Imm = N->getZExtValue();
  // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x04;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x08) NewImm |= 0x40;
  if (Imm & 0x10) NewImm |= 0x02;
  if (Imm & 0x20) NewImm |= 0x08;
  if (Imm & 0x40) NewImm |= 0x20;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG312_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by moving operand 2 to the beginning.
  uint8_t Imm = N->getZExtValue();
  // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x10;
  if (Imm & 0x04) NewImm |= 0x02;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x20) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;

// VPTERNLOG: three-source ternary logic. $src1 is tied to $dst; the rmi and
// rmbi forms fold a full-width load or an element broadcast into $src3.
multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          X86FoldableSchedWrite sched, X86VectorVTInfo _,
                          string Name>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT _.RC:$src3),
                              (i8 timm:$src4)), 1, 1>,
                      AVX512AIi8Base, EVEX, VVVV, Sched<[sched]>;
  defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT (bitconvert (_.LdFrag addr:$src3))),
                              (i8 timm:$src4)), 1, 0>,
                      AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
                    OpcodeStr, "$src4, ${src3}"#_.BroadcastStr#", $src2",
                    "$src2, ${src3}"#_.BroadcastStr#", $src4",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT _.RC:$src2),
                            (_.VT (_.BroadcastLdFrag addr:$src3)),
                            (i8 timm:$src4)), 1, 0>, EVEX_B,
                    AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  }// Constraints = "$src1 = $dst"

  // Additional patterns for matching passthru operand in other positions.
  // The immediate is swizzled so the passthru can always be the tied $src1.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 timm:$src4))>;

  // Additional patterns for matching zero masking with loads in other
  // positions.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, (i8 timm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;

  // Additional patterns for matching masked loads with different
  // operand orders.
  // The load must end up in the $src3 slot, so each operand order gets the
  // matching immediate swizzle.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1,
                    (bitconvert (_.LdFrag addr:$src3)), (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;

  // Additional patterns for matching zero masking with broadcasts in other
  // positions.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src1,
                    (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src2, (i8 timm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG132_imm8 timm:$src4))>;

  // Additional patterns for matching masked broadcasts with different
  // operand orders.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src2, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1,
                    (_.BroadcastLdFrag addr:$src3),
                    (i8 timm:$src4)), _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src2,
                    (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
}

// Instantiates VPTERNLOG (opcode 0x25) for 512-bit and, with VLX, for the
// 128/256-bit vector widths.
multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
  defm Z    : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM,
                             _.info512, NAME>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM,
                               _.info128, NAME>, EVEX_V128;
    defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM,
                               _.info256, NAME>, EVEX_V256;
  }
}

defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
                                        avx512vl_i32_info>;
defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
                                        avx512vl_i64_info>, REX_W;

// Patterns to implement vnot using vpternlog instead of creating all ones
// using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
// so that the result is only dependent on src0. But we use the same source
// for all operands to prevent a false dependency.
// TODO: We should maybe have a more generalized algorithm for folding to
// vpternlog.
let Predicates = [HasAVX512] in {
  def : Pat<(v64i8 (vnot VR512:$src)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
  def : Pat<(v32i16 (vnot VR512:$src)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
  def : Pat<(v16i32 (vnot VR512:$src)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
  def : Pat<(v8i64 (vnot VR512:$src)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
}

// Without VLX, widen the 128/256-bit vnot to the 512-bit VPTERNLOGQ and
// extract the low subregister.
let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
  def : Pat<(v16i8 (vnot VR128X:$src)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;
  def : Pat<(v8i16 (vnot VR128X:$src)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;
  def : Pat<(v4i32 (vnot VR128X:$src)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;
  def : Pat<(v2i64 (vnot VR128X:$src)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
              (i8 15)), sub_xmm)>;

  def : Pat<(v32i8 (vnot VR256X:$src)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11841 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 11842 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 11843 (i8 15)), sub_ymm)>; 11844 def : Pat<(v16i16 (vnot VR256X:$src)), 11845 (EXTRACT_SUBREG 11846 (VPTERNLOGQZrri 11847 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 11848 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 11849 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 11850 (i8 15)), sub_ymm)>; 11851 def : Pat<(v8i32 (vnot VR256X:$src)), 11852 (EXTRACT_SUBREG 11853 (VPTERNLOGQZrri 11854 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 11855 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 11856 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 11857 (i8 15)), sub_ymm)>; 11858 def : Pat<(v4i64 (vnot VR256X:$src)), 11859 (EXTRACT_SUBREG 11860 (VPTERNLOGQZrri 11861 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 11862 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 11863 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 11864 (i8 15)), sub_ymm)>; 11865} 11866 11867let Predicates = [HasVLX] in { 11868 def : Pat<(v16i8 (vnot VR128X:$src)), 11869 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>; 11870 def : Pat<(v8i16 (vnot VR128X:$src)), 11871 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>; 11872 def : Pat<(v4i32 (vnot VR128X:$src)), 11873 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>; 11874 def : Pat<(v2i64 (vnot VR128X:$src)), 11875 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>; 11876 11877 def : Pat<(v32i8 (vnot VR256X:$src)), 11878 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>; 11879 def : Pat<(v16i16 (vnot VR256X:$src)), 11880 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>; 11881 def : Pat<(v8i32 (vnot VR256X:$src)), 11882 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, 
(i8 15))>; 11883 def : Pat<(v4i64 (vnot VR256X:$src)), 11884 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>; 11885} 11886 11887//===----------------------------------------------------------------------===// 11888// AVX-512 - FixupImm 11889//===----------------------------------------------------------------------===// 11890 11891multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, 11892 X86FoldableSchedWrite sched, X86VectorVTInfo _, 11893 X86VectorVTInfo TblVT>{ 11894 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, 11895 Uses = [MXCSR], mayRaiseFPException = 1 in { 11896 defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 11897 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4), 11898 OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4", 11899 (X86VFixupimm (_.VT _.RC:$src1), 11900 (_.VT _.RC:$src2), 11901 (TblVT.VT _.RC:$src3), 11902 (i32 timm:$src4))>, Sched<[sched]>; 11903 defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 11904 (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4), 11905 OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4", 11906 (X86VFixupimm (_.VT _.RC:$src1), 11907 (_.VT _.RC:$src2), 11908 (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))), 11909 (i32 timm:$src4))>, 11910 Sched<[sched.Folded, sched.ReadAfterFold]>; 11911 defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 11912 (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4), 11913 OpcodeStr#_.Suffix, "$src4, ${src3}"#_.BroadcastStr#", $src2", 11914 "$src2, ${src3}"#_.BroadcastStr#", $src4", 11915 (X86VFixupimm (_.VT _.RC:$src1), 11916 (_.VT _.RC:$src2), 11917 (TblVT.VT (TblVT.BroadcastLdFrag addr:$src3)), 11918 (i32 timm:$src4))>, 11919 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 11920 } // Constraints = "$src1 = $dst" 11921} 11922 11923multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr, 11924 X86FoldableSchedWrite sched, 11925 X86VectorVTInfo _, 
X86VectorVTInfo TblVT> 11926 : avx512_fixupimm_packed<opc, OpcodeStr, sched, _, TblVT> { 11927let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, Uses = [MXCSR] in { 11928 defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 11929 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4), 11930 OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2", 11931 "$src2, $src3, {sae}, $src4", 11932 (X86VFixupimmSAE (_.VT _.RC:$src1), 11933 (_.VT _.RC:$src2), 11934 (TblVT.VT _.RC:$src3), 11935 (i32 timm:$src4))>, 11936 EVEX_B, Sched<[sched]>; 11937 } 11938} 11939 11940multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, 11941 X86FoldableSchedWrite sched, X86VectorVTInfo _, 11942 X86VectorVTInfo _src3VT> { 11943 let Constraints = "$src1 = $dst" , Predicates = [HasAVX512], 11944 ExeDomain = _.ExeDomain in { 11945 defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 11946 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4), 11947 OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4", 11948 (X86VFixupimms (_.VT _.RC:$src1), 11949 (_.VT _.RC:$src2), 11950 (_src3VT.VT _src3VT.RC:$src3), 11951 (i32 timm:$src4))>, Sched<[sched]>, SIMD_EXC; 11952 let Uses = [MXCSR] in 11953 defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 11954 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4), 11955 OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2", 11956 "$src2, $src3, {sae}, $src4", 11957 (X86VFixupimmSAEs (_.VT _.RC:$src1), 11958 (_.VT _.RC:$src2), 11959 (_src3VT.VT _src3VT.RC:$src3), 11960 (i32 timm:$src4))>, 11961 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 11962 defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 11963 (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4), 11964 OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4", 11965 (X86VFixupimms (_.VT _.RC:$src1), 11966 (_.VT _.RC:$src2), 11967 (_src3VT.VT (scalar_to_vector 11968 (_src3VT.ScalarLdFrag addr:$src3))), 11969 (i32 
timm:$src4))>, 11970 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 11971 } 11972} 11973 11974multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched, 11975 AVX512VLVectorVTInfo _Vec, 11976 AVX512VLVectorVTInfo _Tbl> { 11977 let Predicates = [HasAVX512] in 11978 defm Z : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM, 11979 _Vec.info512, _Tbl.info512>, AVX512AIi8Base, 11980 EVEX, VVVV, EVEX_V512; 11981 let Predicates = [HasAVX512, HasVLX] in { 11982 defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.XMM, 11983 _Vec.info128, _Tbl.info128>, AVX512AIi8Base, 11984 EVEX, VVVV, EVEX_V128; 11985 defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.YMM, 11986 _Vec.info256, _Tbl.info256>, AVX512AIi8Base, 11987 EVEX, VVVV, EVEX_V256; 11988 } 11989} 11990 11991defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm", 11992 SchedWriteFAdd.Scl, f32x_info, v4i32x_info>, 11993 AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>; 11994defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm", 11995 SchedWriteFAdd.Scl, f64x_info, v2i64x_info>, 11996 AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W; 11997defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info, 11998 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; 11999defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info, 12000 avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, REX_W; 12001 12002// Patterns used to select SSE scalar fp arithmetic instructions from 12003// either: 12004// 12005// (1) a scalar fp operation followed by a blend 12006// 12007// The effect is that the backend no longer emits unnecessary vector 12008// insert instructions immediately after SSE scalar fp instructions 12009// like addss or mulss. 
//
// For example, given the following code:
//   __m128 foo(__m128 A, __m128 B) {
//     A[0] += B[0];
//     return A;
//   }
//
// Previously we generated:
//   addss %xmm0, %xmm1
//   movss %xmm1, %xmm0
//
// We now generate:
//   addss %xmm1, %xmm0
//
// (2) a vector packed single/double fp operation followed by a vector insert
//
// The effect is that the backend converts the packed fp instruction
// followed by a vector insert into a single SSE scalar fp instruction.
//
// For example, given the following code:
//   __m128 foo(__m128 A, __m128 B) {
//     __m128 C = A + B;
//     return (__m128) {C[0], A[1], A[2], A[3]};
//   }
//
// Previously we generated:
//   addps %xmm0, %xmm1
//   movss %xmm1, %xmm0
//
// We now generate:
//   addss %xmm1, %xmm0

// TODO: Some canonicalization in lowering would simplify the number of
// patterns we have to try to match.
// Selects the *_Int forms of V<Op>SS/SD/SH for "scalar op + blend" DAGs,
// including merge-masked (src0 passthru) and zero-masked variants.
// Fix: the two zero-masked output patterns used !cast<I>(...) while every
// other pattern here (and in this file) uses !cast<Instruction>(...).
// !cast<I> only succeeds if the named record is literally of class I, so the
// narrower cast is both inconsistent and fragile; use Instruction uniformly.
multiclass AVX512_scalar_math_fp_patterns<SDPatternOperator Op, SDNode MaskedOp,
                                          string OpcPrefix, SDNode MoveNode,
                                          X86VectorVTInfo _, PatLeaf ZeroFP> {
  let Predicates = [HasAVX512] in {
    // extracted scalar math op with insert via movss
    def : Pat<(MoveNode
               (_.VT VR128X:$dst),
               (_.VT (scalar_to_vector
                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
                          _.FRC:$src)))),
              (!cast<Instruction>("V"#OpcPrefix#"Zrr_Int") _.VT:$dst,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;
    def : Pat<(MoveNode
               (_.VT VR128X:$dst),
               (_.VT (scalar_to_vector
                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
                          (_.ScalarLdFrag addr:$src))))),
              (!cast<Instruction>("V"#OpcPrefix#"Zrm_Int") _.VT:$dst, addr:$src)>;

    // extracted masked scalar math op with insert via movss
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                                 (MaskedOp (_.EltVT
                                            (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                           _.FRC:$src2),
                                 _.FRC:$src0))),
              (!cast<Instruction>("V"#OpcPrefix#"Zrr_Intk")
               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
               VK1WM:$mask, _.VT:$src1,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                                 (MaskedOp (_.EltVT
                                            (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                           (_.ScalarLdFrag addr:$src2)),
                                 _.FRC:$src0))),
              (!cast<Instruction>("V"#OpcPrefix#"Zrm_Intk")
               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
               VK1WM:$mask, _.VT:$src1, addr:$src2)>;

    // extracted zero-masked scalar math op with insert via movss
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                                 (MaskedOp (_.EltVT
                                            (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                           _.FRC:$src2), (_.EltVT ZeroFP)))),
              (!cast<Instruction>("V"#OpcPrefix#"Zrr_Intkz")
               VK1WM:$mask, _.VT:$src1,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                                 (MaskedOp (_.EltVT
                                            (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                           (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))),
              (!cast<Instruction>("V"#OpcPrefix#"Zrm_Intkz") VK1WM:$mask, _.VT:$src1, addr:$src2)>;
  }
}

defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;

defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;

defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSH", X86Movsh, v8f16x_info, fp16imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSH", X86Movsh, v8f16x_info, fp16imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSH", X86Movsh, v8f16x_info, fp16imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSH", X86Movsh, v8f16x_info, fp16imm0>;

// Same idea for unary scalar ops (sqrt): select the *_Int form when the
// result of the scalar op is inserted back into the low element.
multiclass AVX512_scalar_unary_math_patterns<SDPatternOperator OpNode, string OpcPrefix,
                                             SDNode Move, X86VectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    def : Pat<(_.VT (Move _.VT:$dst,
                     (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
              (!cast<Instruction>("V"#OpcPrefix#"Zr_Int") _.VT:$dst, _.VT:$src)>;
  }
}

defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSH", X86Movsh, v8f16x_info>;

//===----------------------------------------------------------------------===//
// AES instructions
//===----------------------------------------------------------------------===//

// EVEX-encoded VAES at all widths; the 128/256-bit forms require VLX+VAES,
// the 512-bit form AVX512+VAES. Intrinsic names are built from IntPrefix.
multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
  let Predicates = [HasVLX, HasVAES] in {
    defm Z128 : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix),
                                  loadv2i64, 0, VR128X, i128mem>,
                  EVEX, VVVV, EVEX_CD8<64, CD8VF>, EVEX_V128, WIG;
    defm Z256 : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix#"_256"),
                                  loadv4i64, 0, VR256X, i256mem>,
                  EVEX, VVVV, EVEX_CD8<64, CD8VF>, EVEX_V256, WIG;
  }
  let Predicates = [HasAVX512, HasVAES] in
  defm Z    : AESI_binop_rm_int<Op, OpStr,
                                !cast<Intrinsic>(IntPrefix#"_512"),
                                loadv8i64, 0, VR512, i512mem>,
                EVEX, VVVV, EVEX_CD8<64, CD8VF>, EVEX_V512, WIG;
}

defm VAESENC     : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
defm VAESENCLAST : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
defm VAESDEC     : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
defm VAESDECLAST : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;

//===----------------------------------------------------------------------===//
// PCLMUL instructions - Carry less multiplication
//===----------------------------------------------------------------------===//

let Predicates = [HasAVX512, HasVPCLMULQDQ] in
defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
                              EVEX, VVVV, EVEX_V512, EVEX_CD8<64, CD8VF>, WIG;

let Predicates = [HasVLX, HasVPCLMULQDQ] in {
defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
                                 EVEX, VVVV, EVEX_V128, EVEX_CD8<64, CD8VF>, WIG;

defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
                                int_x86_pclmulqdq_256>, EVEX, VVVV, EVEX_V256,
                                EVEX_CD8<64, CD8VF>, WIG;
}

// Aliases
defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;

//===----------------------------------------------------------------------===//
// VBMI2
//===----------------------------------------------------------------------===//

// Variable-count concat-shift (vpshldv/vpshrdv): reg/reg and reg/mem forms.
// $src1 is tied to $dst.
multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
  let Constraints = "$src1 = $dst",
      ExeDomain = VTI.ExeDomain in {
    defm r:   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
                "$src3, $src2", "$src2, $src3",
                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
                T8, PD, EVEX, VVVV, Sched<[sched]>;
    defm m:   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
                "$src3, $src2", "$src2, $src3",
                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                         (VTI.VT (VTI.LdFrag addr:$src3))))>,
                T8, PD, EVEX, VVVV,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Adds the broadcast-memory form; only meaningful for d/q element sizes.
multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
                               X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
         : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
  let Constraints = "$src1 = $dst",
      ExeDomain = VTI.ExeDomain in
  defm mb:  AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
              (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
              "${src3}"#VTI.BroadcastStr#", $src2",
              "$src2, ${src3}"#VTI.BroadcastStr,
              (OpNode VTI.RC:$src1, VTI.RC:$src2,
               (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
              T8, PD, EVEX, VVVV, EVEX_B,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasVBMI2] in
  defm Z      : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
                EVEX_V512;
  let Predicates = [HasVBMI2, HasVLX] in {
    defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
                EVEX_V256;
    defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
                EVEX_V128;
  }
}

multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
                                      X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasVBMI2] in
  defm Z      : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
                EVEX_V512;
  let Predicates = [HasVBMI2, HasVLX] in {
    defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
                EVEX_V256;
    defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
                EVEX_V128;
  }
}
// w uses the word opcode and no broadcast form; d/q share the dq opcode and
// get broadcast-memory variants.
multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
                           SDNode OpNode, X86SchedWriteWidths sched> {
  defm W : VBMI2_shift_var_rm_common<wOp, Prefix#"w", OpNode, sched,
                                     avx512vl_i16_info>, REX_W, EVEX_CD8<16, CD8VF>;
  defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix#"d", OpNode, sched,
                                      avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
  defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix#"q", OpNode, sched,
                                      avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
}

// Immediate-count concat-shift (vpshld/vpshrd).
multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
                           SDNode OpNode, X86SchedWriteWidths sched> {
  defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix#"w", sched,
                                     avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
           REX_W, EVEX_CD8<16, CD8VF>;
  defm D : avx512_common_3Op_imm8<Prefix#"d", avx512vl_i32_info, dqOp,
           OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;
  defm Q : avx512_common_3Op_imm8<Prefix#"q", avx512vl_i64_info, dqOp, OpNode,
           sched, HasVBMI2>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
}

// Concat & Shift
defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
defm VPSHLD  : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
defm VPSHRD  : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;

// Compress
defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256,
                                         avx512vl_i8_info, HasVBMI2>, EVEX;
defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256,
                                          avx512vl_i16_info, HasVBMI2>, EVEX, REX_W;
// Expand
defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256,
                                      avx512vl_i8_info, HasVBMI2>, EVEX;
defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256,
                                      avx512vl_i16_info, HasVBMI2>, EVEX, REX_W;

//===----------------------------------------------------------------------===//
// VNNI
//===----------------------------------------------------------------------===//

// Dot-product accumulate (vpdpbusd & friends): reg, mem and broadcast forms.
// $src1 (the accumulator) is tied to $dst.
let Constraints = "$src1 = $dst" in
multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
                    X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
                    bit IsCommutable> {
  let ExeDomain = VTI.ExeDomain in {
    defm r  :   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                                     (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
                                     "$src3, $src2", "$src2, $src3",
                                     (VTI.VT (OpNode VTI.RC:$src1,
                                              VTI.RC:$src2, VTI.RC:$src3)),
                                     IsCommutable, IsCommutable>,
                                     EVEX, VVVV, T8, PD, Sched<[sched]>;
    defm m  :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                     (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
                                     "$src3, $src2", "$src2, $src3",
                                     (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                              (VTI.VT (VTI.LdFrag addr:$src3))))>,
                                     EVEX, VVVV, EVEX_CD8<32, CD8VF>, T8, PD,
                                     Sched<[sched.Folded, sched.ReadAfterFold,
                                            sched.ReadAfterFold]>;
    defm mb :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                     (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
                                     OpStr, "${src3}"#VTI.BroadcastStr#", $src2",
                                     "$src2, ${src3}"#VTI.BroadcastStr,
                                     (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                      (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
                                     EVEX, VVVV, EVEX_CD8<32, CD8VF>, EVEX_B,
                                     T8, PD, Sched<[sched.Folded, sched.ReadAfterFold,
                                                    sched.ReadAfterFold]>;
  }
}

multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
                       X86SchedWriteWidths sched, bit IsCommutable> {
  let Predicates = [HasVNNI] in
  defm Z      :   VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info,
                           IsCommutable>, EVEX_V512;
  let Predicates = [HasVNNI, HasVLX] in {
    defm Z256 :   VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info,
                           IsCommutable>, EVEX_V256;
    defm Z128 :   VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info,
                           IsCommutable>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPDP?
defm VPDPBUSD   : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul, 0>;
defm VPDPBUSDS  : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul, 0>;
defm VPDPWSSD   : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul, 1>;
defm VPDPWSSDS  : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul, 1>;

// Patterns to match VPDPWSSD from existing instructions/intrinsics.
// Fold add(acc, vpmaddwd(x, y)) into a single vpdpwssd.
let Predicates = [HasVNNI] in {
  def : Pat<(v16i32 (add VR512:$src1,
                         (X86vpmaddwd_su VR512:$src2, VR512:$src3))),
            (VPDPWSSDZr VR512:$src1, VR512:$src2, VR512:$src3)>;
  def : Pat<(v16i32 (add VR512:$src1,
                         (X86vpmaddwd_su VR512:$src2, (load addr:$src3)))),
            (VPDPWSSDZm VR512:$src1, VR512:$src2, addr:$src3)>;
}
let Predicates = [HasVNNI,HasVLX] in {
  def : Pat<(v8i32 (add VR256X:$src1,
                        (X86vpmaddwd_su VR256X:$src2, VR256X:$src3))),
            (VPDPWSSDZ256r VR256X:$src1, VR256X:$src2, VR256X:$src3)>;
  def : Pat<(v8i32 (add VR256X:$src1,
                        (X86vpmaddwd_su VR256X:$src2, (load addr:$src3)))),
            (VPDPWSSDZ256m VR256X:$src1, VR256X:$src2, addr:$src3)>;
  def : Pat<(v4i32 (add VR128X:$src1,
                        (X86vpmaddwd_su VR128X:$src2, VR128X:$src3))),
            (VPDPWSSDZ128r VR128X:$src1, VR128X:$src2, VR128X:$src3)>;
  def : Pat<(v4i32 (add VR128X:$src1,
                        (X86vpmaddwd_su VR128X:$src2, (load addr:$src3)))),
            (VPDPWSSDZ128m VR128X:$src1, VR128X:$src2, addr:$src3)>;
}

//===----------------------------------------------------------------------===//
// Bit Algorithms
//===----------------------------------------------------------------------===//

// FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW?
defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU,
                                   avx512vl_i8_info, HasBITALG>;
defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
                                   avx512vl_i16_info, HasBITALG>, REX_W;

defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;

// vpshufbitqmb: writes a mask register; reg/reg and reg/mem forms, with
// masked (_su) pattern variants supplied to AVX512_maskable_cmp.
multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
  defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
                                (ins VTI.RC:$src1, VTI.RC:$src2),
                                "vpshufbitqmb",
                                "$src2, $src1", "$src1, $src2",
                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
                                 (VTI.VT VTI.RC:$src2)),
                                (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
                                 (VTI.VT VTI.RC:$src2))>, EVEX, VVVV, T8, PD,
                                Sched<[sched]>;
  defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
                                (ins VTI.RC:$src1, VTI.MemOp:$src2),
                                "vpshufbitqmb",
                                "$src2, $src1", "$src1, $src2",
                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
                                 (VTI.VT (VTI.LdFrag addr:$src2))),
                                (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
                                 (VTI.VT (VTI.LdFrag addr:$src2)))>,
                                EVEX, VVVV, EVEX_CD8<8, CD8VF>, T8, PD,
                                Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasBITALG] in
  defm Z      : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512;
  let Predicates = [HasBITALG, HasVLX] in {
    defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256;
    defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPSHUFBITQMB?
defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>;

//===----------------------------------------------------------------------===//
// GFNI
//===----------------------------------------------------------------------===//

multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
                                   X86SchedWriteWidths sched> {
  let Predicates = [HasGFNI, HasAVX512] in
  defm Z      : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>,
                EVEX_V512;
  let Predicates = [HasGFNI, HasVLX] in {
    defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>,
                EVEX_V256;
    defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>,
                EVEX_V128;
  }
}

defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb,
                                          SchedWriteVecALU>,
                                          EVEX_CD8<8, CD8VF>, T8;

// Adds the qword-broadcast memory form on top of the reg/reg/imm and
// reg/mem/imm forms inherited from avx512_3Op_rm_imm8.
multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
                                      X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
                                      X86VectorVTInfo BcstVTI>
           : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
  let ExeDomain = VTI.ExeDomain in
  defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src1, BcstVTI.ScalarMemOp:$src2, u8imm:$src3),
                OpStr, "$src3, ${src2}"#BcstVTI.BroadcastStr#", $src1",
                "$src1, ${src2}"#BcstVTI.BroadcastStr#", $src3",
                (OpNode (VTI.VT VTI.RC:$src1),
                 (bitconvert (BcstVTI.VT (X86VBroadcastld64 addr:$src2))),
                 (i8 timm:$src3))>, EVEX_B,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     X86SchedWriteWidths sched> {
  let Predicates = [HasGFNI, HasAVX512] in
  defm Z      : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM,
                                           v64i8_info, v8i64_info>, EVEX_V512;
  let Predicates = [HasGFNI, HasVLX] in {
    defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM,
                                           v32i8x_info, v4i64x_info>, EVEX_V256;
    defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM,
                                           v16i8x_info, v2i64x_info>, EVEX_V128;
  }
}

defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
                         X86GF2P8affineinvqb, SchedWriteVecIMul>,
                         EVEX, VVVV, EVEX_CD8<8, CD8VF>, REX_W, AVX512AIi8Base;
defm VGF2P8AFFINEQB    : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
                         X86GF2P8affineqb, SchedWriteVecIMul>,
                         EVEX, VVVV, EVEX_CD8<8, CD8VF>, REX_W, AVX512AIi8Base;


//===----------------------------------------------------------------------===//
// AVX5124FMAPS
//===----------------------------------------------------------------------===//

// Assembler-only (empty pattern) definitions; selection happens elsewhere.
let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle,
    Constraints = "$src1 = $dst", Uses = [MXCSR], mayRaiseFPException = 1 in {
defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info,
                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                    "v4fmaddps", "$src3, $src2", "$src2, $src3",
                    []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>,
                    Sched<[SchedWriteFMA.ZMM.Folded]>;

defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info,
                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                     "v4fnmaddps", "$src3, $src2", "$src2, $src3",
                     []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>,
                     Sched<[SchedWriteFMA.ZMM.Folded]>;

defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info,
                    (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
                    "v4fmaddss", "$src3, $src2", "$src2, $src3",
                    []>, VEX_LIG, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VF>,
                    Sched<[SchedWriteFMA.Scl.Folded]>;

defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info,
                     (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
                     "v4fnmaddss", "$src3, $src2", "$src2, $src3",
                     []>, VEX_LIG, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VF>,
                     Sched<[SchedWriteFMA.Scl.Folded]>;
}

//===----------------------------------------------------------------------===//
// AVX5124VNNIW
//===----------------------------------------------------------------------===//

let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt,
    Constraints = "$src1 = $dst" in {
defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info,
                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                    "vp4dpwssd", "$src3, $src2", "$src2, $src3",
                    []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>,
                    Sched<[SchedWriteFMA.ZMM.Folded]>;

defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                     "vp4dpwssds", "$src3, $src2", "$src2, $src3",
                     []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>,
                     Sched<[SchedWriteFMA.ZMM.Folded]>;
}

// Spill/reload pseudos for VK16PAIR mask-register pairs.
let hasSideEffects = 0 in {
  let mayStore = 1, SchedRW = [WriteFStoreX] in
  def MASKPAIR16STORE : PseudoI<(outs), (ins anymem:$dst, VK16PAIR:$src), []>;
  let mayLoad = 1, SchedRW = [WriteFLoadX] in
  def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src), []>;
}

//===----------------------------------------------------------------------===//
// VP2INTERSECT
//===----------------------------------------------------------------------===//

// vp2intersect writes a mask-register pair (KRPC); reg, mem, broadcast forms.
multiclass avx512_vp2intersect_modes<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  def rr : I<0x68, MRMSrcReg,
                  (outs _.KRPC:$dst),
                  (ins _.RC:$src1, _.RC:$src2),
                  !strconcat("vp2intersect", _.Suffix,
                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set _.KRPC:$dst, (X86vp2intersect
                                        _.RC:$src1, (_.VT _.RC:$src2)))]>,
                  EVEX, VVVV, T8, XD, Sched<[sched]>;

  def rm : I<0x68, MRMSrcMem,
                  (outs _.KRPC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2),
                  !strconcat("vp2intersect", _.Suffix,
                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set _.KRPC:$dst, (X86vp2intersect
                                        _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
                  EVEX, VVVV, T8, XD, EVEX_CD8<_.EltSize, CD8VF>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;

  def rmb : I<0x68, MRMSrcMem,
                  (outs _.KRPC:$dst),
                  (ins _.RC:$src1, _.ScalarMemOp:$src2),
                  !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr,
                             ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"),
                  [(set _.KRPC:$dst, (X86vp2intersect
                                        _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))]>,
                  EVEX, VVVV, T8, XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_vp2intersect<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512, HasVP2INTERSECT] in
  defm Z : avx512_vp2intersect_modes<sched.ZMM, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in {
    defm Z256 : avx512_vp2intersect_modes<sched.YMM, _.info256>, EVEX_V256;
    defm Z128 : avx512_vp2intersect_modes<sched.XMM, _.info128>, EVEX_V128;
  }
}

let ExeDomain = SSEPackedInt in {
defm VP2INTERSECTD : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i32_info>;
defm VP2INTERSECTQ : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i64_info>, REX_W;
}

// Two-source binop where source and destination VTs differ (e.g. f32 -> bf16).
multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
                             X86SchedWriteWidths sched,
                             AVX512VLVectorVTInfo _SrcVTInfo,
                             AVX512VLVectorVTInfo _DstVTInfo,
                             SDNode OpNode, Predicate prd,
                             bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
                                   _SrcVTInfo.info512, _DstVTInfo.info512,
                                   _SrcVTInfo.info512, IsCommutable>,
                                   EVEX_V512, EVEX_CD8<32, CD8VF>;
  let Predicates = [HasVLX, prd] in {
    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
                                      _SrcVTInfo.info256, _DstVTInfo.info256,
                                      _SrcVTInfo.info256, IsCommutable>,
                                      EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
                                      _SrcVTInfo.info128, _DstVTInfo.info128,
                                      _SrcVTInfo.info128, IsCommutable>,
                                      EVEX_V128, EVEX_CD8<32, CD8VF>;
  }
}

let ExeDomain = SSEPackedSingle in
defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16",
                                        SchedWriteCvtPD2PS, //FIXME: Should be SchedWriteCvtPS2BF
                                        avx512vl_f32_info, avx512vl_bf16_info,
                                        X86cvtne2ps2bf16, HasBF16, 0>, T8, XD;

// Truncate Float to BFloat16
multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr,
                             X86SchedWriteWidths sched> {
  let ExeDomain = SSEPackedSingle in {
    let Predicates = [HasBF16], Uses = []<Register>, mayRaiseFPException = 0 in {
      defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16bf16x_info, v16f32_info,
                              X86cvtneps2bf16, X86cvtneps2bf16, sched.ZMM>, EVEX_V512;
    }
    let Predicates = [HasBF16, HasVLX] in {
      let Uses = []<Register>, mayRaiseFPException = 0 in {
        defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8bf16x_info, v4f32x_info,
                                   null_frag, null_frag, sched.XMM, "{1to4}", "{x}", f128mem,
                                   VK4WM>, EVEX_V128;
        defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8bf16x_info, v8f32x_info,
                                   X86cvtneps2bf16, X86cvtneps2bf16,
                                   sched.YMM, "{1to8}", "{y}">, EVEX_V256;
      }
    } // Predicates = [HasBF16, HasVLX]
  } // ExeDomain = SSEPackedSingle

  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                   VR128X:$src), 0>;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, 12628 f128mem:$src), 0, "intel">; 12629 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}", 12630 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, 12631 VR256X:$src), 0>; 12632 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}", 12633 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, 12634 f256mem:$src), 0, "intel">; 12635} 12636 12637defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16", 12638 SchedWriteCvtPD2PS>, T8, XS, 12639 EVEX_CD8<32, CD8VF>; 12640 12641let Predicates = [HasBF16, HasVLX] in { 12642 // Special patterns to allow use of X86mcvtneps2bf16 for masking. Instruction 12643 // patterns have been disabled with null_frag. 12644 def : Pat<(v8bf16 (X86cvtneps2bf16 (v4f32 VR128X:$src))), 12645 (VCVTNEPS2BF16Z128rr VR128X:$src)>; 12646 def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), (v8bf16 VR128X:$src0), 12647 VK4WM:$mask), 12648 (VCVTNEPS2BF16Z128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src)>; 12649 def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), v8bf16x_info.ImmAllZerosV, 12650 VK4WM:$mask), 12651 (VCVTNEPS2BF16Z128rrkz VK4WM:$mask, VR128X:$src)>; 12652 12653 def : Pat<(v8bf16 (X86cvtneps2bf16 (loadv4f32 addr:$src))), 12654 (VCVTNEPS2BF16Z128rm addr:$src)>; 12655 def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), (v8bf16 VR128X:$src0), 12656 VK4WM:$mask), 12657 (VCVTNEPS2BF16Z128rmk VR128X:$src0, VK4WM:$mask, addr:$src)>; 12658 def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), v8bf16x_info.ImmAllZerosV, 12659 VK4WM:$mask), 12660 (VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>; 12661 12662 def : Pat<(v8bf16 (X86cvtneps2bf16 (v4f32 12663 (X86VBroadcastld32 addr:$src)))), 12664 (VCVTNEPS2BF16Z128rmb addr:$src)>; 12665 def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)), 12666 (v8bf16 VR128X:$src0), VK4WM:$mask), 12667 (VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>; 12668 def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)), 12669 
v8bf16x_info.ImmAllZerosV, VK4WM:$mask), 12670 (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>; 12671 12672 def : Pat<(v8bf16 (int_x86_vcvtneps2bf16128 (v4f32 VR128X:$src))), 12673 (VCVTNEPS2BF16Z128rr VR128X:$src)>; 12674 def : Pat<(v8bf16 (int_x86_vcvtneps2bf16128 (loadv4f32 addr:$src))), 12675 (VCVTNEPS2BF16Z128rm addr:$src)>; 12676 12677 def : Pat<(v8bf16 (int_x86_vcvtneps2bf16256 (v8f32 VR256X:$src))), 12678 (VCVTNEPS2BF16Z256rr VR256X:$src)>; 12679 def : Pat<(v8bf16 (int_x86_vcvtneps2bf16256 (loadv8f32 addr:$src))), 12680 (VCVTNEPS2BF16Z256rm addr:$src)>; 12681 12682 def : Pat<(v8bf16 (X86VBroadcastld16 addr:$src)), 12683 (VPBROADCASTWZ128rm addr:$src)>; 12684 def : Pat<(v16bf16 (X86VBroadcastld16 addr:$src)), 12685 (VPBROADCASTWZ256rm addr:$src)>; 12686 12687 def : Pat<(v8bf16 (X86VBroadcast (v8bf16 VR128X:$src))), 12688 (VPBROADCASTWZ128rr VR128X:$src)>; 12689 def : Pat<(v16bf16 (X86VBroadcast (v8bf16 VR128X:$src))), 12690 (VPBROADCASTWZ256rr VR128X:$src)>; 12691 12692 def : Pat<(v8bf16 (X86vfpround (v8f32 VR256X:$src))), 12693 (VCVTNEPS2BF16Z256rr VR256X:$src)>; 12694 def : Pat<(v8bf16 (X86vfpround (loadv8f32 addr:$src))), 12695 (VCVTNEPS2BF16Z256rm addr:$src)>; 12696 12697 // TODO: No scalar broadcast due to we don't support legal scalar bf16 so far. 12698} 12699 12700let Predicates = [HasBF16] in { 12701 def : Pat<(v32bf16 (X86VBroadcastld16 addr:$src)), 12702 (VPBROADCASTWZrm addr:$src)>; 12703 12704 def : Pat<(v32bf16 (X86VBroadcast (v8bf16 VR128X:$src))), 12705 (VPBROADCASTWZrr VR128X:$src)>; 12706 12707 def : Pat<(v16bf16 (X86vfpround (v16f32 VR512:$src))), 12708 (VCVTNEPS2BF16Zrr VR512:$src)>; 12709 def : Pat<(v16bf16 (X86vfpround (loadv16f32 addr:$src))), 12710 (VCVTNEPS2BF16Zrm addr:$src)>; 12711 // TODO: No scalar broadcast due to we don't support legal scalar bf16 so far. 
12712} 12713 12714let Constraints = "$src1 = $dst" in { 12715multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, 12716 X86FoldableSchedWrite sched, 12717 X86VectorVTInfo _, X86VectorVTInfo src_v> { 12718 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 12719 (ins src_v.RC:$src2, src_v.RC:$src3), 12720 OpcodeStr, "$src3, $src2", "$src2, $src3", 12721 (_.VT (OpNode _.RC:$src1, src_v.RC:$src2, src_v.RC:$src3))>, 12722 EVEX, VVVV, Sched<[sched]>; 12723 12724 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 12725 (ins src_v.RC:$src2, src_v.MemOp:$src3), 12726 OpcodeStr, "$src3, $src2", "$src2, $src3", 12727 (_.VT (OpNode _.RC:$src1, src_v.RC:$src2, 12728 (src_v.LdFrag addr:$src3)))>, EVEX, VVVV, 12729 Sched<[sched.Folded, sched.ReadAfterFold]>; 12730 12731 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 12732 (ins src_v.RC:$src2, f32mem:$src3), 12733 OpcodeStr, 12734 !strconcat("${src3}", _.BroadcastStr,", $src2"), 12735 !strconcat("$src2, ${src3}", _.BroadcastStr), 12736 (_.VT (OpNode _.RC:$src1, src_v.RC:$src2, 12737 (src_v.VT (src_v.BroadcastLdFrag addr:$src3))))>, 12738 EVEX_B, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>; 12739 12740} 12741} // Constraints = "$src1 = $dst" 12742 12743multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, 12744 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _, 12745 AVX512VLVectorVTInfo src_v, Predicate prd> { 12746 let Predicates = [prd] in { 12747 defm Z : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512, 12748 src_v.info512>, EVEX_V512; 12749 } 12750 let Predicates = [HasVLX, prd] in { 12751 defm Z256 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256, 12752 src_v.info256>, EVEX_V256; 12753 defm Z128 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128, 12754 src_v.info128>, EVEX_V128; 12755 } 12756} 12757 12758let ExeDomain = SSEPackedSingle in 12759defm VDPBF16PS : 
avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps, SchedWriteFMA, 12760 avx512vl_f32_info, avx512vl_bf16_info, 12761 HasBF16>, T8, XS, EVEX_CD8<32, CD8VF>; 12762 12763//===----------------------------------------------------------------------===// 12764// AVX512FP16 12765//===----------------------------------------------------------------------===// 12766 12767let Predicates = [HasFP16] in { 12768// Move word ( r/m16) to Packed word 12769def VMOVW2SHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src), 12770 "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, Sched<[WriteVecMoveFromGpr]>; 12771def VMOVWrm : AVX512<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i16mem:$src), 12772 "vmovw\t{$src, $dst|$dst, $src}", 12773 [(set VR128X:$dst, 12774 (v8i16 (scalar_to_vector (loadi16 addr:$src))))]>, 12775 T_MAP5, PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFLoad]>; 12776 12777def : Pat<(f16 (bitconvert GR16:$src)), 12778 (f16 (COPY_TO_REGCLASS 12779 (VMOVW2SHrr 12780 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), 12781 FR16X))>; 12782def : Pat<(v8i16 (scalar_to_vector (i16 GR16:$src))), 12783 (VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>; 12784def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (and GR32:$src, 0xffff)))), 12785 (VMOVW2SHrr GR32:$src)>; 12786// FIXME: We should really find a way to improve these patterns. 
12787def : Pat<(v8i32 (X86vzmovl 12788 (insert_subvector undef, 12789 (v4i32 (scalar_to_vector 12790 (and GR32:$src, 0xffff))), 12791 (iPTR 0)))), 12792 (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>; 12793def : Pat<(v16i32 (X86vzmovl 12794 (insert_subvector undef, 12795 (v4i32 (scalar_to_vector 12796 (and GR32:$src, 0xffff))), 12797 (iPTR 0)))), 12798 (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>; 12799 12800def : Pat<(v8i16 (X86vzmovl (scalar_to_vector (i16 GR16:$src)))), 12801 (VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>; 12802 12803// AVX 128-bit movw instruction write zeros in the high 128-bit part. 12804def : Pat<(v8i16 (X86vzload16 addr:$src)), 12805 (VMOVWrm addr:$src)>; 12806def : Pat<(v16i16 (X86vzload16 addr:$src)), 12807 (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>; 12808 12809// Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext. 12810def : Pat<(v32i16 (X86vzload16 addr:$src)), 12811 (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>; 12812 12813def : Pat<(v4i32 (scalar_to_vector (i32 (extloadi16 addr:$src)))), 12814 (VMOVWrm addr:$src)>; 12815def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (i32 (zextloadi16 addr:$src))))), 12816 (VMOVWrm addr:$src)>; 12817def : Pat<(v8i32 (X86vzmovl 12818 (insert_subvector undef, 12819 (v4i32 (scalar_to_vector 12820 (i32 (zextloadi16 addr:$src)))), 12821 (iPTR 0)))), 12822 (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>; 12823def : Pat<(v16i32 (X86vzmovl 12824 (insert_subvector undef, 12825 (v4i32 (scalar_to_vector 12826 (i32 (zextloadi16 addr:$src)))), 12827 (iPTR 0)))), 12828 (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>; 12829 12830// Move word from xmm register to r/m16 12831def VMOVSH2Wrr : AVX512<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src), 12832 "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, Sched<[WriteVecMoveToGpr]>; 12833def VMOVWmr : AVX512<0x7E, MRMDestMem, (outs), 12834 (ins 
i16mem:$dst, VR128X:$src), 12835 "vmovw\t{$src, $dst|$dst, $src}", 12836 [(store (i16 (extractelt (v8i16 VR128X:$src), 12837 (iPTR 0))), addr:$dst)]>, 12838 T_MAP5, PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFStore]>; 12839 12840def : Pat<(i16 (bitconvert FR16X:$src)), 12841 (i16 (EXTRACT_SUBREG 12842 (VMOVSH2Wrr (COPY_TO_REGCLASS FR16X:$src, VR128X)), 12843 sub_16bit))>; 12844def : Pat<(i16 (extractelt (v8i16 VR128X:$src), (iPTR 0))), 12845 (i16 (EXTRACT_SUBREG (VMOVSH2Wrr VR128X:$src), sub_16bit))>; 12846 12847// Allow "vmovw" to use GR64 12848let hasSideEffects = 0 in { 12849 def VMOVW64toSHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src), 12850 "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>; 12851 def VMOVSHtoW64rr : AVX512<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src), 12852 "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, REX_W, Sched<[WriteVecMoveToGpr]>; 12853} 12854} 12855 12856// Convert 16-bit float to i16/u16 12857multiclass avx512_cvtph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 12858 SDPatternOperator MaskOpNode, SDNode OpNodeRnd, 12859 AVX512VLVectorVTInfo _Dst, 12860 AVX512VLVectorVTInfo _Src, 12861 X86SchedWriteWidths sched> { 12862 let Predicates = [HasFP16] in { 12863 defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512, 12864 OpNode, MaskOpNode, sched.ZMM>, 12865 avx512_vcvt_fp_rc<opc, OpcodeStr, _Dst.info512, _Src.info512, 12866 OpNodeRnd, sched.ZMM>, EVEX_V512; 12867 } 12868 let Predicates = [HasFP16, HasVLX] in { 12869 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128, 12870 OpNode, MaskOpNode, sched.XMM>, EVEX_V128; 12871 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256, 12872 OpNode, MaskOpNode, sched.YMM>, EVEX_V256; 12873 } 12874} 12875 12876// Convert 16-bit float to i16/u16 truncate 12877multiclass avx512_cvttph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 12878 
SDPatternOperator MaskOpNode, SDNode OpNodeRnd, 12879 AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src, 12880 X86SchedWriteWidths sched> { 12881 let Predicates = [HasFP16] in { 12882 defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512, 12883 OpNode, MaskOpNode, sched.ZMM>, 12884 avx512_vcvt_fp_sae<opc, OpcodeStr, _Dst.info512, _Src.info512, 12885 OpNodeRnd, sched.ZMM>, EVEX_V512; 12886 } 12887 let Predicates = [HasFP16, HasVLX] in { 12888 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128, 12889 OpNode, MaskOpNode, sched.XMM>, EVEX_V128; 12890 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256, 12891 OpNode, MaskOpNode, sched.YMM>, EVEX_V256; 12892 } 12893} 12894 12895defm VCVTPH2UW : avx512_cvtph2w<0x7D, "vcvtph2uw", X86cvtp2UInt, X86cvtp2UInt, 12896 X86cvtp2UIntRnd, avx512vl_i16_info, 12897 avx512vl_f16_info, SchedWriteCvtPD2DQ>, 12898 T_MAP5, EVEX_CD8<16, CD8VF>; 12899defm VCVTUW2PH : avx512_cvtph2w<0x7D, "vcvtuw2ph", any_uint_to_fp, uint_to_fp, 12900 X86VUintToFpRnd, avx512vl_f16_info, 12901 avx512vl_i16_info, SchedWriteCvtPD2DQ>, 12902 T_MAP5, XD, EVEX_CD8<16, CD8VF>; 12903defm VCVTTPH2W : avx512_cvttph2w<0x7C, "vcvttph2w", X86any_cvttp2si, 12904 X86cvttp2si, X86cvttp2siSAE, 12905 avx512vl_i16_info, avx512vl_f16_info, 12906 SchedWriteCvtPD2DQ>, T_MAP5, PD, EVEX_CD8<16, CD8VF>; 12907defm VCVTTPH2UW : avx512_cvttph2w<0x7C, "vcvttph2uw", X86any_cvttp2ui, 12908 X86cvttp2ui, X86cvttp2uiSAE, 12909 avx512vl_i16_info, avx512vl_f16_info, 12910 SchedWriteCvtPD2DQ>, T_MAP5, EVEX_CD8<16, CD8VF>; 12911defm VCVTPH2W : avx512_cvtph2w<0x7D, "vcvtph2w", X86cvtp2Int, X86cvtp2Int, 12912 X86cvtp2IntRnd, avx512vl_i16_info, 12913 avx512vl_f16_info, SchedWriteCvtPD2DQ>, 12914 T_MAP5, PD, EVEX_CD8<16, CD8VF>; 12915defm VCVTW2PH : avx512_cvtph2w<0x7D, "vcvtw2ph", any_sint_to_fp, sint_to_fp, 12916 X86VSintToFpRnd, avx512vl_f16_info, 12917 avx512vl_i16_info, SchedWriteCvtPD2DQ>, 12918 T_MAP5, XS, EVEX_CD8<16, CD8VF>; 12919 12920// 
Convert Half to Signed/Unsigned Doubleword 12921multiclass avx512_cvtph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 12922 SDPatternOperator MaskOpNode, SDNode OpNodeRnd, 12923 X86SchedWriteWidths sched> { 12924 let Predicates = [HasFP16] in { 12925 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode, 12926 MaskOpNode, sched.ZMM>, 12927 avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f16x_info, 12928 OpNodeRnd, sched.ZMM>, EVEX_V512; 12929 } 12930 let Predicates = [HasFP16, HasVLX] in { 12931 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode, 12932 MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128; 12933 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode, 12934 MaskOpNode, sched.YMM>, EVEX_V256; 12935 } 12936} 12937 12938// Convert Half to Signed/Unsigned Doubleword with truncation 12939multiclass avx512_cvttph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 12940 SDPatternOperator MaskOpNode, SDNode OpNodeRnd, 12941 X86SchedWriteWidths sched> { 12942 let Predicates = [HasFP16] in { 12943 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode, 12944 MaskOpNode, sched.ZMM>, 12945 avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f16x_info, 12946 OpNodeRnd, sched.ZMM>, EVEX_V512; 12947 } 12948 let Predicates = [HasFP16, HasVLX] in { 12949 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode, 12950 MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128; 12951 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode, 12952 MaskOpNode, sched.YMM>, EVEX_V256; 12953 } 12954} 12955 12956 12957defm VCVTPH2DQ : avx512_cvtph2dq<0x5B, "vcvtph2dq", X86cvtp2Int, X86cvtp2Int, 12958 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5, PD, 12959 EVEX_CD8<16, CD8VH>; 12960defm VCVTPH2UDQ : avx512_cvtph2dq<0x79, "vcvtph2udq", X86cvtp2UInt, X86cvtp2UInt, 12961 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5, 12962 EVEX_CD8<16, 
CD8VH>; 12963 12964defm VCVTTPH2DQ : avx512_cvttph2dq<0x5B, "vcvttph2dq", X86any_cvttp2si, 12965 X86cvttp2si, X86cvttp2siSAE, 12966 SchedWriteCvtPS2DQ>, T_MAP5, XS, 12967 EVEX_CD8<16, CD8VH>; 12968 12969defm VCVTTPH2UDQ : avx512_cvttph2dq<0x78, "vcvttph2udq", X86any_cvttp2ui, 12970 X86cvttp2ui, X86cvttp2uiSAE, 12971 SchedWriteCvtPS2DQ>, T_MAP5, 12972 EVEX_CD8<16, CD8VH>; 12973 12974// Convert Half to Signed/Unsigned Quardword 12975multiclass avx512_cvtph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 12976 SDPatternOperator MaskOpNode, SDNode OpNodeRnd, 12977 X86SchedWriteWidths sched> { 12978 let Predicates = [HasFP16] in { 12979 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode, 12980 MaskOpNode, sched.ZMM>, 12981 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f16x_info, 12982 OpNodeRnd, sched.ZMM>, EVEX_V512; 12983 } 12984 let Predicates = [HasFP16, HasVLX] in { 12985 // Explicitly specified broadcast string, since we take only 2 elements 12986 // from v8f16x_info source 12987 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode, 12988 MaskOpNode, sched.XMM, "{1to2}", "", f32mem>, 12989 EVEX_V128; 12990 // Explicitly specified broadcast string, since we take only 4 elements 12991 // from v8f16x_info source 12992 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode, 12993 MaskOpNode, sched.YMM, "{1to4}", "", f64mem>, 12994 EVEX_V256; 12995 } 12996} 12997 12998// Convert Half to Signed/Unsigned Quardword with truncation 12999multiclass avx512_cvttph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 13000 SDPatternOperator MaskOpNode, SDNode OpNodeRnd, 13001 X86SchedWriteWidths sched> { 13002 let Predicates = [HasFP16] in { 13003 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode, 13004 MaskOpNode, sched.ZMM>, 13005 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f16x_info, 13006 OpNodeRnd, sched.ZMM>, EVEX_V512; 13007 } 13008 let Predicates = [HasFP16, 
HasVLX] in { 13009 // Explicitly specified broadcast string, since we take only 2 elements 13010 // from v8f16x_info source 13011 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode, 13012 MaskOpNode, sched.XMM, "{1to2}", "", f32mem>, EVEX_V128; 13013 // Explicitly specified broadcast string, since we take only 4 elements 13014 // from v8f16x_info source 13015 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode, 13016 MaskOpNode, sched.YMM, "{1to4}", "", f64mem>, EVEX_V256; 13017 } 13018} 13019 13020defm VCVTPH2QQ : avx512_cvtph2qq<0x7B, "vcvtph2qq", X86cvtp2Int, X86cvtp2Int, 13021 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5, PD, 13022 EVEX_CD8<16, CD8VQ>; 13023 13024defm VCVTPH2UQQ : avx512_cvtph2qq<0x79, "vcvtph2uqq", X86cvtp2UInt, X86cvtp2UInt, 13025 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5, PD, 13026 EVEX_CD8<16, CD8VQ>; 13027 13028defm VCVTTPH2QQ : avx512_cvttph2qq<0x7A, "vcvttph2qq", X86any_cvttp2si, 13029 X86cvttp2si, X86cvttp2siSAE, 13030 SchedWriteCvtPS2DQ>, T_MAP5, PD, 13031 EVEX_CD8<16, CD8VQ>; 13032 13033defm VCVTTPH2UQQ : avx512_cvttph2qq<0x78, "vcvttph2uqq", X86any_cvttp2ui, 13034 X86cvttp2ui, X86cvttp2uiSAE, 13035 SchedWriteCvtPS2DQ>, T_MAP5, PD, 13036 EVEX_CD8<16, CD8VQ>; 13037 13038// Convert Signed/Unsigned Quardword to Half 13039multiclass avx512_cvtqq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 13040 SDPatternOperator MaskOpNode, SDNode OpNodeRnd, 13041 X86SchedWriteWidths sched> { 13042 // we need "x"/"y"/"z" suffixes in order to distinguish between 128, 256 and 13043 // 512 memory forms of these instructions in Asm Parcer. They have the same 13044 // dest type - 'v8f16x_info'. We also specify the broadcast string explicitly 13045 // due to the same reason. 
13046 let Predicates = [HasFP16] in { 13047 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8i64_info, OpNode, 13048 MaskOpNode, sched.ZMM, "{1to8}", "{z}">, 13049 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8i64_info, 13050 OpNodeRnd, sched.ZMM>, EVEX_V512; 13051 } 13052 let Predicates = [HasFP16, HasVLX] in { 13053 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2i64x_info, 13054 null_frag, null_frag, sched.XMM, "{1to2}", "{x}", 13055 i128mem, VK2WM>, EVEX_V128; 13056 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4i64x_info, 13057 null_frag, null_frag, sched.YMM, "{1to4}", "{y}", 13058 i256mem, VK4WM>, EVEX_V256; 13059 } 13060 13061 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}", 13062 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, 13063 VR128X:$src), 0, "att">; 13064 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", 13065 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst, 13066 VK2WM:$mask, VR128X:$src), 0, "att">; 13067 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}", 13068 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst, 13069 VK2WM:$mask, VR128X:$src), 0, "att">; 13070 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}", 13071 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, 13072 i64mem:$src), 0, "att">; 13073 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|" 13074 "$dst {${mask}}, ${src}{1to2}}", 13075 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst, 13076 VK2WM:$mask, i64mem:$src), 0, "att">; 13077 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|" 13078 "$dst {${mask}} {z}, ${src}{1to2}}", 13079 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst, 13080 VK2WM:$mask, i64mem:$src), 0, "att">; 13081 13082 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}", 13083 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, 13084 VR256X:$src), 0, "att">; 13085 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|" 
13086 "$dst {${mask}}, $src}", 13087 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst, 13088 VK4WM:$mask, VR256X:$src), 0, "att">; 13089 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|" 13090 "$dst {${mask}} {z}, $src}", 13091 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst, 13092 VK4WM:$mask, VR256X:$src), 0, "att">; 13093 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}", 13094 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, 13095 i64mem:$src), 0, "att">; 13096 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|" 13097 "$dst {${mask}}, ${src}{1to4}}", 13098 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst, 13099 VK4WM:$mask, i64mem:$src), 0, "att">; 13100 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|" 13101 "$dst {${mask}} {z}, ${src}{1to4}}", 13102 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst, 13103 VK4WM:$mask, i64mem:$src), 0, "att">; 13104 13105 def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}", 13106 (!cast<Instruction>(NAME # "Zrr") VR128X:$dst, 13107 VR512:$src), 0, "att">; 13108 def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|" 13109 "$dst {${mask}}, $src}", 13110 (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst, 13111 VK8WM:$mask, VR512:$src), 0, "att">; 13112 def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|" 13113 "$dst {${mask}} {z}, $src}", 13114 (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst, 13115 VK8WM:$mask, VR512:$src), 0, "att">; 13116 def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}", 13117 (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst, 13118 i64mem:$src), 0, "att">; 13119 def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|" 13120 "$dst {${mask}}, ${src}{1to8}}", 13121 (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst, 13122 VK8WM:$mask, i64mem:$src), 0, "att">; 13123 def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|" 13124 "$dst {${mask}} {z}, ${src}{1to8}}", 13125 (!cast<Instruction>(NAME # "Zrmbkz") 
VR128X:$dst, 13126 VK8WM:$mask, i64mem:$src), 0, "att">; 13127} 13128 13129defm VCVTQQ2PH : avx512_cvtqq2ph<0x5B, "vcvtqq2ph", any_sint_to_fp, sint_to_fp, 13130 X86VSintToFpRnd, SchedWriteCvtDQ2PS>, REX_W, T_MAP5, 13131 EVEX_CD8<64, CD8VF>; 13132 13133defm VCVTUQQ2PH : avx512_cvtqq2ph<0x7A, "vcvtuqq2ph", any_uint_to_fp, uint_to_fp, 13134 X86VUintToFpRnd, SchedWriteCvtDQ2PS>, REX_W, T_MAP5, XD, 13135 EVEX_CD8<64, CD8VF>; 13136 13137// Convert half to signed/unsigned int 32/64 13138defm VCVTSH2SIZ: avx512_cvt_s_int_round<0x2D, f16x_info, i32x_info, X86cvts2si, 13139 X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{l}", HasFP16>, 13140 T_MAP5, XS, EVEX_CD8<16, CD8VT1>; 13141defm VCVTSH2SI64Z: avx512_cvt_s_int_round<0x2D, f16x_info, i64x_info, X86cvts2si, 13142 X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{q}", HasFP16>, 13143 T_MAP5, XS, REX_W, EVEX_CD8<16, CD8VT1>; 13144defm VCVTSH2USIZ: avx512_cvt_s_int_round<0x79, f16x_info, i32x_info, X86cvts2usi, 13145 X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{l}", HasFP16>, 13146 T_MAP5, XS, EVEX_CD8<16, CD8VT1>; 13147defm VCVTSH2USI64Z: avx512_cvt_s_int_round<0x79, f16x_info, i64x_info, X86cvts2usi, 13148 X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{q}", HasFP16>, 13149 T_MAP5, XS, REX_W, EVEX_CD8<16, CD8VT1>; 13150 13151defm VCVTTSH2SIZ: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i32x_info, 13152 any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I, 13153 "{l}", HasFP16>, T_MAP5, XS, EVEX_CD8<16, CD8VT1>; 13154defm VCVTTSH2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i64x_info, 13155 any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I, 13156 "{q}", HasFP16>, REX_W, T_MAP5, XS, EVEX_CD8<16, CD8VT1>; 13157defm VCVTTSH2USIZ: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i32x_info, 13158 any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I, 13159 "{l}", HasFP16>, T_MAP5, XS, EVEX_CD8<16, CD8VT1>; 13160defm VCVTTSH2USI64Z: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i64x_info, 13161 
                                any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
                                "{q}", HasFP16>, T_MAP5, XS, REX_W, EVEX_CD8<16, CD8VT1>;

//===----------------------------------------------------------------------===//
// AVX512-FP16 scalar integer -> half conversions (VCVT[U]SI2SH).
//===----------------------------------------------------------------------===//
let Predicates = [HasFP16] in {
  // Signed/unsigned GR32/GR64 -> f16 conversions, sharing the generic
  // avx512_vcvtsi_common machinery used by the f32/f64 forms above.
  defm VCVTSI2SHZ : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR32,
                                         v8f16x_info, i32mem, loadi32, "cvtsi2sh", "l">,
                                         T_MAP5, XS, EVEX_CD8<32, CD8VT1>;
  defm VCVTSI642SHZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR64,
                                          v8f16x_info, i64mem, loadi64, "cvtsi2sh","q">,
                                          T_MAP5, XS, REX_W, EVEX_CD8<64, CD8VT1>;
  defm VCVTUSI2SHZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR32,
                                          v8f16x_info, i32mem, loadi32,
                                          "cvtusi2sh","l">, T_MAP5, XS, EVEX_CD8<32, CD8VT1>;
  defm VCVTUSI642SHZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR64,
                                            v8f16x_info, i64mem, loadi64, "cvtusi2sh", "q">,
                                            T_MAP5, XS, REX_W, EVEX_CD8<64, CD8VT1>;

  // AT&T-syntax aliases for the suffix-less memory forms; these default the
  // operand size to the 32-bit (i32mem) variant.
  def : InstAlias<"vcvtsi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
              (VCVTSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;

  def : InstAlias<"vcvtusi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
              (VCVTUSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;

  // Plain scalar int -> f16 ISel patterns. The upper elements of the result
  // register are don't-care, hence the IMPLICIT_DEF first operand.
  def : Pat<(f16 (any_sint_to_fp (loadi32 addr:$src))),
            (VCVTSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f16 (any_sint_to_fp (loadi64 addr:$src))),
            (VCVTSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;

  def : Pat<(f16 (any_sint_to_fp GR32:$src)),
            (VCVTSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
  def : Pat<(f16 (any_sint_to_fp GR64:$src)),
            (VCVTSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;

  def : Pat<(f16 (any_uint_to_fp (loadi32 addr:$src))),
            (VCVTUSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f16 (any_uint_to_fp (loadi64 addr:$src))),
            (VCVTUSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;

  def : Pat<(f16 (any_uint_to_fp GR32:$src)),
            (VCVTUSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
  def : Pat<(f16 (any_uint_to_fp GR64:$src)),
            (VCVTUSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;

  // Patterns used for matching vcvtsi2sh intrinsic sequences from clang
  // which produce unnecessary vmovsh instructions. Matching the Movsh +
  // scalar_to_vector + convert sequence directly to the _Int form folds the
  // blend into the conversion instruction.
  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR64:$src)))))),
            (VCVTSI642SHZrr_Int VR128X:$dst, GR64:$src)>;

  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi64 addr:$src))))))),
            (VCVTSI642SHZrm_Int VR128X:$dst, addr:$src)>;

  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR32:$src)))))),
            (VCVTSI2SHZrr_Int VR128X:$dst, GR32:$src)>;

  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi32 addr:$src))))))),
            (VCVTSI2SHZrm_Int VR128X:$dst, addr:$src)>;

  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR64:$src)))))),
            (VCVTUSI642SHZrr_Int VR128X:$dst, GR64:$src)>;

  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi64 addr:$src))))))),
            (VCVTUSI642SHZrm_Int VR128X:$dst, addr:$src)>;

  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR32:$src)))))),
            (VCVTUSI2SHZrr_Int VR128X:$dst, GR32:$src)>;

  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi32 addr:$src))))))),
            (VCVTUSI2SHZrm_Int VR128X:$dst, addr:$src)>;
} // Predicates = [HasFP16]

//===----------------------------------------------------------------------===//
// Packed qword -> half conversions (VCVT[U]QQ2PH, 128/256-bit VLX forms).
//===----------------------------------------------------------------------===//
let Predicates = [HasFP16, HasVLX] in {
  // Special patterns to allow use of X86VMSintToFP for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v8f16 (X86any_VSintToFP (v4i64 VR256X:$src))),
            (VCVTQQ2PHZ256rr VR256X:$src)>;
  // Merge-masking form: $src0 supplies the preserved elements.
  def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
                           VK4WM:$mask),
            (VCVTQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
  // Zero-masking form: masked-off elements become zero.
  def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
                           VK4WM:$mask),
            (VCVTQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;

  def : Pat<(v8f16 (X86any_VSintToFP (loadv4i64 addr:$src))),
            (VCVTQQ2PHZ256rm addr:$src)>;
  def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
                           VK4WM:$mask),
            (VCVTQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
                           VK4WM:$mask),
            (VCVTQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;

  // Broadcast-from-memory (embedded broadcast) forms.
  def : Pat<(v8f16 (X86any_VSintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
            (VCVTQQ2PHZ256rmb addr:$src)>;
  def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
                           (v8f16 VR128X:$src0), VK4WM:$mask),
            (VCVTQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
                           v8f16x_info.ImmAllZerosV, VK4WM:$mask),
            (VCVTQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;

  // 128-bit (v2i64 source) variants; result still uses a v8f16 register.
  def : Pat<(v8f16 (X86any_VSintToFP (v2i64 VR128X:$src))),
            (VCVTQQ2PHZ128rr VR128X:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v8f16 (X86any_VSintToFP (loadv2i64 addr:$src))),
            (VCVTQQ2PHZ128rm addr:$src)>;
  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
            (VCVTQQ2PHZ128rmb addr:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
                           (v8f16 VR128X:$src0), VK2WM:$mask),
            (VCVTQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
                           v8f16x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;

  // Special patterns to allow use of X86VMUintToFP for masking. Instruction
  // patterns have been disabled with null_frag. Same structure as the signed
  // set above, using the unsigned VCVTUQQ2PH opcodes.
  def : Pat<(v8f16 (X86any_VUintToFP (v4i64 VR256X:$src))),
            (VCVTUQQ2PHZ256rr VR256X:$src)>;
  def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
                           VK4WM:$mask),
            (VCVTUQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
  def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
                           VK4WM:$mask),
            (VCVTUQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;

  def : Pat<(v8f16 (X86any_VUintToFP (loadv4i64 addr:$src))),
            (VCVTUQQ2PHZ256rm addr:$src)>;
  def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
                           VK4WM:$mask),
            (VCVTUQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
                           VK4WM:$mask),
            (VCVTUQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_VUintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
            (VCVTUQQ2PHZ256rmb addr:$src)>;
  def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
                           (v8f16 VR128X:$src0), VK4WM:$mask),
            (VCVTUQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
                           v8f16x_info.ImmAllZerosV, VK4WM:$mask),
            (VCVTUQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_VUintToFP (v2i64 VR128X:$src))),
            (VCVTUQQ2PHZ128rr VR128X:$src)>;
  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTUQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTUQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v8f16 (X86any_VUintToFP (loadv2i64 addr:$src))),
            (VCVTUQQ2PHZ128rm addr:$src)>;
  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTUQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTUQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
            (VCVTUQQ2PHZ128rmb addr:$src)>;
  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
                           (v8f16 VR128X:$src0), VK2WM:$mask),
            (VCVTUQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
                           v8f16x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTUQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
}

//===----------------------------------------------------------------------===//
// AVX512-FP16 complex multiply / multiply-accumulate (VF[C]MADDCPH,
// VF[C]MULCPH and their scalar forms).
//
// NOTE(review): these ops are instantiated with f32 vector-type infos
// (v16f32_info etc.) even though the mnemonics are *ph — presumably each
// complex half (real+imag f16 pair) occupies one 32-bit lane; confirm against
// the AVX512-FP16 ISA spec.
//===----------------------------------------------------------------------===//

// $src1 is tied to $dst (accumulator); @earlyclobber keeps $dst from being
// allocated to the same register as the untied sources $src2/$src3.
let Constraints = "@earlyclobber $dst, $src1 = $dst" in {
  multiclass avx512_cfmaop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _, bit IsCommutable> {
    // Register-register form.
    defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
            (ins _.RC:$src2, _.RC:$src3),
            OpcodeStr, "$src3, $src2", "$src2, $src3",
            (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), IsCommutable>, EVEX, VVVV;

    // Full-width memory operand form.
    defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
            (ins _.RC:$src2, _.MemOp:$src3),
            OpcodeStr, "$src3, $src2", "$src2, $src3",
            (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>, EVEX, VVVV;

    // Embedded-broadcast memory form.
    defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
             (ins _.RC:$src2, _.ScalarMemOp:$src3),
             OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr),
             (_.VT (OpNode _.RC:$src2, (_.VT (_.BroadcastLdFrag addr:$src3)), _.RC:$src1))>, EVEX_B, EVEX, VVVV;
  }
} // Constraints = "@earlyclobber $dst, $src1 = $dst"

// Static-rounding (embedded RC) variant of the complex FMA, 512-bit only.
multiclass avx512_cfmaop_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86VectorVTInfo _> {
  let Constraints = "@earlyclobber $dst, $src1 = $dst" in
  defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc)))>,
          EVEX, VVVV, EVEX_B, EVEX_RC;
}


// Instantiates 512-bit (HasFP16) plus 256/128-bit (HasVLX) complex FMA forms.
multiclass avx512_cfmaop_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, bit IsCommutable> {
  let Predicates = [HasFP16] in {
    defm Z : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v16f32_info, IsCommutable>,
             avx512_cfmaop_round<opc, OpcodeStr, OpNodeRnd, v16f32_info>,
             EVEX_V512, Sched<[WriteFMAZ]>;
  }
  let Predicates = [HasVLX, HasFP16] in {
    defm Z256 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v8f32x_info, IsCommutable>, EVEX_V256, Sched<[WriteFMAY]>;
    defm Z128 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v4f32x_info, IsCommutable>, EVEX_V128, Sched<[WriteFMAX]>;
  }
}

// Complex multiply (no accumulator); reuses the generic packed-FP machinery
// with an @earlyclobber on $dst and masked-commute disabled (trailing 0).
multiclass avx512_cfmulop_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 SDNode MaskOpNode, SDNode OpNodeRnd, bit IsCommutable> {
  let Predicates = [HasFP16] in {
    defm Z : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
                              WriteFMAZ, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>,
             avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, WriteFMAZ, v16f32_info,
                                    "", "@earlyclobber $dst">, EVEX_V512;
  }
  let Predicates = [HasVLX, HasFP16] in {
    defm Z256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
                                 WriteFMAY, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V256;
    defm Z128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
                                 WriteFMAX, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V128;
  }
}


// Packed complex instructions. All read MXCSR (rounding/exception state).
// The conjugating (C) variants are non-commutable; the plain variants are.
let Uses = [MXCSR] in {
  defm VFMADDCPH : avx512_cfmaop_common<0x56, "vfmaddcph", x86vfmaddc, x86vfmaddcRnd, 1>,
                                        T_MAP6, XS, EVEX_CD8<32, CD8VF>;
  defm VFCMADDCPH : avx512_cfmaop_common<0x56, "vfcmaddcph", x86vfcmaddc, x86vfcmaddcRnd, 0>,
                                         T_MAP6, XD, EVEX_CD8<32, CD8VF>;

  defm VFMULCPH : avx512_cfmulop_common<0xD6, "vfmulcph", x86vfmulc, x86vfmulc,
                                        x86vfmulcRnd, 1>, T_MAP6, XS, EVEX_CD8<32, CD8VF>;
  defm VFCMULCPH : avx512_cfmulop_common<0xD6, "vfcmulcph", x86vfcmulc,
                                         x86vfcmulc, x86vfcmulcRnd, 0>, T_MAP6, XD, EVEX_CD8<32, CD8VF>;
}


// Scalar complex FMA (VF[C]MADDCSH): r/m/rb forms modeled on v4f32 since one
// complex half pair occupies 32 bits (hence ssmem/sse_load_f32 for memory).
multiclass avx512_cfmaop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
                                   bit IsCommutable> {
  let Predicates = [HasFP16], Constraints = "@earlyclobber $dst, $src1 = $dst" in {
    defm r : AVX512_maskable_3src<opc, MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
                        (ins VR128X:$src2, VR128X:$src3), OpcodeStr,
                        "$src3, $src2", "$src2, $src3",
                        (v4f32 (OpNode VR128X:$src2, VR128X:$src3, VR128X:$src1)), IsCommutable>,
                        Sched<[WriteFMAX]>;
    defm m : AVX512_maskable_3src<opc, MRMSrcMem, v4f32x_info, (outs VR128X:$dst),
                        (ins VR128X:$src2, ssmem:$src3), OpcodeStr,
                        "$src3, $src2", "$src2, $src3",
                        (v4f32 (OpNode VR128X:$src2, (sse_load_f32 addr:$src3), VR128X:$src1))>,
                        Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
    defm rb : AVX512_maskable_3src<opc, MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
                        (ins VR128X:$src2, VR128X:$src3, AVX512RC:$rc), OpcodeStr,
                        "$rc, $src3, $src2", "$src2, $src3, $rc",
                        (v4f32 (OpNodeRnd VR128X:$src2, VR128X:$src3, VR128X:$src1, (i32 timm:$rc)))>,
                        EVEX_B, EVEX_RC, Sched<[WriteFMAX]>;
  }
}

// Scalar complex multiply (VF[C]MULCSH): two-source binop, masked via
// X86selects; memory form is never commutable (trailing 0,0,0).
multiclass avx512_cfmbinop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                     SDNode OpNodeRnd, bit IsCommutable> {
  let Predicates = [HasFP16] in {
    defm rr : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
                        (ins VR128X:$src1, VR128X:$src2), OpcodeStr,
                        "$src2, $src1", "$src1, $src2",
                        (v4f32 (OpNode VR128X:$src1, VR128X:$src2)),
                        IsCommutable, IsCommutable, IsCommutable,
                        X86selects, "@earlyclobber $dst">, Sched<[WriteFMAX]>;
    defm rm : AVX512_maskable<opc, MRMSrcMem, f32x_info, (outs VR128X:$dst),
                        (ins VR128X:$src1, ssmem:$src2), OpcodeStr,
                        "$src2, $src1", "$src1, $src2",
                        (v4f32 (OpNode VR128X:$src1, (sse_load_f32 addr:$src2))),
                        0, 0, 0, X86selects, "@earlyclobber $dst">,
                        Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
    defm rrb : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
                        (ins VR128X:$src1, VR128X:$src2, AVX512RC:$rc), OpcodeStr,
                        "$rc, $src2, $src1", "$src1, $src2, $rc",
                        (OpNodeRnd (v4f32 VR128X:$src1), (v4f32 VR128X:$src2), (i32 timm:$rc)),
                        0, 0, 0, X86selects, "@earlyclobber $dst">,
                        EVEX_B, EVEX_RC, Sched<[WriteFMAX]>;
  }
}

// Scalar complex instruction instantiations; all read MXCSR.
let Uses = [MXCSR] in {
  defm VFMADDCSHZ : avx512_cfmaop_sh_common<0x57, "vfmaddcsh", x86vfmaddcSh, x86vfmaddcShRnd, 1>,
                                            T_MAP6, XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX, VVVV;
  defm VFCMADDCSHZ : avx512_cfmaop_sh_common<0x57, "vfcmaddcsh", x86vfcmaddcSh, x86vfcmaddcShRnd, 0>,
                                             T_MAP6, XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX, VVVV;

  defm VFMULCSHZ : avx512_cfmbinop_sh_common<0xD7, "vfmulcsh", x86vfmulcSh, x86vfmulcShRnd, 1>,
                                             T_MAP6, XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX, VVVV;
  defm VFCMULCSHZ : avx512_cfmbinop_sh_common<0xD7, "vfcmulcsh", x86vfcmulcSh, x86vfcmulcShRnd, 0>,
                                              T_MAP6, XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX, VVVV;
}