//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 AVX512 instruction set, defining the
// instructions, and properties of the instructions which are needed for code
// generation, machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//

// This multiclass generates the masking variants from the non-masking
// variant. It only provides the assembly pieces for the masking variants.
// It assumes custom ISel patterns for masking which can be provided as
// template arguments.
//
// Three records are emitted: NAME (unmasked), NAME#k (merge-masking, EVEX_K)
// and NAME#kz (zero-masking, EVEX_KZ). The three commutability bits control
// the isCommutable flag of each variant independently.
multiclass AVX512_maskable_custom<bits<8> O, Format F,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  list<dag> MaskingPattern,
                                  list<dag> ZeroMaskingPattern,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable,
                                  string ClobberConstraint = ""> {
  let isCommutable = IsCommutable, Constraints = ClobberConstraint in
  def NAME: AVX512<O, F, Outs, Ins,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                     "$dst, "#IntelSrcAsm#"}",
                       Pattern>;

  // Prefer over VMOV*rrk Pat<>
  let isCommutable = IsKCommutable in
  def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
                       MaskingPattern>,
              EVEX_K {
    // In case of the 3src subclass this is overridden with a let.
    // Merge the mask-variant tie constraint with any clobber constraint,
    // joining with ", " only when both are non-empty.
    string Constraints = !if(!eq(ClobberConstraint, ""), MaskingConstraint,
                             !if(!eq(MaskingConstraint, ""), ClobberConstraint,
                                 !strconcat(ClobberConstraint, ", ", MaskingConstraint)));
  }

  // Zero mask does not add any restrictions to commute operands transformation.
  // So, it is Ok to use IsCommutable instead of IsKCommutable.
  let isCommutable = IsKZCommutable, // Prefer over VMOV*rrkz Pat<>
      Constraints = ClobberConstraint in
  def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
                                     "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
                       ZeroMaskingPattern>,
              EVEX_KZ;
}


// Common base class of AVX512_maskable and AVX512_maskable_3src.
// Wraps RHS/MaskingRHS dags into the (set _.RC:$dst, ...) patterns expected by
// AVX512_maskable_custom; the zero-masking pattern is derived from RHS via the
// Select operator with an all-zeros passthru.
multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS,
                                  SDPatternOperator Select = vselect_mask,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable,
                                  string ClobberConstraint = ""> :
   AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
                          AttSrcAsm, IntelSrcAsm,
                          [(set _.RC:$dst, RHS)],
                          [(set _.RC:$dst, MaskingRHS)],
                          [(set _.RC:$dst,
                                (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
                          MaskingConstraint, IsCommutable,
                          IsKCommutable, IsKZCommutable, ClobberConstraint>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
// This version uses a separate dag for non-masking and masking.
// Generates unmasked/merge-masked/zero-masked variants from two separate
// pattern dags: RHS is used for the unmasked form, MaskRHS for both masked
// forms (wrapped in vselect_mask with $src0 or all-zeros as passthru).
multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
                                 dag Outs, dag Ins, string OpcodeStr,
                                 string AttSrcAsm, string IntelSrcAsm,
                                 dag RHS, dag MaskRHS,
                                 string ClobberConstraint = "",
                                 bit IsCommutable = 0, bit IsKCommutable = 0,
                                 bit IsKZCommutable = IsCommutable> :
   AVX512_maskable_custom<O, F, Outs, Ins,
                          // Merge masking adds a tied $src0 passthru plus the mask.
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          // Zero masking only adds the mask operand.
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          [(set _.RC:$dst, RHS)],
                          [(set _.RC:$dst,
                              (vselect_mask _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
                          [(set _.RC:$dst,
                              (vselect_mask _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
                          "$src0 = $dst", IsCommutable, IsKCommutable,
                          IsKZCommutable, ClobberConstraint>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS,
                           bit IsCommutable = 0, bit IsKCommutable = 0,
                           bit IsKZCommutable = IsCommutable,
                           SDPatternOperator Select = vselect_mask,
                           string ClobberConstraint = ""> :
   AVX512_maskable_common<O, F, _, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                          (Select _.KRCWM:$mask, RHS, _.RC:$src0),
                          Select, "$src0 = $dst", IsCommutable, IsKCommutable,
                          IsKZCommutable, ClobberConstraint>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the scalar instruction.
// Scalar variant of AVX512_maskable: uses X86selects_mask (scalar select) and
// disables all commutation bits.
multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins, string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS> :
   AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
                   RHS, 0, 0, 0, X86selects_mask>;

// Similar to AVX512_maskable but in this case one of the source operands
// ($src1) is already tied to $dst so we just use that for the preserved
// vector elements. NOTE that the NonTiedIns (the ins dag) should exclude
// $src1.
multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
                                dag Outs, dag NonTiedIns, string OpcodeStr,
                                string AttSrcAsm, string IntelSrcAsm,
                                dag RHS,
                                bit IsCommutable = 0,
                                bit IsKCommutable = 0,
                                SDPatternOperator Select = vselect_mask,
                                bit MaskOnly = 0> :
   AVX512_maskable_common<O, F, _, Outs,
                          !con((ins _.RC:$src1), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          // MaskOnly suppresses the unmasked pattern entirely.
                          !if(MaskOnly, (null_frag), RHS),
                          (Select _.KRCWM:$mask, RHS, _.RC:$src1),
                          Select, "", IsCommutable, IsKCommutable>;

// Similar to AVX512_maskable_3src but in this case the input VT for the tied
// operand differs from the output VT. This requires a bitconvert on
// the preserved vector going into the vselect.
// NOTE: The unmasked pattern is disabled.
multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
                                     X86VectorVTInfo InVT,
                                     dag Outs, dag NonTiedIns, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS, bit IsCommutable = 0> :
   AVX512_maskable_common<O, F, OutVT, Outs,
                          !con((ins InVT.RC:$src1), NonTiedIns),
                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
                          // Bitconvert the tied input so the passthru matches
                          // the input VT inside the select.
                          (vselect_mask InVT.KRCWM:$mask, RHS,
                                        (bitconvert InVT.RC:$src1)),
                          vselect_mask, "", IsCommutable>;

// Scalar flavor of AVX512_maskable_3src: selects elements with
// X86selects_mask instead of vselect_mask.
multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                     dag Outs, dag NonTiedIns, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS,
                                     bit IsCommutable = 0,
                                     bit IsKCommutable = 0,
                                     bit MaskOnly = 0> :
   AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
                        IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
                        X86selects_mask, MaskOnly>;

// Assembly-only maskable variant: caller supplies the (possibly empty)
// pattern list; masked/zero-masked forms get no patterns.
multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern> :
   AVX512_maskable_custom<O, F, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                          "$src0 = $dst">;

// Assembly-only 3src variant: $src1 is the tied source, so no extra $src0
// passthru operand or constraint is added.
multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                       dag Outs, dag NonTiedIns,
                                       string OpcodeStr,
                                       string AttSrcAsm, string IntelSrcAsm,
                                       list<dag> Pattern> :
   AVX512_maskable_custom<O, F, Outs,
                          !con((ins _.RC:$src1), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                          "">;

// Instruction with mask that puts result in mask register,
// like "compare" and "vptest"
// Emits the unmasked (NAME) and merge-masked (NAME#k, EVEX_K) forms only;
// there is no zero-masking variant since the destination is a mask register.
multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
                                      dag Outs,
                                      dag Ins, dag MaskingIns,
                                      string OpcodeStr,
                                      string AttSrcAsm, string IntelSrcAsm,
                                      list<dag> Pattern,
                                      list<dag> MaskingPattern,
                                      bit IsCommutable = 0> {
  let isCommutable = IsCommutable in {
    def NAME: AVX512<O, F, Outs, Ins,
                         OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                       "$dst, "#IntelSrcAsm#"}",
                         Pattern>;

    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
                       MaskingPattern>, EVEX_K;
  }
}

// Wraps RHS/MaskingRHS into (set _.KRC:$dst, ...) patterns — note the result
// goes into the mask register class KRC, not a vector register.
multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                                      dag Outs,
                                      dag Ins, dag MaskingIns,
                                      string OpcodeStr,
                                      string AttSrcAsm, string IntelSrcAsm,
                                      dag RHS, dag MaskingRHS,
                                      bit IsCommutable = 0> :
   AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
                              AttSrcAsm, IntelSrcAsm,
                              [(set _.KRC:$dst, RHS)],
                              [(set _.KRC:$dst, MaskingRHS)], IsCommutable>;

// RHS_su is the single-use form of RHS used under the mask AND, so the
// compare is not duplicated when the result has other uses.
multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                               dag Outs, dag Ins, string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, dag RHS_su, bit IsCommutable = 0> :
   AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
                              !con((ins _.KRCWM:$mask), Ins),
                              OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                              (and _.KRCWM:$mask, RHS_su), IsCommutable>;

// Used by conversion instructions.
// Conversion form: the caller supplies all three fully-formed RHS dags
// (unmasked, merge-masked, zero-masked) since conversions often need
// different nodes for the masked forms.
multiclass AVX512_maskable_cvt<bits<8> O, Format F, X86VectorVTInfo _,
                               dag Outs,
                               dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                               string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, dag MaskingRHS, dag ZeroMaskingRHS> :
   AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
                          AttSrcAsm, IntelSrcAsm,
                          [(set _.RC:$dst, RHS)],
                          [(set _.RC:$dst, MaskingRHS)],
                          [(set _.RC:$dst, ZeroMaskingRHS)],
                          "$src0 = $dst">;

// FMA form: like AVX512_maskable_3src ($src1 is tied to $dst and supplies the
// merge-masking passthru), but with a separate MaskingRHS dag.
multiclass AVX512_maskable_fma<bits<8> O, Format F, X86VectorVTInfo _,
                               dag Outs, dag NonTiedIns, string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, dag MaskingRHS, bit IsCommutable,
                               bit IsKCommutable> :
   AVX512_maskable_custom<O, F, Outs,
                          !con((ins _.RC:$src1), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          [(set _.RC:$dst, RHS)],
                          [(set _.RC:$dst,
                              (vselect_mask _.KRCWM:$mask, MaskingRHS, _.RC:$src1))],
                          [(set _.RC:$dst,
                              (vselect_mask _.KRCWM:$mask, MaskingRHS, _.ImmAllZerosV))],
                          "", IsCommutable, IsKCommutable>;

// Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
// swizzled by ExecutionDomainFix to pxor.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
               [(set VR512:$dst, (v16i32 immAllZerosV))]>;
def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
               [(set VR512:$dst, (v16i32 immAllOnesV))]>;
}

// All-zeros vectors of the remaining 512-bit element types reuse the v16i32
// zero pseudo.
let Predicates = [HasAVX512] in {
def : Pat<(v64i8 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v32i16 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v32f16 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
}

// Alias instructions that allow VPTERNLOG to be used with a mask to create
// a mix of all ones and all zeros elements. This is done this way to force
// the same register to be used as input for all three sources.
let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK16WM:$mask), "",
                           [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
                                                      (v16i32 immAllOnesV),
                                                      (v16i32 immAllZerosV)))]>;
def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK8WM:$mask), "",
                           [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
                                                      (v8i64 immAllOnesV),
                                                      (v8i64 immAllZerosV)))]>;
}

// 128-bit and 256-bit zero pseudos (XMM/YMM with EVEX-extended register
// classes); expanded post-RA like AVX512_512_SET0 above.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
               [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
               [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
}

let Predicates = [HasAVX512] in {
def : Pat<(v8i16 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v16i8 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v2i64 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v8f16 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v4f32 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v2f64 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v32i8 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v16i16 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v4i64 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v16f16 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v8f32 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v4f64 immAllZerosV), (AVX512_256_SET0)>;
}

// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
  def AVX512_FsFLD0SH : I<0, Pseudo, (outs FR16X:$dst), (ins), "",
                          [(set FR16X:$dst, fp16imm0)]>;
  def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
                          [(set FR32X:$dst, fp32imm0)]>;
  def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
                          [(set FR64X:$dst, fp64imm0)]>;
  def AVX512_FsFLD0F128 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
                            [(set VR128X:$dst, fp128imm0)]>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - VECTOR INSERT
//

// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one.
// Defines the register (rr) and memory (rm) forms of a VINSERT instruction.
// vinsert_insert drives the unmasked pattern and vinsert_for_mask the masked
// patterns; either may be null_frag.
multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
                                  X86VectorVTInfo To,
                                  SDPatternOperator vinsert_insert,
                                  SDPatternOperator vinsert_for_mask,
                                  X86FoldableSchedWrite sched> {
  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                                         (From.VT From.RC:$src2),
                                         (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                                           (From.VT From.RC:$src2),
                                           (iPTR imm))>,
                   AVX512AIi8Base, EVEX, VVVV, Sched<[sched]>;
    let mayLoad = 1 in
    defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                                         (From.VT (From.LdFrag addr:$src2)),
                                         (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                                           (From.VT (From.LdFrag addr:$src2)),
                                           (iPTR imm))>, AVX512AIi8Base, EVEX, VVVV,
                   EVEX_CD8<From.EltSize, From.CD8TupleForm>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Passes the same pattern operator for masked and unmasked ops.
multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
                            X86VectorVTInfo To,
                            SDPatternOperator vinsert_insert,
                            X86FoldableSchedWrite sched> :
  vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>;

// Maps an insert_subvector-style PatFrag with alternative element types onto
// an already-defined VINSERT instruction (register and load forms).
multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                                     X86VectorVTInfo To, PatFrag vinsert_insert,
                                     SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> {
  let Predicates = p in {
    def : Pat<(vinsert_insert:$ins
                     (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                     To.RC:$src1, From.RC:$src2,
                     (INSERT_get_vinsert_imm To.RC:$ins)))>;

    def : Pat<(vinsert_insert:$ins
                  (To.VT To.RC:$src1),
                  (From.VT (From.LdFrag addr:$src2)),
                  (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rm")
                  To.RC:$src1, addr:$src2,
                  (INSERT_get_vinsert_imm To.RC:$ins)))>;
  }
}

// Instantiates the full VINSERT family (32x4/64x4 plus the DQI-only
// 64x2/32x8 forms) for a given float/int element-type pair.
multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
                            ValueType EltVT64, int Opcode256,
                            X86FoldableSchedWrite sched> {

  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 vinsert128_insert, sched>, EVEX_V256;

  defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 vinsert128_insert, sched>, EVEX_V512;

  defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 vinsert256_insert, sched>, REX_W, EVEX_V512;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
                                   X86VectorVTInfo< 2, EltVT64, VR128X>,
                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
                                   null_frag, vinsert128_insert, sched>,
                                   EVEX_V256, REX_W;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 null_frag, vinsert128_insert, sched>,
                                 REX_W, EVEX_V512;

    defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 null_frag, vinsert256_insert, sched>,
                                 EVEX_V512;
  }
}

// FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>;
defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>;

// Codegen pattern with the alternative types,
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;

defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;

defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;

// Codegen pattern with the alternative types insert VEC128 into VEC256
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v8f16x_info, v16f16x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v8bf16x_info, v16bf16x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
// Codegen pattern with the alternative types insert VEC128 into VEC512
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v8f16x_info, v32f16_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v8bf16x_info, v32bf16_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
// Codegen pattern with the alternative types insert VEC256 into VEC512
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v16f16x_info, v32f16_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v16bf16x_info, v32bf16_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;


// Masked insert patterns where the mask's element type (Cast) differs from
// the instruction's native To/From types, so the select happens on the Cast
// type with bitconverts around the insert. Covers merge (rrk/rmk) and
// zero-masking (rrkz/rmkz) for both register and load forms.
multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                 X86VectorVTInfo To, X86VectorVTInfo Cast,
                                 PatFrag vinsert_insert,
                                 SDNodeXForm INSERT_get_vinsert_imm,
                                 list<Predicate> p> {
let Predicates = p in {
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT From.RC:$src2),
                                                 (iPTR imm))),
                           Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rrk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT
                                                  (bitconvert
                                                   (From.LdFrag addr:$src2))),
                                                 (iPTR imm))),
                           Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rmk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;

  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT From.RC:$src2),
                                                 (iPTR imm))),
                           Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rrkz")
             Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT (From.LdFrag addr:$src2)),
                                                 (iPTR imm))),
                           Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rmkz")
             Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
}
}

defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
                             v8f32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
                             v4f64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4i32x_info, v8i32x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v8i16x_info, v16i16x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v16i8x_info, v32i8x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
                             v16f32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
                             v8f64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
                             v16f32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
                             v8f64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

// vinsertps - insert f32 to XMM
let ExeDomain = SSEPackedSingle in {
let isCommutable = 1 in
def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
      (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, timm:$src3))]>,
      EVEX, VVVV, Sched<[SchedWriteFShuffle.XMM]>;
def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
      (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1,
                          (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                          timm:$src3))]>,
      EVEX, VVVV, EVEX_CD8<32, CD8VT1>,
      Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
}

//===----------------------------------------------------------------------===//
// AVX-512 VECTOR EXTRACT
//---

// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one.
// Defines the register (rr), store (mr) and masked-store (mrk) forms of a
// VEXTRACT instruction. vextract_extract drives the unmasked pattern and
// vextract_for_mask the masked one; either may be null_frag. The mrk form is
// assembly/encoding only (no pattern).
multiclass vextract_for_size_split<int Opcode,
                                   X86VectorVTInfo From, X86VectorVTInfo To,
                                   SDPatternOperator vextract_extract,
                                   SDPatternOperator vextract_for_mask,
                                   SchedWrite SchedRR, SchedWrite SchedMR> {

  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
                (ins From.RC:$src1, u8imm:$idx),
                "vextract" # To.EltTypeName # "x" # To.NumElts,
                "$idx, $src1", "$src1, $idx",
                (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
                (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
                AVX512AIi8Base, EVEX, Sched<[SchedRR]>;

    def mr  : AVX512AIi8<Opcode, MRMDestMem, (outs),
                    (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
                    "vextract" # To.EltTypeName # "x" # To.NumElts #
                        "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
                    [(store (To.VT (vextract_extract:$idx
                                    (From.VT From.RC:$src1), (iPTR imm))),
                             addr:$dst)]>, EVEX,
                    Sched<[SchedMR]>;

    let mayStore = 1, hasSideEffects = 0 in
    def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
                    (ins To.MemOp:$dst, To.KRCWM:$mask,
                                        From.RC:$src1, u8imm:$idx),
                    "vextract" # To.EltTypeName # "x" # To.NumElts #
                        "\t{$idx, $src1, $dst {${mask}}|"
                        "$dst {${mask}}, $src1, $idx}", []>,
                    EVEX_K, EVEX, Sched<[SchedMR]>;
  }
}

// Passes the same pattern operator for masked and unmasked ops.
multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
                             X86VectorVTInfo To,
                             SDPatternOperator vextract_extract,
                             SchedWrite SchedRR, SchedWrite SchedMR> :
  vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract, SchedRR, SchedMR>;

// Codegen pattern for the alternative types
multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                X86VectorVTInfo To, PatFrag vextract_extract,
                SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
  let Predicates = p in {
     def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
               (To.VT (!cast<Instruction>(InstrStr#"rr")
                          From.RC:$src1,
                          (EXTRACT_get_vextract_imm To.RC:$ext)))>;
     def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
                              (iPTR imm))), addr:$dst),
               (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1,
                (EXTRACT_get_vextract_imm To.RC:$ext))>;
  }
}

// Instantiates the full VEXTRACT family (32x4/64x4 plus the DQI-only
// 64x2/32x8 forms) for a given float/int element-type pair.
multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
                             ValueType EltVT64, int Opcode256,
                             SchedWrite SchedRR, SchedWrite SchedMR> {
  let Predicates = [HasAVX512] in {
    defm NAME # "32x4Z" : vextract_for_size<Opcode128,
                                   X86VectorVTInfo<16, EltVT32, VR512>,
                                   X86VectorVTInfo< 4, EltVT32, VR128X>,
                                   vextract128_extract, SchedRR, SchedMR>,
                                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
    defm NAME # "64x4Z" : vextract_for_size<Opcode256,
                                   X86VectorVTInfo< 8, EltVT64, VR512>,
                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
                                   vextract256_extract, SchedRR, SchedMR>,
                                       REX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
  }
  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 vextract128_extract, SchedRR, SchedMR>,
                                     EVEX_V256, EVEX_CD8<32, CD8VT4>;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
                                     EVEX_V256, EVEX_CD8<64, CD8VT2>, REX_W;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vextract_for_size_split<Opcode128,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
                                     REX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
    defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 null_frag, vextract256_extract, SchedRR, SchedMR>,
                                     EVEX_V512, EVEX_CD8<32, CD8VT8>;
  }
}

// TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, WriteFShuffle256, WriteFStore>;
defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, WriteShuffle256, WriteVecStore>;

// extract_subvector codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.
// NOTE(review): 64-bit element types are lowered with the 32x4/64x4
// instructions below; for unmasked whole-lane extracts the element
// granularity encoded in the instruction does not matter — confirm.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen pattern with the alternative types extract VEC128 from VEC256
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v16f16x_info, v8f16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v16bf16x_info, v8bf16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen pattern with the alternative types extract VEC128 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v32f16_info, v8f16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v32bf16_info, v8bf16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;

// Codegen pattern with the alternative types extract VEC256 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v32f16_info, v16f16x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v32bf16_info, v16bf16x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;


// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
// Each pattern below takes the low 256 bits of the zmm source (sub_ymm) and
// extracts their upper 128-bit half (imm 1), i.e. bits [255:128] of the
// original 512-bit vector, via the AVX2-encodable VEXTRACT*128.
let Predicates = [NoVLX, HasEVEX512] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI128rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF128rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI128rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF128rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI128rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))),
          (v8f16 (VEXTRACTF128rr
                  (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI128rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}

// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
// With VLX available, use the 256-bit VEXTRACT*32x4Z256 on the low ymm
// (sub_ymm) with imm 1 to grab bits [255:128] of the 512-bit source.
let Predicates = [HasVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI32x4Z256rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF32x4Z256rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI32x4Z256rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF32x4Z256rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI32x4Z256rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))),
          (v8f16 (VEXTRACTF32x4Z256rr
                  (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI32x4Z256rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}


// Additional patterns for handling a bitcast between the vselect and the
// extract_subvector.
// vextract_for_mask_cast - Handles masked extracts where a bitcast sits
// between the vselect and the extract_subvector: the extract produces To.VT
// but the mask and select operate on Cast.VT (same width, different element
// type). InstrStr names the instruction whose rrk/rrkz forms are reused.
multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                  X86VectorVTInfo To, X86VectorVTInfo Cast,
                                  PatFrag vextract_extract,
                                  SDNodeXForm EXTRACT_get_vextract_imm,
                                  list<Predicate> p> {
let Predicates = p in {
  // Merge-masking: preserved elements come from $src0. The passthru is
  // annotated with Cast.RC (not To.RC) so the operand's class matches the
  // Cast.VT result of the vselect and its re-annotation in the output dag;
  // To.RC and Cast.RC are the same register class in every instantiation
  // here, so this is purely a consistency fix.
  def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
                                   (bitconvert
                                    (To.VT (vextract_extract:$ext
                                            (From.VT From.RC:$src), (iPTR imm)))),
                                   Cast.RC:$src0)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
                      Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;

  // Zero-masking: masked-off elements are zeroed.
  def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
                                   (bitconvert
                                    (To.VT (vextract_extract:$ext
                                            (From.VT From.RC:$src), (iPTR imm)))),
                                   Cast.ImmAllZerosV)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
                      Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
}
}

// 128-bit extracts from 256-bit sources.
defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

// 128-bit extracts from 512-bit sources.
defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

// 256-bit extracts from 512-bit sources.
defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
                              v8f32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
                              v4f64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;

// vextractps - extract 32 bits from XMM
def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32orGR64:$dst),
                      (ins VR128X:$src1, u8imm:$src2),
                      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set GR32orGR64:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
                      EVEX, WIG, Sched<[WriteVecExtract]>;

def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
                      (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
                      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
                              addr:$dst)]>,
                      EVEX, WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;

//===---------------------------------------------------------------------===//
// AVX-512 BROADCAST
//---
// broadcast with a scalar argument.
// avx512_broadcast_scalar - Patterns mapping a broadcast from a scalar FP
// register (FRC) onto the vector-source broadcast instruction named by
// Name#ZSuffix: the scalar is viewed as a full vector register via
// COPY_TO_REGCLASS, then the rr/rrk/rrkz form is used.
multiclass avx512_broadcast_scalar<string Name, X86VectorVTInfo DestInfo,
                                   X86VectorVTInfo SrcInfo> {
  def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rr)
             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
                          (X86VBroadcast SrcInfo.FRC:$src),
                          DestInfo.RC:$src0)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rrk)
             DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
                          (X86VBroadcast SrcInfo.FRC:$src),
                          DestInfo.ImmAllZerosV)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rrkz)
             DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
}

// Split version to allow mask and broadcast node to be different types. This
// helps support the 32x2 broadcasts. The unmasked register/memory patterns
// can be overridden (e.g. with null_frag) via UnmaskedOp/UnmaskedBcastOp;
// the masked forms always use X86VBroadcast / SrcInfo.BroadcastLdFrag.
multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
                                     SchedWrite SchedRR, SchedWrite SchedRM,
                                     X86VectorVTInfo MaskInfo,
                                     X86VectorVTInfo DestInfo,
                                     X86VectorVTInfo SrcInfo,
                                     bit IsConvertibleToThreeAddress,
                                     SDPatternOperator UnmaskedOp = X86VBroadcast,
                                     SDPatternOperator UnmaskedBcastOp = SrcInfo.BroadcastLdFrag> {
  let hasSideEffects = 0 in
  def rr : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), (ins SrcInfo.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set MaskInfo.RC:$dst,
                      (MaskInfo.VT
                       (bitconvert
                        (DestInfo.VT
                         (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))],
                    DestInfo.ExeDomain>, T8, PD, EVEX, Sched<[SchedRR]>;
  def rrkz : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
                      (ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                                 "${dst} {${mask}} {z}, $src}"),
                      [(set MaskInfo.RC:$dst,
                        (vselect_mask MaskInfo.KRCWM:$mask,
                         (MaskInfo.VT
                          (bitconvert
                           (DestInfo.VT
                            (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
                         MaskInfo.ImmAllZerosV))],
                      DestInfo.ExeDomain>, T8, PD, EVEX, EVEX_KZ, Sched<[SchedRR]>;
  let Constraints = "$src0 = $dst" in
  def rrk : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
                     (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
                          SrcInfo.RC:$src),
                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
                                "${dst} {${mask}}, $src}"),
                     [(set MaskInfo.RC:$dst,
                       (vselect_mask MaskInfo.KRCWM:$mask,
                        (MaskInfo.VT
                         (bitconvert
                          (DestInfo.VT
                           (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
                        MaskInfo.RC:$src0))],
                     DestInfo.ExeDomain>, T8, PD, EVEX, EVEX_K, Sched<[SchedRR]>;

  // Broadcast-from-memory forms; rematerializable/foldable since the load
  // has no side effects.
  let hasSideEffects = 0, mayLoad = 1, isReMaterializable = 1, canFoldAsLoad = 1 in
  def rm : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
                    (ins SrcInfo.ScalarMemOp:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set MaskInfo.RC:$dst,
                      (MaskInfo.VT
                       (bitconvert
                        (DestInfo.VT
                         (UnmaskedBcastOp addr:$src)))))],
                    DestInfo.ExeDomain>, T8, PD, EVEX,
                    EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;

  def rmkz : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
                      (ins MaskInfo.KRCWM:$mask, SrcInfo.ScalarMemOp:$src),
                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                                 "${dst} {${mask}} {z}, $src}"),
                      [(set MaskInfo.RC:$dst,
                        (vselect_mask MaskInfo.KRCWM:$mask,
                         (MaskInfo.VT
                          (bitconvert
                           (DestInfo.VT
                            (SrcInfo.BroadcastLdFrag addr:$src)))),
                         MaskInfo.ImmAllZerosV))],
                      DestInfo.ExeDomain>, T8, PD, EVEX, EVEX_KZ,
                      EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;

  let Constraints = "$src0 = $dst",
      isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
  def rmk : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
                     (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
                          SrcInfo.ScalarMemOp:$src),
                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
                                "${dst} {${mask}}, $src}"),
                     [(set MaskInfo.RC:$dst,
                       (vselect_mask MaskInfo.KRCWM:$mask,
                        (MaskInfo.VT
                         (bitconvert
                          (DestInfo.VT
                           (SrcInfo.BroadcastLdFrag addr:$src)))),
                        MaskInfo.RC:$src0))],
                     DestInfo.ExeDomain>, T8, PD, EVEX, EVEX_K,
                     EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
}

// Helper class to force mask and broadcast result to same type.
multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
                               SchedWrite SchedRR, SchedWrite SchedRM,
                               X86VectorVTInfo DestInfo,
                               X86VectorVTInfo SrcInfo,
                               bit IsConvertibleToThreeAddress> :
  avx512_broadcast_rm_split<opc, OpcodeStr, SchedRR, SchedRM,
                            DestInfo, DestInfo, SrcInfo,
                            IsConvertibleToThreeAddress>;

// vbroadcastsd: 512/256-bit destinations only (no 128-bit form).
multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                 WriteFShuffle256Ld, _.info512, _.info128, 1>,
             avx512_broadcast_scalar<NAME, _.info512, _.info128>,
             EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                    WriteFShuffle256Ld, _.info256, _.info128, 1>,
                avx512_broadcast_scalar<NAME, _.info256, _.info128>,
                EVEX_V256;
  }
}

// vbroadcastss: 512/256/128-bit destinations.
multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                 WriteFShuffle256Ld, _.info512, _.info128, 1>,
             avx512_broadcast_scalar<NAME, _.info512, _.info128>,
             EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                    WriteFShuffle256Ld, _.info256, _.info128, 1>,
                avx512_broadcast_scalar<NAME, _.info256, _.info128>,
                EVEX_V256;
    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                    WriteFShuffle256Ld, _.info128, _.info128, 1>,
                avx512_broadcast_scalar<NAME, _.info128, _.info128>,
                EVEX_V128;
  }
}
defm VBROADCASTSS : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
                                           avx512vl_f32_info>;
defm VBROADCASTSD : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
                                           avx512vl_f64_info>, REX_W;

// Broadcast from a GPR source (vpbroadcastd/q with 32/64-bit GPR operand).
multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
                                    X86VectorVTInfo _, SDPatternOperator OpNode,
                                    RegisterClass SrcRC> {
  // Fold with a mask even if it has multiple uses since it is cheap.
  let ExeDomain = _.ExeDomain in
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins SrcRC:$src),
                            "vpbroadcast"#_.Suffix, "$src", "$src",
                            (_.VT (OpNode SrcRC:$src)), /*IsCommutable*/0,
                            /*IsKCommutable*/0, /*IsKZCommutable*/0, vselect>,
                            T8, PD, EVEX, Sched<[SchedRR]>;
}

// Broadcast from an 8/16-bit GPR: the instruction takes a GR32 operand, so
// the patterns widen SrcRC into a GR32 via INSERT_SUBREG on Subreg.
multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
                                      X86VectorVTInfo _, SDPatternOperator OpNode,
                                      RegisterClass SrcRC, SubRegIndex Subreg> {
  let hasSideEffects = 0, ExeDomain = _.ExeDomain in
  defm rr : AVX512_maskable_custom<opc, MRMSrcReg,
                                   (outs _.RC:$dst), (ins GR32:$src),
                                   !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
                                   !con((ins _.KRCWM:$mask), (ins GR32:$src)),
                                   "vpbroadcast"#_.Suffix, "$src", "$src", [], [], [],
                                   "$src0 = $dst">, T8, PD, EVEX, Sched<[SchedRR]>;

  def : Pat <(_.VT (OpNode SrcRC:$src)),
             (!cast<Instruction>(Name#rr)
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  // Fold with a mask even if it has multiple uses since it is cheap.
  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
             (!cast<Instruction>(Name#rrk) _.RC:$src0, _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
             (!cast<Instruction>(Name#rrkz) _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
}

// VL expansion of the byte/word GPR broadcasts.
multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
                                         AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
                                         RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
                                        OpNode, SrcRC, Subreg>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
                                           _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
    defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
                                           _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
  }
}

// VL expansion of the dword/qword GPR broadcasts.
multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
                                       SDPatternOperator OpNode,
                                       RegisterClass SrcRC, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
                                      SrcRC>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
                                         SrcRC>, EVEX_V256;
    defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
                                         SrcRC>, EVEX_V128;
  }
}

defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
                       avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
                       avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
                       HasBWI>;
defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
                       X86VBroadcast, GR32, HasAVX512>;
defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
                       X86VBroadcast, GR64, HasAVX512>, REX_W;

// VL expansion of the vector-source / memory-source integer broadcasts.
multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
                                      AVX512VLVectorVTInfo _, Predicate prd,
                                      bit IsConvertibleToThreeAddress> {
  let Predicates = [prd] in {
    defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
                                 WriteShuffle256Ld, _.info512, _.info128,
                                 IsConvertibleToThreeAddress>,
             EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
                                    WriteShuffle256Ld, _.info256, _.info128,
                                    IsConvertibleToThreeAddress>,
                EVEX_V256;
    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle,
                                    WriteShuffleXLd, _.info128, _.info128,
                                    IsConvertibleToThreeAddress>,
                EVEX_V128;
  }
}

defm VPBROADCASTB : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
                                               avx512vl_i8_info, HasBWI, 0>;
defm VPBROADCASTW : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
                                               avx512vl_i16_info, HasBWI, 0>;
defm VPBROADCASTD : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
                                               avx512vl_i32_info, HasAVX512, 1>;
defm VPBROADCASTQ : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
                                               avx512vl_i64_info, HasAVX512, 1>, REX_W;

// avx512_subvec_broadcast_rm - Memory-only subvector broadcast (e.g.
// vbroadcasti32x4) with the full masking variants from AVX512_maskable.
multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
                                      SDPatternOperator OpNode,
                                      X86VectorVTInfo _Dst,
                                      X86VectorVTInfo _Src> {
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                            (_Dst.VT (OpNode addr:$src))>,
            Sched<[SchedWriteShuffle.YMM.Folded]>,
            AVX5128IBase, EVEX;
}

// This should be used for the AVX512DQ broadcast instructions. It disables
// the unmasked patterns so that we only use the DQ instructions when masking
// is requested.
// avx512_subvec_broadcast_rm_dq - AVX512DQ subvector broadcast. Uses
// null_frag for the unmasked pattern so ISel only selects the DQ encodings
// when masking is requested.
multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
                                         SDPatternOperator OpNode,
                                         X86VectorVTInfo _Dst,
                                         X86VectorVTInfo _Src> {
  let hasSideEffects = 0, mayLoad = 1 in
  defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                                  (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                                  (null_frag),
                                  (_Dst.VT (OpNode addr:$src))>,
            Sched<[SchedWriteShuffle.YMM.Folded]>,
            AVX5128IBase, EVEX;
}

// f16 broadcasts are lowered with the integer VPBROADCASTW instructions.
let Predicates = [HasBWI] in {
  def : Pat<(v32f16 (X86VBroadcastld16 addr:$src)),
            (VPBROADCASTWZrm addr:$src)>;

  def : Pat<(v32f16 (X86VBroadcast (v8f16 VR128X:$src))),
            (VPBROADCASTWZrr VR128X:$src)>;
  def : Pat<(v32f16 (X86VBroadcast (f16 FR16X:$src))),
            (VPBROADCASTWZrr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
}
let Predicates = [HasVLX, HasBWI] in {
  def : Pat<(v8f16 (X86VBroadcastld16 addr:$src)),
            (VPBROADCASTWZ128rm addr:$src)>;
  def : Pat<(v16f16 (X86VBroadcastld16 addr:$src)),
            (VPBROADCASTWZ256rm addr:$src)>;

  def : Pat<(v8f16 (X86VBroadcast (v8f16 VR128X:$src))),
            (VPBROADCASTWZ128rr VR128X:$src)>;
  def : Pat<(v16f16 (X86VBroadcast (v8f16 VR128X:$src))),
            (VPBROADCASTWZ256rr VR128X:$src)>;

  def : Pat<(v8f16 (X86VBroadcast (f16 FR16X:$src))),
            (VPBROADCASTWZ128rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
  def : Pat<(v16f16 (X86VBroadcast (f16 FR16X:$src))),
            (VPBROADCASTWZ256rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
}

//===----------------------------------------------------------------------===//
// AVX-512 BROADCAST SUBVECTORS
//

defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                       X86SubVBroadcastld128, v16i32_info, v4i32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                       X86SubVBroadcastld128, v16f32_info, v4f32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
                       X86SubVBroadcastld256, v8i64_info, v4i64x_info>, REX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
                       X86SubVBroadcastld256, v8f64_info, v4f64x_info>, REX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT4>;

let Predicates = [HasAVX512] in {
// Unmasked subvector broadcasts of the remaining element types reuse the
// 32x4/64x4 instructions (element granularity doesn't matter here).
def : Pat<(v8f64 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTF64X4rm addr:$src)>;
def : Pat<(v16f32 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTF64X4rm addr:$src)>;
def : Pat<(v32f16 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTF64X4rm addr:$src)>;
def : Pat<(v8i64 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v16i32 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTI64X4rm addr:$src)>;

def : Pat<(v8f64 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4rm addr:$src)>;
def : Pat<(v16f32 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4rm addr:$src)>;
def : Pat<(v32f16 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4rm addr:$src)>;
def : Pat<(v8i64 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v16i32 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4rm addr:$src)>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
                        (v16f32 immAllZerosV)),
          (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
                        VR512:$src0),
          (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
                        (v16i32 immAllZerosV)),
          (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
                        VR512:$src0),
          (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;

def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
                        (v8f64 immAllZerosV)),
          (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
                        VR512:$src0),
          (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
                        (v8i64 immAllZerosV)),
          (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
                        VR512:$src0),
          (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}

let Predicates = [HasVLX] in {
defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                           X86SubVBroadcastld128, v8i32x_info, v4i32x_info>,
                           EVEX_V256, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                           X86SubVBroadcastld128, v8f32x_info, v4f32x_info>,
                           EVEX_V256, EVEX_CD8<32, CD8VT4>;

def : Pat<(v4f64 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4Z256rm addr:$src)>;
def : Pat<(v8f32 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4Z256rm addr:$src)>;
def : Pat<(v16f16 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4Z256rm addr:$src)>;
def : Pat<(v4i64 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v8i32 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v16i16 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4Z256rm addr:$src)>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
                        (v8f32 immAllZerosV)),
          (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
                        VR256X:$src0),
          (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
                        (v8i32 immAllZerosV)),
          (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
                        VR256X:$src0),
          (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
}

// bf16 subvector broadcasts reuse the FP subvector broadcast instructions.
let Predicates = [HasBF16] in {
  def : Pat<(v32bf16 (X86SubVBroadcastld256 addr:$src)),
            (VBROADCASTF64X4rm addr:$src)>;
  def : Pat<(v32bf16 (X86SubVBroadcastld128 addr:$src)),
            (VBROADCASTF32X4rm addr:$src)>;
}

let Predicates = [HasBF16, HasVLX] in
  def : Pat<(v16bf16 (X86SubVBroadcastld128 addr:$src)),
            (VBROADCASTF32X4Z256rm addr:$src)>;

let Predicates = [HasVLX, HasDQI] in {
defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                           X86SubVBroadcastld128, v4i64x_info, v2i64x_info>,
                           EVEX_V256, EVEX_CD8<64, CD8VT2>, REX_W;
defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                           X86SubVBroadcastld128, v4f64x_info, v2f64x_info>,
                           EVEX_V256, EVEX_CD8<64, CD8VT2>, REX_W;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect_mask VK4WM:$mask,
                        (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
                        (v4f64 immAllZerosV)),
          (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK4WM:$mask,
                        (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
                        VR256X:$src0),
          (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK4WM:$mask,
                        (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
                        (v4i64 immAllZerosV)),
          (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK4WM:$mask,
                        (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
                        VR256X:$src0),
          (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
}

let Predicates = [HasDQI] in {
defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                       X86SubVBroadcastld128, v8i64_info, v2i64x_info>, REX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
                       X86SubVBroadcastld256, v16i32_info, v8i32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                       X86SubVBroadcastld128, v8f64_info, v2f64x_info>, REX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
                       X86SubVBroadcastld256, v16f32_info, v8f32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT8>;

// Patterns for selects of bitcasted operations.
1515def : Pat<(vselect_mask VK16WM:$mask, 1516 (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))), 1517 (v16f32 immAllZerosV)), 1518 (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>; 1519def : Pat<(vselect_mask VK16WM:$mask, 1520 (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))), 1521 VR512:$src0), 1522 (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>; 1523def : Pat<(vselect_mask VK16WM:$mask, 1524 (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))), 1525 (v16i32 immAllZerosV)), 1526 (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>; 1527def : Pat<(vselect_mask VK16WM:$mask, 1528 (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))), 1529 VR512:$src0), 1530 (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>; 1531 1532def : Pat<(vselect_mask VK8WM:$mask, 1533 (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))), 1534 (v8f64 immAllZerosV)), 1535 (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>; 1536def : Pat<(vselect_mask VK8WM:$mask, 1537 (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))), 1538 VR512:$src0), 1539 (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>; 1540def : Pat<(vselect_mask VK8WM:$mask, 1541 (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))), 1542 (v8i64 immAllZerosV)), 1543 (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>; 1544def : Pat<(vselect_mask VK8WM:$mask, 1545 (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))), 1546 VR512:$src0), 1547 (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>; 1548} 1549 1550multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr, 1551 AVX512VLVectorVTInfo _Dst, 1552 AVX512VLVectorVTInfo _Src> { 1553 let Predicates = [HasDQI] in 1554 defm Z : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256, 1555 WriteShuffle256Ld, _Dst.info512, 1556 _Src.info512, _Src.info128, 0, null_frag, null_frag>, 1557 EVEX_V512; 1558 let Predicates = [HasDQI, HasVLX] in 1559 defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256, 1560 WriteShuffle256Ld, 
_Dst.info256, 1561 _Src.info256, _Src.info128, 0, null_frag, null_frag>, 1562 EVEX_V256; 1563} 1564 1565multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr, 1566 AVX512VLVectorVTInfo _Dst, 1567 AVX512VLVectorVTInfo _Src> : 1568 avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> { 1569 1570 let Predicates = [HasDQI, HasVLX] in 1571 defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle, 1572 WriteShuffleXLd, _Dst.info128, 1573 _Src.info128, _Src.info128, 0, null_frag, null_frag>, 1574 EVEX_V128; 1575} 1576 1577defm VBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2", 1578 avx512vl_i32_info, avx512vl_i64_info>; 1579defm VBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2", 1580 avx512vl_f32_info, avx512vl_f64_info>; 1581 1582//===----------------------------------------------------------------------===// 1583// AVX-512 BROADCAST MASK TO VECTOR REGISTER 1584//--- 1585multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr, 1586 X86VectorVTInfo _, RegisterClass KRC> { 1587 def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src), 1588 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 1589 [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>, 1590 EVEX, Sched<[WriteShuffle]>; 1591} 1592 1593multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr, 1594 AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> { 1595 let Predicates = [HasCDI] in 1596 defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512; 1597 let Predicates = [HasCDI, HasVLX] in { 1598 defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256; 1599 defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128; 1600 } 1601} 1602 1603defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d", 1604 avx512vl_i32_info, VK16>; 1605defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q", 1606 avx512vl_i64_info, VK8>, REX_W; 
1607 1608//===----------------------------------------------------------------------===// 1609// -- VPERMI2 - 3 source operands form -- 1610multiclass avx512_perm_i<bits<8> opc, string OpcodeStr, 1611 X86FoldableSchedWrite sched, 1612 X86VectorVTInfo _, X86VectorVTInfo IdxVT> { 1613let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, 1614 hasSideEffects = 0 in { 1615 defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst), 1616 (ins _.RC:$src2, _.RC:$src3), 1617 OpcodeStr, "$src3, $src2", "$src2, $src3", 1618 (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>, 1619 EVEX, VVVV, AVX5128IBase, Sched<[sched]>; 1620 1621 let mayLoad = 1 in 1622 defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst), 1623 (ins _.RC:$src2, _.MemOp:$src3), 1624 OpcodeStr, "$src3, $src2", "$src2, $src3", 1625 (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, 1626 (_.VT (_.LdFrag addr:$src3)))), 1>, 1627 EVEX, VVVV, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>; 1628 } 1629} 1630 1631multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr, 1632 X86FoldableSchedWrite sched, 1633 X86VectorVTInfo _, X86VectorVTInfo IdxVT> { 1634 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, 1635 hasSideEffects = 0, mayLoad = 1 in 1636 defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst), 1637 (ins _.RC:$src2, _.ScalarMemOp:$src3), 1638 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), 1639 !strconcat("$src2, ${src3}", _.BroadcastStr ), 1640 (_.VT (X86VPermt2 _.RC:$src2, 1641 IdxVT.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>, 1642 AVX5128IBase, EVEX, VVVV, EVEX_B, 1643 Sched<[sched.Folded, sched.ReadAfterFold]>; 1644} 1645 1646multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr, 1647 X86FoldableSchedWrite sched, 1648 AVX512VLVectorVTInfo VTInfo, 1649 AVX512VLVectorVTInfo ShuffleMask> { 1650 defm NAME#Z: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512, 1651 
ShuffleMask.info512>, 1652 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512, 1653 ShuffleMask.info512>, EVEX_V512; 1654 let Predicates = [HasVLX] in { 1655 defm NAME#Z128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128, 1656 ShuffleMask.info128>, 1657 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128, 1658 ShuffleMask.info128>, EVEX_V128; 1659 defm NAME#Z256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256, 1660 ShuffleMask.info256>, 1661 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256, 1662 ShuffleMask.info256>, EVEX_V256; 1663 } 1664} 1665 1666multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr, 1667 X86FoldableSchedWrite sched, 1668 AVX512VLVectorVTInfo VTInfo, 1669 AVX512VLVectorVTInfo Idx, 1670 Predicate Prd> { 1671 let Predicates = [Prd] in 1672 defm NAME#Z: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512, 1673 Idx.info512>, EVEX_V512; 1674 let Predicates = [Prd, HasVLX] in { 1675 defm NAME#Z128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128, 1676 Idx.info128>, EVEX_V128; 1677 defm NAME#Z256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256, 1678 Idx.info256>, EVEX_V256; 1679 } 1680} 1681 1682defm VPERMI2D : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256, 1683 avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; 1684defm VPERMI2Q : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256, 1685 avx512vl_i64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>; 1686defm VPERMI2W : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256, 1687 avx512vl_i16_info, avx512vl_i16_info, HasBWI>, 1688 REX_W, EVEX_CD8<16, CD8VF>; 1689defm VPERMI2B : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256, 1690 avx512vl_i8_info, avx512vl_i8_info, HasVBMI>, 1691 EVEX_CD8<8, CD8VF>; 1692defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256, 1693 avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; 1694defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", 
WriteFVarShuffle256, 1695 avx512vl_f64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>; 1696 1697// Extra patterns to deal with extra bitcasts due to passthru and index being 1698// different types on the fp versions. 1699multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _, 1700 X86VectorVTInfo IdxVT, 1701 X86VectorVTInfo CastVT> { 1702 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 1703 (X86VPermt2 (_.VT _.RC:$src2), 1704 (IdxVT.VT (bitconvert 1705 (CastVT.VT _.RC:$src1))), 1706 _.RC:$src3), 1707 (_.VT (bitconvert (CastVT.VT _.RC:$src1))))), 1708 (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask, 1709 _.RC:$src2, _.RC:$src3)>; 1710 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 1711 (X86VPermt2 _.RC:$src2, 1712 (IdxVT.VT (bitconvert 1713 (CastVT.VT _.RC:$src1))), 1714 (_.LdFrag addr:$src3)), 1715 (_.VT (bitconvert (CastVT.VT _.RC:$src1))))), 1716 (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask, 1717 _.RC:$src2, addr:$src3)>; 1718 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 1719 (X86VPermt2 _.RC:$src2, 1720 (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))), 1721 (_.BroadcastLdFrag addr:$src3)), 1722 (_.VT (bitconvert (CastVT.VT _.RC:$src1))))), 1723 (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask, 1724 _.RC:$src2, addr:$src3)>; 1725} 1726 1727// TODO: Should we add more casts? The vXi64 case is common due to ABI. 
1728defm : avx512_perm_i_lowering<"VPERMI2PSZ", v16f32_info, v16i32_info, v8i64_info>; 1729defm : avx512_perm_i_lowering<"VPERMI2PSZ256", v8f32x_info, v8i32x_info, v4i64x_info>; 1730defm : avx512_perm_i_lowering<"VPERMI2PSZ128", v4f32x_info, v4i32x_info, v2i64x_info>; 1731 1732// VPERMT2 1733multiclass avx512_perm_t<bits<8> opc, string OpcodeStr, 1734 X86FoldableSchedWrite sched, 1735 X86VectorVTInfo _, X86VectorVTInfo IdxVT> { 1736let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in { 1737 defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 1738 (ins IdxVT.RC:$src2, _.RC:$src3), 1739 OpcodeStr, "$src3, $src2", "$src2, $src3", 1740 (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>, 1741 EVEX, VVVV, AVX5128IBase, Sched<[sched]>; 1742 1743 defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 1744 (ins IdxVT.RC:$src2, _.MemOp:$src3), 1745 OpcodeStr, "$src3, $src2", "$src2, $src3", 1746 (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, 1747 (_.LdFrag addr:$src3))), 1>, 1748 EVEX, VVVV, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>; 1749 } 1750} 1751multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr, 1752 X86FoldableSchedWrite sched, 1753 X86VectorVTInfo _, X86VectorVTInfo IdxVT> { 1754 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in 1755 defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 1756 (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3), 1757 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), 1758 !strconcat("$src2, ${src3}", _.BroadcastStr ), 1759 (_.VT (X86VPermt2 _.RC:$src1, 1760 IdxVT.RC:$src2,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>, 1761 AVX5128IBase, EVEX, VVVV, EVEX_B, 1762 Sched<[sched.Folded, sched.ReadAfterFold]>; 1763} 1764 1765multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr, 1766 X86FoldableSchedWrite sched, 1767 AVX512VLVectorVTInfo VTInfo, 1768 AVX512VLVectorVTInfo ShuffleMask> { 1769 defm NAME#Z: avx512_perm_t<opc, OpcodeStr, sched, 
VTInfo.info512, 1770 ShuffleMask.info512>, 1771 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512, 1772 ShuffleMask.info512>, EVEX_V512; 1773 let Predicates = [HasVLX] in { 1774 defm NAME#Z128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128, 1775 ShuffleMask.info128>, 1776 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128, 1777 ShuffleMask.info128>, EVEX_V128; 1778 defm NAME#Z256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256, 1779 ShuffleMask.info256>, 1780 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256, 1781 ShuffleMask.info256>, EVEX_V256; 1782 } 1783} 1784 1785multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr, 1786 X86FoldableSchedWrite sched, 1787 AVX512VLVectorVTInfo VTInfo, 1788 AVX512VLVectorVTInfo Idx, Predicate Prd> { 1789 let Predicates = [Prd] in 1790 defm NAME#Z: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512, 1791 Idx.info512>, EVEX_V512; 1792 let Predicates = [Prd, HasVLX] in { 1793 defm NAME#Z128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128, 1794 Idx.info128>, EVEX_V128; 1795 defm NAME#Z256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256, 1796 Idx.info256>, EVEX_V256; 1797 } 1798} 1799 1800defm VPERMT2D : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256, 1801 avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; 1802defm VPERMT2Q : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256, 1803 avx512vl_i64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>; 1804defm VPERMT2W : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256, 1805 avx512vl_i16_info, avx512vl_i16_info, HasBWI>, 1806 REX_W, EVEX_CD8<16, CD8VF>; 1807defm VPERMT2B : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256, 1808 avx512vl_i8_info, avx512vl_i8_info, HasVBMI>, 1809 EVEX_CD8<8, CD8VF>; 1810defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256, 1811 avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; 1812defm VPERMT2PD : avx512_perm_t_sizes<0x7F, 
"vpermt2pd", WriteFVarShuffle256, 1813 avx512vl_f64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>; 1814 1815//===----------------------------------------------------------------------===// 1816// AVX-512 - BLEND using mask 1817// 1818 1819multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr, 1820 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 1821 let ExeDomain = _.ExeDomain, hasSideEffects = 0 in { 1822 def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst), 1823 (ins _.RC:$src1, _.RC:$src2), 1824 !strconcat(OpcodeStr, 1825 "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>, 1826 EVEX, VVVV, Sched<[sched]>; 1827 def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst), 1828 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), 1829 !strconcat(OpcodeStr, 1830 "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"), 1831 []>, EVEX, VVVV, EVEX_K, Sched<[sched]>; 1832 def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst), 1833 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), 1834 !strconcat(OpcodeStr, 1835 "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"), 1836 []>, EVEX, VVVV, EVEX_KZ, Sched<[sched]>; 1837 let mayLoad = 1 in { 1838 def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), 1839 (ins _.RC:$src1, _.MemOp:$src2), 1840 !strconcat(OpcodeStr, 1841 "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), 1842 []>, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>, 1843 Sched<[sched.Folded, sched.ReadAfterFold]>; 1844 def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), 1845 (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2), 1846 !strconcat(OpcodeStr, 1847 "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"), 1848 []>, EVEX, VVVV, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>, 1849 Sched<[sched.Folded, sched.ReadAfterFold]>; 1850 def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), 1851 (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2), 1852 !strconcat(OpcodeStr, 1853 "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"), 
1854 []>, EVEX, VVVV, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>, 1855 Sched<[sched.Folded, sched.ReadAfterFold]>; 1856 } 1857 } 1858} 1859multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr, 1860 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 1861 let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in { 1862 def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), 1863 (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2), 1864 !strconcat(OpcodeStr, 1865 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|", 1866 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>, 1867 EVEX, VVVV, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, 1868 Sched<[sched.Folded, sched.ReadAfterFold]>; 1869 1870 def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), 1871 (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2), 1872 !strconcat(OpcodeStr, 1873 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|", 1874 "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>, 1875 EVEX, VVVV, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, 1876 Sched<[sched.Folded, sched.ReadAfterFold]>; 1877 1878 def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), 1879 (ins _.RC:$src1, _.ScalarMemOp:$src2), 1880 !strconcat(OpcodeStr, 1881 "\t{${src2}", _.BroadcastStr, ", $src1, $dst|", 1882 "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>, 1883 EVEX, VVVV, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, 1884 Sched<[sched.Folded, sched.ReadAfterFold]>; 1885 } 1886} 1887 1888multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched, 1889 AVX512VLVectorVTInfo VTInfo> { 1890 defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>, 1891 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>, 1892 EVEX_V512; 1893 1894 let Predicates = [HasVLX] in { 1895 defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>, 1896 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>, 1897 EVEX_V256; 1898 defm Z128 : WriteFVarBlendask<opc, 
OpcodeStr, sched.XMM, VTInfo.info128>, 1899 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>, 1900 EVEX_V128; 1901 } 1902} 1903 1904multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched, 1905 AVX512VLVectorVTInfo VTInfo> { 1906 let Predicates = [HasBWI] in 1907 defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>, 1908 EVEX_V512; 1909 1910 let Predicates = [HasBWI, HasVLX] in { 1911 defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>, 1912 EVEX_V256; 1913 defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>, 1914 EVEX_V128; 1915 } 1916} 1917 1918defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend, 1919 avx512vl_f32_info>; 1920defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend, 1921 avx512vl_f64_info>, REX_W; 1922defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend, 1923 avx512vl_i32_info>; 1924defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend, 1925 avx512vl_i64_info>, REX_W; 1926defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend, 1927 avx512vl_i8_info>; 1928defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend, 1929 avx512vl_i16_info>, REX_W; 1930 1931//===----------------------------------------------------------------------===// 1932// Compare Instructions 1933//===----------------------------------------------------------------------===// 1934 1935// avx512_cmp_scalar - AVX512 CMPSS and CMPSD 1936 1937multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE, 1938 PatFrag OpNode_su, PatFrag OpNodeSAE_su, 1939 X86FoldableSchedWrite sched> { 1940 defm rri_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, 1941 (outs _.KRC:$dst), 1942 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), 1943 "vcmp"#_.Suffix, 1944 "$cc, $src2, $src1", "$src1, $src2, $cc", 1945 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc), 1946 (OpNode_su (_.VT _.RC:$src1), (_.VT 
_.RC:$src2), timm:$cc)>, 1947 EVEX, VVVV, VEX_LIG, Sched<[sched]>, SIMD_EXC; 1948 let mayLoad = 1 in 1949 defm rmi_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, 1950 (outs _.KRC:$dst), 1951 (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc), 1952 "vcmp"#_.Suffix, 1953 "$cc, $src2, $src1", "$src1, $src2, $cc", 1954 (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2), 1955 timm:$cc), 1956 (OpNode_su (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2), 1957 timm:$cc)>, EVEX, VVVV, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>, 1958 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 1959 1960 let Uses = [MXCSR] in 1961 defm rrib_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, 1962 (outs _.KRC:$dst), 1963 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), 1964 "vcmp"#_.Suffix, 1965 "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc", 1966 (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), 1967 timm:$cc), 1968 (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), 1969 timm:$cc)>, 1970 EVEX, VVVV, VEX_LIG, EVEX_B, Sched<[sched]>; 1971 1972 let isCodeGenOnly = 1 in { 1973 let isCommutable = 1 in 1974 def rri : AVX512Ii8<0xC2, MRMSrcReg, 1975 (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, u8imm:$cc), 1976 !strconcat("vcmp", _.Suffix, 1977 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), 1978 [(set _.KRC:$dst, (OpNode _.FRC:$src1, 1979 _.FRC:$src2, 1980 timm:$cc))]>, 1981 EVEX, VVVV, VEX_LIG, Sched<[sched]>, SIMD_EXC; 1982 def rmi : AVX512Ii8<0xC2, MRMSrcMem, 1983 (outs _.KRC:$dst), 1984 (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc), 1985 !strconcat("vcmp", _.Suffix, 1986 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), 1987 [(set _.KRC:$dst, (OpNode _.FRC:$src1, 1988 (_.ScalarLdFrag addr:$src2), 1989 timm:$cc))]>, 1990 EVEX, VVVV, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>, 1991 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 1992 } 1993} 1994 1995let Predicates = [HasAVX512] in { 1996 let ExeDomain = SSEPackedSingle in 1997 defm VCMPSSZ : 
avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsSAE, 1998 X86cmpms_su, X86cmpmsSAE_su, 1999 SchedWriteFCmp.Scl>, AVX512XSIi8Base; 2000 let ExeDomain = SSEPackedDouble in 2001 defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsSAE, 2002 X86cmpms_su, X86cmpmsSAE_su, 2003 SchedWriteFCmp.Scl>, AVX512XDIi8Base, REX_W; 2004} 2005let Predicates = [HasFP16], ExeDomain = SSEPackedSingle in 2006 defm VCMPSHZ : avx512_cmp_scalar<f16x_info, X86cmpms, X86cmpmsSAE, 2007 X86cmpms_su, X86cmpmsSAE_su, 2008 SchedWriteFCmp.Scl>, AVX512XSIi8Base, TA; 2009 2010multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, 2011 X86FoldableSchedWrite sched, 2012 X86VectorVTInfo _, bit IsCommutable> { 2013 let isCommutable = IsCommutable, hasSideEffects = 0 in 2014 def rr : AVX512BI<opc, MRMSrcReg, 2015 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2), 2016 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 2017 []>, EVEX, VVVV, Sched<[sched]>; 2018 let mayLoad = 1, hasSideEffects = 0 in 2019 def rm : AVX512BI<opc, MRMSrcMem, 2020 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2), 2021 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 2022 []>, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>; 2023 let isCommutable = IsCommutable, hasSideEffects = 0 in 2024 def rrk : AVX512BI<opc, MRMSrcReg, 2025 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), 2026 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|", 2027 "$dst {${mask}}, $src1, $src2}"), 2028 []>, EVEX, VVVV, EVEX_K, Sched<[sched]>; 2029 let mayLoad = 1, hasSideEffects = 0 in 2030 def rmk : AVX512BI<opc, MRMSrcMem, 2031 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2), 2032 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|", 2033 "$dst {${mask}}, $src1, $src2}"), 2034 []>, EVEX, VVVV, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; 2035} 2036 2037multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, 2038 X86FoldableSchedWrite 
sched, X86VectorVTInfo _, 2039 bit IsCommutable> : 2040 avx512_icmp_packed<opc, OpcodeStr, sched, _, IsCommutable> { 2041 let mayLoad = 1, hasSideEffects = 0 in { 2042 def rmb : AVX512BI<opc, MRMSrcMem, 2043 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2), 2044 !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst", 2045 "|$dst, $src1, ${src2}", _.BroadcastStr, "}"), 2046 []>, EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 2047 def rmbk : AVX512BI<opc, MRMSrcMem, 2048 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, 2049 _.ScalarMemOp:$src2), 2050 !strconcat(OpcodeStr, 2051 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|", 2052 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), 2053 []>, EVEX, VVVV, EVEX_K, EVEX_B, 2054 Sched<[sched.Folded, sched.ReadAfterFold]>; 2055 } 2056} 2057 2058multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, 2059 X86SchedWriteWidths sched, 2060 AVX512VLVectorVTInfo VTInfo, Predicate prd, 2061 bit IsCommutable = 0> { 2062 let Predicates = [prd] in 2063 defm Z : avx512_icmp_packed<opc, OpcodeStr, sched.ZMM, 2064 VTInfo.info512, IsCommutable>, EVEX_V512; 2065 2066 let Predicates = [prd, HasVLX] in { 2067 defm Z256 : avx512_icmp_packed<opc, OpcodeStr, sched.YMM, 2068 VTInfo.info256, IsCommutable>, EVEX_V256; 2069 defm Z128 : avx512_icmp_packed<opc, OpcodeStr, sched.XMM, 2070 VTInfo.info128, IsCommutable>, EVEX_V128; 2071 } 2072} 2073 2074multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr, 2075 X86SchedWriteWidths sched, 2076 AVX512VLVectorVTInfo VTInfo, 2077 Predicate prd, bit IsCommutable = 0> { 2078 let Predicates = [prd] in 2079 defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.ZMM, 2080 VTInfo.info512, IsCommutable>, EVEX_V512; 2081 2082 let Predicates = [prd, HasVLX] in { 2083 defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.YMM, 2084 VTInfo.info256, IsCommutable>, EVEX_V256; 2085 defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.XMM, 
2086 VTInfo.info128, IsCommutable>, EVEX_V128; 2087 } 2088} 2089 2090// AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't 2091// increase the pattern complexity the way an immediate would. 2092let AddedComplexity = 2 in { 2093// FIXME: Is there a better scheduler class for VPCMP? 2094defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", 2095 SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>, 2096 EVEX_CD8<8, CD8VF>, WIG; 2097 2098defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", 2099 SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>, 2100 EVEX_CD8<16, CD8VF>, WIG; 2101 2102defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", 2103 SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>, 2104 EVEX_CD8<32, CD8VF>; 2105 2106defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", 2107 SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>, 2108 T8, REX_W, EVEX_CD8<64, CD8VF>; 2109 2110defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", 2111 SchedWriteVecALU, avx512vl_i8_info, HasBWI>, 2112 EVEX_CD8<8, CD8VF>, WIG; 2113 2114defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", 2115 SchedWriteVecALU, avx512vl_i16_info, HasBWI>, 2116 EVEX_CD8<16, CD8VF>, WIG; 2117 2118defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", 2119 SchedWriteVecALU, avx512vl_i32_info, HasAVX512>, 2120 EVEX_CD8<32, CD8VF>; 2121 2122defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", 2123 SchedWriteVecALU, avx512vl_i64_info, HasAVX512>, 2124 T8, REX_W, EVEX_CD8<64, CD8VF>; 2125} 2126 2127multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag, 2128 PatFrag Frag_su, 2129 X86FoldableSchedWrite sched, 2130 X86VectorVTInfo _, string Name> { 2131 let isCommutable = 1 in 2132 def rri : AVX512AIi8<opc, MRMSrcReg, 2133 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), 2134 !strconcat("vpcmp", Suffix, 2135 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), 2136 [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1), 2137 (_.VT 
_.RC:$src2), 2138 cond)))]>, 2139 EVEX, VVVV, Sched<[sched]>; 2140 def rmi : AVX512AIi8<opc, MRMSrcMem, 2141 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc), 2142 !strconcat("vpcmp", Suffix, 2143 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), 2144 [(set _.KRC:$dst, (_.KVT 2145 (Frag:$cc 2146 (_.VT _.RC:$src1), 2147 (_.VT (_.LdFrag addr:$src2)), 2148 cond)))]>, 2149 EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>; 2150 let isCommutable = 1 in 2151 def rrik : AVX512AIi8<opc, MRMSrcReg, 2152 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2, 2153 u8imm:$cc), 2154 !strconcat("vpcmp", Suffix, 2155 "\t{$cc, $src2, $src1, $dst {${mask}}|", 2156 "$dst {${mask}}, $src1, $src2, $cc}"), 2157 [(set _.KRC:$dst, (and _.KRCWM:$mask, 2158 (_.KVT (Frag_su:$cc (_.VT _.RC:$src1), 2159 (_.VT _.RC:$src2), 2160 cond))))]>, 2161 EVEX, VVVV, EVEX_K, Sched<[sched]>; 2162 def rmik : AVX512AIi8<opc, MRMSrcMem, 2163 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2, 2164 u8imm:$cc), 2165 !strconcat("vpcmp", Suffix, 2166 "\t{$cc, $src2, $src1, $dst {${mask}}|", 2167 "$dst {${mask}}, $src1, $src2, $cc}"), 2168 [(set _.KRC:$dst, (and _.KRCWM:$mask, 2169 (_.KVT 2170 (Frag_su:$cc 2171 (_.VT _.RC:$src1), 2172 (_.VT (_.LdFrag addr:$src2)), 2173 cond))))]>, 2174 EVEX, VVVV, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; 2175 2176 def : Pat<(_.KVT (Frag:$cc (_.LdFrag addr:$src2), 2177 (_.VT _.RC:$src1), cond)), 2178 (!cast<Instruction>(Name#_.ZSuffix#"rmi") 2179 _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>; 2180 2181 def : Pat<(and _.KRCWM:$mask, 2182 (_.KVT (Frag_su:$cc (_.LdFrag addr:$src2), 2183 (_.VT _.RC:$src1), cond))), 2184 (!cast<Instruction>(Name#_.ZSuffix#"rmik") 2185 _.KRCWM:$mask, _.RC:$src1, addr:$src2, 2186 (X86pcmpm_imm_commute $cc))>; 2187} 2188 2189multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag, 2190 PatFrag Frag_su, X86FoldableSchedWrite sched, 2191 X86VectorVTInfo _, string Name> : 2192 
avx512_icmp_cc<opc, Suffix, Frag, Frag_su, sched, _, Name> {
  // Broadcast-memory form: compares $src1 against a broadcast scalar load,
  // producing a mask in $dst.
  def rmib : AVX512AIi8<opc, MRMSrcMem,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
                                 u8imm:$cc),
             !strconcat("vpcmp", Suffix,
                        "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
                        "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
             [(set _.KRC:$dst, (_.KVT (Frag:$cc
                                       (_.VT _.RC:$src1),
                                       (_.BroadcastLdFrag addr:$src2),
                                       cond)))]>,
             EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  // Masked broadcast-memory form: result is ANDed with the $mask input.
  def rmibk : AVX512AIi8<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
                                  _.ScalarMemOp:$src2, u8imm:$cc),
              !strconcat("vpcmp", Suffix,
                         "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                     (_.KVT (Frag_su:$cc
                                             (_.VT _.RC:$src1),
                                             (_.BroadcastLdFrag addr:$src2),
                                             cond))))]>,
              EVEX, VVVV, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;

  // Patterns with the broadcast load as the *first* operand: swap operands
  // and rewrite the condition code with X86pcmpm_imm_commute.
  def : Pat<(_.KVT (Frag:$cc (_.BroadcastLdFrag addr:$src2),
                             (_.VT _.RC:$src1), cond)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmib")
             _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>;

  def : Pat<(and _.KRCWM:$mask,
                 (_.KVT (Frag_su:$cc (_.BroadcastLdFrag addr:$src2),
                                     (_.VT _.RC:$src1), cond))),
            (!cast<Instruction>(Name#_.ZSuffix#"rmibk")
             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
             (X86pcmpm_imm_commute $cc))>;
}

// Instantiates register/register and register/memory icmp-with-predicate
// forms at all three EVEX vector widths; the 128/256-bit variants also
// require VLX.
multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
                             PatFrag Frag_su, X86SchedWriteWidths sched,
                             AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
                          sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
                               sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
                               sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
  }
}

// Same as above, but additionally instantiates the broadcast-memory (rmb)
// forms — only meaningful for 32/64-bit elements.
multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
                                 PatFrag Frag_su, X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
                              sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
                                   sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
                                   sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_su,
                                SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
              EVEX_CD8<8, CD8VF>;
defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_su,
                                 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
               EVEX_CD8<8, CD8VF>;

defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_su,
                                SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
              REX_W, EVEX_CD8<16, CD8VF>;
defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_su,
                                 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
               REX_W, EVEX_CD8<16, CD8VF>;

defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_su,
                                    SchedWriteVecALU, avx512vl_i32_info,
                                    HasAVX512>, EVEX_CD8<32, CD8VF>;
defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_su,
                                     SchedWriteVecALU, avx512vl_i32_info,
                                     HasAVX512>, EVEX_CD8<32, CD8VF>;

defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_su,
                                    SchedWriteVecALU, avx512vl_i64_info,
                                    HasAVX512>, REX_W, EVEX_CD8<64, CD8VF>;
defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_su,
                                     SchedWriteVecALU, avx512vl_i64_info,
                                     HasAVX512>, REX_W, EVEX_CD8<64, CD8VF>;

// FP compare with an immediate predicate (VCMPPS/PD/PH): rri, rmi and
// broadcast rmbi forms, unmasked/masked, plus commuted-load and mask-intrinsic
// selection patterns. These instructions read MXCSR and may raise FP
// exceptions.
multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
                              string Name> {
let Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                                 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                                 "vcmp"#_.Suffix,
                                 "$cc, $src2, $src1", "$src1, $src2, $cc",
                                 (X86any_cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
                                 (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
                                 1>, Sched<[sched]>;

  defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                                 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
                                 "vcmp"#_.Suffix,
                                 "$cc, $src2, $src1", "$src1, $src2, $cc",
                                 (X86any_cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
                                              timm:$cc),
                                 (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
                                             timm:$cc)>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                                  (outs _.KRC:$dst),
                                  (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
                                  "vcmp"#_.Suffix,
                                  "$cc, ${src2}"#_.BroadcastStr#", $src1",
                                  "$src1, ${src2}"#_.BroadcastStr#", $cc",
                                  (X86any_cmpm (_.VT _.RC:$src1),
                                               (_.VT (_.BroadcastLdFrag addr:$src2)),
                                               timm:$cc),
                                  (X86cmpm_su (_.VT _.RC:$src1),
                                              (_.VT (_.BroadcastLdFrag addr:$src2)),
                                              timm:$cc)>,
              EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }

  // Patterns for selecting with loads in other operand: commute and rewrite
  // the immediate with X86cmpm_imm_commute.
  def : Pat<(X86any_cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
                         timm:$cc),
            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
                                                      (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2),
                                            (_.VT _.RC:$src1),
                                            timm:$cc)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
                                                       _.RC:$src1, addr:$src2,
                                                       (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(X86any_cmpm (_.BroadcastLdFrag addr:$src2),
                         (_.VT _.RC:$src1), timm:$cc),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
                                                       (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.BroadcastLdFrag addr:$src2),
                                            (_.VT _.RC:$src1),
                                            timm:$cc)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
                                                        _.RC:$src1, addr:$src2,
                                                        (X86cmpm_imm_commute timm:$cc))>;

  // Patterns for mask intrinsics (X86cmpmm carries the mask as a fourth
  // operand; all-ones selects the unmasked instruction).
  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc,
                      (_.KVT immAllOnesV)),
            (!cast<Instruction>(Name#_.ZSuffix#"rri") _.RC:$src1, _.RC:$src2, timm:$cc)>;

  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#"rrik") _.KRCWM:$mask, _.RC:$src1,
                                                       _.RC:$src2, timm:$cc)>;

  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
                      (_.KVT immAllOnesV)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, timm:$cc)>;

  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
                      _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask, _.RC:$src1,
                                                       addr:$src2, timm:$cc)>;

  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
                      (_.KVT immAllOnesV)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, timm:$cc)>;

  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
                      _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, _.RC:$src1,
                                                        addr:$src2, timm:$cc)>;

  // Patterns for mask intrinsics with loads in other operand.
  def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
                      (_.KVT immAllOnesV)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
                                                      (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
                      _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
                                                       _.RC:$src1, addr:$src2,
                                                       (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
                      (_.KVT immAllOnesV)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
                                                       (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
                      _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
                                                        _.RC:$src1, addr:$src2,
                                                        (X86cmpm_imm_commute timm:$cc))>;
}

// Register-only compare with {sae} (suppress-all-exceptions) — 512-bit only.
multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  // comparison code form (VCMP[EQ/LT/LE/...])
  let Uses = [MXCSR] in
  defm rrib : AVX512_maskable_custom_cmp<0xC2, MRMSrcReg, (outs _.KRC:$dst),
                                         (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                                         (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2, u8imm:$cc),
                                         "vcmp"#_.Suffix,
                                         "$cc, {sae}, $src2, $src1",
                                         "$src1, $src2, {sae}, $cc",
                                         [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
                                                            (_.VT _.RC:$src2), timm:$cc, (_.KVT immAllOnesV)))],
                                         [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
                                                            (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask))]>,
              EVEX_B, Sched<[sched]>;
}

// Width driver: 512-bit (plus the SAE form) under Pred, 128/256-bit under VLX.
multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
                       Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in {
    defm Z : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
             avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512;

  }
  let Predicates = [Pred,HasVLX] in {
    defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
    defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
  }
}

defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
              AVX512PDIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
              AVX512PSIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;
defm VCMPPH : avx512_vcmp<SchedWriteFCmp, avx512vl_f16_info, HasFP16>,
              AVX512PSIi8Base, EVEX, VVVV, EVEX_CD8<16, CD8VF>, TA;

// Patterns to select fp compares with load as first operand.
// Scalar compares with the load on the left: commute onto the rm form.
let Predicates = [HasAVX512] in {
  def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1, timm:$cc)),
            (VCMPSDZrmi FR64X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;

  def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1, timm:$cc)),
            (VCMPSSZrmi FR32X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
}

let Predicates = [HasFP16] in {
  def : Pat<(v1i1 (X86cmpms (loadf16 addr:$src2), FR16X:$src1, timm:$cc)),
            (VCMPSHZrmi FR16X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
}

// ----------------------------------------------------------------
// FPClass

// Handle fpclass instruction: mask = op(reg_scalar, imm)
//                                    op(mem_scalar, imm)
// VFPCLASS reads MXCSR; the Predicates/ExeDomain come from the VT info.
multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                 Predicate prd> {
  let Predicates = [prd], ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
    def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                    (ins _.RC:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1),
                                     (i32 timm:$src2)))]>,
             Sched<[sched]>;
    def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
                     OpcodeStr#_.Suffix#
                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst,(and _.KRCWM:$mask,
                                      (X86Vfpclasss_su (_.VT _.RC:$src1),
                                      (i32 timm:$src2))))]>,
              EVEX_K, Sched<[sched]>;
    def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix#
                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,
                          (X86Vfpclasss (_.ScalarIntMemFrags addr:$src1),
                                        (i32 timm:$src2)))]>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
    def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
                     OpcodeStr#_.Suffix#
                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst,(and _.KRCWM:$mask,
                                      (X86Vfpclasss_su (_.ScalarIntMemFrags addr:$src1),
                                      (i32 timm:$src2))))]>,
              EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Handle fpclass instruction: mask = fpclass(reg_vec, imm)
//                                    fpclass(mem_vec, imm)
//                                    fpclass(broadcast(eltVt), imm)
// 'mem' is the x/y/z suffix used to disambiguate the memory form in AT&T asm.
multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                 string mem>{
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
    def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                    (ins _.RC:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1),
                                     (i32 timm:$src2)))]>,
             Sched<[sched]>;
    def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
                     OpcodeStr#_.Suffix#
                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst,(and _.KRCWM:$mask,
                                      (X86Vfpclass_su (_.VT _.RC:$src1),
                                      (i32 timm:$src2))))]>,
              EVEX_K, Sched<[sched]>;
    def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.MemOp:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix#"{"#mem#"}"#
                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,(X86Vfpclass
                                     (_.VT (_.LdFrag addr:$src1)),
                                     (i32 timm:$src2)))]>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
    def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
                     OpcodeStr#_.Suffix#"{"#mem#"}"#
                     "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                     [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su
                                       (_.VT (_.LdFrag addr:$src1)),
                                       (i32 timm:$src2))))]>,
              EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
    def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                     (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                     OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
                     _.BroadcastStr#", $dst|$dst, ${src1}"
                     #_.BroadcastStr#", $src2}",
                     [(set _.KRC:$dst,(X86Vfpclass
                                      (_.VT (_.BroadcastLdFrag addr:$src1)),
                                      (i32 timm:$src2)))]>,
              EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
    def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                      (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
                      OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
                      _.BroadcastStr#", $dst {${mask}}|$dst {${mask}}, ${src1}"#
                      _.BroadcastStr#", $src2}",
                      [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su
                                       (_.VT (_.BroadcastLdFrag addr:$src1)),
                                       (i32 timm:$src2))))]>,
               EVEX_B, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }

  // Allow registers or broadcast with the x, y, z suffix we use to disambiguate
  // the memory form.
  def : InstAlias<OpcodeStr#_.Suffix#mem#
                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  (!cast<Instruction>(NAME#"rr")
                   _.KRC:$dst, _.RC:$src1, i32u8imm:$src2), 0, "att">;
  def : InstAlias<OpcodeStr#_.Suffix#mem#
                  "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                  (!cast<Instruction>(NAME#"rrk")
                   _.KRC:$dst, _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 0, "att">;
  def : InstAlias<OpcodeStr#_.Suffix#mem#
                  "\t{$src2, ${src1}"#_.BroadcastStr#", $dst|$dst, ${src1}"#
                  _.BroadcastStr#", $src2}",
                  (!cast<Instruction>(NAME#"rmb")
                   _.KRC:$dst, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
  def : InstAlias<OpcodeStr#_.Suffix#mem#
                  "\t{$src2, ${src1}"#_.BroadcastStr#", $dst {${mask}}|"
                  "$dst {${mask}}, ${src1}"#_.BroadcastStr#", $src2}",
                  (!cast<Instruction>(NAME#"rmbk")
                   _.KRC:$dst, _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
}

// All three vector widths of VFPCLASS, with the x/y/z assembly suffix.
multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
                                     bits<8> opc, X86SchedWriteWidths sched,
                                     Predicate prd>{
  let Predicates = [prd] in {
    defm Z : avx512_vector_fpclass<opc, OpcodeStr, sched.ZMM,
                                   _.info512, "z">, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, sched.XMM,
                                      _.info128, "x">, EVEX_V128;
    defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, sched.YMM,
                                      _.info256, "y">, EVEX_V256;
  }
}

// Vector + scalar VFPCLASS over all element types (FP16 gated on HasFP16,
// FP32/FP64 on HasDQI).
multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
                                 bits<8> opcScalar, X86SchedWriteWidths sched> {
  defm PH : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f16_info, opcVec,
                                      sched, HasFP16>,
            EVEX_CD8<16, CD8VF>, AVX512PSIi8Base, TA;
  defm SHZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
                                   sched.Scl, f16x_info, HasFP16>,
             EVEX_CD8<16, CD8VT1>, AVX512PSIi8Base, TA;
  defm PS : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f32_info, opcVec,
                                      sched, HasDQI>,
            EVEX_CD8<32, CD8VF>, AVX512AIi8Base;
  defm PD : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f64_info, opcVec,
                                      sched, HasDQI>,
            EVEX_CD8<64, CD8VF>, AVX512AIi8Base, REX_W;
  defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
                                   sched.Scl, f32x_info, HasDQI>, VEX_LIG,
             EVEX_CD8<32, CD8VT1>, AVX512AIi8Base;
  defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
                                   sched.Scl, f64x_info, HasDQI>, VEX_LIG,
             EVEX_CD8<64, CD8VT1>, AVX512AIi8Base, REX_W;
}

defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp>, EVEX;

//-----------------------------------------------------------------
// Mask register copy, including
// - copy between mask registers
// - load/store mask registers
// - copy from GPR to mask register and vice versa
//
// 'Suffix' distinguishes the legacy VEX encodings from the EVEX (EGPR-capable)
// ones; explicitOpPrefix is set accordingly.
multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
                           string OpcodeStr, RegisterClass KRC, ValueType vvt,
                           X86MemOperand x86memop, string Suffix = ""> {
  let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove],
      explicitOpPrefix = !if(!eq(Suffix, ""), NoExplicitOpPrefix, ExplicitEVEX) in
  def kk#Suffix : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
                  Sched<[WriteMove]>;
  def km#Suffix : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set KRC:$dst, (vvt (load addr:$src)))]>,
                  Sched<[WriteLoad]>, NoCD8;
  def mk#Suffix : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(store KRC:$src, addr:$dst)]>,
                  Sched<[WriteStore]>, NoCD8;
}

// KMOV between a mask register and a GPR (no patterns; selected manually).
multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
                               string OpcodeStr, RegisterClass KRC,
                               RegisterClass GRC, string Suffix = ""> {
  let hasSideEffects = 0 in {
    def kr#Suffix : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
                    Sched<[WriteMove]>;
    def rk#Suffix : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
                    Sched<[WriteMove]>;
  }
}

let Predicates = [HasDQI, NoEGPR] in
  defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
               VEX, TB, PD;
let Predicates = [HasDQI, HasEGPR, In64BitMode] in
  defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem, "_EVEX">,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32, "_EVEX">,
               EVEX, TB, PD;

let Predicates = [HasAVX512, NoEGPR] in
  defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
               VEX, TB;
let Predicates = [HasAVX512, HasEGPR, In64BitMode] in
  defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem, "_EVEX">,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32, "_EVEX">,
               EVEX, TB;

let Predicates = [HasBWI, NoEGPR] in {
  defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
               VEX, TB, PD, REX_W;
  defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
               VEX, TB, XD;
  defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
               VEX, TB, REX_W;
  defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
               VEX, TB, XD, REX_W;
}
let Predicates = [HasBWI, HasEGPR, In64BitMode] in {
  defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem, "_EVEX">,
               EVEX, TB, PD, REX_W;
  defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32, "_EVEX">,
               EVEX, TB, XD;
  defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem, "_EVEX">,
               EVEX, TB, REX_W;
  defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64, "_EVEX">,
               EVEX, TB, XD, REX_W;
}

// GR from/to mask register
def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;
def : Pat<(i8 (trunc (i16 (bitconvert (v16i1 VK16:$src))))),
          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_8bit)>;

def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;

// zext needs an explicit KMOV (which zero-extends); anyext can be a plain copy.
def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
          (KMOVWrk VK16:$src)>;
def : Pat<(i64 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
          (SUBREG_TO_REG (i64 0), (KMOVWrk VK16:$src), sub_32bit)>;
def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
          (COPY_TO_REGCLASS VK16:$src, GR32)>;
def : Pat<(i64 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
          (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK16:$src, GR32), sub_32bit)>;

def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
          (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
def : Pat<(i64 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
          (SUBREG_TO_REG (i64 0), (KMOVBrk VK8:$src), sub_32bit)>, Requires<[HasDQI]>;
def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
          (COPY_TO_REGCLASS VK8:$src, GR32)>;
def : Pat<(i64 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
          (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK8:$src, GR32), sub_32bit)>;

def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
          (COPY_TO_REGCLASS GR32:$src, VK32)>;
def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
          (COPY_TO_REGCLASS VK32:$src, GR32)>;
def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
          (COPY_TO_REGCLASS GR64:$src, VK64)>;
def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
          (COPY_TO_REGCLASS VK64:$src, GR64)>;

// Load/store kreg
let Predicates = [HasDQI] in {
  def : Pat<(v1i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
  def : Pat<(v2i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
  def : Pat<(v4i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
}

let Predicates = [HasAVX512] in {
  def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
            (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
  def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))),
            (KMOVWkm addr:$src)>;
}

// extract_vector_elt of an i1 vector, widened to i8.
def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
                         SDTypeProfile<1, 2, [SDTCisVT<0, i8>,
                                              SDTCVecEltisVT<1, i1>,
                                              SDTCisPtrTy<2>]>>;

let Predicates = [HasAVX512] in {
  // GPR <-> mask copies for each mask width, via GR32.
  multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
    def : Pat<(maskVT (scalar_to_vector GR32:$src)),
              (COPY_TO_REGCLASS GR32:$src, maskRC)>;

    def : Pat<(maskVT (scalar_to_vector GR8:$src)),
              (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;

    def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))),
              (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>;

    def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))),
              (i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>;
  }

  defm : operation_gpr_mask_copy_lowering<VK1, v1i1>;
  defm : operation_gpr_mask_copy_lowering<VK2, v2i1>;
  defm : operation_gpr_mask_copy_lowering<VK4, v4i1>;
  defm : operation_gpr_mask_copy_lowering<VK8, v8i1>;
  defm : operation_gpr_mask_copy_lowering<VK16, v16i1>;
  defm : operation_gpr_mask_copy_lowering<VK32, v32i1>;
  defm : operation_gpr_mask_copy_lowering<VK64, v64i1>;

  // Insert a single GR8 bit into a zeroed v16i1: mask to bit 0, then KMOVW.
  def : Pat<(insert_subvector (v16i1 immAllZerosV),
                              (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
            (KMOVWkr (AND32ri
                      (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
                      (i32 1)))>;
}

// Mask unary operation
// - KNOT
multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
                            RegisterClass KRC, SDPatternOperator OpNode,
                            X86FoldableSchedWrite sched, Predicate prd> {
  let Predicates = [prd] in
  def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
             [(set KRC:$dst, (OpNode KRC:$src))]>,
           Sched<[sched]>;
}

// B/W/D/Q variants of a mask unary op, with the matching feature predicates.
multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNode,
                                X86FoldableSchedWrite sched> {
  defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                            sched, HasDQI>, VEX, TB, PD;
  defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                            sched, HasAVX512>, VEX, TB;
  defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                            sched, HasBWI>, VEX, TB, PD, REX_W;
  defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                            sched, HasBWI>, VEX, TB, REX_W;
}

// TODO - do we need a X86SchedWriteWidths::KMASK type?
defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;

// KNL does not support KMOVB, 8-bit mask is promoted to 16-bit
let Predicates = [HasAVX512, NoDQI] in
def : Pat<(vnot VK8:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;

// Sub-byte mask types are not legal: widen to VK16, KNOT there, then copy
// the result back into the operand's own register class.
def : Pat<(vnot VK4:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
def : Pat<(vnot VK2:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
// Fixed: the v1i1 result was previously copied to VK2, inconsistent with the
// VK2/VK4/VK8 patterns above which all restore the source's own class.
def : Pat<(vnot VK1:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK1:$src, VK16)), VK1)>;

// Mask binary operation
// - KAND, KANDN, KOR, KXNOR, KXOR
multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
                             RegisterClass KRC, SDPatternOperator OpNode,
                             X86FoldableSchedWrite sched, Predicate prd,
                             bit IsCommutable> {
  let Predicates = [prd], isCommutable = IsCommutable in
  def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
             !strconcat(OpcodeStr,
                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>,
           Sched<[sched]>;
}

// B/W/D/Q variants of a mask binary op; the W form's predicate is
// parameterized so KADD can require DQI.
multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
                                 SDPatternOperator OpNode,
                                 X86FoldableSchedWrite sched, bit IsCommutable,
                                 Predicate prdW = HasAVX512> {
  defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                             sched, HasDQI, IsCommutable>, VEX, VVVV, VEX_L, TB, PD;
  defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                             sched, prdW, IsCommutable>, VEX, VVVV, VEX_L, TB;
  defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                             sched, HasBWI, IsCommutable>, VEX, VVVV, VEX_L, REX_W, TB, PD;
  defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                             sched, HasBWI, IsCommutable>, VEX, VVVV, VEX_L, REX_W, TB;
}

// TODO - do we need a X86SchedWriteWidths::KMASK type?
// X86SchedWriteWidths::KMASK type?
defm KAND : avx512_mask_binop_all<0x41, "kand", and, SchedWriteVecLogic.XMM, 1>;
defm KOR : avx512_mask_binop_all<0x45, "kor", or, SchedWriteVecLogic.XMM, 1>;
defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, SchedWriteVecLogic.XMM, 1>;
defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, SchedWriteVecLogic.XMM, 1>;
defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, SchedWriteVecLogic.XMM, 0>;
defm KADD : avx512_mask_binop_all<0x4A, "kadd", X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;

// Lower binary mask ops on narrow mask types by widening to VK16, using the
// word-sized instruction, and copying the result back.
multiclass avx512_binop_pat<SDPatternOperator VOpNode,
                            Instruction Inst> {
  // With AVX512F, 8-bit mask is promoted to 16-bit mask,
  // for the DQI set, this type is legal and KxxxB instruction is used
  let Predicates = [NoDQI] in
  def : Pat<(VOpNode VK8:$src1, VK8:$src2),
            (COPY_TO_REGCLASS
             (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
                   (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;

  // All types smaller than 8 bits require conversion anyway
  def : Pat<(VOpNode VK1:$src1, VK1:$src2),
            (COPY_TO_REGCLASS (Inst
                               (COPY_TO_REGCLASS VK1:$src1, VK16),
                               (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
  def : Pat<(VOpNode VK2:$src1, VK2:$src2),
            (COPY_TO_REGCLASS (Inst
                               (COPY_TO_REGCLASS VK2:$src1, VK16),
                               (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
  def : Pat<(VOpNode VK4:$src1, VK4:$src2),
            (COPY_TO_REGCLASS (Inst
                               (COPY_TO_REGCLASS VK4:$src1, VK16),
                               (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
}

defm : avx512_binop_pat<and, KANDWrr>;
defm : avx512_binop_pat<vandn, KANDNWrr>;
defm : avx512_binop_pat<or, KORWrr>;
defm : avx512_binop_pat<vxnor, KXNORWrr>;
defm : avx512_binop_pat<xor, KXORWrr>;

// Mask unpacking
multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst,
                             X86KVectorVTInfo Src, X86FoldableSchedWrite sched,
                             Predicate prd> {
  let Predicates = [prd] in {
    let hasSideEffects = 0 in
    def rr : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst),
               (ins Src.KRC:$src1, Src.KRC:$src2),
               "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
             VEX, VVVV, VEX_L, Sched<[sched]>;

    // concat_vectors places $src1 in the low half, so operands are swapped.
    def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)),
              (!cast<Instruction>(NAME#rr) Src.KRC:$src2, Src.KRC:$src1)>;
  }
}

defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info, WriteShuffle, HasAVX512>, TB, PD;
defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, TB;
defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, TB, REX_W;

// Mask bit testing
// KORTEST/KTEST: no register output, only EFLAGS.
multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                              SDNode OpNode, X86FoldableSchedWrite sched,
                              Predicate prd> {
  let Predicates = [prd], Defs = [EFLAGS] in
  def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
             [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>,
           Sched<[sched]>;
}

multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched,
                                Predicate prdW = HasAVX512> {
  defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>,
           VEX, TB, PD;
  defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>,
           VEX, TB;
  defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>,
           VEX, TB, REX_W;
  defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>,
           VEX, TB, PD, REX_W;
}

// TODO - do we need a X86SchedWriteWidths::KMASK type?
defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>;
defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>;

// Mask shift
// Immediate shift of a mask register (KSHIFTL/KSHIFTR).
multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                               SDNode OpNode, X86FoldableSchedWrite sched> {
  let Predicates = [HasAVX512] in
  def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
               !strconcat(OpcodeStr,
                          "\t{$imm, $src, $dst|$dst, $src, $imm}"),
               [(set KRC:$dst, (OpNode KRC:$src, (i8 timm:$imm)))]>,
            Sched<[sched]>;
}

// B/W variants share opc1; D/Q share opc2. W needs only AVX512F, B needs DQI,
// D/Q need BWI.
multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
                                 SDNode OpNode, X86FoldableSchedWrite sched> {
  defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                               sched>, VEX, TA, PD, REX_W;
  let Predicates = [HasDQI] in
  defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                               sched>, VEX, TA, PD;
  let Predicates = [HasBWI] in {
    defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                                 sched>, VEX, TA, PD, REX_W;
    defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                                 sched>, VEX, TA, PD;
  }
}

defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>;
defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;

// Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
// NOTE(review): multiclass name says "axv512" (typo for "avx512"); kept as-is
// because the defm instantiation sites below reference this exact spelling.
//
// Lowers a narrow (128/256-bit) VPCMP with condition code to the 512-bit
// instruction when VLX is unavailable: the narrow operands are widened with
// INSERT_SUBREG into an IMPLICIT_DEF, and the wide result mask is narrowed
// back with COPY_TO_REGCLASS.
multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
                                                 string InstStr,
                                                 X86VectorVTInfo Narrow,
                                                 X86VectorVTInfo Wide> {
def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
                                (Narrow.VT Narrow.RC:$src2), cond)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrri")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
            (X86pcmpm_imm $cc)), Narrow.KRC)>;

// Masked form: the surrounding `and` with a mask becomes the k-masked (Zrrik)
// instruction. Frag_su is the "_su" PatFrag variant — presumably a single-use
// constraint on the compare; confirm against the PatFrag definition.
def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                       (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
                                                (Narrow.VT Narrow.RC:$src2),
                                                cond)))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
           (X86pcmpm_imm $cc)), Narrow.KRC)>;
}

// Same lowering for the register-with-broadcast-memory (Zrmib*) forms.
multiclass axv512_icmp_packed_cc_rmb_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
                                                     string InstStr,
                                                     X86VectorVTInfo Narrow,
                                                     X86VectorVTInfo Wide> {
// Broadcast load.
def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
                                (Narrow.BroadcastLdFrag addr:$src2), cond)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrmib")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>;

// Masked broadcast form (Zrmibk).
def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                       (Narrow.KVT
                        (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
                                     (Narrow.BroadcastLdFrag addr:$src2),
                                     cond)))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
           addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>;

// Commuted with broadcast load.
// Broadcast operand on the left: fold it into the memory form anyway and fix
// up the condition code with X86pcmpm_imm_commute.
def : Pat<(Narrow.KVT (Frag:$cc (Narrow.BroadcastLdFrag addr:$src2),
                                (Narrow.VT Narrow.RC:$src1),
                                cond)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrmib")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>;

// Masked, commuted broadcast form.
def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                       (Narrow.KVT
                        (Frag_su:$cc (Narrow.BroadcastLdFrag addr:$src2),
                                     (Narrow.VT Narrow.RC:$src1),
                                     cond)))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
           addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>;
}

// Same as above, but for fp types which don't use PatFrags.
// (The FP compares match the X86cmpm/X86cmpm_su nodes with a timm:$cc
// operand directly instead of a cc-carrying PatFrag.)
multiclass axv512_cmp_packed_cc_no_vlx_lowering<string InstStr,
                                                X86VectorVTInfo Narrow,
                                                X86VectorVTInfo Wide> {
def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
                               (Narrow.VT Narrow.RC:$src2), timm:$cc)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrri")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
            timm:$cc), Narrow.KRC)>;

// Masked register-register form (Zrrik).
def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                       (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
                                   (Narrow.VT Narrow.RC:$src2), timm:$cc))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
           timm:$cc), Narrow.KRC)>;

// Broadcast load.
3051def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1), 3052 (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)), 3053 (COPY_TO_REGCLASS 3054 (!cast<Instruction>(InstStr#"Zrmbi") 3055 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3056 addr:$src2, timm:$cc), Narrow.KRC)>; 3057 3058def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, 3059 (X86cmpm_su (Narrow.VT Narrow.RC:$src1), 3060 (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc))), 3061 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik") 3062 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), 3063 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3064 addr:$src2, timm:$cc), Narrow.KRC)>; 3065 3066// Commuted with broadcast load. 3067def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), 3068 (Narrow.VT Narrow.RC:$src1), timm:$cc)), 3069 (COPY_TO_REGCLASS 3070 (!cast<Instruction>(InstStr#"Zrmbi") 3071 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3072 addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>; 3073 3074def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, 3075 (X86cmpm_su (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), 3076 (Narrow.VT Narrow.RC:$src1), timm:$cc))), 3077 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik") 3078 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), 3079 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), 3080 addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>; 3081} 3082 3083let Predicates = [HasAVX512, NoVLX, HasEVEX512] in { 3084 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>; 3085 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>; 3086 3087 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>; 3088 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, 
X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>; 3089 3090 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>; 3091 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>; 3092 3093 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>; 3094 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>; 3095 3096 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>; 3097 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>; 3098 3099 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>; 3100 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>; 3101 3102 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>; 3103 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>; 3104 3105 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>; 3106 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>; 3107 3108 defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v8f32x_info, v16f32_info>; 3109 defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v4f32x_info, v16f32_info>; 3110 defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v4f64x_info, v8f64_info>; 3111 defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>; 3112} 3113 3114let Predicates = [HasBWI, NoVLX, HasEVEX512] in { 3115 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>; 3116 defm : 
axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v16i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v16i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v16i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v16i16x_info, v32i16_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v8i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v8i16x_info, v32i16_info>;
}

// Mask setting all 0s or 1s
// Pseudo that materializes an all-zeros or all-ones mask register; marked
// rematerializable and as-cheap-as-a-move so the register allocator can
// re-create it instead of spilling.
multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, SDPatternOperator Val> {
  let Predicates = [HasAVX512] in
  let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
      SchedRW = [WriteZero] in
  def NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
                [(set KRC:$dst, (VT Val))]>;
}

// Only W/D/Q variants exist; narrower masks are handled by the
// COPY_TO_REGCLASS patterns that follow (KSET0W/KSET1W narrowed to VK1..VK8).
multiclass avx512_mask_setop_w<SDPatternOperator Val> {
  defm W : avx512_mask_setop<VK16, v16i1, Val>;
  defm D : avx512_mask_setop<VK32, v32i1, Val>;
  defm Q : avx512_mask_setop<VK64, v64i1, Val>;
}

defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
defm KSET1 : avx512_mask_setop_w<immAllOnesV>;

// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
// Narrow all-zeros/all-ones masks are produced by the 16-bit KSET0W/KSET1W
// pseudo and then narrowed with a register-class copy.
let Predicates = [HasAVX512] in {
  def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
  def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
  def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
  def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
  def : Pat<(v8i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK8)>;
  def : Pat<(v4i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK4)>;
  def : Pat<(v2i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK2)>;
  def : Pat<(v1i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK1)>;
}

// Patterns for kmask insert_subvector/extract_subvector to/from index=0
// Both directions at index 0 are pure register-class copies — no shift or
// masking is needed for the low bits.
multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
                                             RegisterClass RC, ValueType VT> {
  def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
            (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;

  def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
            (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
}
// Instantiate for every (narrow, wide) mask width pair.
defm : operation_subvector_mask_lowering<VK1, v1i1, VK2, v2i1>;
defm : operation_subvector_mask_lowering<VK1, v1i1, VK4, v4i1>;
defm : operation_subvector_mask_lowering<VK1, v1i1, VK8, v8i1>;
defm : operation_subvector_mask_lowering<VK1, v1i1, VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK1, v1i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK1, v1i1, VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK2, v2i1, VK4, v4i1>;
defm : operation_subvector_mask_lowering<VK2, v2i1, VK8, v8i1>;
defm : operation_subvector_mask_lowering<VK2, v2i1, VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK2, v2i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK2, v2i1, VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK4, v4i1, VK8, v8i1>;
defm : operation_subvector_mask_lowering<VK4, v4i1, VK16, v16i1>;
defm :
operation_subvector_mask_lowering<VK4, v4i1, VK32, v32i1>; 3183defm : operation_subvector_mask_lowering<VK4, v4i1, VK64, v64i1>; 3184 3185defm : operation_subvector_mask_lowering<VK8, v8i1, VK16, v16i1>; 3186defm : operation_subvector_mask_lowering<VK8, v8i1, VK32, v32i1>; 3187defm : operation_subvector_mask_lowering<VK8, v8i1, VK64, v64i1>; 3188 3189defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>; 3190defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>; 3191 3192defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>; 3193 3194//===----------------------------------------------------------------------===// 3195// AVX-512 - Aligned and unaligned load and store 3196// 3197 3198multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name, 3199 X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload, 3200 X86SchedWriteMoveLS Sched, bit NoRMPattern = 0, 3201 SDPatternOperator SelectOprr = vselect> { 3202 let hasSideEffects = 0 in { 3203 let isMoveReg = 1 in 3204 def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src), 3205 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [], 3206 _.ExeDomain>, EVEX, Sched<[Sched.RR]>; 3207 def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), 3208 (ins _.KRCWM:$mask, _.RC:$src), 3209 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|", 3210 "${dst} {${mask}} {z}, $src}"), 3211 [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask, 3212 (_.VT _.RC:$src), 3213 _.ImmAllZerosV)))], _.ExeDomain>, 3214 EVEX, EVEX_KZ, Sched<[Sched.RR]>; 3215 3216 let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in 3217 def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src), 3218 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 3219 !if(NoRMPattern, [], 3220 [(set _.RC:$dst, 3221 (_.VT (ld_frag addr:$src)))]), 3222 _.ExeDomain>, EVEX, Sched<[Sched.RM]>; 3223 3224 let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in { 3225 def rrk : AVX512PI<opc, MRMSrcReg, 
(outs _.RC:$dst), 3226 (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1), 3227 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|", 3228 "${dst} {${mask}}, $src1}"), 3229 [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask, 3230 (_.VT _.RC:$src1), 3231 (_.VT _.RC:$src0))))], _.ExeDomain>, 3232 EVEX, EVEX_K, Sched<[Sched.RR]>; 3233 def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), 3234 (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1), 3235 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|", 3236 "${dst} {${mask}}, $src1}"), 3237 [(set _.RC:$dst, (_.VT 3238 (vselect_mask _.KRCWM:$mask, 3239 (_.VT (ld_frag addr:$src1)), 3240 (_.VT _.RC:$src0))))], _.ExeDomain>, 3241 EVEX, EVEX_K, Sched<[Sched.RM]>; 3242 } 3243 def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), 3244 (ins _.KRCWM:$mask, _.MemOp:$src), 3245 OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"# 3246 "${dst} {${mask}} {z}, $src}", 3247 [(set _.RC:$dst, (_.VT (vselect_mask _.KRCWM:$mask, 3248 (_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))], 3249 _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>; 3250 } 3251 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)), 3252 (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>; 3253 3254 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)), 3255 (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>; 3256 3257 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))), 3258 (!cast<Instruction>(Name#_.ZSuffix#rmk) _.RC:$src0, 3259 _.KRCWM:$mask, addr:$ptr)>; 3260} 3261 3262multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr, 3263 AVX512VLVectorVTInfo _, Predicate prd, 3264 X86SchedWriteMoveLSWidths Sched, 3265 bit NoRMPattern = 0> { 3266 let Predicates = [prd] in 3267 defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, 3268 _.info512.AlignedLdFrag, masked_load_aligned, 3269 Sched.ZMM, NoRMPattern>, EVEX_V512; 3270 3271 let Predicates = [prd, HasVLX] in { 3272 defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, 
3273 _.info256.AlignedLdFrag, masked_load_aligned, 3274 Sched.YMM, NoRMPattern>, EVEX_V256; 3275 defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, 3276 _.info128.AlignedLdFrag, masked_load_aligned, 3277 Sched.XMM, NoRMPattern>, EVEX_V128; 3278 } 3279} 3280 3281multiclass avx512_load_vl<bits<8> opc, string OpcodeStr, 3282 AVX512VLVectorVTInfo _, Predicate prd, 3283 X86SchedWriteMoveLSWidths Sched, 3284 bit NoRMPattern = 0, 3285 SDPatternOperator SelectOprr = vselect> { 3286 let Predicates = [prd] in 3287 defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag, 3288 masked_load, Sched.ZMM, NoRMPattern, SelectOprr>, EVEX_V512; 3289 3290 let Predicates = [prd, HasVLX] in { 3291 defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag, 3292 masked_load, Sched.YMM, NoRMPattern, SelectOprr>, EVEX_V256; 3293 defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag, 3294 masked_load, Sched.XMM, NoRMPattern, SelectOprr>, EVEX_V128; 3295 } 3296} 3297 3298multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName, 3299 X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore, 3300 X86SchedWriteMoveLS Sched, bit NoMRPattern = 0> { 3301 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in { 3302 let isMoveReg = 1 in 3303 def rr_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src), 3304 OpcodeStr # "\t{$src, $dst|$dst, $src}", 3305 [], _.ExeDomain>, EVEX, 3306 Sched<[Sched.RR]>; 3307 def rrk_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), 3308 (ins _.KRCWM:$mask, _.RC:$src), 3309 OpcodeStr # "\t{$src, ${dst} {${mask}}|"# 3310 "${dst} {${mask}}, $src}", 3311 [], _.ExeDomain>, EVEX, EVEX_K, 3312 Sched<[Sched.RR]>; 3313 def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), 3314 (ins _.KRCWM:$mask, _.RC:$src), 3315 OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" # 3316 "${dst} {${mask}} {z}, $src}", 3317 [], _.ExeDomain>, EVEX, EVEX_KZ, 3318 Sched<[Sched.RR]>; 3319 } 3320 3321 let 
hasSideEffects = 0, mayStore = 1 in 3322 def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src), 3323 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 3324 !if(NoMRPattern, [], 3325 [(st_frag (_.VT _.RC:$src), addr:$dst)]), 3326 _.ExeDomain>, EVEX, Sched<[Sched.MR]>; 3327 def mrk : AVX512PI<opc, MRMDestMem, (outs), 3328 (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src), 3329 OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}", 3330 [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>; 3331 3332 def: Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask), 3333 (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr, 3334 _.KRCWM:$mask, _.RC:$src)>; 3335 3336 def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}", 3337 (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV") 3338 _.RC:$dst, _.RC:$src), 0>; 3339 def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}", 3340 (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV") 3341 _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>; 3342 def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}", 3343 (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV") 3344 _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>; 3345} 3346 3347multiclass avx512_store_vl< bits<8> opc, string OpcodeStr, 3348 AVX512VLVectorVTInfo _, Predicate prd, 3349 X86SchedWriteMoveLSWidths Sched, 3350 bit NoMRPattern = 0> { 3351 let Predicates = [prd] in 3352 defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store, 3353 masked_store, Sched.ZMM, NoMRPattern>, EVEX_V512; 3354 let Predicates = [prd, HasVLX] in { 3355 defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store, 3356 masked_store, Sched.YMM, NoMRPattern>, EVEX_V256; 3357 defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store, 3358 masked_store, Sched.XMM, NoMRPattern>, EVEX_V128; 3359 } 3360} 3361 3362multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr, 3363 AVX512VLVectorVTInfo _, Predicate prd, 3364 
X86SchedWriteMoveLSWidths Sched, 3365 bit NoMRPattern = 0> { 3366 let Predicates = [prd] in 3367 defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore, 3368 masked_store_aligned, Sched.ZMM, NoMRPattern>, EVEX_V512; 3369 3370 let Predicates = [prd, HasVLX] in { 3371 defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore, 3372 masked_store_aligned, Sched.YMM, NoMRPattern>, EVEX_V256; 3373 defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore, 3374 masked_store_aligned, Sched.XMM, NoMRPattern>, EVEX_V128; 3375 } 3376} 3377 3378defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info, 3379 HasAVX512, SchedWriteFMoveLS>, 3380 avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info, 3381 HasAVX512, SchedWriteFMoveLS>, 3382 TB, EVEX_CD8<32, CD8VF>; 3383 3384defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info, 3385 HasAVX512, SchedWriteFMoveLS>, 3386 avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info, 3387 HasAVX512, SchedWriteFMoveLS>, 3388 TB, PD, REX_W, EVEX_CD8<64, CD8VF>; 3389 3390defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512, 3391 SchedWriteFMoveLS, 0, null_frag>, 3392 avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512, 3393 SchedWriteFMoveLS>, 3394 TB, EVEX_CD8<32, CD8VF>; 3395 3396defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512, 3397 SchedWriteFMoveLS, 0, null_frag>, 3398 avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512, 3399 SchedWriteFMoveLS>, 3400 TB, PD, REX_W, EVEX_CD8<64, CD8VF>; 3401 3402defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info, 3403 HasAVX512, SchedWriteVecMoveLS, 1>, 3404 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info, 3405 HasAVX512, SchedWriteVecMoveLS, 1>, 3406 TB, PD, EVEX_CD8<32, CD8VF>; 3407 3408defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info, 3409 HasAVX512, SchedWriteVecMoveLS>, 3410 
avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info, 3411 HasAVX512, SchedWriteVecMoveLS>, 3412 TB, PD, REX_W, EVEX_CD8<64, CD8VF>; 3413 3414defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI, 3415 SchedWriteVecMoveLS, 1>, 3416 avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI, 3417 SchedWriteVecMoveLS, 1>, 3418 TB, XD, EVEX_CD8<8, CD8VF>; 3419 3420defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI, 3421 SchedWriteVecMoveLS, 1>, 3422 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI, 3423 SchedWriteVecMoveLS, 1>, 3424 TB, XD, REX_W, EVEX_CD8<16, CD8VF>; 3425 3426defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512, 3427 SchedWriteVecMoveLS, 1, null_frag>, 3428 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512, 3429 SchedWriteVecMoveLS, 1>, 3430 TB, XS, EVEX_CD8<32, CD8VF>; 3431 3432defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512, 3433 SchedWriteVecMoveLS, 0, null_frag>, 3434 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512, 3435 SchedWriteVecMoveLS>, 3436 TB, XS, REX_W, EVEX_CD8<64, CD8VF>; 3437 3438// Special instructions to help with spilling when we don't have VLX. We need 3439// to load or store from a ZMM register instead. These are converted in 3440// expandPostRAPseudos. 
// 128/256-bit load pseudos for spilling without VLX; expanded later to the
// 512-bit ZMM loads (see the comment above: expandPostRAPseudos).
let isReMaterializable = 1, canFoldAsLoad = 1,
    isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in {
def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
                            "", []>, Sched<[WriteFLoadX]>;
def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
                            "", []>, Sched<[WriteFLoadY]>;
def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
                            "", []>, Sched<[WriteFLoadX]>;
def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
                            "", []>, Sched<[WriteFLoadY]>;
}

// Matching store pseudos.
let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
                            "", []>, Sched<[WriteFStoreX]>;
def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
                            "", []>, Sched<[WriteFStoreY]>;
def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
                            "", []>, Sched<[WriteFStoreX]>;
def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
                            "", []>, Sched<[WriteFStoreY]>;
}

// vselect(mask, 0, x): lower as a zeroing masked move of x under the inverted
// mask. The v8i1 case widens the mask to VK16 to use KNOTWrr (no 8-bit KNOT
// is used here), then narrows the result back to VK8.
def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV),
                  (v8i64 VR512:$src))),
          (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
                                            VK8), VR512:$src)>;

def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
                   (v16i32 VR512:$src))),
          (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;

// These patterns exist to prevent the above patterns from introducing a second
// mask inversion when one already exists.
// If the mask is already inverted (vnot), use it directly for the zeroing
// masked move instead of stacking a second KNOT on top.
def : Pat<(v8i64 (vselect (v8i1 (vnot VK8:$mask)),
                 (v8i64 immAllZerosV),
                 (v8i64 VR512:$src))),
          (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
// NOTE(review): the input binds $mask as VK16 but the output names VK16WM —
// asymmetric with the v8i64 pattern above; confirm this is intended.
def : Pat<(v16i32 (vselect (v16i1 (vnot VK16:$mask)),
                  (v16i32 immAllZerosV),
                  (v16i32 VR512:$src))),
          (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;

// Lowers a narrow (128/256-bit) masked select to the 512-bit masked move
// when VLX is unavailable: widen sources and mask, run the wide rrk/rrkz
// move, and extract the narrow subregister from the result.
multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
                              X86VectorVTInfo Wide> {
  // Merge-masking form: $src0 supplies the preserved (false) elements.
  def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
                        Narrow.RC:$src1, Narrow.RC:$src0)),
            (EXTRACT_SUBREG
             (Wide.VT
              (!cast<Instruction>(InstrStr#"rrk")
               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
               (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
             Narrow.SubRegIdx)>;

  // Zero-masking form: false elements become zero (rrkz).
  def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
                        Narrow.RC:$src1, Narrow.ImmAllZerosV)),
            (EXTRACT_SUBREG
             (Wide.VT
              (!cast<Instruction>(InstrStr#"rrkz")
               (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
               (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
             Narrow.SubRegIdx)>;
}

// Patterns for handling v8i1 selects of 256-bit vectors when VLX isn't
// available. Use a 512-bit operation and extract.
3508let Predicates = [HasAVX512, NoVLX, HasEVEX512] in { 3509 defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>; 3510 defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>; 3511 defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>; 3512 defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>; 3513 3514 defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>; 3515 defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>; 3516 defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>; 3517 defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>; 3518} 3519 3520let Predicates = [HasBWI, NoVLX, HasEVEX512] in { 3521 defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>; 3522 defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>; 3523 3524 defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>; 3525 defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>; 3526 3527 defm : mask_move_lowering<"VMOVDQU16Z", v8f16x_info, v32f16_info>; 3528 defm : mask_move_lowering<"VMOVDQU16Z", v16f16x_info, v32f16_info>; 3529 3530 defm : mask_move_lowering<"VMOVDQU16Z", v8bf16x_info, v32bf16_info>; 3531 defm : mask_move_lowering<"VMOVDQU16Z", v16bf16x_info, v32bf16_info>; 3532} 3533 3534let Predicates = [HasAVX512] in { 3535 // 512-bit load. 
3536 def : Pat<(alignedloadv16i32 addr:$src), 3537 (VMOVDQA64Zrm addr:$src)>; 3538 def : Pat<(alignedloadv32i16 addr:$src), 3539 (VMOVDQA64Zrm addr:$src)>; 3540 def : Pat<(alignedloadv32f16 addr:$src), 3541 (VMOVAPSZrm addr:$src)>; 3542 def : Pat<(alignedloadv32bf16 addr:$src), 3543 (VMOVAPSZrm addr:$src)>; 3544 def : Pat<(alignedloadv64i8 addr:$src), 3545 (VMOVDQA64Zrm addr:$src)>; 3546 def : Pat<(loadv16i32 addr:$src), 3547 (VMOVDQU64Zrm addr:$src)>; 3548 def : Pat<(loadv32i16 addr:$src), 3549 (VMOVDQU64Zrm addr:$src)>; 3550 def : Pat<(loadv32f16 addr:$src), 3551 (VMOVUPSZrm addr:$src)>; 3552 def : Pat<(loadv32bf16 addr:$src), 3553 (VMOVUPSZrm addr:$src)>; 3554 def : Pat<(loadv64i8 addr:$src), 3555 (VMOVDQU64Zrm addr:$src)>; 3556 3557 // 512-bit store. 3558 def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst), 3559 (VMOVDQA64Zmr addr:$dst, VR512:$src)>; 3560 def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst), 3561 (VMOVDQA64Zmr addr:$dst, VR512:$src)>; 3562 def : Pat<(alignedstore (v32f16 VR512:$src), addr:$dst), 3563 (VMOVAPSZmr addr:$dst, VR512:$src)>; 3564 def : Pat<(alignedstore (v32bf16 VR512:$src), addr:$dst), 3565 (VMOVAPSZmr addr:$dst, VR512:$src)>; 3566 def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst), 3567 (VMOVDQA64Zmr addr:$dst, VR512:$src)>; 3568 def : Pat<(store (v16i32 VR512:$src), addr:$dst), 3569 (VMOVDQU64Zmr addr:$dst, VR512:$src)>; 3570 def : Pat<(store (v32i16 VR512:$src), addr:$dst), 3571 (VMOVDQU64Zmr addr:$dst, VR512:$src)>; 3572 def : Pat<(store (v32f16 VR512:$src), addr:$dst), 3573 (VMOVUPSZmr addr:$dst, VR512:$src)>; 3574 def : Pat<(store (v32bf16 VR512:$src), addr:$dst), 3575 (VMOVUPSZmr addr:$dst, VR512:$src)>; 3576 def : Pat<(store (v64i8 VR512:$src), addr:$dst), 3577 (VMOVDQU64Zmr addr:$dst, VR512:$src)>; 3578} 3579 3580let Predicates = [HasVLX] in { 3581 // 128-bit load. 
3582 def : Pat<(alignedloadv4i32 addr:$src), 3583 (VMOVDQA64Z128rm addr:$src)>; 3584 def : Pat<(alignedloadv8i16 addr:$src), 3585 (VMOVDQA64Z128rm addr:$src)>; 3586 def : Pat<(alignedloadv8f16 addr:$src), 3587 (VMOVAPSZ128rm addr:$src)>; 3588 def : Pat<(alignedloadv8bf16 addr:$src), 3589 (VMOVAPSZ128rm addr:$src)>; 3590 def : Pat<(alignedloadv16i8 addr:$src), 3591 (VMOVDQA64Z128rm addr:$src)>; 3592 def : Pat<(loadv4i32 addr:$src), 3593 (VMOVDQU64Z128rm addr:$src)>; 3594 def : Pat<(loadv8i16 addr:$src), 3595 (VMOVDQU64Z128rm addr:$src)>; 3596 def : Pat<(loadv8f16 addr:$src), 3597 (VMOVUPSZ128rm addr:$src)>; 3598 def : Pat<(loadv8bf16 addr:$src), 3599 (VMOVUPSZ128rm addr:$src)>; 3600 def : Pat<(loadv16i8 addr:$src), 3601 (VMOVDQU64Z128rm addr:$src)>; 3602 3603 // 128-bit store. 3604 def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst), 3605 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>; 3606 def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst), 3607 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>; 3608 def : Pat<(alignedstore (v8f16 VR128X:$src), addr:$dst), 3609 (VMOVAPSZ128mr addr:$dst, VR128X:$src)>; 3610 def : Pat<(alignedstore (v8bf16 VR128X:$src), addr:$dst), 3611 (VMOVAPSZ128mr addr:$dst, VR128X:$src)>; 3612 def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst), 3613 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>; 3614 def : Pat<(store (v4i32 VR128X:$src), addr:$dst), 3615 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>; 3616 def : Pat<(store (v8i16 VR128X:$src), addr:$dst), 3617 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>; 3618 def : Pat<(store (v8f16 VR128X:$src), addr:$dst), 3619 (VMOVUPSZ128mr addr:$dst, VR128X:$src)>; 3620 def : Pat<(store (v8bf16 VR128X:$src), addr:$dst), 3621 (VMOVUPSZ128mr addr:$dst, VR128X:$src)>; 3622 def : Pat<(store (v16i8 VR128X:$src), addr:$dst), 3623 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>; 3624 3625 // 256-bit load. 
3626 def : Pat<(alignedloadv8i32 addr:$src), 3627 (VMOVDQA64Z256rm addr:$src)>; 3628 def : Pat<(alignedloadv16i16 addr:$src), 3629 (VMOVDQA64Z256rm addr:$src)>; 3630 def : Pat<(alignedloadv16f16 addr:$src), 3631 (VMOVAPSZ256rm addr:$src)>; 3632 def : Pat<(alignedloadv16bf16 addr:$src), 3633 (VMOVAPSZ256rm addr:$src)>; 3634 def : Pat<(alignedloadv32i8 addr:$src), 3635 (VMOVDQA64Z256rm addr:$src)>; 3636 def : Pat<(loadv8i32 addr:$src), 3637 (VMOVDQU64Z256rm addr:$src)>; 3638 def : Pat<(loadv16i16 addr:$src), 3639 (VMOVDQU64Z256rm addr:$src)>; 3640 def : Pat<(loadv16f16 addr:$src), 3641 (VMOVUPSZ256rm addr:$src)>; 3642 def : Pat<(loadv16bf16 addr:$src), 3643 (VMOVUPSZ256rm addr:$src)>; 3644 def : Pat<(loadv32i8 addr:$src), 3645 (VMOVDQU64Z256rm addr:$src)>; 3646 3647 // 256-bit store. 3648 def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst), 3649 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>; 3650 def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst), 3651 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>; 3652 def : Pat<(alignedstore (v16f16 VR256X:$src), addr:$dst), 3653 (VMOVAPSZ256mr addr:$dst, VR256X:$src)>; 3654 def : Pat<(alignedstore (v16bf16 VR256X:$src), addr:$dst), 3655 (VMOVAPSZ256mr addr:$dst, VR256X:$src)>; 3656 def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst), 3657 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>; 3658 def : Pat<(store (v8i32 VR256X:$src), addr:$dst), 3659 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>; 3660 def : Pat<(store (v16i16 VR256X:$src), addr:$dst), 3661 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>; 3662 def : Pat<(store (v16f16 VR256X:$src), addr:$dst), 3663 (VMOVUPSZ256mr addr:$dst, VR256X:$src)>; 3664 def : Pat<(store (v16bf16 VR256X:$src), addr:$dst), 3665 (VMOVUPSZ256mr addr:$dst, VR256X:$src)>; 3666 def : Pat<(store (v32i8 VR256X:$src), addr:$dst), 3667 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>; 3668} 3669 3670multiclass mask_move_lowering_f16_bf16<AVX512VLVectorVTInfo _> { 3671let Predicates = [HasBWI] in { 3672 def : 
Pat<(_.info512.VT (vselect VK32WM:$mask, (_.info512.VT VR512:$src1), (_.info512.VT VR512:$src0))), 3673 (VMOVDQU16Zrrk VR512:$src0, VK32WM:$mask, VR512:$src1)>; 3674 def : Pat<(_.info512.VT (vselect VK32WM:$mask, (_.info512.VT VR512:$src1), _.info512.ImmAllZerosV)), 3675 (VMOVDQU16Zrrkz VK32WM:$mask, VR512:$src1)>; 3676 def : Pat<(_.info512.VT (vselect VK32WM:$mask, 3677 (_.info512.VT (_.info512.AlignedLdFrag addr:$src)), (_.info512.VT VR512:$src0))), 3678 (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>; 3679 def : Pat<(_.info512.VT (vselect VK32WM:$mask, 3680 (_.info512.VT (_.info512.AlignedLdFrag addr:$src)), _.info512.ImmAllZerosV)), 3681 (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>; 3682 def : Pat<(_.info512.VT (vselect VK32WM:$mask, 3683 (_.info512.VT (_.info512.LdFrag addr:$src)), (_.info512.VT VR512:$src0))), 3684 (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>; 3685 def : Pat<(_.info512.VT (vselect VK32WM:$mask, 3686 (_.info512.VT (_.info512.LdFrag addr:$src)), _.info512.ImmAllZerosV)), 3687 (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>; 3688 def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, (_.info512.VT VR512:$src0))), 3689 (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>; 3690 def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, undef)), 3691 (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>; 3692 def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, _.info512.ImmAllZerosV)), 3693 (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>; 3694 3695 def : Pat<(masked_store (_.info512.VT VR512:$src), addr:$dst, VK32WM:$mask), 3696 (VMOVDQU16Zmrk addr:$dst, VK32WM:$mask, VR512:$src)>; 3697} 3698let Predicates = [HasBWI, HasVLX] in { 3699 def : Pat<(_.info256.VT (vselect VK16WM:$mask, (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src0))), 3700 (VMOVDQU16Z256rrk VR256X:$src0, VK16WM:$mask, VR256X:$src1)>; 3701 def : Pat<(_.info256.VT (vselect VK16WM:$mask, (_.info256.VT VR256X:$src1), _.info256.ImmAllZerosV)), 3702 (VMOVDQU16Z256rrkz 
VK16WM:$mask, VR256X:$src1)>; 3703 def : Pat<(_.info256.VT (vselect VK16WM:$mask, 3704 (_.info256.VT (_.info256.AlignedLdFrag addr:$src)), (_.info256.VT VR256X:$src0))), 3705 (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>; 3706 def : Pat<(_.info256.VT (vselect VK16WM:$mask, 3707 (_.info256.VT (_.info256.AlignedLdFrag addr:$src)), _.info256.ImmAllZerosV)), 3708 (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>; 3709 def : Pat<(_.info256.VT (vselect VK16WM:$mask, 3710 (_.info256.VT (_.info256.LdFrag addr:$src)), (_.info256.VT VR256X:$src0))), 3711 (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>; 3712 def : Pat<(_.info256.VT (vselect VK16WM:$mask, 3713 (_.info256.VT (_.info256.LdFrag addr:$src)), _.info256.ImmAllZerosV)), 3714 (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>; 3715 def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, (_.info256.VT VR256X:$src0))), 3716 (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>; 3717 def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, undef)), 3718 (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>; 3719 def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, _.info256.ImmAllZerosV)), 3720 (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>; 3721 3722 def : Pat<(masked_store (_.info256.VT VR256X:$src), addr:$dst, VK16WM:$mask), 3723 (VMOVDQU16Z256mrk addr:$dst, VK16WM:$mask, VR256X:$src)>; 3724 3725 def : Pat<(_.info128.VT (vselect VK8WM:$mask, (_.info128.VT VR128X:$src1), (_.info128.VT VR128X:$src0))), 3726 (VMOVDQU16Z128rrk VR128X:$src0, VK8WM:$mask, VR128X:$src1)>; 3727 def : Pat<(_.info128.VT (vselect VK8WM:$mask, (_.info128.VT VR128X:$src1), _.info128.ImmAllZerosV)), 3728 (VMOVDQU16Z128rrkz VK8WM:$mask, VR128X:$src1)>; 3729 def : Pat<(_.info128.VT (vselect VK8WM:$mask, 3730 (_.info128.VT (_.info128.AlignedLdFrag addr:$src)), (_.info128.VT VR128X:$src0))), 3731 (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>; 3732 def : Pat<(_.info128.VT (vselect VK8WM:$mask, 3733 (_.info128.VT 
(_.info128.AlignedLdFrag addr:$src)), _.info128.ImmAllZerosV)), 3734 (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>; 3735 def : Pat<(_.info128.VT (vselect VK8WM:$mask, 3736 (_.info128.VT (_.info128.LdFrag addr:$src)), (_.info128.VT VR128X:$src0))), 3737 (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>; 3738 def : Pat<(_.info128.VT (vselect VK8WM:$mask, 3739 (_.info128.VT (_.info128.LdFrag addr:$src)), _.info128.ImmAllZerosV)), 3740 (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>; 3741 def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, (_.info128.VT VR128X:$src0))), 3742 (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>; 3743 def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, undef)), 3744 (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>; 3745 def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, _.info128.ImmAllZerosV)), 3746 (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>; 3747 3748 def : Pat<(masked_store (_.info128.VT VR128X:$src), addr:$dst, VK8WM:$mask), 3749 (VMOVDQU16Z128mrk addr:$dst, VK8WM:$mask, VR128X:$src)>; 3750} 3751} 3752 3753defm : mask_move_lowering_f16_bf16<avx512vl_f16_info>; 3754defm : mask_move_lowering_f16_bf16<avx512vl_bf16_info>; 3755 3756// Move Int Doubleword to Packed Double Int 3757// 3758let ExeDomain = SSEPackedInt in { 3759def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src), 3760 "vmovd\t{$src, $dst|$dst, $src}", 3761 [(set VR128X:$dst, 3762 (v4i32 (scalar_to_vector GR32:$src)))]>, 3763 EVEX, Sched<[WriteVecMoveFromGpr]>; 3764def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src), 3765 "vmovd\t{$src, $dst|$dst, $src}", 3766 [(set VR128X:$dst, 3767 (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>, 3768 EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>; 3769def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src), 3770 "vmovq\t{$src, $dst|$dst, $src}", 3771 [(set VR128X:$dst, 3772 (v2i64 (scalar_to_vector GR64:$src)))]>, 3773 
                      EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
// Memory form exists for the disassembler only: empty pattern, codegen-only.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
                      (ins i64mem:$src),
                      "vmovq\t{$src, $dst|$dst, $src}", []>,
                      EVEX, REX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
// GR64 <-> FR64X bitcasts expressed as vmovq; codegen-only pseudo-variants.
let isCodeGenOnly = 1 in {
def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
                     "vmovq\t{$src, $dst|$dst, $src}",
                     [(set FR64X:$dst, (bitconvert GR64:$src))]>,
                     EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
                     "vmovq\t{$src, $dst|$dst, $src}",
                     [(set GR64:$dst, (bitconvert FR64X:$src))]>,
                     EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
}
} // ExeDomain = SSEPackedInt

// Move Int Doubleword to Single Scalar
//
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
// GR32 -> FR32X bitcast via vmovd (codegen-only).
def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set FR32X:$dst, (bitconvert GR32:$src))]>,
                      EVEX, Sched<[WriteVecMoveFromGpr]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1

// Move doubleword from xmm register to r/m32
//
let ExeDomain = SSEPackedInt in {
// Extract element 0 of a v4i32 into a GPR32 (register and store forms).
def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
                                                   (iPTR 0)))]>,
                      EVEX, Sched<[WriteVecMoveToGpr]>;
def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
                     (ins i32mem:$dst, VR128X:$src),
                     "vmovd\t{$src, $dst|$dst, $src}",
                     [(store (i32 (extractelt (v4i32 VR128X:$src),
                                              (iPTR 0))), addr:$dst)]>,
                     EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
} // ExeDomain = SSEPackedInt

// Move quadword from xmm1 register to r/m64
//
let ExeDomain = SSEPackedInt in {
def VMOVPQIto64Zrr :
I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
                                                   (iPTR 0)))]>,
                      TB, PD, EVEX, REX_W, Sched<[WriteVecMoveToGpr]>,
                      Requires<[HasAVX512]>;

// 0x7E store form is disassembler-only (empty pattern); the selectable store
// is VMOVPQI2QIZmr below.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}", []>, TB, PD,
                      EVEX, REX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecStore]>,
                      Requires<[HasAVX512, In64BitMode]>;

// Store element 0 of a v2i64 to memory (0xD6 encoding of vmovq).
def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
                      (ins i64mem:$dst, VR128X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
                              addr:$dst)]>,
                      EVEX, TB, PD, REX_W, EVEX_CD8<64, CD8VT1>,
                      Sched<[WriteVecStore]>, Requires<[HasAVX512]>;

// Register-to-register 0xD6 form; no pattern, exposed via the "vmovq.s" alias.
let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
                             (ins VR128X:$src),
                             "vmovq\t{$src, $dst|$dst, $src}", []>,
                             EVEX, REX_W, Sched<[SchedWriteVecLogic.XMM]>;
} // ExeDomain = SSEPackedInt

def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
                (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;

let Predicates = [HasAVX512] in {
  // Fold an extract-low-i64-and-store into the vmovq store instruction.
  def : Pat<(X86vextractstore64 (v2i64 VR128X:$src), addr:$dst),
            (VMOVPQI2QIZmr addr:$dst, VR128X:$src)>;
}

// Move Scalar Single to Double Int
//
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
// FR32X -> GR32 bitcast via vmovd (codegen-only).
def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
                            (ins FR32X:$src),
                            "vmovd\t{$src, $dst|$dst, $src}",
                            [(set GR32:$dst, (bitconvert FR32X:$src))]>,
                            EVEX, Sched<[WriteVecMoveToGpr]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1

// Move Quadword Int to Packed Quadword Int
//
let ExeDomain = SSEPackedInt in {
def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem,
(outs VR128X:$dst),
                              (ins i64mem:$src),
                              "vmovq\t{$src, $dst|$dst, $src}",
                              [(set VR128X:$dst,
                                (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
                              // NOTE(review): CD8 tuple is expressed as 8-bit
                              // elements x VT8 (8*8 = 64 bits of disp8 scale).
                              EVEX, REX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
} // ExeDomain = SSEPackedInt

// Allow "vmovd" but print "vmovq".
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>;
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>;

// Conversions between masks and scalar fp.
// Bitcasts between k-registers and scalar FP go through a GPR: fp -> GPR
// (VMOVSS2DIZrr / VMOVSDto64Zrr) then GPR -> mask (KMOVDkr / KMOVQkr), and
// the reverse for mask -> fp.
def : Pat<(v32i1 (bitconvert FR32X:$src)),
          (KMOVDkr (VMOVSS2DIZrr FR32X:$src))>;
def : Pat<(f32 (bitconvert VK32:$src)),
          (VMOVDI2SSZrr (KMOVDrk VK32:$src))>;

def : Pat<(v64i1 (bitconvert FR64X:$src)),
          (KMOVQkr (VMOVSDto64Zrr FR64X:$src))>;
def : Pat<(f64 (bitconvert VK64:$src)),
          (VMOV64toSDZrr (KMOVQrk VK64:$src))>;

//===----------------------------------------------------------------------===//
// AVX-512 MOVSH, MOVSS, MOVSD
//===----------------------------------------------------------------------===//

// Generates the full family of scalar-move forms for one element type:
//   rr / rrk / rrkz   - register merge/zero-masked forms (pattern: OpNode,
//                       masked via X86selects),
//   rm / rm_alt       - loads (vzload_frag into the vector class, and a
//                       codegen-only FRC-destination variant),
//   rmk / rmkz        - masked loads (no patterns here),
//   mr / mrk          - store and masked store.
// `prd` gates the predicated forms; the unmasked rr form additionally
// requires OptForSize unless prd is HasFP16.
multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
                              X86VectorVTInfo _, Predicate prd = HasAVX512> {
  let Predicates = !if (!eq (prd, HasFP16), [HasFP16], [prd, OptForSize]) in
  def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
             (ins _.RC:$src1, _.RC:$src2),
             !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
             _.ExeDomain>, EVEX, VVVV, Sched<[SchedWriteFShuffle.XMM]>;
  let Predicates = [prd] in {
  // Zero-masking register form: elements not selected by the mask are zeroed.
  def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
              !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
              "$dst {${mask}} {z}, $src1, $src2}"),
              [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
                                      (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
_.ImmAllZerosV)))],
              _.ExeDomain>, EVEX, VVVV, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>;
  // Merge-masking register form: $src0 supplies preserved elements and is
  // tied to $dst.
  let Constraints = "$src0 = $dst" in
  def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
             (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
             !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
             "$dst {${mask}}, $src1, $src2}"),
             [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
                                     (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                                     (_.VT _.RC:$src0))))],
             _.ExeDomain>, EVEX, VVVV, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
  // Plain loads may be folded or rematerialized by the register allocator.
  let canFoldAsLoad = 1, isReMaterializable = 1 in {
  def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
             [(set _.RC:$dst, (_.VT (vzload_frag addr:$src)))],
             _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
  // _alt version uses FR32/FR64 register class.
  let isCodeGenOnly = 1 in
  def rm_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
               !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
               [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
               _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
  }
  // Masked load forms carry no patterns here; they are selected through the
  // separate lowering multiclasses later in this file.
  let mayLoad = 1, hasSideEffects = 0 in {
  let Constraints = "$src0 = $dst" in
  def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
              (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
              !strconcat(asm, "\t{$src, $dst {${mask}}|",
              "$dst {${mask}}, $src}"),
              [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
  def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
              (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
              !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
              "$dst {${mask}} {z}, $src}"),
              [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
  }
  // Unmasked scalar store from the FRC register class.
  def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
             [(store _.FRC:$src, addr:$dst)], _.ExeDomain>,
             EVEX, Sched<[WriteFStore]>;
  // Masked scalar store; pattern-less, selected via the lowering
  // multiclasses below.
  let mayStore = 1, hasSideEffects = 0 in
  def mrk: AVX512PI<0x11, MRMDestMem, (outs),
              (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.RC:$src),
              !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
              [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>;
  }
}

defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>,
                                  VEX_LIG, TB, XS, EVEX_CD8<32, CD8VT1>;

defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>,
                                  VEX_LIG, TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;

defm VMOVSHZ : avx512_move_scalar<"vmovsh", X86Movsh, X86vzload16, f16x_info,
                                  HasFP16>,
                                  VEX_LIG, T_MAP5, XS, EVEX_CD8<16, CD8VT1>;

// Lower a masked select of two scalar FP registers, wrapped in
// OpNode(scalar_to_vector(...)), onto the merge-masking (rrk) or
// zero-masking (rrkz) register form of the given instruction.
multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
                                       PatLeaf ZeroFP, X86VectorVTInfo _> {

def : Pat<(_.VT (OpNode _.RC:$src0,
                        (_.VT (scalar_to_vector
                                  (_.EltVT (X86selects VK1WM:$mask,
                                                       (_.EltVT _.FRC:$src1),
                                                       (_.EltVT _.FRC:$src2))))))),
          (!cast<Instruction>(InstrStr#rrk)
                        (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)),
                        VK1WM:$mask,
                        (_.VT _.RC:$src0),
                        (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;

def : Pat<(_.VT (OpNode _.RC:$src0,
                        (_.VT (scalar_to_vector
                                  (_.EltVT (X86selects VK1WM:$mask,
                                                       (_.EltVT _.FRC:$src1),
                                                       (_.EltVT ZeroFP))))))),
          (!cast<Instruction>(InstrStr#rrkz)
                        VK1WM:$mask,
                        (_.VT _.RC:$src0),
                        (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
}

// Lower a 512-bit masked store whose value is a 128-bit vector widened with
// insert_subvector (how AVX512F-only codegen widens a 128-bit masked store)
// onto the masked scalar store (mrk) of the given instruction.
multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
                                        dag Mask, RegisterClass MaskRC> {

def : Pat<(masked_store
             (_.info512.VT (insert_subvector undef,
                               (_.info128.VT _.info128.RC:$src),
                               (iPTR 0))), addr:$dst, Mask),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                      _.info128.RC:$src)>;

}

// As above, but the scalar mask register is first widened into an i32 with
// INSERT_SUBREG before being copied into VK1WM.
multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
AVX512VLVectorVTInfo _, 4010 dag Mask, RegisterClass MaskRC, 4011 SubRegIndex subreg> { 4012 4013def : Pat<(masked_store 4014 (_.info512.VT (insert_subvector undef, 4015 (_.info128.VT _.info128.RC:$src), 4016 (iPTR 0))), addr:$dst, Mask), 4017 (!cast<Instruction>(InstrStr#mrk) addr:$dst, 4018 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4019 _.info128.RC:$src)>; 4020 4021} 4022 4023// This matches the more recent codegen from clang that avoids emitting a 512 4024// bit masked store directly. Codegen will widen 128-bit masked store to 512 4025// bits on AVX512F only targets. 4026multiclass avx512_store_scalar_lowering_subreg2<string InstrStr, 4027 AVX512VLVectorVTInfo _, 4028 dag Mask512, dag Mask128, 4029 RegisterClass MaskRC, 4030 SubRegIndex subreg> { 4031 4032// AVX512F pattern. 4033def : Pat<(masked_store 4034 (_.info512.VT (insert_subvector undef, 4035 (_.info128.VT _.info128.RC:$src), 4036 (iPTR 0))), addr:$dst, Mask512), 4037 (!cast<Instruction>(InstrStr#mrk) addr:$dst, 4038 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4039 _.info128.RC:$src)>; 4040 4041// AVX512VL pattern. 
4042def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128), 4043 (!cast<Instruction>(InstrStr#mrk) addr:$dst, 4044 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4045 _.info128.RC:$src)>; 4046} 4047 4048multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _, 4049 dag Mask, RegisterClass MaskRC> { 4050 4051def : Pat<(_.info128.VT (extract_subvector 4052 (_.info512.VT (masked_load addr:$srcAddr, Mask, 4053 _.info512.ImmAllZerosV)), 4054 (iPTR 0))), 4055 (!cast<Instruction>(InstrStr#rmkz) 4056 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM), 4057 addr:$srcAddr)>; 4058 4059def : Pat<(_.info128.VT (extract_subvector 4060 (_.info512.VT (masked_load addr:$srcAddr, Mask, 4061 (_.info512.VT (insert_subvector undef, 4062 (_.info128.VT (X86vzmovl _.info128.RC:$src)), 4063 (iPTR 0))))), 4064 (iPTR 0))), 4065 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src, 4066 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM), 4067 addr:$srcAddr)>; 4068 4069} 4070 4071multiclass avx512_load_scalar_lowering_subreg<string InstrStr, 4072 AVX512VLVectorVTInfo _, 4073 dag Mask, RegisterClass MaskRC, 4074 SubRegIndex subreg> { 4075 4076def : Pat<(_.info128.VT (extract_subvector 4077 (_.info512.VT (masked_load addr:$srcAddr, Mask, 4078 _.info512.ImmAllZerosV)), 4079 (iPTR 0))), 4080 (!cast<Instruction>(InstrStr#rmkz) 4081 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4082 addr:$srcAddr)>; 4083 4084def : Pat<(_.info128.VT (extract_subvector 4085 (_.info512.VT (masked_load addr:$srcAddr, Mask, 4086 (_.info512.VT (insert_subvector undef, 4087 (_.info128.VT (X86vzmovl _.info128.RC:$src)), 4088 (iPTR 0))))), 4089 (iPTR 0))), 4090 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src, 4091 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4092 addr:$srcAddr)>; 4093 4094} 4095 4096// This matches the more recent codegen from clang that avoids emitting a 512 4097// 
bit masked load directly. Codegen will widen 128-bit masked load to 512 4098// bits on AVX512F only targets. 4099multiclass avx512_load_scalar_lowering_subreg2<string InstrStr, 4100 AVX512VLVectorVTInfo _, 4101 dag Mask512, dag Mask128, 4102 RegisterClass MaskRC, 4103 SubRegIndex subreg> { 4104// AVX512F patterns. 4105def : Pat<(_.info128.VT (extract_subvector 4106 (_.info512.VT (masked_load addr:$srcAddr, Mask512, 4107 _.info512.ImmAllZerosV)), 4108 (iPTR 0))), 4109 (!cast<Instruction>(InstrStr#rmkz) 4110 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4111 addr:$srcAddr)>; 4112 4113def : Pat<(_.info128.VT (extract_subvector 4114 (_.info512.VT (masked_load addr:$srcAddr, Mask512, 4115 (_.info512.VT (insert_subvector undef, 4116 (_.info128.VT (X86vzmovl _.info128.RC:$src)), 4117 (iPTR 0))))), 4118 (iPTR 0))), 4119 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src, 4120 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4121 addr:$srcAddr)>; 4122 4123// AVX512Vl patterns. 
4124def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128, 4125 _.info128.ImmAllZerosV)), 4126 (!cast<Instruction>(InstrStr#rmkz) 4127 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4128 addr:$srcAddr)>; 4129 4130def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128, 4131 (_.info128.VT (X86vzmovl _.info128.RC:$src)))), 4132 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src, 4133 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4134 addr:$srcAddr)>; 4135} 4136 4137defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>; 4138defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>; 4139 4140defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info, 4141 (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>; 4142defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info, 4143 (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>; 4144defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info, 4145 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>; 4146 4147let Predicates = [HasFP16] in { 4148defm : avx512_move_scalar_lowering<"VMOVSHZ", X86Movsh, fp16imm0, v8f16x_info>; 4149defm : avx512_store_scalar_lowering<"VMOVSHZ", avx512vl_f16_info, 4150 (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>; 4151defm : avx512_store_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info, 4152 (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>; 4153defm : avx512_store_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info, 4154 (v32i1 (insert_subvector 4155 (v32i1 immAllZerosV), 4156 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))), 4157 (iPTR 0))), 4158 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))), 4159 GR8, sub_8bit>; 4160 4161defm : avx512_load_scalar_lowering<"VMOVSHZ", avx512vl_f16_info, 4162 (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>; 4163defm : 
avx512_load_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info, 4164 (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>; 4165defm : avx512_load_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info, 4166 (v32i1 (insert_subvector 4167 (v32i1 immAllZerosV), 4168 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))), 4169 (iPTR 0))), 4170 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))), 4171 GR8, sub_8bit>; 4172 4173def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), (f16 FR16X:$src2))), 4174 (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrk 4175 (v8f16 (COPY_TO_REGCLASS FR16X:$src2, VR128X)), 4176 VK1WM:$mask, (v8f16 (IMPLICIT_DEF)), 4177 (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>; 4178 4179def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), fp16imm0)), 4180 (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrkz VK1WM:$mask, (v8f16 (IMPLICIT_DEF)), 4181 (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>; 4182} 4183 4184defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info, 4185 (v16i1 (insert_subvector 4186 (v16i1 immAllZerosV), 4187 (v4i1 (extract_subvector 4188 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))), 4189 (iPTR 0))), 4190 (iPTR 0))), 4191 (v4i1 (extract_subvector 4192 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))), 4193 (iPTR 0))), GR8, sub_8bit>; 4194defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info, 4195 (v8i1 4196 (extract_subvector 4197 (v16i1 4198 (insert_subvector 4199 (v16i1 immAllZerosV), 4200 (v2i1 (extract_subvector 4201 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), 4202 (iPTR 0))), 4203 (iPTR 0))), 4204 (iPTR 0))), 4205 (v2i1 (extract_subvector 4206 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), 4207 (iPTR 0))), GR8, sub_8bit>; 4208 4209defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info, 4210 (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>; 4211defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info, 4212 (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 
1))))), GR16, sub_16bit>; 4213defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info, 4214 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>; 4215 4216defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info, 4217 (v16i1 (insert_subvector 4218 (v16i1 immAllZerosV), 4219 (v4i1 (extract_subvector 4220 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))), 4221 (iPTR 0))), 4222 (iPTR 0))), 4223 (v4i1 (extract_subvector 4224 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))), 4225 (iPTR 0))), GR8, sub_8bit>; 4226defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info, 4227 (v8i1 4228 (extract_subvector 4229 (v16i1 4230 (insert_subvector 4231 (v16i1 immAllZerosV), 4232 (v2i1 (extract_subvector 4233 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), 4234 (iPTR 0))), 4235 (iPTR 0))), 4236 (iPTR 0))), 4237 (v2i1 (extract_subvector 4238 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), 4239 (iPTR 0))), GR8, sub_8bit>; 4240 4241def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))), 4242 (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk 4243 (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)), 4244 VK1WM:$mask, (v4f32 (IMPLICIT_DEF)), 4245 (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>; 4246 4247def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)), 4248 (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)), 4249 (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>; 4250 4251def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), (f32 FR32X:$src0))), 4252 (COPY_TO_REGCLASS 4253 (v4f32 (VMOVSSZrmk (v4f32 (COPY_TO_REGCLASS FR32X:$src0, VR128X)), 4254 VK1WM:$mask, addr:$src)), 4255 FR32X)>; 4256def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), fp32imm0)), 4257 (COPY_TO_REGCLASS (v4f32 (VMOVSSZrmkz VK1WM:$mask, addr:$src)), FR32X)>; 4258 4259def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))), 4260 (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk 
4261 (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)), 4262 VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), 4263 (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>; 4264 4265def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fp64imm0)), 4266 (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), 4267 (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>; 4268 4269def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0))), 4270 (COPY_TO_REGCLASS 4271 (v2f64 (VMOVSDZrmk (v2f64 (COPY_TO_REGCLASS FR64X:$src0, VR128X)), 4272 VK1WM:$mask, addr:$src)), 4273 FR64X)>; 4274def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)), 4275 (COPY_TO_REGCLASS (v2f64 (VMOVSDZrmkz VK1WM:$mask, addr:$src)), FR64X)>; 4276 4277 4278def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 VR128X:$src2))), 4279 (VMOVSSZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>; 4280def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 VR128X:$src2))), 4281 (VMOVSDZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>; 4282 4283def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 immAllZerosV))), 4284 (VMOVSSZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>; 4285def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 immAllZerosV))), 4286 (VMOVSDZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>; 4287 4288let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in { 4289 let Predicates = [HasFP16] in { 4290 def VMOVSHZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), 4291 (ins VR128X:$src1, VR128X:$src2), 4292 "vmovsh\t{$src2, $src1, $dst|$dst, $src1, $src2}", 4293 []>, T_MAP5, XS, EVEX, VVVV, VEX_LIG, 4294 Sched<[SchedWriteFShuffle.XMM]>; 4295 4296 let Constraints = "$src0 = $dst" in 4297 def VMOVSHZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), 4298 (ins f16x_info.RC:$src0, f16x_info.KRCWM:$mask, 4299 VR128X:$src1, VR128X:$src2), 4300 
"vmovsh\t{$src2, $src1, $dst {${mask}}|"# 4301 "$dst {${mask}}, $src1, $src2}", 4302 []>, T_MAP5, XS, EVEX_K, EVEX, VVVV, VEX_LIG, 4303 Sched<[SchedWriteFShuffle.XMM]>; 4304 4305 def VMOVSHZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), 4306 (ins f16x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2), 4307 "vmovsh\t{$src2, $src1, $dst {${mask}} {z}|"# 4308 "$dst {${mask}} {z}, $src1, $src2}", 4309 []>, EVEX_KZ, T_MAP5, XS, EVEX, VVVV, VEX_LIG, 4310 Sched<[SchedWriteFShuffle.XMM]>; 4311 } 4312 def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), 4313 (ins VR128X:$src1, VR128X:$src2), 4314 "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}", 4315 []>, TB, XS, EVEX, VVVV, VEX_LIG, 4316 Sched<[SchedWriteFShuffle.XMM]>; 4317 4318 let Constraints = "$src0 = $dst" in 4319 def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), 4320 (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask, 4321 VR128X:$src1, VR128X:$src2), 4322 "vmovss\t{$src2, $src1, $dst {${mask}}|"# 4323 "$dst {${mask}}, $src1, $src2}", 4324 []>, EVEX_K, TB, XS, EVEX, VVVV, VEX_LIG, 4325 Sched<[SchedWriteFShuffle.XMM]>; 4326 4327 def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), 4328 (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2), 4329 "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"# 4330 "$dst {${mask}} {z}, $src1, $src2}", 4331 []>, EVEX_KZ, TB, XS, EVEX, VVVV, VEX_LIG, 4332 Sched<[SchedWriteFShuffle.XMM]>; 4333 4334 def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), 4335 (ins VR128X:$src1, VR128X:$src2), 4336 "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}", 4337 []>, TB, XD, EVEX, VVVV, VEX_LIG, REX_W, 4338 Sched<[SchedWriteFShuffle.XMM]>; 4339 4340 let Constraints = "$src0 = $dst" in 4341 def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), 4342 (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask, 4343 VR128X:$src1, VR128X:$src2), 4344 "vmovsd\t{$src2, $src1, $dst {${mask}}|"# 4345 "$dst {${mask}}, $src1, $src2}", 4346 []>, EVEX_K, TB, XD, 
EVEX, VVVV, VEX_LIG, 4347 REX_W, Sched<[SchedWriteFShuffle.XMM]>; 4348 4349 def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), 4350 (ins f64x_info.KRCWM:$mask, VR128X:$src1, 4351 VR128X:$src2), 4352 "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"# 4353 "$dst {${mask}} {z}, $src1, $src2}", 4354 []>, EVEX_KZ, TB, XD, EVEX, VVVV, VEX_LIG, 4355 REX_W, Sched<[SchedWriteFShuffle.XMM]>; 4356} 4357 4358def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst|$dst, $src1, $src2}", 4359 (VMOVSHZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>; 4360def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}}|"# 4361 "$dst {${mask}}, $src1, $src2}", 4362 (VMOVSHZrrk_REV VR128X:$dst, VK1WM:$mask, 4363 VR128X:$src1, VR128X:$src2), 0>; 4364def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}} {z}|"# 4365 "$dst {${mask}} {z}, $src1, $src2}", 4366 (VMOVSHZrrkz_REV VR128X:$dst, VK1WM:$mask, 4367 VR128X:$src1, VR128X:$src2), 0>; 4368def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}", 4369 (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>; 4370def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"# 4371 "$dst {${mask}}, $src1, $src2}", 4372 (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask, 4373 VR128X:$src1, VR128X:$src2), 0>; 4374def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"# 4375 "$dst {${mask}} {z}, $src1, $src2}", 4376 (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask, 4377 VR128X:$src1, VR128X:$src2), 0>; 4378def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}", 4379 (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>; 4380def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"# 4381 "$dst {${mask}}, $src1, $src2}", 4382 (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask, 4383 VR128X:$src1, VR128X:$src2), 0>; 4384def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"# 4385 "$dst {${mask}} {z}, $src1, $src2}", 4386 (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask, 4387 VR128X:$src1, VR128X:$src2), 0>; 4388 4389let Predicates = 
[HasAVX512, OptForSize] in { 4390 def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))), 4391 (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>; 4392 def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))), 4393 (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>; 4394 4395 // Move low f32 and clear high bits. 4396 def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))), 4397 (SUBREG_TO_REG (i32 0), 4398 (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)), 4399 (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>; 4400 def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))), 4401 (SUBREG_TO_REG (i32 0), 4402 (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)), 4403 (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>; 4404 4405 def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))), 4406 (SUBREG_TO_REG (i32 0), 4407 (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)), 4408 (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>; 4409 def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))), 4410 (SUBREG_TO_REG (i32 0), 4411 (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)), 4412 (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>; 4413} 4414 4415// Use 128-bit blends for OptForSpeed since BLENDs have better throughput than 4416// VMOVSS/SD. Unfortunately, loses the ability to use XMM16-31. 
let Predicates = [HasAVX512, OptForSpeed] in {
  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
                    (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)),
                    (i8 1))), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
                    (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
                    (i8 3))), sub_xmm)>;
}

let Predicates = [HasAVX512] in {
  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
            (VMOVSSZrm addr:$src)>;
  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
            (VMOVSDZrm addr:$src)>;

  // Represent the same patterns above but in the form they appear for
  // 256-bit types
  def : Pat<(v8f32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v4f64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;

  // Represent the same patterns above but in the form they appear for
  // 512-bit types
  def : Pat<(v16f32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v8f64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
}
let Predicates = [HasFP16] in {
  def : Pat<(v8f16 (X86vzmovl (v8f16 VR128X:$src))),
            (VMOVSHZrr (v8f16 (AVX512_128_SET0)), VR128X:$src)>;
  def : Pat<(v8i16 (X86vzmovl (v8i16 VR128X:$src))),
            (VMOVSHZrr (v8i16 (AVX512_128_SET0)), VR128X:$src)>;

  // FIXME we need better canonicalization in dag combine
  def : Pat<(v16f16 (X86vzmovl (v16f16 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
              (v8f16 (EXTRACT_SUBREG (v16f16 VR256X:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v16i16 (X86vzmovl (v16i16 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v8i16 (VMOVSHZrr (v8i16 (AVX512_128_SET0)),
              (v8i16 (EXTRACT_SUBREG (v16i16 VR256X:$src), sub_xmm)))), sub_xmm)>;

  // FIXME we need better canonicalization in dag combine
  def : Pat<(v32f16 (X86vzmovl (v32f16 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
              (v8f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v32i16 (X86vzmovl (v32i16 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v8i16 (VMOVSHZrr (v8i16 (AVX512_128_SET0)),
              (v8i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_xmm)))), sub_xmm)>;

  def : Pat<(v8f16 (X86vzload16 addr:$src)),
            (VMOVSHZrm addr:$src)>;

  def : Pat<(v16f16 (X86vzload16 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;

  def : Pat<(v32f16 (X86vzload16 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;
}

let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
                                  (ins VR128X:$src),
                                  "vmovq\t{$src, $dst|$dst, $src}",
                                  [(set VR128X:$dst, (v2i64 (X86vzmovl
                                                     (v2i64 VR128X:$src))))]>,
                                  EVEX, REX_W;
}

let Predicates = [HasAVX512] in {
  def : Pat<(v4i32 (scalar_to_vector (i32 (anyext GR8:$src)))),
            (VMOVDI2PDIZrr (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
                                               GR8:$src, sub_8bit)))>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
            (VMOVDI2PDIZrr GR32:$src)>;

  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
            (VMOV64toPQIZrr GR64:$src)>;

  // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
  def : Pat<(v4i32 (X86vzload32 addr:$src)),
            (VMOVDI2PDIZrm addr:$src)>;
  def : Pat<(v8i32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
            (VMOVZPQILo2PQIZrr VR128X:$src)>;
  def : Pat<(v2i64 (X86vzload64 addr:$src)),
            (VMOVQI2PQIZrm addr:$src)>;
  def : Pat<(v4i64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;

  // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
  def : Pat<(v16i32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  def : Pat<(v8i64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;

  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2f64 (VMOVZPQILo2PQIZrr
                     (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))),
             sub_xmm)>;
  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2i64 (VMOVZPQILo2PQIZrr
                     (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))),
             sub_xmm)>;

  def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2f64 (VMOVZPQILo2PQIZrr
                     (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))),
             sub_xmm)>;
  def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2i64 (VMOVZPQILo2PQIZrr
                     (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))),
             sub_xmm)>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - Non-temporals
//===----------------------------------------------------------------------===//

def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
                            (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
                            [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>,
                            EVEX, T8, PD, EVEX_V512, EVEX_CD8<64, CD8VF>;

let Predicates = [HasVLX] in {
  def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
                                 (ins i256mem:$src),
                                 "vmovntdqa\t{$src, $dst|$dst, $src}",
                                 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>,
                                 EVEX, T8, PD, EVEX_V256, EVEX_CD8<64, CD8VF>;

  def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
                                 (ins i128mem:$src),
                                 "vmovntdqa\t{$src, $dst|$dst, $src}",
                                 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>,
                                 EVEX, T8, PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
}

// Non-temporal store (memory destination) variant; no masking forms.
multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                        X86SchedWriteMoveLS Sched,
                        PatFrag st_frag = alignednontemporalstore> {
  let SchedRW = [Sched.MR], AddedComplexity = 400 in
  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(st_frag (_.VT _.RC:$src), addr:$dst)],
                    _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
}

// Instantiates the non-temporal store at 512/256/128-bit vector lengths.
multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
                           AVX512VLVectorVTInfo VTInfo,
                           X86SchedWriteMoveLSWidths Sched> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256;
    defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128;
  }
}

defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
                                SchedWriteVecMoveLSNT>, TB, PD;
defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
                                SchedWriteFMoveLSNT>, TB, PD, REX_W;
defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
                                SchedWriteFMoveLSNT>, TB;

let Predicates = [HasAVX512], AddedComplexity = 400 in {
  def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;

  def : Pat<(v8f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v16f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v8i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v16i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v32i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v64i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
}

let Predicates = [HasVLX], AddedComplexity = 400 in {
  def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;

  def : Pat<(v4f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v8f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v4i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v8i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v16i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v32i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;

  def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;

  def : Pat<(v2f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v4f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v2i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v4i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v8i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v16i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - Integer arithmetic
//
multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86VectorVTInfo _, X86FoldableSchedWrite sched,
                           bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                            IsCommutable, IsCommutable>, AVX512BIBase, EVEX, VVVV,
                            Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2)))>,
                            AVX512BIBase, EVEX, VVVV,
                            Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Adds the broadcast-memory (EVEX.b) form on top of avx512_binop_rm.
multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
                            bit IsCommutable = 0> :
           avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                             (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                             "${src2}"#_.BroadcastStr#", $src1",
                             "$src1, ${src2}"#_.BroadcastStr,
                             (_.VT (OpNode _.RC:$src1,
                                           (_.BroadcastLdFrag addr:$src2)))>,
                             AVX512BIBase, EVEX, VVVV, EVEX_B,
                             Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              AVX512VLVectorVTInfo VTInfo,
                              X86SchedWriteWidths sched, Predicate prd,
                              bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
                           IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
                                sched.YMM, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
                                sched.XMM, IsCommutable>, EVEX_V128;
  }
}

multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               AVX512VLVectorVTInfo VTInfo,
                               X86SchedWriteWidths sched, Predicate prd,
                               bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
                            IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
                                 sched.YMM, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
                                 sched.XMM, IsCommutable>, EVEX_V128;
  }
}

multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
                                  sched, prd, IsCommutable>,
                                  REX_W, EVEX_CD8<64, CD8VF>;
}

multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
                                  sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
}

multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
                                 sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
                                 WIG;
}

multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
                                 sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
                                 WIG;
}

multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd, bit IsCommutable = 0> {
  defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
                                IsCommutable>;

  defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
                                IsCommutable>;
}

multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd, bit IsCommutable = 0> {
  defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
                                IsCommutable>;

  defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
                                IsCommutable>;
}

multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched,
                                  bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
                                    sched, HasAVX512, IsCommutable>,
              avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
                                    sched, HasBWI, IsCommutable>;
}

multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            SDNode OpNode,X86VectorVTInfo _Src,
                            X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
                            bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
                            "$src2, $src1","$src1, $src2",
                            (_Dst.VT (OpNode
                                      (_Src.VT _Src.RC:$src1),
                                      (_Src.VT _Src.RC:$src2))),
                            IsCommutable>,
                            AVX512BIBase, EVEX, VVVV, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                             (_Src.LdFrag addr:$src2)))>,
                            AVX512BIBase, EVEX, VVVV,
                            Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                             (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
                             OpcodeStr,
                             "${src2}"#_Brdct.BroadcastStr#", $src1",
                             "$src1, ${src2}"#_Brdct.BroadcastStr,
                             (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
                                              (_Brdct.VT (_Brdct.BroadcastLdFrag addr:$src2)))))>,
                             AVX512BIBase, EVEX, VVVV, EVEX_B,
                             Sched<[sched.Folded, sched.ReadAfterFold]>;
}

defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
                                    SchedWriteVecALU, 1>;
defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
                                    SchedWriteVecALU, 0>;
defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat,
                                    SchedWriteVecALU, HasBWI, 0>;
defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
                                     SchedWriteVecALU, HasBWI, 1>;
defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat,
                                     SchedWriteVecALU, HasBWI, 0>;
4835defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul, 4836 SchedWritePMULLD, HasAVX512, 1>, T8; 4837defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul, 4838 SchedWriteVecIMul, HasBWI, 1>; 4839defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul, 4840 SchedWriteVecIMul, HasDQI, 1>, T8; 4841defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul, 4842 HasBWI, 1>; 4843defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul, 4844 HasBWI, 1>; 4845defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs, 4846 SchedWriteVecIMul, HasBWI, 1>, T8; 4847defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", avgceilu, 4848 SchedWriteVecALU, HasBWI, 1>; 4849defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq, 4850 SchedWriteVecIMul, HasAVX512, 1>, T8; 4851defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq, 4852 SchedWriteVecIMul, HasAVX512, 1>; 4853 4854multiclass avx512_binop_all<bits<8> opc, string OpcodeStr, 4855 X86SchedWriteWidths sched, 4856 AVX512VLVectorVTInfo _SrcVTInfo, 4857 AVX512VLVectorVTInfo _DstVTInfo, 4858 SDNode OpNode, Predicate prd, bit IsCommutable = 0> { 4859 let Predicates = [prd] in 4860 defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode, 4861 _SrcVTInfo.info512, _DstVTInfo.info512, 4862 v8i64_info, IsCommutable>, 4863 EVEX_V512, EVEX_CD8<64, CD8VF>, REX_W; 4864 let Predicates = [HasVLX, prd] in { 4865 defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode, 4866 _SrcVTInfo.info256, _DstVTInfo.info256, 4867 v4i64x_info, IsCommutable>, 4868 EVEX_V256, EVEX_CD8<64, CD8VF>, REX_W; 4869 defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode, 4870 _SrcVTInfo.info128, _DstVTInfo.info128, 4871 v2i64x_info, IsCommutable>, 4872 EVEX_V128, EVEX_CD8<64, CD8VF>, REX_W; 4873 } 4874} 4875 4876defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU, 4877 avx512vl_i8_info, avx512vl_i8_info, 4878 X86multishift, 
HasVBMI, 0>, T8; 4879 4880multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode, 4881 X86VectorVTInfo _Src, X86VectorVTInfo _Dst, 4882 X86FoldableSchedWrite sched> { 4883 defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst), 4884 (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2), 4885 OpcodeStr, 4886 "${src2}"#_Src.BroadcastStr#", $src1", 4887 "$src1, ${src2}"#_Src.BroadcastStr, 4888 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert 4889 (_Src.VT (_Src.BroadcastLdFrag addr:$src2)))))>, 4890 EVEX, VVVV, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>, 4891 Sched<[sched.Folded, sched.ReadAfterFold]>; 4892} 4893 4894multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr, 4895 SDNode OpNode,X86VectorVTInfo _Src, 4896 X86VectorVTInfo _Dst, X86FoldableSchedWrite sched, 4897 bit IsCommutable = 0> { 4898 defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst), 4899 (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr, 4900 "$src2, $src1","$src1, $src2", 4901 (_Dst.VT (OpNode 4902 (_Src.VT _Src.RC:$src1), 4903 (_Src.VT _Src.RC:$src2))), 4904 IsCommutable, IsCommutable>, 4905 EVEX_CD8<_Src.EltSize, CD8VF>, EVEX, VVVV, Sched<[sched]>; 4906 defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst), 4907 (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr, 4908 "$src2, $src1", "$src1, $src2", 4909 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), 4910 (_Src.LdFrag addr:$src2)))>, 4911 EVEX, VVVV, EVEX_CD8<_Src.EltSize, CD8VF>, 4912 Sched<[sched.Folded, sched.ReadAfterFold]>; 4913} 4914 4915multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr, 4916 SDNode OpNode> { 4917 let Predicates = [HasBWI] in 4918 defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info, 4919 v32i16_info, SchedWriteShuffle.ZMM>, 4920 avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info, 4921 v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512; 4922 let Predicates = [HasBWI, HasVLX] in { 4923 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info, 
4924 v16i16x_info, SchedWriteShuffle.YMM>, 4925 avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info, 4926 v16i16x_info, SchedWriteShuffle.YMM>, 4927 EVEX_V256; 4928 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info, 4929 v8i16x_info, SchedWriteShuffle.XMM>, 4930 avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info, 4931 v8i16x_info, SchedWriteShuffle.XMM>, 4932 EVEX_V128; 4933 } 4934} 4935multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr, 4936 SDNode OpNode> { 4937 let Predicates = [HasBWI] in 4938 defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info, 4939 SchedWriteShuffle.ZMM>, EVEX_V512, WIG; 4940 let Predicates = [HasBWI, HasVLX] in { 4941 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info, 4942 v32i8x_info, SchedWriteShuffle.YMM>, 4943 EVEX_V256, WIG; 4944 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info, 4945 v16i8x_info, SchedWriteShuffle.XMM>, 4946 EVEX_V128, WIG; 4947 } 4948} 4949 4950multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr, 4951 SDNode OpNode, AVX512VLVectorVTInfo _Src, 4952 AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> { 4953 let Predicates = [HasBWI] in 4954 defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512, 4955 _Dst.info512, SchedWriteVecIMul.ZMM, 4956 IsCommutable>, EVEX_V512; 4957 let Predicates = [HasBWI, HasVLX] in { 4958 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256, 4959 _Dst.info256, SchedWriteVecIMul.YMM, 4960 IsCommutable>, EVEX_V256; 4961 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128, 4962 _Dst.info128, SchedWriteVecIMul.XMM, 4963 IsCommutable>, EVEX_V128; 4964 } 4965} 4966 4967defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase; 4968defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase; 4969defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase; 4970defm VPACKUSWB : 
avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase; 4971 4972defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw, 4973 avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8, WIG; 4974defm VPMADDWD : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd, 4975 avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, WIG; 4976 4977defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax, 4978 SchedWriteVecALU, HasBWI, 1>, T8; 4979defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax, 4980 SchedWriteVecALU, HasBWI, 1>; 4981defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax, 4982 SchedWriteVecALU, HasAVX512, 1>, T8; 4983defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax, 4984 SchedWriteVecALU, HasAVX512, 1>, T8; 4985 4986defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax, 4987 SchedWriteVecALU, HasBWI, 1>; 4988defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax, 4989 SchedWriteVecALU, HasBWI, 1>, T8; 4990defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax, 4991 SchedWriteVecALU, HasAVX512, 1>, T8; 4992defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax, 4993 SchedWriteVecALU, HasAVX512, 1>, T8; 4994 4995defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin, 4996 SchedWriteVecALU, HasBWI, 1>, T8; 4997defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin, 4998 SchedWriteVecALU, HasBWI, 1>; 4999defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin, 5000 SchedWriteVecALU, HasAVX512, 1>, T8; 5001defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin, 5002 SchedWriteVecALU, HasAVX512, 1>, T8; 5003 5004defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin, 5005 SchedWriteVecALU, HasBWI, 1>; 5006defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin, 5007 SchedWriteVecALU, HasBWI, 1>, T8; 5008defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin, 5009 SchedWriteVecALU, HasAVX512, 1>, T8; 5010defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin, 5011 SchedWriteVecALU, HasAVX512, 1>, 
T8; 5012 5013// PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX, HasEVEX512. 5014let Predicates = [HasDQI, NoVLX, HasEVEX512] in { 5015 def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))), 5016 (EXTRACT_SUBREG 5017 (VPMULLQZrr 5018 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm), 5019 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)), 5020 sub_ymm)>; 5021 def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))), 5022 (EXTRACT_SUBREG 5023 (VPMULLQZrmb 5024 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm), 5025 addr:$src2), 5026 sub_ymm)>; 5027 5028 def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))), 5029 (EXTRACT_SUBREG 5030 (VPMULLQZrr 5031 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm), 5032 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)), 5033 sub_xmm)>; 5034 def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))), 5035 (EXTRACT_SUBREG 5036 (VPMULLQZrmb 5037 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm), 5038 addr:$src2), 5039 sub_xmm)>; 5040} 5041 5042multiclass avx512_min_max_lowering<string Instr, SDNode OpNode> { 5043 def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)), 5044 (EXTRACT_SUBREG 5045 (!cast<Instruction>(Instr#"rr") 5046 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm), 5047 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)), 5048 sub_ymm)>; 5049 def : Pat<(v4i64 (OpNode (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))), 5050 (EXTRACT_SUBREG 5051 (!cast<Instruction>(Instr#"rmb") 5052 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm), 5053 addr:$src2), 5054 sub_ymm)>; 5055 5056 def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)), 5057 (EXTRACT_SUBREG 5058 (!cast<Instruction>(Instr#"rr") 5059 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm), 5060 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), 
VR128X:$src2, sub_xmm)), 5061 sub_xmm)>; 5062 def : Pat<(v2i64 (OpNode (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))), 5063 (EXTRACT_SUBREG 5064 (!cast<Instruction>(Instr#"rmb") 5065 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm), 5066 addr:$src2), 5067 sub_xmm)>; 5068} 5069 5070let Predicates = [HasAVX512, NoVLX, HasEVEX512] in { 5071 defm : avx512_min_max_lowering<"VPMAXUQZ", umax>; 5072 defm : avx512_min_max_lowering<"VPMINUQZ", umin>; 5073 defm : avx512_min_max_lowering<"VPMAXSQZ", smax>; 5074 defm : avx512_min_max_lowering<"VPMINSQZ", smin>; 5075} 5076 5077//===----------------------------------------------------------------------===// 5078// AVX-512 Logical Instructions 5079//===----------------------------------------------------------------------===// 5080 5081defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and, 5082 SchedWriteVecLogic, HasAVX512, 1>; 5083defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or, 5084 SchedWriteVecLogic, HasAVX512, 1>; 5085defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor, 5086 SchedWriteVecLogic, HasAVX512, 1>; 5087defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp, 5088 SchedWriteVecLogic, HasAVX512>; 5089 5090let Predicates = [HasVLX] in { 5091 def : Pat<(v16i8 (and VR128X:$src1, VR128X:$src2)), 5092 (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>; 5093 def : Pat<(v8i16 (and VR128X:$src1, VR128X:$src2)), 5094 (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>; 5095 5096 def : Pat<(v16i8 (or VR128X:$src1, VR128X:$src2)), 5097 (VPORQZ128rr VR128X:$src1, VR128X:$src2)>; 5098 def : Pat<(v8i16 (or VR128X:$src1, VR128X:$src2)), 5099 (VPORQZ128rr VR128X:$src1, VR128X:$src2)>; 5100 5101 def : Pat<(v16i8 (xor VR128X:$src1, VR128X:$src2)), 5102 (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>; 5103 def : Pat<(v8i16 (xor VR128X:$src1, VR128X:$src2)), 5104 (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>; 5105 5106 def : Pat<(v16i8 (X86andnp VR128X:$src1, VR128X:$src2)), 5107 (VPANDNQZ128rr 
VR128X:$src1, VR128X:$src2)>; 5108 def : Pat<(v8i16 (X86andnp VR128X:$src1, VR128X:$src2)), 5109 (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>; 5110 5111 def : Pat<(and VR128X:$src1, (loadv16i8 addr:$src2)), 5112 (VPANDQZ128rm VR128X:$src1, addr:$src2)>; 5113 def : Pat<(and VR128X:$src1, (loadv8i16 addr:$src2)), 5114 (VPANDQZ128rm VR128X:$src1, addr:$src2)>; 5115 5116 def : Pat<(or VR128X:$src1, (loadv16i8 addr:$src2)), 5117 (VPORQZ128rm VR128X:$src1, addr:$src2)>; 5118 def : Pat<(or VR128X:$src1, (loadv8i16 addr:$src2)), 5119 (VPORQZ128rm VR128X:$src1, addr:$src2)>; 5120 5121 def : Pat<(xor VR128X:$src1, (loadv16i8 addr:$src2)), 5122 (VPXORQZ128rm VR128X:$src1, addr:$src2)>; 5123 def : Pat<(xor VR128X:$src1, (loadv8i16 addr:$src2)), 5124 (VPXORQZ128rm VR128X:$src1, addr:$src2)>; 5125 5126 def : Pat<(X86andnp VR128X:$src1, (loadv16i8 addr:$src2)), 5127 (VPANDNQZ128rm VR128X:$src1, addr:$src2)>; 5128 def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)), 5129 (VPANDNQZ128rm VR128X:$src1, addr:$src2)>; 5130 5131 def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)), 5132 (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>; 5133 def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)), 5134 (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>; 5135 5136 def : Pat<(v32i8 (or VR256X:$src1, VR256X:$src2)), 5137 (VPORQZ256rr VR256X:$src1, VR256X:$src2)>; 5138 def : Pat<(v16i16 (or VR256X:$src1, VR256X:$src2)), 5139 (VPORQZ256rr VR256X:$src1, VR256X:$src2)>; 5140 5141 def : Pat<(v32i8 (xor VR256X:$src1, VR256X:$src2)), 5142 (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>; 5143 def : Pat<(v16i16 (xor VR256X:$src1, VR256X:$src2)), 5144 (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>; 5145 5146 def : Pat<(v32i8 (X86andnp VR256X:$src1, VR256X:$src2)), 5147 (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>; 5148 def : Pat<(v16i16 (X86andnp VR256X:$src1, VR256X:$src2)), 5149 (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>; 5150 5151 def : Pat<(and VR256X:$src1, (loadv32i8 addr:$src2)), 5152 (VPANDQZ256rm VR256X:$src1, 
addr:$src2)>; 5153 def : Pat<(and VR256X:$src1, (loadv16i16 addr:$src2)), 5154 (VPANDQZ256rm VR256X:$src1, addr:$src2)>; 5155 5156 def : Pat<(or VR256X:$src1, (loadv32i8 addr:$src2)), 5157 (VPORQZ256rm VR256X:$src1, addr:$src2)>; 5158 def : Pat<(or VR256X:$src1, (loadv16i16 addr:$src2)), 5159 (VPORQZ256rm VR256X:$src1, addr:$src2)>; 5160 5161 def : Pat<(xor VR256X:$src1, (loadv32i8 addr:$src2)), 5162 (VPXORQZ256rm VR256X:$src1, addr:$src2)>; 5163 def : Pat<(xor VR256X:$src1, (loadv16i16 addr:$src2)), 5164 (VPXORQZ256rm VR256X:$src1, addr:$src2)>; 5165 5166 def : Pat<(X86andnp VR256X:$src1, (loadv32i8 addr:$src2)), 5167 (VPANDNQZ256rm VR256X:$src1, addr:$src2)>; 5168 def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)), 5169 (VPANDNQZ256rm VR256X:$src1, addr:$src2)>; 5170} 5171 5172let Predicates = [HasAVX512] in { 5173 def : Pat<(v64i8 (and VR512:$src1, VR512:$src2)), 5174 (VPANDQZrr VR512:$src1, VR512:$src2)>; 5175 def : Pat<(v32i16 (and VR512:$src1, VR512:$src2)), 5176 (VPANDQZrr VR512:$src1, VR512:$src2)>; 5177 5178 def : Pat<(v64i8 (or VR512:$src1, VR512:$src2)), 5179 (VPORQZrr VR512:$src1, VR512:$src2)>; 5180 def : Pat<(v32i16 (or VR512:$src1, VR512:$src2)), 5181 (VPORQZrr VR512:$src1, VR512:$src2)>; 5182 5183 def : Pat<(v64i8 (xor VR512:$src1, VR512:$src2)), 5184 (VPXORQZrr VR512:$src1, VR512:$src2)>; 5185 def : Pat<(v32i16 (xor VR512:$src1, VR512:$src2)), 5186 (VPXORQZrr VR512:$src1, VR512:$src2)>; 5187 5188 def : Pat<(v64i8 (X86andnp VR512:$src1, VR512:$src2)), 5189 (VPANDNQZrr VR512:$src1, VR512:$src2)>; 5190 def : Pat<(v32i16 (X86andnp VR512:$src1, VR512:$src2)), 5191 (VPANDNQZrr VR512:$src1, VR512:$src2)>; 5192 5193 def : Pat<(and VR512:$src1, (loadv64i8 addr:$src2)), 5194 (VPANDQZrm VR512:$src1, addr:$src2)>; 5195 def : Pat<(and VR512:$src1, (loadv32i16 addr:$src2)), 5196 (VPANDQZrm VR512:$src1, addr:$src2)>; 5197 5198 def : Pat<(or VR512:$src1, (loadv64i8 addr:$src2)), 5199 (VPORQZrm VR512:$src1, addr:$src2)>; 5200 def : Pat<(or VR512:$src1, 
            (loadv32i16 addr:$src2)),
            (VPORQZrm VR512:$src1, addr:$src2)>;

  def : Pat<(xor VR512:$src1, (loadv64i8 addr:$src2)),
            (VPXORQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(xor VR512:$src1, (loadv32i16 addr:$src2)),
            (VPXORQZrm VR512:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR512:$src1, (loadv64i8 addr:$src2)),
            (VPANDNQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)),
            (VPANDNQZrm VR512:$src1, addr:$src2)>;
}

// Patterns to catch vselect with different type than logic op.
// SelectInfo (_) describes the vselect/mask element type; IntInfo describes
// the element type the logic op was performed in. A bitconvert bridges the
// two, and the masked instruction for the select's type is emitted.
multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode,
                                   X86VectorVTInfo _,
                                   X86VectorVTInfo IntInfo> {
  // Masked register-register logical operations.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, _.RC:$src2)>;

  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
             _.RC:$src2)>;

  // Masked register-memory logical operations.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
                                            (load addr:$src2)))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
                                            (load addr:$src2)))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
             addr:$src2)>;
}

// Same idea as avx512_logical_lowering, for the broadcast-memory forms.
multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode,
                                         X86VectorVTInfo _,
                                         X86VectorVTInfo IntInfo> {
  // Register-broadcast logical operations.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert
                    (IntInfo.VT (OpNode _.RC:$src1,
                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert
                    (IntInfo.VT (OpNode _.RC:$src1,
                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rmbkz) _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
}

// Instantiate the mixed-type lowering for all three vector widths, gating
// the sub-512-bit forms on VLX.
multiclass avx512_logical_lowering_sizes<string InstrStr, SDNode OpNode,
                                         AVX512VLVectorVTInfo SelectInfo,
                                         AVX512VLVectorVTInfo IntInfo> {
let Predicates = [HasVLX] in {
  defm : avx512_logical_lowering<InstrStr#"Z128", OpNode, SelectInfo.info128,
                                 IntInfo.info128>;
  defm : avx512_logical_lowering<InstrStr#"Z256", OpNode, SelectInfo.info256,
                                 IntInfo.info256>;
}
let Predicates = [HasAVX512] in {
  defm : avx512_logical_lowering<InstrStr#"Z", OpNode, SelectInfo.info512,
                                 IntInfo.info512>;
}
}

// As above, for the broadcast forms.
multiclass avx512_logical_lowering_sizes_bcast<string InstrStr, SDNode OpNode,
                                               AVX512VLVectorVTInfo SelectInfo,
                                               AVX512VLVectorVTInfo IntInfo> {
let Predicates = [HasVLX] in {
  defm : avx512_logical_lowering_bcast<InstrStr#"Z128", OpNode,
                                       SelectInfo.info128, IntInfo.info128>;
  defm : avx512_logical_lowering_bcast<InstrStr#"Z256", OpNode,
                                       SelectInfo.info256, IntInfo.info256>;
}
let Predicates = [HasAVX512] in {
  defm : avx512_logical_lowering_bcast<InstrStr#"Z", OpNode,
                                       SelectInfo.info512, IntInfo.info512>;
}
}

// Cross product of select element type (i64/i32/f32/f64) against the element
// type the logic op used. "D"/"Q" picks the dword/qword instruction flavor.
multiclass avx512_logical_lowering_types<string InstrStr, SDNode OpNode> {
  // i64 vselect with i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i8_info>;

  // i32 vselect with i64/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i8_info>;

  // f32 vselect with i64/i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i8_info>;

  // f64 vselect with i64/i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i8_info>;

  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"D", OpNode,
                                             avx512vl_f32_info,
                                             avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"Q", OpNode,
                                             avx512vl_f64_info,
                                             avx512vl_i64_info>;
}

defm : avx512_logical_lowering_types<"VPAND", and>;
defm : avx512_logical_lowering_types<"VPOR", or>;
defm : avx512_logical_lowering_types<"VPXOR", xor>;
defm : avx512_logical_lowering_types<"VPANDN", X86andnp>;
//===----------------------------------------------------------------------===//
// AVX-512  FP arithmetic
//===----------------------------------------------------------------------===//

// Scalar FP binop: maskable intrinsic (_Int) forms on the full vector
// register class, plus isCodeGenOnly FRC forms used for plain scalar
// selection. All forms read MXCSR and may raise FP exceptions.
multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                            SDPatternOperator OpNode, SDNode VecNode,
                            X86FoldableSchedWrite sched, bit IsCommutable> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
                           Sched<[sched]>;

  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (VecNode _.RC:$src1,
                                        (_.ScalarIntMemFrags addr:$src2)))>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;
  let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.FRC:$src2),
                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
                          Sched<[sched]> {
    let isCommutable = IsCommutable;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                          [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                                         (_.ScalarLdFrag addr:$src2)))]>,
                          Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
  }
}

// Adds the static-rounding (embedded RC, EVEX.B+RC) intrinsic form.
multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                                  SDNode VecNode, X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
                          "$rc, $src2, $src1", "$src1, $src2, $rc",
                          (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                          (i32 timm:$rc))>,
                          EVEX_B, EVEX_RC, Sched<[sched]>;
}

// As avx512_fp_scalar, plus a {sae} (suppress-all-exceptions) form instead
// of a rounding-control form.
multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                                SDNode OpNode, SDNode VecNode, SDNode SaeNode,
                                X86FoldableSchedWrite sched, bit IsCommutable> {
  let ExeDomain = _.ExeDomain in {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
                           Sched<[sched]>, SIMD_EXC;

  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (VecNode _.RC:$src1,
                                        (_.ScalarIntMemFrags addr:$src2)))>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;

  let isCodeGenOnly = 1, Predicates = [HasAVX512],
      Uses = [MXCSR], mayRaiseFPException = 1 in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.FRC:$src2),
                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
                          Sched<[sched]> {
    let isCommutable = IsCommutable;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                          [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                                         (_.ScalarLdFrag addr:$src2)))]>,
                          Sched<[sched.Folded, sched.ReadAfterFold]>;
  }

  let Uses = [MXCSR] in
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                            (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
                            EVEX_B, Sched<[sched]>;
  }
}

// Instantiate ss/sd (and, with FP16, sh) scalar binops with rounding forms.
multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNode,
                                SDNode VecNode, SDNode RndNode,
                                X86SchedWriteSizes sched, bit IsCommutable> {
  defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
                              sched.PS.Scl, IsCommutable>,
             avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode,
                                    sched.PS.Scl>,
             TB, XS, EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
                              sched.PD.Scl, IsCommutable>,
             avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode,
                                    sched.PD.Scl>,
             TB, XD, REX_W, EVEX, VVVV, VEX_LIG, EVEX_CD8<64, CD8VT1>;
  let Predicates = [HasFP16] in
    defm SHZ : avx512_fp_scalar<opc, OpcodeStr#"sh", f16x_info, OpNode,
                                VecNode, sched.PH.Scl, IsCommutable>,
               avx512_fp_scalar_round<opc, OpcodeStr#"sh", f16x_info, RndNode,
                                      sched.PH.Scl>,
               T_MAP5, XS, EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>;
}

// Instantiate ss/sd/sh scalar binops with {sae} forms (min/max family).
multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode VecNode, SDNode SaeNode,
                              X86SchedWriteSizes sched, bit IsCommutable> {
  defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
                                  VecNode, SaeNode, sched.PS.Scl, IsCommutable>,
             TB, XS, EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
                                  VecNode, SaeNode, sched.PD.Scl, IsCommutable>,
             TB, XD, REX_W, EVEX, VVVV, VEX_LIG, EVEX_CD8<64, CD8VT1>;
  let Predicates = [HasFP16] in {
    defm SHZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sh", f16x_info, OpNode,
                                    VecNode, SaeNode, sched.PH.Scl, IsCommutable>,
               T_MAP5, XS, EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>;
  }
}
defm VADD : avx512_binop_s_round<0x58, "vadd", any_fadd, X86fadds, X86faddRnds,
                                 SchedWriteFAddSizes, 1>;
defm VMUL : avx512_binop_s_round<0x59, "vmul", any_fmul, X86fmuls, X86fmulRnds,
                                 SchedWriteFMulSizes, 1>;
defm VSUB : avx512_binop_s_round<0x5C, "vsub", any_fsub, X86fsubs, X86fsubRnds,
                                 SchedWriteFAddSizes, 0>;
defm VDIV : avx512_binop_s_round<0x5E, "vdiv", any_fdiv, X86fdivs, X86fdivRnds,
                                 SchedWriteFDivSizes, 0>;
defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminSAEs,
                               SchedWriteFCmpSizes, 0>;
defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs,
                               SchedWriteFCmpSizes, 0>;

// MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
// X86fminc and X86fmaxc instead of X86fmin and X86fmax
// NOTE: name keeps the historical "comutable" spelling; referenced elsewhere.
multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
                                    X86VectorVTInfo _, SDNode OpNode,
                                    X86FoldableSchedWrite sched> {
  let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.FRC:$src2),
                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
                          Sched<[sched]> {
    let isCommutable = 1;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                          [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                                         (_.ScalarLdFrag addr:$src2)))]>,
                          Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
                                         SchedWriteFCmp.Scl>, TB, XS,
                                         EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;

defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
                                         SchedWriteFCmp.Scl>, TB, XD,
                                         REX_W, EVEX, VVVV, VEX_LIG,
                                         EVEX_CD8<64, CD8VT1>, SIMD_EXC;

defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
                                         SchedWriteFCmp.Scl>, TB, XS,
                                         EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;

defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info,
                                         X86fmaxc,
                                         SchedWriteFCmp.Scl>, TB, XD,
                                         REX_W, EVEX, VVVV, VEX_LIG,
                                         EVEX_CD8<64, CD8VT1>, SIMD_EXC;

defm VMINCSHZ : avx512_comutable_binop_s<0x5D, "vminsh", f16x_info, X86fminc,
                                         SchedWriteFCmp.Scl>, T_MAP5, XS,
                                         EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC;

defm VMAXCSHZ : avx512_comutable_binop_s<0x5F, "vmaxsh", f16x_info, X86fmaxc,
                                         SchedWriteFCmp.Scl>, T_MAP5, XS,
                                         EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC;

// Packed FP binop: rr, rm and broadcast (rmb) maskable forms. Separate
// OpNode/MaskOpNode allows the unmasked pattern to use a "strict" node
// (e.g. any_fadd) while the masked pattern uses the plain node.
multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDPatternOperator MaskOpNode,
                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
                            bit IsCommutable,
                            bit IsKCommutable = IsCommutable,
                            string suffix = _.Suffix,
                            string ClobberConstraint = "",
                            bit MayRaiseFPException = 1> {
  let ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR], mayRaiseFPException = MayRaiseFPException in {
  defm rr: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#suffix,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                  (_.VT (MaskOpNode _.RC:$src1, _.RC:$src2)), ClobberConstraint,
                  IsCommutable, IsKCommutable, IsKCommutable>, EVEX, VVVV, Sched<[sched]>;
  let mayLoad = 1 in {
    defm rm: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#suffix,
                    "$src2, $src1", "$src1, $src2",
                    (OpNode _.RC:$src1, (_.LdFrag addr:$src2)),
                    (MaskOpNode _.RC:$src1, (_.LdFrag addr:$src2)),
                    ClobberConstraint>, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
    defm rmb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
                     (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#suffix,
                     "${src2}"#_.BroadcastStr#", $src1",
                     "$src1, ${src2}"#_.BroadcastStr,
                     (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
                     (MaskOpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
                     ClobberConstraint>, EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
    }
  }
}

// Packed FP binop with embedded rounding control (register form only).
multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
                                  SDPatternOperator OpNodeRnd,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  string suffix = _.Suffix,
                                  string ClobberConstraint = ""> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr#suffix,
                  "$rc, $src2, $src1", "$src1, $src2, $rc",
                  (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc))),
                  0, 0, 0, vselect_mask, ClobberConstraint>,
                  EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>;
}

// Packed FP binop with {sae} (register form only).
multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNodeSAE,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
                  "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                  (_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>,
                  EVEX, VVVV, EVEX_B, Sched<[sched]>;
}

// Instantiate the ps/pd packed binop across ZMM and (with VLX) XMM/YMM.
multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                             SDPatternOperator MaskOpNode,
                             Predicate prd, X86SchedWriteSizes sched,
                             bit IsCommutable = 0,
                             bit IsPD128Commutable = IsCommutable> {
  let Predicates = [prd] in {
  defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
                              sched.PS.ZMM, IsCommutable>, EVEX_V512, TB,
                              EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f64_info,
                              sched.PD.ZMM, IsCommutable>, EVEX_V512, TB, PD, REX_W,
                              EVEX_CD8<64, CD8VF>;
  }

  // Define only if AVX512VL feature is present.
  let Predicates = [prd, HasVLX] in {
    defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
                                   sched.PS.XMM, IsCommutable>, EVEX_V128, TB,
                                   EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
                                   sched.PS.YMM, IsCommutable>, EVEX_V256, TB,
                                   EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v2f64x_info,
                                   sched.PD.XMM, IsPD128Commutable,
                                   IsCommutable>, EVEX_V128, TB, PD, REX_W,
                                   EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f64x_info,
                                   sched.PD.YMM, IsCommutable>, EVEX_V256, TB, PD, REX_W,
                                   EVEX_CD8<64, CD8VF>;
  }
}

// FP16 (ph) counterpart of avx512_fp_binop_p.
multiclass avx512_fp_binop_ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                              SDPatternOperator MaskOpNode,
                              X86SchedWriteSizes sched, bit IsCommutable = 0> {
  let Predicates = [HasFP16] in {
    defm PHZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v32f16_info,
                                sched.PH.ZMM, IsCommutable>, EVEX_V512, T_MAP5,
                                EVEX_CD8<16, CD8VF>;
  }
  let Predicates = [HasVLX, HasFP16] in {
    defm PHZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f16x_info,
                                   sched.PH.XMM, IsCommutable>, EVEX_V128, T_MAP5,
                                   EVEX_CD8<16, CD8VF>;
    defm PHZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f16x_info,
                                   sched.PH.YMM, IsCommutable>, EVEX_V256, T_MAP5,
                                   EVEX_CD8<16, CD8VF>;
  }
}

// Embedded-rounding forms exist only at 512 bits.
let Uses = [MXCSR] in
multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
                                   X86SchedWriteSizes sched> {
  let Predicates = [HasFP16] in {
    defm PHZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
                                      v32f16_info>,
                                      EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>;
  }
  defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
                                    v16f32_info>,
                                    EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
                                    v8f64_info>,
                                    EVEX_V512, TB, PD, REX_W,EVEX_CD8<64, CD8VF>;
}

// {sae} forms exist only at 512 bits.
let Uses = [MXCSR] in
multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
                                 X86SchedWriteSizes sched> {
  let Predicates = [HasFP16] in {
    defm PHZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
                                    v32f16_info>,
                                    EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>;
  }
  defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
                                  v16f32_info>,
                                  EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
                                  v8f64_info>,
                                  EVEX_V512, TB, PD, REX_W,EVEX_CD8<64, CD8VF>;
}

defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, fadd, HasAVX512,
                              SchedWriteFAddSizes, 1>,
            avx512_fp_binop_ph<0x58, "vadd", any_fadd, fadd, SchedWriteFAddSizes, 1>,
            avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
defm VMUL : avx512_fp_binop_p<0x59, "vmul", any_fmul, fmul, HasAVX512,
                              SchedWriteFMulSizes, 1>,
            avx512_fp_binop_ph<0x59, "vmul", any_fmul, fmul, SchedWriteFMulSizes, 1>,
            avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
defm VSUB : avx512_fp_binop_p<0x5C, "vsub", any_fsub, fsub, HasAVX512,
                              SchedWriteFAddSizes>,
            avx512_fp_binop_ph<0x5C, "vsub", any_fsub, fsub, SchedWriteFAddSizes>,
            avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", any_fdiv, fdiv, HasAVX512,
                              SchedWriteFDivSizes>,
            avx512_fp_binop_ph<0x5E, "vdiv", any_fdiv, fdiv, SchedWriteFDivSizes>,
            avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, X86fmin, HasAVX512,
                              SchedWriteFCmpSizes, 0>,
            avx512_fp_binop_ph<0x5D, "vmin", X86fmin, X86fmin,
                               SchedWriteFCmpSizes, 0>,
            avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>;
defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, X86fmax, HasAVX512,
                              SchedWriteFCmpSizes, 0>,
            avx512_fp_binop_ph<0x5F, "vmax", X86fmax, X86fmax, SchedWriteFCmpSizes, 0>,
            avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>;
let isCodeGenOnly = 1 in {
  defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, X86fminc, HasAVX512,
                                 SchedWriteFCmpSizes, 1>,
               avx512_fp_binop_ph<0x5D, "vmin", X86fminc, X86fminc,
                                  SchedWriteFCmpSizes, 1>;
  defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, X86fmaxc, HasAVX512,
                                 SchedWriteFCmpSizes, 1>,
               avx512_fp_binop_ph<0x5F, "vmax", X86fmaxc, X86fmaxc,
                                  SchedWriteFCmpSizes, 1>;
}
// FP logical ops never touch MXCSR and cannot raise FP exceptions.
let Uses = []<Register>, mayRaiseFPException = 0 in {
defm VAND  : avx512_fp_binop_p<0x54, "vand", null_frag, null_frag, HasDQI,
                               SchedWriteFLogicSizes, 1>;
defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, null_frag, HasDQI,
                               SchedWriteFLogicSizes, 0>;
defm VOR   : avx512_fp_binop_p<0x56, "vor", null_frag, null_frag, HasDQI,
                               SchedWriteFLogicSizes, 1>;
defm VXOR  : avx512_fp_binop_p<0x57, "vxor", null_frag, null_frag, HasDQI,
                               SchedWriteFLogicSizes, 1>;
}

// Packed VSCALEF: rr, rm and broadcast maskable forms.
multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
                  EVEX, VVVV, Sched<[sched]>;
  defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
                  EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix,
                   "${src2}"#_.BroadcastStr#", $src1",
                   "$src1, ${src2}"#_.BroadcastStr,
                   (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
                   EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Scalar VSCALEFS: rr and rm maskable forms.
multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
                           Sched<[sched]>;
  defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr#_.Suffix,
                         "$src2, $src1", "$src1, $src2",
                         (OpNode _.RC:$src1, (_.ScalarIntMemFrags addr:$src2))>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Instantiate packed + scalar scalef across element types and widths;
// packed uses 'opc', scalar uses 'opcScaler'.
multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
                                X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in {
    defm PHZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v32f16_info>,
               avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v32f16_info>,
               EVEX_V512, T_MAP6, PD, EVEX_CD8<16, CD8VF>;
    defm SHZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f16x_info>,
               avx512_fp_scalar_round<opcScaler, OpcodeStr#"sh", f16x_info, X86scalefsRnd, sched.Scl>,
               EVEX, VVVV, T_MAP6, PD, EVEX_CD8<16, CD8VT1>;
  }
  defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>,
             avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>,
             EVEX_V512, EVEX_CD8<32, CD8VF>, T8, PD;
  defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>,
             avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>,
             EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>, T8, PD;
  defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>,
             avx512_fp_scalar_round<opcScaler, OpcodeStr#"ss", f32x_info,
                                    X86scalefsRnd, sched.Scl>,
             EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>, T8, PD;
  defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>,
             avx512_fp_scalar_round<opcScaler, OpcodeStr#"sd", f64x_info,
                                    X86scalefsRnd, sched.Scl>,
             EVEX, VVVV, VEX_LIG, EVEX_CD8<64, CD8VT1>, REX_W, T8, PD;

  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>,
                                     EVEX_V128, EVEX_CD8<32, CD8VF>, T8, PD;
    defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>,
                                     EVEX_V256, EVEX_CD8<32, CD8VF>, T8, PD;
    defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>,
                                     EVEX_V128, REX_W, EVEX_CD8<64, CD8VF>, T8, PD;
    defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>,
                                     EVEX_V256, REX_W, EVEX_CD8<64, CD8VF>, T8, PD;
  }

  let Predicates = [HasFP16, HasVLX] in {
    defm PHZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v8f16x_info>,
                                     EVEX_V128, EVEX_CD8<16, CD8VF>, T_MAP6, PD;
    defm PHZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v16f16x_info>,
                                     EVEX_V256, EVEX_CD8<16, CD8VF>, T_MAP6, PD;
  }
}
defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", SchedWriteFAdd>;

//===----------------------------------------------------------------------===//
// AVX-512  VPTESTM instructions
//===----------------------------------------------------------------------===//

// VPTESTM/VPTESTNM: mask-producing test of src1 against src2.
multiclass avx512_vptest<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  // NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG.
  // There are just too many permutations due to commutability and bitcasts.
  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                      "$src2, $src1", "$src1, $src2",
                   (null_frag), (null_frag), 1>,
                   EVEX, VVVV, Sched<[sched]>;
  let mayLoad = 1 in
  defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                       "$src2, $src1", "$src1, $src2",
                   (null_frag), (null_frag)>,
                   EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Broadcast-memory form of VPTESTM/VPTESTNM.
multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
  defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                    "${src2}"#_.BroadcastStr#", $src1",
                    "$src1, ${src2}"#_.BroadcastStr,
                    (null_frag), (null_frag)>,
                    EVEX_B, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Dword/qword sizes: broadcast forms exist, gated on AVX512 (+VLX for
// the sub-512-bit widths).
multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr,
                                  X86SchedWriteWidths sched,
                                  AVX512VLVectorVTInfo _> {
  let Predicates  = [HasAVX512] in
  defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512>,
           avx512_vptest_mb<opc, OpcodeStr, sched.ZMM, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
  defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256>,
              avx512_vptest_mb<opc, OpcodeStr, sched.YMM, _.info256>, EVEX_V256;
  defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128>,
              avx512_vptest_mb<opc, OpcodeStr, sched.XMM, _.info128>, EVEX_V128;
  }
}

multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr,
                            X86SchedWriteWidths sched> {
  defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", sched,
                                 avx512vl_i32_info>;
  defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", sched,
                                 avx512vl_i64_info>, REX_W;
}

// Byte/word sizes: no broadcast forms; gated on BWI (+VLX).
multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in {
  defm WZ:    avx512_vptest<opc, OpcodeStr#"w", sched.ZMM,
                            v32i16_info>, EVEX_V512, REX_W;
  defm BZ:    avx512_vptest<opc, OpcodeStr#"b", sched.ZMM,
                            v64i8_info>, EVEX_V512;
  }

  let Predicates = [HasVLX, HasBWI] in {
  defm WZ256: avx512_vptest<opc, OpcodeStr#"w", sched.YMM,
                            v16i16x_info>, EVEX_V256, REX_W;
  defm WZ128: avx512_vptest<opc, OpcodeStr#"w", sched.XMM,
                            v8i16x_info>, EVEX_V128, REX_W;
  defm BZ256: avx512_vptest<opc, OpcodeStr#"b", sched.YMM,
                            v32i8x_info>, EVEX_V256;
  defm BZ128: avx512_vptest<opc, OpcodeStr#"b", sched.XMM,
                            v16i8x_info>, EVEX_V128;
  }
}

multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
                                   X86SchedWriteWidths sched> :
  avx512_vptest_wb<opc_wb, OpcodeStr, sched>,
  avx512_vptest_dq<opc_dq, OpcodeStr, sched>;

defm VPTESTM   : avx512_vptest_all_forms<0x26, 0x27, "vptestm",
                                         SchedWriteVecLogic>, T8, PD;
defm VPTESTNM  : avx512_vptest_all_forms<0x26, 0x27, "vptestnm",
                                         SchedWriteVecLogic>, T8, XS;

//===----------------------------------------------------------------------===//
// AVX-512  Shift instructions
//===----------------------------------------------------------------------===//

// Shift by immediate: register (ri) and memory (mi) maskable forms.
multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
                            string OpcodeStr,
                            SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  // reg, imm8 form.
  defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (i8 timm:$src2)))>,
                   Sched<[sched]>;
  // mem, imm8 form (full-vector load of the shifted operand).
  defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
                   (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode (_.VT (_.LdFrag addr:$src1)),
                          (i8 timm:$src2)))>,
                   Sched<[sched.Folded]>;
  }
}

// Shift-by-immediate with a broadcast ({1toN}) memory source.
multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
                             string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
                   (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
                   "$src2, ${src1}"#_.BroadcastStr,
                   "${src1}"#_.BroadcastStr#", $src2",
                   (_.VT (OpNode (_.BroadcastLdFrag addr:$src1),
                          (i8 timm:$src2)))>,
                   EVEX_B, Sched<[sched.Folded]>;
}

// Shift-by-vector-count forms: the count (src2) lives in an XMM register or
// a 128-bit memory location regardless of the destination vector width.
multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, ValueType SrcVT,
                            X86VectorVTInfo _> {
  // src2 is always 128-bit
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>,
                   AVX512BIBase, EVEX, VVVV, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>,
                   AVX512BIBase,
                   EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// 512/256/128-bit instantiations of the shift-by-count forms. Because the
// count memory operand is always 128 bits (see avx512_shift_rrm above), each
// size picks the CD8 tuple form that yields a 16-byte disp8 granule:
// quarter of 512 (CD8VQ), half of 256 (CD8VH), full 128 (CD8VF).
multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched, ValueType SrcVT,
                              AVX512VLVectorVTInfo VTInfo,
                              Predicate prd> {
  let Predicates = [prd] in
  defm Z    : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT,
                               VTInfo.info512>, EVEX_V512,
                               EVEX_CD8<VTInfo.info512.EltSize, CD8VQ>;
  let Predicates = [prd, HasVLX] in {
  defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT,
                               VTInfo.info256>, EVEX_V256,
                               EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
  defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT,
                               VTInfo.info128>, EVEX_V128,
                               EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
  }
}

// Dword/qword/word shift-by-count variants; w requires BWI, q adds REX.W.
multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
                              string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched> {
  defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32,
                              avx512vl_i32_info, HasAVX512>;
  defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
                              avx512vl_i64_info, HasAVX512>, REX_W;
  defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
                              avx512vl_i16_info, HasBWI>;
}

// 512/256/128-bit instantiations of the shift-by-immediate forms, combining
// the plain (rmi) and broadcast (rmbi) memory variants per size.
multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched,
                                  AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in
  defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                           sched.ZMM, VTInfo.info512>,
          avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM,
                            VTInfo.info512>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched.YMM, VTInfo.info256>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM,
                               VTInfo.info256>, EVEX_V256;
  defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched.XMM, VTInfo.info128>,
             avx512_shift_rmbi<opc, ImmFormM,
OpcodeStr, OpNode, sched.XMM, 5991 VTInfo.info128>, EVEX_V128; 5992 } 5993} 5994 5995multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM, 5996 string OpcodeStr, SDNode OpNode, 5997 X86SchedWriteWidths sched> { 5998 let Predicates = [HasBWI] in 5999 defm WZ: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode, 6000 sched.ZMM, v32i16_info>, EVEX_V512, WIG; 6001 let Predicates = [HasVLX, HasBWI] in { 6002 defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode, 6003 sched.YMM, v16i16x_info>, EVEX_V256, WIG; 6004 defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode, 6005 sched.XMM, v8i16x_info>, EVEX_V128, WIG; 6006 } 6007} 6008 6009multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq, 6010 Format ImmFormR, Format ImmFormM, 6011 string OpcodeStr, SDNode OpNode, 6012 X86SchedWriteWidths sched> { 6013 defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode, 6014 sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; 6015 defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode, 6016 sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, REX_W; 6017} 6018 6019defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli, 6020 SchedWriteVecShiftImm>, 6021 avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli, 6022 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV; 6023 6024defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli, 6025 SchedWriteVecShiftImm>, 6026 avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli, 6027 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV; 6028 6029defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai, 6030 SchedWriteVecShiftImm>, 6031 avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai, 6032 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV; 6033 6034defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri, 6035 SchedWriteVecShiftImm>, 
AVX512BIi8Base, EVEX, VVVV; 6036defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli, 6037 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV; 6038 6039defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl, 6040 SchedWriteVecShift>; 6041defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra, 6042 SchedWriteVecShift>; 6043defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl, 6044 SchedWriteVecShift>; 6045 6046// Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX. 6047let Predicates = [HasAVX512, NoVLX, HasEVEX512] in { 6048 def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))), 6049 (EXTRACT_SUBREG (v8i64 6050 (VPSRAQZrr 6051 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6052 VR128X:$src2)), sub_ymm)>; 6053 6054 def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))), 6055 (EXTRACT_SUBREG (v8i64 6056 (VPSRAQZrr 6057 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6058 VR128X:$src2)), sub_xmm)>; 6059 6060 def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 timm:$src2))), 6061 (EXTRACT_SUBREG (v8i64 6062 (VPSRAQZri 6063 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6064 timm:$src2)), sub_ymm)>; 6065 6066 def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 timm:$src2))), 6067 (EXTRACT_SUBREG (v8i64 6068 (VPSRAQZri 6069 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6070 timm:$src2)), sub_xmm)>; 6071} 6072 6073//===-------------------------------------------------------------------===// 6074// Variable Bit Shifts 6075//===-------------------------------------------------------------------===// 6076 6077multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode, 6078 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 6079 let ExeDomain = _.ExeDomain in { 6080 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 6081 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 6082 "$src2, 
$src1", "$src1, $src2", 6083 (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>, 6084 AVX5128IBase, EVEX, VVVV, Sched<[sched]>; 6085 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 6086 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr, 6087 "$src2, $src1", "$src1, $src2", 6088 (_.VT (OpNode _.RC:$src1, 6089 (_.VT (_.LdFrag addr:$src2))))>, 6090 AVX5128IBase, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>, 6091 Sched<[sched.Folded, sched.ReadAfterFold]>; 6092 } 6093} 6094 6095multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode, 6096 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 6097 let ExeDomain = _.ExeDomain in 6098 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 6099 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr, 6100 "${src2}"#_.BroadcastStr#", $src1", 6101 "$src1, ${src2}"#_.BroadcastStr, 6102 (_.VT (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))>, 6103 AVX5128IBase, EVEX_B, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>, 6104 Sched<[sched.Folded, sched.ReadAfterFold]>; 6105} 6106 6107multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, 6108 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> { 6109 let Predicates = [HasAVX512] in 6110 defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, 6111 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512; 6112 6113 let Predicates = [HasAVX512, HasVLX] in { 6114 defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, 6115 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256; 6116 defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, 6117 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128; 6118 } 6119} 6120 6121multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr, 6122 SDNode OpNode, X86SchedWriteWidths sched> { 6123 defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched, 6124 
avx512vl_i32_info>; 6125 defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched, 6126 avx512vl_i64_info>, REX_W; 6127} 6128 6129// Use 512bit version to implement 128/256 bit in case NoVLX. 6130multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr, 6131 SDNode OpNode, list<Predicate> p> { 6132 let Predicates = p in { 6133 def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1), 6134 (_.info256.VT _.info256.RC:$src2))), 6135 (EXTRACT_SUBREG 6136 (!cast<Instruction>(OpcodeStr#"Zrr") 6137 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm), 6138 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)), 6139 sub_ymm)>; 6140 6141 def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1), 6142 (_.info128.VT _.info128.RC:$src2))), 6143 (EXTRACT_SUBREG 6144 (!cast<Instruction>(OpcodeStr#"Zrr") 6145 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm), 6146 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)), 6147 sub_xmm)>; 6148 } 6149} 6150multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr, 6151 SDNode OpNode, X86SchedWriteWidths sched> { 6152 let Predicates = [HasBWI] in 6153 defm WZ: avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>, 6154 EVEX_V512, REX_W; 6155 let Predicates = [HasVLX, HasBWI] in { 6156 6157 defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>, 6158 EVEX_V256, REX_W; 6159 defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>, 6160 EVEX_V128, REX_W; 6161 } 6162} 6163 6164defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", X86vshlv, SchedWriteVarVecShift>, 6165 avx512_var_shift_w<0x12, "vpsllvw", X86vshlv, SchedWriteVarVecShift>; 6166 6167defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", X86vsrav, SchedWriteVarVecShift>, 6168 avx512_var_shift_w<0x11, "vpsravw", X86vsrav, SchedWriteVarVecShift>; 6169 6170defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, 
SchedWriteVarVecShift>, 6171 avx512_var_shift_w<0x10, "vpsrlvw", X86vsrlv, SchedWriteVarVecShift>; 6172 6173defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>; 6174defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>; 6175 6176defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX, HasEVEX512]>; 6177defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX, HasEVEX512]>; 6178defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX, HasEVEX512]>; 6179defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX, HasEVEX512]>; 6180 6181 6182// Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX. 6183let Predicates = [HasAVX512, NoVLX, HasEVEX512] in { 6184 def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))), 6185 (EXTRACT_SUBREG (v8i64 6186 (VPROLVQZrr 6187 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6188 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))), 6189 sub_xmm)>; 6190 def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))), 6191 (EXTRACT_SUBREG (v8i64 6192 (VPROLVQZrr 6193 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6194 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))), 6195 sub_ymm)>; 6196 6197 def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))), 6198 (EXTRACT_SUBREG (v16i32 6199 (VPROLVDZrr 6200 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6201 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))), 6202 sub_xmm)>; 6203 def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), 6204 (EXTRACT_SUBREG (v16i32 6205 (VPROLVDZrr 6206 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6207 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))), 6208 sub_ymm)>; 6209 6210 def : 
Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 timm:$src2))), 6211 (EXTRACT_SUBREG (v8i64 6212 (VPROLQZri 6213 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6214 timm:$src2)), sub_xmm)>; 6215 def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 timm:$src2))), 6216 (EXTRACT_SUBREG (v8i64 6217 (VPROLQZri 6218 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6219 timm:$src2)), sub_ymm)>; 6220 6221 def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 timm:$src2))), 6222 (EXTRACT_SUBREG (v16i32 6223 (VPROLDZri 6224 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6225 timm:$src2)), sub_xmm)>; 6226 def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 timm:$src2))), 6227 (EXTRACT_SUBREG (v16i32 6228 (VPROLDZri 6229 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6230 timm:$src2)), sub_ymm)>; 6231} 6232 6233// Use 512bit VPROR/VPRORI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX. 6234let Predicates = [HasAVX512, NoVLX, HasEVEX512] in { 6235 def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))), 6236 (EXTRACT_SUBREG (v8i64 6237 (VPRORVQZrr 6238 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6239 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))), 6240 sub_xmm)>; 6241 def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))), 6242 (EXTRACT_SUBREG (v8i64 6243 (VPRORVQZrr 6244 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6245 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))), 6246 sub_ymm)>; 6247 6248 def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))), 6249 (EXTRACT_SUBREG (v16i32 6250 (VPRORVDZrr 6251 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6252 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))), 6253 sub_xmm)>; 6254 def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), 6255 (EXTRACT_SUBREG (v16i32 6256 (VPRORVDZrr 6257 (v16i32 (INSERT_SUBREG 
(IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6258 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))), 6259 sub_ymm)>; 6260 6261 def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 timm:$src2))), 6262 (EXTRACT_SUBREG (v8i64 6263 (VPRORQZri 6264 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6265 timm:$src2)), sub_xmm)>; 6266 def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 timm:$src2))), 6267 (EXTRACT_SUBREG (v8i64 6268 (VPRORQZri 6269 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6270 timm:$src2)), sub_ymm)>; 6271 6272 def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 timm:$src2))), 6273 (EXTRACT_SUBREG (v16i32 6274 (VPRORDZri 6275 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6276 timm:$src2)), sub_xmm)>; 6277 def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 timm:$src2))), 6278 (EXTRACT_SUBREG (v16i32 6279 (VPRORDZri 6280 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6281 timm:$src2)), sub_ymm)>; 6282} 6283 6284//===-------------------------------------------------------------------===// 6285// 1-src variable permutation VPERMW/D/Q 6286//===-------------------------------------------------------------------===// 6287 6288multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, 6289 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> { 6290 let Predicates = [HasAVX512] in 6291 defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>, 6292 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512; 6293 6294 let Predicates = [HasAVX512, HasVLX] in 6295 defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>, 6296 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256; 6297} 6298 6299multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM, 6300 string OpcodeStr, SDNode OpNode, 6301 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> { 6302 let Predicates = 
[HasAVX512] in 6303 defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, 6304 sched, VTInfo.info512>, 6305 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, 6306 sched, VTInfo.info512>, EVEX_V512; 6307 let Predicates = [HasAVX512, HasVLX] in 6308 defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, 6309 sched, VTInfo.info256>, 6310 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, 6311 sched, VTInfo.info256>, EVEX_V256; 6312} 6313 6314multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr, 6315 Predicate prd, SDNode OpNode, 6316 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> { 6317 let Predicates = [prd] in 6318 defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>, 6319 EVEX_V512 ; 6320 let Predicates = [HasVLX, prd] in { 6321 defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>, 6322 EVEX_V256 ; 6323 defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>, 6324 EVEX_V128 ; 6325 } 6326} 6327 6328defm VPERMW : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv, 6329 WriteVarShuffle256, avx512vl_i16_info>, REX_W; 6330defm VPERMB : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv, 6331 WriteVarShuffle256, avx512vl_i8_info>; 6332 6333defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv, 6334 WriteVarShuffle256, avx512vl_i32_info>; 6335defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv, 6336 WriteVarShuffle256, avx512vl_i64_info>, REX_W; 6337defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv, 6338 WriteFVarShuffle256, avx512vl_f32_info>; 6339defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv, 6340 WriteFVarShuffle256, avx512vl_f64_info>, REX_W; 6341 6342defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq", 6343 X86VPermi, WriteShuffle256, avx512vl_i64_info>, 6344 EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, REX_W; 6345defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd", 6346 X86VPermi, 
WriteFShuffle256, avx512vl_f64_info>, 6347 EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, REX_W; 6348 6349//===----------------------------------------------------------------------===// 6350// AVX-512 - VPERMIL 6351//===----------------------------------------------------------------------===// 6352 6353multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode, 6354 X86FoldableSchedWrite sched, X86VectorVTInfo _, 6355 X86VectorVTInfo Ctrl> { 6356 defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst), 6357 (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr, 6358 "$src2, $src1", "$src1, $src2", 6359 (_.VT (OpNode _.RC:$src1, 6360 (Ctrl.VT Ctrl.RC:$src2)))>, 6361 T8, PD, EVEX, VVVV, Sched<[sched]>; 6362 defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst), 6363 (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr, 6364 "$src2, $src1", "$src1, $src2", 6365 (_.VT (OpNode 6366 _.RC:$src1, 6367 (Ctrl.VT (Ctrl.LdFrag addr:$src2))))>, 6368 T8, PD, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>, 6369 Sched<[sched.Folded, sched.ReadAfterFold]>; 6370 defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst), 6371 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr, 6372 "${src2}"#_.BroadcastStr#", $src1", 6373 "$src1, ${src2}"#_.BroadcastStr, 6374 (_.VT (OpNode 6375 _.RC:$src1, 6376 (Ctrl.VT (Ctrl.BroadcastLdFrag addr:$src2))))>, 6377 T8, PD, EVEX, VVVV, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, 6378 Sched<[sched.Folded, sched.ReadAfterFold]>; 6379} 6380 6381multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar, 6382 X86SchedWriteWidths sched, 6383 AVX512VLVectorVTInfo _, 6384 AVX512VLVectorVTInfo Ctrl> { 6385 let Predicates = [HasAVX512] in { 6386 defm Z : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM, 6387 _.info512, Ctrl.info512>, EVEX_V512; 6388 } 6389 let Predicates = [HasAVX512, HasVLX] in { 6390 defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM, 6391 _.info128, Ctrl.info128>, EVEX_V128; 6392 defm Z256 : 
               avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM,
                                 _.info256, Ctrl.info256>, EVEX_V256;
  }
}

// VPERMILPS/PD: both the variable-control form (OpcVar, vector control) and
// the immediate form (OpcImm) are emitted under the same NAME. The immediate
// form reuses the shift-by-immediate multiclass infrastructure.
multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
                         AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl> {
  defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
                                      _, Ctrl>;
  defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
                                    X86VPermilpi, SchedWriteFShuffle, _>,
             EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
}

let ExeDomain = SSEPackedSingle in
defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
                               avx512vl_i32_info>;
let ExeDomain = SSEPackedDouble in
defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
                               avx512vl_i64_info>, REX_W;

//===----------------------------------------------------------------------===//
// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
//===----------------------------------------------------------------------===//

// All three share opcode 0x70 and ride on the shift-by-immediate multiclass
// machinery; vpshufhw/vpshuflw are distinguished by XS vs. XD prefix bases.
defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
                                      X86PShufd, SchedWriteShuffle,
                                      avx512vl_i32_info>,
               EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
                                  X86PShufhw, SchedWriteShuffle>,
               EVEX, AVX512XSIi8Base;
defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
                                  X86PShuflw, SchedWriteShuffle>,
               EVEX, AVX512XDIi8Base;

//===----------------------------------------------------------------------===//
// AVX-512 - VPSHUFB
//===----------------------------------------------------------------------===//

// VPSHUFB instantiations (byte shuffle); reuses the 2-operand var-shift
// multiclass since the operand shape (vec, vec -> vec) is identical.
multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
  defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>,
          EVEX_V512;

  let Predicates = [HasVLX, HasBWI] in {
  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>,
             EVEX_V256;
  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>,
             EVEX_V128;
  }
}

defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
                                  SchedWriteVarShuffle>, WIG;

//===----------------------------------------------------------------------===//
// Move Low to High and High to Low packed FP Instructions
//===----------------------------------------------------------------------===//

def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
          (ins VR128X:$src1, VR128X:$src2),
          "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>,
          Sched<[SchedWriteFShuffle.XMM]>, EVEX, VVVV;
// Only the high-to-low form is marked commutable here; VMOVLHPS above is not.
let isCommutable = 1 in
def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
          (ins VR128X:$src1, VR128X:$src2),
          "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
          Sched<[SchedWriteFShuffle.XMM]>, EVEX, VVVV;

//===----------------------------------------------------------------------===//
// VMOVHPS/PD VMOVLPS Instructions
// All patterns were taken from the SSE implementation.
6467//===----------------------------------------------------------------------===// 6468 6469multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr, 6470 SDPatternOperator OpNode, 6471 X86VectorVTInfo _> { 6472 let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in 6473 def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst), 6474 (ins _.RC:$src1, f64mem:$src2), 6475 !strconcat(OpcodeStr, 6476 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 6477 [(set _.RC:$dst, 6478 (OpNode _.RC:$src1, 6479 (_.VT (bitconvert 6480 (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>, 6481 Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX, VVVV; 6482} 6483 6484// No patterns for MOVLPS/MOVHPS as the Movlhps node should only be created in 6485// SSE1. And MOVLPS pattern is even more complex. 6486defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag, 6487 v4f32x_info>, EVEX_CD8<32, CD8VT2>, TB; 6488defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl, 6489 v2f64x_info>, EVEX_CD8<64, CD8VT1>, TB, PD, REX_W; 6490defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag, 6491 v4f32x_info>, EVEX_CD8<32, CD8VT2>, TB; 6492defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd, 6493 v2f64x_info>, EVEX_CD8<64, CD8VT1>, TB, PD, REX_W; 6494 6495let Predicates = [HasAVX512] in { 6496 // VMOVHPD patterns 6497 def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))), 6498 (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>; 6499 6500 // VMOVLPD patterns 6501 def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload64 addr:$src2))), 6502 (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>; 6503} 6504 6505let SchedRW = [WriteFStore] in { 6506let mayStore = 1, hasSideEffects = 0 in 6507def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs), 6508 (ins f64mem:$dst, VR128X:$src), 6509 "vmovhps\t{$src, $dst|$dst, $src}", 6510 []>, EVEX, EVEX_CD8<32, CD8VT2>; 6511def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs), 
6512 (ins f64mem:$dst, VR128X:$src), 6513 "vmovhpd\t{$src, $dst|$dst, $src}", 6514 [(store (f64 (extractelt 6515 (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)), 6516 (iPTR 0))), addr:$dst)]>, 6517 EVEX, EVEX_CD8<64, CD8VT1>, REX_W; 6518let mayStore = 1, hasSideEffects = 0 in 6519def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs), 6520 (ins f64mem:$dst, VR128X:$src), 6521 "vmovlps\t{$src, $dst|$dst, $src}", 6522 []>, EVEX, EVEX_CD8<32, CD8VT2>; 6523def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs), 6524 (ins f64mem:$dst, VR128X:$src), 6525 "vmovlpd\t{$src, $dst|$dst, $src}", 6526 [(store (f64 (extractelt (v2f64 VR128X:$src), 6527 (iPTR 0))), addr:$dst)]>, 6528 EVEX, EVEX_CD8<64, CD8VT1>, REX_W; 6529} // SchedRW 6530 6531let Predicates = [HasAVX512] in { 6532 // VMOVHPD patterns 6533 def : Pat<(store (f64 (extractelt 6534 (v2f64 (X86VPermilpi VR128X:$src, (i8 1))), 6535 (iPTR 0))), addr:$dst), 6536 (VMOVHPDZ128mr addr:$dst, VR128X:$src)>; 6537} 6538//===----------------------------------------------------------------------===// 6539// FMA - Fused Multiply Operations 6540// 6541 6542multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 6543 SDNode MaskOpNode, X86FoldableSchedWrite sched, 6544 X86VectorVTInfo _> { 6545 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0, 6546 Uses = [MXCSR], mayRaiseFPException = 1 in { 6547 defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst), 6548 (ins _.RC:$src2, _.RC:$src3), 6549 OpcodeStr, "$src3, $src2", "$src2, $src3", 6550 (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 6551 (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>, 6552 EVEX, VVVV, Sched<[sched]>; 6553 6554 defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst), 6555 (ins _.RC:$src2, _.MemOp:$src3), 6556 OpcodeStr, "$src3, $src2", "$src2, $src3", 6557 (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 6558 (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, 
(_.LdFrag addr:$src3))), 1, 0>, 6559 EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold, 6560 sched.ReadAfterFold]>; 6561 6562 defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst), 6563 (ins _.RC:$src2, _.ScalarMemOp:$src3), 6564 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), 6565 !strconcat("$src2, ${src3}", _.BroadcastStr ), 6566 (OpNode _.RC:$src2, 6567 _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 6568 (MaskOpNode _.RC:$src2, 6569 _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>, 6570 EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold, 6571 sched.ReadAfterFold]>; 6572 } 6573} 6574 6575multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode, 6576 X86FoldableSchedWrite sched, 6577 X86VectorVTInfo _> { 6578 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0, 6579 Uses = [MXCSR] in 6580 defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst), 6581 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc), 6582 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", 6583 (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 6584 (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>, 6585 EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>; 6586} 6587 6588multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 6589 SDNode MaskOpNode, SDNode OpNodeRnd, 6590 X86SchedWriteWidths sched, 6591 AVX512VLVectorVTInfo _, 6592 Predicate prd = HasAVX512> { 6593 let Predicates = [prd] in { 6594 defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode, 6595 sched.ZMM, _.info512>, 6596 avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM, 6597 _.info512>, 6598 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; 6599 } 6600 let Predicates = [HasVLX, prd] in { 6601 defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode, 6602 sched.YMM, _.info256>, 6603 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; 6604 defm Z128 
: avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode, 6605 sched.XMM, _.info128>, 6606 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; 6607 } 6608} 6609 6610multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 6611 SDNode MaskOpNode, SDNode OpNodeRnd> { 6612 defm PH : avx512_fma3p_213_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode, 6613 OpNodeRnd, SchedWriteFMA, 6614 avx512vl_f16_info, HasFP16>, T_MAP6, PD; 6615 defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode, 6616 OpNodeRnd, SchedWriteFMA, 6617 avx512vl_f32_info>, T8, PD; 6618 defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode, 6619 OpNodeRnd, SchedWriteFMA, 6620 avx512vl_f64_info>, T8, PD, REX_W; 6621} 6622 6623defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", any_fma, 6624 fma, X86FmaddRnd>; 6625defm VFMSUB213 : avx512_fma3p_213_f<0xAA, "vfmsub213", X86any_Fmsub, 6626 X86Fmsub, X86FmsubRnd>; 6627defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, 6628 X86Fmaddsub, X86FmaddsubRnd>; 6629defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, 6630 X86Fmsubadd, X86FmsubaddRnd>; 6631defm VFNMADD213 : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86any_Fnmadd, 6632 X86Fnmadd, X86FnmaddRnd>; 6633defm VFNMSUB213 : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86any_Fnmsub, 6634 X86Fnmsub, X86FnmsubRnd>; 6635 6636 6637multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 6638 SDNode MaskOpNode, X86FoldableSchedWrite sched, 6639 X86VectorVTInfo _> { 6640 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0, 6641 Uses = [MXCSR], mayRaiseFPException = 1 in { 6642 defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst), 6643 (ins _.RC:$src2, _.RC:$src3), 6644 OpcodeStr, "$src3, $src2", "$src2, $src3", 6645 (null_frag), 6646 (_.VT (MaskOpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>, 6647 EVEX, VVVV, Sched<[sched]>; 6648 6649 defm m: 
AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst), 6650 (ins _.RC:$src2, _.MemOp:$src3), 6651 OpcodeStr, "$src3, $src2", "$src2, $src3", 6652 (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 6653 (_.VT (MaskOpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>, 6654 EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold, 6655 sched.ReadAfterFold]>; 6656 6657 defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst), 6658 (ins _.RC:$src2, _.ScalarMemOp:$src3), 6659 OpcodeStr, "${src3}"#_.BroadcastStr#", $src2", 6660 "$src2, ${src3}"#_.BroadcastStr, 6661 (_.VT (OpNode _.RC:$src2, 6662 (_.VT (_.BroadcastLdFrag addr:$src3)), 6663 _.RC:$src1)), 6664 (_.VT (MaskOpNode _.RC:$src2, 6665 (_.VT (_.BroadcastLdFrag addr:$src3)), 6666 _.RC:$src1)), 1, 0>, EVEX, VVVV, EVEX_B, 6667 Sched<[sched.Folded, sched.ReadAfterFold, 6668 sched.ReadAfterFold]>; 6669 } 6670} 6671 6672multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode, 6673 X86FoldableSchedWrite sched, 6674 X86VectorVTInfo _> { 6675 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0, 6676 Uses = [MXCSR] in 6677 defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst), 6678 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc), 6679 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", 6680 (null_frag), 6681 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))), 6682 1, 1>, EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>; 6683} 6684 6685multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 6686 SDNode MaskOpNode, SDNode OpNodeRnd, 6687 X86SchedWriteWidths sched, 6688 AVX512VLVectorVTInfo _, 6689 Predicate prd = HasAVX512> { 6690 let Predicates = [prd] in { 6691 defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode, 6692 sched.ZMM, _.info512>, 6693 avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM, 6694 _.info512>, 6695 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; 6696 } 6697 let 
Predicates = [HasVLX, prd] in { 6698 defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode, 6699 sched.YMM, _.info256>, 6700 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; 6701 defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode, 6702 sched.XMM, _.info128>, 6703 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; 6704 } 6705} 6706 6707multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 6708 SDNode MaskOpNode, SDNode OpNodeRnd > { 6709 defm PH : avx512_fma3p_231_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode, 6710 OpNodeRnd, SchedWriteFMA, 6711 avx512vl_f16_info, HasFP16>, T_MAP6, PD; 6712 defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode, 6713 OpNodeRnd, SchedWriteFMA, 6714 avx512vl_f32_info>, T8, PD; 6715 defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode, 6716 OpNodeRnd, SchedWriteFMA, 6717 avx512vl_f64_info>, T8, PD, REX_W; 6718} 6719 6720defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", any_fma, 6721 fma, X86FmaddRnd>; 6722defm VFMSUB231 : avx512_fma3p_231_f<0xBA, "vfmsub231", X86any_Fmsub, 6723 X86Fmsub, X86FmsubRnd>; 6724defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, 6725 X86Fmaddsub, X86FmaddsubRnd>; 6726defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, 6727 X86Fmsubadd, X86FmsubaddRnd>; 6728defm VFNMADD231 : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86any_Fnmadd, 6729 X86Fnmadd, X86FnmaddRnd>; 6730defm VFNMSUB231 : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86any_Fnmsub, 6731 X86Fnmsub, X86FnmsubRnd>; 6732 6733multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 6734 SDNode MaskOpNode, X86FoldableSchedWrite sched, 6735 X86VectorVTInfo _> { 6736 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0, 6737 Uses = [MXCSR], mayRaiseFPException = 1 in { 6738 defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst), 6739 (ins _.RC:$src2, 
_.RC:$src3), 6740 OpcodeStr, "$src3, $src2", "$src2, $src3", 6741 (null_frag), 6742 (_.VT (MaskOpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1>, 6743 EVEX, VVVV, Sched<[sched]>; 6744 6745 // Pattern is 312 order so that the load is in a different place from the 6746 // 213 and 231 patterns this helps tablegen's duplicate pattern detection. 6747 defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst), 6748 (ins _.RC:$src2, _.MemOp:$src3), 6749 OpcodeStr, "$src3, $src2", "$src2, $src3", 6750 (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 6751 (_.VT (MaskOpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>, 6752 EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold, 6753 sched.ReadAfterFold]>; 6754 6755 // Pattern is 312 order so that the load is in a different place from the 6756 // 213 and 231 patterns this helps tablegen's duplicate pattern detection. 6757 defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst), 6758 (ins _.RC:$src2, _.ScalarMemOp:$src3), 6759 OpcodeStr, "${src3}"#_.BroadcastStr#", $src2", 6760 "$src2, ${src3}"#_.BroadcastStr, 6761 (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src3)), 6762 _.RC:$src1, _.RC:$src2)), 6763 (_.VT (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src3)), 6764 _.RC:$src1, _.RC:$src2)), 1, 0>, 6765 EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold, 6766 sched.ReadAfterFold]>; 6767 } 6768} 6769 6770multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode, 6771 X86FoldableSchedWrite sched, 6772 X86VectorVTInfo _> { 6773 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0, 6774 Uses = [MXCSR] in 6775 defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst), 6776 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc), 6777 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", 6778 (null_frag), 6779 (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))), 6780 1, 1>, EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>; 6781} 6782 
// 132-form packed FMA: instantiates the register/memory/broadcast forms
// (avx512_fma3p_132_rm) for ZMM plus, under VLX, YMM and XMM, and adds the
// static-rounding variant (avx512_fma3_132_round) for the 512-bit form only.
multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                                   SDNode MaskOpNode, SDNode OpNodeRnd,
                                   X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _,
                                   Predicate prd = HasAVX512> {
  let Predicates = [prd] in {
    defm Z      : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                      sched.ZMM, _.info512>,
                  avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                        _.info512>,
                  EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, prd] in {
    defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.YMM, _.info256>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.XMM, _.info128>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}

// Expands one 132-form opcode into the PH (f16, requires FP16), PS (f32) and
// PD (f64) element-type families with their respective opcode-map prefixes.
multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                              SDNode MaskOpNode, SDNode OpNodeRnd > {
  defm PH : avx512_fma3p_132_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f16_info, HasFP16>, T_MAP6, PD;
  defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f32_info>, T8, PD;
  defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f64_info>, T8, PD, REX_W;
}

defm VFMADD132    : avx512_fma3p_132_f<0x98, "vfmadd132", any_fma,
                                       fma, X86FmaddRnd>;
defm VFMSUB132    : avx512_fma3p_132_f<0x9A, "vfmsub132", X86any_Fmsub,
                                       X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub,
                                       X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd,
                                       X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD132   : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86any_Fnmadd,
                                       X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB132   : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86any_Fnmsub,
                                       X86Fnmsub, X86FnmsubRnd>;

// Scalar FMA
// Generates the intrinsic (_Int, maskable, full-vector RC operands) forms via
// AVX512_maskable_3src_scalar, plus isCodeGenOnly FRC-operand forms (r/m/rb)
// used for plain scalar selection. The _Int forms carry no patterns here
// (null_frag); they are matched by avx512_scalar_fma_patterns below.
multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                               dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
  defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
          EVEX, VVVV, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;

  let mayLoad = 1 in
  defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
          EVEX, VVVV, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold,
                             SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;

  let Uses = [MXCSR] in
  defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
          EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;

  let isCodeGenOnly = 1, isCommutable = 1 in {
    def r : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
                   (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
                   !strconcat(OpcodeStr,
                              "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                   !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>, EVEX, VVVV, SIMD_EXC;
    def m : AVX512<opc, MRMSrcMem, (outs _.FRC:$dst),
                   (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
                   !strconcat(OpcodeStr,
                              "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                   [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold,
                                    SchedWriteFMA.Scl.ReadAfterFold]>, EVEX, VVVV, SIMD_EXC;

    let Uses = [MXCSR] in
    def rb : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
                    (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
                    !strconcat(OpcodeStr,
                               "\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"),
                    !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
                    Sched<[SchedWriteFMA.Scl]>, EVEX, VVVV;
  }// isCodeGenOnly = 1
}// Constraints = "$src1 = $dst"
}

// Instantiates the 213/231/132 scalar forms for one element type, supplying
// the codegen-only patterns (RHS_r/RHS_m/RHS_b) for each operand order.
multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
                            string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd,
                            X86VectorVTInfo _, string SUFF> {
  let ExeDomain = _.ExeDomain in {
  defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
                // Operands for intrinsic are in 123 order to preserve passthu
                // semantics.
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
                         _.FRC:$src3))),
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
                         (_.ScalarLdFrag addr:$src3)))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
                         _.FRC:$src3, (i32 timm:$rc)))), 0>;

  defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
                         _.FRC:$src1))),
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
                         (_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
                         _.FRC:$src1, (i32 timm:$rc)))), 1>;

  // One pattern is 312 order so that the load is in a different place from the
  // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
  defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
                         _.FRC:$src2))),
                (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
                         _.FRC:$src1, _.FRC:$src2))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
                         _.FRC:$src2, (i32 timm:$rc)))), 1>;
  }
}

// Expands one scalar FMA family into SS (f32), SD (f64) and, under FP16,
// SH (f16) variants.
multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
                        string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd> {
  let Predicates = [HasAVX512] in {
    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
                                 OpNodeRnd, f32x_info, "SS">,
                                 EVEX_CD8<32, CD8VT1>, VEX_LIG, T8, PD;
    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
                                 OpNodeRnd, f64x_info, "SD">,
                                 EVEX_CD8<64, CD8VT1>, VEX_LIG, REX_W, T8, PD;
  }
  let Predicates = [HasFP16] in {
    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
                                 OpNodeRnd, f16x_info, "SH">,
                                 EVEX_CD8<16, CD8VT1>, VEX_LIG, T_MAP6, PD;
  }
}

defm VFMADD  : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", any_fma, X86FmaddRnd>;
defm VFMSUB  : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86any_Fmsub, X86FmsubRnd>;
defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86any_Fnmadd, X86FnmaddRnd>;
defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86any_Fnmsub, X86FnmsubRnd>;

// Matches scalar FMA DAGs of the shape (Move v128, (scalar_to_vector (Op ...)))
// onto the _Int instruction forms: plain, masked (X86selects_mask with the
// passthru element), zero-masked (fallback ZeroFP) and rounding (RndOp)
// variants, choosing the 213/231/132 form by where the $src1 element and the
// optional load appear among the operands.
multiclass avx512_scalar_fma_patterns<SDPatternOperator Op, SDNode MaskedOp,
                                      SDNode RndOp, string Prefix,
                                      string Suffix, SDNode Move,
                                      X86VectorVTInfo _, PatLeaf ZeroFP,
                                      Predicate prd = HasAVX512> {
  let Predicates = [prd] in {
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    _.FRC:$src3))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2, _.FRC:$src3,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    (_.ScalarLdFrag addr:$src3)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zm_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))),
              (!cast<I>(Prefix#"132"#Suffix#"Zm_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp _.FRC:$src2,
                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                           _.FRC:$src3),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp _.FRC:$src2,
                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                           (_.ScalarLdFrag addr:$src3)),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                           (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp _.FRC:$src2, _.FRC:$src3,
                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp _.FRC:$src2,
                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                           _.FRC:$src3),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp _.FRC:$src2, _.FRC:$src3,
                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp _.FRC:$src2,
                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                           (_.ScalarLdFrag addr:$src3)),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                           _.FRC:$src2, (_.ScalarLdFrag addr:$src3)),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    // Patterns with rounding mode.
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (RndOp _.FRC:$src2,
                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                       _.FRC:$src3, (i32 timm:$rc)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (RndOp _.FRC:$src2, _.FRC:$src3,
                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                       (i32 timm:$rc)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (RndOp _.FRC:$src2,
                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                        _.FRC:$src3, (i32 timm:$rc)),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (RndOp _.FRC:$src2, _.FRC:$src3,
                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                        (i32 timm:$rc)),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (RndOp _.FRC:$src2,
                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                        _.FRC:$src3, (i32 timm:$rc)),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (RndOp _.FRC:$src2, _.FRC:$src3,
                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                        (i32 timm:$rc)),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
  }
}
defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD", "SH",
                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB", "SH",
                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SH",
                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SH",
                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;

defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
                                  "SS", X86Movss, v4f32x_info, fp32imm0>;

defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;

//===----------------------------------------------------------------------===//
// AVX-512 Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
//===----------------------------------------------------------------------===//
let Constraints = "$src1 = $dst" in {
// Register, memory and broadcast forms of the IFMA (vpmadd52) operations.
multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  // NOTE: The SDNode have the multiply operands first with the add last.
  // This enables commuted load patterns to be autogenerated by tablegen.
  let ExeDomain = _.ExeDomain in {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
          T8, PD, EVEX, VVVV, Sched<[sched]>;

  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
          T8, PD, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold,
                                     sched.ReadAfterFold]>;

  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
            (ins _.RC:$src2, _.ScalarMemOp:$src3),
            OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
            !strconcat("$src2, ${src3}", _.BroadcastStr ),
            (OpNode _.RC:$src2,
                    (_.VT (_.BroadcastLdFrag addr:$src3)),
                    _.RC:$src1)>,
            T8, PD, EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
                                               sched.ReadAfterFold]>;
  }
}
} // Constraints = "$src1 = $dst"

7198multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode, 7199 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> { 7200 let Predicates = [HasIFMA] in { 7201 defm Z : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, 7202 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; 7203 } 7204 let Predicates = [HasVLX, HasIFMA] in { 7205 defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, 7206 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; 7207 defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, 7208 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; 7209 } 7210} 7211 7212defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l, 7213 SchedWriteVecIMul, avx512vl_i64_info>, 7214 REX_W; 7215defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h, 7216 SchedWriteVecIMul, avx512vl_i64_info>, 7217 REX_W; 7218 7219//===----------------------------------------------------------------------===// 7220// AVX-512 Scalar convert from sign integer to float/double 7221//===----------------------------------------------------------------------===// 7222 7223multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched, 7224 RegisterClass SrcRC, X86VectorVTInfo DstVT, 7225 X86MemOperand x86memop, PatFrag ld_frag, string asm, 7226 string mem, list<Register> _Uses = [MXCSR], 7227 bit _mayRaiseFPException = 1> { 7228let ExeDomain = DstVT.ExeDomain, Uses = _Uses, 7229 mayRaiseFPException = _mayRaiseFPException in { 7230 let hasSideEffects = 0, isCodeGenOnly = 1 in { 7231 def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst), 7232 (ins DstVT.FRC:$src1, SrcRC:$src), 7233 !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>, 7234 EVEX, VVVV, Sched<[sched, ReadDefault, ReadInt2Fpu]>; 7235 let mayLoad = 1 in 7236 def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst), 7237 (ins DstVT.FRC:$src1, x86memop:$src), 7238 asm#"{"#mem#"}\t{$src, $src1, 
$dst|$dst, $src1, $src}", []>, 7239 EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>; 7240 } // hasSideEffects = 0 7241 def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), 7242 (ins DstVT.RC:$src1, SrcRC:$src2), 7243 !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 7244 [(set DstVT.RC:$dst, 7245 (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2))]>, 7246 EVEX, VVVV, Sched<[sched, ReadDefault, ReadInt2Fpu]>; 7247 7248 def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), 7249 (ins DstVT.RC:$src1, x86memop:$src2), 7250 asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}", 7251 [(set DstVT.RC:$dst, 7252 (OpNode (DstVT.VT DstVT.RC:$src1), 7253 (ld_frag addr:$src2)))]>, 7254 EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>; 7255} 7256 def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 7257 (!cast<Instruction>(NAME#"rr_Int") DstVT.RC:$dst, 7258 DstVT.RC:$src1, SrcRC:$src2), 0, "att">; 7259} 7260 7261multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode, 7262 X86FoldableSchedWrite sched, RegisterClass SrcRC, 7263 X86VectorVTInfo DstVT, string asm, 7264 string mem> { 7265 let ExeDomain = DstVT.ExeDomain, Uses = [MXCSR] in 7266 def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), 7267 (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 7268 !strconcat(asm, 7269 "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"), 7270 [(set DstVT.RC:$dst, 7271 (OpNode (DstVT.VT DstVT.RC:$src1), 7272 SrcRC:$src2, 7273 (i32 timm:$rc)))]>, 7274 EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched, ReadDefault, ReadInt2Fpu]>; 7275 def : InstAlias<"v"#asm#mem#"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}", 7276 (!cast<Instruction>(NAME#"rrb_Int") DstVT.RC:$dst, 7277 DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 0, "att">; 7278} 7279 7280multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, SDNode OpNodeRnd, 7281 X86FoldableSchedWrite sched, 7282 RegisterClass SrcRC, X86VectorVTInfo DstVT, 7283 X86MemOperand x86memop, PatFrag ld_frag, 7284 
string asm, string mem> { 7285 defm NAME : avx512_vcvtsi_round<opc, OpNodeRnd, sched, SrcRC, DstVT, asm, mem>, 7286 avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop, 7287 ld_frag, asm, mem>, VEX_LIG; 7288} 7289 7290let Predicates = [HasAVX512] in { 7291defm VCVTSI2SSZ : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, 7292 WriteCvtI2SS, GR32, 7293 v4f32x_info, i32mem, loadi32, "cvtsi2ss", "l">, 7294 TB, XS, EVEX_CD8<32, CD8VT1>; 7295defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, 7296 WriteCvtI2SS, GR64, 7297 v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">, 7298 TB, XS, REX_W, EVEX_CD8<64, CD8VT1>; 7299defm VCVTSI2SDZ : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32, 7300 v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l", [], 0>, 7301 TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; 7302defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, 7303 WriteCvtI2SD, GR64, 7304 v2f64x_info, i64mem, loadi64, "cvtsi2sd", "q">, 7305 TB, XD, REX_W, EVEX_CD8<64, CD8VT1>; 7306 7307def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}", 7308 (VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">; 7309def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}", 7310 (VCVTSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">; 7311 7312def : Pat<(f32 (any_sint_to_fp (loadi32 addr:$src))), 7313 (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>; 7314def : Pat<(f32 (any_sint_to_fp (loadi64 addr:$src))), 7315 (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>; 7316def : Pat<(f64 (any_sint_to_fp (loadi32 addr:$src))), 7317 (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>; 7318def : Pat<(f64 (any_sint_to_fp (loadi64 addr:$src))), 7319 (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>; 7320 7321def : Pat<(f32 (any_sint_to_fp GR32:$src)), 7322 (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>; 7323def : Pat<(f32 (any_sint_to_fp GR64:$src)), 7324 (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>; 7325def : 
Pat<(f64 (any_sint_to_fp GR32:$src)), 7326 (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>; 7327def : Pat<(f64 (any_sint_to_fp GR64:$src)), 7328 (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>; 7329 7330defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, 7331 WriteCvtI2SS, GR32, 7332 v4f32x_info, i32mem, loadi32, 7333 "cvtusi2ss", "l">, TB, XS, EVEX_CD8<32, CD8VT1>; 7334defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, 7335 WriteCvtI2SS, GR64, 7336 v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">, 7337 TB, XS, REX_W, EVEX_CD8<64, CD8VT1>; 7338defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info, 7339 i32mem, loadi32, "cvtusi2sd", "l", [], 0>, 7340 TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; 7341defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, 7342 WriteCvtI2SD, GR64, 7343 v2f64x_info, i64mem, loadi64, "cvtusi2sd", "q">, 7344 TB, XD, REX_W, EVEX_CD8<64, CD8VT1>; 7345 7346def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}", 7347 (VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">; 7348def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}", 7349 (VCVTUSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">; 7350 7351def : Pat<(f32 (any_uint_to_fp (loadi32 addr:$src))), 7352 (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>; 7353def : Pat<(f32 (any_uint_to_fp (loadi64 addr:$src))), 7354 (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>; 7355def : Pat<(f64 (any_uint_to_fp (loadi32 addr:$src))), 7356 (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>; 7357def : Pat<(f64 (any_uint_to_fp (loadi64 addr:$src))), 7358 (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>; 7359 7360def : Pat<(f32 (any_uint_to_fp GR32:$src)), 7361 (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>; 7362def : Pat<(f32 (any_uint_to_fp GR64:$src)), 7363 (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>; 7364def : Pat<(f64 (any_uint_to_fp 
GR32:$src)), 7365 (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>; 7366def : Pat<(f64 (any_uint_to_fp GR64:$src)), 7367 (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>; 7368} 7369 7370//===----------------------------------------------------------------------===// 7371// AVX-512 Scalar convert from float/double to integer 7372//===----------------------------------------------------------------------===// 7373 7374multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT, 7375 X86VectorVTInfo DstVT, SDNode OpNode, 7376 SDNode OpNodeRnd, 7377 X86FoldableSchedWrite sched, string asm, 7378 string aliasStr, Predicate prd = HasAVX512> { 7379 let Predicates = [prd], ExeDomain = SrcVT.ExeDomain in { 7380 def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src), 7381 !strconcat(asm,"\t{$src, $dst|$dst, $src}"), 7382 [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>, 7383 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC; 7384 let Uses = [MXCSR] in 7385 def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc), 7386 !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"), 7387 [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 timm:$rc)))]>, 7388 EVEX, VEX_LIG, EVEX_B, EVEX_RC, 7389 Sched<[sched]>; 7390 def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src), 7391 !strconcat(asm,"\t{$src, $dst|$dst, $src}"), 7392 [(set DstVT.RC:$dst, (OpNode 7393 (SrcVT.ScalarIntMemFrags addr:$src)))]>, 7394 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 7395 } // Predicates = [prd] 7396 7397 def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}", 7398 (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">; 7399 def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}", 7400 (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">; 7401 def : InstAlias<"v" # asm # aliasStr # 
"\t{$src, $dst|$dst, $src}", 7402 (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst, 7403 SrcVT.IntScalarMemOp:$src), 0, "att">; 7404} 7405 7406// Convert float/double to signed/unsigned int 32/64 7407defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,X86cvts2si, 7408 X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">, 7409 TB, XS, EVEX_CD8<32, CD8VT1>; 7410defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si, 7411 X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">, 7412 TB, XS, REX_W, EVEX_CD8<32, CD8VT1>; 7413defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, X86cvts2usi, 7414 X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">, 7415 TB, XS, EVEX_CD8<32, CD8VT1>; 7416defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, X86cvts2usi, 7417 X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">, 7418 TB, XS, REX_W, EVEX_CD8<32, CD8VT1>; 7419defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si, 7420 X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">, 7421 TB, XD, EVEX_CD8<64, CD8VT1>; 7422defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si, 7423 X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">, 7424 TB, XD, REX_W, EVEX_CD8<64, CD8VT1>; 7425defm VCVTSD2USIZ: avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, X86cvts2usi, 7426 X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">, 7427 TB, XD, EVEX_CD8<64, CD8VT1>; 7428defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2usi, 7429 X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">, 7430 TB, XD, REX_W, EVEX_CD8<64, CD8VT1>; 7431 7432multiclass avx512_cvt_s<bits<8> opc, string asm, X86VectorVTInfo SrcVT, 7433 X86VectorVTInfo DstVT, SDNode OpNode, 7434 X86FoldableSchedWrite sched> { 7435 let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in { 7436 let isCodeGenOnly = 1 in { 7437 def rr : AVX512<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.FRC:$src), 7438 
!strconcat(asm,"\t{$src, $dst|$dst, $src}"), 7439 [(set DstVT.RC:$dst, (OpNode SrcVT.FRC:$src))]>, 7440 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC; 7441 def rm : AVX512<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.ScalarMemOp:$src), 7442 !strconcat(asm,"\t{$src, $dst|$dst, $src}"), 7443 [(set DstVT.RC:$dst, (OpNode (SrcVT.ScalarLdFrag addr:$src)))]>, 7444 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 7445 } 7446 } // Predicates = [HasAVX512] 7447} 7448 7449defm VCVTSS2SIZ: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i32x_info, 7450 lrint, WriteCvtSS2I>, TB, XS, EVEX_CD8<32, CD8VT1>; 7451defm VCVTSS2SI64Z: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i64x_info, 7452 llrint, WriteCvtSS2I>, REX_W, TB, XS, EVEX_CD8<32, CD8VT1>; 7453defm VCVTSD2SIZ: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i32x_info, 7454 lrint, WriteCvtSD2I>, TB, XD, EVEX_CD8<64, CD8VT1>; 7455defm VCVTSD2SI64Z: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i64x_info, 7456 llrint, WriteCvtSD2I>, REX_W, TB, XD, EVEX_CD8<64, CD8VT1>; 7457 7458let Predicates = [HasAVX512] in { 7459 def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64Zrr FR32:$src)>; 7460 def : Pat<(i64 (lrint (loadf32 addr:$src))), (VCVTSS2SI64Zrm addr:$src)>; 7461 7462 def : Pat<(i64 (lrint FR64:$src)), (VCVTSD2SI64Zrr FR64:$src)>; 7463 def : Pat<(i64 (lrint (loadf64 addr:$src))), (VCVTSD2SI64Zrm addr:$src)>; 7464} 7465 7466// Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang 7467// which produce unnecessary vmovs{s,d} instructions 7468let Predicates = [HasAVX512] in { 7469def : Pat<(v4f32 (X86Movss 7470 (v4f32 VR128X:$dst), 7471 (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))), 7472 (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>; 7473 7474def : Pat<(v4f32 (X86Movss 7475 (v4f32 VR128X:$dst), 7476 (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))), 7477 (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>; 7478 7479def : Pat<(v4f32 (X86Movss 7480 (v4f32 VR128X:$dst), 7481 (v4f32 
(scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))), 7482 (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>; 7483 7484def : Pat<(v4f32 (X86Movss 7485 (v4f32 VR128X:$dst), 7486 (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))), 7487 (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>; 7488 7489def : Pat<(v2f64 (X86Movsd 7490 (v2f64 VR128X:$dst), 7491 (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))), 7492 (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>; 7493 7494def : Pat<(v2f64 (X86Movsd 7495 (v2f64 VR128X:$dst), 7496 (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))), 7497 (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>; 7498 7499def : Pat<(v2f64 (X86Movsd 7500 (v2f64 VR128X:$dst), 7501 (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))), 7502 (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>; 7503 7504def : Pat<(v2f64 (X86Movsd 7505 (v2f64 VR128X:$dst), 7506 (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))), 7507 (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>; 7508 7509def : Pat<(v4f32 (X86Movss 7510 (v4f32 VR128X:$dst), 7511 (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR64:$src)))))), 7512 (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>; 7513 7514def : Pat<(v4f32 (X86Movss 7515 (v4f32 VR128X:$dst), 7516 (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi64 addr:$src))))))), 7517 (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>; 7518 7519def : Pat<(v4f32 (X86Movss 7520 (v4f32 VR128X:$dst), 7521 (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR32:$src)))))), 7522 (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>; 7523 7524def : Pat<(v4f32 (X86Movss 7525 (v4f32 VR128X:$dst), 7526 (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi32 addr:$src))))))), 7527 (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>; 7528 7529def : Pat<(v2f64 (X86Movsd 7530 (v2f64 VR128X:$dst), 7531 (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR64:$src)))))), 7532 (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>; 7533 7534def : Pat<(v2f64 (X86Movsd 
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi64 addr:$src))))))),
          (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;

// As above: fold clang's movsd-of-converted-scalar idiom directly into the
// blend-with-$dst (_Int) form of the convert instruction.
def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR32:$src)))))),
          (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi32 addr:$src))))))),
          (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
} // Predicates = [HasAVX512]

// Convert float/double to signed/unsigned int 32/64 with truncation
//
//   _SrcRC    - source FP type info (f32x_info / f64x_info)
//   _DstRC    - destination integer type info (i32x_info / i64x_info)
//   OpNode    - node matched by the isCodeGenOnly FRC-register forms
//               (e.g. any_fp_to_sint / any_fp_to_uint)
//   OpNodeInt - node matched by the XMM-register (_Int) forms
//   OpNodeSAE - node matched by the suppress-all-exceptions {sae} form
//   aliasStr  - AT&T operand-size suffix ("{l}" / "{q}") used by the
//               InstAliases emitted at the end of the multiclass
multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
                            X86VectorVTInfo _DstRC, SDPatternOperator OpNode,
                            SDNode OpNodeInt, SDNode OpNodeSAE,
                            X86FoldableSchedWrite sched, string aliasStr,
                            Predicate prd = HasAVX512> {
let Predicates = [prd], ExeDomain = _SrcRC.ExeDomain in {
  // Scalar-FP-register (FRC) forms: used only by ISel patterns, never for
  // assembly parsing/printing.
  let isCodeGenOnly = 1 in {
  def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
              [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
              EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
  def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
              [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
              EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  }

  // XMM-register (intrinsic) forms.
  def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
            !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
            [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
            EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
  // {sae} (suppress-all-exceptions) register form; register-only, no
  // memory variant.
  let Uses = [MXCSR] in
  def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
            !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
            [(set _DstRC.RC:$dst, (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>,
                                  EVEX, VEX_LIG, EVEX_B, Sched<[sched]>;
  def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
              (ins _SrcRC.IntScalarMemOp:$src),
              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
              [(set _DstRC.RC:$dst,
                (OpNodeInt (_SrcRC.ScalarIntMemFrags addr:$src)))]>,
              EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
} // Predicates = [prd]

  // AT&T-syntax aliases carrying the explicit size suffix from aliasStr.
  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
          (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
  def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
          (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
          (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
          _SrcRC.IntScalarMemOp:$src), 0, "att">;
}

// Truncating convert to signed int: VCVTTSS2SI / VCVTTSD2SI, 32- and 64-bit.
defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
                        "{l}">, TB, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
                        "{q}">, REX_W, TB, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
                        "{l}">, TB, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
                        "{q}">, REX_W, TB, XD, EVEX_CD8<64, CD8VT1>;

// Truncating convert to unsigned int: VCVTTSS2USI / VCVTTSD2USI.
defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
                        "{l}">, TB, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I, 7612 "{q}">, TB, XS,REX_W, EVEX_CD8<32, CD8VT1>; 7613defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info, 7614 any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I, 7615 "{l}">, TB, XD, EVEX_CD8<64, CD8VT1>; 7616defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info, 7617 any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I, 7618 "{q}">, TB, XD, REX_W, EVEX_CD8<64, CD8VT1>; 7619 7620//===----------------------------------------------------------------------===// 7621// AVX-512 Convert form float to double and back 7622//===----------------------------------------------------------------------===// 7623 7624let Uses = [MXCSR], mayRaiseFPException = 1 in 7625multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 7626 X86VectorVTInfo _Src, SDNode OpNode, 7627 X86FoldableSchedWrite sched> { 7628 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 7629 (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr, 7630 "$src2, $src1", "$src1, $src2", 7631 (_.VT (OpNode (_.VT _.RC:$src1), 7632 (_Src.VT _Src.RC:$src2)))>, 7633 EVEX, VVVV, VEX_LIG, Sched<[sched]>; 7634 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 7635 (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr, 7636 "$src2, $src1", "$src1, $src2", 7637 (_.VT (OpNode (_.VT _.RC:$src1), 7638 (_Src.ScalarIntMemFrags addr:$src2)))>, 7639 EVEX, VVVV, VEX_LIG, 7640 Sched<[sched.Folded, sched.ReadAfterFold]>; 7641 7642 let isCodeGenOnly = 1, hasSideEffects = 0 in { 7643 def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst), 7644 (ins _.FRC:$src1, _Src.FRC:$src2), 7645 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 7646 EVEX, VVVV, VEX_LIG, Sched<[sched]>; 7647 let mayLoad = 1 in 7648 def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst), 7649 (ins _.FRC:$src1, _Src.ScalarMemOp:$src2), 7650 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, 
$src2}", []>, 7651 EVEX, VVVV, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>; 7652 } 7653} 7654 7655// Scalar Conversion with SAE - suppress all exceptions 7656multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 7657 X86VectorVTInfo _Src, SDNode OpNodeSAE, 7658 X86FoldableSchedWrite sched> { 7659 let Uses = [MXCSR] in 7660 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 7661 (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr, 7662 "{sae}, $src2, $src1", "$src1, $src2, {sae}", 7663 (_.VT (OpNodeSAE (_.VT _.RC:$src1), 7664 (_Src.VT _Src.RC:$src2)))>, 7665 EVEX, VVVV, VEX_LIG, EVEX_B, Sched<[sched]>; 7666} 7667 7668// Scalar Conversion with rounding control (RC) 7669multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 7670 X86VectorVTInfo _Src, SDNode OpNodeRnd, 7671 X86FoldableSchedWrite sched> { 7672 let Uses = [MXCSR] in 7673 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 7674 (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr, 7675 "$rc, $src2, $src1", "$src1, $src2, $rc", 7676 (_.VT (OpNodeRnd (_.VT _.RC:$src1), 7677 (_Src.VT _Src.RC:$src2), (i32 timm:$rc)))>, 7678 EVEX, VVVV, VEX_LIG, Sched<[sched]>, 7679 EVEX_B, EVEX_RC; 7680} 7681multiclass avx512_cvt_fp_scalar_trunc<bits<8> opc, string OpcodeStr, 7682 SDNode OpNode, SDNode OpNodeRnd, 7683 X86FoldableSchedWrite sched, 7684 X86VectorVTInfo _src, X86VectorVTInfo _dst, 7685 Predicate prd = HasAVX512> { 7686 let Predicates = [prd], ExeDomain = SSEPackedSingle in { 7687 defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>, 7688 avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src, 7689 OpNodeRnd, sched>, EVEX_CD8<_src.EltSize, CD8VT1>; 7690 } 7691} 7692 7693multiclass avx512_cvt_fp_scalar_extend<bits<8> opc, string OpcodeStr, 7694 SDNode OpNode, SDNode OpNodeSAE, 7695 X86FoldableSchedWrite sched, 7696 X86VectorVTInfo _src, X86VectorVTInfo _dst, 7697 Predicate prd = 
HasAVX512> { 7698 let Predicates = [prd], ExeDomain = SSEPackedSingle in { 7699 defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>, 7700 avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE, sched>, 7701 EVEX_CD8<_src.EltSize, CD8VT1>; 7702 } 7703} 7704defm VCVTSD2SS : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2ss", X86frounds, 7705 X86froundsRnd, WriteCvtSD2SS, f64x_info, 7706 f32x_info>, TB, XD, REX_W; 7707defm VCVTSS2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtss2sd", X86fpexts, 7708 X86fpextsSAE, WriteCvtSS2SD, f32x_info, 7709 f64x_info>, TB, XS; 7710defm VCVTSD2SH : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2sh", X86frounds, 7711 X86froundsRnd, WriteCvtSD2SS, f64x_info, 7712 f16x_info, HasFP16>, T_MAP5, XD, REX_W; 7713defm VCVTSH2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtsh2sd", X86fpexts, 7714 X86fpextsSAE, WriteCvtSS2SD, f16x_info, 7715 f64x_info, HasFP16>, T_MAP5, XS; 7716defm VCVTSS2SH : avx512_cvt_fp_scalar_trunc<0x1D, "vcvtss2sh", X86frounds, 7717 X86froundsRnd, WriteCvtSD2SS, f32x_info, 7718 f16x_info, HasFP16>, T_MAP5; 7719defm VCVTSH2SS : avx512_cvt_fp_scalar_extend<0x13, "vcvtsh2ss", X86fpexts, 7720 X86fpextsSAE, WriteCvtSS2SD, f16x_info, 7721 f32x_info, HasFP16>, T_MAP6; 7722 7723def : Pat<(f64 (any_fpextend FR32X:$src)), 7724 (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>, 7725 Requires<[HasAVX512]>; 7726def : Pat<(f64 (any_fpextend (loadf32 addr:$src))), 7727 (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>, 7728 Requires<[HasAVX512, OptForSize]>; 7729 7730def : Pat<(f32 (any_fpround FR64X:$src)), 7731 (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>, 7732 Requires<[HasAVX512]>; 7733 7734def : Pat<(f32 (any_fpextend FR16X:$src)), 7735 (VCVTSH2SSZrr (f32 (IMPLICIT_DEF)), FR16X:$src)>, 7736 Requires<[HasFP16]>; 7737def : Pat<(f32 (any_fpextend (loadf16 addr:$src))), 7738 (VCVTSH2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>, 7739 Requires<[HasFP16, OptForSize]>; 7740 7741def : Pat<(f64 (any_fpextend FR16X:$src)), 7742 
(VCVTSH2SDZrr (f64 (IMPLICIT_DEF)), FR16X:$src)>, 7743 Requires<[HasFP16]>; 7744def : Pat<(f64 (any_fpextend (loadf16 addr:$src))), 7745 (VCVTSH2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>, 7746 Requires<[HasFP16, OptForSize]>; 7747 7748def : Pat<(f16 (any_fpround FR32X:$src)), 7749 (VCVTSS2SHZrr (f16 (IMPLICIT_DEF)), FR32X:$src)>, 7750 Requires<[HasFP16]>; 7751def : Pat<(f16 (any_fpround FR64X:$src)), 7752 (VCVTSD2SHZrr (f16 (IMPLICIT_DEF)), FR64X:$src)>, 7753 Requires<[HasFP16]>; 7754 7755def : Pat<(v4f32 (X86Movss 7756 (v4f32 VR128X:$dst), 7757 (v4f32 (scalar_to_vector 7758 (f32 (any_fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))), 7759 (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>, 7760 Requires<[HasAVX512]>; 7761 7762def : Pat<(v2f64 (X86Movsd 7763 (v2f64 VR128X:$dst), 7764 (v2f64 (scalar_to_vector 7765 (f64 (any_fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))), 7766 (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>, 7767 Requires<[HasAVX512]>; 7768 7769//===----------------------------------------------------------------------===// 7770// AVX-512 Vector convert from signed/unsigned integer to float/double 7771// and from float/double to signed/unsigned integer 7772//===----------------------------------------------------------------------===// 7773 7774multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 7775 X86VectorVTInfo _Src, SDPatternOperator OpNode, SDPatternOperator MaskOpNode, 7776 X86FoldableSchedWrite sched, 7777 string Broadcast = _.BroadcastStr, 7778 string Alias = "", X86MemOperand MemOp = _Src.MemOp, 7779 RegisterClass MaskRC = _.KRCWM, 7780 dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src)))), 7781 dag MaskLdDAG = (_.VT (MaskOpNode (_Src.VT (_Src.LdFrag addr:$src))))> { 7782let Uses = [MXCSR], mayRaiseFPException = 1 in { 7783 defm rr : AVX512_maskable_cvt<opc, MRMSrcReg, _, (outs _.RC:$dst), 7784 (ins _Src.RC:$src), 7785 (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src), 7786 (ins MaskRC:$mask, _Src.RC:$src), 
                         OpcodeStr, "$src", "$src",
                         (_.VT (OpNode (_Src.VT _Src.RC:$src))),
                         (vselect_mask MaskRC:$mask,
                                       (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
                                       _.RC:$src0),
                         (vselect_mask MaskRC:$mask,
                                       (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
                                       _.ImmAllZerosV)>,
                         EVEX, Sched<[sched]>;

  // Memory form. The load DAGs (LdDAG/MaskLdDAG) are template parameters so
  // derived multiclasses (e.g. avx512_vcvt_fpextend) can substitute an
  // extending load.
  defm rm : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins MemOp:$src),
                         (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
                         (ins MaskRC:$mask, MemOp:$src),
                         OpcodeStr#Alias, "$src", "$src",
                         LdDAG,
                         (vselect_mask MaskRC:$mask, MaskLdDAG, _.RC:$src0),
                         (vselect_mask MaskRC:$mask, MaskLdDAG, _.ImmAllZerosV)>,
                         EVEX, Sched<[sched.Folded]>;

  // Broadcast-memory form (EVEX.b): converts a single scalar element
  // broadcast to the full source vector width.
  defm rmb : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _Src.ScalarMemOp:$src),
                         (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
                         (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
                         OpcodeStr,
                         "${src}"#Broadcast, "${src}"#Broadcast,
                         (_.VT (OpNode (_Src.VT
                                        (_Src.BroadcastLdFrag addr:$src))
                                        )),
                         (vselect_mask MaskRC:$mask,
                                       (_.VT
                                        (MaskOpNode
                                         (_Src.VT
                                          (_Src.BroadcastLdFrag addr:$src)))),
                                       _.RC:$src0),
                         (vselect_mask MaskRC:$mask,
                                       (_.VT
                                        (MaskOpNode
                                         (_Src.VT
                                          (_Src.BroadcastLdFrag addr:$src)))),
                                       _.ImmAllZerosV)>,
                         EVEX, EVEX_B, Sched<[sched.Folded]>;
  }
}
// Conversion with SAE - suppress all exceptions
// Register-only {sae} variant; paired with avx512_vcvt_fp via multiclass
// inheritance at the instantiation sites.
multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                              X86VectorVTInfo _Src, SDNode OpNodeSAE,
                              X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in
  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _Src.RC:$src), OpcodeStr,
                         "{sae}, $src", "$src, {sae}",
                         (_.VT (OpNodeSAE (_Src.VT _Src.RC:$src)))>,
                         EVEX, EVEX_B, Sched<[sched]>;
}

// Conversion with rounding control (RC)
// Register-only variant taking an explicit static rounding mode ($rc).
multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                             X86VectorVTInfo _Src, SDPatternOperator OpNodeRnd,
                             X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in
  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
                        "$rc, $src", "$src, $rc",
                        (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 timm:$rc)))>,
                        EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
}

// Similar to avx512_vcvt_fp, but uses an extload for the memory form.
// Both the unmasked and masked load DAGs are replaced with the PatFrag
// "extload<SrcVT>" looked up by name.
multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                X86VectorVTInfo _Src, SDPatternOperator OpNode,
                                SDNode MaskOpNode,
                                X86FoldableSchedWrite sched,
                                string Broadcast = _.BroadcastStr,
                                string Alias = "", X86MemOperand MemOp = _Src.MemOp,
                                RegisterClass MaskRC = _.KRCWM>
  : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, MaskOpNode, sched, Broadcast,
                   Alias, MemOp, MaskRC,
                   (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src)),
                   (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;

// Extend [Float to Double, Half to Float]
// The 512-bit form also gets an {sae} variant; the 128/256-bit forms
// additionally require VLX.
multiclass avx512_cvt_extend<bits<8> opc, string OpcodeStr,
                             AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
                             X86SchedWriteWidths sched, Predicate prd = HasAVX512> {
  let Predicates = [prd] in {
    defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info512, _src.info256,
                                  any_fpextend, fpextend, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, _dst.info512, _src.info256,
                                X86vfpextSAE, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info128, _src.info128,
                                     X86any_vfpext, X86vfpext, sched.XMM,
                                     _dst.info128.BroadcastStr,
                                     "", f64mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info256, _src.info128,
                                     any_fpextend, fpextend, sched.YMM>, EVEX_V256;
  }
}

// Truncate [Double to Float, Float to Half]
multiclass
avx512_cvt_trunc<bits<8> opc, string OpcodeStr, 7890 AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src, 7891 X86SchedWriteWidths sched, Predicate prd = HasAVX512, 7892 PatFrag bcast128 = _src.info128.BroadcastLdFrag, 7893 PatFrag loadVT128 = _src.info128.LdFrag, 7894 RegisterClass maskRC128 = _src.info128.KRCWM> { 7895 let Predicates = [prd] in { 7896 defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512, 7897 X86any_vfpround, X86vfpround, sched.ZMM>, 7898 avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512, 7899 X86vfproundRnd, sched.ZMM>, EVEX_V512; 7900 } 7901 let Predicates = [prd, HasVLX] in { 7902 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128, 7903 null_frag, null_frag, sched.XMM, 7904 _src.info128.BroadcastStr, "{x}", 7905 f128mem, maskRC128>, EVEX_V128; 7906 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256, 7907 X86any_vfpround, X86vfpround, 7908 sched.YMM, _src.info256.BroadcastStr, "{y}">, EVEX_V256; 7909 7910 // Special patterns to allow use of X86vmfpround for masking. Instruction 7911 // patterns have been disabled with null_frag. 
7912 def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT VR128X:$src))), 7913 (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>; 7914 def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0), 7915 maskRC128:$mask), 7916 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, maskRC128:$mask, VR128X:$src)>; 7917 def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV, 7918 maskRC128:$mask), 7919 (!cast<Instruction>(NAME # "Z128rrkz") maskRC128:$mask, VR128X:$src)>; 7920 7921 def : Pat<(_dst.info128.VT (X86any_vfpround (loadVT128 addr:$src))), 7922 (!cast<Instruction>(NAME # "Z128rm") addr:$src)>; 7923 def : Pat<(X86vmfpround (loadVT128 addr:$src), (_dst.info128.VT VR128X:$src0), 7924 maskRC128:$mask), 7925 (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, maskRC128:$mask, addr:$src)>; 7926 def : Pat<(X86vmfpround (loadVT128 addr:$src), _dst.info128.ImmAllZerosV, 7927 maskRC128:$mask), 7928 (!cast<Instruction>(NAME # "Z128rmkz") maskRC128:$mask, addr:$src)>; 7929 7930 def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT (bcast128 addr:$src)))), 7931 (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>; 7932 def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)), 7933 (_dst.info128.VT VR128X:$src0), maskRC128:$mask), 7934 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, maskRC128:$mask, addr:$src)>; 7935 def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)), 7936 _dst.info128.ImmAllZerosV, maskRC128:$mask), 7937 (!cast<Instruction>(NAME # "Z128rmbkz") maskRC128:$mask, addr:$src)>; 7938 } 7939 7940 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}", 7941 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">; 7942 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", 7943 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst, 7944 VK2WM:$mask, VR128X:$src), 0, "att">; 7945 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|" 7946 "$dst 
{${mask}} {z}, $src}", 7947 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst, 7948 VK2WM:$mask, VR128X:$src), 0, "att">; 7949 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}", 7950 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">; 7951 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|" 7952 "$dst {${mask}}, ${src}{1to2}}", 7953 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst, 7954 VK2WM:$mask, f64mem:$src), 0, "att">; 7955 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|" 7956 "$dst {${mask}} {z}, ${src}{1to2}}", 7957 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst, 7958 VK2WM:$mask, f64mem:$src), 0, "att">; 7959 7960 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}", 7961 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">; 7962 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", 7963 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst, 7964 VK4WM:$mask, VR256X:$src), 0, "att">; 7965 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|" 7966 "$dst {${mask}} {z}, $src}", 7967 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst, 7968 VK4WM:$mask, VR256X:$src), 0, "att">; 7969 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}", 7970 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">; 7971 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|" 7972 "$dst {${mask}}, ${src}{1to4}}", 7973 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst, 7974 VK4WM:$mask, f64mem:$src), 0, "att">; 7975 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|" 7976 "$dst {${mask}} {z}, ${src}{1to4}}", 7977 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst, 7978 VK4WM:$mask, f64mem:$src), 0, "att">; 7979} 7980 7981defm VCVTPD2PS : avx512_cvt_trunc<0x5A, "vcvtpd2ps", 7982 avx512vl_f32_info, avx512vl_f64_info, SchedWriteCvtPD2PS>, 7983 REX_W, TB, PD, EVEX_CD8<64, CD8VF>; 7984defm VCVTPS2PD : 
avx512_cvt_extend<0x5A, "vcvtps2pd", 7985 avx512vl_f64_info, avx512vl_f32_info, SchedWriteCvtPS2PD>, 7986 TB, EVEX_CD8<32, CD8VH>; 7987 7988// Extend Half to Double 7989multiclass avx512_cvtph2pd<bits<8> opc, string OpcodeStr, 7990 X86SchedWriteWidths sched> { 7991 let Predicates = [HasFP16] in { 7992 defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f16x_info, 7993 any_fpextend, fpextend, sched.ZMM>, 7994 avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f16x_info, 7995 X86vfpextSAE, sched.ZMM>, EVEX_V512; 7996 def : Pat<(v8f64 (extloadv8f16 addr:$src)), 7997 (!cast<Instruction>(NAME # "Zrm") addr:$src)>; 7998 } 7999 let Predicates = [HasFP16, HasVLX] in { 8000 defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v8f16x_info, 8001 X86any_vfpext, X86vfpext, sched.XMM, "{1to2}", "", 8002 f32mem>, EVEX_V128; 8003 defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v8f16x_info, 8004 X86any_vfpext, X86vfpext, sched.YMM, "{1to4}", "", 8005 f64mem>, EVEX_V256; 8006 } 8007} 8008 8009// Truncate Double to Half 8010multiclass avx512_cvtpd2ph<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> { 8011 let Predicates = [HasFP16] in { 8012 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8f64_info, 8013 X86any_vfpround, X86vfpround, sched.ZMM, "{1to8}", "{z}">, 8014 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8f64_info, 8015 X86vfproundRnd, sched.ZMM>, EVEX_V512; 8016 } 8017 let Predicates = [HasFP16, HasVLX] in { 8018 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2f64x_info, null_frag, 8019 null_frag, sched.XMM, "{1to2}", "{x}", f128mem, 8020 VK2WM>, EVEX_V128; 8021 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4f64x_info, null_frag, 8022 null_frag, sched.YMM, "{1to4}", "{y}", f256mem, 8023 VK4WM>, EVEX_V256; 8024 } 8025 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}", 8026 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, 8027 VR128X:$src), 0, "att">; 8028 def : InstAlias<OpcodeStr#"x\t{$src, $dst 
{${mask}}|$dst {${mask}}, $src}", 8029 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst, 8030 VK2WM:$mask, VR128X:$src), 0, "att">; 8031 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}", 8032 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst, 8033 VK2WM:$mask, VR128X:$src), 0, "att">; 8034 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}", 8035 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, 8036 i64mem:$src), 0, "att">; 8037 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|" 8038 "$dst {${mask}}, ${src}{1to2}}", 8039 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst, 8040 VK2WM:$mask, i64mem:$src), 0, "att">; 8041 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|" 8042 "$dst {${mask}} {z}, ${src}{1to2}}", 8043 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst, 8044 VK2WM:$mask, i64mem:$src), 0, "att">; 8045 8046 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}", 8047 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, 8048 VR256X:$src), 0, "att">; 8049 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|" 8050 "$dst {${mask}}, $src}", 8051 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst, 8052 VK4WM:$mask, VR256X:$src), 0, "att">; 8053 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|" 8054 "$dst {${mask}} {z}, $src}", 8055 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst, 8056 VK4WM:$mask, VR256X:$src), 0, "att">; 8057 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}", 8058 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, 8059 i64mem:$src), 0, "att">; 8060 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|" 8061 "$dst {${mask}}, ${src}{1to4}}", 8062 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst, 8063 VK4WM:$mask, i64mem:$src), 0, "att">; 8064 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|" 8065 "$dst {${mask}} {z}, ${src}{1to4}}", 8066 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst, 8067 VK4WM:$mask, 
i64mem:$src), 0, "att">; 8068 8069 def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}", 8070 (!cast<Instruction>(NAME # "Zrr") VR128X:$dst, 8071 VR512:$src), 0, "att">; 8072 def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|" 8073 "$dst {${mask}}, $src}", 8074 (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst, 8075 VK8WM:$mask, VR512:$src), 0, "att">; 8076 def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|" 8077 "$dst {${mask}} {z}, $src}", 8078 (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst, 8079 VK8WM:$mask, VR512:$src), 0, "att">; 8080 def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}", 8081 (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst, 8082 i64mem:$src), 0, "att">; 8083 def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|" 8084 "$dst {${mask}}, ${src}{1to8}}", 8085 (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst, 8086 VK8WM:$mask, i64mem:$src), 0, "att">; 8087 def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|" 8088 "$dst {${mask}} {z}, ${src}{1to8}}", 8089 (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst, 8090 VK8WM:$mask, i64mem:$src), 0, "att">; 8091} 8092 8093defm VCVTPS2PHX : avx512_cvt_trunc<0x1D, "vcvtps2phx", avx512vl_f16_info, 8094 avx512vl_f32_info, SchedWriteCvtPD2PS, 8095 HasFP16>, T_MAP5, PD, EVEX_CD8<32, CD8VF>; 8096defm VCVTPH2PSX : avx512_cvt_extend<0x13, "vcvtph2psx", avx512vl_f32_info, 8097 avx512vl_f16_info, SchedWriteCvtPS2PD, 8098 HasFP16>, T_MAP6, PD, EVEX_CD8<16, CD8VH>; 8099defm VCVTPD2PH : avx512_cvtpd2ph<0x5A, "vcvtpd2ph", SchedWriteCvtPD2PS>, 8100 REX_W, T_MAP5, PD, EVEX_CD8<64, CD8VF>; 8101defm VCVTPH2PD : avx512_cvtph2pd<0x5A, "vcvtph2pd", SchedWriteCvtPS2PD>, 8102 T_MAP5, EVEX_CD8<16, CD8VQ>; 8103 8104let Predicates = [HasFP16, HasVLX] in { 8105 // Special patterns to allow use of X86vmfpround for masking. Instruction 8106 // patterns have been disabled with null_frag. 
8107 def : Pat<(v8f16 (X86any_vfpround (v4f64 VR256X:$src))), 8108 (VCVTPD2PHZ256rr VR256X:$src)>; 8109 def : Pat<(v8f16 (X86vmfpround (v4f64 VR256X:$src), (v8f16 VR128X:$src0), 8110 VK4WM:$mask)), 8111 (VCVTPD2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>; 8112 def : Pat<(X86vmfpround (v4f64 VR256X:$src), v8f16x_info.ImmAllZerosV, 8113 VK4WM:$mask), 8114 (VCVTPD2PHZ256rrkz VK4WM:$mask, VR256X:$src)>; 8115 8116 def : Pat<(v8f16 (X86any_vfpround (loadv4f64 addr:$src))), 8117 (VCVTPD2PHZ256rm addr:$src)>; 8118 def : Pat<(X86vmfpround (loadv4f64 addr:$src), (v8f16 VR128X:$src0), 8119 VK4WM:$mask), 8120 (VCVTPD2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>; 8121 def : Pat<(X86vmfpround (loadv4f64 addr:$src), v8f16x_info.ImmAllZerosV, 8122 VK4WM:$mask), 8123 (VCVTPD2PHZ256rmkz VK4WM:$mask, addr:$src)>; 8124 8125 def : Pat<(v8f16 (X86any_vfpround (v4f64 (X86VBroadcastld64 addr:$src)))), 8126 (VCVTPD2PHZ256rmb addr:$src)>; 8127 def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)), 8128 (v8f16 VR128X:$src0), VK4WM:$mask), 8129 (VCVTPD2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>; 8130 def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)), 8131 v8f16x_info.ImmAllZerosV, VK4WM:$mask), 8132 (VCVTPD2PHZ256rmbkz VK4WM:$mask, addr:$src)>; 8133 8134 def : Pat<(v8f16 (X86any_vfpround (v2f64 VR128X:$src))), 8135 (VCVTPD2PHZ128rr VR128X:$src)>; 8136 def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v8f16 VR128X:$src0), 8137 VK2WM:$mask), 8138 (VCVTPD2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>; 8139 def : Pat<(X86vmfpround (v2f64 VR128X:$src), v8f16x_info.ImmAllZerosV, 8140 VK2WM:$mask), 8141 (VCVTPD2PHZ128rrkz VK2WM:$mask, VR128X:$src)>; 8142 8143 def : Pat<(v8f16 (X86any_vfpround (loadv2f64 addr:$src))), 8144 (VCVTPD2PHZ128rm addr:$src)>; 8145 def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v8f16 VR128X:$src0), 8146 VK2WM:$mask), 8147 (VCVTPD2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8148 def : Pat<(X86vmfpround (loadv2f64 addr:$src), 
v8f16x_info.ImmAllZerosV, 8149 VK2WM:$mask), 8150 (VCVTPD2PHZ128rmkz VK2WM:$mask, addr:$src)>; 8151 8152 def : Pat<(v8f16 (X86any_vfpround (v2f64 (X86VBroadcastld64 addr:$src)))), 8153 (VCVTPD2PHZ128rmb addr:$src)>; 8154 def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)), 8155 (v8f16 VR128X:$src0), VK2WM:$mask), 8156 (VCVTPD2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8157 def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)), 8158 v8f16x_info.ImmAllZerosV, VK2WM:$mask), 8159 (VCVTPD2PHZ128rmbkz VK2WM:$mask, addr:$src)>; 8160} 8161 8162// Convert Signed/Unsigned Doubleword to Double 8163let Uses = []<Register>, mayRaiseFPException = 0 in 8164multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 8165 SDNode MaskOpNode, SDPatternOperator OpNode128, 8166 SDNode MaskOpNode128, 8167 X86SchedWriteWidths sched> { 8168 // No rounding in this op 8169 let Predicates = [HasAVX512] in 8170 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode, 8171 MaskOpNode, sched.ZMM>, EVEX_V512; 8172 8173 let Predicates = [HasVLX] in { 8174 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info, 8175 OpNode128, MaskOpNode128, sched.XMM, "{1to2}", 8176 "", i64mem, VK2WM, 8177 (v2f64 (OpNode128 (bc_v4i32 8178 (v2i64 8179 (scalar_to_vector (loadi64 addr:$src)))))), 8180 (v2f64 (MaskOpNode128 (bc_v4i32 8181 (v2i64 8182 (scalar_to_vector (loadi64 addr:$src))))))>, 8183 EVEX_V128; 8184 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode, 8185 MaskOpNode, sched.YMM>, EVEX_V256; 8186 } 8187} 8188 8189// Convert Signed/Unsigned Doubleword to Float 8190multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 8191 SDNode MaskOpNode, SDNode OpNodeRnd, 8192 X86SchedWriteWidths sched> { 8193 let Predicates = [HasAVX512] in 8194 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode, 8195 MaskOpNode, sched.ZMM>, 8196 avx512_vcvt_fp_rc<opc, OpcodeStr, 
v16f32_info, v16i32_info, 8197 OpNodeRnd, sched.ZMM>, EVEX_V512; 8198 8199 let Predicates = [HasVLX] in { 8200 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode, 8201 MaskOpNode, sched.XMM>, EVEX_V128; 8202 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode, 8203 MaskOpNode, sched.YMM>, EVEX_V256; 8204 } 8205} 8206 8207// Convert Float to Signed/Unsigned Doubleword with truncation 8208multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 8209 SDNode MaskOpNode, 8210 SDNode OpNodeSAE, X86SchedWriteWidths sched> { 8211 let Predicates = [HasAVX512] in { 8212 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode, 8213 MaskOpNode, sched.ZMM>, 8214 avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info, 8215 OpNodeSAE, sched.ZMM>, EVEX_V512; 8216 } 8217 let Predicates = [HasVLX] in { 8218 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode, 8219 MaskOpNode, sched.XMM>, EVEX_V128; 8220 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode, 8221 MaskOpNode, sched.YMM>, EVEX_V256; 8222 } 8223} 8224 8225// Convert Float to Signed/Unsigned Doubleword 8226multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode, 8227 SDNode MaskOpNode, SDNode OpNodeRnd, 8228 X86SchedWriteWidths sched> { 8229 let Predicates = [HasAVX512] in { 8230 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode, 8231 MaskOpNode, sched.ZMM>, 8232 avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info, 8233 OpNodeRnd, sched.ZMM>, EVEX_V512; 8234 } 8235 let Predicates = [HasVLX] in { 8236 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode, 8237 MaskOpNode, sched.XMM>, EVEX_V128; 8238 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode, 8239 MaskOpNode, sched.YMM>, EVEX_V256; 8240 } 8241} 8242 8243// Convert Double to Signed/Unsigned Doubleword with truncation 8244multiclass 
avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 8245 SDNode MaskOpNode, SDNode OpNodeSAE, 8246 X86SchedWriteWidths sched> { 8247 let Predicates = [HasAVX512] in { 8248 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode, 8249 MaskOpNode, sched.ZMM>, 8250 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info, 8251 OpNodeSAE, sched.ZMM>, EVEX_V512; 8252 } 8253 let Predicates = [HasVLX] in { 8254 // we need "x"/"y" suffixes in order to distinguish between 128 and 256 8255 // memory forms of these instructions in Asm Parser. They have the same 8256 // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly 8257 // due to the same reason. 8258 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info, 8259 null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem, 8260 VK2WM>, EVEX_V128; 8261 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode, 8262 MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256; 8263 } 8264 8265 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}", 8266 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, 8267 VR128X:$src), 0, "att">; 8268 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", 8269 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst, 8270 VK2WM:$mask, VR128X:$src), 0, "att">; 8271 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}", 8272 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst, 8273 VK2WM:$mask, VR128X:$src), 0, "att">; 8274 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}", 8275 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, 8276 f64mem:$src), 0, "att">; 8277 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|" 8278 "$dst {${mask}}, ${src}{1to2}}", 8279 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst, 8280 VK2WM:$mask, f64mem:$src), 0, "att">; 8281 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|" 8282 "$dst {${mask}} 
{z}, ${src}{1to2}}", 8283 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst, 8284 VK2WM:$mask, f64mem:$src), 0, "att">; 8285 8286 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}", 8287 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, 8288 VR256X:$src), 0, "att">; 8289 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", 8290 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst, 8291 VK4WM:$mask, VR256X:$src), 0, "att">; 8292 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}", 8293 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst, 8294 VK4WM:$mask, VR256X:$src), 0, "att">; 8295 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}", 8296 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, 8297 f64mem:$src), 0, "att">; 8298 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|" 8299 "$dst {${mask}}, ${src}{1to4}}", 8300 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst, 8301 VK4WM:$mask, f64mem:$src), 0, "att">; 8302 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|" 8303 "$dst {${mask}} {z}, ${src}{1to4}}", 8304 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst, 8305 VK4WM:$mask, f64mem:$src), 0, "att">; 8306} 8307 8308// Convert Double to Signed/Unsigned Doubleword 8309multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode, 8310 SDNode MaskOpNode, SDNode OpNodeRnd, 8311 X86SchedWriteWidths sched> { 8312 let Predicates = [HasAVX512] in { 8313 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode, 8314 MaskOpNode, sched.ZMM>, 8315 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info, 8316 OpNodeRnd, sched.ZMM>, EVEX_V512; 8317 } 8318 let Predicates = [HasVLX] in { 8319 // we need "x"/"y" suffixes in order to distinguish between 128 and 256 8320 // memory forms of these instructions in Asm Parcer. They have the same 8321 // dest type - 'v4i32x_info'. 
We also specify the broadcast string explicitly 8322 // due to the same reason. 8323 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info, 8324 null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem, 8325 VK2WM>, EVEX_V128; 8326 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode, 8327 MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256; 8328 } 8329 8330 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}", 8331 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">; 8332 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", 8333 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst, 8334 VK2WM:$mask, VR128X:$src), 0, "att">; 8335 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}", 8336 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst, 8337 VK2WM:$mask, VR128X:$src), 0, "att">; 8338 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}", 8339 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, 8340 f64mem:$src), 0, "att">; 8341 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|" 8342 "$dst {${mask}}, ${src}{1to2}}", 8343 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst, 8344 VK2WM:$mask, f64mem:$src), 0, "att">; 8345 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|" 8346 "$dst {${mask}} {z}, ${src}{1to2}}", 8347 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst, 8348 VK2WM:$mask, f64mem:$src), 0, "att">; 8349 8350 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}", 8351 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">; 8352 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", 8353 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst, 8354 VK4WM:$mask, VR256X:$src), 0, "att">; 8355 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}", 8356 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst, 8357 VK4WM:$mask, VR256X:$src), 0, "att">; 
8358 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}", 8359 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, 8360 f64mem:$src), 0, "att">; 8361 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|" 8362 "$dst {${mask}}, ${src}{1to4}}", 8363 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst, 8364 VK4WM:$mask, f64mem:$src), 0, "att">; 8365 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|" 8366 "$dst {${mask}} {z}, ${src}{1to4}}", 8367 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst, 8368 VK4WM:$mask, f64mem:$src), 0, "att">; 8369} 8370 8371// Convert Double to Signed/Unsigned Quardword 8372multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode, 8373 SDNode MaskOpNode, SDNode OpNodeRnd, 8374 X86SchedWriteWidths sched> { 8375 let Predicates = [HasDQI] in { 8376 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode, 8377 MaskOpNode, sched.ZMM>, 8378 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info, 8379 OpNodeRnd, sched.ZMM>, EVEX_V512; 8380 } 8381 let Predicates = [HasDQI, HasVLX] in { 8382 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode, 8383 MaskOpNode, sched.XMM>, EVEX_V128; 8384 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode, 8385 MaskOpNode, sched.YMM>, EVEX_V256; 8386 } 8387} 8388 8389// Convert Double to Signed/Unsigned Quardword with truncation 8390multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 8391 SDNode MaskOpNode, SDNode OpNodeRnd, 8392 X86SchedWriteWidths sched> { 8393 let Predicates = [HasDQI] in { 8394 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode, 8395 MaskOpNode, sched.ZMM>, 8396 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info, 8397 OpNodeRnd, sched.ZMM>, EVEX_V512; 8398 } 8399 let Predicates = [HasDQI, HasVLX] in { 8400 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode, 8401 MaskOpNode, sched.XMM>, 
EVEX_V128; 8402 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode, 8403 MaskOpNode, sched.YMM>, EVEX_V256; 8404 } 8405} 8406 8407// Convert Signed/Unsigned Quardword to Double 8408multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 8409 SDNode MaskOpNode, SDNode OpNodeRnd, 8410 X86SchedWriteWidths sched> { 8411 let Predicates = [HasDQI] in { 8412 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode, 8413 MaskOpNode, sched.ZMM>, 8414 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info, 8415 OpNodeRnd, sched.ZMM>, EVEX_V512; 8416 } 8417 let Predicates = [HasDQI, HasVLX] in { 8418 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode, 8419 MaskOpNode, sched.XMM>, EVEX_V128; 8420 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode, 8421 MaskOpNode, sched.YMM>, EVEX_V256; 8422 } 8423} 8424 8425// Convert Float to Signed/Unsigned Quardword 8426multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode, 8427 SDNode MaskOpNode, SDNode OpNodeRnd, 8428 X86SchedWriteWidths sched> { 8429 let Predicates = [HasDQI] in { 8430 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode, 8431 MaskOpNode, sched.ZMM>, 8432 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info, 8433 OpNodeRnd, sched.ZMM>, EVEX_V512; 8434 } 8435 let Predicates = [HasDQI, HasVLX] in { 8436 // Explicitly specified broadcast string, since we take only 2 elements 8437 // from v4f32x_info source 8438 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode, 8439 MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM, 8440 (v2i64 (OpNode (bc_v4f32 8441 (v2f64 8442 (scalar_to_vector (loadf64 addr:$src)))))), 8443 (v2i64 (MaskOpNode (bc_v4f32 8444 (v2f64 8445 (scalar_to_vector (loadf64 addr:$src))))))>, 8446 EVEX_V128; 8447 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode, 8448 MaskOpNode, sched.YMM>, EVEX_V256; 
8449 } 8450} 8451 8452// Convert Float to Signed/Unsigned Quardword with truncation 8453multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 8454 SDNode MaskOpNode, SDNode OpNodeRnd, 8455 X86SchedWriteWidths sched> { 8456 let Predicates = [HasDQI] in { 8457 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode, 8458 MaskOpNode, sched.ZMM>, 8459 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info, 8460 OpNodeRnd, sched.ZMM>, EVEX_V512; 8461 } 8462 let Predicates = [HasDQI, HasVLX] in { 8463 // Explicitly specified broadcast string, since we take only 2 elements 8464 // from v4f32x_info source 8465 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode, 8466 MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM, 8467 (v2i64 (OpNode (bc_v4f32 8468 (v2f64 8469 (scalar_to_vector (loadf64 addr:$src)))))), 8470 (v2i64 (MaskOpNode (bc_v4f32 8471 (v2f64 8472 (scalar_to_vector (loadf64 addr:$src))))))>, 8473 EVEX_V128; 8474 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode, 8475 MaskOpNode, sched.YMM>, EVEX_V256; 8476 } 8477} 8478 8479// Convert Signed/Unsigned Quardword to Float 8480// Also Convert Signed/Unsigned Doubleword to Half 8481multiclass avx512_cvtqq2ps_dq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 8482 SDPatternOperator MaskOpNode, SDPatternOperator OpNode128, 8483 SDPatternOperator OpNode128M, SDPatternOperator OpNodeRnd, 8484 AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src, 8485 X86SchedWriteWidths sched, Predicate prd = HasDQI> { 8486 let Predicates = [prd] in { 8487 defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512, OpNode, 8488 MaskOpNode, sched.ZMM>, 8489 avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512, 8490 OpNodeRnd, sched.ZMM>, EVEX_V512; 8491 } 8492 let Predicates = [prd, HasVLX] in { 8493 // we need "x"/"y" suffixes in order to distinguish between 128 and 256 8494 // memory forms of these instructions 
in Asm Parcer. They have the same 8495 // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly 8496 // due to the same reason. 8497 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128, null_frag, 8498 null_frag, sched.XMM, _src.info128.BroadcastStr, 8499 "{x}", i128mem, _src.info128.KRCWM>, 8500 EVEX_V128; 8501 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256, OpNode, 8502 MaskOpNode, sched.YMM, _src.info256.BroadcastStr, 8503 "{y}">, EVEX_V256; 8504 8505 // Special patterns to allow use of X86VM[SU]intToFP for masking. Instruction 8506 // patterns have been disabled with null_frag. 8507 def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT VR128X:$src))), 8508 (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>; 8509 def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0), 8510 _src.info128.KRCWM:$mask), 8511 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, _src.info128.KRCWM:$mask, VR128X:$src)>; 8512 def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV, 8513 _src.info128.KRCWM:$mask), 8514 (!cast<Instruction>(NAME # "Z128rrkz") _src.info128.KRCWM:$mask, VR128X:$src)>; 8515 8516 def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.LdFrag addr:$src))), 8517 (!cast<Instruction>(NAME # "Z128rm") addr:$src)>; 8518 def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), (_dst.info128.VT VR128X:$src0), 8519 _src.info128.KRCWM:$mask), 8520 (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>; 8521 def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), _dst.info128.ImmAllZerosV, 8522 _src.info128.KRCWM:$mask), 8523 (!cast<Instruction>(NAME # "Z128rmkz") _src.info128.KRCWM:$mask, addr:$src)>; 8524 8525 def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT (X86VBroadcastld64 addr:$src)))), 8526 (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>; 8527 def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)), 
8528 (_dst.info128.VT VR128X:$src0), _src.info128.KRCWM:$mask), 8529 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>; 8530 def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)), 8531 _dst.info128.ImmAllZerosV, _src.info128.KRCWM:$mask), 8532 (!cast<Instruction>(NAME # "Z128rmbkz") _src.info128.KRCWM:$mask, addr:$src)>; 8533 } 8534 8535 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}", 8536 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, 8537 VR128X:$src), 0, "att">; 8538 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", 8539 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst, 8540 VK2WM:$mask, VR128X:$src), 0, "att">; 8541 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}", 8542 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst, 8543 VK2WM:$mask, VR128X:$src), 0, "att">; 8544 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}", 8545 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, 8546 i64mem:$src), 0, "att">; 8547 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|" 8548 "$dst {${mask}}, ${src}{1to2}}", 8549 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst, 8550 VK2WM:$mask, i64mem:$src), 0, "att">; 8551 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|" 8552 "$dst {${mask}} {z}, ${src}{1to2}}", 8553 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst, 8554 VK2WM:$mask, i64mem:$src), 0, "att">; 8555 8556 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}", 8557 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, 8558 VR256X:$src), 0, "att">; 8559 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|" 8560 "$dst {${mask}}, $src}", 8561 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst, 8562 VK4WM:$mask, VR256X:$src), 0, "att">; 8563 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|" 8564 "$dst {${mask}} {z}, $src}", 8565 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst, 8566 VK4WM:$mask, 
VR256X:$src), 0, "att">; 8567 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}", 8568 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, 8569 i64mem:$src), 0, "att">; 8570 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|" 8571 "$dst {${mask}}, ${src}{1to4}}", 8572 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst, 8573 VK4WM:$mask, i64mem:$src), 0, "att">; 8574 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|" 8575 "$dst {${mask}} {z}, ${src}{1to4}}", 8576 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst, 8577 VK4WM:$mask, i64mem:$src), 0, "att">; 8578} 8579 8580defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", any_sint_to_fp, sint_to_fp, 8581 X86any_VSintToFP, X86VSintToFP, 8582 SchedWriteCvtDQ2PD>, TB, XS, EVEX_CD8<32, CD8VH>; 8583 8584defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", any_sint_to_fp, sint_to_fp, 8585 X86VSintToFpRnd, SchedWriteCvtDQ2PS>, 8586 TB, EVEX_CD8<32, CD8VF>; 8587 8588defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86any_cvttp2si, 8589 X86cvttp2si, X86cvttp2siSAE, 8590 SchedWriteCvtPS2DQ>, TB, XS, EVEX_CD8<32, CD8VF>; 8591 8592defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86any_cvttp2si, 8593 X86cvttp2si, X86cvttp2siSAE, 8594 SchedWriteCvtPD2DQ>, 8595 TB, PD, REX_W, EVEX_CD8<64, CD8VF>; 8596 8597defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86any_cvttp2ui, 8598 X86cvttp2ui, X86cvttp2uiSAE, 8599 SchedWriteCvtPS2DQ>, TB, EVEX_CD8<32, CD8VF>; 8600 8601defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86any_cvttp2ui, 8602 X86cvttp2ui, X86cvttp2uiSAE, 8603 SchedWriteCvtPD2DQ>, 8604 TB, REX_W, EVEX_CD8<64, CD8VF>; 8605 8606defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", any_uint_to_fp, 8607 uint_to_fp, X86any_VUintToFP, X86VUintToFP, 8608 SchedWriteCvtDQ2PD>, TB, XS, EVEX_CD8<32, CD8VH>; 8609 8610defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", any_uint_to_fp, 8611 uint_to_fp, X86VUintToFpRnd, 8612 SchedWriteCvtDQ2PS>, TB, XD, EVEX_CD8<32, 
CD8VF>; 8613 8614defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int, X86cvtp2Int, 8615 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, TB, PD, 8616 EVEX_CD8<32, CD8VF>; 8617 8618defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int, X86cvtp2Int, 8619 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, TB, XD, 8620 REX_W, EVEX_CD8<64, CD8VF>; 8621 8622defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt, X86cvtp2UInt, 8623 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, 8624 TB, EVEX_CD8<32, CD8VF>; 8625 8626defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt, X86cvtp2UInt, 8627 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, REX_W, 8628 TB, EVEX_CD8<64, CD8VF>; 8629 8630defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int, X86cvtp2Int, 8631 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, REX_W, 8632 TB, PD, EVEX_CD8<64, CD8VF>; 8633 8634defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int, X86cvtp2Int, 8635 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, TB, PD, 8636 EVEX_CD8<32, CD8VH>; 8637 8638defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt, X86cvtp2UInt, 8639 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, REX_W, 8640 TB, PD, EVEX_CD8<64, CD8VF>; 8641 8642defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt, X86cvtp2UInt, 8643 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, TB, PD, 8644 EVEX_CD8<32, CD8VH>; 8645 8646defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86any_cvttp2si, 8647 X86cvttp2si, X86cvttp2siSAE, 8648 SchedWriteCvtPD2DQ>, REX_W, 8649 TB, PD, EVEX_CD8<64, CD8VF>; 8650 8651defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86any_cvttp2si, 8652 X86cvttp2si, X86cvttp2siSAE, 8653 SchedWriteCvtPS2DQ>, TB, PD, 8654 EVEX_CD8<32, CD8VH>; 8655 8656defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86any_cvttp2ui, 8657 X86cvttp2ui, X86cvttp2uiSAE, 8658 SchedWriteCvtPD2DQ>, REX_W, 8659 TB, PD, EVEX_CD8<64, CD8VF>; 8660 8661defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86any_cvttp2ui, 8662 X86cvttp2ui, 
X86cvttp2uiSAE, 8663 SchedWriteCvtPS2DQ>, TB, PD, 8664 EVEX_CD8<32, CD8VH>; 8665 8666defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", any_sint_to_fp, 8667 sint_to_fp, X86VSintToFpRnd, 8668 SchedWriteCvtDQ2PD>, REX_W, TB, XS, EVEX_CD8<64, CD8VF>; 8669 8670defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", any_uint_to_fp, 8671 uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PD>, 8672 REX_W, TB, XS, EVEX_CD8<64, CD8VF>; 8673 8674defm VCVTDQ2PH : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtdq2ph", any_sint_to_fp, sint_to_fp, 8675 X86any_VSintToFP, X86VMSintToFP, 8676 X86VSintToFpRnd, avx512vl_f16_info, avx512vl_i32_info, 8677 SchedWriteCvtDQ2PS, HasFP16>, 8678 T_MAP5, EVEX_CD8<32, CD8VF>; 8679 8680defm VCVTUDQ2PH : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtudq2ph", any_uint_to_fp, uint_to_fp, 8681 X86any_VUintToFP, X86VMUintToFP, 8682 X86VUintToFpRnd, avx512vl_f16_info, avx512vl_i32_info, 8683 SchedWriteCvtDQ2PS, HasFP16>, T_MAP5, XD, 8684 EVEX_CD8<32, CD8VF>; 8685 8686defm VCVTQQ2PS : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtqq2ps", any_sint_to_fp, sint_to_fp, 8687 X86any_VSintToFP, X86VMSintToFP, 8688 X86VSintToFpRnd, avx512vl_f32_info, avx512vl_i64_info, 8689 SchedWriteCvtDQ2PS>, REX_W, TB, 8690 EVEX_CD8<64, CD8VF>; 8691 8692defm VCVTUQQ2PS : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtuqq2ps", any_uint_to_fp, uint_to_fp, 8693 X86any_VUintToFP, X86VMUintToFP, 8694 X86VUintToFpRnd, avx512vl_f32_info, avx512vl_i64_info, 8695 SchedWriteCvtDQ2PS>, REX_W, TB, XD, 8696 EVEX_CD8<64, CD8VF>; 8697 8698let Predicates = [HasVLX] in { 8699 // Special patterns to allow use of X86mcvtp2Int for masking. Instruction 8700 // patterns have been disabled with null_frag. 
  // VCVTPD2DQ (128-bit) with a register source: plain, merge-masked (rrk)
  // and zero-masked (rrkz) selections via X86cvtp2Int / X86mcvtp2Int.
  def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))),
            (VCVTPD2DQZ128rr VR128X:$src)>;
  def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;

  // Full-vector memory source (rm forms).
  def : Pat<(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))),
            (VCVTPD2DQZ128rm addr:$src)>;
  def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;

  // Broadcast-load source, i.e. {1to2} embedded broadcast (rmb forms).
  def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTPD2DQZ128rmb addr:$src)>;
  def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
                          (v4i32 VR128X:$src0), VK2WM:$mask),
            (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;

  // Special patterns to allow use of X86mcvttp2si for masking. Instruction
  // patterns have been disabled with null_frag.
  // VCVTTPD2DQ (128-bit, truncating) with a register source: the unmasked
  // form selects through X86any_cvttp2si, the masked forms through
  // X86mcvttp2si (merge: rrk, zero: rrkz).
  def : Pat<(v4i32 (X86any_cvttp2si (v2f64 VR128X:$src))),
            (VCVTTPD2DQZ128rr VR128X:$src)>;
  def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;

  // Full-vector memory source (rm forms).
  def : Pat<(v4i32 (X86any_cvttp2si (loadv2f64 addr:$src))),
            (VCVTTPD2DQZ128rm addr:$src)>;
  def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
                          VK2WM:$mask),
            (VCVTTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
                          VK2WM:$mask),
            (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;

  // Broadcast-load source, i.e. {1to2} embedded broadcast (rmb forms).
  def : Pat<(v4i32 (X86any_cvttp2si (v2f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTTPD2DQZ128rmb addr:$src)>;
  def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
                          (v4i32 VR128X:$src0), VK2WM:$mask),
            (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;

  // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction
  // patterns have been disabled with null_frag.
// VCVTPD2UDQ (f64 -> u32): unmasked forms use X86cvtp2UInt; masked forms
// use X86mcvtp2UInt, which carries the 2-bit mask.
// Register source.
def : Pat<(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src))),
          (VCVTPD2UDQZ128rr VR128X:$src)>;
def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
                         VK2WM:$mask),
          (VCVTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
                         VK2WM:$mask),
          (VCVTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;

// Folded load.
def : Pat<(v4i32 (X86cvtp2UInt (loadv2f64 addr:$src))),
          (VCVTPD2UDQZ128rm addr:$src)>;
def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
                         VK2WM:$mask),
          (VCVTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
                         VK2WM:$mask),
          (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;

// Broadcast load.
def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)))),
          (VCVTPD2UDQZ128rmb addr:$src)>;
def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
                         (v4i32 VR128X:$src0), VK2WM:$mask),
          (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
                         v4i32x_info.ImmAllZerosV, VK2WM:$mask),
          (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;

// Special patterns to allow use of X86mcvttp2ui for masking. Instruction
// patterns have been disabled with null_frag.
// VCVTTPD2UDQ (truncating f64 -> u32): unmasked forms use the
// strict-FP-capable X86any_cvttp2ui; masked forms use X86mcvttp2ui.
// Register source.
def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 VR128X:$src))),
          (VCVTTPD2UDQZ128rr VR128X:$src)>;
def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
                        VK2WM:$mask),
          (VCVTTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
                        VK2WM:$mask),
          (VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;

// Folded load.
def : Pat<(v4i32 (X86any_cvttp2ui (loadv2f64 addr:$src))),
          (VCVTTPD2UDQZ128rm addr:$src)>;
def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
                        VK2WM:$mask),
          (VCVTTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
                        VK2WM:$mask),
          (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;

// Broadcast load.
def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)))),
          (VCVTTPD2UDQZ128rmb addr:$src)>;
def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
                        (v4i32 VR128X:$src0), VK2WM:$mask),
          (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
                        v4i32x_info.ImmAllZerosV, VK2WM:$mask),
          (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;

// lrint lowering to packed float->int conversions (VLX sizes).
def : Pat<(v4i32 (lrint VR128X:$src)), (VCVTPS2DQZ128rr VR128X:$src)>;
def : Pat<(v4i32 (lrint (loadv4f32 addr:$src))), (VCVTPS2DQZ128rm addr:$src)>;
def : Pat<(v8i32 (lrint VR256X:$src)), (VCVTPS2DQZ256rr VR256X:$src)>;
def : Pat<(v8i32 (lrint (loadv8f32 addr:$src))), (VCVTPS2DQZ256rm addr:$src)>;
def : Pat<(v4i32 (lrint VR256X:$src)), (VCVTPD2DQZ256rr VR256X:$src)>;
def : Pat<(v4i32 (lrint (loadv4f64 addr:$src))), (VCVTPD2DQZ256rm addr:$src)>;
}
// 512-bit lrint (no VLX requirement).
def : Pat<(v16i32 (lrint VR512:$src)), (VCVTPS2DQZrr VR512:$src)>;
def : Pat<(v16i32 (lrint (loadv16f32 addr:$src))), (VCVTPS2DQZrm addr:$src)>;
def : Pat<(v8i32 (lrint VR512:$src)), (VCVTPD2DQZrr VR512:$src)>;
// 512-bit lrint with a folded load: v8f64 -> v8i32 via VCVTPD2DQ.
def : Pat<(v8i32 (lrint (loadv8f64 addr:$src))), (VCVTPD2DQZrm addr:$src)>;

let Predicates = [HasDQI, HasVLX] in {
  // vcvtps2qq/vcvtps2uqq (and the truncating forms) read only the low two
  // f32 elements, so a 64-bit zero-extending scalar load (X86vzload64)
  // can be folded into the memory forms.
  def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
            (VCVTPS2QQZ128rm addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                    (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                    VR128X:$src0)),
            (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                    (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                    v2i64x_info.ImmAllZerosV)),
            (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
            (VCVTPS2UQQZ128rm addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                    (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                    VR128X:$src0)),
            (VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                    (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                    v2i64x_info.ImmAllZerosV)),
            (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v2i64 (X86any_cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
            (VCVTTPS2QQZ128rm addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                    (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                    VR128X:$src0)),
            (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                    (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                    v2i64x_info.ImmAllZerosV)),
            (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v2i64 (X86any_cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
            (VCVTTPS2UQQZ128rm addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                    (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                    VR128X:$src0)),
            (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
                    (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
                    v2i64x_info.ImmAllZerosV)),
            (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;

  // lrint/llrint lowering to DQI's 64-bit-element conversions.
  def : Pat<(v4i64 (lrint VR128X:$src)), (VCVTPS2QQZ256rr VR128X:$src)>;
  def : Pat<(v4i64 (lrint (loadv4f32 addr:$src))), (VCVTPS2QQZ256rm addr:$src)>;
  def : Pat<(v4i64 (llrint VR128X:$src)), (VCVTPS2QQZ256rr VR128X:$src)>;
  def : Pat<(v4i64 (llrint (loadv4f32 addr:$src))), (VCVTPS2QQZ256rm addr:$src)>;
  def : Pat<(v2i64 (lrint VR128X:$src)), (VCVTPD2QQZ128rr VR128X:$src)>;
  def : Pat<(v2i64 (lrint (loadv2f64 addr:$src))), (VCVTPD2QQZ128rm addr:$src)>;
  def : Pat<(v4i64 (lrint VR256X:$src)), (VCVTPD2QQZ256rr VR256X:$src)>;
  def : Pat<(v4i64 (lrint (loadv4f64 addr:$src))), (VCVTPD2QQZ256rm addr:$src)>;
  def : Pat<(v2i64 (llrint VR128X:$src)), (VCVTPD2QQZ128rr VR128X:$src)>;
  def : Pat<(v2i64 (llrint (loadv2f64 addr:$src))), (VCVTPD2QQZ128rm addr:$src)>;
  def : Pat<(v4i64 (llrint VR256X:$src)), (VCVTPD2QQZ256rr VR256X:$src)>;
  def : Pat<(v4i64 (llrint (loadv4f64 addr:$src))), (VCVTPD2QQZ256rm addr:$src)>;
}

let Predicates = [HasDQI] in {
  // 512-bit lrint/llrint.
  def : Pat<(v8i64 (lrint VR256X:$src)), (VCVTPS2QQZrr VR256X:$src)>;
  def : Pat<(v8i64 (lrint (loadv8f32 addr:$src))), (VCVTPS2QQZrm addr:$src)>;
  def : Pat<(v8i64 (llrint VR256X:$src)), (VCVTPS2QQZrr VR256X:$src)>;
  def : Pat<(v8i64 (llrint (loadv8f32 addr:$src))), (VCVTPS2QQZrm addr:$src)>;
  def : Pat<(v8i64 (lrint VR512:$src)), (VCVTPD2QQZrr VR512:$src)>;
  def : Pat<(v8i64 (lrint (loadv8f64 addr:$src))), (VCVTPD2QQZrm addr:$src)>;
  def : Pat<(v8i64 (llrint VR512:$src)), (VCVTPD2QQZrr VR512:$src)>;
  def : Pat<(v8i64 (llrint (loadv8f64 addr:$src))), (VCVTPD2QQZrm addr:$src)>;
}

let Predicates = [HasVLX] in {
  // vcvtdq2pd/vcvtudq2pd read only the low two i32 elements, so fold a
  // 64-bit zero-extending scalar load (X86vzload64) into the memory form.
  def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
            (VCVTDQ2PDZ128rm addr:$src)>;
  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
                    (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
                    VR128X:$src0)),
            (VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
                    (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
                    v2f64x_info.ImmAllZerosV)),
            (VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v2f64 (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
            (VCVTUDQ2PDZ128rm addr:$src)>;
  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
                    (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
                    VR128X:$src0)),
            (VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
                    (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
                    v2f64x_info.ImmAllZerosV)),
            (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
}

//===----------------------------------------------------------------------===//
// Half precision conversion instructions
//===----------------------------------------------------------------------===//

// vcvtph2ps: f16 -> f32 widening conversion. Register and memory forms
// (ld_dag selects the load fragment); AVX512_maskable_split provides both
// the strict-capable (X86any_cvtph2ps) and masked (X86cvtph2ps) patterns.
let Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                           X86MemOperand x86memop, dag ld_dag,
                           X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable_split<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
                                  (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
                                  (X86any_cvtph2ps (_src.VT _src.RC:$src)),
                                  (X86cvtph2ps (_src.VT _src.RC:$src))>,
                                  T8, PD, Sched<[sched]>;
  defm rm : AVX512_maskable_split<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
                                  (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
                                  (X86any_cvtph2ps (_src.VT ld_dag)),
                                  (X86cvtph2ps (_src.VT ld_dag))>,
                                  T8, PD, Sched<[sched.Folded]>;
}

// SAE (suppress-all-exceptions) register form of vcvtph2ps.
multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                               X86FoldableSchedWrite sched> {
  let Uses = [MXCSR] in
  defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
                             (ins _src.RC:$src), "vcvtph2ps",
                             "{sae}, $src", "$src, {sae}",
                             (X86cvtph2psSAE (_src.VT _src.RC:$src))>,
                             T8, PD, EVEX_B, Sched<[sched]>;
}

let Predicates = [HasAVX512] in
  defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem,
                                    (load addr:$src), WriteCvtPH2PSZ>,
                    avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
                    EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;

let Predicates = [HasVLX] in {
  defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
                                       (load addr:$src), WriteCvtPH2PSY>, EVEX, EVEX_V256,
                                       EVEX_CD8<32, CD8VH>;
  defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
                                       (bitconvert (v2i64 (X86vzload64 addr:$src))),
                                       WriteCvtPH2PS>, EVEX, EVEX_V128,
                                       EVEX_CD8<32, CD8VH>;

  // Pattern match vcvtph2ps of a scalar i64 load.
  def : Pat<(v4f32 (X86any_cvtph2ps (v8i16 (bitconvert
                    (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
            (VCVTPH2PSZ128rm addr:$src)>;
}

// vcvtps2ph: f32 -> f16 narrowing with an immediate rounding-control byte
// ($src2). Register forms have merge/zero-masked variants; the store forms
// carry no ISel patterns (mayStore) and are matched by the Pat<>s below.
multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                           X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
let ExeDomain = GenericDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
             (ins _src.RC:$src1, i32u8imm:$src2),
             "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
             [(set _dest.RC:$dst,
                   (X86any_cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
             Sched<[RR]>;
  let Constraints = "$src0 = $dst" in
  def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
             (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
             "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
             [(set _dest.RC:$dst,
                   (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
                                 _dest.RC:$src0, _src.KRCWM:$mask))]>,
             Sched<[RR]>, EVEX_K;
  def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
             (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
             "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}",
             [(set _dest.RC:$dst,
                   (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
                                 _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
             Sched<[RR]>, EVEX_KZ;
  let hasSideEffects = 0, mayStore = 1 in {
    def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
               (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               Sched<[MR]>;
    def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
               (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
               "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
               EVEX_K, Sched<[MR]>;
  }
}
}

// SAE register forms of vcvtps2ph (unmasked/merge-masked/zero-masked).
multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                               SchedWrite Sched> {
  let hasSideEffects = 0, Uses = [MXCSR] in {
  def rrb : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
             (ins _src.RC:$src1, i32u8imm:$src2),
             "vcvtps2ph\t{$src2, {sae}, $src1, $dst|$dst, $src1, {sae}, $src2}",
             [(set _dest.RC:$dst,
                   (X86cvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
             EVEX_B, Sched<[Sched]>;
  let Constraints = "$src0 = $dst" in
  def rrbk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
             (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
             "vcvtps2ph\t{$src2, {sae}, $src1, $dst {${mask}}|$dst {${mask}}, $src1, {sae}, $src2}",
             [(set _dest.RC:$dst,
                   (X86mcvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2),
                                    _dest.RC:$src0, _src.KRCWM:$mask))]>,
             EVEX_B, Sched<[Sched]>, EVEX_K;
  def rrbkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
             (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
             "vcvtps2ph\t{$src2, {sae}, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, {sae}, $src2}",
             [(set _dest.RC:$dst,
                   (X86mcvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2),
                                    _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
             EVEX_B, Sched<[Sched]>, EVEX_KZ;
}
}

let Predicates = [HasAVX512] in {
  defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
                                    WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
                    avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
                    EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;

  def : Pat<(store (v16i16 (X86any_cvtps2ph VR512:$src1, timm:$src2)), addr:$dst),
            (VCVTPS2PHZmr addr:$dst, VR512:$src1, timm:$src2)>;
}

let Predicates = [HasVLX] in {
  defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
                                       WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
                       EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
  defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
                                       WriteCvtPS2PH, WriteCvtPS2PHSt>,
                       EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;

  // The 128-bit result holds the 8 f16 values in its low 64 bits; match a
  // 64-bit store of the low element of the bitcast vector.
  def : Pat<(store (f64 (extractelt
                         (bc_v2f64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
                         (iPTR 0))), addr:$dst),
            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
  def : Pat<(store (i64 (extractelt
                         (bc_v2i64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
                         (iPTR 0))), addr:$dst),
            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
  def : Pat<(store (v8i16 (X86any_cvtps2ph VR256X:$src1, timm:$src2)), addr:$dst),
            (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, timm:$src2)>;
}

// Unordered/Ordered scalar fp compare with Sae and set EFLAGS
multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
                              string OpcodeStr, Domain d,
                              X86FoldableSchedWrite sched = WriteFComX> {
  let ExeDomain = d, hasSideEffects = 0, Uses = [MXCSR] in
  def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
                  !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
                  EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
}

let Defs = [EFLAGS], Predicates = [HasAVX512] in {
  defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", SSEPackedSingle>,
                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
  defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", SSEPackedDouble>,
                   AVX512PDIi8Base, REX_W, EVEX_CD8<64, CD8VT1>;
  defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", SSEPackedSingle>,
                  AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
  defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", SSEPackedDouble>,
                  AVX512PDIi8Base, REX_W, EVEX_CD8<64, CD8VT1>;
}

let Defs = [EFLAGS], Predicates = [HasAVX512] in {
  defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86any_fcmp, f32, f32mem, loadf32,
                                 "ucomiss", SSEPackedSingle>, TB, EVEX, VEX_LIG,
                                 EVEX_CD8<32, CD8VT1>;
  defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86any_fcmp, f64, f64mem, loadf64,
                                 "ucomisd", SSEPackedDouble>, TB, PD, EVEX,
                                 VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
  defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, X86strict_fcmps, f32, f32mem, loadf32,
                                "comiss", SSEPackedSingle>, TB, EVEX, VEX_LIG,
                                EVEX_CD8<32, CD8VT1>;
  defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, X86strict_fcmps, f64, f64mem, loadf64,
                                "comisd", SSEPackedDouble>, TB, PD, EVEX,
                                VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
  let isCodeGenOnly = 1 in {
    defm VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
                       sse_load_f32, "ucomiss", SSEPackedSingle>, TB, EVEX, VEX_LIG,
                       EVEX_CD8<32, CD8VT1>;
    defm VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
                       sse_load_f64, "ucomisd", SSEPackedDouble>, TB, PD, EVEX,
                       VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;

    defm VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
                      sse_load_f32, "comiss", SSEPackedSingle>, TB, EVEX, VEX_LIG,
                      EVEX_CD8<32, CD8VT1>;
    defm VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
                      sse_load_f64, "comisd", SSEPackedDouble>, TB, PD, EVEX,
                      VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
  }
}

// FP16 scalar compares (vucomish/vcomish).
let Defs = [EFLAGS], Predicates = [HasFP16] in {
  defm VUCOMISHZ : avx512_ord_cmp_sae<0x2E, v8f16x_info, "vucomish",
                                      SSEPackedSingle>, AVX512PSIi8Base, T_MAP5,
                                      EVEX_CD8<16, CD8VT1>;
  defm VCOMISHZ : avx512_ord_cmp_sae<0x2F, v8f16x_info, "vcomish",
                                     SSEPackedSingle>, AVX512PSIi8Base, T_MAP5,
                                     EVEX_CD8<16, CD8VT1>;
  defm VUCOMISHZ : sse12_ord_cmp<0x2E, FR16X, X86any_fcmp, f16, f16mem, loadf16,
                                 "ucomish", SSEPackedSingle>, T_MAP5, EVEX,
                                 VEX_LIG, EVEX_CD8<16, CD8VT1>;
  defm VCOMISHZ : sse12_ord_cmp<0x2F, FR16X, X86strict_fcmps, f16, f16mem, loadf16,
                                "comish", SSEPackedSingle>, T_MAP5, EVEX,
                                VEX_LIG, EVEX_CD8<16, CD8VT1>;
  let isCodeGenOnly = 1 in {
    defm VUCOMISHZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v8f16, shmem,
                                       sse_load_f16, "ucomish", SSEPackedSingle>,
                                       T_MAP5, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;

    defm VCOMISHZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v8f16, shmem,
                                      sse_load_f16, "comish", SSEPackedSingle>,
                                      T_MAP5, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
  }
}

/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd, rcpsh, rsqrtsh
multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _,
                         Predicate prd = HasAVX512> {
  let Predicates = [prd], ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                                   "$src2, $src1", "$src1, $src2",
                                   (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
                                   EVEX, VVVV, VEX_LIG, Sched<[sched]>;
  defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                   (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                                   "$src2, $src1", "$src1, $src2",
                                   (OpNode (_.VT _.RC:$src1),
                                    (_.ScalarIntMemFrags addr:$src2))>, EVEX, VVVV, VEX_LIG,
                                   Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}

defm VRCPSHZ : avx512_fp14_s<0x4D, "vrcpsh", X86rcp14s, SchedWriteFRcp.Scl,
                             f16x_info, HasFP16>, EVEX_CD8<16, CD8VT1>,
                             T_MAP6, PD;
defm VRSQRTSHZ : avx512_fp14_s<0x4F, "vrsqrtsh", X86rsqrt14s,
                               SchedWriteFRsqrt.Scl, f16x_info, HasFP16>,
                               EVEX_CD8<16, CD8VT1>, T_MAP6, PD;
let Uses = [MXCSR] in {
defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
                               f32x_info>, EVEX_CD8<32, CD8VT1>,
                               T8, PD;
defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
                               f64x_info>, REX_W, EVEX_CD8<64, CD8VT1>,
                               T8, PD;
defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
                                 SchedWriteFRsqrt.Scl, f32x_info>,
                                 EVEX_CD8<32, CD8VT1>, T8, PD;
defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
                                 SchedWriteFRsqrt.Scl, f64x_info>, REX_W,
                                 EVEX_CD8<64, CD8VT1>, T8, PD;
}

/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
                          (_.VT (OpNode _.RC:$src))>, EVEX, T8, PD,
                          Sched<[sched]>;
  defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                          (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                          (OpNode (_.VT
                           (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8, PD,
                          Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                           (ins _.ScalarMemOp:$src), OpcodeStr,
                           "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
                           (OpNode (_.VT
                            (_.BroadcastLdFrag addr:$src)))>,
                           EVEX, T8, PD, EVEX_B,
                           Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Instantiates avx512_fp14_p for all vector widths/element types: the ps/pd
// "14" forms for AVX512 (plus VLX sizes) and the ph forms for FP16.
multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched> {
  let Uses = [MXCSR] in {
  defm 14PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"), OpNode, sched.ZMM,
                             v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm 14PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"), OpNode, sched.ZMM,
                             v8f64_info>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>;
  }
  let Predicates = [HasFP16] in
  defm PHZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"), OpNode, sched.ZMM,
                           v32f16_info>, EVEX_V512, T_MAP6, EVEX_CD8<16, CD8VF>;

  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX], Uses = [MXCSR] in {
    defm 14PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"),
                                  OpNode, sched.XMM, v4f32x_info>,
                                  EVEX_V128, EVEX_CD8<32, CD8VF>;
    defm 14PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"),
                                  OpNode, sched.YMM, v8f32x_info>,
                                  EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm 14PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
                                  OpNode, sched.XMM, v2f64x_info>,
                                  EVEX_V128, REX_W, EVEX_CD8<64, CD8VF>;
    defm 14PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
                                  OpNode, sched.YMM, v4f64x_info>,
                                  EVEX_V256, REX_W, EVEX_CD8<64, CD8VF>;
  }
  let Predicates = [HasFP16, HasVLX] in {
    defm PHZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
                                OpNode, sched.XMM, v8f16x_info>,
                                EVEX_V128, T_MAP6, EVEX_CD8<16, CD8VF>;
    defm PHZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
                                OpNode, sched.YMM, v16f16x_info>,
                                EVEX_V256, T_MAP6, EVEX_CD8<16, CD8VF>;
  }
}

defm VRSQRT : avx512_fp14_p_vl_all<0x4E, "vrsqrt", X86rsqrt14, SchedWriteFRsqrt>;
defm VRCP : avx512_fp14_p_vl_all<0x4C, "vrcp", X86rcp14, SchedWriteFRcp>;

/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
9244multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _, 9245 SDNode OpNode, SDNode OpNodeSAE, 9246 X86FoldableSchedWrite sched> { 9247 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in { 9248 defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 9249 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 9250 "$src2, $src1", "$src1, $src2", 9251 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>, 9252 Sched<[sched]>, SIMD_EXC; 9253 9254 defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 9255 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 9256 "{sae}, $src2, $src1", "$src1, $src2, {sae}", 9257 (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2))>, 9258 EVEX_B, Sched<[sched]>; 9259 9260 defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 9261 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr, 9262 "$src2, $src1", "$src1, $src2", 9263 (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2))>, 9264 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 9265 } 9266} 9267 9268multiclass avx512_fp28_s_ass<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 9269 X86FoldableSchedWrite sched> { 9270 let ExeDomain = _.ExeDomain, Uses = [MXCSR], hasSideEffects = 0 in { 9271 defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 9272 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 9273 "$src2, $src1", "$src1, $src2", 9274 (null_frag)>, Sched<[sched]>, SIMD_EXC; 9275 defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 9276 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 9277 "{sae}, $src2, $src1", "$src1, $src2, {sae}", 9278 (null_frag)>, EVEX_B, Sched<[sched]>; 9279 let mayLoad = 1 in 9280 defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 9281 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr, 9282 "$src2, $src1", "$src1, $src2", 9283 (null_frag)>, 9284 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 9285 } 9286} 9287 9288multiclass avx512_eri_s_ass<bits<8> opc, string OpcodeStr, 9289 
X86FoldableSchedWrite sched> { 9290 defm SSZ : avx512_fp28_s_ass<opc, OpcodeStr#"ss", f32x_info, sched>, 9291 EVEX_CD8<32, CD8VT1>, VEX_LIG, T8, PD, EVEX, VVVV; 9292 defm SDZ : avx512_fp28_s_ass<opc, OpcodeStr#"sd", f64x_info, sched>, 9293 EVEX_CD8<64, CD8VT1>, VEX_LIG, REX_W, T8, PD, EVEX, VVVV; 9294} 9295 9296defm VRCP28 : avx512_eri_s_ass<0xCB, "vrcp28", SchedWriteFRcp.Scl>; 9297defm VRSQRT28 : avx512_eri_s_ass<0xCD, "vrsqrt28", SchedWriteFRsqrt.Scl>; 9298 9299multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode, 9300 SDNode OpNodeSAE, X86FoldableSchedWrite sched> { 9301 defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE, 9302 sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG, T8, PD, EVEX, VVVV; 9303 defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE, 9304 sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, REX_W, T8, PD, EVEX, VVVV; 9305} 9306 9307multiclass avx512_vgetexpsh<bits<8> opc, string OpcodeStr, SDNode OpNode, 9308 SDNode OpNodeSAE, X86FoldableSchedWrite sched> { 9309 let Predicates = [HasFP16] in 9310 defm SHZ : avx512_fp28_s<opc, OpcodeStr#"sh", f16x_info, OpNode, OpNodeSAE, sched>, 9311 EVEX_CD8<16, CD8VT1>, T_MAP6, PD, EVEX, VVVV; 9312} 9313 9314defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs, 9315 SchedWriteFRnd.Scl>, 9316 avx512_vgetexpsh<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs, 9317 SchedWriteFRnd.Scl>; 9318/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd 9319 9320multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 9321 SDNode OpNode, X86FoldableSchedWrite sched> { 9322 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { 9323 defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 9324 (ins _.RC:$src), OpcodeStr, "$src", "$src", 9325 (OpNode (_.VT _.RC:$src))>, 9326 Sched<[sched]>; 9327 9328 defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 9329 (ins _.MemOp:$src), OpcodeStr, "$src", "$src", 9330 (OpNode 
(_.VT 9331 (bitconvert (_.LdFrag addr:$src))))>, 9332 Sched<[sched.Folded, sched.ReadAfterFold]>; 9333 9334 defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 9335 (ins _.ScalarMemOp:$src), OpcodeStr, 9336 "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr, 9337 (OpNode (_.VT 9338 (_.BroadcastLdFrag addr:$src)))>, 9339 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 9340 } 9341} 9342multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 9343 SDNode OpNode, X86FoldableSchedWrite sched> { 9344 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in 9345 defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 9346 (ins _.RC:$src), OpcodeStr, 9347 "{sae}, $src", "$src, {sae}", 9348 (OpNode (_.VT _.RC:$src))>, 9349 EVEX_B, Sched<[sched]>; 9350} 9351 9352multiclass avx512_fp28_p_ass<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 9353 X86FoldableSchedWrite sched> { 9354 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1, 9355 hasSideEffects = 0 in { 9356 defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 9357 (ins _.RC:$src), OpcodeStr, "$src", "$src", 9358 (null_frag)>, Sched<[sched]>; 9359 let mayLoad = 1 in 9360 defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 9361 (ins _.MemOp:$src), OpcodeStr, "$src", "$src", 9362 (null_frag)>, 9363 Sched<[sched.Folded, sched.ReadAfterFold]>; 9364 let mayLoad = 1 in 9365 defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 9366 (ins _.ScalarMemOp:$src), OpcodeStr, 9367 "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr, 9368 (null_frag)>, 9369 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 9370 } 9371} 9372multiclass avx512_fp28_p_sae_ass<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 9373 X86FoldableSchedWrite sched> { 9374 let ExeDomain = _.ExeDomain, Uses = [MXCSR], hasSideEffects = 0 in 9375 defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 9376 (ins _.RC:$src), OpcodeStr, 9377 "{sae}, $src", "$src, {sae}", 9378 
(null_frag)>, Sched<[sched]>, EVEX_B; // Tail of a definition that begins before this chunk.
}

// Assembler-only (null-pattern) 512-bit packed forms, plain and SAE, used by
// the exponential/reciprocal opcodes instantiated just below.
multiclass avx512_eri_ass<bits<8> opc, string OpcodeStr,
                          X86SchedWriteWidths sched> {
  defm PSZ : avx512_fp28_p_ass<opc, OpcodeStr#"ps", v16f32_info, sched.ZMM>,
             avx512_fp28_p_sae_ass<opc, OpcodeStr#"ps", v16f32_info, sched.ZMM>,
             T8, PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp28_p_ass<opc, OpcodeStr#"pd", v8f64_info, sched.ZMM>,
             avx512_fp28_p_sae_ass<opc, OpcodeStr#"pd", v8f64_info, sched.ZMM>,
             T8, PD, EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>;
}

defm VRSQRT28 : avx512_eri_ass<0xCC, "vrsqrt28", SchedWriteFRsqrt>, EVEX;
defm VRCP28 : avx512_eri_ass<0xCA, "vrcp28", SchedWriteFRcp>, EVEX;
defm VEXP2 : avx512_eri_ass<0xC8, "vexp2", SchedWriteFAdd>, EVEX;

// 512-bit packed forms with ISel patterns: OpNode for the plain operation,
// OpNodeSAE for the suppress-all-exceptions variant.
multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
                      SDNode OpNodeSAE, X86SchedWriteWidths sched> {
  defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
             avx512_fp28_p_sae<opc, OpcodeStr#"ps", v16f32_info, OpNodeSAE, sched.ZMM>,
             T8, PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
             avx512_fp28_p_sae<opc, OpcodeStr#"pd", v8f64_info, OpNodeSAE, sched.ZMM>,
             T8, PD, EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>;
}

// 128/256-bit packed forms of the same unary FP operation.
multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode, X86SchedWriteWidths sched> {
  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode,
                                sched.XMM>,
                                EVEX_V128, T8, PD, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode,
                                sched.YMM>,
                                EVEX_V256, T8, PD, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode,
                                sched.XMM>,
                                EVEX_V128, REX_W, T8, PD, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode,
                                sched.YMM>,
                                EVEX_V256, REX_W, T8, PD, EVEX_CD8<64, CD8VF>;
  }
}

// FP16 packed forms (v8f16/v16f16/v32f16), gated on HasFP16 (and HasVLX for
// the narrow widths).
multiclass avx512_vgetexp_fp16<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               SDNode OpNodeSAE, X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in
  defm PHZ : avx512_fp28_p<opc, OpcodeStr#"ph", v32f16_info, OpNode, sched.ZMM>,
             avx512_fp28_p_sae<opc, OpcodeStr#"ph", v32f16_info, OpNodeSAE, sched.ZMM>,
             T_MAP6, PD, EVEX_V512, EVEX_CD8<16, CD8VF>;
  let Predicates = [HasFP16, HasVLX] in {
    defm PHZ128 : avx512_fp28_p<opc, OpcodeStr#"ph", v8f16x_info, OpNode, sched.XMM>,
                  EVEX_V128, T_MAP6, PD, EVEX_CD8<16, CD8VF>;
    defm PHZ256 : avx512_fp28_p<opc, OpcodeStr#"ph", v16f16x_info, OpNode, sched.YMM>,
                  EVEX_V256, T_MAP6, PD, EVEX_CD8<16, CD8VF>;
  }
}
defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
                          SchedWriteFRnd>,
               avx512_vgetexp_fp16<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
                                   SchedWriteFRnd>,
               avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexp,
                                        SchedWriteFRnd>, EVEX;

// Packed square root with an explicit static rounding-control operand ($rc).
multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
                                    X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  let ExeDomain = _.ExeDomain in
  defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
                         (_.VT (X86fsqrtRnd _.RC:$src, (i32 timm:$rc)))>,
                         EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
}

// Packed square root: register, full-width memory, and broadcast-memory forms.
// The split patterns use any_fsqrt (strict-or-relaxed) for the unmasked form
// and plain fsqrt for the masked forms.
multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm r: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
                (ins _.RC:$src), OpcodeStr, "$src", "$src",
                (_.VT (any_fsqrt _.RC:$src)),
                (_.VT (fsqrt _.RC:$src))>, EVEX,
                Sched<[sched]>;
  defm m: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                (any_fsqrt (_.VT (_.LdFrag addr:$src))),
                (fsqrt (_.VT (_.LdFrag addr:$src)))>, EVEX,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm mb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins _.ScalarMemOp:$src), OpcodeStr,
                "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
                (any_fsqrt (_.VT (_.BroadcastLdFrag addr:$src))),
                (fsqrt (_.VT (_.BroadcastLdFrag addr:$src)))>,
                EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Instantiate packed sqrt for f16 (HasFP16), f32 and f64: 512-bit always,
// 128/256-bit under the VL predicates.
let Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
                                  X86SchedWriteSizes sched> {
  let Predicates = [HasFP16] in
  defm PHZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
                                sched.PH.ZMM, v32f16_info>,
                                EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>;
  let Predicates = [HasFP16, HasVLX] in {
    defm PHZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
                                     sched.PH.XMM, v8f16x_info>,
                                     EVEX_V128, T_MAP5, EVEX_CD8<16, CD8VF>;
    defm PHZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
                                     sched.PH.YMM, v16f16x_info>,
                                     EVEX_V256, T_MAP5, EVEX_CD8<16, CD8VF>;
  }
  defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
                                sched.PS.ZMM, v16f32_info>,
                                EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
                                sched.PD.ZMM, v8f64_info>,
                                EVEX_V512, REX_W, TB, PD, EVEX_CD8<64, CD8VF>;
  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
                                     sched.PS.XMM, v4f32x_info>,
                                     EVEX_V128, TB, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
                                     sched.PS.YMM, v8f32x_info>,
                                     EVEX_V256, TB, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
                                     sched.PD.XMM, v2f64x_info>,
                                     EVEX_V128, REX_W, TB, PD, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
                                     sched.PD.YMM, v4f64x_info>,
                                     EVEX_V256, REX_W, TB, PD, EVEX_CD8<64, CD8VF>;
  }
}

// The rounding-control packed forms exist only at 512 bits.
let Uses = [MXCSR] in
multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
                                        X86SchedWriteSizes sched> {
  let Predicates = [HasFP16] in
  defm PHZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ph"),
                                      sched.PH.ZMM, v32f16_info>,
                                      EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>;
  defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
                                      sched.PS.ZMM, v16f32_info>,
                                      EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"),
                                      sched.PD.ZMM, v8f64_info>,
                                      EVEX_V512, REX_W, TB, PD, EVEX_CD8<64, CD8VF>;
}

// Scalar square root: intrinsic (_Int) forms plus isCodeGenOnly FRC forms that
// back the plain scalar any_fsqrt patterns at the end of the multiclass.
multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, string Name, Predicate prd = HasAVX512> {
  let ExeDomain = _.ExeDomain, Predicates = [prd] in {
    defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (X86fsqrts (_.VT _.RC:$src1),
                                    (_.VT _.RC:$src2))>,
                         Sched<[sched]>, SIMD_EXC;
    defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (X86fsqrts (_.VT _.RC:$src1),
                                    (_.ScalarIntMemFrags addr:$src2))>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
    let Uses = [MXCSR] in
    defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
                           "$rc, $src2, $src1", "$src1, $src2, $rc",
                           (X86fsqrtRnds (_.VT _.RC:$src1),
                                         (_.VT _.RC:$src2),
                                         (i32 timm:$rc))>,
                           EVEX_B, EVEX_RC, Sched<[sched]>;

    let isCodeGenOnly = 1, hasSideEffects = 0 in {
      def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
                (ins _.FRC:$src1, _.FRC:$src2),
                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                Sched<[sched]>, SIMD_EXC;
      let mayLoad = 1 in
        def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
                  (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
    }
  }

  let Predicates = [prd] in {
    def : Pat<(_.EltVT (any_fsqrt _.FRC:$src)),
              (!cast<Instruction>(Name#Zr)
                  (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
  }

  // Fold the scalar load into the instruction only when optimizing for size.
  let Predicates = [prd, OptForSize] in {
    def : Pat<(_.EltVT (any_fsqrt (load addr:$src))),
              (!cast<Instruction>(Name#Zm)
                  (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
  }
}

multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
                                  X86SchedWriteSizes sched> {
  defm SHZ : avx512_sqrt_scalar<opc, OpcodeStr#"sh", sched.PH.Scl, f16x_info, NAME#"SH", HasFP16>,
             EVEX_CD8<16, CD8VT1>, EVEX, VVVV, T_MAP5, XS;
  defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
             EVEX_CD8<32, CD8VT1>, EVEX, VVVV, TB, XS;
  defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
             EVEX_CD8<64, CD8VT1>, EVEX, VVVV, TB, XD, REX_W;
}

defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
             avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;

defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG;

// Scalar round-to-integral (vrndscales*): _Int forms plus isCodeGenOnly FRC
// forms that back the X86any_VRndScale patterns at the end of the multiclass.
multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
                           "$src3, $src2, $src1", "$src1, $src2, $src3",
                           (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                           (i32 timm:$src3)))>,
                           Sched<[sched]>, SIMD_EXC;

    let Uses = [MXCSR] in
    defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
                         "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
                         (_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                         (i32 timm:$src3)))>, EVEX_B,
                         Sched<[sched]>;

    defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
                         OpcodeStr,
                         "$src3, $src2, $src1", "$src1, $src2, $src3",
                         (_.VT (X86RndScales _.RC:$src1,
                                (_.ScalarIntMemFrags addr:$src2), (i32 timm:$src3)))>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;

    let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
      def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
                (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
                OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                []>, Sched<[sched]>, SIMD_EXC;

      let mayLoad = 1 in
        def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
                  (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                  OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                  []>, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
    }
  }

  let Predicates = [HasAVX512] in {
    def : Pat<(X86any_VRndScale _.FRC:$src1, timm:$src2),
              (_.EltVT (!cast<Instruction>(NAME#r) (_.EltVT (IMPLICIT_DEF)),
               _.FRC:$src1, timm:$src2))>;
  }

  // Fold the scalar load into the instruction only when optimizing for size.
  let Predicates = [HasAVX512, OptForSize] in {
    def : Pat<(X86any_VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2),
              (_.EltVT (!cast<Instruction>(NAME#m) (_.EltVT (IMPLICIT_DEF)),
               addr:$src1, timm:$src2))>;
  }
}

let Predicates = [HasFP16] in
defm VRNDSCALESHZ : avx512_rndscale_scalar<0x0A, "vrndscalesh",
                                           SchedWriteFRnd.Scl, f16x_info>,
                                           AVX512PSIi8Base, TA, EVEX, VVVV,
                                           EVEX_CD8<16, CD8VT1>;

defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
                                           SchedWriteFRnd.Scl, f32x_info>,
                                           AVX512AIi8Base, EVEX, VVVV, VEX_LIG,
                                           EVEX_CD8<32, CD8VT1>;

defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd",
                                           SchedWriteFRnd.Scl, f64x_info>,
                                           REX_W, AVX512AIi8Base, EVEX, VVVV, VEX_LIG,
                                           EVEX_CD8<64, CD8VT1>;

// Fold a masked select of a scalar unary op into the merge-masked (_Intk) and
// zero-masked (_Intkz) forms of the corresponding V<OpcPrefix> instruction.
multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
                                dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
                                dag OutMask, Predicate BasePredicate> {
  let Predicates = [BasePredicate] in {
    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
               (OpNode (extractelt _.VT:$src2, (iPTR 0))),
               (extractelt _.VT:$dst, (iPTR 0))))),
              (!cast<Instruction>("V"#OpcPrefix#r_Intk)
               _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>;

    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
               (OpNode (extractelt _.VT:$src2, (iPTR 0))),
               ZeroFP))),
              (!cast<Instruction>("V"#OpcPrefix#r_Intkz)
               OutMask, _.VT:$src2, _.VT:$src1)>;
  }
}

defm : avx512_masked_scalar<fsqrt, "SQRTSHZ", X86Movsh,
                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v8f16x_info,
                            fp16imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasFP16>;
defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss,
                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info,
                            fp32imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
                            fp64imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;


//-------------------------------------------------
// Integer truncate and extend operations
//-------------------------------------------------

// Truncating-move register forms (rr/rrk/rrkz) plus pattern-less store forms
// (mr/mrk); the store patterns are supplied by avx512_trunc_mr_lowering.
multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               SDPatternOperator MaskNode,
                               X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
                               X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
  let ExeDomain = DestInfo.ExeDomain in {
  def rr : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
             (ins SrcInfo.RC:$src),
             OpcodeStr # "\t{$src, $dst|$dst, $src}",
             [(set DestInfo.RC:$dst,
               (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))))]>,
             EVEX, Sched<[sched]>;
  let Constraints = "$src0 = $dst" in
  def rrk : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
             (ins DestInfo.RC:$src0, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
             OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
             [(set DestInfo.RC:$dst,
                  (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
                      (DestInfo.VT DestInfo.RC:$src0),
                      SrcInfo.KRCWM:$mask))]>,
             EVEX, EVEX_K, Sched<[sched]>;
  def rrkz : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
               (ins SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
               OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
               [(set DestInfo.RC:$dst,
                     (DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
                             DestInfo.ImmAllZerosV, SrcInfo.KRCWM:$mask)))]>,
               EVEX, EVEX_KZ, Sched<[sched]>;
  }

  let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
    def mr : AVX512XS8I<opc, MRMDestMem, (outs),
               (ins x86memop:$dst, SrcInfo.RC:$src),
               OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
               EVEX, Sched<[sched.Folded]>;

    def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
               (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
               OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>,
               EVEX, EVEX_K, Sched<[sched.Folded]>;
  }//mayStore = 1, hasSideEffects = 0
}

// Select the mr/mrk store forms for (masked) truncating store fragments.
multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
                                    PatFrag truncFrag, PatFrag mtruncFrag,
                                    string Name> {

  def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
            (!cast<Instruction>(Name#SrcInfo.ZSuffix#mr)
                                addr:$dst, SrcInfo.RC:$src)>;

  def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst,
                        SrcInfo.KRCWM:$mask),
            (!cast<Instruction>(Name#SrcInfo.ZSuffix#mrk)
                           addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
}

// Instantiate Z128/Z256 (under HasVLX && prd) and Z forms of one truncation,
// with per-width SDNodes/mask nodes, destination VTs and memory operands.
multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
                        SDNode OpNode256, SDNode OpNode512,
                        SDPatternOperator MaskNode128,
                        SDPatternOperator MaskNode256,
                        SDPatternOperator MaskNode512,
                        X86SchedWriteWidths sched,
                        AVX512VLVectorVTInfo VTSrcInfo,
                        X86VectorVTInfo DestInfoZ128,
                        X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
                        X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
                        X86MemOperand x86memopZ, PatFrag truncFrag,
                        PatFrag mtruncFrag, Predicate prd = HasAVX512>{

  let Predicates = [HasVLX, prd] in {
    defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched.XMM,
                             VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
               avx512_trunc_mr_lowering<VTSrcInfo.info128, truncFrag,
                                        mtruncFrag, NAME>, EVEX_V128;

    defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched.YMM,
                             VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
               avx512_trunc_mr_lowering<VTSrcInfo.info256, truncFrag,
                                        mtruncFrag, NAME>, EVEX_V256;
  }
  let Predicates = [prd] in
    defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched.ZMM,
                             VTSrcInfo.info512, DestInfoZ, x86memopZ>,
            avx512_trunc_mr_lowering<VTSrcInfo.info512, truncFrag,
                                     mtruncFrag, NAME>, EVEX_V512;
}

// The *_qb/_qw/_qd/_db/_dw/_wb wrappers fix the source VT family, destination
// VTs, memory operand widths and CD8 tuple for each element-size pair.
multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr,
                           X86SchedWriteWidths sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode,
                          InVecMaskNode, InVecMaskNode, InVecMaskNode, sched,
                          avx512vl_i64_info, v16i8x_info, v16i8x_info,
                          v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
}

multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86SchedWriteWidths sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
                          InVecMaskNode, InVecMaskNode, MaskNode, sched,
                          avx512vl_i64_info, v8i16x_info, v8i16x_info,
                          v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
}

multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86SchedWriteWidths sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
                          InVecMaskNode, MaskNode, MaskNode, sched,
                          avx512vl_i64_info, v4i32x_info, v4i32x_info,
                          v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
}

multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86SchedWriteWidths sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
                          InVecMaskNode, InVecMaskNode, MaskNode, sched,
                          avx512vl_i32_info, v16i8x_info, v16i8x_info,
                          v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
}

multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86SchedWriteWidths sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
                          InVecMaskNode, MaskNode, MaskNode, sched,
                          avx512vl_i32_info, v8i16x_info, v8i16x_info,
                          v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
}

multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86SchedWriteWidths sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
                          InVecMaskNode, MaskNode, MaskNode, sched,
                          avx512vl_i16_info, v16i8x_info, v16i8x_info,
                          v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
}

defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb",
                               SchedWriteVecTruncate, truncstorevi8,
                               masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb",
                                SchedWriteVecTruncate, truncstore_s_vi8,
                                masked_truncstore_s_vi8, X86vtruncs,
                                X86vmtruncs>;
defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb",
                                 SchedWriteVecTruncate, truncstore_us_vi8,
                                 masked_truncstore_us_vi8, X86vtruncus, X86vmtruncus>;

defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc,
                               SchedWriteVecTruncate, truncstorevi16,
                               masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, select_truncs,
                                SchedWriteVecTruncate, truncstore_s_vi16,
                                masked_truncstore_s_vi16, X86vtruncs,
                                X86vmtruncs>;
defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
                                 select_truncus, SchedWriteVecTruncate,
                                 truncstore_us_vi16, masked_truncstore_us_vi16,
                                 X86vtruncus, X86vmtruncus>;

defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc,
                               SchedWriteVecTruncate, truncstorevi32,
                               masked_truncstorevi32, X86vtrunc, X86vmtrunc>;
defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, select_truncs,
                                SchedWriteVecTruncate, truncstore_s_vi32,
                                masked_truncstore_s_vi32, X86vtruncs,
                                X86vmtruncs>;
defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
                                 select_truncus, SchedWriteVecTruncate,
                                 truncstore_us_vi32, masked_truncstore_us_vi32,
                                 X86vtruncus, X86vmtruncus>;

defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc,
                               SchedWriteVecTruncate, truncstorevi8,
                               masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs,
                                SchedWriteVecTruncate, truncstore_s_vi8,
                                masked_truncstore_s_vi8, X86vtruncs,
                                X86vmtruncs>;
defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus,
                                 select_truncus, SchedWriteVecTruncate,
                                 truncstore_us_vi8, masked_truncstore_us_vi8,
                                 X86vtruncus, X86vmtruncus>;

defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc,
                               SchedWriteVecTruncate, truncstorevi16,
                               masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs,
                                SchedWriteVecTruncate, truncstore_s_vi16,
                                masked_truncstore_s_vi16, X86vtruncs,
                                X86vmtruncs>;
defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
                                 select_truncus, SchedWriteVecTruncate,
                                 truncstore_us_vi16, masked_truncstore_us_vi16,
                                 X86vtruncus, X86vmtruncus>;

defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc,
                               SchedWriteVecTruncate, truncstorevi8,
                               masked_truncstorevi8, X86vtrunc,
                               X86vmtrunc>;
defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs,
                                SchedWriteVecTruncate, truncstore_s_vi8,
                                masked_truncstore_s_vi8, X86vtruncs,
                                X86vmtruncs>;
defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
                                 select_truncus, SchedWriteVecTruncate,
                                 truncstore_us_vi8, masked_truncstore_us_vi8,
                                 X86vtruncus, X86vmtruncus>;

// Without VL, widen the 256-bit source into a ZMM register and use the
// 512-bit truncating move, then extract the low 128 bits.
let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
         (v8i16 (EXTRACT_SUBREG
                 (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                              VR256X:$src, sub_ymm)))), sub_xmm))>;
def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
         (v4i32 (EXTRACT_SUBREG
                 (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                            VR256X:$src, sub_ymm)))), sub_xmm))>;
}

let Predicates = [HasBWI, NoVLX, HasEVEX512] in {
def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
         (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
                                             VR256X:$src, sub_ymm))), sub_xmm))>;
}

// Without BWI we can't use vXi16/vXi8 vselect so we have to use vmtrunc nodes.
// Fold masked-truncate nodes onto the merge-masked (rrk) and zero-masked
// (rrkz) register forms of the named truncating-move instruction.
multiclass mtrunc_lowering<string InstrName, SDNode OpNode,
                           X86VectorVTInfo DestInfo,
                           X86VectorVTInfo SrcInfo> {
  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
                                 DestInfo.RC:$src0,
                                 SrcInfo.KRCWM:$mask)),
            (!cast<Instruction>(InstrName#"rrk") DestInfo.RC:$src0,
                                                 SrcInfo.KRCWM:$mask,
                                                 SrcInfo.RC:$src)>;

  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
                                 DestInfo.ImmAllZerosV,
                                 SrcInfo.KRCWM:$mask)),
            (!cast<Instruction>(InstrName#"rrkz") SrcInfo.KRCWM:$mask,
                                                  SrcInfo.RC:$src)>;
}

let Predicates = [HasVLX] in {
defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>;
defm : mtrunc_lowering<"VPMOVSDWZ256", X86vmtruncs, v8i16x_info, v8i32x_info>;
defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>;
}

let Predicates = [HasAVX512] in {
defm : mtrunc_lowering<"VPMOVDWZ", X86vmtrunc, v16i16x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVSDWZ", X86vmtruncs, v16i16x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>;

defm : mtrunc_lowering<"VPMOVDBZ", X86vmtrunc, v16i8x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVSDBZ", X86vmtruncs, v16i8x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>;

defm : mtrunc_lowering<"VPMOVQWZ", X86vmtrunc, v8i16x_info, v8i64_info>;
defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>;
defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>;
}

// Masked rr/rm extend forms shared by all VPMOVSX*/VPMOVZX* widths.
multiclass avx512_pmovx_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
                               X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
                               X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
  let ExeDomain = DestInfo.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
                (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
                (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
              EVEX, Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                (ins x86memop:$src), OpcodeStr ,"$src", "$src",
                (DestInfo.VT (LdFrag addr:$src))>,
              EVEX, Sched<[sched.Folded]>;
  }
}

// Per element-width-pair wrappers. Where the register source uses only a low
// subvector, the in-vector node (InVecNode) is used instead of OpNode.
multiclass avx512_pmovx_bw<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode InVecNode, string ExtTy,
                           X86SchedWriteWidths sched,
                           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasBWI] in {
    defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v8i16x_info,
                     v16i8x_info, i64mem, LdFrag, InVecNode>,
                     EVEX_CD8<8, CD8VH>, T8, PD, EVEX_V128, WIG;

    defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v16i16x_info,
                     v16i8x_info, i128mem, LdFrag, OpNode>,
                     EVEX_CD8<8, CD8VH>, T8, PD, EVEX_V256, WIG;
  }
  let Predicates = [HasBWI] in {
    defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v32i16_info,
                     v32i8x_info, i256mem, LdFrag, OpNode>,
                     EVEX_CD8<8, CD8VH>, T8, PD, EVEX_V512, WIG;
  }
}

multiclass avx512_pmovx_bd<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode InVecNode, string ExtTy,
                           X86SchedWriteWidths sched,
                           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v4i32x_info,
                     v16i8x_info, i32mem, LdFrag, InVecNode>,
                     EVEX_CD8<8, CD8VQ>, T8, PD, EVEX_V128, WIG;

    defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v8i32x_info,
                     v16i8x_info, i64mem, LdFrag, InVecNode>,
                     EVEX_CD8<8, CD8VQ>, T8, PD, EVEX_V256, WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v16i32_info,
                     v16i8x_info, i128mem, LdFrag, OpNode>,
                     EVEX_CD8<8, CD8VQ>, T8, PD, EVEX_V512, WIG;
  }
}

// byte->quad always consumes a low subvector, so only InVecNode is taken.
multiclass avx512_pmovx_bq<bits<8> opc, string OpcodeStr,
                           SDNode InVecNode, string ExtTy,
                           X86SchedWriteWidths sched,
                           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
                     v16i8x_info, i16mem, LdFrag, InVecNode>,
                     EVEX_CD8<8, CD8VO>, T8, PD, EVEX_V128, WIG;

    defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
                     v16i8x_info, i32mem, LdFrag, InVecNode>,
                     EVEX_CD8<8, CD8VO>, T8, PD, EVEX_V256, WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
                     v16i8x_info, i64mem, LdFrag, InVecNode>,
                     EVEX_CD8<8, CD8VO>, T8, PD, EVEX_V512, WIG;
  }
}

multiclass avx512_pmovx_wd<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode InVecNode, string ExtTy,
                           X86SchedWriteWidths sched,
                           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v4i32x_info,
                     v8i16x_info, i64mem, LdFrag, InVecNode>,
                     EVEX_CD8<16, CD8VH>, T8, PD, EVEX_V128, WIG;

    defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v8i32x_info,
                     v8i16x_info, i128mem, LdFrag, OpNode>,
                     EVEX_CD8<16, CD8VH>, T8, PD, EVEX_V256, WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v16i32_info,
                     v16i16x_info, i256mem, LdFrag, OpNode>,
                     EVEX_CD8<16, CD8VH>, T8, PD, EVEX_V512, WIG;
  }
}

multiclass avx512_pmovx_wq<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode InVecNode, string ExtTy,
                           X86SchedWriteWidths sched,
                           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
                     v8i16x_info, i32mem, LdFrag, InVecNode>,
                     EVEX_CD8<16, CD8VQ>, T8, PD, EVEX_V128, WIG;

    defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
                     v8i16x_info, i64mem, LdFrag, InVecNode>,
                     EVEX_CD8<16, CD8VQ>, T8, PD, EVEX_V256, WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
                     v8i16x_info, i128mem, LdFrag, OpNode>,
                     EVEX_CD8<16, CD8VQ>, T8, PD, EVEX_V512, WIG;
  }
}

multiclass avx512_pmovx_dq<bits<8> opc, string OpcodeStr,
                           SDNode OpNode, SDNode InVecNode, string ExtTy,
                           X86SchedWriteWidths sched,
                           PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {

  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
                     v4i32x_info, i64mem, LdFrag, InVecNode>,
                     EVEX_CD8<32, CD8VH>, T8, PD, EVEX_V128;

    defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
                     v4i32x_info, i128mem, LdFrag, OpNode>,
                     EVEX_CD8<32, CD8VH>, T8, PD, EVEX_V256;
  }
  let Predicates = [HasAVX512] in {
    defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
                     v8i32x_info, i256mem, LdFrag, OpNode>,
                     EVEX_CD8<32, CD8VH>, T8, PD, EVEX_V512;
  }
}

defm VPMOVZXBW : avx512_pmovx_bw<0x30, "vpmovzxbw", zext, zext_invec, "z", SchedWriteVecExtend>;
defm VPMOVZXBD : avx512_pmovx_bd<0x31, "vpmovzxbd", zext, zext_invec, "z", SchedWriteVecExtend>;
defm VPMOVZXBQ : avx512_pmovx_bq<0x32, "vpmovzxbq", zext_invec, "z", SchedWriteVecExtend>;
defm VPMOVZXWD : avx512_pmovx_wd<0x33, "vpmovzxwd", zext, zext_invec, "z", SchedWriteVecExtend>;
defm VPMOVZXWQ : avx512_pmovx_wq<0x34, "vpmovzxwq", zext, zext_invec, "z", SchedWriteVecExtend>;
defm VPMOVZXDQ : avx512_pmovx_dq<0x35, "vpmovzxdq", zext, zext_invec, "z", SchedWriteVecExtend>;

defm VPMOVSXBW: avx512_pmovx_bw<0x20, "vpmovsxbw", sext, sext_invec, "s", SchedWriteVecExtend>;
defm VPMOVSXBD: avx512_pmovx_bd<0x21, "vpmovsxbd", sext, sext_invec, "s", SchedWriteVecExtend>;
defm VPMOVSXBQ: avx512_pmovx_bq<0x22, "vpmovsxbq", sext_invec, "s", SchedWriteVecExtend>;
defm VPMOVSXWD: avx512_pmovx_wd<0x23, "vpmovsxwd", sext, sext_invec, "s", SchedWriteVecExtend>;
defm VPMOVSXWQ: avx512_pmovx_wq<0x24, "vpmovsxwq", sext, sext_invec, "s", SchedWriteVecExtend>;
defm VPMOVSXDQ: avx512_pmovx_dq<0x25, "vpmovsxdq", sext, sext_invec, "s", SchedWriteVecExtend>;


// Patterns that we also need any extend versions of. aext_vector_inreg
// is currently legalized to zext_vector_inreg.
multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
  // 256-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
  def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
            (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
  }

  let Predicates = [HasVLX] in {
  def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
            (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;

  def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
            (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
  }

  // 512-bit patterns
  let Predicates = [HasBWI] in {
  def : Pat<(v32i16 (ExtOp (loadv32i8 addr:$src))),
            (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
  }
  let Predicates = [HasAVX512] in {
  def : Pat<(v16i32 (ExtOp (loadv16i8 addr:$src))),
            (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
  def : Pat<(v16i32 (ExtOp (loadv16i16 addr:$src))),
            (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;

  def : Pat<(v8i64 (ExtOp (loadv8i16 addr:$src))),
            (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;

  def : Pat<(v8i64 (ExtOp (loadv8i32 addr:$src))),
            (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
  }
}

// Adds the in-vector (partial-source) load patterns on top of the base
// full-width load patterns.
multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
                                 SDNode InVecOp> :
    AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
  // 128-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  }
  let Predicates = [HasVLX] in {
  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;

  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  }
  // 256-bit patterns
  let Predicates = [HasVLX] in {
  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;

  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;

  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  }
  // 512-bit patterns
  let Predicates = [HasAVX512] in {
  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  }
}

defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;
// Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge
// ext+trunc aggressively making it impossible to legalize the DAG to this
// pattern directly.
let Predicates = [HasAVX512, NoBWI] in {
def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
def: Pat<(v16i8 (trunc (loadv16i16 addr:$src))),
         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
}

//===----------------------------------------------------------------------===//
// GATHER - SCATTER Operations

// FIXME: Improve scheduling of gather/scatter instructions.
// Single masked-gather memory form. The destination is tied to $src1
// (pass-through for masked-off elements), the mask is read and written back
// through $mask_wb, and $dst is earlyclobber per the Constraints string below.
// No patterns are attached; selection is done elsewhere (hasSideEffects = 0,
// mayLoad = 1).
multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
  let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
      ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
  def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
            (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
            !strconcat(OpcodeStr#_.Suffix,
            "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
            []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
            Sched<[WriteLoad, WriteVecMaskedGatherWriteback]>;
}

// 64-bit element gathers: the "d" variants use dword indices, the "q"
// variants qword indices (hence the differing index memory operands).
multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
                              AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512,
                                      vy512xmem>, EVEX_V512, REX_W;
  defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info512,
                                      vz512mem>, EVEX_V512, REX_W;
let Predicates = [HasVLX] in {
  defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
                                              vx256xmem>, EVEX_V256, REX_W;
  defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info256,
                                              vy256xmem>, EVEX_V256, REX_W;
  defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
                                              vx128xmem>, EVEX_V128, REX_W;
  defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
                                              vx128xmem>, EVEX_V128, REX_W;
}
}

// 32-bit element gathers. Note the qword-index forms produce half as many
// elements as the index vector, so they use the next-smaller data VT
// (e.g. Z uses _.info256, Z128's Q form masks with VK2WM).
multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
                              AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512, vz512mem>,
                                       EVEX_V512;
  defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info256, vz256mem>,
                                       EVEX_V512;
let Predicates = [HasVLX] in {
  defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
                                          vy256xmem>, EVEX_V256;
  defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info128,
                                          vy128xmem>, EVEX_V256;
  defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
                                          vx128xmem>, EVEX_V128;
  defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
                                          vx64xmem, VK2WM>, EVEX_V128;
}
}


defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
               avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;

defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
                avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;

// Single masked-scatter memory form. A scatter has no vector result; the only
// output is the written-back mask register ($mask_wb, tied to $mask).
multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {

let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain,
    hasSideEffects = 0 in

  def mr  : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
            (ins memop:$dst, MaskRC:$mask, _.RC:$src),
            !strconcat(OpcodeStr#_.Suffix,
            "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
            []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
            Sched<[WriteStore]>;
}

// 64-bit element scatters, mirroring avx512_gather_q_pd above.
multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
                               AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm
NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512,
                                      vy512xmem>, EVEX_V512, REX_W;
  defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info512,
                                      vz512mem>, EVEX_V512, REX_W;
let Predicates = [HasVLX] in {
  defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
                                              vx256xmem>, EVEX_V256, REX_W;
  defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info256,
                                              vy256xmem>, EVEX_V256, REX_W;
  defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
                                              vx128xmem>, EVEX_V128, REX_W;
  defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
                                              vx128xmem>, EVEX_V128, REX_W;
}
}

// 32-bit element scatters, mirroring avx512_gather_d_ps above (qword-index
// forms use the next-smaller data VT; the 128-bit Q form masks with VK2WM).
multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
                               AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512, vz512mem>,
                                        EVEX_V512;
  defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info256, vz256mem>,
                                        EVEX_V512;
let Predicates = [HasVLX] in {
  defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
                                           vy256xmem>, EVEX_V256;
  defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
                                           vy128xmem>, EVEX_V256;
  defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
                                           vx128xmem>, EVEX_V128;
  defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
                                           vx64xmem, VK2WM>, EVEX_V128;
}
}

defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
                avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;

defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
                 avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;

// prefetch
// Gather/scatter prefetch hints: take only a mask and a vector memory
// operand, produce no results. Marked both mayLoad and mayStore so neither
// loads nor stores are reordered across them.
multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
                                       RegisterClass KRC, X86MemOperand memop> {
  let mayLoad = 1, mayStore = 1 in
  def m  : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
               !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
               EVEX, EVEX_K, Sched<[WriteLoad]>;
}

defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
                     VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
                     VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
                     VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
                     VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
                      VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
                      VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
                      VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPD:
avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
                      VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
                      VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
                      VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
                      VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
                      VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;

// VPMOVM2* register form: sign-extend a mask register into a vector
// (each lane becomes all-ones or all-zeros, per the (sext KRC:$src) pattern).
multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr, SchedWrite Sched> {
def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
                  !strconcat(OpcodeStr#Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
                  [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
                  EVEX, Sched<[Sched]>;
}

multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
                                 string OpcodeStr, Predicate prd> {
let Predicates = [prd] in
  defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr, WriteVecMoveZ>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr, WriteVecMoveY>, EVEX_V256;
    defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr, WriteVecMoveX>, EVEX_V128;
  }
}

defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , REX_W;
defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , REX_W;

// VPMOV*2M register form: move the sign bits of a vector into a mask
// register, matched here as "0 > x" (X86pcmpgtm with an all-zeros LHS).
multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
    def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
                        !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                        [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
                        EVEX, Sched<[WriteMove]>;
}

// Use 512bit version to implement 128/256 bit in case NoVLX.
// The narrow source is inserted into an undef 512-bit register
// (INSERT_SUBREG of IMPLICIT_DEF), the 512-bit instruction is used, and the
// result is copied to the narrow mask class.
multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
                                           X86VectorVTInfo _,
                                           string Name> {

  def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
            (_.KVT (COPY_TO_REGCLASS
                     (!cast<Instruction>(Name#"Zrr")
                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                      _.RC:$src, _.SubRegIdx)),
                     _.KRC))>;
}

multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
                                         AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
    defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
                                            EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
                                              EVEX_V256;
    defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
                                               EVEX_V128;
  }
  let Predicates = [prd, NoVLX, HasEVEX512] in {
    defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
    defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
  }
}

defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
                                              avx512vl_i8_info, HasBWI>;
defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
                                              avx512vl_i16_info, HasBWI>, REX_W;
defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
                                              avx512vl_i32_info, HasDQI>;
defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
                                              avx512vl_i64_info, HasDQI>, REX_W;

// Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
// is available, but BWI is not. We can't handle this in lowering because
// a target independent DAG combine likes to combine sext and trunc.
let Predicates = [HasDQI, NoBWI] in {
  def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
            (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
  def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
            (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
}

let Predicates = [HasDQI, NoBWI, HasVLX] in {
  def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
            (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - COMPRESS and EXPAND
//

// Register compress plus the two store forms (plain and masked). The rr form
// carries no pattern (null_frag); selection uses the X86compress patterns in
// compress_by_vec_width_lowering below.
multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
                                        string OpcodeStr, X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
              (null_frag)>, AVX5128IBase,
              Sched<[sched]>;

  let mayStore = 1, hasSideEffects = 0 in
  def mr : AVX5128I<opc, MRMDestMem, (outs),
              (ins _.MemOp:$dst, _.RC:$src),
              OpcodeStr # "\t{$src, $dst|$dst, $src}",
              []>, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[sched.Folded]>;

  def mrk : AVX5128I<opc, MRMDestMem, (outs),
              (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
              OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
              []>,
              EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[sched.Folded]>;
}

// Map compressing-store and X86compress nodes onto the mrk/rrk/rrkz forms.
multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
  def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#mrk)
                            addr:$dst, _.KRCWM:$mask, _.RC:$src)>;

  def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#rrk)
                            _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
  def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#rrkz)
                            _.KRCWM:$mask, _.RC:$src)>;
}

multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched,
                                 AVX512VLVectorVTInfo VTInfo,
                                 Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in
  defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>,
           compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [Pred, HasVLX] in {
    defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>,
                compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>,
                compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPCOMPRESS?
defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", WriteVarShuffle256,
                                          avx512vl_i32_info>, EVEX;
defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", WriteVarShuffle256,
                                          avx512vl_i64_info>, EVEX, REX_W;
defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", WriteVarShuffle256,
                                          avx512vl_f32_info>, EVEX;
defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", WriteVarShuffle256,
                                          avx512vl_f64_info>, EVEX, REX_W;

// expand
// Register and load forms of expand. Both carry null_frag; selection uses
// the X86expand / X86mExpandingLoad patterns in expand_by_vec_width_lowering.
multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
                               string OpcodeStr, X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
              (null_frag)>, AVX5128IBase,
              Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
              (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
              (null_frag)>,
            AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Expanding loads with undef and all-zeros pass-through both map to the
// zero-masked load form (rmkz); a register pass-through maps to rmk.
multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
            (!cast<Instruction>(Name#_.ZSuffix#rmkz)
                            _.KRCWM:$mask, addr:$src)>;

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmkz)
                            _.KRCWM:$mask, addr:$src)>;

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
                                     (_.VT _.RC:$src0))),
            (!cast<Instruction>(Name#_.ZSuffix#rmk)
                            _.RC:$src0, _.KRCWM:$mask, addr:$src)>;

  def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#rrk)
                            _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
  def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#rrkz)
                            _.KRCWM:$mask, _.RC:$src)>;
}

multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in
  defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>,
           expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [Pred, HasVLX] in {
    defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>,
                expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>,
                expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPEXPAND?
defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256,
                                      avx512vl_i32_info>, EVEX;
defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256,
                                      avx512vl_i64_info>, EVEX, REX_W;
defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256,
                                      avx512vl_f32_info>, EVEX;
defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256,
                                      avx512vl_f64_info>, EVEX, REX_W;

//handle instruction reg_vec1 = op(reg_vec,imm)
//                              op(mem_vec,imm)
//                              op(broadcast(eltVt),imm)
//all instruction created with FROUND_CURRENT
// Unary packed-FP op with an 8-bit immediate: register, full-vector load and
// broadcast-load forms. OpNode/MaskOpNode are split so the masked forms can
// use a different node (see AVX512_maskable_split).
multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr,
                                      SDPatternOperator OpNode,
                                      SDPatternOperator MaskOpNode,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rri : AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
                      (OpNode (_.VT _.RC:$src1), (i32 timm:$src2)),
                      (MaskOpNode (_.VT
_.RC:$src1), (i32 timm:$src2))>,
                      Sched<[sched]>;
  defm rmi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.MemOp:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
                    (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                            (i32 timm:$src2)),
                    (MaskOpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                (i32 timm:$src2))>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmbi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix, "$src2, ${src1}"#_.BroadcastStr,
                    "${src1}"#_.BroadcastStr#", $src2",
                    (OpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
                            (i32 timm:$src2)),
                    (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
                                (i32 timm:$src2))>, EVEX_B,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// Register-only {sae} (suppress-all-exceptions) variant; only Uses = [MXCSR],
// no mayRaiseFPException.
multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                          SDNode OpNode, X86FoldableSchedWrite sched,
                                          X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr#_.Suffix, "$src2, {sae}, $src1",
                      "$src1, {sae}, $src2",
                      (OpNode (_.VT _.RC:$src1),
                              (i32 timm:$src2))>,
                      EVEX_B, Sched<[sched]>;
}

// 512-bit gets both the normal and the {sae} form; 128/256-bit (VLX) only
// the normal form.
multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
            AVX512VLVectorVTInfo _, bits<8> opc, SDPatternOperator OpNode,
            SDPatternOperator MaskOpNode, SDNode OpNodeSAE, X86SchedWriteWidths sched,
            Predicate prd>{
  let Predicates = [prd] in {
    defm Z    : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
                                           sched.ZMM, _.info512>,
                avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE,
                                               sched.ZMM, _.info512>, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
                                           sched.XMM, _.info128>, EVEX_V128;
    defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
                                           sched.YMM, _.info256>, EVEX_V256;
  }
}

//handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
//                               op(reg_vec2,mem_vec,imm)
//                               op(reg_vec2,broadcast(eltVt),imm)
//all instruction created with FROUND_CURRENT
// Binary packed-FP op with an 8-bit immediate: reg/reg, reg/mem and
// reg/broadcast forms.
multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 timm:$src3))>,
                      Sched<[sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (bitconvert (_.LdFrag addr:$src2))),
                            (i32 timm:$src3))>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                    OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
                    "$src1, ${src2}"#_.BroadcastStr#", $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (_.BroadcastLdFrag addr:$src2)),
                            (i32 timm:$src3))>, EVEX_B,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

//handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
//                               op(reg_vec2,mem_vec,imm)
multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo
DestInfo,
                              X86VectorVTInfo SrcInfo>{
  // Source and destination VTs may differ (e.g. shuffle-style ops).
  let ExeDomain = DestInfo.ExeDomain, ImmT = Imm8 in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
                  (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                               (SrcInfo.VT SrcInfo.RC:$src2),
                               (i8 timm:$src3)))>,
                  Sched<[sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                             (SrcInfo.VT (bitconvert
                                                (SrcInfo.LdFrag addr:$src2))),
                             (i8 timm:$src3)))>,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

//handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
//                               op(reg_vec2,mem_vec,imm)
//                               op(reg_vec2,broadcast(eltVt),imm)
// Same-VT variant of avx512_3Op_rm_imm8 that additionally provides the
// broadcast-load form.
multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, X86VectorVTInfo _>:
  avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{

  let ExeDomain = _.ExeDomain, ImmT = Imm8 in
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
                "$src1, ${src2}"#_.BroadcastStr#", $src3",
                (OpNode (_.VT _.RC:$src1),
                        (_.VT (_.BroadcastLdFrag addr:$src2)),
                        (i8 timm:$src3))>, EVEX_B,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
}

//handle scalar instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
//                                      op(reg_vec2,mem_scalar,imm)
multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 timm:$src3))>,
                      Sched<[sched]>;
  defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.ScalarIntMemFrags addr:$src2),
                            (i32 timm:$src3))>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

//handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode, X86FoldableSchedWrite sched,
                                    X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, {sae}, $src2, $src1",
                      "$src1, $src2, {sae}, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 timm:$src3))>,
                      EVEX_B, Sched<[sched]>;
}

//handle scalar instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                    X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, {sae}, $src2, $src1",
                      "$src1, $src2, {sae}, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 timm:$src3))>,
                      EVEX_B, Sched<[sched]>;
}

// 512-bit gets normal + {sae} forms; 128/256-bit (VLX) only the normal form.
multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
                SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
  let Predicates = [prd] in {
    defm Z    : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
                avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, sched.ZMM, _.info512>,
                                  EVEX_V512;

  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                                  EVEX_V128;
    defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                                  EVEX_V256;
  }
}

multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
                   X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo,
                   AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
  let Predicates = [Pred] in {
    defm Z : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512,
                                SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX, VVVV;
  }
  let Predicates = [Pred, HasVLX] in {
     defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128,
                                    SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX, VVVV;
     defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256,
                                    SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX, VVVV;
  }
}

multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
                                  bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched,
                                  Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in {
    defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
                             EVEX_V512;
  }
  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                                EVEX_V128;
    defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                                EVEX_V256;
  }
}

// Scalar ops are XMM-only: one def with normal + {sae} forms.
multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
                  X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
                  SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> {
  let Predicates = [prd] in {
     defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
              avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeSAE, sched.XMM, _>;
  }
}

// Instantiate PH/PS/PD element-size variants; the PH form is gated on
// HasFP16 regardless of prd.
multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
                    bits<8> opcPs, bits<8> opcPd, SDPatternOperator OpNode,
                    SDPatternOperator MaskOpNode, SDNode OpNodeSAE,
                    X86SchedWriteWidths sched, Predicate prd>{
  defm PH : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f16_info,
                            opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, HasFP16>,
                            AVX512PSIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
  defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
                            opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
                            AVX512AIi8Base, EVEX, EVEX_CD8<32, CD8VF>;
  defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
                            opcPd, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
                            AVX512AIi8Base, EVEX, EVEX_CD8<64, CD8VF>, REX_W;
}

defm VREDUCE   : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
                              X86VReduce, X86VReduce, X86VReduceSAE,
                              SchedWriteFRnd, HasDQI>;
defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
                              X86any_VRndScale, X86VRndScale, X86VRndScaleSAE,
                              SchedWriteFRnd, HasAVX512>;
defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
                              X86VGetMant, X86VGetMant, X86VGetMantSAE,
                              SchedWriteFRnd, HasAVX512>;

defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
                                                0x50, X86VRange, X86VRangeSAE,
                                                SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
defm VRANGEPS :
avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info, 10917 0x50, X86VRange, X86VRangeSAE, 10918 SchedWriteFAdd, HasDQI>, 10919 AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>; 10920 10921defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd", 10922 f64x_info, 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>, 10923 AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W; 10924defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info, 10925 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>, 10926 AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>; 10927 10928defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info, 10929 0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>, 10930 AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W; 10931defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info, 10932 0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>, 10933 AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>; 10934defm VREDUCESH: avx512_common_fp_sae_scalar_imm<"vreducesh", f16x_info, 10935 0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasFP16>, 10936 AVX512PSIi8Base, TA, VEX_LIG, EVEX, VVVV, EVEX_CD8<16, CD8VT1>; 10937 10938defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info, 10939 0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>, 10940 AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W; 10941defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info, 10942 0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>, 10943 AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>; 10944defm VGETMANTSH: avx512_common_fp_sae_scalar_imm<"vgetmantsh", f16x_info, 10945 0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasFP16>, 10946 AVX512PSIi8Base, TA, VEX_LIG, EVEX, VVVV, EVEX_CD8<16, CD8VT1>; 10947 10948multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr, 10949 
X86FoldableSchedWrite sched, 10950 X86VectorVTInfo _, 10951 X86VectorVTInfo CastInfo> { 10952 let ExeDomain = _.ExeDomain in { 10953 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 10954 (ins _.RC:$src1, _.RC:$src2, u8imm:$src3), 10955 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 10956 (_.VT (bitconvert 10957 (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2, 10958 (i8 timm:$src3)))))>, 10959 Sched<[sched]>; 10960 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 10961 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3), 10962 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 10963 (_.VT 10964 (bitconvert 10965 (CastInfo.VT (X86Shuf128 _.RC:$src1, 10966 (CastInfo.LdFrag addr:$src2), 10967 (i8 timm:$src3)))))>, 10968 Sched<[sched.Folded, sched.ReadAfterFold]>; 10969 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 10970 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3), 10971 OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1", 10972 "$src1, ${src2}"#_.BroadcastStr#", $src3", 10973 (_.VT 10974 (bitconvert 10975 (CastInfo.VT 10976 (X86Shuf128 _.RC:$src1, 10977 (_.BroadcastLdFrag addr:$src2), 10978 (i8 timm:$src3)))))>, EVEX_B, 10979 Sched<[sched.Folded, sched.ReadAfterFold]>; 10980 } 10981} 10982 10983multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched, 10984 AVX512VLVectorVTInfo _, 10985 AVX512VLVectorVTInfo CastInfo, bits<8> opc>{ 10986 let Predicates = [HasAVX512] in 10987 defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched, 10988 _.info512, CastInfo.info512>, EVEX_V512; 10989 10990 let Predicates = [HasAVX512, HasVLX] in 10991 defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched, 10992 _.info256, CastInfo.info256>, EVEX_V256; 10993} 10994 10995defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256, 10996 avx512vl_f32_info, avx512vl_f64_info, 0x23>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>; 10997defm VSHUFF64X2 : 
avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256, 10998 avx512vl_f64_info, avx512vl_f64_info, 0x23>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W; 10999defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256, 11000 avx512vl_i32_info, avx512vl_i64_info, 0x43>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>; 11001defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256, 11002 avx512vl_i64_info, avx512vl_i64_info, 0x43>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W; 11003 11004multiclass avx512_valign<bits<8> opc, string OpcodeStr, 11005 X86FoldableSchedWrite sched, X86VectorVTInfo _>{ 11006 let ExeDomain = _.ExeDomain in { 11007 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 11008 (ins _.RC:$src1, _.RC:$src2, u8imm:$src3), 11009 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 11010 (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 timm:$src3)))>, 11011 Sched<[sched]>; 11012 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 11013 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3), 11014 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", 11015 (_.VT (X86VAlign _.RC:$src1, 11016 (bitconvert (_.LdFrag addr:$src2)), 11017 (i8 timm:$src3)))>, 11018 Sched<[sched.Folded, sched.ReadAfterFold]>; 11019 11020 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 11021 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3), 11022 OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1", 11023 "$src1, ${src2}"#_.BroadcastStr#", $src3", 11024 (X86VAlign _.RC:$src1, 11025 (_.VT (_.BroadcastLdFrag addr:$src2)), 11026 (i8 timm:$src3))>, EVEX_B, 11027 Sched<[sched.Folded, sched.ReadAfterFold]>; 11028 } 11029} 11030 11031multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched, 11032 AVX512VLVectorVTInfo _> { 11033 let Predicates = [HasAVX512] in { 11034 defm Z : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>, 11035 AVX512AIi8Base, EVEX, VVVV, EVEX_V512; 11036 } 
11037 let Predicates = [HasAVX512, HasVLX] in { 11038 defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>, 11039 AVX512AIi8Base, EVEX, VVVV, EVEX_V128; 11040 // We can't really override the 256-bit version so change it back to unset. 11041 defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>, 11042 AVX512AIi8Base, EVEX, VVVV, EVEX_V256; 11043 } 11044} 11045 11046defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle, 11047 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; 11048defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle, 11049 avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, 11050 REX_W; 11051 11052defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr", 11053 SchedWriteShuffle, avx512vl_i8_info, 11054 avx512vl_i8_info>, EVEX_CD8<8, CD8VF>; 11055 11056// Fragments to help convert valignq into masked valignd. Or valignq/valignd 11057// into vpalignr. 11058def ValignqImm32XForm : SDNodeXForm<timm, [{ 11059 return getI8Imm(N->getZExtValue() * 2, SDLoc(N)); 11060}]>; 11061def ValignqImm8XForm : SDNodeXForm<timm, [{ 11062 return getI8Imm(N->getZExtValue() * 8, SDLoc(N)); 11063}]>; 11064def ValigndImm8XForm : SDNodeXForm<timm, [{ 11065 return getI8Imm(N->getZExtValue() * 4, SDLoc(N)); 11066}]>; 11067 11068multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode, 11069 X86VectorVTInfo From, X86VectorVTInfo To, 11070 SDNodeXForm ImmXForm> { 11071 def : Pat<(To.VT (vselect_mask To.KRCWM:$mask, 11072 (bitconvert 11073 (From.VT (OpNode From.RC:$src1, From.RC:$src2, 11074 timm:$src3))), 11075 To.RC:$src0)), 11076 (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask, 11077 To.RC:$src1, To.RC:$src2, 11078 (ImmXForm timm:$src3))>; 11079 11080 def : Pat<(To.VT (vselect_mask To.KRCWM:$mask, 11081 (bitconvert 11082 (From.VT (OpNode From.RC:$src1, From.RC:$src2, 11083 timm:$src3))), 11084 To.ImmAllZerosV)), 11085 (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask, 11086 To.RC:$src1, To.RC:$src2, 11087 
(ImmXForm timm:$src3))>; 11088 11089 def : Pat<(To.VT (vselect_mask To.KRCWM:$mask, 11090 (bitconvert 11091 (From.VT (OpNode From.RC:$src1, 11092 (From.LdFrag addr:$src2), 11093 timm:$src3))), 11094 To.RC:$src0)), 11095 (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask, 11096 To.RC:$src1, addr:$src2, 11097 (ImmXForm timm:$src3))>; 11098 11099 def : Pat<(To.VT (vselect_mask To.KRCWM:$mask, 11100 (bitconvert 11101 (From.VT (OpNode From.RC:$src1, 11102 (From.LdFrag addr:$src2), 11103 timm:$src3))), 11104 To.ImmAllZerosV)), 11105 (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask, 11106 To.RC:$src1, addr:$src2, 11107 (ImmXForm timm:$src3))>; 11108} 11109 11110multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode, 11111 X86VectorVTInfo From, 11112 X86VectorVTInfo To, 11113 SDNodeXForm ImmXForm> : 11114 avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> { 11115 def : Pat<(From.VT (OpNode From.RC:$src1, 11116 (bitconvert (To.VT (To.BroadcastLdFrag addr:$src2))), 11117 timm:$src3)), 11118 (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2, 11119 (ImmXForm timm:$src3))>; 11120 11121 def : Pat<(To.VT (vselect_mask To.KRCWM:$mask, 11122 (bitconvert 11123 (From.VT (OpNode From.RC:$src1, 11124 (bitconvert 11125 (To.VT (To.BroadcastLdFrag addr:$src2))), 11126 timm:$src3))), 11127 To.RC:$src0)), 11128 (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask, 11129 To.RC:$src1, addr:$src2, 11130 (ImmXForm timm:$src3))>; 11131 11132 def : Pat<(To.VT (vselect_mask To.KRCWM:$mask, 11133 (bitconvert 11134 (From.VT (OpNode From.RC:$src1, 11135 (bitconvert 11136 (To.VT (To.BroadcastLdFrag addr:$src2))), 11137 timm:$src3))), 11138 To.ImmAllZerosV)), 11139 (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask, 11140 To.RC:$src1, addr:$src2, 11141 (ImmXForm timm:$src3))>; 11142} 11143 11144let Predicates = [HasAVX512] in { 11145 // For 512-bit we lower to the widest element type we can. 
So we only need 11146 // to handle converting valignq to valignd. 11147 defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info, 11148 v16i32_info, ValignqImm32XForm>; 11149} 11150 11151let Predicates = [HasVLX] in { 11152 // For 128-bit we lower to the widest element type we can. So we only need 11153 // to handle converting valignq to valignd. 11154 defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info, 11155 v4i32x_info, ValignqImm32XForm>; 11156 // For 256-bit we lower to the widest element type we can. So we only need 11157 // to handle converting valignq to valignd. 11158 defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info, 11159 v8i32x_info, ValignqImm32XForm>; 11160} 11161 11162let Predicates = [HasVLX, HasBWI] in { 11163 // We can turn 128 and 256 bit VALIGND/VALIGNQ into VPALIGNR. 11164 defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info, 11165 v16i8x_info, ValignqImm8XForm>; 11166 defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info, 11167 v16i8x_info, ValigndImm8XForm>; 11168} 11169 11170defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw", 11171 SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>, 11172 EVEX_CD8<8, CD8VF>; 11173 11174multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, 11175 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 11176 let ExeDomain = _.ExeDomain in { 11177 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 11178 (ins _.RC:$src1), OpcodeStr, 11179 "$src1", "$src1", 11180 (_.VT (OpNode (_.VT _.RC:$src1)))>, EVEX, AVX5128IBase, 11181 Sched<[sched]>; 11182 11183 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 11184 (ins _.MemOp:$src1), OpcodeStr, 11185 "$src1", "$src1", 11186 (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1)))))>, 11187 EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>, 11188 Sched<[sched.Folded]>; 11189 } 11190} 11191 
// Adds an embedded-broadcast (rmb) form on top of the rr/rm forms
// inherited from avx512_unary_rm.
multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> :
            avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.ScalarMemOp:$src1), OpcodeStr,
                  "${src1}"#_.BroadcastStr,
                  "${src1}"#_.BroadcastStr,
                  (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src1))))>,
             EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded]>;
}

// Unary op at all three vector widths; 128/256-bit forms require VLX.
multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched,
                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
                EVEX_V128;
  }
}

// Same as avx512_unary_rm_vl but with the broadcast form included.
multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo,
                               Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
                EVEX_V128;
  }
}

// Doubleword ("d") and quadword ("q") flavors; broadcast forms included.
multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd> {
  defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched,
                               avx512vl_i64_info, prd>, REX_W;
  defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched,
                               avx512vl_i32_info, prd>;
}

// Byte ("b") and word ("w") flavors; no broadcast forms (byte/word
// elements have no embedded broadcast).
multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd> {
  defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched,
                              avx512vl_i16_info, prd>, WIG;
  defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched,
                              avx512vl_i8_info, prd>, WIG;
}

// All four element-size flavors: b/w gated on HasBWI, d/q on HasAVX512.
multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched> {
  defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched,
                                    HasAVX512>,
              avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched,
                                    HasBWI>;
}

defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
                                    SchedWriteVecALU>;

// VPABS: Use 512bit version to implement 128/256 bit in case NoVLX.
let Predicates = [HasAVX512, NoVLX, HasEVEX512] in {
  def : Pat<(v4i64 (abs VR256X:$src)),
            (EXTRACT_SUBREG
                (VPABSQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
             sub_ymm)>;
  def : Pat<(v2i64 (abs VR128X:$src)),
            (EXTRACT_SUBREG
                (VPABSQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
             sub_xmm)>;
}

// Use 512bit version to implement 128/256 bit.
11280multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode, 11281 AVX512VLVectorVTInfo _, Predicate prd> { 11282 let Predicates = [prd, NoVLX, HasEVEX512] in { 11283 def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))), 11284 (EXTRACT_SUBREG 11285 (!cast<Instruction>(InstrStr # "Zrr") 11286 (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)), 11287 _.info256.RC:$src1, 11288 _.info256.SubRegIdx)), 11289 _.info256.SubRegIdx)>; 11290 11291 def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1))), 11292 (EXTRACT_SUBREG 11293 (!cast<Instruction>(InstrStr # "Zrr") 11294 (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)), 11295 _.info128.RC:$src1, 11296 _.info128.SubRegIdx)), 11297 _.info128.SubRegIdx)>; 11298 } 11299} 11300 11301defm VPLZCNT : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz, 11302 SchedWriteVecIMul, HasCDI>; 11303 11304// FIXME: Is there a better scheduler class for VPCONFLICT? 11305defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict, 11306 SchedWriteVecALU, HasCDI>; 11307 11308// VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX. 11309defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>; 11310defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>; 11311 11312//===---------------------------------------------------------------------===// 11313// Counts number of ones - VPOPCNTD and VPOPCNTQ 11314//===---------------------------------------------------------------------===// 11315 11316// FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ? 
defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
                                     SchedWriteVecALU, HasVPOPCNTDQ>;

defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;

//===---------------------------------------------------------------------===//
// Replicate Single FP - MOVSHDUP and MOVSLDUP
//===---------------------------------------------------------------------===//

multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86SchedWriteWidths sched> {
  defm NAME: avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
                                avx512vl_f32_info, HasAVX512>, TB, XS;
}

defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
                                  SchedWriteFShuffle>;
defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
                                  SchedWriteFShuffle>;

//===----------------------------------------------------------------------===//
// AVX-512 - MOVDDUP
//===----------------------------------------------------------------------===//

// 128-bit VMOVDDUP is special-cased: it is selected for X86VBroadcast
// (reg form) / a broadcast load (mem form) rather than X86Movddup.
multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src), OpcodeStr, "$src", "$src",
                   (_.VT (X86VBroadcast (_.VT _.RC:$src)))>, EVEX,
                   Sched<[sched]>;
    defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                 (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
                 (_.VT (_.BroadcastLdFrag addr:$src))>,
                 EVEX, EVEX_CD8<_.EltSize, CD8VH>,
                 Sched<[sched.Folded]>;
  }
}

multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr,
                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
  defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM,
                           VTInfo.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM,
                                VTInfo.info256>, EVEX_V256;
    defm Z128 : avx512_movddup_128<opc, OpcodeStr, sched.XMM,
                                   VTInfo.info128>, EVEX_V128;
  }
}

multiclass avx512_movddup<bits<8> opc, string OpcodeStr,
                          X86SchedWriteWidths sched> {
  defm NAME: avx512_movddup_common<opc, OpcodeStr, sched,
                                   avx512vl_f64_info>, TB, XD, REX_W;
}

defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", SchedWriteFShuffle>;

// Scalar f64 broadcasts (plain, merge-masked, zero-masked) via VMOVDDUPZ128rr.
let Predicates = [HasVLX] in {
def : Pat<(v2f64 (X86VBroadcast f64:$src)),
          (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;

def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
                        (v2f64 VR128X:$src0)),
          (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
                           (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
                        immAllZerosV),
          (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - Unpack Instructions
//===----------------------------------------------------------------------===//

let Uses = []<Register>, mayRaiseFPException = 0 in {
defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, X86Unpckh, HasAVX512,
                                 SchedWriteFShuffleSizes, 0, 1>;
defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, X86Unpckl, HasAVX512,
                                 SchedWriteFShuffleSizes>;
}

defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
                                       SchedWriteShuffle, HasBWI>;

defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
                                       SchedWriteShuffle, HasAVX512>;
defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
                                       SchedWriteShuffle, HasAVX512>;
defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
                                        SchedWriteShuffle, HasAVX512>;
defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
                                        SchedWriteShuffle, HasAVX512>;

//===----------------------------------------------------------------------===//
// AVX-512 - Extract & Insert Integer Instructions
//===----------------------------------------------------------------------===//

// Store-to-memory form of a byte/word element extract; the extracted
// element is truncated to the element width before the store.
multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   X86VectorVTInfo _> {
  def mr : AVX512Ii8<opc, MRMDestMem, (outs),
              (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), timm:$src2))),
                       addr:$dst)]>,
              EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
}

multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
                       (ins _.RC:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set GR32orGR64:$dst,
                             (X86pextrb (_.VT _.RC:$src1), timm:$src2))]>,
                       EVEX, TA, PD, Sched<[WriteVecExtract]>;

    defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TA, PD;
  }
}

multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
                       (ins _.RC:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set GR32orGR64:$dst,
                             (X86pextrw (_.VT _.RC:$src1), timm:$src2))]>,
                       EVEX, TB, PD, Sched<[WriteVecExtract]>;

    // Disassembly-only alternate encoding (opcode 0x15, MRMDestReg).
    let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
    def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
                   (ins _.RC:$src1, u8imm:$src2),
                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                   EVEX, TA, PD, Sched<[WriteVecExtract]>;

    defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TA, PD;
  }
}

multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
                                 RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
                       (ins _.RC:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set GRC:$dst,
                           (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
                       EVEX, TA, PD, Sched<[WriteVecExtract]>;

    def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
                (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                [(store (extractelt (_.VT _.RC:$src1),
                                    imm:$src2),addr:$dst)]>,
                EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TA, PD,
                Sched<[WriteVecExtractSt]>;
  }
}

defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, WIG;
defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, WIG;
defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, REX_W;

// Memory form of an element insert.
multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86VectorVTInfo _, PatFrag LdFrag,
                               SDPatternOperator immoperator> {
  def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
      (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
      OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set _.RC:$dst,
          (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), immoperator:$src3)))]>,
      EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}

multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86VectorVTInfo _, PatFrag LdFrag> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
        (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set _.RC:$dst,
            (OpNode _.RC:$src1, GR32orGR64:$src2, timm:$src3))]>, EVEX, VVVV,
        Sched<[WriteVecInsert]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag, timm>;
  }
}

multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
                                X86VectorVTInfo _, RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
        (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set _.RC:$dst,
            (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
        EVEX, VVVV, TA, PD, Sched<[WriteVecInsert]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
                                    _.ScalarLdFrag, imm>, TA, PD;
  }
}

defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
                                     extloadi8>, TA, PD, WIG;
defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
                                     extloadi16>, TB, PD, WIG;
defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, REX_W;

// Insert a v8i1 mask (moved to a GPR) as a byte element.
let Predicates = [HasAVX512, NoBWI] in {
  def : Pat<(X86pinsrb VR128:$src1,
                       (i32 (anyext (i8 (bitconvert v8i1:$src2)))),
                       timm:$src3),
            (VPINSRBrr VR128:$src1, (i32 (COPY_TO_REGCLASS VK8:$src2, GR32)),
                       timm:$src3)>;
}

let Predicates = [HasBWI] in {
  def : Pat<(X86pinsrb VR128:$src1, (i32 (anyext (i8 GR8:$src2))), timm:$src3),
            (VPINSRBZrr VR128:$src1, (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
                                                    GR8:$src2, sub_8bit), timm:$src3)>;
  def : Pat<(X86pinsrb VR128:$src1,
                       (i32 (anyext (i8 (bitconvert v8i1:$src2)))),
                       timm:$src3),
            (VPINSRBZrr VR128:$src1, (i32 (COPY_TO_REGCLASS VK8:$src2, GR32)),
                        timm:$src3)>;
}

// Always select FP16 instructions if available.
// f16 load/store/bitcast lowered through VPINSRW/VPEXTRW on v8i16.
let Predicates = [HasBWI], AddedComplexity = -10 in {
  def : Pat<(f16 (load addr:$src)), (COPY_TO_REGCLASS (VPINSRWZrm (v8i16 (IMPLICIT_DEF)), addr:$src, 0), FR16X)>;
  def : Pat<(store f16:$src, addr:$dst), (VPEXTRWZmr addr:$dst, (v8i16 (COPY_TO_REGCLASS FR16:$src, VR128)), 0)>;
  def : Pat<(i16 (bitconvert f16:$src)), (EXTRACT_SUBREG (VPEXTRWZrr (v8i16 (COPY_TO_REGCLASS FR16X:$src, VR128X)), 0), sub_16bit)>;
  def : Pat<(f16 (bitconvert i16:$src)), (COPY_TO_REGCLASS (VPINSRWZrr (v8i16 (IMPLICIT_DEF)), (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit), 0), FR16X)>;
}

//===----------------------------------------------------------------------===//
// VSHUFPS - VSHUFPD Operations
//===----------------------------------------------------------------------===//

multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_FP>{
  defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
                                    SchedWriteFShuffle>,
                                    EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
                                    TA, EVEX, VVVV;
}

defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_f32_info>, TB;
defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_f64_info>, TB, PD, REX_W;

//===----------------------------------------------------------------------===//
// AVX-512 - Byte shift Left/Right
//===----------------------------------------------------------------------===//

// VPSLLDQ/VPSRLDQ: whole-register byte shift by imm8, reg and mem forms.
multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
                               Format MRMm, string OpcodeStr,
                               X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  def ri : AVX512<opc, MRMr,
             (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 timm:$src2))))]>,
             Sched<[sched]>;
  def mi : AVX512<opc, MRMm,
           (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
           [(set _.RC:$dst,(_.VT (OpNode
                                  (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                  (i8 timm:$src2))))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
                                   Format MRMm, string OpcodeStr,
                                   X86SchedWriteWidths sched, Predicate prd>{
  let Predicates = [prd] in
    defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                 sched.ZMM, v64i8_info>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    sched.YMM, v32i8x_info>, EVEX_V256;
    defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    sched.XMM, v16i8x_info>, EVEX_V128;
  }
}
defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
                                       SchedWriteShuffle, HasBWI>,
                                       AVX512PDIi8Base, EVEX, VVVV, WIG;
defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
                                       SchedWriteShuffle, HasBWI>,
                                       AVX512PDIi8Base, EVEX, VVVV, WIG;

// VPSADBW: byte inputs (_src), quadword sums (_dst); reg and mem forms.
multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
                                string OpcodeStr, X86FoldableSchedWrite sched,
                                X86VectorVTInfo _dst, X86VectorVTInfo _src> {
  let isCommutable = 1 in
  def rr : AVX512BI<opc, MRMSrcReg,
             (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _dst.RC:$dst,(_dst.VT
                                 (OpNode (_src.VT _src.RC:$src1),
                                         (_src.VT _src.RC:$src2))))]>,
             Sched<[sched]>;
  def rm : AVX512BI<opc, MRMSrcMem,
             (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _dst.RC:$dst,(_dst.VT
                                 (OpNode (_src.VT _src.RC:$src1),
                                         (_src.VT (bitconvert
                                                   (_src.LdFrag addr:$src2))))))]>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
                                    string OpcodeStr, X86SchedWriteWidths sched,
                                    Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM,
                                  v8i64_info, v64i8_info>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM,
                                     v4i64x_info, v32i8x_info>, EVEX_V256;
    defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM,
                                     v2i64x_info, v16i8x_info>, EVEX_V128;
  }
}

defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
                                        SchedWritePSADBW, HasBWI>, EVEX, VVVV, WIG;

// Transforms to swizzle an immediate to enable better matching when
// memory operand isn't in the right place.
// Immediate bit k encodes the result for inputs (op0,op1,op2) with
// k = op0*4 + op1*2 + op2, so permuting operands permutes imm bits.
def VPTERNLOG321_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 1/4 and 3/6.
  uint8_t NewImm = Imm & 0xa5;
  if (Imm & 0x02) NewImm |= 0x10;
  if (Imm & 0x10) NewImm |= 0x02;
  if (Imm & 0x08) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG213_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1.
  // (Bits 2/4 and 3/5 are the imm positions where operands 0 and 1 differ.)
  uint8_t Imm = N->getZExtValue();
  // Swap bits 2/4 and 3/5.
  uint8_t NewImm = Imm & 0xc3;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x20) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG132_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 1/2 and 5/6.
  uint8_t NewImm = Imm & 0x99;
  if (Imm & 0x02) NewImm |= 0x04;
  if (Imm & 0x04) NewImm |= 0x02;
  if (Imm & 0x20) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x20;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG231_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by moving operand 1 to the end.
  uint8_t Imm = N->getZExtValue();
  // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x04;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x08) NewImm |= 0x40;
  if (Imm & 0x10) NewImm |= 0x02;
  if (Imm & 0x20) NewImm |= 0x08;
  if (Imm & 0x40) NewImm |= 0x20;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG312_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by moving operand 2 to the beginning.
  uint8_t Imm = N->getZExtValue();
  // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x10;
  if (Imm & 0x04) NewImm |= 0x02;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x20) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;

// VPTERNLOG: three-source ternary logic; $src1 is tied to $dst.
// rri = reg/reg/reg, rmi = memory in src3, rmbi = broadcast in src3.
multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          X86FoldableSchedWrite sched, X86VectorVTInfo _,
                          string Name>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT _.RC:$src3),
                              (i8 timm:$src4)), 1, 1>,
                      AVX512AIi8Base, EVEX, VVVV, Sched<[sched]>;
  defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT (bitconvert (_.LdFrag addr:$src3))),
                              (i8 timm:$src4)), 1, 0>,
                      AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, ${src3}"#_.BroadcastStr#", $src2",
                      "$src2, ${src3}"#_.BroadcastStr#", $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT (_.BroadcastLdFrag addr:$src3)),
                              (i8 timm:$src4)), 1, 0>, EVEX_B,
                      AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
  }// Constraints = "$src1 = $dst"

  // Additional patterns for
matching passthru operand in other positions. 11754 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11755 (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), 11756 _.RC:$src1)), 11757 (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask, 11758 _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 timm:$src4))>; 11759 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11760 (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 timm:$src4)), 11761 _.RC:$src1)), 11762 (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask, 11763 _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 timm:$src4))>; 11764 11765 // Additional patterns for matching zero masking with loads in other 11766 // positions. 11767 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11768 (OpNode (bitconvert (_.LdFrag addr:$src3)), 11769 _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), 11770 _.ImmAllZerosV)), 11771 (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask, 11772 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>; 11773 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11774 (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)), 11775 _.RC:$src2, (i8 timm:$src4)), 11776 _.ImmAllZerosV)), 11777 (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask, 11778 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>; 11779 11780 // Additional patterns for matching masked loads with different 11781 // operand orders. 
11782 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11783 (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)), 11784 _.RC:$src2, (i8 timm:$src4)), 11785 _.RC:$src1)), 11786 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, 11787 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>; 11788 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11789 (OpNode (bitconvert (_.LdFrag addr:$src3)), 11790 _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), 11791 _.RC:$src1)), 11792 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, 11793 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>; 11794 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11795 (OpNode _.RC:$src2, _.RC:$src1, 11796 (bitconvert (_.LdFrag addr:$src3)), (i8 timm:$src4)), 11797 _.RC:$src1)), 11798 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, 11799 _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>; 11800 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11801 (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)), 11802 _.RC:$src1, (i8 timm:$src4)), 11803 _.RC:$src1)), 11804 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, 11805 _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>; 11806 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11807 (OpNode (bitconvert (_.LdFrag addr:$src3)), 11808 _.RC:$src1, _.RC:$src2, (i8 timm:$src4)), 11809 _.RC:$src1)), 11810 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, 11811 _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>; 11812 11813 // Additional patterns for matching zero masking with broadcasts in other 11814 // positions. 
11815 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11816 (OpNode (_.BroadcastLdFrag addr:$src3), 11817 _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), 11818 _.ImmAllZerosV)), 11819 (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1, 11820 _.KRCWM:$mask, _.RC:$src2, addr:$src3, 11821 (VPTERNLOG321_imm8 timm:$src4))>; 11822 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11823 (OpNode _.RC:$src1, 11824 (_.BroadcastLdFrag addr:$src3), 11825 _.RC:$src2, (i8 timm:$src4)), 11826 _.ImmAllZerosV)), 11827 (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1, 11828 _.KRCWM:$mask, _.RC:$src2, addr:$src3, 11829 (VPTERNLOG132_imm8 timm:$src4))>; 11830 11831 // Additional patterns for matching masked broadcasts with different 11832 // operand orders. 11833 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11834 (OpNode _.RC:$src1, (_.BroadcastLdFrag addr:$src3), 11835 _.RC:$src2, (i8 timm:$src4)), 11836 _.RC:$src1)), 11837 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, 11838 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>; 11839 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11840 (OpNode (_.BroadcastLdFrag addr:$src3), 11841 _.RC:$src2, _.RC:$src1, (i8 timm:$src4)), 11842 _.RC:$src1)), 11843 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, 11844 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>; 11845 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11846 (OpNode _.RC:$src2, _.RC:$src1, 11847 (_.BroadcastLdFrag addr:$src3), 11848 (i8 timm:$src4)), _.RC:$src1)), 11849 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, 11850 _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>; 11851 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 11852 (OpNode _.RC:$src2, 11853 (_.BroadcastLdFrag addr:$src3), 11854 _.RC:$src1, (i8 timm:$src4)), 11855 _.RC:$src1)), 11856 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, 11857 _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>; 11858 def : Pat<(_.VT (vselect_mask 
_.KRCWM:$mask, 11859 (OpNode (_.BroadcastLdFrag addr:$src3), 11860 _.RC:$src1, _.RC:$src2, (i8 timm:$src4)), 11861 _.RC:$src1)), 11862 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, 11863 _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>; 11864} 11865 11866multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched, 11867 AVX512VLVectorVTInfo _> { 11868 let Predicates = [HasAVX512] in 11869 defm Z : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM, 11870 _.info512, NAME>, EVEX_V512; 11871 let Predicates = [HasAVX512, HasVLX] in { 11872 defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM, 11873 _.info128, NAME>, EVEX_V128; 11874 defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM, 11875 _.info256, NAME>, EVEX_V256; 11876 } 11877} 11878 11879defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU, 11880 avx512vl_i32_info>; 11881defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU, 11882 avx512vl_i64_info>, REX_W; 11883 11884// Patterns to implement vnot using vpternlog instead of creating all ones 11885// using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen 11886// so that the result is only dependent on src0. But we use the same source 11887// for all operands to prevent a false dependency. 11888// TODO: We should maybe have a more generalized algorithm for folding to 11889// vpternlog. 
11890let Predicates = [HasAVX512] in { 11891 def : Pat<(v64i8 (vnot VR512:$src)), 11892 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>; 11893 def : Pat<(v32i16 (vnot VR512:$src)), 11894 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>; 11895 def : Pat<(v16i32 (vnot VR512:$src)), 11896 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>; 11897 def : Pat<(v8i64 (vnot VR512:$src)), 11898 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>; 11899} 11900 11901let Predicates = [HasAVX512, NoVLX, HasEVEX512] in { 11902 def : Pat<(v16i8 (vnot VR128X:$src)), 11903 (EXTRACT_SUBREG 11904 (VPTERNLOGQZrri 11905 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 11906 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 11907 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 11908 (i8 15)), sub_xmm)>; 11909 def : Pat<(v8i16 (vnot VR128X:$src)), 11910 (EXTRACT_SUBREG 11911 (VPTERNLOGQZrri 11912 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 11913 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 11914 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 11915 (i8 15)), sub_xmm)>; 11916 def : Pat<(v4i32 (vnot VR128X:$src)), 11917 (EXTRACT_SUBREG 11918 (VPTERNLOGQZrri 11919 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 11920 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 11921 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 11922 (i8 15)), sub_xmm)>; 11923 def : Pat<(v2i64 (vnot VR128X:$src)), 11924 (EXTRACT_SUBREG 11925 (VPTERNLOGQZrri 11926 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 11927 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 11928 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 11929 (i8 15)), sub_xmm)>; 11930 11931 def : Pat<(v32i8 (vnot VR256X:$src)), 11932 (EXTRACT_SUBREG 11933 (VPTERNLOGQZrri 11934 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 
11935 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 11936 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 11937 (i8 15)), sub_ymm)>; 11938 def : Pat<(v16i16 (vnot VR256X:$src)), 11939 (EXTRACT_SUBREG 11940 (VPTERNLOGQZrri 11941 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 11942 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 11943 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 11944 (i8 15)), sub_ymm)>; 11945 def : Pat<(v8i32 (vnot VR256X:$src)), 11946 (EXTRACT_SUBREG 11947 (VPTERNLOGQZrri 11948 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 11949 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 11950 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 11951 (i8 15)), sub_ymm)>; 11952 def : Pat<(v4i64 (vnot VR256X:$src)), 11953 (EXTRACT_SUBREG 11954 (VPTERNLOGQZrri 11955 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 11956 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 11957 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 11958 (i8 15)), sub_ymm)>; 11959} 11960 11961let Predicates = [HasVLX] in { 11962 def : Pat<(v16i8 (vnot VR128X:$src)), 11963 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>; 11964 def : Pat<(v8i16 (vnot VR128X:$src)), 11965 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>; 11966 def : Pat<(v4i32 (vnot VR128X:$src)), 11967 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>; 11968 def : Pat<(v2i64 (vnot VR128X:$src)), 11969 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>; 11970 11971 def : Pat<(v32i8 (vnot VR256X:$src)), 11972 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>; 11973 def : Pat<(v16i16 (vnot VR256X:$src)), 11974 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>; 11975 def : Pat<(v8i32 (vnot VR256X:$src)), 11976 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, 
(i8 15))>; 11977 def : Pat<(v4i64 (vnot VR256X:$src)), 11978 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>; 11979} 11980 11981//===----------------------------------------------------------------------===// 11982// AVX-512 - FixupImm 11983//===----------------------------------------------------------------------===// 11984 11985multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, 11986 X86FoldableSchedWrite sched, X86VectorVTInfo _, 11987 X86VectorVTInfo TblVT>{ 11988 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, 11989 Uses = [MXCSR], mayRaiseFPException = 1 in { 11990 defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 11991 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4), 11992 OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4", 11993 (X86VFixupimm (_.VT _.RC:$src1), 11994 (_.VT _.RC:$src2), 11995 (TblVT.VT _.RC:$src3), 11996 (i32 timm:$src4))>, Sched<[sched]>; 11997 defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 11998 (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4), 11999 OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4", 12000 (X86VFixupimm (_.VT _.RC:$src1), 12001 (_.VT _.RC:$src2), 12002 (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))), 12003 (i32 timm:$src4))>, 12004 Sched<[sched.Folded, sched.ReadAfterFold]>; 12005 defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 12006 (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4), 12007 OpcodeStr#_.Suffix, "$src4, ${src3}"#_.BroadcastStr#", $src2", 12008 "$src2, ${src3}"#_.BroadcastStr#", $src4", 12009 (X86VFixupimm (_.VT _.RC:$src1), 12010 (_.VT _.RC:$src2), 12011 (TblVT.VT (TblVT.BroadcastLdFrag addr:$src3)), 12012 (i32 timm:$src4))>, 12013 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 12014 } // Constraints = "$src1 = $dst" 12015} 12016 12017multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr, 12018 X86FoldableSchedWrite sched, 12019 X86VectorVTInfo _, 
X86VectorVTInfo TblVT> 12020 : avx512_fixupimm_packed<opc, OpcodeStr, sched, _, TblVT> { 12021let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, Uses = [MXCSR] in { 12022 defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 12023 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4), 12024 OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2", 12025 "$src2, $src3, {sae}, $src4", 12026 (X86VFixupimmSAE (_.VT _.RC:$src1), 12027 (_.VT _.RC:$src2), 12028 (TblVT.VT _.RC:$src3), 12029 (i32 timm:$src4))>, 12030 EVEX_B, Sched<[sched]>; 12031 } 12032} 12033 12034multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, 12035 X86FoldableSchedWrite sched, X86VectorVTInfo _, 12036 X86VectorVTInfo _src3VT> { 12037 let Constraints = "$src1 = $dst" , Predicates = [HasAVX512], 12038 ExeDomain = _.ExeDomain in { 12039 defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 12040 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4), 12041 OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4", 12042 (X86VFixupimms (_.VT _.RC:$src1), 12043 (_.VT _.RC:$src2), 12044 (_src3VT.VT _src3VT.RC:$src3), 12045 (i32 timm:$src4))>, Sched<[sched]>, SIMD_EXC; 12046 let Uses = [MXCSR] in 12047 defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 12048 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4), 12049 OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2", 12050 "$src2, $src3, {sae}, $src4", 12051 (X86VFixupimmSAEs (_.VT _.RC:$src1), 12052 (_.VT _.RC:$src2), 12053 (_src3VT.VT _src3VT.RC:$src3), 12054 (i32 timm:$src4))>, 12055 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 12056 defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 12057 (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4), 12058 OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4", 12059 (X86VFixupimms (_.VT _.RC:$src1), 12060 (_.VT _.RC:$src2), 12061 (_src3VT.VT (scalar_to_vector 12062 (_src3VT.ScalarLdFrag addr:$src3))), 12063 (i32 
timm:$src4))>, 12064 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 12065 } 12066} 12067 12068multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched, 12069 AVX512VLVectorVTInfo _Vec, 12070 AVX512VLVectorVTInfo _Tbl> { 12071 let Predicates = [HasAVX512] in 12072 defm Z : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM, 12073 _Vec.info512, _Tbl.info512>, AVX512AIi8Base, 12074 EVEX, VVVV, EVEX_V512; 12075 let Predicates = [HasAVX512, HasVLX] in { 12076 defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.XMM, 12077 _Vec.info128, _Tbl.info128>, AVX512AIi8Base, 12078 EVEX, VVVV, EVEX_V128; 12079 defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.YMM, 12080 _Vec.info256, _Tbl.info256>, AVX512AIi8Base, 12081 EVEX, VVVV, EVEX_V256; 12082 } 12083} 12084 12085defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm", 12086 SchedWriteFAdd.Scl, f32x_info, v4i32x_info>, 12087 AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>; 12088defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm", 12089 SchedWriteFAdd.Scl, f64x_info, v2i64x_info>, 12090 AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W; 12091defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info, 12092 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; 12093defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info, 12094 avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, REX_W; 12095 12096// Patterns used to select SSE scalar fp arithmetic instructions from 12097// either: 12098// 12099// (1) a scalar fp operation followed by a blend 12100// 12101// The effect is that the backend no longer emits unnecessary vector 12102// insert instructions immediately after SSE scalar fp instructions 12103// like addss or mulss. 
12104// 12105// For example, given the following code: 12106// __m128 foo(__m128 A, __m128 B) { 12107// A[0] += B[0]; 12108// return A; 12109// } 12110// 12111// Previously we generated: 12112// addss %xmm0, %xmm1 12113// movss %xmm1, %xmm0 12114// 12115// We now generate: 12116// addss %xmm1, %xmm0 12117// 12118// (2) a vector packed single/double fp operation followed by a vector insert 12119// 12120// The effect is that the backend converts the packed fp instruction 12121// followed by a vector insert into a single SSE scalar fp instruction. 12122// 12123// For example, given the following code: 12124// __m128 foo(__m128 A, __m128 B) { 12125// __m128 C = A + B; 12126// return (__m128) {c[0], a[1], a[2], a[3]}; 12127// } 12128// 12129// Previously we generated: 12130// addps %xmm0, %xmm1 12131// movss %xmm1, %xmm0 12132// 12133// We now generate: 12134// addss %xmm1, %xmm0 12135 12136// TODO: Some canonicalization in lowering would simplify the number of 12137// patterns we have to try to match. 
12138multiclass AVX512_scalar_math_fp_patterns<SDPatternOperator Op, SDNode MaskedOp, 12139 string OpcPrefix, SDNode MoveNode, 12140 X86VectorVTInfo _, PatLeaf ZeroFP> { 12141 let Predicates = [HasAVX512] in { 12142 // extracted scalar math op with insert via movss 12143 def : Pat<(MoveNode 12144 (_.VT VR128X:$dst), 12145 (_.VT (scalar_to_vector 12146 (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))), 12147 _.FRC:$src)))), 12148 (!cast<Instruction>("V"#OpcPrefix#"Zrr_Int") _.VT:$dst, 12149 (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>; 12150 def : Pat<(MoveNode 12151 (_.VT VR128X:$dst), 12152 (_.VT (scalar_to_vector 12153 (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))), 12154 (_.ScalarLdFrag addr:$src))))), 12155 (!cast<Instruction>("V"#OpcPrefix#"Zrm_Int") _.VT:$dst, addr:$src)>; 12156 12157 // extracted masked scalar math op with insert via movss 12158 def : Pat<(MoveNode (_.VT VR128X:$src1), 12159 (scalar_to_vector 12160 (X86selects_mask VK1WM:$mask, 12161 (MaskedOp (_.EltVT 12162 (extractelt (_.VT VR128X:$src1), (iPTR 0))), 12163 _.FRC:$src2), 12164 _.FRC:$src0))), 12165 (!cast<Instruction>("V"#OpcPrefix#"Zrr_Intk") 12166 (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)), 12167 VK1WM:$mask, _.VT:$src1, 12168 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>; 12169 def : Pat<(MoveNode (_.VT VR128X:$src1), 12170 (scalar_to_vector 12171 (X86selects_mask VK1WM:$mask, 12172 (MaskedOp (_.EltVT 12173 (extractelt (_.VT VR128X:$src1), (iPTR 0))), 12174 (_.ScalarLdFrag addr:$src2)), 12175 _.FRC:$src0))), 12176 (!cast<Instruction>("V"#OpcPrefix#"Zrm_Intk") 12177 (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)), 12178 VK1WM:$mask, _.VT:$src1, addr:$src2)>; 12179 12180 // extracted masked scalar math op with insert via movss 12181 def : Pat<(MoveNode (_.VT VR128X:$src1), 12182 (scalar_to_vector 12183 (X86selects_mask VK1WM:$mask, 12184 (MaskedOp (_.EltVT 12185 (extractelt (_.VT VR128X:$src1), (iPTR 0))), 12186 _.FRC:$src2), (_.EltVT ZeroFP)))), 12187 
(!cast<I>("V"#OpcPrefix#"Zrr_Intkz") 12188 VK1WM:$mask, _.VT:$src1, 12189 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>; 12190 def : Pat<(MoveNode (_.VT VR128X:$src1), 12191 (scalar_to_vector 12192 (X86selects_mask VK1WM:$mask, 12193 (MaskedOp (_.EltVT 12194 (extractelt (_.VT VR128X:$src1), (iPTR 0))), 12195 (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))), 12196 (!cast<I>("V"#OpcPrefix#"Zrm_Intkz") VK1WM:$mask, _.VT:$src1, addr:$src2)>; 12197 } 12198} 12199 12200defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>; 12201defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>; 12202defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>; 12203defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>; 12204 12205defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>; 12206defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>; 12207defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>; 12208defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>; 12209 12210defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSH", X86Movsh, v8f16x_info, fp16imm0>; 12211defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSH", X86Movsh, v8f16x_info, fp16imm0>; 12212defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSH", X86Movsh, v8f16x_info, fp16imm0>; 12213defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSH", X86Movsh, v8f16x_info, fp16imm0>; 12214 12215multiclass AVX512_scalar_unary_math_patterns<SDPatternOperator OpNode, string OpcPrefix, 12216 SDNode Move, X86VectorVTInfo _> { 12217 let Predicates = [HasAVX512] in { 12218 def : Pat<(_.VT (Move _.VT:$dst, 12219 (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))), 12220 
(!cast<Instruction>("V"#OpcPrefix#"Zr_Int") _.VT:$dst, _.VT:$src)>; 12221 } 12222} 12223 12224defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSS", X86Movss, v4f32x_info>; 12225defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSD", X86Movsd, v2f64x_info>; 12226defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSH", X86Movsh, v8f16x_info>; 12227 12228//===----------------------------------------------------------------------===// 12229// AES instructions 12230//===----------------------------------------------------------------------===// 12231 12232multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> { 12233 let Predicates = [HasVLX, HasVAES] in { 12234 defm Z128 : AESI_binop_rm_int<Op, OpStr, 12235 !cast<Intrinsic>(IntPrefix), 12236 loadv2i64, 0, VR128X, i128mem>, 12237 EVEX, VVVV, EVEX_CD8<64, CD8VF>, EVEX_V128, WIG; 12238 defm Z256 : AESI_binop_rm_int<Op, OpStr, 12239 !cast<Intrinsic>(IntPrefix#"_256"), 12240 loadv4i64, 0, VR256X, i256mem>, 12241 EVEX, VVVV, EVEX_CD8<64, CD8VF>, EVEX_V256, WIG; 12242 } 12243 let Predicates = [HasAVX512, HasVAES] in 12244 defm Z : AESI_binop_rm_int<Op, OpStr, 12245 !cast<Intrinsic>(IntPrefix#"_512"), 12246 loadv8i64, 0, VR512, i512mem>, 12247 EVEX, VVVV, EVEX_CD8<64, CD8VF>, EVEX_V512, WIG; 12248} 12249 12250defm VAESENC : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">; 12251defm VAESENCLAST : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">; 12252defm VAESDEC : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">; 12253defm VAESDECLAST : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">; 12254 12255//===----------------------------------------------------------------------===// 12256// PCLMUL instructions - Carry less multiplication 12257//===----------------------------------------------------------------------===// 12258 12259let Predicates = [HasAVX512, HasVPCLMULQDQ] in 12260defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>, 12261 
EVEX, VVVV, EVEX_V512, EVEX_CD8<64, CD8VF>, WIG; 12262 12263let Predicates = [HasVLX, HasVPCLMULQDQ] in { 12264defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>, 12265 EVEX, VVVV, EVEX_V128, EVEX_CD8<64, CD8VF>, WIG; 12266 12267defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64, 12268 int_x86_pclmulqdq_256>, EVEX, VVVV, EVEX_V256, 12269 EVEX_CD8<64, CD8VF>, WIG; 12270} 12271 12272// Aliases 12273defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>; 12274defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>; 12275defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>; 12276 12277//===----------------------------------------------------------------------===// 12278// VBMI2 12279//===----------------------------------------------------------------------===// 12280 12281multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode, 12282 X86FoldableSchedWrite sched, X86VectorVTInfo VTI> { 12283 let Constraints = "$src1 = $dst", 12284 ExeDomain = VTI.ExeDomain in { 12285 defm r: AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst), 12286 (ins VTI.RC:$src2, VTI.RC:$src3), OpStr, 12287 "$src3, $src2", "$src2, $src3", 12288 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>, 12289 T8, PD, EVEX, VVVV, Sched<[sched]>; 12290 defm m: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst), 12291 (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr, 12292 "$src3, $src2", "$src2, $src3", 12293 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, 12294 (VTI.VT (VTI.LdFrag addr:$src3))))>, 12295 T8, PD, EVEX, VVVV, 12296 Sched<[sched.Folded, sched.ReadAfterFold]>; 12297 } 12298} 12299 12300multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode, 12301 X86FoldableSchedWrite sched, X86VectorVTInfo VTI> 12302 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> { 12303 let Constraints = "$src1 = $dst", 12304 ExeDomain = VTI.ExeDomain in 12305 defm mb: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs 
VTI.RC:$dst),
                                   (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
                                   "${src3}"#VTI.BroadcastStr#", $src2",
                                   "$src2, ${src3}"#VTI.BroadcastStr,
                                   (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                    (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
                                   T8, PD, EVEX, VVVV, EVEX_B,
                                   Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Instantiates the 512-bit register/memory forms of a variable-count VBMI2
// shift, plus the 256/128-bit forms when VLX is also available.
multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasVBMI2] in
  defm Z : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
           EVEX_V512;
  let Predicates = [HasVBMI2, HasVLX] in {
    defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
                EVEX_V256;
    defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
                EVEX_V128;
  }
}

// Same as VBMI2_shift_var_rm_common, but uses the _rmb helper, which also
// provides the embedded-broadcast (EVEX.b) memory form.
multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
                                      X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasVBMI2] in
  defm Z : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
           EVEX_V512;
  let Predicates = [HasVBMI2, HasVLX] in {
    defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
                EVEX_V256;
    defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
                EVEX_V128;
  }
}
// Instantiates the W/D/Q element-width variants of a variable-count VBMI2
// shift. The word form takes its own opcode (wOp) and has no broadcast form;
// dword/qword share dqOp and include broadcast forms.
multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
                           SDNode OpNode, X86SchedWriteWidths sched> {
  defm W : VBMI2_shift_var_rm_common<wOp, Prefix#"w", OpNode, sched,
                                     avx512vl_i16_info>, REX_W, EVEX_CD8<16, CD8VF>;
  defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix#"d", OpNode, sched,
                                      avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
  defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix#"q", OpNode, sched,
                                      avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
}

// Instantiates the W/D/Q element-width variants of an immediate-count VBMI2
// shift (VPSHLD/VPSHRD).
multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
                           SDNode OpNode, X86SchedWriteWidths sched> {
  defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix#"w", sched,
                                     avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
           REX_W, EVEX_CD8<16, CD8VF>;
  defm D : avx512_common_3Op_imm8<Prefix#"d", avx512vl_i32_info, dqOp,
           OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;
  defm Q : avx512_common_3Op_imm8<Prefix#"q", avx512vl_i64_info, dqOp, OpNode,
           sched, HasVBMI2>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
}

// Concat & Shift
defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
defm VPSHLD : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
defm VPSHRD : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;

// Compress
defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256,
                                         avx512vl_i8_info, HasVBMI2>, EVEX;
defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256,
                                          avx512vl_i16_info, HasVBMI2>, EVEX, REX_W;
// Expand
defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256,
                                      avx512vl_i8_info, HasVBMI2>, EVEX;
defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256,
                                      avx512vl_i16_info, HasVBMI2>, EVEX, REX_W;

//===----------------------------------------------------------------------===//
// VNNI
//===----------------------------------------------------------------------===//

// Register, memory and broadcast-memory forms of one VNNI dot-product
// instruction at a single vector width. $src1 is tied to $dst (the
// accumulator), so only $src2/$src3 appear in the ins list.
let Constraints = "$src1 = $dst" in
multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
                    X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
                    bit IsCommutable> {
  let ExeDomain = VTI.ExeDomain in {
  defm r  :   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                                   (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
                                   "$src3, $src2", "$src2, $src3",
                                   (VTI.VT (OpNode VTI.RC:$src1,
                                            VTI.RC:$src2, VTI.RC:$src3)),
                                   IsCommutable, IsCommutable>,
                                   EVEX, VVVV, T8, PD, Sched<[sched]>;
  defm m  :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                   (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
                                   "$src3, $src2", "$src2, $src3",
                                   (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                            (VTI.VT (VTI.LdFrag addr:$src3))))>,
                                   EVEX, VVVV, EVEX_CD8<32, CD8VF>, T8, PD,
                                   Sched<[sched.Folded, sched.ReadAfterFold,
                                          sched.ReadAfterFold]>;
  defm mb :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                   (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
                                   OpStr, "${src3}"#VTI.BroadcastStr#", $src2",
                                   "$src2, ${src3}"#VTI.BroadcastStr,
                                   (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                    (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
                                   EVEX, VVVV, EVEX_CD8<32, CD8VF>, EVEX_B,
                                   T8, PD, Sched<[sched.Folded, sched.ReadAfterFold,
                                                  sched.ReadAfterFold]>;
  }
}

// Instantiates a VNNI instruction at 512-bit width (HasVNNI) and at 256/128
// bits when VLX is additionally available. Element type is always i32.
multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
                       X86SchedWriteWidths sched, bit IsCommutable> {
  let Predicates = [HasVNNI] in
  defm Z :      VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info,
                         IsCommutable>, EVEX_V512;
  let Predicates = [HasVNNI, HasVLX] in {
    defm Z256 : VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info,
                         IsCommutable>, EVEX_V256;
    defm Z128 : VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info,
                         IsCommutable>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPDP?
defm VPDPBUSD  : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul, 0>;
defm VPDPBUSDS : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul, 0>;
defm VPDPWSSD  : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul, 1>;
defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul, 1>;

// Patterns to match VPDPWSSD from existing instructions/intrinsics.
// add(acc, vpmaddwd(a, b)) is exactly what vpdpwssd computes, so fold the
// pair into the fused instruction. The _su ("single use") fragment ensures
// the vpmaddwd result is not used elsewhere.
let Predicates = [HasVNNI] in {
  def : Pat<(v16i32 (add VR512:$src1,
                         (X86vpmaddwd_su VR512:$src2, VR512:$src3))),
            (VPDPWSSDZr VR512:$src1, VR512:$src2, VR512:$src3)>;
  def : Pat<(v16i32 (add VR512:$src1,
                         (X86vpmaddwd_su VR512:$src2, (load addr:$src3)))),
            (VPDPWSSDZm VR512:$src1, VR512:$src2, addr:$src3)>;
}
// Same fold for the 256/128-bit forms, which additionally require VLX.
let Predicates = [HasVNNI,HasVLX] in {
  def : Pat<(v8i32 (add VR256X:$src1,
                        (X86vpmaddwd_su VR256X:$src2, VR256X:$src3))),
            (VPDPWSSDZ256r VR256X:$src1, VR256X:$src2, VR256X:$src3)>;
  def : Pat<(v8i32 (add VR256X:$src1,
                        (X86vpmaddwd_su VR256X:$src2, (load addr:$src3)))),
            (VPDPWSSDZ256m VR256X:$src1, VR256X:$src2, addr:$src3)>;
  def : Pat<(v4i32 (add VR128X:$src1,
                        (X86vpmaddwd_su VR128X:$src2, VR128X:$src3))),
            (VPDPWSSDZ128r VR128X:$src1, VR128X:$src2, VR128X:$src3)>;
  def : Pat<(v4i32 (add VR128X:$src1,
                        (X86vpmaddwd_su VR128X:$src2, (load addr:$src3)))),
            (VPDPWSSDZ128m VR128X:$src1, VR128X:$src2, addr:$src3)>;
}

//===----------------------------------------------------------------------===//
// Bit Algorithms
//===----------------------------------------------------------------------===//

// FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW?
// Per-byte and per-word population count (BITALG).
defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU,
                                   avx512vl_i8_info, HasBITALG>;
defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
                                   avx512vl_i16_info, HasBITALG>, REX_W;

defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;

// Register and memory forms of VPSHUFBITQMB at a single vector width. The
// result is a mask register (KRC), so the maskable-cmp helper is used; the
// _su fragment is the single-use variant for the masked pattern.
multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
  defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
                                (ins VTI.RC:$src1, VTI.RC:$src2),
                                "vpshufbitqmb",
                                "$src2, $src1", "$src1, $src2",
                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
                                (VTI.VT VTI.RC:$src2)),
                                (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
                                (VTI.VT VTI.RC:$src2))>, EVEX, VVVV, T8, PD,
                                Sched<[sched]>;
  defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
                                (ins VTI.RC:$src1, VTI.MemOp:$src2),
                                "vpshufbitqmb",
                                "$src2, $src1", "$src1, $src2",
                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
                                (VTI.VT (VTI.LdFrag addr:$src2))),
                                (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
                                (VTI.VT (VTI.LdFrag addr:$src2)))>,
                                EVEX, VVVV, EVEX_CD8<8, CD8VF>, T8, PD,
                                Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Instantiates VPSHUFBITQMB at 512-bit width (HasBITALG) and at 256/128 bits
// when VLX is additionally available.
multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasBITALG] in
  defm Z : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512;
  let Predicates = [HasBITALG, HasVLX] in {
    defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256;
    defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPSHUFBITQMB?
12502defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>; 12503 12504//===----------------------------------------------------------------------===// 12505// GFNI 12506//===----------------------------------------------------------------------===// 12507 12508multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode, 12509 X86SchedWriteWidths sched> { 12510 let Predicates = [HasGFNI, HasAVX512] in 12511 defm Z : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>, 12512 EVEX_V512; 12513 let Predicates = [HasGFNI, HasVLX] in { 12514 defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>, 12515 EVEX_V256; 12516 defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>, 12517 EVEX_V128; 12518 } 12519} 12520 12521defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb, 12522 SchedWriteVecALU>, 12523 EVEX_CD8<8, CD8VF>, T8; 12524 12525multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode, 12526 X86FoldableSchedWrite sched, X86VectorVTInfo VTI, 12527 X86VectorVTInfo BcstVTI> 12528 : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> { 12529 let ExeDomain = VTI.ExeDomain in 12530 defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst), 12531 (ins VTI.RC:$src1, BcstVTI.ScalarMemOp:$src2, u8imm:$src3), 12532 OpStr, "$src3, ${src2}"#BcstVTI.BroadcastStr#", $src1", 12533 "$src1, ${src2}"#BcstVTI.BroadcastStr#", $src3", 12534 (OpNode (VTI.VT VTI.RC:$src1), 12535 (bitconvert (BcstVTI.VT (X86VBroadcastld64 addr:$src2))), 12536 (i8 timm:$src3))>, EVEX_B, 12537 Sched<[sched.Folded, sched.ReadAfterFold]>; 12538} 12539 12540multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode, 12541 X86SchedWriteWidths sched> { 12542 let Predicates = [HasGFNI, HasAVX512] in 12543 defm Z : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM, 12544 v64i8_info, v8i64_info>, EVEX_V512; 12545 let Predicates = [HasGFNI, HasVLX] in { 
12546 defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM, 12547 v32i8x_info, v4i64x_info>, EVEX_V256; 12548 defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM, 12549 v16i8x_info, v2i64x_info>, EVEX_V128; 12550 } 12551} 12552 12553defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb", 12554 X86GF2P8affineinvqb, SchedWriteVecIMul>, 12555 EVEX, VVVV, EVEX_CD8<8, CD8VF>, REX_W, AVX512AIi8Base; 12556defm VGF2P8AFFINEQB : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb", 12557 X86GF2P8affineqb, SchedWriteVecIMul>, 12558 EVEX, VVVV, EVEX_CD8<8, CD8VF>, REX_W, AVX512AIi8Base; 12559 12560 12561//===----------------------------------------------------------------------===// 12562// AVX5124FMAPS 12563//===----------------------------------------------------------------------===// 12564 12565let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle, 12566 Constraints = "$src1 = $dst", Uses = [MXCSR], mayRaiseFPException = 1 in { 12567defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info, 12568 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3), 12569 "v4fmaddps", "$src3, $src2", "$src2, $src3", 12570 []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>, 12571 Sched<[SchedWriteFMA.ZMM.Folded]>; 12572 12573defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info, 12574 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3), 12575 "v4fnmaddps", "$src3, $src2", "$src2, $src3", 12576 []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>, 12577 Sched<[SchedWriteFMA.ZMM.Folded]>; 12578 12579defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info, 12580 (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3), 12581 "v4fmaddss", "$src3, $src2", "$src2, $src3", 12582 []>, VEX_LIG, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VF>, 12583 Sched<[SchedWriteFMA.Scl.Folded]>; 12584 12585defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info, 12586 (outs 
VR128X:$dst), (ins VR128X:$src2, f128mem:$src3), 12587 "v4fnmaddss", "$src3, $src2", "$src2, $src3", 12588 []>, VEX_LIG, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VF>, 12589 Sched<[SchedWriteFMA.Scl.Folded]>; 12590} 12591 12592//===----------------------------------------------------------------------===// 12593// AVX5124VNNIW 12594//===----------------------------------------------------------------------===// 12595 12596let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt, 12597 Constraints = "$src1 = $dst" in { 12598defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info, 12599 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3), 12600 "vp4dpwssd", "$src3, $src2", "$src2, $src3", 12601 []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>, 12602 Sched<[SchedWriteFMA.ZMM.Folded]>; 12603 12604defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info, 12605 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3), 12606 "vp4dpwssds", "$src3, $src2", "$src2, $src3", 12607 []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>, 12608 Sched<[SchedWriteFMA.ZMM.Folded]>; 12609} 12610 12611let hasSideEffects = 0 in { 12612 let mayStore = 1, SchedRW = [WriteFStoreX] in 12613 def MASKPAIR16STORE : PseudoI<(outs), (ins anymem:$dst, VK16PAIR:$src), []>; 12614 let mayLoad = 1, SchedRW = [WriteFLoadX] in 12615 def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src), []>; 12616} 12617 12618//===----------------------------------------------------------------------===// 12619// VP2INTERSECT 12620//===----------------------------------------------------------------------===// 12621 12622multiclass avx512_vp2intersect_modes<X86FoldableSchedWrite sched, X86VectorVTInfo _> { 12623 def rr : I<0x68, MRMSrcReg, 12624 (outs _.KRPC:$dst), 12625 (ins _.RC:$src1, _.RC:$src2), 12626 !strconcat("vp2intersect", _.Suffix, 12627 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 12628 [(set _.KRPC:$dst, (X86vp2intersect 12629 _.RC:$src1, (_.VT 
_.RC:$src2)))]>, 12630 EVEX, VVVV, T8, XD, Sched<[sched]>; 12631 12632 def rm : I<0x68, MRMSrcMem, 12633 (outs _.KRPC:$dst), 12634 (ins _.RC:$src1, _.MemOp:$src2), 12635 !strconcat("vp2intersect", _.Suffix, 12636 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 12637 [(set _.KRPC:$dst, (X86vp2intersect 12638 _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>, 12639 EVEX, VVVV, T8, XD, EVEX_CD8<_.EltSize, CD8VF>, 12640 Sched<[sched.Folded, sched.ReadAfterFold]>; 12641 12642 def rmb : I<0x68, MRMSrcMem, 12643 (outs _.KRPC:$dst), 12644 (ins _.RC:$src1, _.ScalarMemOp:$src2), 12645 !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr, 12646 ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"), 12647 [(set _.KRPC:$dst, (X86vp2intersect 12648 _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))]>, 12649 EVEX, VVVV, T8, XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, 12650 Sched<[sched.Folded, sched.ReadAfterFold]>; 12651} 12652 12653multiclass avx512_vp2intersect<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> { 12654 let Predicates = [HasAVX512, HasVP2INTERSECT] in 12655 defm Z : avx512_vp2intersect_modes<sched.ZMM, _.info512>, EVEX_V512; 12656 12657 let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in { 12658 defm Z256 : avx512_vp2intersect_modes<sched.YMM, _.info256>, EVEX_V256; 12659 defm Z128 : avx512_vp2intersect_modes<sched.XMM, _.info128>, EVEX_V128; 12660 } 12661} 12662 12663let ExeDomain = SSEPackedInt in { 12664defm VP2INTERSECTD : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i32_info>; 12665defm VP2INTERSECTQ : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i64_info>, REX_W; 12666} 12667 12668multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr, 12669 X86SchedWriteWidths sched, 12670 AVX512VLVectorVTInfo _SrcVTInfo, 12671 AVX512VLVectorVTInfo _DstVTInfo, 12672 SDNode OpNode, Predicate prd, 12673 bit IsCommutable = 0> { 12674 let Predicates = [prd] in 12675 defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode, 12676 
_SrcVTInfo.info512, _DstVTInfo.info512, 12677 _SrcVTInfo.info512, IsCommutable>, 12678 EVEX_V512, EVEX_CD8<32, CD8VF>; 12679 let Predicates = [HasVLX, prd] in { 12680 defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode, 12681 _SrcVTInfo.info256, _DstVTInfo.info256, 12682 _SrcVTInfo.info256, IsCommutable>, 12683 EVEX_V256, EVEX_CD8<32, CD8VF>; 12684 defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode, 12685 _SrcVTInfo.info128, _DstVTInfo.info128, 12686 _SrcVTInfo.info128, IsCommutable>, 12687 EVEX_V128, EVEX_CD8<32, CD8VF>; 12688 } 12689} 12690 12691let ExeDomain = SSEPackedSingle in 12692defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16", 12693 SchedWriteCvtPD2PS, //FIXME: Should be SchedWriteCvtPS2BF 12694 avx512vl_f32_info, avx512vl_bf16_info, 12695 X86cvtne2ps2bf16, HasBF16, 0>, T8, XD; 12696 12697// Truncate Float to BFloat16 12698multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr, 12699 X86SchedWriteWidths sched> { 12700 let ExeDomain = SSEPackedSingle in { 12701 let Predicates = [HasBF16], Uses = []<Register>, mayRaiseFPException = 0 in { 12702 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16bf16x_info, v16f32_info, 12703 X86cvtneps2bf16, X86cvtneps2bf16, sched.ZMM>, EVEX_V512; 12704 } 12705 let Predicates = [HasBF16, HasVLX] in { 12706 let Uses = []<Register>, mayRaiseFPException = 0 in { 12707 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8bf16x_info, v4f32x_info, 12708 null_frag, null_frag, sched.XMM, "{1to4}", "{x}", f128mem, 12709 VK4WM>, EVEX_V128; 12710 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8bf16x_info, v8f32x_info, 12711 X86cvtneps2bf16, X86cvtneps2bf16, 12712 sched.YMM, "{1to8}", "{y}">, EVEX_V256; 12713 } 12714 } // Predicates = [HasBF16, HasVLX] 12715 } // ExeDomain = SSEPackedSingle 12716 12717 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}", 12718 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, 12719 VR128X:$src), 0>; 12720 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}", 12721 
(!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, 12722 f128mem:$src), 0, "intel">; 12723 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}", 12724 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, 12725 VR256X:$src), 0>; 12726 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}", 12727 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, 12728 f256mem:$src), 0, "intel">; 12729} 12730 12731defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16", 12732 SchedWriteCvtPD2PS>, T8, XS, 12733 EVEX_CD8<32, CD8VF>; 12734 12735let Predicates = [HasBF16, HasVLX] in { 12736 // Special patterns to allow use of X86mcvtneps2bf16 for masking. Instruction 12737 // patterns have been disabled with null_frag. 12738 def : Pat<(v8bf16 (X86cvtneps2bf16 (v4f32 VR128X:$src))), 12739 (VCVTNEPS2BF16Z128rr VR128X:$src)>; 12740 def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), (v8bf16 VR128X:$src0), 12741 VK4WM:$mask), 12742 (VCVTNEPS2BF16Z128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src)>; 12743 def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), v8bf16x_info.ImmAllZerosV, 12744 VK4WM:$mask), 12745 (VCVTNEPS2BF16Z128rrkz VK4WM:$mask, VR128X:$src)>; 12746 12747 def : Pat<(v8bf16 (X86cvtneps2bf16 (loadv4f32 addr:$src))), 12748 (VCVTNEPS2BF16Z128rm addr:$src)>; 12749 def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), (v8bf16 VR128X:$src0), 12750 VK4WM:$mask), 12751 (VCVTNEPS2BF16Z128rmk VR128X:$src0, VK4WM:$mask, addr:$src)>; 12752 def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), v8bf16x_info.ImmAllZerosV, 12753 VK4WM:$mask), 12754 (VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>; 12755 12756 def : Pat<(v8bf16 (X86cvtneps2bf16 (v4f32 12757 (X86VBroadcastld32 addr:$src)))), 12758 (VCVTNEPS2BF16Z128rmb addr:$src)>; 12759 def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)), 12760 (v8bf16 VR128X:$src0), VK4WM:$mask), 12761 (VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>; 12762 def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)), 12763 
v8bf16x_info.ImmAllZerosV, VK4WM:$mask), 12764 (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>; 12765 12766 def : Pat<(v8bf16 (int_x86_vcvtneps2bf16128 (v4f32 VR128X:$src))), 12767 (VCVTNEPS2BF16Z128rr VR128X:$src)>; 12768 def : Pat<(v8bf16 (int_x86_vcvtneps2bf16128 (loadv4f32 addr:$src))), 12769 (VCVTNEPS2BF16Z128rm addr:$src)>; 12770 12771 def : Pat<(v8bf16 (int_x86_vcvtneps2bf16256 (v8f32 VR256X:$src))), 12772 (VCVTNEPS2BF16Z256rr VR256X:$src)>; 12773 def : Pat<(v8bf16 (int_x86_vcvtneps2bf16256 (loadv8f32 addr:$src))), 12774 (VCVTNEPS2BF16Z256rm addr:$src)>; 12775 12776 def : Pat<(v8bf16 (X86VBroadcastld16 addr:$src)), 12777 (VPBROADCASTWZ128rm addr:$src)>; 12778 def : Pat<(v16bf16 (X86VBroadcastld16 addr:$src)), 12779 (VPBROADCASTWZ256rm addr:$src)>; 12780 12781 def : Pat<(v8bf16 (X86VBroadcast (v8bf16 VR128X:$src))), 12782 (VPBROADCASTWZ128rr VR128X:$src)>; 12783 def : Pat<(v16bf16 (X86VBroadcast (v8bf16 VR128X:$src))), 12784 (VPBROADCASTWZ256rr VR128X:$src)>; 12785 12786 def : Pat<(v8bf16 (X86vfpround (v8f32 VR256X:$src))), 12787 (VCVTNEPS2BF16Z256rr VR256X:$src)>; 12788 def : Pat<(v8bf16 (X86vfpround (loadv8f32 addr:$src))), 12789 (VCVTNEPS2BF16Z256rm addr:$src)>; 12790 12791 // TODO: No scalar broadcast due to we don't support legal scalar bf16 so far. 12792} 12793 12794let Predicates = [HasBF16] in { 12795 def : Pat<(v32bf16 (X86VBroadcastld16 addr:$src)), 12796 (VPBROADCASTWZrm addr:$src)>; 12797 12798 def : Pat<(v32bf16 (X86VBroadcast (v8bf16 VR128X:$src))), 12799 (VPBROADCASTWZrr VR128X:$src)>; 12800 12801 def : Pat<(v16bf16 (X86vfpround (v16f32 VR512:$src))), 12802 (VCVTNEPS2BF16Zrr VR512:$src)>; 12803 def : Pat<(v16bf16 (X86vfpround (loadv16f32 addr:$src))), 12804 (VCVTNEPS2BF16Zrm addr:$src)>; 12805 // TODO: No scalar broadcast due to we don't support legal scalar bf16 so far. 
12806} 12807 12808let Constraints = "$src1 = $dst" in { 12809multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, 12810 X86FoldableSchedWrite sched, 12811 X86VectorVTInfo _, X86VectorVTInfo src_v> { 12812 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 12813 (ins src_v.RC:$src2, src_v.RC:$src3), 12814 OpcodeStr, "$src3, $src2", "$src2, $src3", 12815 (_.VT (OpNode _.RC:$src1, src_v.RC:$src2, src_v.RC:$src3))>, 12816 EVEX, VVVV, Sched<[sched]>; 12817 12818 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 12819 (ins src_v.RC:$src2, src_v.MemOp:$src3), 12820 OpcodeStr, "$src3, $src2", "$src2, $src3", 12821 (_.VT (OpNode _.RC:$src1, src_v.RC:$src2, 12822 (src_v.LdFrag addr:$src3)))>, EVEX, VVVV, 12823 Sched<[sched.Folded, sched.ReadAfterFold]>; 12824 12825 let mayLoad = 1, hasSideEffects = 0 in 12826 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 12827 (ins src_v.RC:$src2, f32mem:$src3), 12828 OpcodeStr, 12829 !strconcat("${src3}", _.BroadcastStr,", $src2"), 12830 !strconcat("$src2, ${src3}", _.BroadcastStr), 12831 (null_frag)>, 12832 EVEX_B, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>; 12833 12834} 12835} // Constraints = "$src1 = $dst" 12836 12837multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, 12838 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _, 12839 AVX512VLVectorVTInfo src_v, Predicate prd> { 12840 let Predicates = [prd] in { 12841 defm Z : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512, 12842 src_v.info512>, EVEX_V512; 12843 } 12844 let Predicates = [HasVLX, prd] in { 12845 defm Z256 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256, 12846 src_v.info256>, EVEX_V256; 12847 defm Z128 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128, 12848 src_v.info128>, EVEX_V128; 12849 } 12850} 12851 12852let ExeDomain = SSEPackedSingle in 12853defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", 
X86dpbf16ps, SchedWriteFMA, 12854 avx512vl_f32_info, avx512vl_bf16_info, 12855 HasBF16>, T8, XS, EVEX_CD8<32, CD8VF>; 12856 12857//===----------------------------------------------------------------------===// 12858// AVX512FP16 12859//===----------------------------------------------------------------------===// 12860 12861let Predicates = [HasFP16] in { 12862// Move word ( r/m16) to Packed word 12863def VMOVW2SHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src), 12864 "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, Sched<[WriteVecMoveFromGpr]>; 12865def VMOVWrm : AVX512<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i16mem:$src), 12866 "vmovw\t{$src, $dst|$dst, $src}", 12867 [(set VR128X:$dst, 12868 (v8i16 (scalar_to_vector (loadi16 addr:$src))))]>, 12869 T_MAP5, PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFLoad]>; 12870 12871def : Pat<(f16 (bitconvert GR16:$src)), 12872 (f16 (COPY_TO_REGCLASS 12873 (VMOVW2SHrr 12874 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), 12875 FR16X))>; 12876def : Pat<(v8i16 (scalar_to_vector (i16 GR16:$src))), 12877 (VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>; 12878def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (and GR32:$src, 0xffff)))), 12879 (VMOVW2SHrr GR32:$src)>; 12880// FIXME: We should really find a way to improve these patterns. 12881def : Pat<(v8i32 (X86vzmovl 12882 (insert_subvector undef, 12883 (v4i32 (scalar_to_vector 12884 (and GR32:$src, 0xffff))), 12885 (iPTR 0)))), 12886 (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>; 12887def : Pat<(v16i32 (X86vzmovl 12888 (insert_subvector undef, 12889 (v4i32 (scalar_to_vector 12890 (and GR32:$src, 0xffff))), 12891 (iPTR 0)))), 12892 (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>; 12893 12894def : Pat<(v8i16 (X86vzmovl (scalar_to_vector (i16 GR16:$src)))), 12895 (VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>; 12896 12897// AVX 128-bit movw instruction write zeros in the high 128-bit part. 
12898def : Pat<(v8i16 (X86vzload16 addr:$src)), 12899 (VMOVWrm addr:$src)>; 12900def : Pat<(v16i16 (X86vzload16 addr:$src)), 12901 (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>; 12902 12903// Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext. 12904def : Pat<(v32i16 (X86vzload16 addr:$src)), 12905 (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>; 12906 12907def : Pat<(v4i32 (scalar_to_vector (i32 (extloadi16 addr:$src)))), 12908 (VMOVWrm addr:$src)>; 12909def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (i32 (zextloadi16 addr:$src))))), 12910 (VMOVWrm addr:$src)>; 12911def : Pat<(v8i32 (X86vzmovl 12912 (insert_subvector undef, 12913 (v4i32 (scalar_to_vector 12914 (i32 (zextloadi16 addr:$src)))), 12915 (iPTR 0)))), 12916 (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>; 12917def : Pat<(v16i32 (X86vzmovl 12918 (insert_subvector undef, 12919 (v4i32 (scalar_to_vector 12920 (i32 (zextloadi16 addr:$src)))), 12921 (iPTR 0)))), 12922 (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>; 12923 12924// Move word from xmm register to r/m16 12925def VMOVSH2Wrr : AVX512<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src), 12926 "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, Sched<[WriteVecMoveToGpr]>; 12927def VMOVWmr : AVX512<0x7E, MRMDestMem, (outs), 12928 (ins i16mem:$dst, VR128X:$src), 12929 "vmovw\t{$src, $dst|$dst, $src}", 12930 [(store (i16 (extractelt (v8i16 VR128X:$src), 12931 (iPTR 0))), addr:$dst)]>, 12932 T_MAP5, PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFStore]>; 12933 12934def : Pat<(i16 (bitconvert FR16X:$src)), 12935 (i16 (EXTRACT_SUBREG 12936 (VMOVSH2Wrr (COPY_TO_REGCLASS FR16X:$src, VR128X)), 12937 sub_16bit))>; 12938def : Pat<(i16 (extractelt (v8i16 VR128X:$src), (iPTR 0))), 12939 (i16 (EXTRACT_SUBREG (VMOVSH2Wrr VR128X:$src), sub_16bit))>; 12940 12941// Allow "vmovw" to use GR64 12942let hasSideEffects = 0 in { 12943 def VMOVW64toSHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins 
GR64:$src), 12944 "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>; 12945 def VMOVSHtoW64rr : AVX512<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src), 12946 "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, REX_W, Sched<[WriteVecMoveToGpr]>; 12947} 12948} 12949 12950// Convert 16-bit float to i16/u16 12951multiclass avx512_cvtph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 12952 SDPatternOperator MaskOpNode, SDNode OpNodeRnd, 12953 AVX512VLVectorVTInfo _Dst, 12954 AVX512VLVectorVTInfo _Src, 12955 X86SchedWriteWidths sched> { 12956 let Predicates = [HasFP16] in { 12957 defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512, 12958 OpNode, MaskOpNode, sched.ZMM>, 12959 avx512_vcvt_fp_rc<opc, OpcodeStr, _Dst.info512, _Src.info512, 12960 OpNodeRnd, sched.ZMM>, EVEX_V512; 12961 } 12962 let Predicates = [HasFP16, HasVLX] in { 12963 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128, 12964 OpNode, MaskOpNode, sched.XMM>, EVEX_V128; 12965 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256, 12966 OpNode, MaskOpNode, sched.YMM>, EVEX_V256; 12967 } 12968} 12969 12970// Convert 16-bit float to i16/u16 truncate 12971multiclass avx512_cvttph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 12972 SDPatternOperator MaskOpNode, SDNode OpNodeRnd, 12973 AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src, 12974 X86SchedWriteWidths sched> { 12975 let Predicates = [HasFP16] in { 12976 defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512, 12977 OpNode, MaskOpNode, sched.ZMM>, 12978 avx512_vcvt_fp_sae<opc, OpcodeStr, _Dst.info512, _Src.info512, 12979 OpNodeRnd, sched.ZMM>, EVEX_V512; 12980 } 12981 let Predicates = [HasFP16, HasVLX] in { 12982 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128, 12983 OpNode, MaskOpNode, sched.XMM>, EVEX_V128; 12984 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256, 12985 
OpNode, MaskOpNode, sched.YMM>, EVEX_V256; 12986 } 12987} 12988 12989defm VCVTPH2UW : avx512_cvtph2w<0x7D, "vcvtph2uw", X86cvtp2UInt, X86cvtp2UInt, 12990 X86cvtp2UIntRnd, avx512vl_i16_info, 12991 avx512vl_f16_info, SchedWriteCvtPD2DQ>, 12992 T_MAP5, EVEX_CD8<16, CD8VF>; 12993defm VCVTUW2PH : avx512_cvtph2w<0x7D, "vcvtuw2ph", any_uint_to_fp, uint_to_fp, 12994 X86VUintToFpRnd, avx512vl_f16_info, 12995 avx512vl_i16_info, SchedWriteCvtPD2DQ>, 12996 T_MAP5, XD, EVEX_CD8<16, CD8VF>; 12997defm VCVTTPH2W : avx512_cvttph2w<0x7C, "vcvttph2w", X86any_cvttp2si, 12998 X86cvttp2si, X86cvttp2siSAE, 12999 avx512vl_i16_info, avx512vl_f16_info, 13000 SchedWriteCvtPD2DQ>, T_MAP5, PD, EVEX_CD8<16, CD8VF>; 13001defm VCVTTPH2UW : avx512_cvttph2w<0x7C, "vcvttph2uw", X86any_cvttp2ui, 13002 X86cvttp2ui, X86cvttp2uiSAE, 13003 avx512vl_i16_info, avx512vl_f16_info, 13004 SchedWriteCvtPD2DQ>, T_MAP5, EVEX_CD8<16, CD8VF>; 13005defm VCVTPH2W : avx512_cvtph2w<0x7D, "vcvtph2w", X86cvtp2Int, X86cvtp2Int, 13006 X86cvtp2IntRnd, avx512vl_i16_info, 13007 avx512vl_f16_info, SchedWriteCvtPD2DQ>, 13008 T_MAP5, PD, EVEX_CD8<16, CD8VF>; 13009defm VCVTW2PH : avx512_cvtph2w<0x7D, "vcvtw2ph", any_sint_to_fp, sint_to_fp, 13010 X86VSintToFpRnd, avx512vl_f16_info, 13011 avx512vl_i16_info, SchedWriteCvtPD2DQ>, 13012 T_MAP5, XS, EVEX_CD8<16, CD8VF>; 13013 13014// Convert Half to Signed/Unsigned Doubleword 13015multiclass avx512_cvtph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 13016 SDPatternOperator MaskOpNode, SDNode OpNodeRnd, 13017 X86SchedWriteWidths sched> { 13018 let Predicates = [HasFP16] in { 13019 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode, 13020 MaskOpNode, sched.ZMM>, 13021 avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f16x_info, 13022 OpNodeRnd, sched.ZMM>, EVEX_V512; 13023 } 13024 let Predicates = [HasFP16, HasVLX] in { 13025 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode, 13026 MaskOpNode, sched.XMM, "{1to4}", "", 
// (Tail of avx512_cvtph2dq, whose header is above this chunk: the VLX
// 128/256-bit forms and the closing braces of the multiclass.)
                                 f64mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

// Convert Half to Signed/Unsigned Doubleword with truncation.
// Same structure as avx512_cvtph2dq, but the 512-bit form pairs with the
// SAE (suppress-all-exceptions) variant instead of a rounding-control one,
// as is conventional for truncating conversions.
multiclass avx512_cvttph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f16x_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasFP16, HasVLX] in {
    // 128-bit form reads only the low 4 halves of the v8f16 source, hence the
    // explicit "{1to4}" broadcast string and f64mem (64-bit) memory operand.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode,
                               MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

defm VCVTPH2DQ : avx512_cvtph2dq<0x5B, "vcvtph2dq", X86cvtp2Int, X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5, PD,
                                 EVEX_CD8<16, CD8VH>;
defm VCVTPH2UDQ : avx512_cvtph2dq<0x79, "vcvtph2udq", X86cvtp2UInt, X86cvtp2UInt,
                                  X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5,
                                  EVEX_CD8<16, CD8VH>;

defm VCVTTPH2DQ : avx512_cvttph2dq<0x5B, "vcvttph2dq", X86any_cvttp2si,
                                   X86cvttp2si, X86cvttp2siSAE,
                                   SchedWriteCvtPS2DQ>, T_MAP5, XS,
                                   EVEX_CD8<16, CD8VH>;

defm VCVTTPH2UDQ : avx512_cvttph2dq<0x78, "vcvttph2udq", X86any_cvttp2ui,
                                    X86cvttp2ui, X86cvttp2uiSAE,
                                    SchedWriteCvtPS2DQ>, T_MAP5,
                                    EVEX_CD8<16, CD8VH>;

// Convert Half to Signed/Unsigned Quadword
multiclass avx512_cvtph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f16x_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasFP16, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from v8f16x_info source
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode,
                               MaskOpNode, sched.XMM, "{1to2}", "", f32mem>,
                               EVEX_V128;
    // Explicitly specified broadcast string, since we take only 4 elements
    // from v8f16x_info source
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode,
                               MaskOpNode, sched.YMM, "{1to4}", "", f64mem>,
                               EVEX_V256;
  }
}

// Convert Half to Signed/Unsigned Quadword with truncation
multiclass avx512_cvttph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f16x_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasFP16, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from v8f16x_info source
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode,
                               MaskOpNode, sched.XMM, "{1to2}", "", f32mem>, EVEX_V128;
    // Explicitly specified broadcast string, since we take only 4 elements
    // from v8f16x_info source
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode,
                               MaskOpNode, sched.YMM, "{1to4}", "", f64mem>, EVEX_V256;
  }
}

defm VCVTPH2QQ : avx512_cvtph2qq<0x7B, "vcvtph2qq", X86cvtp2Int, X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5, PD,
                                 EVEX_CD8<16, CD8VQ>;

defm VCVTPH2UQQ : avx512_cvtph2qq<0x79, "vcvtph2uqq", X86cvtp2UInt, X86cvtp2UInt,
                                  X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5, PD,
                                  EVEX_CD8<16, CD8VQ>;

defm VCVTTPH2QQ : avx512_cvttph2qq<0x7A, "vcvttph2qq", X86any_cvttp2si,
                                   X86cvttp2si, X86cvttp2siSAE,
                                   SchedWriteCvtPS2DQ>, T_MAP5, PD,
                                   EVEX_CD8<16, CD8VQ>;

defm VCVTTPH2UQQ : avx512_cvttph2qq<0x78, "vcvttph2uqq", X86any_cvttp2ui,
                                    X86cvttp2ui, X86cvttp2uiSAE,
                                    SchedWriteCvtPS2DQ>, T_MAP5, PD,
                                    EVEX_CD8<16, CD8VQ>;

// Convert Signed/Unsigned Quadword to Half
multiclass avx512_cvtqq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  // We need "x"/"y"/"z" suffixes in order to distinguish between 128, 256 and
  // 512 memory forms of these instructions in Asm Parser. They have the same
  // dest type - 'v8f16x_info'. We also specify the broadcast string explicitly
  // due to the same reason.
  let Predicates = [HasFP16] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8i64_info, OpNode,
                            MaskOpNode, sched.ZMM, "{1to8}", "{z}">,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8i64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasFP16, HasVLX] in {
    // Instruction-level ISel is disabled with null_frag here; the VLX forms
    // are selected via the explicit Pat<> lists under [HasFP16, HasVLX] below.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2i64x_info,
                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}",
                               i128mem, VK2WM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4i64x_info,
                               null_frag, null_frag, sched.YMM, "{1to4}", "{y}",
                               i256mem, VK4WM>, EVEX_V256;
  }

  // AT&T-syntax aliases carrying the explicit "x"/"y"/"z" source-size suffix
  // for the plain, merge-masked ({k}), zero-masked ({z}) and broadcast forms.
  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                  VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                  i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, i64mem:$src), 0, "att">;

  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                  VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
                  "$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                  i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask, i64mem:$src), 0, "att">;

  def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Zrr") VR128X:$dst,
                  VR512:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|"
                  "$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst,
                  VK8WM:$mask, VR512:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst,
                  VK8WM:$mask, VR512:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}",
                  (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst,
                  i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to8}}",
                  (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst,
                  VK8WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to8}}",
                  (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst,
                  VK8WM:$mask, i64mem:$src), 0, "att">;
}

defm VCVTQQ2PH : avx512_cvtqq2ph<0x5B, "vcvtqq2ph", any_sint_to_fp, sint_to_fp,
                                 X86VSintToFpRnd, SchedWriteCvtDQ2PS>, REX_W, T_MAP5,
                                 EVEX_CD8<64, CD8VF>;

defm VCVTUQQ2PH : avx512_cvtqq2ph<0x7A, "vcvtuqq2ph", any_uint_to_fp, uint_to_fp,
                                  X86VUintToFpRnd, SchedWriteCvtDQ2PS>, REX_W, T_MAP5, XD,
                                  EVEX_CD8<64, CD8VF>;

// Convert half to signed/unsigned int 32/64
defm VCVTSH2SIZ: avx512_cvt_s_int_round<0x2D, f16x_info, i32x_info, X86cvts2si,
                                        X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{l}", HasFP16>,
                                        T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
defm VCVTSH2SI64Z: avx512_cvt_s_int_round<0x2D, f16x_info, i64x_info, X86cvts2si,
                                          X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{q}", HasFP16>,
                                          T_MAP5, XS, REX_W, EVEX_CD8<16, CD8VT1>;
defm VCVTSH2USIZ: avx512_cvt_s_int_round<0x79, f16x_info, i32x_info, X86cvts2usi,
                                         X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{l}", HasFP16>,
                                         T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
defm VCVTSH2USI64Z: avx512_cvt_s_int_round<0x79, f16x_info, i64x_info, X86cvts2usi,
                                           X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{q}", HasFP16>,
                                           T_MAP5, XS, REX_W, EVEX_CD8<16, CD8VT1>;

// Truncating half -> signed/unsigned int 32/64 (vcvttsh2si / vcvttsh2usi).
defm VCVTTSH2SIZ: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i32x_info,
                                   any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
                                   "{l}", HasFP16>, T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
defm VCVTTSH2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i64x_info,
                                     any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
                                     "{q}", HasFP16>, REX_W, T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
defm VCVTTSH2USIZ: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i32x_info,
                                    any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
                                    "{l}", HasFP16>, T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
defm VCVTTSH2USI64Z: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i64x_info,
                                      any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
                                      "{q}", HasFP16>, T_MAP5, XS, REX_W, EVEX_CD8<16, CD8VT1>;

// Signed/unsigned int 32/64 -> half (vcvtsi2sh / vcvtusi2sh), plus the ISel
// patterns mapping generic [su]int_to_fp nodes onto these instructions.
let Predicates = [HasFP16] in {
  defm VCVTSI2SHZ : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR32,
                                         v8f16x_info, i32mem, loadi32, "cvtsi2sh", "l">,
                                         T_MAP5, XS, EVEX_CD8<32, CD8VT1>;
  defm VCVTSI642SHZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR64,
                                          v8f16x_info, i64mem, loadi64, "cvtsi2sh","q">,
                                          T_MAP5, XS, REX_W, EVEX_CD8<64, CD8VT1>;
  defm VCVTUSI2SHZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR32,
                                          v8f16x_info, i32mem, loadi32,
                                          "cvtusi2sh","l">, T_MAP5, XS, EVEX_CD8<32, CD8VT1>;
  defm VCVTUSI642SHZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR64,
                                            v8f16x_info, i64mem, loadi64, "cvtusi2sh", "q">,
                                            T_MAP5, XS, REX_W, EVEX_CD8<64, CD8VT1>;
  // Suffix-less AT&T aliases default the memory form to the 32-bit variant.
  def : InstAlias<"vcvtsi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
                  (VCVTSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;

  def : InstAlias<"vcvtusi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
                  (VCVTUSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;


  def : Pat<(f16 (any_sint_to_fp (loadi32 addr:$src))),
            (VCVTSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f16 (any_sint_to_fp (loadi64 addr:$src))),
            (VCVTSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;

  def : Pat<(f16 (any_sint_to_fp GR32:$src)),
            (VCVTSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
  def : Pat<(f16 (any_sint_to_fp GR64:$src)),
            (VCVTSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;

  def : Pat<(f16 (any_uint_to_fp (loadi32 addr:$src))),
            (VCVTUSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f16 (any_uint_to_fp (loadi64 addr:$src))),
            (VCVTUSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;

  def : Pat<(f16 (any_uint_to_fp GR32:$src)),
            (VCVTUSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
  def : Pat<(f16 (any_uint_to_fp GR64:$src)),
            (VCVTUSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;

  // Patterns used for matching vcvtsi2sh intrinsic sequences from clang
  // which produce unnecessary vmovsh instructions
  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR64:$src)))))),
            (VCVTSI642SHZrr_Int VR128X:$dst, GR64:$src)>;

  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi64 addr:$src))))))),
            (VCVTSI642SHZrm_Int VR128X:$dst, addr:$src)>;

  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR32:$src)))))),
            (VCVTSI2SHZrr_Int VR128X:$dst, GR32:$src)>;

  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi32 addr:$src))))))),
            (VCVTSI2SHZrm_Int VR128X:$dst, addr:$src)>;

  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR64:$src)))))),
            (VCVTUSI642SHZrr_Int VR128X:$dst, GR64:$src)>;

  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi64 addr:$src))))))),
            (VCVTUSI642SHZrm_Int VR128X:$dst, addr:$src)>;

  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR32:$src)))))),
            (VCVTUSI2SHZrr_Int VR128X:$dst, GR32:$src)>;

  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi32 addr:$src))))))),
            (VCVTUSI2SHZrm_Int VR128X:$dst, addr:$src)>;
} // Predicates = [HasFP16]

let Predicates = [HasFP16, HasVLX] in {
  // Special patterns to allow use of X86VMSintToFP for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v8f16 (X86any_VSintToFP (v4i64 VR256X:$src))),
            (VCVTQQ2PHZ256rr VR256X:$src)>;
  def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
                           VK4WM:$mask),
            (VCVTQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
  def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
                           VK4WM:$mask),
            (VCVTQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;

  def : Pat<(v8f16 (X86any_VSintToFP (loadv4i64 addr:$src))),
            (VCVTQQ2PHZ256rm addr:$src)>;
  def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
                           VK4WM:$mask),
            (VCVTQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
                           VK4WM:$mask),
            (VCVTQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_VSintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
            (VCVTQQ2PHZ256rmb addr:$src)>;
  def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
                           (v8f16 VR128X:$src0), VK4WM:$mask),
            (VCVTQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
                           v8f16x_info.ImmAllZerosV, VK4WM:$mask),
            (VCVTQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_VSintToFP (v2i64 VR128X:$src))),
            (VCVTQQ2PHZ128rr VR128X:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v8f16 (X86any_VSintToFP (loadv2i64 addr:$src))),
            (VCVTQQ2PHZ128rm addr:$src)>;
  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
            (VCVTQQ2PHZ128rmb addr:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
                           (v8f16 VR128X:$src0), VK2WM:$mask),
            (VCVTQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
                           v8f16x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;

  // Special patterns to allow use of X86VMUintToFP for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v8f16 (X86any_VUintToFP (v4i64 VR256X:$src))),
            (VCVTUQQ2PHZ256rr VR256X:$src)>;
  def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
                           VK4WM:$mask),
            (VCVTUQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
  def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
                           VK4WM:$mask),
            (VCVTUQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;

  def : Pat<(v8f16 (X86any_VUintToFP (loadv4i64 addr:$src))),
            (VCVTUQQ2PHZ256rm addr:$src)>;
  def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
                           VK4WM:$mask),
            (VCVTUQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
                           VK4WM:$mask),
            (VCVTUQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_VUintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
            (VCVTUQQ2PHZ256rmb addr:$src)>;
  def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
                           (v8f16 VR128X:$src0), VK4WM:$mask),
            (VCVTUQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
                           v8f16x_info.ImmAllZerosV, VK4WM:$mask),
            (VCVTUQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_VUintToFP (v2i64 VR128X:$src))),
            (VCVTUQQ2PHZ128rr VR128X:$src)>;
  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTUQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTUQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v8f16 (X86any_VUintToFP (loadv2i64 addr:$src))),
            (VCVTUQQ2PHZ128rm addr:$src)>;
  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTUQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTUQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
            (VCVTUQQ2PHZ128rmb addr:$src)>;
  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
                           (v8f16 VR128X:$src0), VK2WM:$mask),
            (VCVTUQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
                           v8f16x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTUQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
}

// Complex-FP16 FMA (vf(c)maddcph family): $src1 is tied to $dst as the
// accumulator, and $dst is additionally marked @earlyclobber so the register
// allocator never assigns it the same register as the untied sources.
let Constraints = "@earlyclobber $dst, $src1 = $dst" in {
  multiclass avx512_cfmaop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _, bit IsCommutable> {
    // Register-register form.
    defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
            (ins _.RC:$src2, _.RC:$src3),
            OpcodeStr, "$src3, $src2", "$src2, $src3",
            (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), IsCommutable>, EVEX, VVVV;

    // Register-memory form (full vector load of $src3).
    defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
            (ins _.RC:$src2, _.MemOp:$src3),
            OpcodeStr, "$src3, $src2", "$src2, $src3",
            (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>, EVEX, VVVV;

    // Broadcast form ($src3 is a scalar broadcast to all elements).
    defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
             (ins _.RC:$src2, _.ScalarMemOp:$src3),
             OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr),
             (_.VT (OpNode _.RC:$src2, (_.VT (_.BroadcastLdFrag addr:$src3)), _.RC:$src1))>, EVEX_B, EVEX, VVVV;
  }
} // Constraints = "@earlyclobber $dst, $src1 = $dst"

// Rounding-control (embedded-rounding) form of the complex FMA; 512-bit only.
multiclass avx512_cfmaop_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86VectorVTInfo _> {
  let Constraints = "@earlyclobber $dst, $src1 = $dst" in
  defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
           (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc)))>,
           EVEX, VVVV, EVEX_B, EVEX_RC;
}


// Instantiates the 512-bit (plus rounding) and VLX 256/128-bit complex FMA
// forms under the appropriate feature predicates.
multiclass avx512_cfmaop_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, bit IsCommutable> {
  let Predicates = [HasFP16] in {
    defm Z : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v16f32_info, IsCommutable>,
             avx512_cfmaop_round<opc, OpcodeStr, OpNodeRnd, v16f32_info>,
             EVEX_V512, Sched<[WriteFMAZ]>;
  }
  let Predicates = [HasVLX, HasFP16] in {
    defm Z256 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v8f32x_info, IsCommutable>, EVEX_V256, Sched<[WriteFMAY]>;
    defm Z128 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v4f32x_info, IsCommutable>, EVEX_V128, Sched<[WriteFMAX]>;
  }
}

// Complex-FP16 multiply (vf(c)mulcph): plain packed-FP machinery, but with
// $dst marked @earlyclobber so it cannot alias a source register.
multiclass avx512_cfmulop_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 SDNode MaskOpNode, SDNode OpNodeRnd, bit IsCommutable> {
  let Predicates = [HasFP16] in {
    defm Z : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
                              WriteFMAZ, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>,
             avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, WriteFMAZ, v16f32_info,
                                    "", "@earlyclobber $dst">, EVEX_V512;
  }
  let Predicates = [HasVLX, HasFP16] in {
    defm Z256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
                                 WriteFMAY, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V256;
    defm Z128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
                                 WriteFMAX, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V128;
  }
}


// Packed complex-FP16 FMA/multiply. All read MXCSR; the conjugating (XD)
// variants are not commutable.
let Uses = [MXCSR] in {
  defm VFMADDCPH : avx512_cfmaop_common<0x56, "vfmaddcph", x86vfmaddc, x86vfmaddcRnd, 1>,
                   T_MAP6, XS, EVEX_CD8<32, CD8VF>;
  defm VFCMADDCPH : avx512_cfmaop_common<0x56, "vfcmaddcph", x86vfcmaddc, x86vfcmaddcRnd, 0>,
                    T_MAP6, XD, EVEX_CD8<32, CD8VF>;

  defm VFMULCPH : avx512_cfmulop_common<0xD6, "vfmulcph", x86vfmulc, x86vfmulc,
                                        x86vfmulcRnd, 1>, T_MAP6, XS, EVEX_CD8<32, CD8VF>;
  defm VFCMULCPH : avx512_cfmulop_common<0xD6, "vfcmulcph", x86vfcmulc,
                                         x86vfcmulc, x86vfcmulcRnd, 0>, T_MAP6, XD, EVEX_CD8<32, CD8VF>;
}


// Scalar complex-FP16 FMA (vf(c)maddcsh): reg, mem and embedded-rounding
// forms, with the same tied-accumulator + @earlyclobber constraint as the
// packed variants.
multiclass avx512_cfmaop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
                                   bit IsCommutable> {
  let Predicates = [HasFP16], Constraints = "@earlyclobber $dst, $src1 = $dst" in {
    defm r : AVX512_maskable_3src<opc, MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
                        (ins VR128X:$src2, VR128X:$src3), OpcodeStr,
                        "$src3, $src2", "$src2, $src3",
                        (v4f32 (OpNode VR128X:$src2, VR128X:$src3, VR128X:$src1)), IsCommutable>,
                        Sched<[WriteFMAX]>;
    defm m : AVX512_maskable_3src<opc, MRMSrcMem, v4f32x_info, (outs VR128X:$dst),
                        (ins VR128X:$src2, ssmem:$src3), OpcodeStr,
                        "$src3, $src2", "$src2, $src3",
                        (v4f32 (OpNode VR128X:$src2, (sse_load_f32 addr:$src3), VR128X:$src1))>,
                        Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
    defm rb : AVX512_maskable_3src<opc, MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
                        (ins VR128X:$src2, VR128X:$src3, AVX512RC:$rc), OpcodeStr,
                        "$rc, $src3, $src2", "$src2, $src3, $rc",
                        (v4f32 (OpNodeRnd VR128X:$src2, VR128X:$src3, VR128X:$src1, (i32 timm:$rc)))>,
                        EVEX_B, EVEX_RC, Sched<[WriteFMAX]>;
  }
}

// Scalar complex-FP16 multiply (vf(c)mulcsh): two-operand binop form with
// @earlyclobber on $dst; masking uses X86selects.
multiclass avx512_cfmbinop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                     SDNode OpNodeRnd, bit IsCommutable> {
  let Predicates = [HasFP16] in {
    defm rr : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
                        (ins VR128X:$src1, VR128X:$src2), OpcodeStr,
                        "$src2, $src1", "$src1, $src2",
                        (v4f32 (OpNode VR128X:$src1, VR128X:$src2)),
                        IsCommutable, IsCommutable, IsCommutable,
                        X86selects, "@earlyclobber $dst">, Sched<[WriteFMAX]>;
    defm rm : AVX512_maskable<opc, MRMSrcMem, f32x_info, (outs VR128X:$dst),
                        (ins VR128X:$src1, ssmem:$src2), OpcodeStr,
                        "$src2, $src1", "$src1, $src2",
                        (v4f32 (OpNode VR128X:$src1, (sse_load_f32 addr:$src2))),
                        0, 0, 0, X86selects, "@earlyclobber $dst">,
                        Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
    defm rrb : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
                        (ins VR128X:$src1, VR128X:$src2, AVX512RC:$rc), OpcodeStr,
                        "$rc, $src2, $src1", "$src1, $src2, $rc",
                        (OpNodeRnd (v4f32 VR128X:$src1), (v4f32 VR128X:$src2), (i32 timm:$rc)),
                        0, 0, 0, X86selects, "@earlyclobber $dst">,
                        EVEX_B, EVEX_RC, Sched<[WriteFMAX]>;
  }
}

// Scalar complex-FP16 FMA/multiply instantiations; all read MXCSR.
let Uses = [MXCSR] in {
  defm VFMADDCSHZ : avx512_cfmaop_sh_common<0x57, "vfmaddcsh", x86vfmaddcSh, x86vfmaddcShRnd, 1>,
                    T_MAP6, XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX, VVVV;
  defm VFCMADDCSHZ : avx512_cfmaop_sh_common<0x57, "vfcmaddcsh", x86vfcmaddcSh, x86vfcmaddcShRnd, 0>,
                     T_MAP6, XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX, VVVV;

  defm VFMULCSHZ : avx512_cfmbinop_sh_common<0xD7, "vfmulcsh", x86vfmulcSh, x86vfmulcShRnd, 1>,
                   T_MAP6, XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX, VVVV;
  defm VFCMULCSHZ : avx512_cfmbinop_sh_common<0xD7, "vfcmulcsh", x86vfcmulcSh, x86vfcmulcShRnd, 0>,
                    T_MAP6, XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX, VVVV;
}