//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 AVX512 instruction set, defining the
// instructions, and properties of the instructions which are needed for code
// generation, machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//

// This multiclass generates the masking variants from the non-masking
// variant. It only provides the assembly pieces for the masking variants.
// It assumes custom ISel patterns for masking which can be provided as
// template arguments.
//
// Three records are produced for each instantiation:
//   NAME    - the unmasked form (Ins / Pattern),
//   NAME#k  - the merge-masking form (MaskingIns / MaskingPattern, EVEX.K),
//   NAME#kz - the zero-masking form (ZeroMaskingIns / ZeroMaskingPattern,
//             EVEX.KZ).
//
// MaskingConstraint is the tie constraint (e.g. "$src0 = $dst") applied only
// to the merge-masking variant; ClobberConstraint applies to all three.
// The three commutability bits control the isCommutable flag of the
// corresponding variant independently.
multiclass AVX512_maskable_custom<bits<8> O, Format F,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  list<dag> MaskingPattern,
                                  list<dag> ZeroMaskingPattern,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable,
                                  string ClobberConstraint = ""> {
  let isCommutable = IsCommutable, Constraints = ClobberConstraint in
  def NAME: AVX512<O, F, Outs, Ins,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                     "$dst, "#IntelSrcAsm#"}",
                       Pattern>;

  // Prefer over VMOV*rrk Pat<>
  let isCommutable = IsKCommutable in
  def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
                       MaskingPattern>,
              EVEX_K {
    // In case of the 3src subclass this is overridden with a let.
    // Merge the mask tie constraint with the clobber constraint; either (or
    // both) may be empty, and a comma only separates two non-empty pieces.
    string Constraints = !if(!eq(ClobberConstraint, ""), MaskingConstraint,
                             !if(!eq(MaskingConstraint, ""), ClobberConstraint,
                                 !strconcat(ClobberConstraint, ", ",
                                            MaskingConstraint)));
  }

  // Zero mask does not add any restrictions to commute operands transformation.
  // So, it is Ok to use IsCommutable instead of IsKCommutable.
  let isCommutable = IsKZCommutable, // Prefer over VMOV*rrkz Pat<>
      Constraints = ClobberConstraint in
  def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
                                     "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
                       ZeroMaskingPattern>,
              EVEX_KZ;
}


// Common base class of AVX512_maskable and AVX512_maskable_3src.
// Builds the three set-patterns from RHS/MaskingRHS; the zero-masking pattern
// is always derived here as Select(mask, RHS, all-zeros).
multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS,
                                  SDPatternOperator Select = vselect_mask,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable,
                                  string ClobberConstraint = ""> :
  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst, MaskingRHS)],
                         [(set _.RC:$dst,
                               (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
                         MaskingConstraint, IsCommutable,
                         IsKCommutable, IsKZCommutable, ClobberConstraint>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
// This version uses a separate dag for non-masking and masking.
multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
                                 dag Outs, dag Ins, string OpcodeStr,
                                 string AttSrcAsm, string IntelSrcAsm,
                                 dag RHS, dag MaskRHS,
                                 string ClobberConstraint = "",
                                 bit IsCommutable = 0, bit IsKCommutable = 0,
                                 bit IsKZCommutable = IsCommutable> :
  AVX512_maskable_custom<O, F, Outs, Ins,
                         !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                         !con((ins _.KRCWM:$mask), Ins),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst,
                               (vselect_mask _.KRCWM:$mask, MaskRHS,
                                             _.RC:$src0))],
                         [(set _.RC:$dst,
                               (vselect_mask _.KRCWM:$mask, MaskRHS,
                                             _.ImmAllZerosV))],
                         "$src0 = $dst", IsCommutable, IsKCommutable,
                         IsKZCommutable, ClobberConstraint>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS,
                           bit IsCommutable = 0, bit IsKCommutable = 0,
                           bit IsKZCommutable = IsCommutable,
                           SDPatternOperator Select = vselect_mask,
                           string ClobberConstraint = ""> :
  AVX512_maskable_common<O, F, _, Outs, Ins,
                         !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                         !con((ins _.KRCWM:$mask), Ins),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                         (Select _.KRCWM:$mask, RHS, _.RC:$src0),
                         Select, "$src0 = $dst", IsCommutable, IsKCommutable,
                         IsKZCommutable, ClobberConstraint>;

// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the scalar instruction.
// Scalar ops select with X86selects_mask (element 0 only) and are never
// commutable in any variant.
multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins, string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS> :
  AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
                  RHS, 0, 0, 0, X86selects_mask>;

// Similar to AVX512_maskable but in this case one of the source operands
// ($src1) is already tied to $dst so we just use that for the preserved
// vector elements. NOTE that the NonTiedIns (the ins dag) should exclude
// $src1.
multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
                                dag Outs, dag NonTiedIns, string OpcodeStr,
                                string AttSrcAsm, string IntelSrcAsm,
                                dag RHS,
                                bit IsCommutable = 0,
                                bit IsKCommutable = 0,
                                SDPatternOperator Select = vselect_mask,
                                bit MaskOnly = 0> :
  AVX512_maskable_common<O, F, _, Outs,
                         !con((ins _.RC:$src1), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm,
                         // MaskOnly disables the unmasked selection pattern.
                         !if(MaskOnly, (null_frag), RHS),
                         (Select _.KRCWM:$mask, RHS, _.RC:$src1),
                         Select, "", IsCommutable, IsKCommutable>;

// Similar to AVX512_maskable_3src but in this case the input VT for the tied
// operand differs from the output VT. This requires a bitconvert on
// the preserved vector going into the vselect.
// NOTE: The unmasked pattern is disabled.
multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
                                     X86VectorVTInfo InVT,
                                     dag Outs, dag NonTiedIns, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS, bit IsCommutable = 0> :
  AVX512_maskable_common<O, F, OutVT, Outs,
                         !con((ins InVT.RC:$src1), NonTiedIns),
                         !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                         !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
                         (vselect_mask InVT.KRCWM:$mask, RHS,
                                       (bitconvert InVT.RC:$src1)),
                         vselect_mask, "", IsCommutable>;

// Scalar 3-src variant: same as AVX512_maskable_3src but selecting with
// X86selects_mask (element 0 only).
multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                     dag Outs, dag NonTiedIns, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS,
                                     bit IsCommutable = 0,
                                     bit IsKCommutable = 0,
                                     bit MaskOnly = 0> :
  AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
                       IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
                       X86selects_mask, MaskOnly>;

// Provides the masked assembly forms only; the k/kz variants get empty
// pattern lists, so only the unmasked Pattern (if any) selects.
multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern> :
  AVX512_maskable_custom<O, F, Outs, Ins,
                         !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                         !con((ins _.KRCWM:$mask), Ins),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                         "$src0 = $dst">;

// Assembly-only variant of the 3-src form: $src1 carries the preserved
// elements, so no extra tie constraint is needed.
multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                       dag Outs, dag NonTiedIns,
                                       string OpcodeStr,
                                       string AttSrcAsm, string IntelSrcAsm,
                                       list<dag> Pattern> :
  AVX512_maskable_custom<O, F, Outs,
                         !con((ins _.RC:$src1), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                         "">;

// Instruction with mask that puts result in mask register,
// like "compare" and "vptest"
// Only unmasked and merge-masked (EVEX.K) variants exist; there is no
// zero-masking form for mask-register destinations.
multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
                                      dag Outs,
                                      dag Ins, dag MaskingIns,
                                      string OpcodeStr,
                                      string AttSrcAsm, string IntelSrcAsm,
                                      list<dag> Pattern,
                                      list<dag> MaskingPattern,
                                      bit IsCommutable = 0> {
  let isCommutable = IsCommutable in {
    def NAME: AVX512<O, F, Outs, Ins,
                     OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                               "$dst, "#IntelSrcAsm#"}",
                     Pattern>;

    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                 "$dst {${mask}}, "#IntelSrcAsm#"}",
                       MaskingPattern>, EVEX_K;
  }
}

// Wraps the RHS dags into (set KRC:$dst, ...) patterns for the compare form.
multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                                      dag Outs,
                                      dag Ins, dag MaskingIns,
                                      string OpcodeStr,
                                      string AttSrcAsm, string IntelSrcAsm,
                                      dag RHS, dag MaskingRHS,
                                      bit IsCommutable = 0> :
  AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
                             AttSrcAsm, IntelSrcAsm,
                             [(set _.KRC:$dst, RHS)],
                             [(set _.KRC:$dst, MaskingRHS)], IsCommutable>;

// Compare form: masked result is (and mask, RHS_su), where RHS_su is the
// single-use variant of the compare dag.
multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                               dag Outs, dag Ins, string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, dag RHS_su, bit IsCommutable = 0> :
  AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
                             !con((ins _.KRCWM:$mask), Ins),
                             OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                             (and _.KRCWM:$mask, RHS_su), IsCommutable>;

// Used by conversion instructions.
multiclass AVX512_maskable_cvt<bits<8> O, Format F, X86VectorVTInfo _,
                               dag Outs,
                               dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                               string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, dag MaskingRHS, dag ZeroMaskingRHS> :
  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst, MaskingRHS)],
                         [(set _.RC:$dst, ZeroMaskingRHS)],
                         "$src0 = $dst">;

// FMA form: like AVX512_maskable_3src but the masking RHS is supplied
// separately so the merge/zero select can wrap a different dag than the
// unmasked pattern.
multiclass AVX512_maskable_fma<bits<8> O, Format F, X86VectorVTInfo _,
                               dag Outs, dag NonTiedIns, string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, dag MaskingRHS, bit IsCommutable,
                               bit IsKCommutable> :
  AVX512_maskable_custom<O, F, Outs,
                         !con((ins _.RC:$src1), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                         OpcodeStr, AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst,
                             (vselect_mask _.KRCWM:$mask, MaskingRHS,
                                           _.RC:$src1))],
                         [(set _.RC:$dst,
                             (vselect_mask _.KRCWM:$mask, MaskingRHS,
                                           _.ImmAllZerosV))],
                         "", IsCommutable, IsKCommutable>;

// Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
// swizzled by ExecutionDomainFix to pxor.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
               [(set VR512:$dst, (v16i32 immAllZerosV))]>;
def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
               [(set VR512:$dst, (v16i32 immAllOnesV))]>;
}

// All other 512-bit all-zeros types reuse the v16i32 pseudo.
let Predicates = [HasAVX512] in {
def : Pat<(v64i8 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v32i16 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v32f16 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>;
def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
}

// Alias instructions that allow VPTERNLOG to be used with a mask to create
// a mix of all ones and all zeros elements. This is done this way to force
// the same register to be used as input for all three sources.
let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK16WM:$mask), "",
                           [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
                                                      (v16i32 immAllOnesV),
                                                      (v16i32 immAllZerosV)))]>;
def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK8WM:$mask), "",
                           [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
                                                      (v8i64 immAllOnesV),
                                                      (v8i64 immAllZerosV)))]>;
}

// 128/256-bit all-zeros pseudos, expanded the same way as the 512-bit one.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
               [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
               [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
}

let Predicates = [HasAVX512] in {
def : Pat<(v8i16 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v16i8 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v2i64 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v8f16 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v4f32 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v2f64 immAllZerosV), (AVX512_128_SET0)>;
def : Pat<(v32i8 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v16i16 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v4i64 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v16f16 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v8f32 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v4f64 immAllZerosV), (AVX512_256_SET0)>;
}

// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
  def AVX512_FsFLD0SH : I<0, Pseudo, (outs FR16X:$dst), (ins), "",
                          [(set FR16X:$dst, fp16imm0)]>;
  def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
                          [(set FR32X:$dst, fp32imm0)]>;
  def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
                          [(set FR64X:$dst, fp64imm0)]>;
  def AVX512_FsFLD0F128 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
                            [(set VR128X:$dst, fp128imm0)]>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - VECTOR INSERT
//

// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one.
// Emits the rr (register) and rm (memory) forms of a VINSERTxXxN
// instruction, each with the full unmasked/merge/zero masking set via
// AVX512_maskable_split. vinsert_insert selects the unmasked form;
// vinsert_for_mask selects the masked forms.
multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
                                  X86VectorVTInfo To,
                                  SDPatternOperator vinsert_insert,
                                  SDPatternOperator vinsert_for_mask,
                                  X86FoldableSchedWrite sched> {
  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                                         (From.VT From.RC:$src2),
                                         (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                                           (From.VT From.RC:$src2),
                                           (iPTR imm))>,
                   AVX512AIi8Base, EVEX, VVVV, Sched<[sched]>;
    let mayLoad = 1 in
    defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                                         (From.VT (From.LdFrag addr:$src2)),
                                         (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                                           (From.VT (From.LdFrag addr:$src2)),
                                           (iPTR imm))>, AVX512AIi8Base, EVEX, VVVV,
                   EVEX_CD8<From.EltSize, From.CD8TupleForm>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Passes the same pattern operator for masked and unmasked ops.
multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
                            X86VectorVTInfo To,
                            SDPatternOperator vinsert_insert,
                            X86FoldableSchedWrite sched> :
  vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>;

// Selection patterns mapping an insert_subvector of alternative types onto an
// existing instruction; the immediate is recomputed via INSERT_get_vinsert_imm.
multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                                     X86VectorVTInfo To, PatFrag vinsert_insert,
                                     SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> {
  let Predicates = p in {
    def : Pat<(vinsert_insert:$ins
                     (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                     To.RC:$src1, From.RC:$src2,
                     (INSERT_get_vinsert_imm To.RC:$ins)))>;

    def : Pat<(vinsert_insert:$ins
                     (To.VT To.RC:$src1),
                     (From.VT (From.LdFrag addr:$src2)),
                     (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rm")
                     To.RC:$src1, addr:$src2,
                     (INSERT_get_vinsert_imm To.RC:$ins)))>;
  }
}

// Instantiates all VINSERT width/element-type combinations for one opcode
// pair (128-bit-insert opcode and 256-bit-insert opcode).
multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
                            ValueType EltVT64, int Opcode256,
                            X86FoldableSchedWrite sched> {

  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
                                   X86VectorVTInfo< 4, EltVT32, VR128X>,
                                   X86VectorVTInfo< 8, EltVT32, VR256X>,
                                   vinsert128_insert, sched>, EVEX_V256;

  defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 vinsert128_insert, sched>, EVEX_V512;

  defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 vinsert256_insert, sched>, REX_W, EVEX_V512;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
                                   X86VectorVTInfo< 2, EltVT64, VR128X>,
                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
                                   null_frag, vinsert128_insert, sched>,
                                   EVEX_V256, REX_W;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 null_frag, vinsert128_insert, sched>,
                                 REX_W, EVEX_V512;

    defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
                                   X86VectorVTInfo< 8, EltVT32, VR256X>,
                                   X86VectorVTInfo<16, EltVT32, VR512>,
                                   null_frag, vinsert256_insert, sched>,
                                   EVEX_V512;
  }
}

// FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>;
defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>;

// Codegen pattern with the alternative types,
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;

defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;

defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;

// Codegen pattern with the alternative types insert VEC128 into VEC256
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v8f16x_info, v16f16x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
// Codegen pattern with the alternative types insert VEC128 into VEC512
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
               vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v8f16x_info, v32f16_info,
               vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
// Codegen pattern with the alternative types insert VEC256 into VEC512
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v16f16x_info, v32f16_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;


// Masked-insert patterns where the vselect_mask operates on a bitcast of the
// inserted value (Cast is the mask's element type view). Covers merge (rrk/
// rmk) and zero (rrkz/rmkz) masking for register and memory operands.
multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                 X86VectorVTInfo To, X86VectorVTInfo Cast,
                                 PatFrag vinsert_insert,
                                 SDNodeXForm INSERT_get_vinsert_imm,
                                 list<Predicate> p> {
let Predicates = p in {
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT From.RC:$src2),
                                                 (iPTR imm))),
                           Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rrk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT
                                                  (bitconvert
                                                   (From.LdFrag addr:$src2))),
                                                 (iPTR imm))),
                           Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rmk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;

  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT From.RC:$src2),
                                                 (iPTR imm))),
                           Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rrkz")
             Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT (From.LdFrag addr:$src2)),
                                                 (iPTR imm))),
                           Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rmkz")
             Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
}
}

defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
                             v8f32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
                             v4f64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4i32x_info, v8i32x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v8i16x_info, v16i16x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v16i8x_info, v32i8x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
                             v16f32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
                             v8f64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
                             v16f32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
                             v8f64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

// vinsertps - insert f32 to XMM
let ExeDomain = SSEPackedSingle in {
let isCommutable = 1 in
def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
      (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, timm:$src3))]>,
      EVEX, VVVV, Sched<[SchedWriteFShuffle.XMM]>;
def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
      (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1,
                          (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                          timm:$src3))]>,
      EVEX, VVVV, EVEX_CD8<32, CD8VT1>,
      Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
}

//===----------------------------------------------------------------------===//
// AVX-512 VECTOR EXTRACT
//---

// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one.
multiclass vextract_for_size_split<int Opcode,
                                   X86VectorVTInfo From, X86VectorVTInfo To,
                                   SDPatternOperator vextract_extract,
                                   SDPatternOperator vextract_for_mask,
                                   SchedWrite SchedRR, SchedWrite SchedMR> {

  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
                (ins From.RC:$src1, u8imm:$idx),
                "vextract" # To.EltTypeName # "x" # To.NumElts,
                "$idx, $src1", "$src1, $idx",
                (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
                (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
                AVX512AIi8Base, EVEX, Sched<[SchedRR]>;

    // Store-to-memory form (no masking pattern; see mrk below for masked).
    def mr : AVX512AIi8<Opcode, MRMDestMem, (outs),
                    (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
                    "vextract" # To.EltTypeName # "x" # To.NumElts #
                    "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
                    [(store (To.VT (vextract_extract:$idx
                                    (From.VT From.RC:$src1), (iPTR imm))),
                             addr:$dst)]>, EVEX,
                    Sched<[SchedMR]>;

    // Masked store form - assembly only, no ISel pattern.
    let mayStore = 1, hasSideEffects = 0 in
    def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
                     (ins To.MemOp:$dst, To.KRCWM:$mask,
                          From.RC:$src1, u8imm:$idx),
                     "vextract" # To.EltTypeName # "x" # To.NumElts #
                     "\t{$idx, $src1, $dst {${mask}}|"
                     "$dst {${mask}}, $src1, $idx}", []>,
                     EVEX_K, EVEX, Sched<[SchedMR]>;
  }
}

// Passes the same pattern operator for masked and unmasked ops.
multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
                             X86VectorVTInfo To,
                             SDPatternOperator vextract_extract,
                             SchedWrite SchedRR, SchedWrite SchedMR> :
  vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract, SchedRR, SchedMR>;

// Codegen pattern for the alternative types
multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                X86VectorVTInfo To, PatFrag vextract_extract,
                SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
  let Predicates = p in {
    def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                          From.RC:$src1,
                          (EXTRACT_get_vextract_imm To.RC:$ext)))>;
    def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
                              (iPTR imm))), addr:$dst),
              (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1,
                          (EXTRACT_get_vextract_imm To.RC:$ext))>;
  }
}

// Instantiates all VEXTRACT width/element-type combinations for one opcode
// pair (128-bit-extract opcode and 256-bit-extract opcode).
multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
                             ValueType EltVT64, int Opcode256,
                             SchedWrite SchedRR, SchedWrite SchedMR> {
  let Predicates = [HasAVX512] in {
    defm NAME # "32x4Z" : vextract_for_size<Opcode128,
                                   X86VectorVTInfo<16, EltVT32, VR512>,
                                   X86VectorVTInfo< 4, EltVT32, VR128X>,
                                   vextract128_extract, SchedRR, SchedMR>,
                                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
    defm NAME # "64x4Z" : vextract_for_size<Opcode256,
                                   X86VectorVTInfo< 8, EltVT64, VR512>,
                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
                                   vextract256_extract, SchedRR, SchedMR>,
                                       REX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
  }
  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 vextract128_extract, SchedRR, SchedMR>,
                                     EVEX_V256, EVEX_CD8<32, CD8VT4>;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
                                     EVEX_V256, EVEX_CD8<64, CD8VT2>, REX_W;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vextract_for_size_split<Opcode128,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
                                     REX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
    defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 null_frag, vextract256_extract, SchedRR, SchedMR>,
                                     EVEX_V512, EVEX_CD8<32, CD8VT8>;
  }
}

// TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, WriteFShuffle256, WriteFStore>;
defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, WriteShuffle256, WriteVecStore>;

// extract_subvector codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.
// 64-bit-element 128-bit extracts from ZMM reuse the 32x4 instructions.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;

// 32-bit-element 256-bit extracts from ZMM reuse the 64x4 instructions.
defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen pattern with the alternative types extract VEC128 from VEC256
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v16f16x_info, v8f16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen pattern with the alternative types extract VEC128 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v32f16_info, v8f16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
// Codegen pattern with the alternative types extract VEC256 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v32f16_info, v16f16x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;


// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
let Predicates = [NoVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI128rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF128rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI128rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF128rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI128rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))),
          (v8f16 (VEXTRACTF128rr
                  (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI128rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}

// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX. With VLX the YMM-source Z256 forms
// are available, so use those instead of the full 512-bit extracts.
let Predicates = [HasVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI32x4Z256rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF32x4Z256rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI32x4Z256rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF32x4Z256rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI32x4Z256rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))),
          (v8f16 (VEXTRACTF32x4Z256rr
                  (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI32x4Z256rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}


// Additional patterns for handling a bitcast between the vselect and the
// extract_subvector.
// Matches a masked select whose "true" side is a bitcast of an
// extract_subvector: From -> extract To -> bitcast to Cast, where the mask is
// in Cast's element granularity. Maps it onto the merge- ("rrk") and
// zero-masking ("rrkz") forms of the named instruction.
multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                  X86VectorVTInfo To, X86VectorVTInfo Cast,
                                  PatFrag vextract_extract,
                                  SDNodeXForm EXTRACT_get_vextract_imm,
                                  list<Predicate> p> {
let Predicates = p in {
  // Merge-masking: pass-through value in $src0.
  // NOTE(review): $src0 is written as To.RC here while the output operand is
  // Cast.RC:$src0 — these are the same register class for all current
  // instantiations; confirm against upstream before relying on it.
  def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
                                   (bitconvert
                                    (To.VT (vextract_extract:$ext
                                            (From.VT From.RC:$src), (iPTR imm)))),
                                   To.RC:$src0)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
                      Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;

  // Zero-masking: masked-off elements become zero.
  def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
                                   (bitconvert
                                    (To.VT (vextract_extract:$ext
                                            (From.VT From.RC:$src), (iPTR imm)))),
                                   Cast.ImmAllZerosV)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
                      Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
}
}

// 128-bit extracts from YMM sources.
defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

// 128-bit extracts from ZMM sources.
defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

// 256-bit extracts from ZMM sources.
defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
                              v8f32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
                              v4f64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;

// vextractps - extract 32 bits from XMM
def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32orGR64:$dst),
                      (ins VR128X:$src1, u8imm:$src2),
                      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set GR32orGR64:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
                      EVEX, WIG, Sched<[WriteVecExtract]>;

def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
                      (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
                      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
                              addr:$dst)]>,
                      EVEX, WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;

//===---------------------------------------------------------------------===//
// AVX-512 BROADCAST
//---
// broadcast with a scalar argument.
// Patterns selecting broadcast-of-FP-scalar-register onto the rr / rrk / rrkz
// forms of the named instruction; the scalar is first copied into the vector
// register class the instruction expects.
multiclass avx512_broadcast_scalar<string Name, X86VectorVTInfo DestInfo,
                                   X86VectorVTInfo SrcInfo> {
  def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rr)
             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
                          (X86VBroadcast SrcInfo.FRC:$src),
                          DestInfo.RC:$src0)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rrk)
             DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
                          (X86VBroadcast SrcInfo.FRC:$src),
                          DestInfo.ImmAllZerosV)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rrkz)
             DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
}

// Split version to allow mask and broadcast node to be different types. This
// helps support the 32x2 broadcasts. Defines register (rr/rrkz/rrk) and
// memory (rm/rmkz/rmk) forms; the unmasked forms take overridable pattern
// operators so instantiations can pass null_frag to disable them.
multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
                                     SchedWrite SchedRR, SchedWrite SchedRM,
                                     X86VectorVTInfo MaskInfo,
                                     X86VectorVTInfo DestInfo,
                                     X86VectorVTInfo SrcInfo,
                                     bit IsConvertibleToThreeAddress,
                                     SDPatternOperator UnmaskedOp = X86VBroadcast,
                                     SDPatternOperator UnmaskedBcastOp = SrcInfo.BroadcastLdFrag> {
  let hasSideEffects = 0 in
  def rr : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), (ins SrcInfo.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set MaskInfo.RC:$dst,
                      (MaskInfo.VT
                       (bitconvert
                        (DestInfo.VT
                         (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))],
                    DestInfo.ExeDomain>, T8, PD, EVEX, Sched<[SchedRR]>;
  def rrkz : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
                      (ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                                 "${dst} {${mask}} {z}, $src}"),
                      [(set MaskInfo.RC:$dst,
                        (vselect_mask MaskInfo.KRCWM:$mask,
                         (MaskInfo.VT
                          (bitconvert
                           (DestInfo.VT
                            (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
                         MaskInfo.ImmAllZerosV))],
                      DestInfo.ExeDomain>, T8, PD, EVEX, EVEX_KZ, Sched<[SchedRR]>;
  let Constraints = "$src0 = $dst" in
  def rrk : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
                     (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
                          SrcInfo.RC:$src),
                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
                                "${dst} {${mask}}, $src}"),
                     [(set MaskInfo.RC:$dst,
                       (vselect_mask MaskInfo.KRCWM:$mask,
                        (MaskInfo.VT
                         (bitconvert
                          (DestInfo.VT
                           (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
                        MaskInfo.RC:$src0))],
                     DestInfo.ExeDomain>, T8, PD, EVEX, EVEX_K, Sched<[SchedRR]>;

  let hasSideEffects = 0, mayLoad = 1 in
  def rm : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
                    (ins SrcInfo.ScalarMemOp:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set MaskInfo.RC:$dst,
                      (MaskInfo.VT
                       (bitconvert
                        (DestInfo.VT
                         (UnmaskedBcastOp addr:$src)))))],
                    DestInfo.ExeDomain>, T8, PD, EVEX,
                    EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;

  def rmkz : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
                      (ins MaskInfo.KRCWM:$mask, SrcInfo.ScalarMemOp:$src),
                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                                 "${dst} {${mask}} {z}, $src}"),
                      [(set MaskInfo.RC:$dst,
                        (vselect_mask MaskInfo.KRCWM:$mask,
                         (MaskInfo.VT
                          (bitconvert
                           (DestInfo.VT
                            (SrcInfo.BroadcastLdFrag addr:$src)))),
                         MaskInfo.ImmAllZerosV))],
                      DestInfo.ExeDomain>, T8, PD, EVEX, EVEX_KZ,
                      EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;

  let Constraints = "$src0 = $dst",
      isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
  def rmk : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
                     (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
                          SrcInfo.ScalarMemOp:$src),
                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
                                "${dst} {${mask}}, $src}"),
                     [(set MaskInfo.RC:$dst,
                       (vselect_mask MaskInfo.KRCWM:$mask,
                        (MaskInfo.VT
                         (bitconvert
                          (DestInfo.VT
                           (SrcInfo.BroadcastLdFrag addr:$src)))),
                        MaskInfo.RC:$src0))],
                     DestInfo.ExeDomain>, T8, PD, EVEX, EVEX_K,
                     EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
}

// Helper class to force mask and broadcast result to same type.
multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
                               SchedWrite SchedRR, SchedWrite SchedRM,
                               X86VectorVTInfo DestInfo,
                               X86VectorVTInfo SrcInfo,
                               bit IsConvertibleToThreeAddress> :
  avx512_broadcast_rm_split<opc, OpcodeStr, SchedRR, SchedRM,
                            DestInfo, DestInfo, SrcInfo,
                            IsConvertibleToThreeAddress>;

// vbroadcastsd: ZMM under AVX512 and YMM under VLX. No XMM (Z128) variant
// is defined here, unlike the ss version below.
multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                 WriteFShuffle256Ld, _.info512, _.info128, 1>,
             avx512_broadcast_scalar<NAME, _.info512, _.info128>,
             EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                    WriteFShuffle256Ld, _.info256, _.info128, 1>,
                avx512_broadcast_scalar<NAME, _.info256, _.info128>,
                EVEX_V256;
  }
}

// vbroadcastss: ZMM under AVX512; YMM and XMM under VLX.
multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                 WriteFShuffle256Ld, _.info512, _.info128, 1>,
             avx512_broadcast_scalar<NAME, _.info512, _.info128>,
             EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                    WriteFShuffle256Ld, _.info256, _.info128, 1>,
                avx512_broadcast_scalar<NAME, _.info256, _.info128>,
                EVEX_V256;
    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                    WriteFShuffle256Ld, _.info128, _.info128, 1>,
                avx512_broadcast_scalar<NAME, _.info128, _.info128>,
                EVEX_V128;
  }
}
defm VBROADCASTSS : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
                                           avx512vl_f32_info>;
defm VBROADCASTSD : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
                                           avx512vl_f64_info>, REX_W;

// Broadcast from a 32/64-bit GPR.
multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
                                    X86VectorVTInfo _, SDPatternOperator OpNode,
                                    RegisterClass SrcRC> {
  // Fold with a mask even if it has multiple uses since it is cheap.
  let ExeDomain = _.ExeDomain in
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins SrcRC:$src),
                            "vpbroadcast"#_.Suffix, "$src", "$src",
                            (_.VT (OpNode SrcRC:$src)), /*IsCommutable*/0,
                            /*IsKCommutable*/0, /*IsKZCommutable*/0, vselect>,
                            T8, PD, EVEX, Sched<[SchedRR]>;
}

// Broadcast from an 8/16-bit GPR. The instruction encoding takes a GR32
// operand, so the patterns widen the GR8/GR16 source with INSERT_SUBREG into
// an undef i32 first. The instruction itself carries no patterns; all
// selection goes through the explicit Pats below.
multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
                                      X86VectorVTInfo _, SDPatternOperator OpNode,
                                      RegisterClass SrcRC, SubRegIndex Subreg> {
  let hasSideEffects = 0, ExeDomain = _.ExeDomain in
  defm rr : AVX512_maskable_custom<opc, MRMSrcReg,
                                   (outs _.RC:$dst), (ins GR32:$src),
                                   !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
                                   !con((ins _.KRCWM:$mask), (ins GR32:$src)),
                                   "vpbroadcast"#_.Suffix, "$src", "$src", [], [], [],
                                   "$src0 = $dst">, T8, PD, EVEX, Sched<[SchedRR]>;

  def : Pat <(_.VT (OpNode SrcRC:$src)),
             (!cast<Instruction>(Name#rr)
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  // Fold with a mask even if it has multiple uses since it is cheap.
  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
             (!cast<Instruction>(Name#rrk) _.RC:$src0, _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
             (!cast<Instruction>(Name#rrkz) _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
}

// VL expansion for the byte/word GPR broadcasts: ZMM under prd, YMM/XMM
// additionally under VLX.
multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
                                         AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
                                         RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
                                        OpNode, SrcRC, Subreg>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
                                           _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
    defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
                                           _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
  }
}

// VL expansion for the dword/qword GPR broadcasts.
multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
                                       SDPatternOperator OpNode,
                                       RegisterClass SrcRC, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
                                      SrcRC>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
                                         SrcRC>, EVEX_V256;
    defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
                                         SrcRC>, EVEX_V128;
  }
}

defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
                       avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
                       avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
                       HasBWI>;
defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
                       X86VBroadcast, GR32, HasAVX512>;
defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
                       X86VBroadcast, GR64, HasAVX512>, REX_W;

// VL expansion for the element-broadcast (register/memory source) forms.
multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
                                      AVX512VLVectorVTInfo _, Predicate prd,
                                      bit IsConvertibleToThreeAddress> {
  let Predicates = [prd] in {
    defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
                                 WriteShuffle256Ld, _.info512, _.info128,
                                 IsConvertibleToThreeAddress>,
             EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
                                    WriteShuffle256Ld, _.info256, _.info128,
                                    IsConvertibleToThreeAddress>,
                EVEX_V256;
    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle,
                                    WriteShuffleXLd, _.info128, _.info128,
                                    IsConvertibleToThreeAddress>,
                EVEX_V128;
  }
}

defm VPBROADCASTB  : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
                                           avx512vl_i8_info, HasBWI, 0>;
defm VPBROADCASTW  : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
                                           avx512vl_i16_info, HasBWI, 0>;
defm VPBROADCASTD  : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
                                           avx512vl_i32_info, HasAVX512, 1>;
defm VPBROADCASTQ  : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
                                           avx512vl_i64_info, HasAVX512, 1>, REX_W;

// Broadcast of a whole subvector loaded from memory (memory-source only).
multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
                                      SDPatternOperator OpNode,
                                      X86VectorVTInfo _Dst,
                                      X86VectorVTInfo _Src> {
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                            (_Dst.VT (OpNode addr:$src))>,
            Sched<[SchedWriteShuffle.YMM.Folded]>,
            AVX5128IBase, EVEX;
}

// This should be used for the AVX512DQ broadcast instructions. It disables
// the unmasked patterns so that we only use the DQ instructions when masking
// is requested.
// DQ variant of the subvector broadcast: passes null_frag as the unmasked
// pattern so only the masked forms are selected from generic DAG nodes.
multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
                                         SDPatternOperator OpNode,
                                         X86VectorVTInfo _Dst,
                                         X86VectorVTInfo _Src> {
  let hasSideEffects = 0, mayLoad = 1 in
  defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                                  (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                                  (null_frag),
                                  (_Dst.VT (OpNode addr:$src))>,
            Sched<[SchedWriteShuffle.YMM.Folded]>,
            AVX5128IBase, EVEX;
}

// f16 broadcasts reuse the integer VPBROADCASTW instructions.
let Predicates = [HasBWI] in {
  def : Pat<(v32f16 (X86VBroadcastld16 addr:$src)),
            (VPBROADCASTWZrm addr:$src)>;

  def : Pat<(v32f16 (X86VBroadcast (v8f16 VR128X:$src))),
            (VPBROADCASTWZrr VR128X:$src)>;
  def : Pat<(v32f16 (X86VBroadcast (f16 FR16X:$src))),
            (VPBROADCASTWZrr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
}
let Predicates = [HasVLX, HasBWI] in {
  def : Pat<(v8f16 (X86VBroadcastld16 addr:$src)),
            (VPBROADCASTWZ128rm addr:$src)>;
  def : Pat<(v16f16 (X86VBroadcastld16 addr:$src)),
            (VPBROADCASTWZ256rm addr:$src)>;

  def : Pat<(v8f16 (X86VBroadcast (v8f16 VR128X:$src))),
            (VPBROADCASTWZ128rr VR128X:$src)>;
  def : Pat<(v16f16 (X86VBroadcast (v8f16 VR128X:$src))),
            (VPBROADCASTWZ256rr VR128X:$src)>;

  def : Pat<(v8f16 (X86VBroadcast (f16 FR16X:$src))),
            (VPBROADCASTWZ128rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
  def : Pat<(v16f16 (X86VBroadcast (f16 FR16X:$src))),
            (VPBROADCASTWZ256rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
}

//===----------------------------------------------------------------------===//
// AVX-512 BROADCAST SUBVECTORS
//

defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                       X86SubVBroadcastld128, v16i32_info, v4i32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                       X86SubVBroadcastld128, v16f32_info, v4f32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
                       X86SubVBroadcastld256, v8i64_info, v4i64x_info>, REX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
                       X86SubVBroadcastld256, v8f64_info, v4f64x_info>, REX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT4>;

// Unmasked subvector broadcasts of every element type map onto the two
// baseline (non-DQ) instruction pairs above.
let Predicates = [HasAVX512] in {
def : Pat<(v8f64 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTF64X4rm addr:$src)>;
def : Pat<(v16f32 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTF64X4rm addr:$src)>;
def : Pat<(v32f16 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTF64X4rm addr:$src)>;
def : Pat<(v8i64 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v16i32 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcastld256 addr:$src)),
          (VBROADCASTI64X4rm addr:$src)>;

def : Pat<(v8f64 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4rm addr:$src)>;
def : Pat<(v16f32 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4rm addr:$src)>;
def : Pat<(v32f16 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4rm addr:$src)>;
def : Pat<(v8i64 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v16i32 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4rm addr:$src)>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
                        (v16f32 immAllZerosV)),
          (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
                        VR512:$src0),
          (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
                        (v16i32 immAllZerosV)),
          (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
                        VR512:$src0),
          (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;

def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
                        (v8f64 immAllZerosV)),
          (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
                        VR512:$src0),
          (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
                        (v8i64 immAllZerosV)),
          (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
                        VR512:$src0),
          (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}

let Predicates = [HasVLX] in {
defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                           X86SubVBroadcastld128, v8i32x_info, v4i32x_info>,
                           EVEX_V256, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                           X86SubVBroadcastld128, v8f32x_info, v4f32x_info>,
                           EVEX_V256, EVEX_CD8<32, CD8VT4>;

def : Pat<(v4f64 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4Z256rm addr:$src)>;
def : Pat<(v8f32 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4Z256rm addr:$src)>;
def : Pat<(v16f16 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTF32X4Z256rm addr:$src)>;
def : Pat<(v4i64 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v8i32 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v16i16 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)),
          (VBROADCASTI32X4Z256rm addr:$src)>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
                        (v8f32 immAllZerosV)),
          (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
                        VR256X:$src0),
          (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
                        (v8i32 immAllZerosV)),
          (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
                        VR256X:$src0),
          (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
}

// bf16 subvector broadcasts also reuse the FP instructions.
let Predicates = [HasBF16] in {
  def : Pat<(v32bf16 (X86SubVBroadcastld256 addr:$src)),
            (VBROADCASTF64X4rm addr:$src)>;
  def : Pat<(v32bf16 (X86SubVBroadcastld128 addr:$src)),
            (VBROADCASTF32X4rm addr:$src)>;
}

let Predicates = [HasBF16, HasVLX] in
  def : Pat<(v16bf16 (X86SubVBroadcastld128 addr:$src)),
            (VBROADCASTF32X4Z256rm addr:$src)>;

let Predicates = [HasVLX, HasDQI] in {
// NOTE(review): despite the Z128 suffix, these are 256-bit destination forms
// (EVEX_V256, v4*64x_info) — the suffix does not reflect the vector length.
defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                           X86SubVBroadcastld128, v4i64x_info, v2i64x_info>,
                           EVEX_V256, EVEX_CD8<64, CD8VT2>, REX_W;
defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                           X86SubVBroadcastld128, v4f64x_info, v2f64x_info>,
                           EVEX_V256, EVEX_CD8<64, CD8VT2>, REX_W;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect_mask VK4WM:$mask,
                        (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
                        (v4f64 immAllZerosV)),
          (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK4WM:$mask,
                        (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
                        VR256X:$src0),
          (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK4WM:$mask,
                        (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
                        (v4i64 immAllZerosV)),
          (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK4WM:$mask,
                        (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
                        VR256X:$src0),
          (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
}

let Predicates = [HasDQI] in {
defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                       X86SubVBroadcastld128, v8i64_info, v2i64x_info>, REX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
                       X86SubVBroadcastld256, v16i32_info, v8i32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                       X86SubVBroadcastld128, v8f64_info, v2f64x_info>, REX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
                       X86SubVBroadcastld256, v16f32_info, v8f32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT8>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
                        (v16f32 immAllZerosV)),
          (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
                        VR512:$src0),
          (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
                        (v16i32 immAllZerosV)),
          (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
                        VR512:$src0),
          (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;

def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
                        (v8f64 immAllZerosV)),
          (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
                        VR512:$src0),
          (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
                        (v8i64 immAllZerosV)),
          (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
                        VR512:$src0),
          (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}

// 32x2 broadcasts: mask type and broadcast type differ, so these go through
// avx512_broadcast_rm_split with null_frag unmasked operators (DQI-only,
// masking-only selection). ZMM and YMM forms.
multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
                                        AVX512VLVectorVTInfo _Dst,
                                        AVX512VLVectorVTInfo _Src> {
  let Predicates = [HasDQI] in
    defm Z : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
                                       WriteShuffle256Ld, _Dst.info512,
                                       _Src.info512, _Src.info128, 0, null_frag, null_frag>,
             EVEX_V512;
  let Predicates = [HasDQI, HasVLX] in
    defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
                                          WriteShuffle256Ld, _Dst.info256,
                                          _Src.info256, _Src.info128, 0, null_frag, null_frag>,
                EVEX_V256;
}

// Integer 32x2 broadcast additionally has an XMM (Z128) form.
multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
                                         AVX512VLVectorVTInfo _Dst,
                                         AVX512VLVectorVTInfo _Src> :
  avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {

  let Predicates = [HasDQI, HasVLX] in
    defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle,
                                          WriteShuffleXLd, _Dst.info128,
                                          _Src.info128, _Src.info128, 0, null_frag, null_frag>,
                EVEX_V128;
}

defm VBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
                       avx512vl_i32_info, avx512vl_i64_info>;
defm VBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
                       avx512vl_f32_info, avx512vl_f64_info>;

//===----------------------------------------------------------------------===//
// AVX-512 BROADCAST MASK TO VECTOR REGISTER
//---
multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
                                  X86VectorVTInfo _, RegisterClass KRC> {
  def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                      [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>,
                      EVEX, Sched<[WriteShuffle]>;
}

// CDI mask-to-vector broadcasts at all three vector lengths.
multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
  let Predicates = [HasCDI] in
    defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
  let Predicates = [HasCDI, HasVLX] in {
    defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
    defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
  }
}

defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
                                             avx512vl_i32_info, VK16>;
defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
                                             avx512vl_i64_info, VK8>, REX_W;
1595 1596//===----------------------------------------------------------------------===// 1597// -- VPERMI2 - 3 source operands form -- 1598multiclass avx512_perm_i<bits<8> opc, string OpcodeStr, 1599 X86FoldableSchedWrite sched, 1600 X86VectorVTInfo _, X86VectorVTInfo IdxVT> { 1601let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, 1602 hasSideEffects = 0 in { 1603 defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst), 1604 (ins _.RC:$src2, _.RC:$src3), 1605 OpcodeStr, "$src3, $src2", "$src2, $src3", 1606 (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>, 1607 EVEX, VVVV, AVX5128IBase, Sched<[sched]>; 1608 1609 let mayLoad = 1 in 1610 defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst), 1611 (ins _.RC:$src2, _.MemOp:$src3), 1612 OpcodeStr, "$src3, $src2", "$src2, $src3", 1613 (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, 1614 (_.VT (_.LdFrag addr:$src3)))), 1>, 1615 EVEX, VVVV, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>; 1616 } 1617} 1618 1619multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr, 1620 X86FoldableSchedWrite sched, 1621 X86VectorVTInfo _, X86VectorVTInfo IdxVT> { 1622 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, 1623 hasSideEffects = 0, mayLoad = 1 in 1624 defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst), 1625 (ins _.RC:$src2, _.ScalarMemOp:$src3), 1626 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), 1627 !strconcat("$src2, ${src3}", _.BroadcastStr ), 1628 (_.VT (X86VPermt2 _.RC:$src2, 1629 IdxVT.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>, 1630 AVX5128IBase, EVEX, VVVV, EVEX_B, 1631 Sched<[sched.Folded, sched.ReadAfterFold]>; 1632} 1633 1634multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr, 1635 X86FoldableSchedWrite sched, 1636 AVX512VLVectorVTInfo VTInfo, 1637 AVX512VLVectorVTInfo ShuffleMask> { 1638 defm NAME#Z: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512, 1639 
ShuffleMask.info512>, 1640 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512, 1641 ShuffleMask.info512>, EVEX_V512; 1642 let Predicates = [HasVLX] in { 1643 defm NAME#Z128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128, 1644 ShuffleMask.info128>, 1645 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128, 1646 ShuffleMask.info128>, EVEX_V128; 1647 defm NAME#Z256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256, 1648 ShuffleMask.info256>, 1649 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256, 1650 ShuffleMask.info256>, EVEX_V256; 1651 } 1652} 1653 1654multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr, 1655 X86FoldableSchedWrite sched, 1656 AVX512VLVectorVTInfo VTInfo, 1657 AVX512VLVectorVTInfo Idx, 1658 Predicate Prd> { 1659 let Predicates = [Prd] in 1660 defm NAME#Z: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512, 1661 Idx.info512>, EVEX_V512; 1662 let Predicates = [Prd, HasVLX] in { 1663 defm NAME#Z128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128, 1664 Idx.info128>, EVEX_V128; 1665 defm NAME#Z256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256, 1666 Idx.info256>, EVEX_V256; 1667 } 1668} 1669 1670defm VPERMI2D : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256, 1671 avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; 1672defm VPERMI2Q : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256, 1673 avx512vl_i64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>; 1674defm VPERMI2W : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256, 1675 avx512vl_i16_info, avx512vl_i16_info, HasBWI>, 1676 REX_W, EVEX_CD8<16, CD8VF>; 1677defm VPERMI2B : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256, 1678 avx512vl_i8_info, avx512vl_i8_info, HasVBMI>, 1679 EVEX_CD8<8, CD8VF>; 1680defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256, 1681 avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; 1682defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", 
WriteFVarShuffle256, 1683 avx512vl_f64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>; 1684 1685// Extra patterns to deal with extra bitcasts due to passthru and index being 1686// different types on the fp versions. 1687multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _, 1688 X86VectorVTInfo IdxVT, 1689 X86VectorVTInfo CastVT> { 1690 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 1691 (X86VPermt2 (_.VT _.RC:$src2), 1692 (IdxVT.VT (bitconvert 1693 (CastVT.VT _.RC:$src1))), 1694 _.RC:$src3), 1695 (_.VT (bitconvert (CastVT.VT _.RC:$src1))))), 1696 (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask, 1697 _.RC:$src2, _.RC:$src3)>; 1698 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 1699 (X86VPermt2 _.RC:$src2, 1700 (IdxVT.VT (bitconvert 1701 (CastVT.VT _.RC:$src1))), 1702 (_.LdFrag addr:$src3)), 1703 (_.VT (bitconvert (CastVT.VT _.RC:$src1))))), 1704 (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask, 1705 _.RC:$src2, addr:$src3)>; 1706 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask, 1707 (X86VPermt2 _.RC:$src2, 1708 (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))), 1709 (_.BroadcastLdFrag addr:$src3)), 1710 (_.VT (bitconvert (CastVT.VT _.RC:$src1))))), 1711 (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask, 1712 _.RC:$src2, addr:$src3)>; 1713} 1714 1715// TODO: Should we add more casts? The vXi64 case is common due to ABI. 
1716defm : avx512_perm_i_lowering<"VPERMI2PSZ", v16f32_info, v16i32_info, v8i64_info>; 1717defm : avx512_perm_i_lowering<"VPERMI2PSZ256", v8f32x_info, v8i32x_info, v4i64x_info>; 1718defm : avx512_perm_i_lowering<"VPERMI2PSZ128", v4f32x_info, v4i32x_info, v2i64x_info>; 1719 1720// VPERMT2 1721multiclass avx512_perm_t<bits<8> opc, string OpcodeStr, 1722 X86FoldableSchedWrite sched, 1723 X86VectorVTInfo _, X86VectorVTInfo IdxVT> { 1724let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in { 1725 defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 1726 (ins IdxVT.RC:$src2, _.RC:$src3), 1727 OpcodeStr, "$src3, $src2", "$src2, $src3", 1728 (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>, 1729 EVEX, VVVV, AVX5128IBase, Sched<[sched]>; 1730 1731 defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 1732 (ins IdxVT.RC:$src2, _.MemOp:$src3), 1733 OpcodeStr, "$src3, $src2", "$src2, $src3", 1734 (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, 1735 (_.LdFrag addr:$src3))), 1>, 1736 EVEX, VVVV, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>; 1737 } 1738} 1739multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr, 1740 X86FoldableSchedWrite sched, 1741 X86VectorVTInfo _, X86VectorVTInfo IdxVT> { 1742 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in 1743 defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 1744 (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3), 1745 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), 1746 !strconcat("$src2, ${src3}", _.BroadcastStr ), 1747 (_.VT (X86VPermt2 _.RC:$src1, 1748 IdxVT.RC:$src2,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>, 1749 AVX5128IBase, EVEX, VVVV, EVEX_B, 1750 Sched<[sched.Folded, sched.ReadAfterFold]>; 1751} 1752 1753multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr, 1754 X86FoldableSchedWrite sched, 1755 AVX512VLVectorVTInfo VTInfo, 1756 AVX512VLVectorVTInfo ShuffleMask> { 1757 defm NAME#Z: avx512_perm_t<opc, OpcodeStr, sched, 
VTInfo.info512, 1758 ShuffleMask.info512>, 1759 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512, 1760 ShuffleMask.info512>, EVEX_V512; 1761 let Predicates = [HasVLX] in { 1762 defm NAME#Z128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128, 1763 ShuffleMask.info128>, 1764 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128, 1765 ShuffleMask.info128>, EVEX_V128; 1766 defm NAME#Z256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256, 1767 ShuffleMask.info256>, 1768 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256, 1769 ShuffleMask.info256>, EVEX_V256; 1770 } 1771} 1772 1773multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr, 1774 X86FoldableSchedWrite sched, 1775 AVX512VLVectorVTInfo VTInfo, 1776 AVX512VLVectorVTInfo Idx, Predicate Prd> { 1777 let Predicates = [Prd] in 1778 defm NAME#Z: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512, 1779 Idx.info512>, EVEX_V512; 1780 let Predicates = [Prd, HasVLX] in { 1781 defm NAME#Z128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128, 1782 Idx.info128>, EVEX_V128; 1783 defm NAME#Z256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256, 1784 Idx.info256>, EVEX_V256; 1785 } 1786} 1787 1788defm VPERMT2D : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256, 1789 avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; 1790defm VPERMT2Q : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256, 1791 avx512vl_i64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>; 1792defm VPERMT2W : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256, 1793 avx512vl_i16_info, avx512vl_i16_info, HasBWI>, 1794 REX_W, EVEX_CD8<16, CD8VF>; 1795defm VPERMT2B : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256, 1796 avx512vl_i8_info, avx512vl_i8_info, HasVBMI>, 1797 EVEX_CD8<8, CD8VF>; 1798defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256, 1799 avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; 1800defm VPERMT2PD : avx512_perm_t_sizes<0x7F, 
"vpermt2pd", WriteFVarShuffle256, 1801 avx512vl_f64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>; 1802 1803//===----------------------------------------------------------------------===// 1804// AVX-512 - BLEND using mask 1805// 1806 1807multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr, 1808 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 1809 let ExeDomain = _.ExeDomain, hasSideEffects = 0 in { 1810 def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst), 1811 (ins _.RC:$src1, _.RC:$src2), 1812 !strconcat(OpcodeStr, 1813 "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>, 1814 EVEX, VVVV, Sched<[sched]>; 1815 def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst), 1816 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), 1817 !strconcat(OpcodeStr, 1818 "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"), 1819 []>, EVEX, VVVV, EVEX_K, Sched<[sched]>; 1820 def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst), 1821 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), 1822 !strconcat(OpcodeStr, 1823 "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"), 1824 []>, EVEX, VVVV, EVEX_KZ, Sched<[sched]>; 1825 let mayLoad = 1 in { 1826 def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), 1827 (ins _.RC:$src1, _.MemOp:$src2), 1828 !strconcat(OpcodeStr, 1829 "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), 1830 []>, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>, 1831 Sched<[sched.Folded, sched.ReadAfterFold]>; 1832 def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), 1833 (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2), 1834 !strconcat(OpcodeStr, 1835 "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"), 1836 []>, EVEX, VVVV, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>, 1837 Sched<[sched.Folded, sched.ReadAfterFold]>; 1838 def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), 1839 (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2), 1840 !strconcat(OpcodeStr, 1841 "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"), 
1842 []>, EVEX, VVVV, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>, 1843 Sched<[sched.Folded, sched.ReadAfterFold]>; 1844 } 1845 } 1846} 1847multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr, 1848 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 1849 let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in { 1850 def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), 1851 (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2), 1852 !strconcat(OpcodeStr, 1853 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|", 1854 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>, 1855 EVEX, VVVV, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, 1856 Sched<[sched.Folded, sched.ReadAfterFold]>; 1857 1858 def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), 1859 (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2), 1860 !strconcat(OpcodeStr, 1861 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|", 1862 "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>, 1863 EVEX, VVVV, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, 1864 Sched<[sched.Folded, sched.ReadAfterFold]>; 1865 1866 def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), 1867 (ins _.RC:$src1, _.ScalarMemOp:$src2), 1868 !strconcat(OpcodeStr, 1869 "\t{${src2}", _.BroadcastStr, ", $src1, $dst|", 1870 "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>, 1871 EVEX, VVVV, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, 1872 Sched<[sched.Folded, sched.ReadAfterFold]>; 1873 } 1874} 1875 1876multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched, 1877 AVX512VLVectorVTInfo VTInfo> { 1878 defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>, 1879 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>, 1880 EVEX_V512; 1881 1882 let Predicates = [HasVLX] in { 1883 defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>, 1884 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>, 1885 EVEX_V256; 1886 defm Z128 : WriteFVarBlendask<opc, 
OpcodeStr, sched.XMM, VTInfo.info128>, 1887 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>, 1888 EVEX_V128; 1889 } 1890} 1891 1892multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched, 1893 AVX512VLVectorVTInfo VTInfo> { 1894 let Predicates = [HasBWI] in 1895 defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>, 1896 EVEX_V512; 1897 1898 let Predicates = [HasBWI, HasVLX] in { 1899 defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>, 1900 EVEX_V256; 1901 defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>, 1902 EVEX_V128; 1903 } 1904} 1905 1906defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend, 1907 avx512vl_f32_info>; 1908defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend, 1909 avx512vl_f64_info>, REX_W; 1910defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend, 1911 avx512vl_i32_info>; 1912defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend, 1913 avx512vl_i64_info>, REX_W; 1914defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend, 1915 avx512vl_i8_info>; 1916defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend, 1917 avx512vl_i16_info>, REX_W; 1918 1919//===----------------------------------------------------------------------===// 1920// Compare Instructions 1921//===----------------------------------------------------------------------===// 1922 1923// avx512_cmp_scalar - AVX512 CMPSS and CMPSD 1924 1925multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE, 1926 PatFrag OpNode_su, PatFrag OpNodeSAE_su, 1927 X86FoldableSchedWrite sched> { 1928 defm rr_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, 1929 (outs _.KRC:$dst), 1930 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), 1931 "vcmp"#_.Suffix, 1932 "$cc, $src2, $src1", "$src1, $src2, $cc", 1933 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc), 1934 (OpNode_su (_.VT _.RC:$src1), (_.VT 
_.RC:$src2), 1935 timm:$cc)>, EVEX, VVVV, VEX_LIG, Sched<[sched]>, SIMD_EXC; 1936 let mayLoad = 1 in 1937 defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, 1938 (outs _.KRC:$dst), 1939 (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc), 1940 "vcmp"#_.Suffix, 1941 "$cc, $src2, $src1", "$src1, $src2, $cc", 1942 (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2), 1943 timm:$cc), 1944 (OpNode_su (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2), 1945 timm:$cc)>, EVEX, VVVV, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>, 1946 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 1947 1948 let Uses = [MXCSR] in 1949 defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, 1950 (outs _.KRC:$dst), 1951 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), 1952 "vcmp"#_.Suffix, 1953 "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc", 1954 (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), 1955 timm:$cc), 1956 (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), 1957 timm:$cc)>, 1958 EVEX, VVVV, VEX_LIG, EVEX_B, Sched<[sched]>; 1959 1960 let isCodeGenOnly = 1 in { 1961 let isCommutable = 1 in 1962 def rr : AVX512Ii8<0xC2, MRMSrcReg, 1963 (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, u8imm:$cc), 1964 !strconcat("vcmp", _.Suffix, 1965 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), 1966 [(set _.KRC:$dst, (OpNode _.FRC:$src1, 1967 _.FRC:$src2, 1968 timm:$cc))]>, 1969 EVEX, VVVV, VEX_LIG, Sched<[sched]>, SIMD_EXC; 1970 def rm : AVX512Ii8<0xC2, MRMSrcMem, 1971 (outs _.KRC:$dst), 1972 (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc), 1973 !strconcat("vcmp", _.Suffix, 1974 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), 1975 [(set _.KRC:$dst, (OpNode _.FRC:$src1, 1976 (_.ScalarLdFrag addr:$src2), 1977 timm:$cc))]>, 1978 EVEX, VVVV, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>, 1979 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 1980 } 1981} 1982 1983let Predicates = [HasAVX512] in { 1984 let ExeDomain = SSEPackedSingle in 1985 defm VCMPSSZ : avx512_cmp_scalar<f32x_info, 
X86cmpms, X86cmpmsSAE, 1986 X86cmpms_su, X86cmpmsSAE_su, 1987 SchedWriteFCmp.Scl>, AVX512XSIi8Base; 1988 let ExeDomain = SSEPackedDouble in 1989 defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsSAE, 1990 X86cmpms_su, X86cmpmsSAE_su, 1991 SchedWriteFCmp.Scl>, AVX512XDIi8Base, REX_W; 1992} 1993let Predicates = [HasFP16], ExeDomain = SSEPackedSingle in 1994 defm VCMPSHZ : avx512_cmp_scalar<f16x_info, X86cmpms, X86cmpmsSAE, 1995 X86cmpms_su, X86cmpmsSAE_su, 1996 SchedWriteFCmp.Scl>, AVX512XSIi8Base, TA; 1997 1998multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, 1999 X86FoldableSchedWrite sched, 2000 X86VectorVTInfo _, bit IsCommutable> { 2001 let isCommutable = IsCommutable, hasSideEffects = 0 in 2002 def rr : AVX512BI<opc, MRMSrcReg, 2003 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2), 2004 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 2005 []>, EVEX, VVVV, Sched<[sched]>; 2006 let mayLoad = 1, hasSideEffects = 0 in 2007 def rm : AVX512BI<opc, MRMSrcMem, 2008 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2), 2009 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 2010 []>, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>; 2011 let isCommutable = IsCommutable, hasSideEffects = 0 in 2012 def rrk : AVX512BI<opc, MRMSrcReg, 2013 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), 2014 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|", 2015 "$dst {${mask}}, $src1, $src2}"), 2016 []>, EVEX, VVVV, EVEX_K, Sched<[sched]>; 2017 let mayLoad = 1, hasSideEffects = 0 in 2018 def rmk : AVX512BI<opc, MRMSrcMem, 2019 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2), 2020 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|", 2021 "$dst {${mask}}, $src1, $src2}"), 2022 []>, EVEX, VVVV, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; 2023} 2024 2025multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, 2026 X86FoldableSchedWrite sched, X86VectorVTInfo _, 2027 bit 
IsCommutable> : 2028 avx512_icmp_packed<opc, OpcodeStr, sched, _, IsCommutable> { 2029 let mayLoad = 1, hasSideEffects = 0 in { 2030 def rmb : AVX512BI<opc, MRMSrcMem, 2031 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2), 2032 !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst", 2033 "|$dst, $src1, ${src2}", _.BroadcastStr, "}"), 2034 []>, EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 2035 def rmbk : AVX512BI<opc, MRMSrcMem, 2036 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, 2037 _.ScalarMemOp:$src2), 2038 !strconcat(OpcodeStr, 2039 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|", 2040 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), 2041 []>, EVEX, VVVV, EVEX_K, EVEX_B, 2042 Sched<[sched.Folded, sched.ReadAfterFold]>; 2043 } 2044} 2045 2046multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, 2047 X86SchedWriteWidths sched, 2048 AVX512VLVectorVTInfo VTInfo, Predicate prd, 2049 bit IsCommutable = 0> { 2050 let Predicates = [prd] in 2051 defm Z : avx512_icmp_packed<opc, OpcodeStr, sched.ZMM, 2052 VTInfo.info512, IsCommutable>, EVEX_V512; 2053 2054 let Predicates = [prd, HasVLX] in { 2055 defm Z256 : avx512_icmp_packed<opc, OpcodeStr, sched.YMM, 2056 VTInfo.info256, IsCommutable>, EVEX_V256; 2057 defm Z128 : avx512_icmp_packed<opc, OpcodeStr, sched.XMM, 2058 VTInfo.info128, IsCommutable>, EVEX_V128; 2059 } 2060} 2061 2062multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr, 2063 X86SchedWriteWidths sched, 2064 AVX512VLVectorVTInfo VTInfo, 2065 Predicate prd, bit IsCommutable = 0> { 2066 let Predicates = [prd] in 2067 defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.ZMM, 2068 VTInfo.info512, IsCommutable>, EVEX_V512; 2069 2070 let Predicates = [prd, HasVLX] in { 2071 defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.YMM, 2072 VTInfo.info256, IsCommutable>, EVEX_V256; 2073 defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.XMM, 2074 VTInfo.info128, IsCommutable>, 
EVEX_V128; 2075 } 2076} 2077 2078// AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't 2079// increase the pattern complexity the way an immediate would. 2080let AddedComplexity = 2 in { 2081// FIXME: Is there a better scheduler class for VPCMP? 2082defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", 2083 SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>, 2084 EVEX_CD8<8, CD8VF>, WIG; 2085 2086defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", 2087 SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>, 2088 EVEX_CD8<16, CD8VF>, WIG; 2089 2090defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", 2091 SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>, 2092 EVEX_CD8<32, CD8VF>; 2093 2094defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", 2095 SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>, 2096 T8, REX_W, EVEX_CD8<64, CD8VF>; 2097 2098defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", 2099 SchedWriteVecALU, avx512vl_i8_info, HasBWI>, 2100 EVEX_CD8<8, CD8VF>, WIG; 2101 2102defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", 2103 SchedWriteVecALU, avx512vl_i16_info, HasBWI>, 2104 EVEX_CD8<16, CD8VF>, WIG; 2105 2106defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", 2107 SchedWriteVecALU, avx512vl_i32_info, HasAVX512>, 2108 EVEX_CD8<32, CD8VF>; 2109 2110defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", 2111 SchedWriteVecALU, avx512vl_i64_info, HasAVX512>, 2112 T8, REX_W, EVEX_CD8<64, CD8VF>; 2113} 2114 2115multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag, 2116 PatFrag Frag_su, 2117 X86FoldableSchedWrite sched, 2118 X86VectorVTInfo _, string Name> { 2119 let isCommutable = 1 in 2120 def rri : AVX512AIi8<opc, MRMSrcReg, 2121 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), 2122 !strconcat("vpcmp", Suffix, 2123 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), 2124 [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1), 2125 (_.VT _.RC:$src2), 2126 cond)))]>, 2127 EVEX, 
VVVV, Sched<[sched]>; 2128 def rmi : AVX512AIi8<opc, MRMSrcMem, 2129 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc), 2130 !strconcat("vpcmp", Suffix, 2131 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), 2132 [(set _.KRC:$dst, (_.KVT 2133 (Frag:$cc 2134 (_.VT _.RC:$src1), 2135 (_.VT (_.LdFrag addr:$src2)), 2136 cond)))]>, 2137 EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>; 2138 let isCommutable = 1 in 2139 def rrik : AVX512AIi8<opc, MRMSrcReg, 2140 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2, 2141 u8imm:$cc), 2142 !strconcat("vpcmp", Suffix, 2143 "\t{$cc, $src2, $src1, $dst {${mask}}|", 2144 "$dst {${mask}}, $src1, $src2, $cc}"), 2145 [(set _.KRC:$dst, (and _.KRCWM:$mask, 2146 (_.KVT (Frag_su:$cc (_.VT _.RC:$src1), 2147 (_.VT _.RC:$src2), 2148 cond))))]>, 2149 EVEX, VVVV, EVEX_K, Sched<[sched]>; 2150 def rmik : AVX512AIi8<opc, MRMSrcMem, 2151 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2, 2152 u8imm:$cc), 2153 !strconcat("vpcmp", Suffix, 2154 "\t{$cc, $src2, $src1, $dst {${mask}}|", 2155 "$dst {${mask}}, $src1, $src2, $cc}"), 2156 [(set _.KRC:$dst, (and _.KRCWM:$mask, 2157 (_.KVT 2158 (Frag_su:$cc 2159 (_.VT _.RC:$src1), 2160 (_.VT (_.LdFrag addr:$src2)), 2161 cond))))]>, 2162 EVEX, VVVV, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; 2163 2164 def : Pat<(_.KVT (Frag:$cc (_.LdFrag addr:$src2), 2165 (_.VT _.RC:$src1), cond)), 2166 (!cast<Instruction>(Name#_.ZSuffix#"rmi") 2167 _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>; 2168 2169 def : Pat<(and _.KRCWM:$mask, 2170 (_.KVT (Frag_su:$cc (_.LdFrag addr:$src2), 2171 (_.VT _.RC:$src1), cond))), 2172 (!cast<Instruction>(Name#_.ZSuffix#"rmik") 2173 _.KRCWM:$mask, _.RC:$src1, addr:$src2, 2174 (X86pcmpm_imm_commute $cc))>; 2175} 2176 2177multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag, 2178 PatFrag Frag_su, X86FoldableSchedWrite sched, 2179 X86VectorVTInfo _, string Name> : 2180 avx512_icmp_cc<opc, Suffix, Frag, Frag_su, 
sched, _, Name> { 2181 def rmib : AVX512AIi8<opc, MRMSrcMem, 2182 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2, 2183 u8imm:$cc), 2184 !strconcat("vpcmp", Suffix, 2185 "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|", 2186 "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"), 2187 [(set _.KRC:$dst, (_.KVT (Frag:$cc 2188 (_.VT _.RC:$src1), 2189 (_.BroadcastLdFrag addr:$src2), 2190 cond)))]>, 2191 EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 2192 def rmibk : AVX512AIi8<opc, MRMSrcMem, 2193 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, 2194 _.ScalarMemOp:$src2, u8imm:$cc), 2195 !strconcat("vpcmp", Suffix, 2196 "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|", 2197 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"), 2198 [(set _.KRC:$dst, (and _.KRCWM:$mask, 2199 (_.KVT (Frag_su:$cc 2200 (_.VT _.RC:$src1), 2201 (_.BroadcastLdFrag addr:$src2), 2202 cond))))]>, 2203 EVEX, VVVV, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 2204 2205 def : Pat<(_.KVT (Frag:$cc (_.BroadcastLdFrag addr:$src2), 2206 (_.VT _.RC:$src1), cond)), 2207 (!cast<Instruction>(Name#_.ZSuffix#"rmib") 2208 _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>; 2209 2210 def : Pat<(and _.KRCWM:$mask, 2211 (_.KVT (Frag_su:$cc (_.BroadcastLdFrag addr:$src2), 2212 (_.VT _.RC:$src1), cond))), 2213 (!cast<Instruction>(Name#_.ZSuffix#"rmibk") 2214 _.KRCWM:$mask, _.RC:$src1, addr:$src2, 2215 (X86pcmpm_imm_commute $cc))>; 2216} 2217 2218multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag, 2219 PatFrag Frag_su, X86SchedWriteWidths sched, 2220 AVX512VLVectorVTInfo VTInfo, Predicate prd> { 2221 let Predicates = [prd] in 2222 defm Z : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, 2223 sched.ZMM, VTInfo.info512, NAME>, EVEX_V512; 2224 2225 let Predicates = [prd, HasVLX] in { 2226 defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, 2227 sched.YMM, VTInfo.info256, NAME>, EVEX_V256; 2228 defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, 
Frag_su, 2229 sched.XMM, VTInfo.info128, NAME>, EVEX_V128; 2230 } 2231} 2232 2233multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag, 2234 PatFrag Frag_su, X86SchedWriteWidths sched, 2235 AVX512VLVectorVTInfo VTInfo, Predicate prd> { 2236 let Predicates = [prd] in 2237 defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, 2238 sched.ZMM, VTInfo.info512, NAME>, EVEX_V512; 2239 2240 let Predicates = [prd, HasVLX] in { 2241 defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, 2242 sched.YMM, VTInfo.info256, NAME>, EVEX_V256; 2243 defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, 2244 sched.XMM, VTInfo.info128, NAME>, EVEX_V128; 2245 } 2246} 2247 2248// FIXME: Is there a better scheduler class for VPCMP/VPCMPU? 2249defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_su, 2250 SchedWriteVecALU, avx512vl_i8_info, HasBWI>, 2251 EVEX_CD8<8, CD8VF>; 2252defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_su, 2253 SchedWriteVecALU, avx512vl_i8_info, HasBWI>, 2254 EVEX_CD8<8, CD8VF>; 2255 2256defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_su, 2257 SchedWriteVecALU, avx512vl_i16_info, HasBWI>, 2258 REX_W, EVEX_CD8<16, CD8VF>; 2259defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_su, 2260 SchedWriteVecALU, avx512vl_i16_info, HasBWI>, 2261 REX_W, EVEX_CD8<16, CD8VF>; 2262 2263defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_su, 2264 SchedWriteVecALU, avx512vl_i32_info, 2265 HasAVX512>, EVEX_CD8<32, CD8VF>; 2266defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_su, 2267 SchedWriteVecALU, avx512vl_i32_info, 2268 HasAVX512>, EVEX_CD8<32, CD8VF>; 2269 2270defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_su, 2271 SchedWriteVecALU, avx512vl_i64_info, 2272 HasAVX512>, REX_W, EVEX_CD8<64, CD8VF>; 2273defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_su, 2274 SchedWriteVecALU, avx512vl_i64_info, 2275 HasAVX512>, REX_W, 
EVEX_CD8<64, CD8VF>; 2276 2277multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _, 2278 string Name> { 2279let Uses = [MXCSR], mayRaiseFPException = 1 in { 2280 defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, 2281 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,u8imm:$cc), 2282 "vcmp"#_.Suffix, 2283 "$cc, $src2, $src1", "$src1, $src2, $cc", 2284 (X86any_cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc), 2285 (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc), 2286 1>, Sched<[sched]>; 2287 2288 defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, 2289 (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc), 2290 "vcmp"#_.Suffix, 2291 "$cc, $src2, $src1", "$src1, $src2, $cc", 2292 (X86any_cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), 2293 timm:$cc), 2294 (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), 2295 timm:$cc)>, 2296 Sched<[sched.Folded, sched.ReadAfterFold]>; 2297 2298 defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, 2299 (outs _.KRC:$dst), 2300 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc), 2301 "vcmp"#_.Suffix, 2302 "$cc, ${src2}"#_.BroadcastStr#", $src1", 2303 "$src1, ${src2}"#_.BroadcastStr#", $cc", 2304 (X86any_cmpm (_.VT _.RC:$src1), 2305 (_.VT (_.BroadcastLdFrag addr:$src2)), 2306 timm:$cc), 2307 (X86cmpm_su (_.VT _.RC:$src1), 2308 (_.VT (_.BroadcastLdFrag addr:$src2)), 2309 timm:$cc)>, 2310 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 2311 } 2312 2313 // Patterns for selecting with loads in other operand. 
2314 def : Pat<(X86any_cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1), 2315 timm:$cc), 2316 (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, 2317 (X86cmpm_imm_commute timm:$cc))>; 2318 2319 def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2), 2320 (_.VT _.RC:$src1), 2321 timm:$cc)), 2322 (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask, 2323 _.RC:$src1, addr:$src2, 2324 (X86cmpm_imm_commute timm:$cc))>; 2325 2326 def : Pat<(X86any_cmpm (_.BroadcastLdFrag addr:$src2), 2327 (_.VT _.RC:$src1), timm:$cc), 2328 (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, 2329 (X86cmpm_imm_commute timm:$cc))>; 2330 2331 def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.BroadcastLdFrag addr:$src2), 2332 (_.VT _.RC:$src1), 2333 timm:$cc)), 2334 (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, 2335 _.RC:$src1, addr:$src2, 2336 (X86cmpm_imm_commute timm:$cc))>; 2337 2338 // Patterns for mask intrinsics. 2339 def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc, 2340 (_.KVT immAllOnesV)), 2341 (!cast<Instruction>(Name#_.ZSuffix#"rri") _.RC:$src1, _.RC:$src2, timm:$cc)>; 2342 2343 def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask), 2344 (!cast<Instruction>(Name#_.ZSuffix#"rrik") _.KRCWM:$mask, _.RC:$src1, 2345 _.RC:$src2, timm:$cc)>; 2346 2347 def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc, 2348 (_.KVT immAllOnesV)), 2349 (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, timm:$cc)>; 2350 2351 def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc, 2352 _.KRCWM:$mask), 2353 (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask, _.RC:$src1, 2354 addr:$src2, timm:$cc)>; 2355 2356 def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc, 2357 (_.KVT immAllOnesV)), 2358 (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, timm:$cc)>; 2359 2360 def : Pat<(X86cmpmm (_.VT 
_.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc, 2361 _.KRCWM:$mask), 2362 (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, _.RC:$src1, 2363 addr:$src2, timm:$cc)>; 2364 2365 // Patterns for mask intrinsics with loads in other operand. 2366 def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc, 2367 (_.KVT immAllOnesV)), 2368 (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, 2369 (X86cmpm_imm_commute timm:$cc))>; 2370 2371 def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc, 2372 _.KRCWM:$mask), 2373 (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask, 2374 _.RC:$src1, addr:$src2, 2375 (X86cmpm_imm_commute timm:$cc))>; 2376 2377 def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc, 2378 (_.KVT immAllOnesV)), 2379 (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, 2380 (X86cmpm_imm_commute timm:$cc))>; 2381 2382 def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc, 2383 _.KRCWM:$mask), 2384 (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, 2385 _.RC:$src1, addr:$src2, 2386 (X86cmpm_imm_commute timm:$cc))>; 2387} 2388 2389multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> { 2390 // comparison code form (VCMP[EQ/LT/LE/...] 
2391 let Uses = [MXCSR] in 2392 defm rrib : AVX512_maskable_custom_cmp<0xC2, MRMSrcReg, (outs _.KRC:$dst), 2393 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), 2394 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2, u8imm:$cc), 2395 "vcmp"#_.Suffix, 2396 "$cc, {sae}, $src2, $src1", 2397 "$src1, $src2, {sae}, $cc", 2398 [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1), 2399 (_.VT _.RC:$src2), timm:$cc, (_.KVT immAllOnesV)))], 2400 [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1), 2401 (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask))]>, 2402 EVEX_B, Sched<[sched]>; 2403} 2404 2405multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _, 2406 Predicate Pred = HasAVX512> { 2407 let Predicates = [Pred] in { 2408 defm Z : avx512_vcmp_common<sched.ZMM, _.info512, NAME>, 2409 avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512; 2410 2411 } 2412 let Predicates = [Pred,HasVLX] in { 2413 defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128; 2414 defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256; 2415 } 2416} 2417 2418defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>, 2419 AVX512PDIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W; 2420defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>, 2421 AVX512PSIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>; 2422defm VCMPPH : avx512_vcmp<SchedWriteFCmp, avx512vl_f16_info, HasFP16>, 2423 AVX512PSIi8Base, EVEX, VVVV, EVEX_CD8<16, CD8VF>, TA; 2424 2425// Patterns to select fp compares with load as first operand. 
2426let Predicates = [HasAVX512] in { 2427 def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1, timm:$cc)), 2428 (VCMPSDZrm FR64X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>; 2429 2430 def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1, timm:$cc)), 2431 (VCMPSSZrm FR32X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>; 2432} 2433 2434let Predicates = [HasFP16] in { 2435 def : Pat<(v1i1 (X86cmpms (loadf16 addr:$src2), FR16X:$src1, timm:$cc)), 2436 (VCMPSHZrm FR16X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>; 2437} 2438 2439// ---------------------------------------------------------------- 2440// FPClass 2441 2442//handle fpclass instruction mask = op(reg_scalar,imm) 2443// op(mem_scalar,imm) 2444multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, 2445 X86FoldableSchedWrite sched, X86VectorVTInfo _, 2446 Predicate prd> { 2447 let Predicates = [prd], ExeDomain = _.ExeDomain, Uses = [MXCSR] in { 2448 def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst), 2449 (ins _.RC:$src1, i32u8imm:$src2), 2450 OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2451 [(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1), 2452 (i32 timm:$src2)))]>, 2453 Sched<[sched]>; 2454 def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst), 2455 (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 2456 OpcodeStr#_.Suffix# 2457 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", 2458 [(set _.KRC:$dst,(and _.KRCWM:$mask, 2459 (X86Vfpclasss_su (_.VT _.RC:$src1), 2460 (i32 timm:$src2))))]>, 2461 EVEX_K, Sched<[sched]>; 2462 def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), 2463 (ins _.IntScalarMemOp:$src1, i32u8imm:$src2), 2464 OpcodeStr#_.Suffix# 2465 "\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2466 [(set _.KRC:$dst, 2467 (X86Vfpclasss (_.ScalarIntMemFrags addr:$src1), 2468 (i32 timm:$src2)))]>, 2469 Sched<[sched.Folded, sched.ReadAfterFold]>; 2470 def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), 2471 (ins _.KRCWM:$mask, 
_.IntScalarMemOp:$src1, i32u8imm:$src2), 2472 OpcodeStr#_.Suffix# 2473 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", 2474 [(set _.KRC:$dst,(and _.KRCWM:$mask, 2475 (X86Vfpclasss_su (_.ScalarIntMemFrags addr:$src1), 2476 (i32 timm:$src2))))]>, 2477 EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; 2478 } 2479} 2480 2481//handle fpclass instruction mask = fpclass(reg_vec, reg_vec, imm) 2482// fpclass(reg_vec, mem_vec, imm) 2483// fpclass(reg_vec, broadcast(eltVt), imm) 2484multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, 2485 X86FoldableSchedWrite sched, X86VectorVTInfo _, 2486 string mem>{ 2487 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in { 2488 def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst), 2489 (ins _.RC:$src1, i32u8imm:$src2), 2490 OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2491 [(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1), 2492 (i32 timm:$src2)))]>, 2493 Sched<[sched]>; 2494 def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst), 2495 (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 2496 OpcodeStr#_.Suffix# 2497 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", 2498 [(set _.KRC:$dst,(and _.KRCWM:$mask, 2499 (X86Vfpclass_su (_.VT _.RC:$src1), 2500 (i32 timm:$src2))))]>, 2501 EVEX_K, Sched<[sched]>; 2502 def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), 2503 (ins _.MemOp:$src1, i32u8imm:$src2), 2504 OpcodeStr#_.Suffix#"{"#mem#"}"# 2505 "\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2506 [(set _.KRC:$dst,(X86Vfpclass 2507 (_.VT (_.LdFrag addr:$src1)), 2508 (i32 timm:$src2)))]>, 2509 Sched<[sched.Folded, sched.ReadAfterFold]>; 2510 def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), 2511 (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2), 2512 OpcodeStr#_.Suffix#"{"#mem#"}"# 2513 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", 2514 [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su 2515 (_.VT (_.LdFrag addr:$src1)), 2516 (i32 timm:$src2))))]>, 2517 EVEX_K, 
Sched<[sched.Folded, sched.ReadAfterFold]>; 2518 def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), 2519 (ins _.ScalarMemOp:$src1, i32u8imm:$src2), 2520 OpcodeStr#_.Suffix#"\t{$src2, ${src1}"# 2521 _.BroadcastStr#", $dst|$dst, ${src1}" 2522 #_.BroadcastStr#", $src2}", 2523 [(set _.KRC:$dst,(X86Vfpclass 2524 (_.VT (_.BroadcastLdFrag addr:$src1)), 2525 (i32 timm:$src2)))]>, 2526 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 2527 def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), 2528 (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 2529 OpcodeStr#_.Suffix#"\t{$src2, ${src1}"# 2530 _.BroadcastStr#", $dst {${mask}}|$dst {${mask}}, ${src1}"# 2531 _.BroadcastStr#", $src2}", 2532 [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su 2533 (_.VT (_.BroadcastLdFrag addr:$src1)), 2534 (i32 timm:$src2))))]>, 2535 EVEX_B, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; 2536 } 2537 2538 // Allow registers or broadcast with the x, y, z suffix we use to disambiguate 2539 // the memory form. 
2540 def : InstAlias<OpcodeStr#_.Suffix#mem# 2541 "\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2542 (!cast<Instruction>(NAME#"rr") 2543 _.KRC:$dst, _.RC:$src1, i32u8imm:$src2), 0, "att">; 2544 def : InstAlias<OpcodeStr#_.Suffix#mem# 2545 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", 2546 (!cast<Instruction>(NAME#"rrk") 2547 _.KRC:$dst, _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 0, "att">; 2548 def : InstAlias<OpcodeStr#_.Suffix#mem# 2549 "\t{$src2, ${src1}"#_.BroadcastStr#", $dst|$dst, ${src1}"# 2550 _.BroadcastStr#", $src2}", 2551 (!cast<Instruction>(NAME#"rmb") 2552 _.KRC:$dst, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">; 2553 def : InstAlias<OpcodeStr#_.Suffix#mem# 2554 "\t{$src2, ${src1}"#_.BroadcastStr#", $dst {${mask}}|" 2555 "$dst {${mask}}, ${src1}"#_.BroadcastStr#", $src2}", 2556 (!cast<Instruction>(NAME#"rmbk") 2557 _.KRC:$dst, _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">; 2558} 2559 2560multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _, 2561 bits<8> opc, X86SchedWriteWidths sched, 2562 Predicate prd>{ 2563 let Predicates = [prd] in { 2564 defm Z : avx512_vector_fpclass<opc, OpcodeStr, sched.ZMM, 2565 _.info512, "z">, EVEX_V512; 2566 } 2567 let Predicates = [prd, HasVLX] in { 2568 defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, sched.XMM, 2569 _.info128, "x">, EVEX_V128; 2570 defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, sched.YMM, 2571 _.info256, "y">, EVEX_V256; 2572 } 2573} 2574 2575multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec, 2576 bits<8> opcScalar, X86SchedWriteWidths sched> { 2577 defm PH : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f16_info, opcVec, 2578 sched, HasFP16>, 2579 EVEX_CD8<16, CD8VF>, AVX512PSIi8Base, TA; 2580 defm SHZ : avx512_scalar_fpclass<opcScalar, OpcodeStr, 2581 sched.Scl, f16x_info, HasFP16>, 2582 EVEX_CD8<16, CD8VT1>, AVX512PSIi8Base, TA; 2583 defm PS : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f32_info, opcVec, 2584 sched, 
HasDQI>, 2585 EVEX_CD8<32, CD8VF>, AVX512AIi8Base; 2586 defm PD : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f64_info, opcVec, 2587 sched, HasDQI>, 2588 EVEX_CD8<64, CD8VF>, AVX512AIi8Base, REX_W; 2589 defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr, 2590 sched.Scl, f32x_info, HasDQI>, VEX_LIG, 2591 EVEX_CD8<32, CD8VT1>, AVX512AIi8Base; 2592 defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr, 2593 sched.Scl, f64x_info, HasDQI>, VEX_LIG, 2594 EVEX_CD8<64, CD8VT1>, AVX512AIi8Base, REX_W; 2595} 2596 2597defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp>, EVEX; 2598 2599//----------------------------------------------------------------- 2600// Mask register copy, including 2601// - copy between mask registers 2602// - load/store mask registers 2603// - copy from GPR to mask register and vice versa 2604// 2605multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk, 2606 string OpcodeStr, RegisterClass KRC, ValueType vvt, 2607 X86MemOperand x86memop, string Suffix = ""> { 2608 let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove], 2609 explicitOpPrefix = !if(!eq(Suffix, ""), NoExplicitOpPrefix, ExplicitEVEX) in 2610 def kk#Suffix : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src), 2611 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>, 2612 Sched<[WriteMove]>; 2613 def km#Suffix : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src), 2614 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 2615 [(set KRC:$dst, (vvt (load addr:$src)))]>, 2616 Sched<[WriteLoad]>; 2617 def mk#Suffix : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src), 2618 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 2619 [(store KRC:$src, addr:$dst)]>, 2620 Sched<[WriteStore]>; 2621} 2622 2623multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk, 2624 string OpcodeStr, RegisterClass KRC, 2625 RegisterClass GRC, string Suffix = ""> { 2626 let hasSideEffects = 0 in { 2627 def kr#Suffix : I<opc_kr, 
MRMSrcReg, (outs KRC:$dst), (ins GRC:$src), 2628 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>, 2629 Sched<[WriteMove]>; 2630 def rk#Suffix : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src), 2631 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>, 2632 Sched<[WriteMove]>; 2633 } 2634} 2635 2636let Predicates = [HasDQI, NoEGPR] in 2637 defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>, 2638 avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>, 2639 VEX, TB, PD; 2640let Predicates = [HasDQI, HasEGPR, In64BitMode] in 2641 defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem, "_EVEX">, 2642 avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32, "_EVEX">, 2643 EVEX, TB, PD; 2644 2645let Predicates = [HasAVX512, NoEGPR] in 2646 defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>, 2647 avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>, 2648 VEX, TB; 2649let Predicates = [HasAVX512, HasEGPR, In64BitMode] in 2650 defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem, "_EVEX">, 2651 avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32, "_EVEX">, 2652 EVEX, TB; 2653 2654let Predicates = [HasBWI, NoEGPR] in { 2655 defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>, 2656 VEX, TB, PD, REX_W; 2657 defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>, 2658 VEX, TB, XD; 2659 defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>, 2660 VEX, TB, REX_W; 2661 defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>, 2662 VEX, TB, XD, REX_W; 2663} 2664let Predicates = [HasBWI, HasEGPR, In64BitMode] in { 2665 defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem, "_EVEX">, 2666 EVEX, TB, PD, REX_W; 2667 defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32, "_EVEX">, 2668 EVEX, TB, XD; 2669 defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem, "_EVEX">, 2670 
EVEX, TB, REX_W; 2671 defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64, "_EVEX">, 2672 EVEX, TB, XD, REX_W; 2673} 2674 2675// GR from/to mask register 2676def : Pat<(v16i1 (bitconvert (i16 GR16:$src))), 2677 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>; 2678def : Pat<(i16 (bitconvert (v16i1 VK16:$src))), 2679 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>; 2680def : Pat<(i8 (trunc (i16 (bitconvert (v16i1 VK16:$src))))), 2681 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_8bit)>; 2682 2683def : Pat<(v8i1 (bitconvert (i8 GR8:$src))), 2684 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>; 2685def : Pat<(i8 (bitconvert (v8i1 VK8:$src))), 2686 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>; 2687 2688def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))), 2689 (KMOVWrk VK16:$src)>; 2690def : Pat<(i64 (zext (i16 (bitconvert (v16i1 VK16:$src))))), 2691 (SUBREG_TO_REG (i64 0), (KMOVWrk VK16:$src), sub_32bit)>; 2692def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))), 2693 (COPY_TO_REGCLASS VK16:$src, GR32)>; 2694def : Pat<(i64 (anyext (i16 (bitconvert (v16i1 VK16:$src))))), 2695 (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK16:$src, GR32), sub_32bit)>; 2696 2697def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))), 2698 (KMOVBrk VK8:$src)>, Requires<[HasDQI]>; 2699def : Pat<(i64 (zext (i8 (bitconvert (v8i1 VK8:$src))))), 2700 (SUBREG_TO_REG (i64 0), (KMOVBrk VK8:$src), sub_32bit)>, Requires<[HasDQI]>; 2701def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))), 2702 (COPY_TO_REGCLASS VK8:$src, GR32)>; 2703def : Pat<(i64 (anyext (i8 (bitconvert (v8i1 VK8:$src))))), 2704 (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK8:$src, GR32), sub_32bit)>; 2705 2706def : Pat<(v32i1 (bitconvert (i32 GR32:$src))), 2707 (COPY_TO_REGCLASS GR32:$src, VK32)>; 2708def : Pat<(i32 (bitconvert (v32i1 VK32:$src))), 2709 
(COPY_TO_REGCLASS VK32:$src, GR32)>; 2710def : Pat<(v64i1 (bitconvert (i64 GR64:$src))), 2711 (COPY_TO_REGCLASS GR64:$src, VK64)>; 2712def : Pat<(i64 (bitconvert (v64i1 VK64:$src))), 2713 (COPY_TO_REGCLASS VK64:$src, GR64)>; 2714 2715// Load/store kreg 2716let Predicates = [HasDQI] in { 2717 def : Pat<(v1i1 (load addr:$src)), 2718 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>; 2719 def : Pat<(v2i1 (load addr:$src)), 2720 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>; 2721 def : Pat<(v4i1 (load addr:$src)), 2722 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>; 2723} 2724 2725let Predicates = [HasAVX512] in { 2726 def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))), 2727 (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>; 2728 def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))), 2729 (KMOVWkm addr:$src)>; 2730} 2731 2732def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT", 2733 SDTypeProfile<1, 2, [SDTCisVT<0, i8>, 2734 SDTCVecEltisVT<1, i1>, 2735 SDTCisPtrTy<2>]>>; 2736 2737let Predicates = [HasAVX512] in { 2738 multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> { 2739 def : Pat<(maskVT (scalar_to_vector GR32:$src)), 2740 (COPY_TO_REGCLASS GR32:$src, maskRC)>; 2741 2742 def : Pat<(maskVT (scalar_to_vector GR8:$src)), 2743 (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>; 2744 2745 def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))), 2746 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>; 2747 2748 def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))), 2749 (i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>; 2750 } 2751 2752 defm : operation_gpr_mask_copy_lowering<VK1, v1i1>; 2753 defm : operation_gpr_mask_copy_lowering<VK2, v2i1>; 2754 defm : operation_gpr_mask_copy_lowering<VK4, v4i1>; 2755 defm : operation_gpr_mask_copy_lowering<VK8, v8i1>; 2756 defm : operation_gpr_mask_copy_lowering<VK16, v16i1>; 2757 defm : operation_gpr_mask_copy_lowering<VK32, v32i1>; 2758 defm : 
operation_gpr_mask_copy_lowering<VK64, v64i1>; 2759 2760 def : Pat<(insert_subvector (v16i1 immAllZerosV), 2761 (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)), 2762 (KMOVWkr (AND32ri 2763 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), 2764 (i32 1)))>; 2765} 2766 2767// Mask unary operation 2768// - KNOT 2769multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr, 2770 RegisterClass KRC, SDPatternOperator OpNode, 2771 X86FoldableSchedWrite sched, Predicate prd> { 2772 let Predicates = [prd] in 2773 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src), 2774 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 2775 [(set KRC:$dst, (OpNode KRC:$src))]>, 2776 Sched<[sched]>; 2777} 2778 2779multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr, 2780 SDPatternOperator OpNode, 2781 X86FoldableSchedWrite sched> { 2782 defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode, 2783 sched, HasDQI>, VEX, TB, PD; 2784 defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode, 2785 sched, HasAVX512>, VEX, TB; 2786 defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode, 2787 sched, HasBWI>, VEX, TB, PD, REX_W; 2788 defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode, 2789 sched, HasBWI>, VEX, TB, REX_W; 2790} 2791 2792// TODO - do we need a X86SchedWriteWidths::KMASK type? 
defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;

// KNL does not support KMOVB, 8-bit mask is promoted to 16-bit
let Predicates = [HasAVX512, NoDQI] in
def : Pat<(vnot VK8:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;

// Sub-byte mask types are not legal; widen to VK16, KNOTW, then copy the
// result back to the narrow class matching the pattern's own type.
def : Pat<(vnot VK4:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
def : Pat<(vnot VK2:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
// Bug fix: the result of the v1i1 pattern must be copied back to VK1, not
// VK2 — the destination class has to match the pattern's result type, as it
// does in the VK8/VK4/VK2 patterns above.
def : Pat<(vnot VK1:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK1:$src, VK16)), VK1)>;

// Mask binary operation
// - KAND, KANDN, KOR, KXNOR, KXOR
multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
                             RegisterClass KRC, SDPatternOperator OpNode,
                             X86FoldableSchedWrite sched, Predicate prd,
                             bit IsCommutable> {
  let Predicates = [prd], isCommutable = IsCommutable in
  def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
             !strconcat(OpcodeStr,
                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>,
             Sched<[sched]>;
}

// Instantiates the B/W/D/Q width variants of a two-operand mask op.  The
// byte form requires DQI, the word form the (overridable) prdW predicate,
// and the dword/qword forms require BWI.
multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
                                 SDPatternOperator OpNode,
                                 X86FoldableSchedWrite sched, bit IsCommutable,
                                 Predicate prdW = HasAVX512> {
  defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                             sched, HasDQI, IsCommutable>, VEX, VVVV, VEX_L, TB, PD;
  defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                             sched, prdW, IsCommutable>, VEX, VVVV, VEX_L, TB;
  defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                             sched, HasBWI, IsCommutable>, VEX, VVVV, VEX_L, REX_W, TB, PD;
  defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                             sched, HasBWI, IsCommutable>, VEX, VVVV, VEX_L, REX_W, TB;
}

// TODO - do we need a
X86SchedWriteWidths::KMASK type? 2836defm KAND : avx512_mask_binop_all<0x41, "kand", and, SchedWriteVecLogic.XMM, 1>; 2837defm KOR : avx512_mask_binop_all<0x45, "kor", or, SchedWriteVecLogic.XMM, 1>; 2838defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, SchedWriteVecLogic.XMM, 1>; 2839defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, SchedWriteVecLogic.XMM, 1>; 2840defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, SchedWriteVecLogic.XMM, 0>; 2841defm KADD : avx512_mask_binop_all<0x4A, "kadd", X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>; 2842 2843multiclass avx512_binop_pat<SDPatternOperator VOpNode, 2844 Instruction Inst> { 2845 // With AVX512F, 8-bit mask is promoted to 16-bit mask, 2846 // for the DQI set, this type is legal and KxxxB instruction is used 2847 let Predicates = [NoDQI] in 2848 def : Pat<(VOpNode VK8:$src1, VK8:$src2), 2849 (COPY_TO_REGCLASS 2850 (Inst (COPY_TO_REGCLASS VK8:$src1, VK16), 2851 (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>; 2852 2853 // All types smaller than 8 bits require conversion anyway 2854 def : Pat<(VOpNode VK1:$src1, VK1:$src2), 2855 (COPY_TO_REGCLASS (Inst 2856 (COPY_TO_REGCLASS VK1:$src1, VK16), 2857 (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>; 2858 def : Pat<(VOpNode VK2:$src1, VK2:$src2), 2859 (COPY_TO_REGCLASS (Inst 2860 (COPY_TO_REGCLASS VK2:$src1, VK16), 2861 (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>; 2862 def : Pat<(VOpNode VK4:$src1, VK4:$src2), 2863 (COPY_TO_REGCLASS (Inst 2864 (COPY_TO_REGCLASS VK4:$src1, VK16), 2865 (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>; 2866} 2867 2868defm : avx512_binop_pat<and, KANDWrr>; 2869defm : avx512_binop_pat<vandn, KANDNWrr>; 2870defm : avx512_binop_pat<or, KORWrr>; 2871defm : avx512_binop_pat<vxnor, KXNORWrr>; 2872defm : avx512_binop_pat<xor, KXORWrr>; 2873 2874// Mask unpacking 2875multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst, 2876 X86KVectorVTInfo Src, X86FoldableSchedWrite sched, 2877 Predicate prd> { 2878 let Predicates = [prd] in { 2879 let 
hasSideEffects = 0 in 2880 def rr : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst), 2881 (ins Src.KRC:$src1, Src.KRC:$src2), 2882 "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 2883 VEX, VVVV, VEX_L, Sched<[sched]>; 2884 2885 def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)), 2886 (!cast<Instruction>(NAME#rr) Src.KRC:$src2, Src.KRC:$src1)>; 2887 } 2888} 2889 2890defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info, WriteShuffle, HasAVX512>, TB, PD; 2891defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, TB; 2892defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, TB, REX_W; 2893 2894// Mask bit testing 2895multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC, 2896 SDNode OpNode, X86FoldableSchedWrite sched, 2897 Predicate prd> { 2898 let Predicates = [prd], Defs = [EFLAGS] in 2899 def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2), 2900 !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), 2901 [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>, 2902 Sched<[sched]>; 2903} 2904 2905multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode, 2906 X86FoldableSchedWrite sched, 2907 Predicate prdW = HasAVX512> { 2908 defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>, 2909 VEX, TB, PD; 2910 defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>, 2911 VEX, TB; 2912 defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>, 2913 VEX, TB, REX_W; 2914 defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>, 2915 VEX, TB, PD, REX_W; 2916} 2917 2918// TODO - do we need a X86SchedWriteWidths::KMASK type? 
// Instantiations of avx512_mask_testop_w: both ops set EFLAGS from a pair of
// mask registers (see the [(set EFLAGS, ...)] pattern in avx512_mask_testop).
// KTEST requires DQI for its base (word) form; KORTEST is baseline AVX512.
defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>;
defm KTEST   : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>;

// Mask shift
// Single immediate-count shift of a mask register: $dst = OpNode($src, imm8).
multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                               SDNode OpNode, X86FoldableSchedWrite sched> {
  let Predicates = [HasAVX512] in
  def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
               !strconcat(OpcodeStr,
                          "\t{$imm, $src, $dst|$dst, $src, $imm}"),
               [(set KRC:$dst, (OpNode KRC:$src, (i8 timm:$imm)))]>,
               Sched<[sched]>;
}

// Width variants of a mask shift.  Note the opcode split: B and W share opc1
// (distinguished by REX_W on the word form), while D and Q share opc2.
// B requires DQI and D/Q require BWI; W is available with base AVX512.
multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
                                 SDNode OpNode, X86FoldableSchedWrite sched> {
  defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                               sched>, VEX, TA, PD, REX_W;
  let Predicates = [HasDQI] in
  defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                               sched>, VEX, TA, PD;
  let Predicates = [HasBWI] in {
  defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                               sched>, VEX, TA, PD, REX_W;
  defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                               sched>, VEX, TA, PD;
  }
}

defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>;
defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;

// Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
// Lower a narrow (128/256-bit) integer compare-with-condition-code to the
// 512-bit instruction when VLX is unavailable: widen both operands with
// INSERT_SUBREG into an undef wide register, compare in the 512-bit domain,
// and COPY_TO_REGCLASS the resulting wide mask back to the narrow mask class.
// The upper (garbage) lanes of the wide compare result are never read because
// the result is immediately constrained to the narrow KRC.
// NOTE: renamed from the historical "axv512" typo to the conventional
// "avx512" spelling; all uses are the defm instantiations below.
multiclass avx512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
                                                 string InstStr,
                                                 X86VectorVTInfo Narrow,
                                                 X86VectorVTInfo Wide> {
// Unmasked register-register compare -> wide Zrri form.
def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
                                (Narrow.VT Narrow.RC:$src2), cond)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrri")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
            (X86pcmpm_imm $cc)), Narrow.KRC)>;

// Masked compare (single-use fragment): the narrow mask predicate is moved to
// the wide mask class so it can be used as the zeroing mask of the wide
// compare (Zrrik form).
def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                      (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
                                               (Narrow.VT Narrow.RC:$src2),
                                               cond)))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
           (X86pcmpm_imm $cc)), Narrow.KRC)>;
}

// Same lowering for the register-memory-broadcast (rmb) forms of the integer
// compares. Only $src1 needs widening; the broadcast memory operand is passed
// through unchanged. The commuted patterns use X86pcmpm_imm_commute to swap
// the condition code so the broadcast can stay in the memory position.
multiclass avx512_icmp_packed_cc_rmb_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
                                                     string InstStr,
                                                     X86VectorVTInfo Narrow,
                                                     X86VectorVTInfo Wide> {
// Broadcast load.
def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
                                (Narrow.BroadcastLdFrag addr:$src2), cond)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrmib")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>;

def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                      (Narrow.KVT
                       (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
                                    (Narrow.BroadcastLdFrag addr:$src2),
                                    cond)))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
           addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>;

// Commuted with broadcast load.
def : Pat<(Narrow.KVT (Frag:$cc (Narrow.BroadcastLdFrag addr:$src2),
                                (Narrow.VT Narrow.RC:$src1),
                                cond)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrmib")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>;

def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                      (Narrow.KVT
                       (Frag_su:$cc (Narrow.BroadcastLdFrag addr:$src2),
                                    (Narrow.VT Narrow.RC:$src1),
                                    cond)))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
           addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>;
}

// Same as above, but for fp types which don't use PatFrags.
// FP compares carry the condition code as a timm operand (X86cmpm /
// X86cmpm_su) rather than via a compare-fragment, so the patterns are spelled
// out directly. X86cmpm_imm_commute swaps the cc for the commuted-broadcast
// patterns.
multiclass avx512_cmp_packed_cc_no_vlx_lowering<string InstStr,
                                                X86VectorVTInfo Narrow,
                                                X86VectorVTInfo Wide> {
def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
                               (Narrow.VT Narrow.RC:$src2), timm:$cc)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrri")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
            timm:$cc), Narrow.KRC)>;

def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                       (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
                                   (Narrow.VT Narrow.RC:$src2), timm:$cc))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
           timm:$cc), Narrow.KRC)>;

// Broadcast load.
def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
                               (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrmbi")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            addr:$src2, timm:$cc), Narrow.KRC)>;

def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                       (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
                                   (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
           addr:$src2, timm:$cc), Narrow.KRC)>;

// Commuted with broadcast load.
def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
                               (Narrow.VT Narrow.RC:$src1), timm:$cc)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrmbi")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;

def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                       (X86cmpm_su (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
                                   (Narrow.VT Narrow.RC:$src1), timm:$cc))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
           addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
}

// Instantiate the widening lowerings for every narrow D/Q element type when
// only AVX512F (no VLX) is available.
let Predicates = [HasAVX512, NoVLX] in {
  defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
  defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;

  defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
  defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;

  defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
  defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;

  defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
  defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;

  defm : avx512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
  defm : avx512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;

  defm : avx512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
  defm : avx512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;

  defm : avx512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
  defm : avx512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;

  defm : avx512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
  defm : avx512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;

  defm : avx512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v8f32x_info, v16f32_info>;
  defm : avx512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v4f32x_info, v16f32_info>;
  defm : avx512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v4f64x_info, v8f64_info>;
  defm : avx512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>;
}

// Byte/word compares additionally require BWI.
let Predicates = [HasBWI, NoVLX] in {
  defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>;
  defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>;

  defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v16i8x_info, v64i8_info>;
  defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v16i8x_info, v64i8_info>;

  defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v16i16x_info, v32i16_info>;
  defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v16i16x_info, v32i16_info>;

  defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v8i16x_info, v32i16_info>;
  defm : avx512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v8i16x_info, v32i16_info>;
}

// Mask setting all 0s or 1s
// Emitted as rematerializable, as-cheap-as-a-move pseudos; they are expanded
// to real mask instructions later (post-RA pseudo expansion).
multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, SDPatternOperator Val> {
  let Predicates = [HasAVX512] in
  let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
      SchedRW = [WriteZero] in
  def NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
                [(set KRC:$dst, (VT Val))]>;
}

// W/D/Q suffixed variants covering the 16/32/64-bit mask register classes.
multiclass avx512_mask_setop_w<SDPatternOperator Val> {
  defm W : avx512_mask_setop<VK16, v16i1, Val>;
  defm D : avx512_mask_setop<VK32, v32i1, Val>;
  defm Q : avx512_mask_setop<VK64, v64i1, Val>;
}

defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
defm KSET1 : avx512_mask_setop_w<immAllOnesV>;

// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
// Narrow all-zeros/all-ones mask constants are produced with the 16-bit KSET
// pseudo and constrained down to the smaller mask register class.
let Predicates = [HasAVX512] in {
  def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
  def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
  def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
  def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
  def : Pat<(v8i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK8)>;
  def : Pat<(v4i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK4)>;
  def : Pat<(v2i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK2)>;
  def : Pat<(v1i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK1)>;
}

// Patterns for kmask insert_subvector/extract_subvector to/from index=0
// Both directions are just a register-class constraint; no instruction is
// generated for the subvector move itself.
multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
                                             RegisterClass RC, ValueType VT> {
  def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
            (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;

  def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
            (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
}
defm : operation_subvector_mask_lowering<VK1, v1i1, VK2, v2i1>;
defm : operation_subvector_mask_lowering<VK1, v1i1, VK4, v4i1>;
defm : operation_subvector_mask_lowering<VK1, v1i1, VK8, v8i1>;
defm : operation_subvector_mask_lowering<VK1, v1i1, VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK1, v1i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK1, v1i1, VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK2, v2i1, VK4, v4i1>;
defm : operation_subvector_mask_lowering<VK2, v2i1, VK8, v8i1>;
defm : operation_subvector_mask_lowering<VK2, v2i1, VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK2, v2i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK2, v2i1, VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK4, v4i1, VK8, v8i1>;
defm : operation_subvector_mask_lowering<VK4, v4i1, VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK4, v4i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK4, v4i1, VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK8, v8i1, VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK8, v8i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK8, v8i1, VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;

//===----------------------------------------------------------------------===//
// AVX-512 - Aligned and unaligned load and store
//

// One width of a masked/unmasked vector load:
//   rr/rrk/rrkz  - register moves (also used as masked blends via SelectOprr),
//   rm/rmk/rmkz  - memory loads,
// plus patterns mapping the target-independent masked-load node (mload) onto
// the rmk/rmkz forms. NoRMPattern suppresses the plain-load pattern so a
// sibling defm (e.g. the 64-bit element variant) can own it instead.
multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
                       X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
                       X86SchedWriteMoveLS Sched, bit NoRMPattern = 0,
                       SDPatternOperator SelectOprr = vselect> {
  let hasSideEffects = 0 in {
  let isMoveReg = 1 in
  def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
                    _.ExeDomain>, EVEX, Sched<[Sched.RR]>;
  // Zero-masked register move; selected for vselect-with-zero of a register.
  def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
                      (ins _.KRCWM:$mask, _.RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                       "${dst} {${mask}} {z}, $src}"),
                      [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
                                              (_.VT _.RC:$src),
                                              _.ImmAllZerosV)))], _.ExeDomain>,
                      EVEX, EVEX_KZ, Sched<[Sched.RR]>;

  let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
  def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    !if(NoRMPattern, [],
                        [(set _.RC:$dst,
                          (_.VT (ld_frag addr:$src)))]),
                    _.ExeDomain>, EVEX, Sched<[Sched.RM]>;

  // Merge-masked forms tie the pass-through value $src0 to $dst.
  let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
    def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
                       (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
                       !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
                                  "${dst} {${mask}}, $src1}"),
                       [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
                                               (_.VT _.RC:$src1),
                                               (_.VT _.RC:$src0))))], _.ExeDomain>,
                       EVEX, EVEX_K, Sched<[Sched.RR]>;
    def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
                       (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
                       !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
                                  "${dst} {${mask}}, $src1}"),
                       [(set _.RC:$dst, (_.VT
                          (vselect_mask _.KRCWM:$mask,
                           (_.VT (ld_frag addr:$src1)),
                           (_.VT _.RC:$src0))))], _.ExeDomain>,
                       EVEX, EVEX_K, Sched<[Sched.RM]>;
  }
  def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
                      (ins _.KRCWM:$mask, _.MemOp:$src),
                      OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
                      "${dst} {${mask}} {z}, $src}",
                      [(set _.RC:$dst, (_.VT (vselect_mask _.KRCWM:$mask,
                        (_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))],
                      _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
  }
  // masked_load with undef or zero pass-through -> zero-masked load; with a
  // register pass-through -> merge-masked load.
  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
            (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;

  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;

  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
            (!cast<Instruction>(Name#_.ZSuffix#rmk) _.RC:$src0,
             _.KRCWM:$mask, addr:$ptr)>;
}

// Aligned-load variants across the three EVEX vector lengths; the 256/128-bit
// forms additionally require VLX.
multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo _, Predicate prd,
                                 X86SchedWriteMoveLSWidths Sched,
                                 bit NoRMPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
                       _.info512.AlignedLdFrag, masked_load_aligned,
                       Sched.ZMM, NoRMPattern>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
  defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
                          _.info256.AlignedLdFrag, masked_load_aligned,
                          Sched.YMM, NoRMPattern>, EVEX_V256;
  defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
                          _.info128.AlignedLdFrag, masked_load_aligned,
                          Sched.XMM, NoRMPattern>, EVEX_V128;
  }
}

// Unaligned-load variants across the three EVEX vector lengths.
multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
                          AVX512VLVectorVTInfo _, Predicate prd,
                          X86SchedWriteMoveLSWidths Sched,
                          bit NoRMPattern = 0,
                          SDPatternOperator SelectOprr = vselect> {
  let Predicates = [prd] in
  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag,
                       masked_load, Sched.ZMM, NoRMPattern, SelectOprr>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
  defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag,
                          masked_load, Sched.YMM, NoRMPattern, SelectOprr>, EVEX_V256;
  defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag,
                          masked_load, Sched.XMM, NoRMPattern, SelectOprr>, EVEX_V128;
  }
}

// One width of a vector store:
//   rr_REV/rrk_REV/rrkz_REV - codegen-only store-direction register moves
//                             (disassembly aliases, MRMDestReg encoding),
//   mr/mrk                  - memory stores,
// plus a pattern mapping the target-independent masked-store node (mstore)
// onto mrk, and ".s" InstAliases selecting the _REV encodings.
multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
                        X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
                        X86SchedWriteMoveLS Sched, bit NoMRPattern = 0> {
  let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
  let isMoveReg = 1 in
  def rr_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
                        OpcodeStr # "\t{$src, $dst|$dst, $src}",
                        [], _.ExeDomain>, EVEX,
                        Sched<[Sched.RR]>;
  def rrk_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
                         (ins _.KRCWM:$mask, _.RC:$src),
                         OpcodeStr # "\t{$src, ${dst} {${mask}}|"#
                         "${dst} {${mask}}, $src}",
                         [], _.ExeDomain>, EVEX, EVEX_K,
                         Sched<[Sched.RR]>;
  def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
                          (ins _.KRCWM:$mask, _.RC:$src),
                          OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
                          "${dst} {${mask}} {z}, $src}",
                          [], _.ExeDomain>, EVEX, EVEX_KZ,
                          Sched<[Sched.RR]>;
  }

  let hasSideEffects = 0, mayStore = 1 in
  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    !if(NoMRPattern, [],
                        [(st_frag (_.VT _.RC:$src), addr:$dst)]),
                    _.ExeDomain>, EVEX, Sched<[Sched.MR]>;
  def mrk : AVX512PI<opc, MRMDestMem, (outs),
                     (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
                     OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
                     [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>;

  def: Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask),
           (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
            _.KRCWM:$mask, _.RC:$src)>;

  def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV")
                   _.RC:$dst, _.RC:$src), 0>;
  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV")
                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV")
                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
}

// Unaligned-store variants across the three EVEX vector lengths.
multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
                            AVX512VLVectorVTInfo _, Predicate prd,
                            X86SchedWriteMoveLSWidths Sched,
                            bit NoMRPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store,
                        masked_store, Sched.ZMM, NoMRPattern>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store,
                             masked_store, Sched.YMM, NoMRPattern>, EVEX_V256;
    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store,
                             masked_store, Sched.XMM, NoMRPattern>, EVEX_V128;
  }
}

// Aligned-store variants across the three EVEX vector lengths.
multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _, Predicate prd,
                                  X86SchedWriteMoveLSWidths Sched,
                                  bit NoMRPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore,
                        masked_store_aligned, Sched.ZMM, NoMRPattern>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore,
                             masked_store_aligned, Sched.YMM, NoMRPattern>, EVEX_V256;
    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore,
                             masked_store_aligned, Sched.XMM, NoMRPattern>, EVEX_V128;
  }
}

// Concrete EVEX move instructions. Each defm stitches together a load family
// and a store family sharing the mnemonic, plus prefix/encoding traits.
defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
                                     HasAVX512, SchedWriteFMoveLS>,
               avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
                                      HasAVX512, SchedWriteFMoveLS>,
               TB, EVEX_CD8<32, CD8VF>;

defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
                                     HasAVX512, SchedWriteFMoveLS>,
               avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
                                      HasAVX512, SchedWriteFMoveLS>,
               TB, PD, REX_W, EVEX_CD8<64, CD8VF>;

defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
                              SchedWriteFMoveLS, 0, null_frag>,
               avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
                               SchedWriteFMoveLS>,
               TB, EVEX_CD8<32, CD8VF>;

defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
                              SchedWriteFMoveLS, 0, null_frag>,
               avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
                               SchedWriteFMoveLS>,
               TB, PD, REX_W, EVEX_CD8<64, CD8VF>;

defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
                                       HasAVX512, SchedWriteVecMoveLS, 1>,
                 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
                                        HasAVX512, SchedWriteVecMoveLS, 1>,
                 TB, PD, EVEX_CD8<32, CD8VF>;

defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
                                       HasAVX512, SchedWriteVecMoveLS>,
                 avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
                                        HasAVX512, SchedWriteVecMoveLS>,
                 TB, PD, REX_W, EVEX_CD8<64, CD8VF>;

defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI,
                               SchedWriteVecMoveLS, 1>,
                avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI,
                                SchedWriteVecMoveLS, 1>,
                TB, XD, EVEX_CD8<8, CD8VF>;

defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI,
                                SchedWriteVecMoveLS, 1>,
                 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI,
                                 SchedWriteVecMoveLS, 1>,
                 TB, XD, REX_W, EVEX_CD8<16, CD8VF>;

defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
                                SchedWriteVecMoveLS, 1, null_frag>,
                 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
                                 SchedWriteVecMoveLS, 1>,
                 TB, XS, EVEX_CD8<32, CD8VF>;

defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
                                SchedWriteVecMoveLS, 0, null_frag>,
                 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
                                 SchedWriteVecMoveLS>,
                 TB, XS, REX_W, EVEX_CD8<64, CD8VF>;

// Special instructions to help with spilling when we don't have VLX. We need
// to load or store from a ZMM register instead. These are converted in
// expandPostRAPseudos.
// Pseudo loads used for XMM/YMM spill reloads when VLX is unavailable;
// expanded post-RA (see comment above) to operate on the full ZMM register.
let isReMaterializable = 1, canFoldAsLoad = 1,
    isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in {
def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
                            "", []>, Sched<[WriteFLoadX]>;
def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
                            "", []>, Sched<[WriteFLoadY]>;
def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
                            "", []>, Sched<[WriteFLoadX]>;
def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
                            "", []>, Sched<[WriteFLoadY]>;
}

// Matching pseudo stores for spills without VLX.
let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
                            "", []>, Sched<[WriteFStoreX]>;
def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
                            "", []>, Sched<[WriteFStoreY]>;
def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
                            "", []>, Sched<[WriteFStoreX]>;
def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
                            "", []>, Sched<[WriteFStoreY]>;
}

// vselect that zeroes the *selected* lanes: invert the mask with KNOT and use
// a zero-masked move. The v8i1 case round-trips the mask through VK16 because
// KNOTW operates on the 16-bit mask class.
def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV),
                  (v8i64 VR512:$src))),
          (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
                                            VK8), VR512:$src)>;

def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
                   (v16i32 VR512:$src))),
          (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;

// These patterns exist to prevent the above patterns from introducing a second
// mask inversion when one already exists.
def : Pat<(v8i64 (vselect (v8i1 (vnot VK8:$mask)),
                          (v8i64 immAllZerosV),
                          (v8i64 VR512:$src))),
          (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
def : Pat<(v16i32 (vselect (v16i1 (vnot VK16:$mask)),
                           (v16i32 immAllZerosV),
                           (v16i32 VR512:$src))),
          (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;

// Lower a narrow masked select (blend) to the wide masked move when VLX isn't
// available: widen both vector operands, move the mask to the wide mask
// class, perform the 512-bit merge/zero-masked move, then extract the narrow
// subregister of the result.
multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
                              X86VectorVTInfo Wide> {
 // Merge-masked blend: $src0 supplies the lanes where the mask is clear.
 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
                               Narrow.RC:$src1, Narrow.RC:$src0)),
           (EXTRACT_SUBREG
            (Wide.VT
             (!cast<Instruction>(InstrStr#"rrk")
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
              (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
            Narrow.SubRegIdx)>;

 // Zero-masked blend.
 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
                               Narrow.RC:$src1, Narrow.ImmAllZerosV)),
           (EXTRACT_SUBREG
            (Wide.VT
             (!cast<Instruction>(InstrStr#"rrkz")
              (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
            Narrow.SubRegIdx)>;
}

// Patterns for handling v8i1 selects of 256-bit vectors when VLX isn't
// available. Use a 512-bit operation and extract.
// Instantiate the widening masked-select lowering for all narrow D/Q element
// types when only AVX512F (no VLX) is available.
let Predicates = [HasAVX512, NoVLX] in {
  defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
  defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
  defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
  defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;

  defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>;
  defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
  defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>;
  defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
}

// Byte/word (and f16/bf16, which share the 16-bit move) element types need
// BWI for the masked moves.
let Predicates = [HasBWI, NoVLX] in {
  defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
  defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;

  defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>;
  defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;

  defm : mask_move_lowering<"VMOVDQU16Z", v8f16x_info, v32f16_info>;
  defm : mask_move_lowering<"VMOVDQU16Z", v16f16x_info, v32f16_info>;

  defm : mask_move_lowering<"VMOVDQU16Z", v8bf16x_info, v32bf16_info>;
  defm : mask_move_lowering<"VMOVDQU16Z", v16bf16x_info, v32bf16_info>;
}

// Plain (unmasked) full-width loads/stores of types that have no dedicated
// move instruction are borrowed from the Q/PS moves of the same width.
let Predicates = [HasAVX512] in {
  // 512-bit load.
  def : Pat<(alignedloadv16i32 addr:$src),
            (VMOVDQA64Zrm addr:$src)>;
  def : Pat<(alignedloadv32i16 addr:$src),
            (VMOVDQA64Zrm addr:$src)>;
  def : Pat<(alignedloadv32f16 addr:$src),
            (VMOVAPSZrm addr:$src)>;
  def : Pat<(alignedloadv32bf16 addr:$src),
            (VMOVAPSZrm addr:$src)>;
  def : Pat<(alignedloadv64i8 addr:$src),
            (VMOVDQA64Zrm addr:$src)>;
  def : Pat<(loadv16i32 addr:$src),
            (VMOVDQU64Zrm addr:$src)>;
  def : Pat<(loadv32i16 addr:$src),
            (VMOVDQU64Zrm addr:$src)>;
  def : Pat<(loadv32f16 addr:$src),
            (VMOVUPSZrm addr:$src)>;
  def : Pat<(loadv32bf16 addr:$src),
            (VMOVUPSZrm addr:$src)>;
  def : Pat<(loadv64i8 addr:$src),
            (VMOVDQU64Zrm addr:$src)>;

  // 512-bit store.
  def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(alignedstore (v32f16 VR512:$src), addr:$dst),
            (VMOVAPSZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignedstore (v32bf16 VR512:$src), addr:$dst),
            (VMOVAPSZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v16i32 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v32i16 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v32f16 VR512:$src), addr:$dst),
            (VMOVUPSZmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v32bf16 VR512:$src), addr:$dst),
            (VMOVUPSZmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v64i8 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
}

// Same borrowing for the VLX 128/256-bit widths.
let Predicates = [HasVLX] in {
  // 128-bit load.
  def : Pat<(alignedloadv4i32 addr:$src),
            (VMOVDQA64Z128rm addr:$src)>;
  def : Pat<(alignedloadv8i16 addr:$src),
            (VMOVDQA64Z128rm addr:$src)>;
  def : Pat<(alignedloadv8f16 addr:$src),
            (VMOVAPSZ128rm addr:$src)>;
  def : Pat<(alignedloadv8bf16 addr:$src),
            (VMOVAPSZ128rm addr:$src)>;
  def : Pat<(alignedloadv16i8 addr:$src),
            (VMOVDQA64Z128rm addr:$src)>;
  def : Pat<(loadv4i32 addr:$src),
            (VMOVDQU64Z128rm addr:$src)>;
  def : Pat<(loadv8i16 addr:$src),
            (VMOVDQU64Z128rm addr:$src)>;
  def : Pat<(loadv8f16 addr:$src),
            (VMOVUPSZ128rm addr:$src)>;
  def : Pat<(loadv8bf16 addr:$src),
            (VMOVUPSZ128rm addr:$src)>;
  def : Pat<(loadv16i8 addr:$src),
            (VMOVDQU64Z128rm addr:$src)>;

  // 128-bit store.
  def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignedstore (v8f16 VR128X:$src), addr:$dst),
            (VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignedstore (v8bf16 VR128X:$src), addr:$dst),
            (VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v8f16 VR128X:$src), addr:$dst),
            (VMOVUPSZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v8bf16 VR128X:$src), addr:$dst),
            (VMOVUPSZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;

  // 256-bit load.
  def : Pat<(alignedloadv8i32 addr:$src),
            (VMOVDQA64Z256rm addr:$src)>;
  def : Pat<(alignedloadv16i16 addr:$src),
            (VMOVDQA64Z256rm addr:$src)>;
  def : Pat<(alignedloadv16f16 addr:$src),
            (VMOVAPSZ256rm addr:$src)>;
  def : Pat<(alignedloadv16bf16 addr:$src),
            (VMOVAPSZ256rm addr:$src)>;
  def : Pat<(alignedloadv32i8 addr:$src),
            (VMOVDQA64Z256rm addr:$src)>;
  def : Pat<(loadv8i32 addr:$src),
            (VMOVDQU64Z256rm addr:$src)>;
  def : Pat<(loadv16i16 addr:$src),
            (VMOVDQU64Z256rm addr:$src)>;
  def : Pat<(loadv16f16 addr:$src),
            (VMOVUPSZ256rm addr:$src)>;
  def : Pat<(loadv16bf16 addr:$src),
            (VMOVUPSZ256rm addr:$src)>;
  def : Pat<(loadv32i8 addr:$src),
            (VMOVDQU64Z256rm addr:$src)>;

  // 256-bit store.
  def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v16f16 VR256X:$src), addr:$dst),
            (VMOVAPSZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v16bf16 VR256X:$src), addr:$dst),
            (VMOVAPSZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v16f16 VR256X:$src), addr:$dst),
            (VMOVUPSZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v16bf16 VR256X:$src), addr:$dst),
            (VMOVUPSZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
}

// f16/bf16 have no dedicated masked moves; map their masked selects,
// masked loads and masked stores onto the VMOVDQU16 forms of the same width.
multiclass mask_move_lowering_f16_bf16<AVX512VLVectorVTInfo _> {
let Predicates = [HasBWI] in {
  def : Pat<(_.info512.VT (vselect VK32WM:$mask, (_.info512.VT VR512:$src1), (_.info512.VT VR512:$src0))),
            (VMOVDQU16Zrrk VR512:$src0, VK32WM:$mask, VR512:$src1)>;
  def : Pat<(_.info512.VT (vselect VK32WM:$mask, (_.info512.VT VR512:$src1), _.info512.ImmAllZerosV)),
            (VMOVDQU16Zrrkz VK32WM:$mask, VR512:$src1)>;
  def : Pat<(_.info512.VT (vselect VK32WM:$mask,
                           (_.info512.VT (_.info512.AlignedLdFrag addr:$src)), (_.info512.VT VR512:$src0))),
            (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
  def : Pat<(_.info512.VT (vselect VK32WM:$mask,
                           (_.info512.VT (_.info512.AlignedLdFrag addr:$src)), _.info512.ImmAllZerosV)),
            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
  def : Pat<(_.info512.VT (vselect VK32WM:$mask,
                           (_.info512.VT (_.info512.LdFrag addr:$src)), (_.info512.VT VR512:$src0))),
            (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
  def : Pat<(_.info512.VT (vselect VK32WM:$mask,
                           (_.info512.VT (_.info512.LdFrag addr:$src)), _.info512.ImmAllZerosV)),
            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
  def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, (_.info512.VT VR512:$src0))),
            (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
  def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, undef)),
            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
  def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, _.info512.ImmAllZerosV)),
            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;

  def : Pat<(masked_store (_.info512.VT VR512:$src), addr:$dst, VK32WM:$mask),
            (VMOVDQU16Zmrk addr:$dst, VK32WM:$mask, VR512:$src)>;
}
let Predicates = [HasBWI, HasVLX] in {
  def : Pat<(_.info256.VT (vselect VK16WM:$mask, (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src0))),
            (VMOVDQU16Z256rrk VR256X:$src0, VK16WM:$mask, VR256X:$src1)>;
  def : Pat<(_.info256.VT (vselect VK16WM:$mask, (_.info256.VT VR256X:$src1), _.info256.ImmAllZerosV)),
            (VMOVDQU16Z256rrkz VK16WM:$mask, VR256X:$src1)>;
  def : Pat<(_.info256.VT (vselect VK16WM:$mask,
                           (_.info256.VT (_.info256.AlignedLdFrag addr:$src)), (_.info256.VT VR256X:$src0))),
            (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
  def : Pat<(_.info256.VT (vselect VK16WM:$mask,
                           (_.info256.VT (_.info256.AlignedLdFrag addr:$src)), _.info256.ImmAllZerosV)),
            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
  def : Pat<(_.info256.VT (vselect VK16WM:$mask,
                           (_.info256.VT (_.info256.LdFrag addr:$src)), (_.info256.VT VR256X:$src0))),
            (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
  def : Pat<(_.info256.VT (vselect VK16WM:$mask,
                           (_.info256.VT (_.info256.LdFrag addr:$src)), _.info256.ImmAllZerosV)),
            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
  def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, (_.info256.VT VR256X:$src0))),
            (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
  def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, undef)),
            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
  def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, _.info256.ImmAllZerosV)),
            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;

  def : Pat<(masked_store (_.info256.VT VR256X:$src), addr:$dst, VK16WM:$mask),
            (VMOVDQU16Z256mrk addr:$dst, VK16WM:$mask, VR256X:$src)>;

  def : Pat<(_.info128.VT (vselect VK8WM:$mask, (_.info128.VT VR128X:$src1), (_.info128.VT VR128X:$src0))),
            (VMOVDQU16Z128rrk VR128X:$src0, VK8WM:$mask, VR128X:$src1)>;
  def : Pat<(_.info128.VT (vselect VK8WM:$mask, (_.info128.VT VR128X:$src1), _.info128.ImmAllZerosV)),
            (VMOVDQU16Z128rrkz VK8WM:$mask, VR128X:$src1)>;
  def : Pat<(_.info128.VT (vselect VK8WM:$mask,
                           (_.info128.VT (_.info128.AlignedLdFrag addr:$src)), (_.info128.VT VR128X:$src0))),
            (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
  def : Pat<(_.info128.VT (vselect VK8WM:$mask,
                           (_.info128.VT (_.info128.AlignedLdFrag addr:$src)), _.info128.ImmAllZerosV)),
            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
  def : Pat<(_.info128.VT (vselect VK8WM:$mask,
                           (_.info128.VT (_.info128.LdFrag addr:$src)), (_.info128.VT VR128X:$src0))),
            (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
  def : Pat<(_.info128.VT (vselect VK8WM:$mask,
                           (_.info128.VT (_.info128.LdFrag addr:$src)), _.info128.ImmAllZerosV)),
            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
  def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, (_.info128.VT VR128X:$src0))),
            (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
  def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, undef)),
            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
  def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, _.info128.ImmAllZerosV)),
            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;

  def : Pat<(masked_store (_.info128.VT VR128X:$src), addr:$dst, VK8WM:$mask),
            (VMOVDQU16Z128mrk addr:$dst, VK8WM:$mask, VR128X:$src)>;
}
}

defm : mask_move_lowering_f16_bf16<avx512vl_f16_info>;
defm : mask_move_lowering_f16_bf16<avx512vl_bf16_info>;

// Move Int Doubleword to Packed Double Int
//
let ExeDomain = SSEPackedInt in {
def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set VR128X:$dst,
                        (v4i32 (scalar_to_vector GR32:$src)))]>,
                      EVEX, Sched<[WriteVecMoveFromGpr]>;
def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set VR128X:$dst,
                        (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
                      EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(set VR128X:$dst,
                        (v2i64 (scalar_to_vector GR64:$src)))]>,
EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
// Memory form of VMOVQ into an XMM register. No isel pattern ([]): it exists
// only so the disassembler can decode this encoding (ForceDisassemble); the
// load is selected through VMOVQI2PQIZrm instead.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
                      (ins i64mem:$src),
                      "vmovq\t{$src, $dst|$dst, $src}", []>,
                      EVEX, REX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
// Bitcast moves between GR64 and FR64X. Codegen-only isel helpers; they do
// not introduce new assembler mnemonics of their own.
let isCodeGenOnly = 1 in {
def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(set FR64X:$dst, (bitconvert GR64:$src))]>,
                      EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(set GR64:$dst, (bitconvert FR64X:$src))]>,
                      EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
}
} // ExeDomain = SSEPackedInt

// Move Int Doubleword to Single Scalar
//
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set FR32X:$dst, (bitconvert GR32:$src))]>,
                      EVEX, Sched<[WriteVecMoveFromGpr]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1

// Move doubleword from xmm register to r/m32
//
let ExeDomain = SSEPackedInt in {
def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
                                                   (iPTR 0)))]>,
                      EVEX, Sched<[WriteVecMoveToGpr]>;
def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
                      (ins i32mem:$dst, VR128X:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(store (i32 (extractelt (v4i32 VR128X:$src),
                                               (iPTR 0))), addr:$dst)]>,
                      EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
} // ExeDomain = SSEPackedInt

// Move quadword from xmm1 register to r/m64
//
let ExeDomain = SSEPackedInt in {
def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
                       "vmovq\t{$src, $dst|$dst, $src}",
                       [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
                                                    (iPTR 0)))]>,
                       TB, PD, EVEX, REX_W, Sched<[WriteVecMoveToGpr]>,
                       Requires<[HasAVX512]>;

// Store form of the XMM->GR64 move; no pattern — kept for the disassembler.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
                       "vmovq\t{$src, $dst|$dst, $src}", []>, TB, PD,
                       EVEX, REX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecStore]>,
                       Requires<[HasAVX512, In64BitMode]>;

def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
                      (ins i64mem:$dst, VR128X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
                              addr:$dst)]>,
                      EVEX, TB, PD, REX_W, EVEX_CD8<64, CD8VT1>,
                      Sched<[WriteVecStore]>, Requires<[HasAVX512]>;

// Register-register form using the store opcode (0xD6, MRMDestReg):
// a "reversed" encoding that exists for round-trip disassembly only.
let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
                             (ins VR128X:$src),
                             "vmovq\t{$src, $dst|$dst, $src}", []>,
                             EVEX, REX_W, Sched<[SchedWriteVecLogic.XMM]>;
} // ExeDomain = SSEPackedInt

// The ".s" suffix forces the store-opcode encoding of the reg-reg move.
def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
                (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;

let Predicates = [HasAVX512] in {
  def : Pat<(X86vextractstore64 (v2i64 VR128X:$src), addr:$dst),
            (VMOVPQI2QIZmr addr:$dst, VR128X:$src)>;
}

// Move Scalar Single to Double Int
//
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
                            (ins FR32X:$src),
                            "vmovd\t{$src, $dst|$dst, $src}",
                            [(set GR32:$dst, (bitconvert FR32X:$src))]>,
                            EVEX, Sched<[WriteVecMoveToGpr]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1

// Move Quadword Int to Packed Quadword Int
//
let ExeDomain = SSEPackedInt in {
def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem,
(outs VR128X:$dst),
                              (ins i64mem:$src),
                              "vmovq\t{$src, $dst|$dst, $src}",
                              [(set VR128X:$dst,
                                (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
                              // NOTE(review): CD8 tuple here is <8, CD8VT8>
                              // rather than <64, CD8VT1> — looks intentional
                              // (both scale disp8 by 8) but verify upstream.
                              EVEX, REX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
} // ExeDomain = SSEPackedInt

// Allow "vmovd" spelling with a 64-bit GPR but print "vmovq" (priority 0).
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>;
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>;

// Conversions between masks and scalar fp: routed through a GPR with a
// KMOV paired with the codegen-only GPR<->FP bitcast moves defined above.
def : Pat<(v32i1 (bitconvert FR32X:$src)),
          (KMOVDkr (VMOVSS2DIZrr FR32X:$src))>;
def : Pat<(f32 (bitconvert VK32:$src)),
          (VMOVDI2SSZrr (KMOVDrk VK32:$src))>;

def : Pat<(v64i1 (bitconvert FR64X:$src)),
          (KMOVQkr (VMOVSDto64Zrr FR64X:$src))>;
def : Pat<(f64 (bitconvert VK64:$src)),
          (VMOV64toSDZrr (KMOVQrk VK64:$src))>;

//===----------------------------------------------------------------------===//
// AVX-512 MOVSH, MOVSS, MOVSD
//===----------------------------------------------------------------------===//

// Scalar move with unmasked, merge-masked (rrk) and zero-masked (rrkz)
// register forms plus load/store forms. For the register variant the pattern
// is (OpNode $src1, $src2); the FP16 variant is gated only on HasFP16 while
// SS/SD additionally require OptForSize for the unmasked reg-reg form.
multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
                              X86VectorVTInfo _, Predicate prd = HasAVX512> {
  let Predicates = !if (!eq (prd, HasFP16), [HasFP16], [prd, OptForSize]) in
  def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
             (ins _.RC:$src1, _.RC:$src2),
             !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
             _.ExeDomain>, EVEX, VVVV, Sched<[SchedWriteFShuffle.XMM]>;
  let Predicates = [prd] in {
  def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
              !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
                         "$dst {${mask}} {z}, $src1, $src2}"),
              [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
                                     (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                                     _.ImmAllZerosV)))],
              _.ExeDomain>, EVEX, VVVV, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>;
  let Constraints = "$src0 = $dst" in
  def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
             (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
             !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
                        "$dst {${mask}}, $src1, $src2}"),
             [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
                                    (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                                    (_.VT _.RC:$src0))))],
             _.ExeDomain>, EVEX, VVVV, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
  let canFoldAsLoad = 1, isReMaterializable = 1 in {
  // Unmasked load: zero-extends the scalar into the full vector register.
  def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
             [(set _.RC:$dst, (_.VT (vzload_frag addr:$src)))],
             _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
  // _alt version uses FR32/FR64 register class.
  let isCodeGenOnly = 1 in
  def rm_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
               !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
               [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
               _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
  }
  // Masked loads carry no isel pattern here; they are selected through the
  // separate avx512_load_scalar_lowering* Pat<> multiclasses.
  let mayLoad = 1, hasSideEffects = 0 in {
  let Constraints = "$src0 = $dst" in
  def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
              (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
              !strconcat(asm, "\t{$src, $dst {${mask}}|",
                         "$dst {${mask}}, $src}"),
              [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
  def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
              (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
              !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
                         "$dst {${mask}} {z}, $src}"),
              [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
  }
  def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
             [(store _.FRC:$src, addr:$dst)], _.ExeDomain>,
             EVEX, Sched<[WriteFStore]>;
  // Masked scalar store; no pattern here — selected through the
  // avx512_store_scalar_lowering* Pat<> multiclasses.
  let mayStore = 1, hasSideEffects = 0 in
  def mrk: AVX512PI<0x11, MRMDestMem, (outs),
              (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.RC:$src),
              !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
              [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>;
}

defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>,
                                  VEX_LIG, TB, XS, EVEX_CD8<32, CD8VT1>;

defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>,
                                  VEX_LIG, TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;

defm VMOVSHZ : avx512_move_scalar<"vmovsh", X86Movsh, X86vzload16, f16x_info,
                                  HasFP16>,
                                  VEX_LIG, T_MAP5, XS, EVEX_CD8<16, CD8VT1>;

// Fold a scalar X86selects feeding a movss/movsd-style node into the
// merge-masked (rrk) or zero-masked (rrkz) register form, moving the FRC
// operands into the full vector register class with COPY_TO_REGCLASS.
multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
                                       PatLeaf ZeroFP, X86VectorVTInfo _> {

def : Pat<(_.VT (OpNode _.RC:$src0,
                        (_.VT (scalar_to_vector
                                  (_.EltVT (X86selects VK1WM:$mask,
                                                       (_.EltVT _.FRC:$src1),
                                                       (_.EltVT _.FRC:$src2))))))),
          (!cast<Instruction>(InstrStr#rrk)
                        (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)),
                        VK1WM:$mask,
                        (_.VT _.RC:$src0),
                        (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;

def : Pat<(_.VT (OpNode _.RC:$src0,
                        (_.VT (scalar_to_vector
                                  (_.EltVT (X86selects VK1WM:$mask,
                                                       (_.EltVT _.FRC:$src1),
                                                       (_.EltVT ZeroFP))))))),
          (!cast<Instruction>(InstrStr#rrkz)
                        VK1WM:$mask,
                        (_.VT _.RC:$src0),
                        (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
}

// Match a 128-bit masked store that was widened to 512 bits (low subvector
// inserted into undef) and emit the masked scalar store (mrk). The mask value
// is already a suitable register and is just re-classed to VK1WM.
multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
                                        dag Mask, RegisterClass MaskRC> {

def : Pat<(masked_store
             (_.info512.VT (insert_subvector undef,
                               (_.info128.VT _.info128.RC:$src),
                               (iPTR 0))), addr:$dst, Mask),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                      _.info128.RC:$src)>;

}

// Same as above, but the mask register is narrower than 32 bits and must be
// widened with INSERT_SUBREG into an i32 before re-classing to VK1WM.
multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
                                               AVX512VLVectorVTInfo _,
                                               dag Mask, RegisterClass MaskRC,
                                               SubRegIndex subreg> {

def : Pat<(masked_store
             (_.info512.VT (insert_subvector undef,
                               (_.info128.VT _.info128.RC:$src),
                               (iPTR 0))), addr:$dst, Mask),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      _.info128.RC:$src)>;

}

// This matches the more recent codegen from clang that avoids emitting a
// 512-bit masked store directly. Codegen will widen a 128-bit masked store to
// 512 bits on AVX512F-only targets.
multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
                                                AVX512VLVectorVTInfo _,
                                                dag Mask512, dag Mask128,
                                                RegisterClass MaskRC,
                                                SubRegIndex subreg> {

// AVX512F pattern.
def : Pat<(masked_store
             (_.info512.VT (insert_subvector undef,
                               (_.info128.VT _.info128.RC:$src),
                               (iPTR 0))), addr:$dst, Mask512),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      _.info128.RC:$src)>;

// AVX512VL pattern.
4030def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128), 4031 (!cast<Instruction>(InstrStr#mrk) addr:$dst, 4032 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4033 _.info128.RC:$src)>; 4034} 4035 4036multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _, 4037 dag Mask, RegisterClass MaskRC> { 4038 4039def : Pat<(_.info128.VT (extract_subvector 4040 (_.info512.VT (masked_load addr:$srcAddr, Mask, 4041 _.info512.ImmAllZerosV)), 4042 (iPTR 0))), 4043 (!cast<Instruction>(InstrStr#rmkz) 4044 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM), 4045 addr:$srcAddr)>; 4046 4047def : Pat<(_.info128.VT (extract_subvector 4048 (_.info512.VT (masked_load addr:$srcAddr, Mask, 4049 (_.info512.VT (insert_subvector undef, 4050 (_.info128.VT (X86vzmovl _.info128.RC:$src)), 4051 (iPTR 0))))), 4052 (iPTR 0))), 4053 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src, 4054 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM), 4055 addr:$srcAddr)>; 4056 4057} 4058 4059multiclass avx512_load_scalar_lowering_subreg<string InstrStr, 4060 AVX512VLVectorVTInfo _, 4061 dag Mask, RegisterClass MaskRC, 4062 SubRegIndex subreg> { 4063 4064def : Pat<(_.info128.VT (extract_subvector 4065 (_.info512.VT (masked_load addr:$srcAddr, Mask, 4066 _.info512.ImmAllZerosV)), 4067 (iPTR 0))), 4068 (!cast<Instruction>(InstrStr#rmkz) 4069 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4070 addr:$srcAddr)>; 4071 4072def : Pat<(_.info128.VT (extract_subvector 4073 (_.info512.VT (masked_load addr:$srcAddr, Mask, 4074 (_.info512.VT (insert_subvector undef, 4075 (_.info128.VT (X86vzmovl _.info128.RC:$src)), 4076 (iPTR 0))))), 4077 (iPTR 0))), 4078 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src, 4079 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4080 addr:$srcAddr)>; 4081 4082} 4083 4084// This matches the more recent codegen from clang that avoids emitting a 512 4085// 
bit masked load directly. Codegen will widen 128-bit masked load to 512 4086// bits on AVX512F only targets. 4087multiclass avx512_load_scalar_lowering_subreg2<string InstrStr, 4088 AVX512VLVectorVTInfo _, 4089 dag Mask512, dag Mask128, 4090 RegisterClass MaskRC, 4091 SubRegIndex subreg> { 4092// AVX512F patterns. 4093def : Pat<(_.info128.VT (extract_subvector 4094 (_.info512.VT (masked_load addr:$srcAddr, Mask512, 4095 _.info512.ImmAllZerosV)), 4096 (iPTR 0))), 4097 (!cast<Instruction>(InstrStr#rmkz) 4098 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4099 addr:$srcAddr)>; 4100 4101def : Pat<(_.info128.VT (extract_subvector 4102 (_.info512.VT (masked_load addr:$srcAddr, Mask512, 4103 (_.info512.VT (insert_subvector undef, 4104 (_.info128.VT (X86vzmovl _.info128.RC:$src)), 4105 (iPTR 0))))), 4106 (iPTR 0))), 4107 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src, 4108 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4109 addr:$srcAddr)>; 4110 4111// AVX512Vl patterns. 
4112def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128, 4113 _.info128.ImmAllZerosV)), 4114 (!cast<Instruction>(InstrStr#rmkz) 4115 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4116 addr:$srcAddr)>; 4117 4118def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128, 4119 (_.info128.VT (X86vzmovl _.info128.RC:$src)))), 4120 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src, 4121 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), 4122 addr:$srcAddr)>; 4123} 4124 4125defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>; 4126defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>; 4127 4128defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info, 4129 (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>; 4130defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info, 4131 (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>; 4132defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info, 4133 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>; 4134 4135let Predicates = [HasFP16] in { 4136defm : avx512_move_scalar_lowering<"VMOVSHZ", X86Movsh, fp16imm0, v8f16x_info>; 4137defm : avx512_store_scalar_lowering<"VMOVSHZ", avx512vl_f16_info, 4138 (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>; 4139defm : avx512_store_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info, 4140 (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>; 4141defm : avx512_store_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info, 4142 (v32i1 (insert_subvector 4143 (v32i1 immAllZerosV), 4144 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))), 4145 (iPTR 0))), 4146 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))), 4147 GR8, sub_8bit>; 4148 4149defm : avx512_load_scalar_lowering<"VMOVSHZ", avx512vl_f16_info, 4150 (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>; 4151defm : 
avx512_load_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info, 4152 (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>; 4153defm : avx512_load_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info, 4154 (v32i1 (insert_subvector 4155 (v32i1 immAllZerosV), 4156 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))), 4157 (iPTR 0))), 4158 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))), 4159 GR8, sub_8bit>; 4160 4161def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), (f16 FR16X:$src2))), 4162 (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrk 4163 (v8f16 (COPY_TO_REGCLASS FR16X:$src2, VR128X)), 4164 VK1WM:$mask, (v8f16 (IMPLICIT_DEF)), 4165 (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>; 4166 4167def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), fp16imm0)), 4168 (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrkz VK1WM:$mask, (v8f16 (IMPLICIT_DEF)), 4169 (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>; 4170} 4171 4172defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info, 4173 (v16i1 (insert_subvector 4174 (v16i1 immAllZerosV), 4175 (v4i1 (extract_subvector 4176 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))), 4177 (iPTR 0))), 4178 (iPTR 0))), 4179 (v4i1 (extract_subvector 4180 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))), 4181 (iPTR 0))), GR8, sub_8bit>; 4182defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info, 4183 (v8i1 4184 (extract_subvector 4185 (v16i1 4186 (insert_subvector 4187 (v16i1 immAllZerosV), 4188 (v2i1 (extract_subvector 4189 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), 4190 (iPTR 0))), 4191 (iPTR 0))), 4192 (iPTR 0))), 4193 (v2i1 (extract_subvector 4194 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), 4195 (iPTR 0))), GR8, sub_8bit>; 4196 4197defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info, 4198 (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>; 4199defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info, 4200 (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 
1))))), GR16, sub_16bit>; 4201defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info, 4202 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>; 4203 4204defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info, 4205 (v16i1 (insert_subvector 4206 (v16i1 immAllZerosV), 4207 (v4i1 (extract_subvector 4208 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))), 4209 (iPTR 0))), 4210 (iPTR 0))), 4211 (v4i1 (extract_subvector 4212 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))), 4213 (iPTR 0))), GR8, sub_8bit>; 4214defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info, 4215 (v8i1 4216 (extract_subvector 4217 (v16i1 4218 (insert_subvector 4219 (v16i1 immAllZerosV), 4220 (v2i1 (extract_subvector 4221 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), 4222 (iPTR 0))), 4223 (iPTR 0))), 4224 (iPTR 0))), 4225 (v2i1 (extract_subvector 4226 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), 4227 (iPTR 0))), GR8, sub_8bit>; 4228 4229def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))), 4230 (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk 4231 (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)), 4232 VK1WM:$mask, (v4f32 (IMPLICIT_DEF)), 4233 (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>; 4234 4235def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)), 4236 (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)), 4237 (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>; 4238 4239def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), (f32 FR32X:$src0))), 4240 (COPY_TO_REGCLASS 4241 (v4f32 (VMOVSSZrmk (v4f32 (COPY_TO_REGCLASS FR32X:$src0, VR128X)), 4242 VK1WM:$mask, addr:$src)), 4243 FR32X)>; 4244def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), fp32imm0)), 4245 (COPY_TO_REGCLASS (v4f32 (VMOVSSZrmkz VK1WM:$mask, addr:$src)), FR32X)>; 4246 4247def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))), 4248 (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk 
4249 (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)), 4250 VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), 4251 (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>; 4252 4253def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fp64imm0)), 4254 (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), 4255 (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>; 4256 4257def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0))), 4258 (COPY_TO_REGCLASS 4259 (v2f64 (VMOVSDZrmk (v2f64 (COPY_TO_REGCLASS FR64X:$src0, VR128X)), 4260 VK1WM:$mask, addr:$src)), 4261 FR64X)>; 4262def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)), 4263 (COPY_TO_REGCLASS (v2f64 (VMOVSDZrmkz VK1WM:$mask, addr:$src)), FR64X)>; 4264 4265 4266def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 VR128X:$src2))), 4267 (VMOVSSZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>; 4268def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 VR128X:$src2))), 4269 (VMOVSDZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>; 4270 4271def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 immAllZerosV))), 4272 (VMOVSSZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>; 4273def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 immAllZerosV))), 4274 (VMOVSDZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>; 4275 4276let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in { 4277 let Predicates = [HasFP16] in { 4278 def VMOVSHZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), 4279 (ins VR128X:$src1, VR128X:$src2), 4280 "vmovsh\t{$src2, $src1, $dst|$dst, $src1, $src2}", 4281 []>, T_MAP5, XS, EVEX, VVVV, VEX_LIG, 4282 Sched<[SchedWriteFShuffle.XMM]>; 4283 4284 let Constraints = "$src0 = $dst" in 4285 def VMOVSHZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), 4286 (ins f16x_info.RC:$src0, f16x_info.KRCWM:$mask, 4287 VR128X:$src1, VR128X:$src2), 4288 
"vmovsh\t{$src2, $src1, $dst {${mask}}|"# 4289 "$dst {${mask}}, $src1, $src2}", 4290 []>, T_MAP5, XS, EVEX_K, EVEX, VVVV, VEX_LIG, 4291 Sched<[SchedWriteFShuffle.XMM]>; 4292 4293 def VMOVSHZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), 4294 (ins f16x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2), 4295 "vmovsh\t{$src2, $src1, $dst {${mask}} {z}|"# 4296 "$dst {${mask}} {z}, $src1, $src2}", 4297 []>, EVEX_KZ, T_MAP5, XS, EVEX, VVVV, VEX_LIG, 4298 Sched<[SchedWriteFShuffle.XMM]>; 4299 } 4300 def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), 4301 (ins VR128X:$src1, VR128X:$src2), 4302 "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}", 4303 []>, TB, XS, EVEX, VVVV, VEX_LIG, 4304 Sched<[SchedWriteFShuffle.XMM]>; 4305 4306 let Constraints = "$src0 = $dst" in 4307 def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), 4308 (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask, 4309 VR128X:$src1, VR128X:$src2), 4310 "vmovss\t{$src2, $src1, $dst {${mask}}|"# 4311 "$dst {${mask}}, $src1, $src2}", 4312 []>, EVEX_K, TB, XS, EVEX, VVVV, VEX_LIG, 4313 Sched<[SchedWriteFShuffle.XMM]>; 4314 4315 def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), 4316 (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2), 4317 "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"# 4318 "$dst {${mask}} {z}, $src1, $src2}", 4319 []>, EVEX_KZ, TB, XS, EVEX, VVVV, VEX_LIG, 4320 Sched<[SchedWriteFShuffle.XMM]>; 4321 4322 def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), 4323 (ins VR128X:$src1, VR128X:$src2), 4324 "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}", 4325 []>, TB, XD, EVEX, VVVV, VEX_LIG, REX_W, 4326 Sched<[SchedWriteFShuffle.XMM]>; 4327 4328 let Constraints = "$src0 = $dst" in 4329 def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), 4330 (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask, 4331 VR128X:$src1, VR128X:$src2), 4332 "vmovsd\t{$src2, $src1, $dst {${mask}}|"# 4333 "$dst {${mask}}, $src1, $src2}", 4334 []>, EVEX_K, TB, XD, 
EVEX, VVVV, VEX_LIG, 4335 REX_W, Sched<[SchedWriteFShuffle.XMM]>; 4336 4337 def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), 4338 (ins f64x_info.KRCWM:$mask, VR128X:$src1, 4339 VR128X:$src2), 4340 "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"# 4341 "$dst {${mask}} {z}, $src1, $src2}", 4342 []>, EVEX_KZ, TB, XD, EVEX, VVVV, VEX_LIG, 4343 REX_W, Sched<[SchedWriteFShuffle.XMM]>; 4344} 4345 4346def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst|$dst, $src1, $src2}", 4347 (VMOVSHZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>; 4348def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}}|"# 4349 "$dst {${mask}}, $src1, $src2}", 4350 (VMOVSHZrrk_REV VR128X:$dst, VK1WM:$mask, 4351 VR128X:$src1, VR128X:$src2), 0>; 4352def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}} {z}|"# 4353 "$dst {${mask}} {z}, $src1, $src2}", 4354 (VMOVSHZrrkz_REV VR128X:$dst, VK1WM:$mask, 4355 VR128X:$src1, VR128X:$src2), 0>; 4356def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}", 4357 (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>; 4358def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"# 4359 "$dst {${mask}}, $src1, $src2}", 4360 (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask, 4361 VR128X:$src1, VR128X:$src2), 0>; 4362def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"# 4363 "$dst {${mask}} {z}, $src1, $src2}", 4364 (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask, 4365 VR128X:$src1, VR128X:$src2), 0>; 4366def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}", 4367 (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>; 4368def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"# 4369 "$dst {${mask}}, $src1, $src2}", 4370 (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask, 4371 VR128X:$src1, VR128X:$src2), 0>; 4372def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"# 4373 "$dst {${mask}} {z}, $src1, $src2}", 4374 (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask, 4375 VR128X:$src1, VR128X:$src2), 0>; 4376 4377let Predicates = 
[HasAVX512, OptForSize] in {
  // When optimizing for size, implement X86vzmovl ("keep low element, zero
  // the rest") with VMOVSS from a zeroed register. For 256/512-bit types the
  // move is done on the low xmm subregister and the result re-inserted with
  // SUBREG_TO_REG, whose upper bits are defined to be zero.
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
            (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
            (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;

  // Move low f32 and clear high bits.
  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
              (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
              (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;

  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
              (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
              (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;
}

// Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
// VMOVSS/SD. Unfortunately, loses the ability to use XMM16-31.
// 512-bit X86vzmovl (move element 0, zero the rest): when optimizing for
// speed, do the zeroing with a 128-bit blend against zero and re-insert the
// xmm into the zmm register.
let Predicates = [HasAVX512, OptForSpeed] in {
  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
                     (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)),
                     (i8 1))), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
                     (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
                     (i8 3))), sub_xmm)>;
}

let Predicates = [HasAVX512] in {
  // Scalar FP loads placed into element 0 of a vector.
  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
            (VMOVSSZrm addr:$src)>;
  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
            (VMOVSDZrm addr:$src)>;

  // Represent the same patterns above but in the form they appear for
  // 256-bit types
  def : Pat<(v8f32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v4f64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;

  // Represent the same patterns above but in the form they appear for
  // 512-bit types
  def : Pat<(v16f32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v8f64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
}

// FP16 move-scalar / zero-extending-load patterns (VMOVSH).
let Predicates = [HasFP16] in {
  def : Pat<(v8f16 (X86vzmovl (v8f16 VR128X:$src))),
            (VMOVSHZrr (v8f16 (AVX512_128_SET0)), VR128X:$src)>;
  def : Pat<(v8i16 (X86vzmovl (v8i16 VR128X:$src))),
            (VMOVSHZrr (v8i16 (AVX512_128_SET0)), VR128X:$src)>;

  // FIXME we need better canonicalization in dag combine
  def : Pat<(v16f16 (X86vzmovl (v16f16 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
              (v8f16 (EXTRACT_SUBREG (v16f16 VR256X:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v16i16 (X86vzmovl (v16i16 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v8i16 (VMOVSHZrr (v8i16 (AVX512_128_SET0)),
              (v8i16 (EXTRACT_SUBREG (v16i16 VR256X:$src), sub_xmm)))), sub_xmm)>;

  // FIXME we need better canonicalization in dag combine
  def : Pat<(v32f16 (X86vzmovl (v32f16 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
              (v8f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v32i16 (X86vzmovl (v32i16 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v8i16 (VMOVSHZrr (v8i16 (AVX512_128_SET0)),
              (v8i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_xmm)))), sub_xmm)>;

  def : Pat<(v8f16 (X86vzload16 addr:$src)),
            (VMOVSHZrm addr:$src)>;

  def : Pat<(v16f16 (X86vzload16 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;

  def : Pat<(v32f16 (X86vzload16 addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;
}

// vmovq xmm, xmm: copy the low quadword and zero the upper elements.
let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
                                  (ins VR128X:$src),
                                  "vmovq\t{$src, $dst|$dst, $src}",
                                  [(set VR128X:$dst, (v2i64 (X86vzmovl
                                                     (v2i64 VR128X:$src))))]>,
                                  EVEX, REX_W;
}

let Predicates = [HasAVX512] in {
  // GPR -> vector element 0 moves.
  def : Pat<(v4i32 (scalar_to_vector (i32 (anyext GR8:$src)))),
            (VMOVDI2PDIZrr (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
                                               GR8:$src, sub_8bit)))>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
            (VMOVDI2PDIZrr GR32:$src)>;

  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
            (VMOV64toPQIZrr GR64:$src)>;

  // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
  def : Pat<(v4i32 (X86vzload32 addr:$src)),
            (VMOVDI2PDIZrm addr:$src)>;
  def : Pat<(v8i32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
            (VMOVZPQILo2PQIZrr VR128X:$src)>;
  def : Pat<(v2i64 (X86vzload64 addr:$src)),
            (VMOVQI2PQIZrm addr:$src)>;
  def : Pat<(v4i64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;

  // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
  def : Pat<(v16i32 (X86vzload32 addr:$src)),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  def : Pat<(v8i64 (X86vzload64 addr:$src)),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;

  // 256/512-bit vzmovl of i64/f64 elements: do the move on the low xmm and
  // re-insert; the upper elements are known zero.
  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2f64 (VMOVZPQILo2PQIZrr
                     (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))),
             sub_xmm)>;
  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2i64 (VMOVZPQILo2PQIZrr
                     (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))),
             sub_xmm)>;

  def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2f64 (VMOVZPQILo2PQIZrr
                     (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))),
             sub_xmm)>;
  def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2i64 (VMOVZPQILo2PQIZrr
                     (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))),
             sub_xmm)>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - Non-temporals
//===----------------------------------------------------------------------===//

// Non-temporal aligned load, 512-bit. Patterns are added separately below.
def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
                            (ins i512mem:$src),
                            "vmovntdqa\t{$src, $dst|$dst, $src}",
                            [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>,
                            EVEX, T8, PD, EVEX_V512, EVEX_CD8<64, CD8VF>;

let Predicates = [HasVLX] in {
  def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
                                 (ins i256mem:$src),
                                 "vmovntdqa\t{$src, $dst|$dst, $src}",
                                 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>,
                                 EVEX, T8, PD, EVEX_V256, EVEX_CD8<64, CD8VF>;

  def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
                                 (ins i128mem:$src),
                                 "vmovntdqa\t{$src, $dst|$dst, $src}",
                                 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>,
                                 EVEX, T8, PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
}

// Non-temporal store (memory destination only).
multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                        X86SchedWriteMoveLS Sched,
                        PatFrag st_frag = alignednontemporalstore> {
  let SchedRW = [Sched.MR], AddedComplexity = 400 in
  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(st_frag (_.VT _.RC:$src), addr:$dst)],
                    _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
}

// Non-temporal store at all vector lengths (128/256 need VLX).
multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
                           AVX512VLVectorVTInfo VTInfo,
                           X86SchedWriteMoveLSWidths Sched> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256;
    defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128;
  }
}

defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
                                SchedWriteVecMoveLSNT>, TB, PD;
defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
                                SchedWriteFMoveLSNT>, TB, PD, REX_W;
defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
                                SchedWriteFMoveLSNT>, TB;

// Route the remaining 512-bit element types through the i64-typed
// non-temporal instructions.
let Predicates = [HasAVX512], AddedComplexity = 400 in {
  def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;

  def : Pat<(v8f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v16f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v8i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v16i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v32i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v64i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
}

// Same routing for the 256/128-bit VLX forms.
let Predicates = [HasVLX], AddedComplexity = 400 in {
  def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;

  def : Pat<(v4f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v8f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v4i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v8i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v16i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v32i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;

  def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;

  def : Pat<(v2f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v4f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v2i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v4i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v8i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v16i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - Integer arithmetic
//
// Masked reg-reg and reg-mem forms of a two-operand integer op.
multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86VectorVTInfo _, X86FoldableSchedWrite sched,
                           bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                            IsCommutable, IsCommutable>, AVX512BIBase, EVEX, VVVV,
                            Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2)))>,
                            AVX512BIBase, EVEX, VVVV,
                            Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Adds the broadcast-memory (rmb) form on top of avx512_binop_rm.
multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
                            bit IsCommutable = 0> :
           avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                             (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                             "${src2}"#_.BroadcastStr#", $src1",
                             "$src1, ${src2}"#_.BroadcastStr,
                             (_.VT (OpNode _.RC:$src1,
                                    (_.BroadcastLdFrag addr:$src2)))>,
                             AVX512BIBase, EVEX, VVVV, EVEX_B,
                             Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Instantiate rr/rm forms at 512/256/128-bit vector lengths.
multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              AVX512VLVectorVTInfo VTInfo,
                              X86SchedWriteWidths sched, Predicate prd,
                              bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
                           IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
                                sched.YMM, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
                                sched.XMM, IsCommutable>, EVEX_V128;
  }
}

// Instantiate rr/rm/rmb forms at 512/256/128-bit vector lengths.
multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               AVX512VLVectorVTInfo VTInfo,
                               X86SchedWriteWidths sched, Predicate prd,
                               bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
                            IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
                                 sched.YMM, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
                                 sched.XMM, IsCommutable>, EVEX_V128;
  }
}

// Element-size wrappers: qword (broadcastable, REX.W)...
multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
                                  sched, prd, IsCommutable>,
                                  REX_W, EVEX_CD8<64, CD8VF>;
}

// ...dword (broadcastable)...
multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
                                  sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
}

// ...word (no broadcast form)...
multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
                                 sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
                                 WIG;
}

// ...and byte (no broadcast form).
multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
                                 sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
                                 WIG;
}

// Combined d+q and b+w wrappers, and the all-four-sizes wrapper.
multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd, bit IsCommutable = 0> {
  defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
                                IsCommutable>;

  defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
                                IsCommutable>;
}

multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd, bit IsCommutable = 0> {
  defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
                                IsCommutable>;

  defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
                                IsCommutable>;
}

multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched,
                                  bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
                                    sched, HasAVX512, IsCommutable>,
              avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
                                    sched, HasBWI, IsCommutable>;
}

// Two-operand op whose source and destination element types differ; the
// broadcast form uses _Brdct's element type (with a bitconvert).
multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            SDNode OpNode,X86VectorVTInfo _Src,
                            X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
                            bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
                            "$src2, $src1","$src1, $src2",
                            (_Dst.VT (OpNode
                                      (_Src.VT _Src.RC:$src1),
                                      (_Src.VT _Src.RC:$src2))),
                            IsCommutable>,
                            AVX512BIBase, EVEX, VVVV, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                             (_Src.LdFrag addr:$src2)))>,
                            AVX512BIBase, EVEX, VVVV,
                            Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                             (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
                             OpcodeStr,
                             "${src2}"#_Brdct.BroadcastStr#", $src1",
                             "$src1, ${src2}"#_Brdct.BroadcastStr,
                             (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
                              (_Brdct.VT (_Brdct.BroadcastLdFrag addr:$src2)))))>,
                             AVX512BIBase, EVEX, VVVV, EVEX_B,
                             Sched<[sched.Folded, sched.ReadAfterFold]>;
}

defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
                                    SchedWriteVecALU, 1>;
defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
                                    SchedWriteVecALU, 0>;
defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat,
                                    SchedWriteVecALU, HasBWI, 0>;
defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
                                     SchedWriteVecALU, HasBWI, 1>;
defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat,
                                     SchedWriteVecALU, HasBWI, 0>;
// Integer multiplies.
defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
                                    SchedWritePMULLD, HasAVX512, 1>, T8;
defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
                                    SchedWriteVecIMul, HasBWI, 1>;
defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
                                    SchedWriteVecIMul, HasDQI, 1>, T8;
defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
                                    HasBWI, 1>;
defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
                                     HasBWI, 1>;
defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
                                      SchedWriteVecIMul, HasBWI, 1>, T8;
defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", avgceilu,
                                   SchedWriteVecALU, HasBWI, 1>;
defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
                                    SchedWriteVecIMul, HasAVX512, 1>, T8;
defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
                                     SchedWriteVecIMul, HasAVX512, 1>;

// Instantiate avx512_binop_rm2 at all vector lengths; the broadcast operand
// is always qword-typed.
multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
                            X86SchedWriteWidths sched,
                            AVX512VLVectorVTInfo _SrcVTInfo,
                            AVX512VLVectorVTInfo _DstVTInfo,
                            SDNode OpNode, Predicate prd, bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
                                 _SrcVTInfo.info512, _DstVTInfo.info512,
                                 v8i64_info, IsCommutable>,
                                 EVEX_V512, EVEX_CD8<64, CD8VF>, REX_W;
  let Predicates = [HasVLX, prd] in {
    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
                                      _SrcVTInfo.info256, _DstVTInfo.info256,
                                      v4i64x_info, IsCommutable>,
                                      EVEX_V256, EVEX_CD8<64, CD8VF>, REX_W;
    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
                                      _SrcVTInfo.info128, _DstVTInfo.info128,
                                      v2i64x_info, IsCommutable>,
                                      EVEX_V128, EVEX_CD8<64, CD8VF>, REX_W;
  }
}

defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
                                       avx512vl_i8_info, avx512vl_i8_info,
                                       X86multishift, HasVBMI, 0>, T8;

// Broadcast-memory form for the pack instructions (dword source elements).
multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
                            X86FoldableSchedWrite sched> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                             (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
                             OpcodeStr,
                             "${src2}"#_Src.BroadcastStr#", $src1",
                             "$src1, ${src2}"#_Src.BroadcastStr,
                             (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
                              (_Src.VT (_Src.BroadcastLdFrag addr:$src2)))))>,
                             EVEX, VVVV, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
                             Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Reg-reg and reg-mem forms for ops with narrowing destinations
// (packs, pmaddwd, pmaddubsw).
multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
                           SDNode OpNode,X86VectorVTInfo _Src,
                           X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
                           bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
                            "$src2, $src1","$src1, $src2",
                            (_Dst.VT (OpNode
                                      (_Src.VT _Src.RC:$src1),
                                      (_Src.VT _Src.RC:$src2))),
                            IsCommutable, IsCommutable>,
                            EVEX_CD8<_Src.EltSize, CD8VF>, EVEX, VVVV, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                             (_Src.LdFrag addr:$src2)))>,
                            EVEX, VVVV, EVEX_CD8<_Src.EltSize, CD8VF>,
                            Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// i32 -> i16 packs: broadcast form is available (dword elements).
multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
                                v32i16_info, SchedWriteShuffle.ZMM>,
                avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
                                 v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
                                     v16i16x_info, SchedWriteShuffle.YMM>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
                                      v16i16x_info, SchedWriteShuffle.YMM>,
                                      EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
                                     v8i16x_info, SchedWriteShuffle.XMM>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
                                      v8i16x_info, SchedWriteShuffle.XMM>,
                                      EVEX_V128;
  }
}
// i16 -> i8 packs: no broadcast form (word elements).
multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
                                   SDNode OpNode> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info,
                                SchedWriteShuffle.ZMM>, EVEX_V512, WIG;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
                                     v32i8x_info, SchedWriteShuffle.YMM>,
                                     EVEX_V256, WIG;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
                                     v16i8x_info, SchedWriteShuffle.XMM>,
                                     EVEX_V128, WIG;
  }
}

// Multiply-add ops with a widened destination element type.
multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
                         SDNode OpNode, AVX512VLVectorVTInfo _Src,
                         AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
                                _Dst.info512, SchedWriteVecIMul.ZMM,
                                IsCommutable>, EVEX_V512;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
                                     _Dst.info256, SchedWriteVecIMul.YMM,
                                     IsCommutable>, EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
                                     _Dst.info128, SchedWriteVecIMul.XMM,
                                     IsCommutable>, EVEX_V128;
  }
}

defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;

defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
                                avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8, WIG;
defm VPMADDWD : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
                              avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, WIG;

// Integer min/max.
defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
                                    SchedWriteVecALU, HasBWI, 1>, T8;
defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8;
defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8;

defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
                                    SchedWriteVecALU, HasBWI, 1>, T8;
defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8;
defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8;

defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
                                    SchedWriteVecALU, HasBWI, 1>, T8;
defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8;
defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8;

defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
                                    SchedWriteVecALU, HasBWI, 1>, T8;
defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8;
defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8;

// PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX.
let Predicates = [HasDQI, NoVLX] in {
  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG
                (VPMULLQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;
  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (VPMULLQZrmb
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    addr:$src2),
             sub_ymm)>;

  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG
                (VPMULLQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (VPMULLQZrmb
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    addr:$src2),
             sub_xmm)>;
}

// Lower 128/256-bit i64 min/max via the 512-bit instruction when VLX is
// unavailable: widen, operate, extract.
multiclass avx512_min_max_lowering<string Instr, SDNode OpNode> {
  def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rr")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;
  def : Pat<(v4i64 (OpNode (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rmb")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    addr:$src2),
             sub_ymm)>;

  def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rr")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
  def : Pat<(v2i64 (OpNode (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
            (EXTRACT_SUBREG
                (!cast<Instruction>(Instr#"rmb")
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    addr:$src2),
             sub_xmm)>;
}

let Predicates = [HasAVX512, NoVLX] in {
  defm : avx512_min_max_lowering<"VPMAXUQZ", umax>;
  defm : avx512_min_max_lowering<"VPMINUQZ", umin>;
  defm : avx512_min_max_lowering<"VPMAXSQZ", smax>;
  defm : avx512_min_max_lowering<"VPMINSQZ", smin>;
}

//===----------------------------------------------------------------------===//
// AVX-512 Logical Instructions
//===----------------------------------------------------------------------===//

defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
                                   SchedWriteVecLogic, HasAVX512, 1>;
defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
                                  SchedWriteVecLogic, HasAVX512, 1>;
defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
                                   SchedWriteVecLogic, HasAVX512, 1>;
defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
                                    SchedWriteVecLogic, HasAVX512>;

// Byte/word logical ops have no dedicated instruction; reuse the qword forms.
let Predicates = [HasVLX] in {
  def : Pat<(v16i8 (and VR128X:$src1, VR128X:$src2)),
            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (and VR128X:$src1, VR128X:$src2)),
            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v16i8 (or VR128X:$src1, VR128X:$src2)),
            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (or VR128X:$src1, VR128X:$src2)),
            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v16i8 (xor VR128X:$src1, VR128X:$src2)),
            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (xor VR128X:$src1, VR128X:$src2)),
            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v16i8 (X86andnp VR128X:$src1, VR128X:$src2)),
            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (X86andnp VR128X:$src1, VR128X:$src2)),
            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(and VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(and VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(or VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPORQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(or VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPORQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(xor VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(xor VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)),
            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)),
            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(v32i8 (or VR256X:$src1, VR256X:$src2)),
            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (or VR256X:$src1, VR256X:$src2)),
            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(v32i8 (xor VR256X:$src1, VR256X:$src2)),
            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (xor VR256X:$src1, VR256X:$src2)),
            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(v32i8 (X86andnp VR256X:$src1, VR256X:$src2)),
            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (X86andnp VR256X:$src1, VR256X:$src2)),
            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(and VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(and VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;

  def : Pat<(or VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPORQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(or VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPORQZ256rm VR256X:$src1, addr:$src2)>;

  def : Pat<(xor VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(xor VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
}

// Same reuse of the qword forms for 512-bit byte/word logical ops.
let Predicates = [HasAVX512] in {
  def : Pat<(v64i8 (and VR512:$src1, VR512:$src2)),
            (VPANDQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (and VR512:$src1, VR512:$src2)),
            (VPANDQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(v64i8 (or VR512:$src1, VR512:$src2)),
            (VPORQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (or VR512:$src1, VR512:$src2)),
            (VPORQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(v64i8 (xor VR512:$src1, VR512:$src2)),
            (VPXORQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (xor VR512:$src1, VR512:$src2)),
            (VPXORQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(v64i8 (X86andnp VR512:$src1, VR512:$src2)),
            (VPANDNQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (X86andnp VR512:$src1, VR512:$src2)),
            (VPANDNQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(and VR512:$src1, (loadv64i8 addr:$src2)),
            (VPANDQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(and VR512:$src1, (loadv32i16 addr:$src2)),
            (VPANDQZrm VR512:$src1, addr:$src2)>;

  def : Pat<(or VR512:$src1, (loadv64i8 addr:$src2)),
            (VPORQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(or VR512:$src1, (loadv32i16 addr:$src2)),
            (VPORQZrm VR512:$src1, addr:$src2)>;

  def : Pat<(xor VR512:$src1, (loadv64i8 addr:$src2)),
            (VPXORQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(xor VR512:$src1, (loadv32i16 addr:$src2)),
            (VPXORQZrm VR512:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR512:$src1, (loadv64i8 addr:$src2)),
            (VPANDNQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)),
            (VPANDNQZrm VR512:$src1, addr:$src2)>;
}

// Patterns to catch vselect with different type than logic op.
multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode,
                                   X86VectorVTInfo _,
                                   X86VectorVTInfo IntInfo> {
  // Masked register-register logical operations.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, _.RC:$src2)>;

  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
             _.RC:$src2)>;

  // Masked register-memory logical operations.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
                                            (load addr:$src2)))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
                                            (load addr:$src2)))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
             addr:$src2)>;
}

multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode,
                                         X86VectorVTInfo _,
                                         X86VectorVTInfo IntInfo> {
  // Register-broadcast logical operations.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert
                    (IntInfo.VT (OpNode _.RC:$src1,
                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (bitconvert
                    (IntInfo.VT (OpNode _.RC:$src1,
                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rmbkz) _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
}

// Instantiate the masked-logical lowering patterns at all vector lengths.
multiclass avx512_logical_lowering_sizes<string InstrStr, SDNode OpNode,
                                         AVX512VLVectorVTInfo SelectInfo,
                                         AVX512VLVectorVTInfo IntInfo> {
let Predicates = [HasVLX] in {
  defm : avx512_logical_lowering<InstrStr#"Z128", OpNode, SelectInfo.info128,
                                 IntInfo.info128>;
  defm : avx512_logical_lowering<InstrStr#"Z256", OpNode, SelectInfo.info256,
                                 IntInfo.info256>;
}
let Predicates = [HasAVX512] in {
  defm : avx512_logical_lowering<InstrStr#"Z", OpNode, SelectInfo.info512,
                                 IntInfo.info512>;
}
}

multiclass avx512_logical_lowering_sizes_bcast<string InstrStr, SDNode OpNode,
                                               AVX512VLVectorVTInfo SelectInfo,
                                               AVX512VLVectorVTInfo IntInfo> {
let Predicates = [HasVLX] in {
  defm : avx512_logical_lowering_bcast<InstrStr#"Z128", OpNode,
                                       SelectInfo.info128, IntInfo.info128>;
  defm : avx512_logical_lowering_bcast<InstrStr#"Z256", OpNode,
                                       SelectInfo.info256, IntInfo.info256>;
}
let Predicates = [HasAVX512] in {
  defm : avx512_logical_lowering_bcast<InstrStr#"Z", OpNode,
                                       SelectInfo.info512, IntInfo.info512>;
}
}

// Cross product of select element type vs. logic-op element type.
multiclass avx512_logical_lowering_types<string InstrStr, SDNode OpNode> {
  // i64 vselect with i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i8_info>;

  // i32 vselect with i64/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i8_info>;

  // f32 vselect with i64/i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i8_info>;

  // f64 vselect with i64/i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i8_info>;

  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"D", OpNode,
                                             avx512vl_f32_info,
                                             avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"Q", OpNode,
                                             avx512vl_f64_info,
                                             avx512vl_i64_info>;
}

defm : avx512_logical_lowering_types<"VPAND", and>;
defm : avx512_logical_lowering_types<"VPOR", or>;
defm : avx512_logical_lowering_types<"VPXOR", xor>;
defm : avx512_logical_lowering_types<"VPANDN", X86andnp>;
5334//===----------------------------------------------------------------------===// 5335// AVX-512 FP arithmetic 5336//===----------------------------------------------------------------------===// 5337 5338multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _, 5339 SDPatternOperator OpNode, SDNode VecNode, 5340 X86FoldableSchedWrite sched, bit IsCommutable> { 5341 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { 5342 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 5343 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 5344 "$src2, $src1", "$src1, $src2", 5345 (_.VT (VecNode _.RC:$src1, _.RC:$src2))>, 5346 Sched<[sched]>; 5347 5348 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 5349 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr, 5350 "$src2, $src1", "$src1, $src2", 5351 (_.VT (VecNode _.RC:$src1, 5352 (_.ScalarIntMemFrags addr:$src2)))>, 5353 Sched<[sched.Folded, sched.ReadAfterFold]>; 5354 let isCodeGenOnly = 1, Predicates = [HasAVX512] in { 5355 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst), 5356 (ins _.FRC:$src1, _.FRC:$src2), 5357 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 5358 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>, 5359 Sched<[sched]> { 5360 let isCommutable = IsCommutable; 5361 } 5362 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst), 5363 (ins _.FRC:$src1, _.ScalarMemOp:$src2), 5364 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 5365 [(set _.FRC:$dst, (OpNode _.FRC:$src1, 5366 (_.ScalarLdFrag addr:$src2)))]>, 5367 Sched<[sched.Folded, sched.ReadAfterFold]>; 5368 } 5369 } 5370} 5371 5372multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _, 5373 SDNode VecNode, X86FoldableSchedWrite sched> { 5374 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in 5375 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 5376 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr, 5377 "$rc, $src2, 
$src1", "$src1, $src2, $rc", 5378 (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), 5379 (i32 timm:$rc))>, 5380 EVEX_B, EVEX_RC, Sched<[sched]>; 5381} 5382multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _, 5383 SDNode OpNode, SDNode VecNode, SDNode SaeNode, 5384 X86FoldableSchedWrite sched, bit IsCommutable> { 5385 let ExeDomain = _.ExeDomain in { 5386 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 5387 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 5388 "$src2, $src1", "$src1, $src2", 5389 (_.VT (VecNode _.RC:$src1, _.RC:$src2))>, 5390 Sched<[sched]>, SIMD_EXC; 5391 5392 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 5393 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr, 5394 "$src2, $src1", "$src1, $src2", 5395 (_.VT (VecNode _.RC:$src1, 5396 (_.ScalarIntMemFrags addr:$src2)))>, 5397 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 5398 5399 let isCodeGenOnly = 1, Predicates = [HasAVX512], 5400 Uses = [MXCSR], mayRaiseFPException = 1 in { 5401 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst), 5402 (ins _.FRC:$src1, _.FRC:$src2), 5403 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 5404 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>, 5405 Sched<[sched]> { 5406 let isCommutable = IsCommutable; 5407 } 5408 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst), 5409 (ins _.FRC:$src1, _.ScalarMemOp:$src2), 5410 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", 5411 [(set _.FRC:$dst, (OpNode _.FRC:$src1, 5412 (_.ScalarLdFrag addr:$src2)))]>, 5413 Sched<[sched.Folded, sched.ReadAfterFold]>; 5414 } 5415 5416 let Uses = [MXCSR] in 5417 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 5418 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 5419 "{sae}, $src2, $src1", "$src1, $src2, {sae}", 5420 (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>, 5421 EVEX_B, Sched<[sched]>; 5422 } 5423} 5424 5425multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, 
SDPatternOperator OpNode, 5426 SDNode VecNode, SDNode RndNode, 5427 X86SchedWriteSizes sched, bit IsCommutable> { 5428 defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode, 5429 sched.PS.Scl, IsCommutable>, 5430 avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode, 5431 sched.PS.Scl>, 5432 TB, XS, EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>; 5433 defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode, 5434 sched.PD.Scl, IsCommutable>, 5435 avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode, 5436 sched.PD.Scl>, 5437 TB, XD, REX_W, EVEX, VVVV, VEX_LIG, EVEX_CD8<64, CD8VT1>; 5438 let Predicates = [HasFP16] in 5439 defm SHZ : avx512_fp_scalar<opc, OpcodeStr#"sh", f16x_info, OpNode, 5440 VecNode, sched.PH.Scl, IsCommutable>, 5441 avx512_fp_scalar_round<opc, OpcodeStr#"sh", f16x_info, RndNode, 5442 sched.PH.Scl>, 5443 T_MAP5, XS, EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>; 5444} 5445 5446multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode, 5447 SDNode VecNode, SDNode SaeNode, 5448 X86SchedWriteSizes sched, bit IsCommutable> { 5449 defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode, 5450 VecNode, SaeNode, sched.PS.Scl, IsCommutable>, 5451 TB, XS, EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>; 5452 defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode, 5453 VecNode, SaeNode, sched.PD.Scl, IsCommutable>, 5454 TB, XD, REX_W, EVEX, VVVV, VEX_LIG, EVEX_CD8<64, CD8VT1>; 5455 let Predicates = [HasFP16] in { 5456 defm SHZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sh", f16x_info, OpNode, 5457 VecNode, SaeNode, sched.PH.Scl, IsCommutable>, 5458 T_MAP5, XS, EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>; 5459 } 5460} 5461defm VADD : avx512_binop_s_round<0x58, "vadd", any_fadd, X86fadds, X86faddRnds, 5462 SchedWriteFAddSizes, 1>; 5463defm VMUL : avx512_binop_s_round<0x59, "vmul", any_fmul, X86fmuls, X86fmulRnds, 5464 SchedWriteFMulSizes, 1>; 5465defm VSUB : 
            avx512_binop_s_round<0x5C, "vsub", any_fsub, X86fsubs, X86fsubRnds,
                                 SchedWriteFAddSizes, 0>;
defm VDIV : avx512_binop_s_round<0x5E, "vdiv", any_fdiv, X86fdivs, X86fdivRnds,
                                 SchedWriteFDivSizes, 0>;
defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminSAEs,
                               SchedWriteFCmpSizes, 0>;
defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs,
                               SchedWriteFCmpSizes, 0>;

// MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
// X86fminc and X86fmaxc instead of X86fmin and X86fmax.
// (Codegen-only FRC forms; the name's "comutable" spelling is historical.)
multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
                                    X86VectorVTInfo _, SDNode OpNode,
                                    X86FoldableSchedWrite sched> {
  let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.FRC:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
              Sched<[sched]> {
    let isCommutable = 1;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
              (ins _.FRC:$src1, _.ScalarMemOp:$src2),
              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
              [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                                 (_.ScalarLdFrag addr:$src2)))]>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
                                         SchedWriteFCmp.Scl>, TB, XS,
                                         EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;

defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
                                         SchedWriteFCmp.Scl>, TB, XD,
                                         REX_W, EVEX, VVVV, VEX_LIG,
                                         EVEX_CD8<64, CD8VT1>, SIMD_EXC;

defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
                                         SchedWriteFCmp.Scl>, TB, XS,
                                         EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;

defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
                                         SchedWriteFCmp.Scl>, TB, XD,
                                         REX_W, EVEX, VVVV, VEX_LIG,
                                         EVEX_CD8<64, CD8VT1>, SIMD_EXC;

defm VMINCSHZ : avx512_comutable_binop_s<0x5D, "vminsh", f16x_info, X86fminc,
                                         SchedWriteFCmp.Scl>, T_MAP5, XS,
                                         EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC;

defm VMAXCSHZ : avx512_comutable_binop_s<0x5F, "vmaxsh", f16x_info, X86fmaxc,
                                         SchedWriteFCmp.Scl>, T_MAP5, XS,
                                         EVEX, VVVV, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC;

// Packed FP binop: rr, rm and broadcast (rmb) forms. OpNode selects the
// unmasked pattern, MaskOpNode the masked one (AVX512_maskable_split).
// ClobberConstraint is threaded through for variants that clobber registers.
multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDPatternOperator MaskOpNode,
                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
                            bit IsCommutable,
                            bit IsKCommutable = IsCommutable,
                            string suffix = _.Suffix,
                            string ClobberConstraint = "",
                            bit MayRaiseFPException = 1> {
  let ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR], mayRaiseFPException = MayRaiseFPException in {
  defm rr: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#suffix,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                  (_.VT (MaskOpNode _.RC:$src1, _.RC:$src2)), ClobberConstraint,
                  IsCommutable, IsKCommutable, IsKCommutable>, EVEX, VVVV, Sched<[sched]>;
  let mayLoad = 1 in {
    defm rm: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#suffix,
                    "$src2, $src1", "$src1, $src2",
                    (OpNode _.RC:$src1, (_.LdFrag addr:$src2)),
                    (MaskOpNode _.RC:$src1, (_.LdFrag addr:$src2)),
                    ClobberConstraint>, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
    defm rmb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
                     (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#suffix,
                     "${src2}"#_.BroadcastStr#", $src1",
                     "$src1, ${src2}"#_.BroadcastStr,
                     (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
                     (MaskOpNode _.RC:$src1, (_.VT
                                              (_.BroadcastLdFrag addr:$src2))),
                     ClobberConstraint>, EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
  }
}

// Packed FP binop variant with a static-rounding operand (EVEX.b + EVEX.RC).
multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
                                  SDPatternOperator OpNodeRnd,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  string suffix = _.Suffix,
                                  string ClobberConstraint = ""> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr#suffix,
                  "$rc, $src2, $src1", "$src1, $src2, $rc",
                  (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc))),
                  0, 0, 0, vselect_mask, ClobberConstraint>,
                  EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>;
}

// Packed FP binop variant with {sae} (suppress-all-exceptions) semantics.
multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNodeSAE,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
                  "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                  (_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>,
                  EVEX, VVVV, EVEX_B, Sched<[sched]>;
}

// Instantiates avx512_fp_packed for PS/PD at 512 bits (predicate prd) and at
// 128/256 bits (prd + HasVLX). IsPD128Commutable exists because the 128-bit
// PD form can have different commutability from the rest.
multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                             SDPatternOperator MaskOpNode,
                             Predicate prd, X86SchedWriteSizes sched,
                             bit IsCommutable = 0,
                             bit IsPD128Commutable = IsCommutable> {
  let Predicates = [prd] in {
  defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
                              sched.PS.ZMM, IsCommutable>, EVEX_V512, TB,
                              EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f64_info,
                              sched.PD.ZMM, IsCommutable>, EVEX_V512, TB, PD, REX_W,
                              EVEX_CD8<64, CD8VF>;
  }

  // Define only if AVX512VL feature is present.
  let Predicates = [prd, HasVLX] in {
  defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
                                 sched.PS.XMM, IsCommutable>, EVEX_V128, TB,
                                 EVEX_CD8<32, CD8VF>;
  defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
                                 sched.PS.YMM, IsCommutable>, EVEX_V256, TB,
                                 EVEX_CD8<32, CD8VF>;
  defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v2f64x_info,
                                 sched.PD.XMM, IsPD128Commutable,
                                 IsCommutable>, EVEX_V128, TB, PD, REX_W,
                                 EVEX_CD8<64, CD8VF>;
  defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f64x_info,
                                 sched.PD.YMM, IsCommutable>, EVEX_V256, TB, PD, REX_W,
                                 EVEX_CD8<64, CD8VF>;
  }
}

// FP16 (PH) counterpart of avx512_fp_binop_p, gated on HasFP16.
multiclass avx512_fp_binop_ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                              SDPatternOperator MaskOpNode,
                              X86SchedWriteSizes sched, bit IsCommutable = 0> {
  let Predicates = [HasFP16] in {
  defm PHZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v32f16_info,
                              sched.PH.ZMM, IsCommutable>, EVEX_V512, T_MAP5,
                              EVEX_CD8<16, CD8VF>;
  }
  let Predicates = [HasVLX, HasFP16] in {
  defm PHZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f16x_info,
                                 sched.PH.XMM, IsCommutable>, EVEX_V128, T_MAP5,
                                 EVEX_CD8<16, CD8VF>;
  defm PHZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f16x_info,
                                 sched.PH.YMM, IsCommutable>, EVEX_V256, T_MAP5,
                                 EVEX_CD8<16, CD8VF>;
  }
}

// 512-bit static-rounding forms for PH/PS/PD (rounding only exists at 512-bit).
let Uses = [MXCSR] in
multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
                                   X86SchedWriteSizes sched> {
  let Predicates = [HasFP16] in {
  defm PHZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
                                    v32f16_info>,
                                    EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>;
  }
  defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
                                    v16f32_info>,
                                    EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
                                    v8f64_info>,
                                    EVEX_V512, TB, PD, REX_W,EVEX_CD8<64, CD8VF>;
}

// 512-bit {sae} forms for PH/PS/PD.
let Uses = [MXCSR] in
multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
                                 X86SchedWriteSizes sched> {
  let Predicates = [HasFP16] in {
  defm PHZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
                                  v32f16_info>,
                                  EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>;
  }
  defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
                                  v16f32_info>,
                                  EVEX_V512, TB, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
                                  v8f64_info>,
                                  EVEX_V512, TB, PD, REX_W,EVEX_CD8<64, CD8VF>;
}

defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, fadd, HasAVX512,
                              SchedWriteFAddSizes, 1>,
            avx512_fp_binop_ph<0x58, "vadd", any_fadd, fadd, SchedWriteFAddSizes, 1>,
            avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
defm VMUL : avx512_fp_binop_p<0x59, "vmul", any_fmul, fmul, HasAVX512,
                              SchedWriteFMulSizes, 1>,
            avx512_fp_binop_ph<0x59, "vmul", any_fmul, fmul, SchedWriteFMulSizes, 1>,
            avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
defm VSUB : avx512_fp_binop_p<0x5C, "vsub", any_fsub, fsub, HasAVX512,
                              SchedWriteFAddSizes>,
            avx512_fp_binop_ph<0x5C, "vsub", any_fsub, fsub, SchedWriteFAddSizes>,
            avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", any_fdiv, fdiv, HasAVX512,
                              SchedWriteFDivSizes>,
            avx512_fp_binop_ph<0x5E, "vdiv", any_fdiv, fdiv, SchedWriteFDivSizes>,
            avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, X86fmin, HasAVX512,
                              SchedWriteFCmpSizes, 0>,
            avx512_fp_binop_ph<0x5D, "vmin", X86fmin, X86fmin,
                               SchedWriteFCmpSizes, 0>,
            avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>;
defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, X86fmax, HasAVX512,
                              SchedWriteFCmpSizes, 0>,
            avx512_fp_binop_ph<0x5F, "vmax", X86fmax, X86fmax, SchedWriteFCmpSizes, 0>,
            avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>;
// Commutable MIN/MAX variants (X86fminc/X86fmaxc), codegen-only.
let isCodeGenOnly = 1 in {
  defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, X86fminc, HasAVX512,
                                 SchedWriteFCmpSizes, 1>,
               avx512_fp_binop_ph<0x5D, "vmin", X86fminc, X86fminc,
                                  SchedWriteFCmpSizes, 1>;
  defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, X86fmaxc, HasAVX512,
                                 SchedWriteFCmpSizes, 1>,
               avx512_fp_binop_ph<0x5F, "vmax", X86fmaxc, X86fmaxc,
                                  SchedWriteFCmpSizes, 1>;
}
// FP logic ops never touch MXCSR and cannot raise FP exceptions.
let Uses = []<Register>, mayRaiseFPException = 0 in {
defm VAND  : avx512_fp_binop_p<0x54, "vand", null_frag, null_frag, HasDQI,
                               SchedWriteFLogicSizes, 1>;
defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, null_frag, HasDQI,
                               SchedWriteFLogicSizes, 0>;
defm VOR   : avx512_fp_binop_p<0x56, "vor", null_frag, null_frag, HasDQI,
                               SchedWriteFLogicSizes, 1>;
defm VXOR  : avx512_fp_binop_p<0x57, "vxor", null_frag, null_frag, HasDQI,
                               SchedWriteFLogicSizes, 1>;
}

// Packed VSCALEF-style op: rr, rm and broadcast forms.
multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
                  EVEX, VVVV, Sched<[sched]>;
  defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
                  EVEX, VVVV,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix,
                   "${src2}"#_.BroadcastStr#", $src1",
                   "$src1, ${src2}"#_.BroadcastStr,
                   (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
                   EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Scalar VSCALEF-style op: rr and rm intrinsic forms.
multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
                         Sched<[sched]>;
  defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr#_.Suffix,
                         "$src2, $src1", "$src1, $src2",
                         (OpNode _.RC:$src1, (_.ScalarIntMemFrags addr:$src2))>,
                         Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// All VSCALEF forms: packed (opc) and scalar (opcScaler), every element type
// and width, with rounding variants at 512-bit/scalar.
multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
                                X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in {
    defm PHZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v32f16_info>,
               avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v32f16_info>,
               EVEX_V512, T_MAP6, PD, EVEX_CD8<16, CD8VF>;
    defm SHZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f16x_info>,
               avx512_fp_scalar_round<opcScaler, OpcodeStr#"sh", f16x_info, X86scalefsRnd, sched.Scl>,
               EVEX, VVVV, T_MAP6, PD, EVEX_CD8<16, CD8VT1>;
  }
  defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>,
             avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>,
             EVEX_V512, EVEX_CD8<32, CD8VF>, T8, PD;
  defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>,
             avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>,
             EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>, T8, PD;
  defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>,
             avx512_fp_scalar_round<opcScaler, OpcodeStr#"ss", f32x_info,
                                    X86scalefsRnd, sched.Scl>,
             EVEX, VVVV, VEX_LIG, EVEX_CD8<32, CD8VT1>, T8, PD;
  defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>,
             avx512_fp_scalar_round<opcScaler, OpcodeStr#"sd", f64x_info,
                                    X86scalefsRnd, sched.Scl>,
             EVEX, VVVV, VEX_LIG, EVEX_CD8<64, CD8VT1>, REX_W, T8, PD;

  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
  defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>,
                EVEX_V128, EVEX_CD8<32, CD8VF>, T8, PD;
  defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>,
                EVEX_V256, EVEX_CD8<32, CD8VF>, T8, PD;
  defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>,
                EVEX_V128, REX_W, EVEX_CD8<64, CD8VF>, T8, PD;
  defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>,
                EVEX_V256, REX_W, EVEX_CD8<64, CD8VF>, T8, PD;
  }

  let Predicates = [HasFP16, HasVLX] in {
  defm PHZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v8f16x_info>,
                EVEX_V128, EVEX_CD8<16, CD8VF>, T_MAP6, PD;
  defm PHZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v16f16x_info>,
                EVEX_V256, EVEX_CD8<16, CD8VF>, T_MAP6, PD;
  }
}
defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", SchedWriteFAdd>;

//===----------------------------------------------------------------------===//
// AVX-512  VPTESTM instructions
//===----------------------------------------------------------------------===//

// VPTESTM/VPTESTNM: test vectors against each other, producing a mask (KRC).
multiclass avx512_vptest<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  // NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG.
  // There are just too many permutations due to commutability and bitcasts.
  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (null_frag), (null_frag), 1>,
                   EVEX, VVVV, Sched<[sched]>;
  let mayLoad = 1 in
  defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (null_frag), (null_frag)>,
                   EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Broadcast-memory (rmb) form of the vptest instructions.
multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
  defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                    "${src2}"#_.BroadcastStr#", $src1",
                    "$src1, ${src2}"#_.BroadcastStr,
                    (null_frag), (null_frag)>,
                    EVEX_B, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// D/Q element sizes over all vector widths (128/256-bit need HasVLX).
multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr,
                                  X86SchedWriteWidths sched,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
  defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512>,
           avx512_vptest_mb<opc, OpcodeStr, sched.ZMM, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
  defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256>,
              avx512_vptest_mb<opc, OpcodeStr, sched.YMM, _.info256>, EVEX_V256;
  defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128>,
              avx512_vptest_mb<opc, OpcodeStr, sched.XMM, _.info128>, EVEX_V128;
  }
}

multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr,
                            X86SchedWriteWidths sched> {
  defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", sched,
                                  avx512vl_i32_info>;
  defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", sched,
                                  avx512vl_i64_info>, REX_W;
}

// W/B element sizes, which require HasBWI (no broadcast forms exist for
// byte/word elements).
multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in {
  defm WZ: avx512_vptest<opc, OpcodeStr#"w", sched.ZMM,
                         v32i16_info>, EVEX_V512, REX_W;
  defm BZ: avx512_vptest<opc, OpcodeStr#"b", sched.ZMM,
                         v64i8_info>, EVEX_V512;
  }

  let Predicates = [HasVLX, HasBWI] in {
  defm WZ256: avx512_vptest<opc, OpcodeStr#"w", sched.YMM,
                            v16i16x_info>, EVEX_V256, REX_W;
  defm WZ128: avx512_vptest<opc, OpcodeStr#"w", sched.XMM,
                            v8i16x_info>, EVEX_V128, REX_W;
  defm BZ256: avx512_vptest<opc, OpcodeStr#"b", sched.YMM,
                            v32i8x_info>, EVEX_V256;
  defm BZ128: avx512_vptest<opc, OpcodeStr#"b", sched.XMM,
                            v16i8x_info>, EVEX_V128;
  }
}

// opc_wb covers the B/W forms; opc_dq covers the D/Q forms.
multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
                                   X86SchedWriteWidths sched> :
  avx512_vptest_wb<opc_wb, OpcodeStr, sched>,
  avx512_vptest_dq<opc_dq, OpcodeStr, sched>;

defm VPTESTM  : avx512_vptest_all_forms<0x26, 0x27, "vptestm",
                                        SchedWriteVecLogic>, T8, PD;
defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm",
                                        SchedWriteVecLogic>, T8, XS;

//===----------------------------------------------------------------------===//
// AVX-512  Shift instructions
//===----------------------------------------------------------------------===//

// Shift-by-immediate: register (ri) and full-memory (mi) forms.
multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
                            string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (i8 timm:$src2)))>,
                   Sched<[sched]>;
  defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
                   (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode (_.VT (_.LdFrag addr:$src1)),
                          (i8 timm:$src2)))>,
                   Sched<[sched.Folded]>;
  }
}

// Shift-by-immediate with a broadcast memory source (mbi).
multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
                             string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
                   (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
                   "$src2, ${src1}"#_.BroadcastStr, "${src1}"#_.BroadcastStr#", $src2",
                   (_.VT (OpNode (_.BroadcastLdFrag addr:$src1), (i8 timm:$src2)))>,
                   EVEX_B, Sched<[sched.Folded]>;
}

// Shift-by-vector-count: the count in src2 is always a 128-bit operand.
multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, ValueType SrcVT,
                            X86VectorVTInfo _> {
  // src2 is always 128-bit
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>,
                   AVX512BIBase, EVEX, VVVV, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
                   "$src2, $src1", "$src1, $src2",
                   (_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>,
                   AVX512BIBase,
                   EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// Instantiates avx512_shift_rrm at all widths; 128/256-bit need prd + HasVLX.
multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched, ValueType SrcVT,
                              AVX512VLVectorVTInfo VTInfo,
                              Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT,
                            VTInfo.info512>, EVEX_V512,
                            EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
  let Predicates = [prd, HasVLX] in {
  defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT,
                               VTInfo.info256>, EVEX_V256,
                               EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
  defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT,
                               VTInfo.info128>, EVEX_V128,
                               EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
  }
}

// D/Q forms need only HasAVX512; the W form needs HasBWI.
multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
                              string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched> {
  defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32,
                              avx512vl_i32_info, HasAVX512>;
  defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
                              avx512vl_i64_info, HasAVX512>, REX_W;
  defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
                              avx512vl_i16_info, HasBWI>;
}

// Immediate-shift forms (plus broadcast) at all widths.
multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched,
                                  AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in
  defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched.ZMM, VTInfo.info512>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM,
                               VTInfo.info512>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched.YMM, VTInfo.info256>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM,
                               VTInfo.info256>, EVEX_V256;
  defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched.XMM, VTInfo.info128>,
             avx512_shift_rmbi<opc, ImmFormM,
                               OpcodeStr, OpNode, sched.XMM,
                               VTInfo.info128>, EVEX_V128;
  }
}

// Word-element immediate shifts (no broadcast form); require HasBWI.
multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM,
                              string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
  defm WZ:    avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                               sched.ZMM, v32i16_info>, EVEX_V512, WIG;
  let Predicates = [HasVLX, HasBWI] in {
  defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                               sched.YMM, v16i16x_info>, EVEX_V256, WIG;
  defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                               sched.XMM, v8i16x_info>, EVEX_V128, WIG;
  }
}

// Dword/qword immediate shifts with their CD8 tuple scaling.
multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
                               Format ImmFormR, Format ImmFormM,
                               string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched> {
  defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
                                 sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
  defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
                                 sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, REX_W;
}

defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
                                 SchedWriteVecShiftImm>,
             avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV;

defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
                                 SchedWriteVecShiftImm>,
             avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV;

defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
                                 SchedWriteVecShiftImm>,
             avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV;

defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV;
defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX, VVVV;

defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl,
                                SchedWriteVecShift>;
defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra,
                                SchedWriteVecShift>;
defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
                                SchedWriteVecShift>;

// Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX.
// Widen the operand into a ZMM register, shift, then extract the low subreg.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                 VR128X:$src2)), sub_ymm)>;

  def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZrr
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                 VR128X:$src2)), sub_xmm)>;

  def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
                 timm:$src2)), sub_ymm)>;

  def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 timm:$src2))),
            (EXTRACT_SUBREG (v8i64
              (VPSRAQZri
                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
                 timm:$src2)), sub_xmm)>;
}

//===-------------------------------------------------------------------===//
// Variable Bit Shifts
//===-------------------------------------------------------------------===//

// Per-element variable shift: rr and rm forms.
multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                   "$src2, $src1",
"$src1, $src2", 6071 (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>, 6072 AVX5128IBase, EVEX, VVVV, Sched<[sched]>; 6073 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 6074 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr, 6075 "$src2, $src1", "$src1, $src2", 6076 (_.VT (OpNode _.RC:$src1, 6077 (_.VT (_.LdFrag addr:$src2))))>, 6078 AVX5128IBase, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>, 6079 Sched<[sched.Folded, sched.ReadAfterFold]>; 6080 } 6081} 6082 6083multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode, 6084 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 6085 let ExeDomain = _.ExeDomain in 6086 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 6087 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr, 6088 "${src2}"#_.BroadcastStr#", $src1", 6089 "$src1, ${src2}"#_.BroadcastStr, 6090 (_.VT (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))>, 6091 AVX5128IBase, EVEX_B, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>, 6092 Sched<[sched.Folded, sched.ReadAfterFold]>; 6093} 6094 6095multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, 6096 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> { 6097 let Predicates = [HasAVX512] in 6098 defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, 6099 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512; 6100 6101 let Predicates = [HasAVX512, HasVLX] in { 6102 defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, 6103 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256; 6104 defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, 6105 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128; 6106 } 6107} 6108 6109multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr, 6110 SDNode OpNode, X86SchedWriteWidths sched> { 6111 defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched, 6112 avx512vl_i32_info>; 6113 
defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched, 6114 avx512vl_i64_info>, REX_W; 6115} 6116 6117// Use 512bit version to implement 128/256 bit in case NoVLX. 6118multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr, 6119 SDNode OpNode, list<Predicate> p> { 6120 let Predicates = p in { 6121 def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1), 6122 (_.info256.VT _.info256.RC:$src2))), 6123 (EXTRACT_SUBREG 6124 (!cast<Instruction>(OpcodeStr#"Zrr") 6125 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm), 6126 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)), 6127 sub_ymm)>; 6128 6129 def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1), 6130 (_.info128.VT _.info128.RC:$src2))), 6131 (EXTRACT_SUBREG 6132 (!cast<Instruction>(OpcodeStr#"Zrr") 6133 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm), 6134 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)), 6135 sub_xmm)>; 6136 } 6137} 6138multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr, 6139 SDNode OpNode, X86SchedWriteWidths sched> { 6140 let Predicates = [HasBWI] in 6141 defm WZ: avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>, 6142 EVEX_V512, REX_W; 6143 let Predicates = [HasVLX, HasBWI] in { 6144 6145 defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>, 6146 EVEX_V256, REX_W; 6147 defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>, 6148 EVEX_V128, REX_W; 6149 } 6150} 6151 6152defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", X86vshlv, SchedWriteVarVecShift>, 6153 avx512_var_shift_w<0x12, "vpsllvw", X86vshlv, SchedWriteVarVecShift>; 6154 6155defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", X86vsrav, SchedWriteVarVecShift>, 6156 avx512_var_shift_w<0x11, "vpsravw", X86vsrav, SchedWriteVarVecShift>; 6157 6158defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecShift>, 
6159 avx512_var_shift_w<0x10, "vpsrlvw", X86vsrlv, SchedWriteVarVecShift>; 6160 6161defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>; 6162defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>; 6163 6164defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX]>; 6165defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX]>; 6166defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX]>; 6167defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX]>; 6168 6169 6170// Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX. 6171let Predicates = [HasAVX512, NoVLX] in { 6172 def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))), 6173 (EXTRACT_SUBREG (v8i64 6174 (VPROLVQZrr 6175 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6176 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))), 6177 sub_xmm)>; 6178 def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))), 6179 (EXTRACT_SUBREG (v8i64 6180 (VPROLVQZrr 6181 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6182 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))), 6183 sub_ymm)>; 6184 6185 def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))), 6186 (EXTRACT_SUBREG (v16i32 6187 (VPROLVDZrr 6188 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6189 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))), 6190 sub_xmm)>; 6191 def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), 6192 (EXTRACT_SUBREG (v16i32 6193 (VPROLVDZrr 6194 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6195 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))), 6196 sub_ymm)>; 6197 6198 def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 timm:$src2))), 6199 (EXTRACT_SUBREG (v8i64 
6200 (VPROLQZri 6201 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6202 timm:$src2)), sub_xmm)>; 6203 def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 timm:$src2))), 6204 (EXTRACT_SUBREG (v8i64 6205 (VPROLQZri 6206 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6207 timm:$src2)), sub_ymm)>; 6208 6209 def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 timm:$src2))), 6210 (EXTRACT_SUBREG (v16i32 6211 (VPROLDZri 6212 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6213 timm:$src2)), sub_xmm)>; 6214 def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 timm:$src2))), 6215 (EXTRACT_SUBREG (v16i32 6216 (VPROLDZri 6217 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6218 timm:$src2)), sub_ymm)>; 6219} 6220 6221// Use 512bit VPROR/VPRORI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX. 6222let Predicates = [HasAVX512, NoVLX] in { 6223 def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))), 6224 (EXTRACT_SUBREG (v8i64 6225 (VPRORVQZrr 6226 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6227 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))), 6228 sub_xmm)>; 6229 def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))), 6230 (EXTRACT_SUBREG (v8i64 6231 (VPRORVQZrr 6232 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6233 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))), 6234 sub_ymm)>; 6235 6236 def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))), 6237 (EXTRACT_SUBREG (v16i32 6238 (VPRORVDZrr 6239 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6240 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))), 6241 sub_xmm)>; 6242 def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), 6243 (EXTRACT_SUBREG (v16i32 6244 (VPRORVDZrr 6245 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6246 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, 
sub_ymm)))), 6247 sub_ymm)>; 6248 6249 def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 timm:$src2))), 6250 (EXTRACT_SUBREG (v8i64 6251 (VPRORQZri 6252 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6253 timm:$src2)), sub_xmm)>; 6254 def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 timm:$src2))), 6255 (EXTRACT_SUBREG (v8i64 6256 (VPRORQZri 6257 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6258 timm:$src2)), sub_ymm)>; 6259 6260 def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 timm:$src2))), 6261 (EXTRACT_SUBREG (v16i32 6262 (VPRORDZri 6263 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)), 6264 timm:$src2)), sub_xmm)>; 6265 def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 timm:$src2))), 6266 (EXTRACT_SUBREG (v16i32 6267 (VPRORDZri 6268 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), 6269 timm:$src2)), sub_ymm)>; 6270} 6271 6272//===-------------------------------------------------------------------===// 6273// 1-src variable permutation VPERMW/D/Q 6274//===-------------------------------------------------------------------===// 6275 6276multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, 6277 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> { 6278 let Predicates = [HasAVX512] in 6279 defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>, 6280 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512; 6281 6282 let Predicates = [HasAVX512, HasVLX] in 6283 defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>, 6284 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256; 6285} 6286 6287multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM, 6288 string OpcodeStr, SDNode OpNode, 6289 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> { 6290 let Predicates = [HasAVX512] in 6291 defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, 6292 sched, 
VTInfo.info512>, 6293 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, 6294 sched, VTInfo.info512>, EVEX_V512; 6295 let Predicates = [HasAVX512, HasVLX] in 6296 defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, 6297 sched, VTInfo.info256>, 6298 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, 6299 sched, VTInfo.info256>, EVEX_V256; 6300} 6301 6302multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr, 6303 Predicate prd, SDNode OpNode, 6304 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> { 6305 let Predicates = [prd] in 6306 defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>, 6307 EVEX_V512 ; 6308 let Predicates = [HasVLX, prd] in { 6309 defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>, 6310 EVEX_V256 ; 6311 defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>, 6312 EVEX_V128 ; 6313 } 6314} 6315 6316defm VPERMW : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv, 6317 WriteVarShuffle256, avx512vl_i16_info>, REX_W; 6318defm VPERMB : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv, 6319 WriteVarShuffle256, avx512vl_i8_info>; 6320 6321defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv, 6322 WriteVarShuffle256, avx512vl_i32_info>; 6323defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv, 6324 WriteVarShuffle256, avx512vl_i64_info>, REX_W; 6325defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv, 6326 WriteFVarShuffle256, avx512vl_f32_info>; 6327defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv, 6328 WriteFVarShuffle256, avx512vl_f64_info>, REX_W; 6329 6330defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq", 6331 X86VPermi, WriteShuffle256, avx512vl_i64_info>, 6332 EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, REX_W; 6333defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd", 6334 X86VPermi, WriteFShuffle256, avx512vl_f64_info>, 6335 EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, REX_W; 6336 
//===----------------------------------------------------------------------===//
// AVX-512 - VPERMIL
//===----------------------------------------------------------------------===//

// Variable-control VPERMILPS/PD: the control operand comes from a separate
// (integer) VTInfo, Ctrl. Provides reg/reg, reg/mem and broadcast forms.
multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _,
                             X86VectorVTInfo Ctrl> {
     defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1,
                               (Ctrl.VT Ctrl.RC:$src2)))>,
                  T8, PD, EVEX, VVVV, Sched<[sched]>;
     defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode
                           _.RC:$src1,
                           (Ctrl.VT (Ctrl.LdFrag addr:$src2))))>,
                  T8, PD, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
                  Sched<[sched.Folded, sched.ReadAfterFold]>;
     defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                   "${src2}"#_.BroadcastStr#", $src1",
                   "$src1, ${src2}"#_.BroadcastStr,
                   (_.VT (OpNode
                            _.RC:$src1,
                            (Ctrl.VT (Ctrl.BroadcastLdFrag addr:$src2))))>,
                   T8, PD, EVEX, VVVV, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// 512-bit under HasAVX512; 128/256-bit under HasVLX.
multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
                                    X86SchedWriteWidths sched,
                                    AVX512VLVectorVTInfo _,
                                    AVX512VLVectorVTInfo Ctrl> {
  let Predicates = [HasAVX512] in {
    defm Z    : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM,
                                  _.info512, Ctrl.info512>, EVEX_V512;
  }
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM,
                                  _.info128, Ctrl.info128>, EVEX_V128;
    defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM,
                                  _.info256, Ctrl.info256>, EVEX_V256;
  }
}

// Combines the variable-control (OpcVar) and immediate-control (OpcImm)
// encodings of VPERMILPS/PD under one family name.
multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
                         AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
  defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
                                      _, Ctrl>;
  defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
                                    X86VPermilpi, SchedWriteFShuffle, _>,
                  EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
}

let ExeDomain = SSEPackedSingle in
defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
                               avx512vl_i32_info>;
let ExeDomain = SSEPackedDouble in
defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
                               avx512vl_i64_info>, REX_W;

//===----------------------------------------------------------------------===//
// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
//===----------------------------------------------------------------------===//

defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
                                      X86PShufd, SchedWriteShuffle, avx512vl_i32_info>,
                                      EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
                                  X86PShufhw, SchedWriteShuffle>,
                                  EVEX, AVX512XSIi8Base;
defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
                                  X86PShuflw, SchedWriteShuffle>,
                                  EVEX, AVX512XDIi8Base;

//===----------------------------------------------------------------------===//
// AVX-512 - VPSHUFB
//===----------------------------------------------------------------------===//

// Byte shuffle; reuses the variable-shift 2-operand multiclass. Gated on
// HasBWI (512-bit) / HasVLX+HasBWI (256/128-bit).
multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
  defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>,
             EVEX_V512;

  let Predicates = [HasVLX, HasBWI] in {
  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>,
             EVEX_V256;
  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>,
             EVEX_V128;
  }
}

defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
                                  SchedWriteVarShuffle>, WIG;

//===----------------------------------------------------------------------===//
// Move Low to High and High to Low packed FP Instructions
//===----------------------------------------------------------------------===//

def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>,
                            Sched<[SchedWriteFShuffle.XMM]>, EVEX, VVVV;
let isCommutable = 1 in
def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
                            "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
                            Sched<[SchedWriteFShuffle.XMM]>, EVEX, VVVV;

//===----------------------------------------------------------------------===//
// VMOVHPS/PD VMOVLPS Instructions
// All patterns were taken from the SSE implementation.
//===----------------------------------------------------------------------===//

// Load-merge form of VMOVH/LPS/PD: merges a 64-bit memory operand into the
// destination via OpNode; no patterns here when OpNode is null_frag.
multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
                                  SDPatternOperator OpNode,
                                  X86VectorVTInfo _> {
  let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in
  def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
                  (ins _.RC:$src1, f64mem:$src2),
                  !strconcat(OpcodeStr,
                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set _.RC:$dst,
                        (OpNode _.RC:$src1,
                                (_.VT (bitconvert
                                       (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>,
                  Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX, VVVV;
}

// No patterns for MOVLPS/MOVHPS as the Movlhps node should only be created in
// SSE1. And MOVLPS pattern is even more complex.
defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag,
                                  v4f32x_info>, EVEX_CD8<32, CD8VT2>, TB;
defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
                                  v2f64x_info>, EVEX_CD8<64, CD8VT1>, TB, PD, REX_W;
defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag,
                                  v4f32x_info>, EVEX_CD8<32, CD8VT2>, TB;
defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd,
                                  v2f64x_info>, EVEX_CD8<64, CD8VT1>, TB, PD, REX_W;

let Predicates = [HasAVX512] in {
  // VMOVHPD patterns
  def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))),
            (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;

  // VMOVLPD patterns
  def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload64 addr:$src2))),
            (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
}

let SchedRW = [WriteFStore] in {
let mayStore = 1, hasSideEffects = 0 in
def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
                              (ins f64mem:$dst, VR128X:$src),
                              "vmovhps\t{$src, $dst|$dst, $src}",
                              []>, EVEX, EVEX_CD8<32, CD8VT2>;
def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
                              (ins f64mem:$dst, VR128X:$src),
                              "vmovhpd\t{$src, $dst|$dst, $src}",
                              [(store (f64 (extractelt
                                            (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
                                            (iPTR 0))), addr:$dst)]>,
                              EVEX, EVEX_CD8<64, CD8VT1>, REX_W;
let mayStore = 1, hasSideEffects = 0 in
def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
                              (ins f64mem:$dst, VR128X:$src),
                              "vmovlps\t{$src, $dst|$dst, $src}",
                              []>, EVEX, EVEX_CD8<32, CD8VT2>;
def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
                              (ins f64mem:$dst, VR128X:$src),
                              "vmovlpd\t{$src, $dst|$dst, $src}",
                              [(store (f64 (extractelt (v2f64 VR128X:$src),
                                            (iPTR 0))), addr:$dst)]>,
                              EVEX, EVEX_CD8<64, CD8VT1>, REX_W;
} // SchedRW

let Predicates = [HasAVX512] in {
  // VMOVHPD patterns
  def : Pat<(store (f64 (extractelt
                           (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
                           (iPTR 0))), addr:$dst),
            (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
}
//===----------------------------------------------------------------------===//
// FMA - Fused Multiply Operations
//

// 213-form packed FMA: reg/reg, reg/mem and broadcast forms. Note the
// selection patterns use operand order (src2, src1, src3) while the
// instruction reads (src1 = dst, src2, src3).
multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
                               X86VectorVTInfo _> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)),
          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
          EVEX, VVVV, Sched<[sched]>;

  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))),
          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
          EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold,
                             sched.ReadAfterFold]>;

  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
            (ins _.RC:$src2, _.ScalarMemOp:$src3),
            OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
            !strconcat("$src2, ${src3}", _.BroadcastStr ),
            (OpNode _.RC:$src2,
             _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))),
            (MaskOpNode _.RC:$src2,
             _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>,
            EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
                                       sched.ReadAfterFold]>;
  }
}

// 213-form with static rounding control ({rn-sae} etc. via AVX512RC:$rc).
multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR] in
  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
           (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))),
           (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>,
           EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>;
}

// Instantiates all widths of a 213-form FMA; rounding form exists only for
// the 512-bit variant.
multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                                   SDNode MaskOpNode, SDNode OpNodeRnd,
                                   X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _,
                                   Predicate prd = HasAVX512> {
  let Predicates = [prd] in {
    defm Z      : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                      sched.ZMM, _.info512>,
                  avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                        _.info512>,
                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, prd] in {
    defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.YMM, _.info256>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.XMM, _.info128>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}

// PH (FP16, HasFP16), PS and PD element types for a 213-form FMA family.
multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                              SDNode MaskOpNode, SDNode OpNodeRnd> {
  defm PH : avx512_fma3p_213_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f16_info, HasFP16>, T_MAP6, PD;
  defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f32_info>, T8, PD;
  defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f64_info>, T8, PD, REX_W;
}

defm VFMADD213    : avx512_fma3p_213_f<0xA8, "vfmadd213", any_fma,
                                       fma, X86FmaddRnd>;
defm VFMSUB213    : avx512_fma3p_213_f<0xAA, "vfmsub213", X86any_Fmsub,
                                       X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub,
                                       X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd,
                                       X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD213   : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86any_Fnmadd,
                                       X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB213   : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86any_Fnmsub,
                                       X86Fnmsub, X86FnmsubRnd>;


// 231-form packed FMA. The unmasked reg/reg form carries no pattern
// (null_frag); only the masked form and the memory forms are matched.
multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
                               X86VectorVTInfo _> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (null_frag),
          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
          EVEX, VVVV, Sched<[sched]>;

  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)),
          (_.VT (MaskOpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
          EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold,
                             sched.ReadAfterFold]>;

  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.ScalarMemOp:$src3),
           OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
           "$src2, ${src3}"#_.BroadcastStr,
           (_.VT (OpNode _.RC:$src2,
                         (_.VT (_.BroadcastLdFrag addr:$src3)),
                         _.RC:$src1)),
           (_.VT (MaskOpNode _.RC:$src2,
                             (_.VT (_.BroadcastLdFrag addr:$src3)),
                             _.RC:$src1)), 1, 0>, EVEX, VVVV, EVEX_B,
           Sched<[sched.Folded, sched.ReadAfterFold,
                  sched.ReadAfterFold]>;
  }
}

// 231-form with static rounding control.
multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR] in
  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
           (null_frag),
           (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))),
           1, 1>, EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>;
}

// Instantiates all widths of a 231-form FMA; rounding form is 512-bit only.
multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                                   SDNode MaskOpNode, SDNode OpNodeRnd,
                                   X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _,
                                   Predicate prd = HasAVX512> {
  let Predicates = [prd] in {
    defm Z      : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                      sched.ZMM, _.info512>,
                  avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                        _.info512>,
                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, prd] in {
    defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.YMM, _.info256>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.XMM, _.info128>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}

// PH/PS/PD element types for a 231-form FMA family.
multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                              SDNode MaskOpNode, SDNode OpNodeRnd > {
  defm PH : avx512_fma3p_231_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f16_info, HasFP16>, T_MAP6, PD;
  defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f32_info>, T8, PD;
  defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f64_info>, T8, PD, REX_W;
}

defm VFMADD231    : avx512_fma3p_231_f<0xB8, "vfmadd231", any_fma,
                                       fma, X86FmaddRnd>;
defm VFMSUB231    : avx512_fma3p_231_f<0xBA, "vfmsub231", X86any_Fmsub,
                                       X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub,
                                       X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd,
                                       X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD231   : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86any_Fnmadd,
                                       X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB231   : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86any_Fnmsub,
                                       X86Fnmsub, X86FnmsubRnd>;

// 132-form packed FMA. The unmasked reg/reg form carries no pattern
// (null_frag); memory patterns are written in 312 order (see below).
multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
                               X86VectorVTInfo _> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (null_frag),
          (_.VT (MaskOpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1>,
          EVEX, VVVV, Sched<[sched]>;

  // Pattern is 312 order so that the load is in a different place from the
  // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)),
          (_.VT (MaskOpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
          EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold,
                             sched.ReadAfterFold]>;

  // Pattern is 312 order so that the load is in a different place from the
  // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.ScalarMemOp:$src3),
           OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
           "$src2, ${src3}"#_.BroadcastStr,
           (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
                         _.RC:$src1, _.RC:$src2)),
           (_.VT (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
                             _.RC:$src1, _.RC:$src2)), 1, 0>,
           EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
                                      sched.ReadAfterFold]>;
  }
}

// 132-form with static rounding control.
multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
      Uses = [MXCSR] in
  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
           (null_frag),
           (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))),
           1, 1>, EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched]>;
}
// Instantiates the 132-form FMA at all three vector widths. The 512-bit form
// also gets the embedded-rounding variant; 256/128-bit forms require VLX.
multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                                   SDNode MaskOpNode, SDNode OpNodeRnd,
                                   X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _,
                                   Predicate prd = HasAVX512> {
  let Predicates = [prd] in {
    defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                 sched.ZMM, _.info512>,
             avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                   _.info512>,
             EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, prd] in {
    defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.YMM, _.info256>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.XMM, _.info128>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}

// Instantiates the packed 132-form FMA multiclass for each element type:
// fp16 (map6, guarded by HasFP16), fp32 and fp64 (map T8).
multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                              SDNode MaskOpNode, SDNode OpNodeRnd> {
  defm PH : avx512_fma3p_132_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f16_info, HasFP16>, T_MAP6, PD;
  defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f32_info>, T8, PD;
  defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
                                    OpNodeRnd, SchedWriteFMA,
                                    avx512vl_f64_info>, T8, PD, REX_W;
}

defm VFMADD132    : avx512_fma3p_132_f<0x98, "vfmadd132", any_fma,
                                       fma, X86FmaddRnd>;
defm VFMSUB132    : avx512_fma3p_132_f<0x9A, "vfmsub132", X86any_Fmsub,
                                       X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub,
                                       X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd,
                                       X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD132   : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86any_Fnmadd,
                                       X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB132   : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86any_Fnmsub,
                                       X86Fnmsub, X86FnmsubRnd>;

// Scalar FMA
// Common scaffolding for one scalar FMA opcode: intrinsic-style (_Int)
// register/memory/embedded-rounding forms plus isCodeGenOnly FRC forms that
// carry the actual ISel patterns (RHS_r/RHS_m/RHS_b). When MaskOnlyReg is
// set, the unmasked register/rounding patterns are suppressed.
multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                               dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
  defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
          EVEX, VVVV, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;

  let mayLoad = 1 in
  defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
          EVEX, VVVV, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold,
                             SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;

  let Uses = [MXCSR] in
  defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
          EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;

  let isCodeGenOnly = 1, isCommutable = 1 in {
    def r : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
                   (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
                   !strconcat(OpcodeStr,
                              "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                   !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>, EVEX, VVVV, SIMD_EXC;
    def m : AVX512<opc, MRMSrcMem, (outs _.FRC:$dst),
                   (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
                   !strconcat(OpcodeStr,
                              "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                   [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold,
                                    SchedWriteFMA.Scl.ReadAfterFold]>, EVEX, VVVV, SIMD_EXC;

    let Uses = [MXCSR] in
    def rb : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
                    (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
                    !strconcat(OpcodeStr,
                               "\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"),
                    !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
                    Sched<[SchedWriteFMA.Scl]>, EVEX, VVVV;
  }// isCodeGenOnly = 1
}// Constraints = "$src1 = $dst"
}

// Defines the 213/231/132 scalar variants of one FMA operation for a single
// element type, providing the ISel patterns for each operand ordering.
multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
                            string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd,
                            X86VectorVTInfo _, string SUFF> {
  let ExeDomain = _.ExeDomain in {
  defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
                // Operands for intrinsic are in 123 order to preserve passthru
                // semantics.
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
                                           _.FRC:$src3))),
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
                                           (_.ScalarLdFrag addr:$src3)))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
                                           _.FRC:$src3, (i32 timm:$rc)))), 0>;

  defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
                                           _.FRC:$src1))),
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
                                           (_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
                                           _.FRC:$src1, (i32 timm:$rc)))), 1>;

  // One pattern is 312 order so that the load is in a different place from the
  // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
  defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
                                           _.FRC:$src2))),
                (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
                                           _.FRC:$src1, _.FRC:$src2))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
                                           _.FRC:$src2, (i32 timm:$rc)))), 1>;
  }
}

// Instantiates the full scalar FMA family for SS/SD (AVX512) and SH (FP16).
multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
                        string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd> {
  let Predicates = [HasAVX512] in {
    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
                                 OpNodeRnd, f32x_info, "SS">,
                                 EVEX_CD8<32, CD8VT1>, VEX_LIG, T8, PD;
    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
                                 OpNodeRnd, f64x_info, "SD">,
                                 EVEX_CD8<64, CD8VT1>, VEX_LIG, REX_W, T8, PD;
  }
  let Predicates = [HasFP16] in {
    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
                                 OpNodeRnd, f16x_info, "SH">,
                                 EVEX_CD8<16, CD8VT1>, VEX_LIG, T_MAP6, PD;
  }
}

defm VFMADD  : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", any_fma, X86FmaddRnd>;
defm VFMSUB  : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86any_Fmsub, X86FmsubRnd>;
defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86any_Fnmadd, X86FnmaddRnd>;
defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86any_Fnmsub, X86FnmsubRnd>;

// Patterns that fold a scalar FMA plus a Move (blend of element 0 into
// $src1) into the intrinsic-style _Int instructions, selecting the
// 213/231/132 form according to where $src1's element 0 appears.
multiclass avx512_scalar_fma_patterns<SDPatternOperator Op, SDNode MaskedOp,
                                      SDNode RndOp, string Prefix,
                                      string Suffix, SDNode Move,
                                      X86VectorVTInfo _, PatLeaf ZeroFP,
                                      Predicate prd = HasAVX512> {
  let Predicates = [prd] in {
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    _.FRC:$src3))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS
_.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2, _.FRC:$src3,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    // Memory-operand forms: the load always feeds $src3.
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    (_.ScalarLdFrag addr:$src3)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zm_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))),
              (!cast<I>(Prefix#"132"#Suffix#"Zm_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               addr:$src3)>;

    // Merge-masked forms (_Intk): the false value of the select is
    // element 0 of $src1, i.e. the destination is the passthru.
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp _.FRC:$src2,
                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                           _.FRC:$src3),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp _.FRC:$src2,
                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                           (_.ScalarLdFrag addr:$src3)),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                           (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp _.FRC:$src2, _.FRC:$src3,
                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    // Zero-masked forms (_Intkz): the false value of the select is +0.0
    // (ZeroFP), so masked-off element 0 is zeroed.
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp _.FRC:$src2,
                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                           _.FRC:$src3),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp _.FRC:$src2, _.FRC:$src3,
                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp _.FRC:$src2,
                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                           (_.ScalarLdFrag addr:$src3)),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                           _.FRC:$src2, (_.ScalarLdFrag addr:$src3)),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
                           (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    // Patterns with rounding mode.
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (RndOp _.FRC:$src2,
                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                       _.FRC:$src3, (i32 timm:$rc)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (RndOp _.FRC:$src2, _.FRC:$src3,
                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                       (i32 timm:$rc)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

    // Merge-masked rounding forms.
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (RndOp _.FRC:$src2,
                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                        _.FRC:$src3, (i32 timm:$rc)),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (RndOp _.FRC:$src2, _.FRC:$src3,
                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                        (i32 timm:$rc)),
                 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

    // Zero-masked rounding forms.
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (RndOp _.FRC:$src2,
                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                        _.FRC:$src3, (i32 timm:$rc)),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects_mask VK1WM:$mask,
                 (RndOp _.FRC:$src2, _.FRC:$src3,
                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                        (i32 timm:$rc)),
                 (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
  }
}

defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD", "SH",
                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB", "SH",
                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SH",
                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SH",
                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;

defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
                                  "SS", X86Movss, v4f32x_info, fp32imm0>;

defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;

//===----------------------------------------------------------------------===//
// AVX-512 Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
//===----------------------------------------------------------------------===//
let Constraints = "$src1 = $dst" in {
multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  // NOTE: The SDNode have the multiply operands first with the add last.
  // This enables commuted load patterns to be autogenerated by tablegen.
  let ExeDomain = _.ExeDomain in {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
          T8, PD, EVEX, VVVV, Sched<[sched]>;

  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
          T8, PD, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold,
                                     sched.ReadAfterFold]>;

  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
           (ins _.RC:$src2, _.ScalarMemOp:$src3),
           OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
           !strconcat("$src2, ${src3}", _.BroadcastStr ),
           (OpNode _.RC:$src2,
                   (_.VT (_.BroadcastLdFrag addr:$src3)),
                   _.RC:$src1)>,
           T8, PD, EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
                                              sched.ReadAfterFold]>;
  }
}
} // Constraints = "$src1 = $dst"

// Instantiates VPMADD52 at all three widths; 256/128-bit forms need VLX.
multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasIFMA] in {
    defm Z : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
             EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasIFMA] in {
    defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}

defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
                                         SchedWriteVecIMul, avx512vl_i64_info>,
                                         REX_W;
defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
                                         SchedWriteVecIMul, avx512vl_i64_info>,
                                         REX_W;

//===----------------------------------------------------------------------===//
// AVX-512 Scalar convert from sign integer to float/double
//===----------------------------------------------------------------------===//

// Scalar int->fp conversion: isCodeGenOnly FRC forms (no patterns here) plus
// intrinsic-style _Int forms carrying the ISel patterns. _Uses/_mayRaiseFPException
// allow callers to opt out of MXCSR use (e.g. null_frag instantiations).
multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched,
                         RegisterClass SrcRC, X86VectorVTInfo DstVT,
                         X86MemOperand x86memop, PatFrag ld_frag, string asm,
                         string mem, list<Register> _Uses = [MXCSR],
                         bit _mayRaiseFPException = 1> {
let ExeDomain = DstVT.ExeDomain, Uses = _Uses,
    mayRaiseFPException = _mayRaiseFPException in {
  let hasSideEffects = 0, isCodeGenOnly = 1 in {
    def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
                (ins DstVT.FRC:$src1, SrcRC:$src),
                !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
                EVEX, VVVV, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
    let mayLoad = 1 in
      def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
                  (ins DstVT.FRC:$src1, x86memop:$src),
                  asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
                  EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
  } // hasSideEffects = 0
  def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
                  (ins DstVT.RC:$src1, SrcRC:$src2),
                  !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set DstVT.RC:$dst,
                        (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2))]>,
                  EVEX, VVVV, Sched<[sched, ReadDefault, ReadInt2Fpu]>;

  def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
                  (ins DstVT.RC:$src1, x86memop:$src2),
                  asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  [(set DstVT.RC:$dst,
                        (OpNode (DstVT.VT DstVT.RC:$src1),
                                (ld_frag addr:$src2)))]>,
                  EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
  def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  (!cast<Instruction>(NAME#"rr_Int") DstVT.RC:$dst,
                   DstVT.RC:$src1, SrcRC:$src2), 0, "att">;
}

// Embedded-rounding (register-only) variant of the scalar int->fp convert.
multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
                               X86FoldableSchedWrite sched, RegisterClass SrcRC,
                               X86VectorVTInfo DstVT, string asm,
                               string mem> {
  let ExeDomain = DstVT.ExeDomain, Uses = [MXCSR] in
  def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
                   (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
                   !strconcat(asm,
                              "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
                   [(set DstVT.RC:$dst,
                         (OpNode (DstVT.VT DstVT.RC:$src1),
                                 SrcRC:$src2,
                                 (i32 timm:$rc)))]>,
                   EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
  def : InstAlias<"v"#asm#mem#"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}",
                  (!cast<Instruction>(NAME#"rrb_Int") DstVT.RC:$dst,
                   DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 0, "att">;
}

// Combines the plain and embedded-rounding variants under one defm name.
multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, SDNode OpNodeRnd,
                                X86FoldableSchedWrite sched,
                                RegisterClass SrcRC, X86VectorVTInfo DstVT,
                                X86MemOperand x86memop, PatFrag ld_frag,
string asm, string mem> {
  defm NAME : avx512_vcvtsi_round<opc, OpNodeRnd, sched, SrcRC, DstVT, asm, mem>,
              avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop,
                            ld_frag, asm, mem>, VEX_LIG;
}

let Predicates = [HasAVX512] in {
// Signed int -> float/double. The 32-bit SD form uses null_frag (no DAG
// pattern) and opts out of MXCSR: the i32->f64 conversion is exact.
defm VCVTSI2SSZ  : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
                                        WriteCvtI2SS, GR32,
                                        v4f32x_info, i32mem, loadi32, "cvtsi2ss", "l">,
                                        TB, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
                                        WriteCvtI2SS, GR64,
                                        v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">,
                                        TB, XS, REX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSI2SDZ  : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32,
                                 v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l", [], 0>,
                                 TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
                                        WriteCvtI2SD, GR64,
                                        v2f64x_info, i64mem, loadi64, "cvtsi2sd", "q">,
                                        TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;

def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;

def : Pat<(f32 (any_sint_to_fp (loadi32 addr:$src))),
          (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (any_sint_to_fp (loadi64 addr:$src))),
          (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (any_sint_to_fp (loadi32 addr:$src))),
          (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (any_sint_to_fp (loadi64 addr:$src))),
          (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;

def : Pat<(f32 (any_sint_to_fp GR32:$src)),
          (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f32 (any_sint_to_fp GR64:$src)),
          (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f64 (any_sint_to_fp GR32:$src)),
          (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (any_sint_to_fp GR64:$src)),
          (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;

// Unsigned int -> float/double (AVX-512 only encodings, opcode 0x7B).
defm VCVTUSI2SSZ   : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
                                          WriteCvtI2SS, GR32,
                                          v4f32x_info, i32mem, loadi32,
                                          "cvtusi2ss", "l">, TB, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
                                          WriteCvtI2SS, GR64,
                                          v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">,
                                          TB, XS, REX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTUSI2SDZ   : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info,
                                   i32mem, loadi32, "cvtusi2sd", "l", [], 0>,
                                   TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
                                          WriteCvtI2SD, GR64,
                                          v2f64x_info, i64mem, loadi64, "cvtusi2sd", "q">,
                                          TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;

def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTUSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;

def : Pat<(f32 (any_uint_to_fp (loadi32 addr:$src))),
          (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (any_uint_to_fp (loadi64 addr:$src))),
          (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (any_uint_to_fp (loadi32 addr:$src))),
          (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (any_uint_to_fp (loadi64 addr:$src))),
          (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;

def : Pat<(f32 (any_uint_to_fp GR32:$src)),
          (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f32 (any_uint_to_fp GR64:$src)),
          (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f64 (any_uint_to_fp
GR32:$src)),
          (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (any_uint_to_fp GR64:$src)),
          (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
} // Predicates = [HasAVX512]

//===----------------------------------------------------------------------===//
// AVX-512 Scalar convert from float/double to integer
//===----------------------------------------------------------------------===//

// Scalar fp->int convert with register, embedded-rounding, and memory _Int
// forms, plus AT&T-syntax aliases carrying the size suffix (aliasStr).
multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
                                  X86VectorVTInfo DstVT, SDNode OpNode,
                                  SDNode OpNodeRnd,
                                  X86FoldableSchedWrite sched, string asm,
                                  string aliasStr, Predicate prd = HasAVX512> {
  let Predicates = [prd], ExeDomain = SrcVT.ExeDomain in {
    def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
                    !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                    [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
                    EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
    let Uses = [MXCSR] in
    def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
                     !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
                     [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 timm:$rc)))]>,
                     EVEX, VEX_LIG, EVEX_B, EVEX_RC,
                     Sched<[sched]>;
    def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
                    !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                    [(set DstVT.RC:$dst, (OpNode
                          (SrcVT.ScalarIntMemFrags addr:$src)))]>,
                    EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  } // Predicates = [prd]

  def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
  def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
                  (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
  def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
                   SrcVT.IntScalarMemOp:$src), 0, "att">;
}

// Convert float/double to signed/unsigned int 32/64
defm VCVTSS2SIZ:    avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,X86cvts2si,
                                           X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">,
                                           TB, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2SI64Z:  avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si,
                                           X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">,
                                           TB, XS, REX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USIZ:   avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, X86cvts2usi,
                                           X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">,
                                           TB, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, X86cvts2usi,
                                           X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">,
                                           TB, XS, REX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTSD2SIZ:    avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
                                           X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">,
                                           TB, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2SI64Z:  avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si,
                                           X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">,
                                           TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USIZ:   avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, X86cvts2usi,
                                           X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">,
                                           TB, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2usi,
                                           X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
                                           TB, XD, REX_W, EVEX_CD8<64, CD8VT1>;

// isCodeGenOnly fp->int convert operating directly on FRC scalars
// (used below to select lrint/llrint).
multiclass avx512_cvt_s<bits<8> opc, string asm, X86VectorVTInfo SrcVT,
                        X86VectorVTInfo DstVT, SDNode OpNode,
                        X86FoldableSchedWrite sched> {
  let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in {
    let isCodeGenOnly = 1 in {
      def rr : AVX512<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.FRC:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                      [(set DstVT.RC:$dst, (OpNode SrcVT.FRC:$src))]>,
                      EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
      def rm : AVX512<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.ScalarMemOp:$src),
                      !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                      [(set DstVT.RC:$dst, (OpNode (SrcVT.ScalarLdFrag addr:$src)))]>,
                      EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
    }
  } // Predicates = [HasAVX512]
}

// lrint/llrint selections; defm suffixes are disjoint from the _Int forms
// defined above under the same names.
defm VCVTSS2SIZ:   avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i32x_info,
                                lrint, WriteCvtSS2I>, TB, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2SI64Z: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i64x_info,
                                llrint, WriteCvtSS2I>, REX_W, TB, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSD2SIZ:   avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i32x_info,
                                lrint, WriteCvtSD2I>, TB, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2SI64Z: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i64x_info,
                                llrint, WriteCvtSD2I>, REX_W, TB, XD, EVEX_CD8<64, CD8VT1>;

let Predicates = [HasAVX512] in {
  def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64Zrr FR32:$src)>;
  def : Pat<(i64 (lrint (loadf32 addr:$src))), (VCVTSS2SI64Zrm addr:$src)>;

  def : Pat<(i64 (lrint FR64:$src)), (VCVTSD2SI64Zrr FR64:$src)>;
  def : Pat<(i64 (lrint (loadf64 addr:$src))), (VCVTSD2SI64Zrm addr:$src)>;
}

// Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
// which produce unnecessary vmovs{s,d} instructions
let Predicates = [HasAVX512] in {
def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))),
          (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))),
          (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))),
          (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))),
          (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))),
          (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))),
          (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))),
          (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))),
          (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR64:$src)))))),
          (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi64 addr:$src))))))),
          (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR32:$src)))))),
          (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v4f32 (X86Movss
                  (v4f32 VR128X:$dst),
                  (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi32 addr:$src))))))),
          (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR64:$src)))))),
          (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi64 addr:$src))))))),
          (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR32:$src)))))),
          (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v2f64 (X86Movsd
                  (v2f64 VR128X:$dst),
                  (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi32 addr:$src))))))),
          (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
} // Predicates = [HasAVX512]

// Convert float/double to signed/unsigned int 32/64 with truncation
multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
                            X86VectorVTInfo _DstRC, SDPatternOperator OpNode,
                            SDNode OpNodeInt, SDNode OpNodeSAE,
                            X86FoldableSchedWrite sched, string aliasStr,
                            Predicate prd = HasAVX512> {
let Predicates = [prd], ExeDomain = _SrcRC.ExeDomain in {
  let isCodeGenOnly = 1 in {
  def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
                  !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                  [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
                  EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
  def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
                  !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                  [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
                  EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  }

  def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
                      !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                      [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
                      EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
  let Uses = [MXCSR] in
  def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
                       !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
                       [(set _DstRC.RC:$dst, (OpNodeSAE
(_SrcRC.VT _SrcRC.RC:$src)))]>, 7564 EVEX, VEX_LIG, EVEX_B, Sched<[sched]>; 7565 def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), 7566 (ins _SrcRC.IntScalarMemOp:$src), 7567 !strconcat(asm,"\t{$src, $dst|$dst, $src}"), 7568 [(set _DstRC.RC:$dst, 7569 (OpNodeInt (_SrcRC.ScalarIntMemFrags addr:$src)))]>, 7570 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 7571} // Predicates = [prd] 7572 7573 def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}", 7574 (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">; 7575 def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}", 7576 (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">; 7577 def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}", 7578 (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst, 7579 _SrcRC.IntScalarMemOp:$src), 0, "att">; 7580} 7581 7582defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info, 7583 any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I, 7584 "{l}">, TB, XS, EVEX_CD8<32, CD8VT1>; 7585defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info, 7586 any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I, 7587 "{q}">, REX_W, TB, XS, EVEX_CD8<32, CD8VT1>; 7588defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info, 7589 any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I, 7590 "{l}">, TB, XD, EVEX_CD8<64, CD8VT1>; 7591defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info, 7592 any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I, 7593 "{q}">, REX_W, TB, XD, EVEX_CD8<64, CD8VT1>; 7594 7595defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info, 7596 any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I, 7597 "{l}">, TB, XS, EVEX_CD8<32, CD8VT1>; 7598defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info, 7599 
any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I, 7600 "{q}">, TB, XS,REX_W, EVEX_CD8<32, CD8VT1>; 7601defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info, 7602 any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I, 7603 "{l}">, TB, XD, EVEX_CD8<64, CD8VT1>; 7604defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info, 7605 any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I, 7606 "{q}">, TB, XD, REX_W, EVEX_CD8<64, CD8VT1>; 7607 7608//===----------------------------------------------------------------------===// 7609// AVX-512 Convert form float to double and back 7610//===----------------------------------------------------------------------===// 7611 7612let Uses = [MXCSR], mayRaiseFPException = 1 in 7613multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 7614 X86VectorVTInfo _Src, SDNode OpNode, 7615 X86FoldableSchedWrite sched> { 7616 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 7617 (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr, 7618 "$src2, $src1", "$src1, $src2", 7619 (_.VT (OpNode (_.VT _.RC:$src1), 7620 (_Src.VT _Src.RC:$src2)))>, 7621 EVEX, VVVV, VEX_LIG, Sched<[sched]>; 7622 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 7623 (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr, 7624 "$src2, $src1", "$src1, $src2", 7625 (_.VT (OpNode (_.VT _.RC:$src1), 7626 (_Src.ScalarIntMemFrags addr:$src2)))>, 7627 EVEX, VVVV, VEX_LIG, 7628 Sched<[sched.Folded, sched.ReadAfterFold]>; 7629 7630 let isCodeGenOnly = 1, hasSideEffects = 0 in { 7631 def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst), 7632 (ins _.FRC:$src1, _Src.FRC:$src2), 7633 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 7634 EVEX, VVVV, VEX_LIG, Sched<[sched]>; 7635 let mayLoad = 1 in 7636 def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst), 7637 (ins _.FRC:$src1, _Src.ScalarMemOp:$src2), 7638 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, 
$src2}", []>, 7639 EVEX, VVVV, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>; 7640 } 7641} 7642 7643// Scalar Conversion with SAE - suppress all exceptions 7644multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 7645 X86VectorVTInfo _Src, SDNode OpNodeSAE, 7646 X86FoldableSchedWrite sched> { 7647 let Uses = [MXCSR] in 7648 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 7649 (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr, 7650 "{sae}, $src2, $src1", "$src1, $src2, {sae}", 7651 (_.VT (OpNodeSAE (_.VT _.RC:$src1), 7652 (_Src.VT _Src.RC:$src2)))>, 7653 EVEX, VVVV, VEX_LIG, EVEX_B, Sched<[sched]>; 7654} 7655 7656// Scalar Conversion with rounding control (RC) 7657multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 7658 X86VectorVTInfo _Src, SDNode OpNodeRnd, 7659 X86FoldableSchedWrite sched> { 7660 let Uses = [MXCSR] in 7661 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 7662 (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr, 7663 "$rc, $src2, $src1", "$src1, $src2, $rc", 7664 (_.VT (OpNodeRnd (_.VT _.RC:$src1), 7665 (_Src.VT _Src.RC:$src2), (i32 timm:$rc)))>, 7666 EVEX, VVVV, VEX_LIG, Sched<[sched]>, 7667 EVEX_B, EVEX_RC; 7668} 7669multiclass avx512_cvt_fp_scalar_trunc<bits<8> opc, string OpcodeStr, 7670 SDNode OpNode, SDNode OpNodeRnd, 7671 X86FoldableSchedWrite sched, 7672 X86VectorVTInfo _src, X86VectorVTInfo _dst, 7673 Predicate prd = HasAVX512> { 7674 let Predicates = [prd], ExeDomain = SSEPackedSingle in { 7675 defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>, 7676 avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src, 7677 OpNodeRnd, sched>, EVEX_CD8<_src.EltSize, CD8VT1>; 7678 } 7679} 7680 7681multiclass avx512_cvt_fp_scalar_extend<bits<8> opc, string OpcodeStr, 7682 SDNode OpNode, SDNode OpNodeSAE, 7683 X86FoldableSchedWrite sched, 7684 X86VectorVTInfo _src, X86VectorVTInfo _dst, 7685 Predicate prd = 
HasAVX512> { 7686 let Predicates = [prd], ExeDomain = SSEPackedSingle in { 7687 defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>, 7688 avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE, sched>, 7689 EVEX_CD8<_src.EltSize, CD8VT1>; 7690 } 7691} 7692defm VCVTSD2SS : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2ss", X86frounds, 7693 X86froundsRnd, WriteCvtSD2SS, f64x_info, 7694 f32x_info>, TB, XD, REX_W; 7695defm VCVTSS2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtss2sd", X86fpexts, 7696 X86fpextsSAE, WriteCvtSS2SD, f32x_info, 7697 f64x_info>, TB, XS; 7698defm VCVTSD2SH : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2sh", X86frounds, 7699 X86froundsRnd, WriteCvtSD2SS, f64x_info, 7700 f16x_info, HasFP16>, T_MAP5, XD, REX_W; 7701defm VCVTSH2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtsh2sd", X86fpexts, 7702 X86fpextsSAE, WriteCvtSS2SD, f16x_info, 7703 f64x_info, HasFP16>, T_MAP5, XS; 7704defm VCVTSS2SH : avx512_cvt_fp_scalar_trunc<0x1D, "vcvtss2sh", X86frounds, 7705 X86froundsRnd, WriteCvtSD2SS, f32x_info, 7706 f16x_info, HasFP16>, T_MAP5; 7707defm VCVTSH2SS : avx512_cvt_fp_scalar_extend<0x13, "vcvtsh2ss", X86fpexts, 7708 X86fpextsSAE, WriteCvtSS2SD, f16x_info, 7709 f32x_info, HasFP16>, T_MAP6; 7710 7711def : Pat<(f64 (any_fpextend FR32X:$src)), 7712 (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>, 7713 Requires<[HasAVX512]>; 7714def : Pat<(f64 (any_fpextend (loadf32 addr:$src))), 7715 (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>, 7716 Requires<[HasAVX512, OptForSize]>; 7717 7718def : Pat<(f32 (any_fpround FR64X:$src)), 7719 (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>, 7720 Requires<[HasAVX512]>; 7721 7722def : Pat<(f32 (any_fpextend FR16X:$src)), 7723 (VCVTSH2SSZrr (f32 (IMPLICIT_DEF)), FR16X:$src)>, 7724 Requires<[HasFP16]>; 7725def : Pat<(f32 (any_fpextend (loadf16 addr:$src))), 7726 (VCVTSH2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>, 7727 Requires<[HasFP16, OptForSize]>; 7728 7729def : Pat<(f64 (any_fpextend FR16X:$src)), 7730 
(VCVTSH2SDZrr (f64 (IMPLICIT_DEF)), FR16X:$src)>, 7731 Requires<[HasFP16]>; 7732def : Pat<(f64 (any_fpextend (loadf16 addr:$src))), 7733 (VCVTSH2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>, 7734 Requires<[HasFP16, OptForSize]>; 7735 7736def : Pat<(f16 (any_fpround FR32X:$src)), 7737 (VCVTSS2SHZrr (f16 (IMPLICIT_DEF)), FR32X:$src)>, 7738 Requires<[HasFP16]>; 7739def : Pat<(f16 (any_fpround FR64X:$src)), 7740 (VCVTSD2SHZrr (f16 (IMPLICIT_DEF)), FR64X:$src)>, 7741 Requires<[HasFP16]>; 7742 7743def : Pat<(v4f32 (X86Movss 7744 (v4f32 VR128X:$dst), 7745 (v4f32 (scalar_to_vector 7746 (f32 (any_fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))), 7747 (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>, 7748 Requires<[HasAVX512]>; 7749 7750def : Pat<(v2f64 (X86Movsd 7751 (v2f64 VR128X:$dst), 7752 (v2f64 (scalar_to_vector 7753 (f64 (any_fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))), 7754 (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>, 7755 Requires<[HasAVX512]>; 7756 7757//===----------------------------------------------------------------------===// 7758// AVX-512 Vector convert from signed/unsigned integer to float/double 7759// and from float/double to signed/unsigned integer 7760//===----------------------------------------------------------------------===// 7761 7762multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 7763 X86VectorVTInfo _Src, SDPatternOperator OpNode, SDPatternOperator MaskOpNode, 7764 X86FoldableSchedWrite sched, 7765 string Broadcast = _.BroadcastStr, 7766 string Alias = "", X86MemOperand MemOp = _Src.MemOp, 7767 RegisterClass MaskRC = _.KRCWM, 7768 dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src)))), 7769 dag MaskLdDAG = (_.VT (MaskOpNode (_Src.VT (_Src.LdFrag addr:$src))))> { 7770let Uses = [MXCSR], mayRaiseFPException = 1 in { 7771 defm rr : AVX512_maskable_cvt<opc, MRMSrcReg, _, (outs _.RC:$dst), 7772 (ins _Src.RC:$src), 7773 (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src), 7774 (ins MaskRC:$mask, _Src.RC:$src), 
7775 OpcodeStr, "$src", "$src", 7776 (_.VT (OpNode (_Src.VT _Src.RC:$src))), 7777 (vselect_mask MaskRC:$mask, 7778 (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))), 7779 _.RC:$src0), 7780 (vselect_mask MaskRC:$mask, 7781 (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))), 7782 _.ImmAllZerosV)>, 7783 EVEX, Sched<[sched]>; 7784 7785 defm rm : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst), 7786 (ins MemOp:$src), 7787 (ins _.RC:$src0, MaskRC:$mask, MemOp:$src), 7788 (ins MaskRC:$mask, MemOp:$src), 7789 OpcodeStr#Alias, "$src", "$src", 7790 LdDAG, 7791 (vselect_mask MaskRC:$mask, MaskLdDAG, _.RC:$src0), 7792 (vselect_mask MaskRC:$mask, MaskLdDAG, _.ImmAllZerosV)>, 7793 EVEX, Sched<[sched.Folded]>; 7794 7795 defm rmb : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst), 7796 (ins _Src.ScalarMemOp:$src), 7797 (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src), 7798 (ins MaskRC:$mask, _Src.ScalarMemOp:$src), 7799 OpcodeStr, 7800 "${src}"#Broadcast, "${src}"#Broadcast, 7801 (_.VT (OpNode (_Src.VT 7802 (_Src.BroadcastLdFrag addr:$src)) 7803 )), 7804 (vselect_mask MaskRC:$mask, 7805 (_.VT 7806 (MaskOpNode 7807 (_Src.VT 7808 (_Src.BroadcastLdFrag addr:$src)))), 7809 _.RC:$src0), 7810 (vselect_mask MaskRC:$mask, 7811 (_.VT 7812 (MaskOpNode 7813 (_Src.VT 7814 (_Src.BroadcastLdFrag addr:$src)))), 7815 _.ImmAllZerosV)>, 7816 EVEX, EVEX_B, Sched<[sched.Folded]>; 7817 } 7818} 7819// Conversion with SAE - suppress all exceptions 7820multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 7821 X86VectorVTInfo _Src, SDNode OpNodeSAE, 7822 X86FoldableSchedWrite sched> { 7823 let Uses = [MXCSR] in 7824 defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 7825 (ins _Src.RC:$src), OpcodeStr, 7826 "{sae}, $src", "$src, {sae}", 7827 (_.VT (OpNodeSAE (_Src.VT _Src.RC:$src)))>, 7828 EVEX, EVEX_B, Sched<[sched]>; 7829} 7830 7831// Conversion with rounding control (RC) 7832multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 7833 
X86VectorVTInfo _Src, SDPatternOperator OpNodeRnd, 7834 X86FoldableSchedWrite sched> { 7835 let Uses = [MXCSR] in 7836 defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 7837 (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr, 7838 "$rc, $src", "$src, $rc", 7839 (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 timm:$rc)))>, 7840 EVEX, EVEX_B, EVEX_RC, Sched<[sched]>; 7841} 7842 7843// Similar to avx512_vcvt_fp, but uses an extload for the memory form. 7844multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 7845 X86VectorVTInfo _Src, SDPatternOperator OpNode, 7846 SDNode MaskOpNode, 7847 X86FoldableSchedWrite sched, 7848 string Broadcast = _.BroadcastStr, 7849 string Alias = "", X86MemOperand MemOp = _Src.MemOp, 7850 RegisterClass MaskRC = _.KRCWM> 7851 : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, MaskOpNode, sched, Broadcast, 7852 Alias, MemOp, MaskRC, 7853 (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src)), 7854 (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>; 7855 7856// Extend [Float to Double, Half to Float] 7857multiclass avx512_cvt_extend<bits<8> opc, string OpcodeStr, 7858 AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src, 7859 X86SchedWriteWidths sched, Predicate prd = HasAVX512> { 7860 let Predicates = [prd] in { 7861 defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info512, _src.info256, 7862 any_fpextend, fpextend, sched.ZMM>, 7863 avx512_vcvt_fp_sae<opc, OpcodeStr, _dst.info512, _src.info256, 7864 X86vfpextSAE, sched.ZMM>, EVEX_V512; 7865 } 7866 let Predicates = [prd, HasVLX] in { 7867 defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info128, _src.info128, 7868 X86any_vfpext, X86vfpext, sched.XMM, 7869 _dst.info128.BroadcastStr, 7870 "", f64mem>, EVEX_V128; 7871 defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info256, _src.info128, 7872 any_fpextend, fpextend, sched.YMM>, EVEX_V256; 7873 } 7874} 7875 7876// Truncate [Double to Float, Float to Half] 7877multiclass 
avx512_cvt_trunc<bits<8> opc, string OpcodeStr, 7878 AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src, 7879 X86SchedWriteWidths sched, Predicate prd = HasAVX512, 7880 PatFrag bcast128 = _src.info128.BroadcastLdFrag, 7881 PatFrag loadVT128 = _src.info128.LdFrag, 7882 RegisterClass maskRC128 = _src.info128.KRCWM> { 7883 let Predicates = [prd] in { 7884 defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512, 7885 X86any_vfpround, X86vfpround, sched.ZMM>, 7886 avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512, 7887 X86vfproundRnd, sched.ZMM>, EVEX_V512; 7888 } 7889 let Predicates = [prd, HasVLX] in { 7890 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128, 7891 null_frag, null_frag, sched.XMM, 7892 _src.info128.BroadcastStr, "{x}", 7893 f128mem, maskRC128>, EVEX_V128; 7894 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256, 7895 X86any_vfpround, X86vfpround, 7896 sched.YMM, _src.info256.BroadcastStr, "{y}">, EVEX_V256; 7897 7898 // Special patterns to allow use of X86vmfpround for masking. Instruction 7899 // patterns have been disabled with null_frag. 
7900 def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT VR128X:$src))), 7901 (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>; 7902 def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0), 7903 maskRC128:$mask), 7904 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, maskRC128:$mask, VR128X:$src)>; 7905 def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV, 7906 maskRC128:$mask), 7907 (!cast<Instruction>(NAME # "Z128rrkz") maskRC128:$mask, VR128X:$src)>; 7908 7909 def : Pat<(_dst.info128.VT (X86any_vfpround (loadVT128 addr:$src))), 7910 (!cast<Instruction>(NAME # "Z128rm") addr:$src)>; 7911 def : Pat<(X86vmfpround (loadVT128 addr:$src), (_dst.info128.VT VR128X:$src0), 7912 maskRC128:$mask), 7913 (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, maskRC128:$mask, addr:$src)>; 7914 def : Pat<(X86vmfpround (loadVT128 addr:$src), _dst.info128.ImmAllZerosV, 7915 maskRC128:$mask), 7916 (!cast<Instruction>(NAME # "Z128rmkz") maskRC128:$mask, addr:$src)>; 7917 7918 def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT (bcast128 addr:$src)))), 7919 (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>; 7920 def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)), 7921 (_dst.info128.VT VR128X:$src0), maskRC128:$mask), 7922 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, maskRC128:$mask, addr:$src)>; 7923 def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)), 7924 _dst.info128.ImmAllZerosV, maskRC128:$mask), 7925 (!cast<Instruction>(NAME # "Z128rmbkz") maskRC128:$mask, addr:$src)>; 7926 } 7927 7928 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}", 7929 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">; 7930 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", 7931 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst, 7932 VK2WM:$mask, VR128X:$src), 0, "att">; 7933 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|" 7934 "$dst 
{${mask}} {z}, $src}", 7935 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst, 7936 VK2WM:$mask, VR128X:$src), 0, "att">; 7937 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}", 7938 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">; 7939 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|" 7940 "$dst {${mask}}, ${src}{1to2}}", 7941 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst, 7942 VK2WM:$mask, f64mem:$src), 0, "att">; 7943 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|" 7944 "$dst {${mask}} {z}, ${src}{1to2}}", 7945 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst, 7946 VK2WM:$mask, f64mem:$src), 0, "att">; 7947 7948 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}", 7949 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">; 7950 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", 7951 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst, 7952 VK4WM:$mask, VR256X:$src), 0, "att">; 7953 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|" 7954 "$dst {${mask}} {z}, $src}", 7955 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst, 7956 VK4WM:$mask, VR256X:$src), 0, "att">; 7957 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}", 7958 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">; 7959 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|" 7960 "$dst {${mask}}, ${src}{1to4}}", 7961 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst, 7962 VK4WM:$mask, f64mem:$src), 0, "att">; 7963 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|" 7964 "$dst {${mask}} {z}, ${src}{1to4}}", 7965 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst, 7966 VK4WM:$mask, f64mem:$src), 0, "att">; 7967} 7968 7969defm VCVTPD2PS : avx512_cvt_trunc<0x5A, "vcvtpd2ps", 7970 avx512vl_f32_info, avx512vl_f64_info, SchedWriteCvtPD2PS>, 7971 REX_W, TB, PD, EVEX_CD8<64, CD8VF>; 7972defm VCVTPS2PD : 
avx512_cvt_extend<0x5A, "vcvtps2pd", 7973 avx512vl_f64_info, avx512vl_f32_info, SchedWriteCvtPS2PD>, 7974 TB, EVEX_CD8<32, CD8VH>; 7975 7976// Extend Half to Double 7977multiclass avx512_cvtph2pd<bits<8> opc, string OpcodeStr, 7978 X86SchedWriteWidths sched> { 7979 let Predicates = [HasFP16] in { 7980 defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f16x_info, 7981 any_fpextend, fpextend, sched.ZMM>, 7982 avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f16x_info, 7983 X86vfpextSAE, sched.ZMM>, EVEX_V512; 7984 def : Pat<(v8f64 (extloadv8f16 addr:$src)), 7985 (!cast<Instruction>(NAME # "Zrm") addr:$src)>; 7986 } 7987 let Predicates = [HasFP16, HasVLX] in { 7988 defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v8f16x_info, 7989 X86any_vfpext, X86vfpext, sched.XMM, "{1to2}", "", 7990 f32mem>, EVEX_V128; 7991 defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v8f16x_info, 7992 X86any_vfpext, X86vfpext, sched.YMM, "{1to4}", "", 7993 f64mem>, EVEX_V256; 7994 } 7995} 7996 7997// Truncate Double to Half 7998multiclass avx512_cvtpd2ph<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> { 7999 let Predicates = [HasFP16] in { 8000 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8f64_info, 8001 X86any_vfpround, X86vfpround, sched.ZMM, "{1to8}", "{z}">, 8002 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8f64_info, 8003 X86vfproundRnd, sched.ZMM>, EVEX_V512; 8004 } 8005 let Predicates = [HasFP16, HasVLX] in { 8006 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2f64x_info, null_frag, 8007 null_frag, sched.XMM, "{1to2}", "{x}", f128mem, 8008 VK2WM>, EVEX_V128; 8009 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4f64x_info, null_frag, 8010 null_frag, sched.YMM, "{1to4}", "{y}", f256mem, 8011 VK4WM>, EVEX_V256; 8012 } 8013 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}", 8014 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, 8015 VR128X:$src), 0, "att">; 8016 def : InstAlias<OpcodeStr#"x\t{$src, $dst 
{${mask}}|$dst {${mask}}, $src}", 8017 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst, 8018 VK2WM:$mask, VR128X:$src), 0, "att">; 8019 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}", 8020 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst, 8021 VK2WM:$mask, VR128X:$src), 0, "att">; 8022 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}", 8023 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, 8024 i64mem:$src), 0, "att">; 8025 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|" 8026 "$dst {${mask}}, ${src}{1to2}}", 8027 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst, 8028 VK2WM:$mask, i64mem:$src), 0, "att">; 8029 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|" 8030 "$dst {${mask}} {z}, ${src}{1to2}}", 8031 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst, 8032 VK2WM:$mask, i64mem:$src), 0, "att">; 8033 8034 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}", 8035 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, 8036 VR256X:$src), 0, "att">; 8037 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|" 8038 "$dst {${mask}}, $src}", 8039 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst, 8040 VK4WM:$mask, VR256X:$src), 0, "att">; 8041 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|" 8042 "$dst {${mask}} {z}, $src}", 8043 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst, 8044 VK4WM:$mask, VR256X:$src), 0, "att">; 8045 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}", 8046 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, 8047 i64mem:$src), 0, "att">; 8048 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|" 8049 "$dst {${mask}}, ${src}{1to4}}", 8050 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst, 8051 VK4WM:$mask, i64mem:$src), 0, "att">; 8052 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|" 8053 "$dst {${mask}} {z}, ${src}{1to4}}", 8054 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst, 8055 VK4WM:$mask, 
i64mem:$src), 0, "att">; 8056 8057 def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}", 8058 (!cast<Instruction>(NAME # "Zrr") VR128X:$dst, 8059 VR512:$src), 0, "att">; 8060 def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|" 8061 "$dst {${mask}}, $src}", 8062 (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst, 8063 VK8WM:$mask, VR512:$src), 0, "att">; 8064 def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|" 8065 "$dst {${mask}} {z}, $src}", 8066 (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst, 8067 VK8WM:$mask, VR512:$src), 0, "att">; 8068 def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}", 8069 (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst, 8070 i64mem:$src), 0, "att">; 8071 def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|" 8072 "$dst {${mask}}, ${src}{1to8}}", 8073 (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst, 8074 VK8WM:$mask, i64mem:$src), 0, "att">; 8075 def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|" 8076 "$dst {${mask}} {z}, ${src}{1to8}}", 8077 (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst, 8078 VK8WM:$mask, i64mem:$src), 0, "att">; 8079} 8080 8081defm VCVTPS2PHX : avx512_cvt_trunc<0x1D, "vcvtps2phx", avx512vl_f16_info, 8082 avx512vl_f32_info, SchedWriteCvtPD2PS, 8083 HasFP16>, T_MAP5, PD, EVEX_CD8<32, CD8VF>; 8084defm VCVTPH2PSX : avx512_cvt_extend<0x13, "vcvtph2psx", avx512vl_f32_info, 8085 avx512vl_f16_info, SchedWriteCvtPS2PD, 8086 HasFP16>, T_MAP6, PD, EVEX_CD8<16, CD8VH>; 8087defm VCVTPD2PH : avx512_cvtpd2ph<0x5A, "vcvtpd2ph", SchedWriteCvtPD2PS>, 8088 REX_W, T_MAP5, PD, EVEX_CD8<64, CD8VF>; 8089defm VCVTPH2PD : avx512_cvtph2pd<0x5A, "vcvtph2pd", SchedWriteCvtPS2PD>, 8090 T_MAP5, EVEX_CD8<16, CD8VQ>; 8091 8092let Predicates = [HasFP16, HasVLX] in { 8093 // Special patterns to allow use of X86vmfpround for masking. Instruction 8094 // patterns have been disabled with null_frag. 
8095 def : Pat<(v8f16 (X86any_vfpround (v4f64 VR256X:$src))), 8096 (VCVTPD2PHZ256rr VR256X:$src)>; 8097 def : Pat<(v8f16 (X86vmfpround (v4f64 VR256X:$src), (v8f16 VR128X:$src0), 8098 VK4WM:$mask)), 8099 (VCVTPD2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>; 8100 def : Pat<(X86vmfpround (v4f64 VR256X:$src), v8f16x_info.ImmAllZerosV, 8101 VK4WM:$mask), 8102 (VCVTPD2PHZ256rrkz VK4WM:$mask, VR256X:$src)>; 8103 8104 def : Pat<(v8f16 (X86any_vfpround (loadv4f64 addr:$src))), 8105 (VCVTPD2PHZ256rm addr:$src)>; 8106 def : Pat<(X86vmfpround (loadv4f64 addr:$src), (v8f16 VR128X:$src0), 8107 VK4WM:$mask), 8108 (VCVTPD2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>; 8109 def : Pat<(X86vmfpround (loadv4f64 addr:$src), v8f16x_info.ImmAllZerosV, 8110 VK4WM:$mask), 8111 (VCVTPD2PHZ256rmkz VK4WM:$mask, addr:$src)>; 8112 8113 def : Pat<(v8f16 (X86any_vfpround (v4f64 (X86VBroadcastld64 addr:$src)))), 8114 (VCVTPD2PHZ256rmb addr:$src)>; 8115 def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)), 8116 (v8f16 VR128X:$src0), VK4WM:$mask), 8117 (VCVTPD2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>; 8118 def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)), 8119 v8f16x_info.ImmAllZerosV, VK4WM:$mask), 8120 (VCVTPD2PHZ256rmbkz VK4WM:$mask, addr:$src)>; 8121 8122 def : Pat<(v8f16 (X86any_vfpround (v2f64 VR128X:$src))), 8123 (VCVTPD2PHZ128rr VR128X:$src)>; 8124 def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v8f16 VR128X:$src0), 8125 VK2WM:$mask), 8126 (VCVTPD2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>; 8127 def : Pat<(X86vmfpround (v2f64 VR128X:$src), v8f16x_info.ImmAllZerosV, 8128 VK2WM:$mask), 8129 (VCVTPD2PHZ128rrkz VK2WM:$mask, VR128X:$src)>; 8130 8131 def : Pat<(v8f16 (X86any_vfpround (loadv2f64 addr:$src))), 8132 (VCVTPD2PHZ128rm addr:$src)>; 8133 def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v8f16 VR128X:$src0), 8134 VK2WM:$mask), 8135 (VCVTPD2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8136 def : Pat<(X86vmfpround (loadv2f64 addr:$src), 
v8f16x_info.ImmAllZerosV, 8137 VK2WM:$mask), 8138 (VCVTPD2PHZ128rmkz VK2WM:$mask, addr:$src)>; 8139 8140 def : Pat<(v8f16 (X86any_vfpround (v2f64 (X86VBroadcastld64 addr:$src)))), 8141 (VCVTPD2PHZ128rmb addr:$src)>; 8142 def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)), 8143 (v8f16 VR128X:$src0), VK2WM:$mask), 8144 (VCVTPD2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8145 def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)), 8146 v8f16x_info.ImmAllZerosV, VK2WM:$mask), 8147 (VCVTPD2PHZ128rmbkz VK2WM:$mask, addr:$src)>; 8148} 8149 8150// Convert Signed/Unsigned Doubleword to Double 8151let Uses = []<Register>, mayRaiseFPException = 0 in 8152multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 8153 SDNode MaskOpNode, SDPatternOperator OpNode128, 8154 SDNode MaskOpNode128, 8155 X86SchedWriteWidths sched> { 8156 // No rounding in this op 8157 let Predicates = [HasAVX512] in 8158 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode, 8159 MaskOpNode, sched.ZMM>, EVEX_V512; 8160 8161 let Predicates = [HasVLX] in { 8162 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info, 8163 OpNode128, MaskOpNode128, sched.XMM, "{1to2}", 8164 "", i64mem, VK2WM, 8165 (v2f64 (OpNode128 (bc_v4i32 8166 (v2i64 8167 (scalar_to_vector (loadi64 addr:$src)))))), 8168 (v2f64 (MaskOpNode128 (bc_v4i32 8169 (v2i64 8170 (scalar_to_vector (loadi64 addr:$src))))))>, 8171 EVEX_V128; 8172 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode, 8173 MaskOpNode, sched.YMM>, EVEX_V256; 8174 } 8175} 8176 8177// Convert Signed/Unsigned Doubleword to Float 8178multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 8179 SDNode MaskOpNode, SDNode OpNodeRnd, 8180 X86SchedWriteWidths sched> { 8181 let Predicates = [HasAVX512] in 8182 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode, 8183 MaskOpNode, sched.ZMM>, 8184 avx512_vcvt_fp_rc<opc, OpcodeStr, 
v16f32_info, v16i32_info, 8185 OpNodeRnd, sched.ZMM>, EVEX_V512; 8186 8187 let Predicates = [HasVLX] in { 8188 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode, 8189 MaskOpNode, sched.XMM>, EVEX_V128; 8190 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode, 8191 MaskOpNode, sched.YMM>, EVEX_V256; 8192 } 8193} 8194 8195// Convert Float to Signed/Unsigned Doubleword with truncation 8196multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 8197 SDNode MaskOpNode, 8198 SDNode OpNodeSAE, X86SchedWriteWidths sched> { 8199 let Predicates = [HasAVX512] in { 8200 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode, 8201 MaskOpNode, sched.ZMM>, 8202 avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info, 8203 OpNodeSAE, sched.ZMM>, EVEX_V512; 8204 } 8205 let Predicates = [HasVLX] in { 8206 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode, 8207 MaskOpNode, sched.XMM>, EVEX_V128; 8208 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode, 8209 MaskOpNode, sched.YMM>, EVEX_V256; 8210 } 8211} 8212 8213// Convert Float to Signed/Unsigned Doubleword 8214multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode, 8215 SDNode MaskOpNode, SDNode OpNodeRnd, 8216 X86SchedWriteWidths sched> { 8217 let Predicates = [HasAVX512] in { 8218 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode, 8219 MaskOpNode, sched.ZMM>, 8220 avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info, 8221 OpNodeRnd, sched.ZMM>, EVEX_V512; 8222 } 8223 let Predicates = [HasVLX] in { 8224 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode, 8225 MaskOpNode, sched.XMM>, EVEX_V128; 8226 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode, 8227 MaskOpNode, sched.YMM>, EVEX_V256; 8228 } 8229} 8230 8231// Convert Double to Signed/Unsigned Doubleword with truncation 8232multiclass 
avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, 8233 SDNode MaskOpNode, SDNode OpNodeSAE, 8234 X86SchedWriteWidths sched> { 8235 let Predicates = [HasAVX512] in { 8236 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode, 8237 MaskOpNode, sched.ZMM>, 8238 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info, 8239 OpNodeSAE, sched.ZMM>, EVEX_V512; 8240 } 8241 let Predicates = [HasVLX] in { 8242 // we need "x"/"y" suffixes in order to distinguish between 128 and 256 8243 // memory forms of these instructions in Asm Parser. They have the same 8244 // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly 8245 // due to the same reason. 8246 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info, 8247 null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem, 8248 VK2WM>, EVEX_V128; 8249 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode, 8250 MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256; 8251 } 8252 8253 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}", 8254 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, 8255 VR128X:$src), 0, "att">; 8256 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", 8257 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst, 8258 VK2WM:$mask, VR128X:$src), 0, "att">; 8259 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}", 8260 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst, 8261 VK2WM:$mask, VR128X:$src), 0, "att">; 8262 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}", 8263 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, 8264 f64mem:$src), 0, "att">; 8265 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|" 8266 "$dst {${mask}}, ${src}{1to2}}", 8267 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst, 8268 VK2WM:$mask, f64mem:$src), 0, "att">; 8269 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|" 8270 "$dst {${mask}} 
{z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;

  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                  VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                  f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
}

// Convert Double to Signed/Unsigned Doubleword (VCVTPD2DQ / VCVTPD2UDQ).
// The 512-bit form also gets a rounding-control variant via avx512_vcvt_fp_rc.
multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
    // memory forms of these instructions in Asm Parser. They have the same
    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
    // due to the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
                               VK2WM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
                               MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
  }

  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                  f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, f64mem:$src), 0, "att">;

  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                  f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask, f64mem:$src), 0, "att">;
}

// Convert Double to Signed/Unsigned Quadword
multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
                               MaskOpNode, sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

// Convert Double to Signed/Unsigned Quadword with truncation
multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDNode MaskOpNode, SDNode OpNodeRnd,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
                               MaskOpNode, sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

// Convert Signed/Unsigned Quadword to Double
multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           SDNode MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
                               MaskOpNode, sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

// Convert Float to Signed/Unsigned Quadword
multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from v4f32x_info source
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
                               MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
                               (v2i64 (OpNode (bc_v4f32
                                               (v2f64
                                                (scalar_to_vector (loadf64 addr:$src)))))),
                               (v2i64 (MaskOpNode (bc_v4f32
                                                   (v2f64
                                                    (scalar_to_vector (loadf64 addr:$src))))))>,
                               EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

// Convert Float to Signed/Unsigned Quadword with truncation
multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDNode MaskOpNode, SDNode OpNodeRnd,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from v4f32x_info source
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
                               MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
                               (v2i64 (OpNode (bc_v4f32
                                               (v2f64
                                                (scalar_to_vector (loadf64 addr:$src)))))),
                               (v2i64 (MaskOpNode (bc_v4f32
                                                   (v2f64
                                                    (scalar_to_vector (loadf64 addr:$src))))))>,
                               EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

// Convert Signed/Unsigned Quadword to Float
// Also Convert Signed/Unsigned Doubleword to Half
multiclass avx512_cvtqq2ps_dq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                                 SDPatternOperator MaskOpNode, SDPatternOperator OpNode128,
                                 SDPatternOperator OpNode128M, SDPatternOperator OpNodeRnd,
                                 AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
                                 X86SchedWriteWidths sched, Predicate prd = HasDQI> {
  let Predicates = [prd] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
    // memory forms of these instructions in Asm Parser. They have the same
    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
    // due to the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128, null_frag,
                               null_frag, sched.XMM, _src.info128.BroadcastStr,
                               "{x}", i128mem, _src.info128.KRCWM>,
                               EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256, OpNode,
                               MaskOpNode, sched.YMM, _src.info256.BroadcastStr,
                               "{y}">, EVEX_V256;

    // Special patterns to allow use of X86VM[SU]intToFP for masking. Instruction
    // patterns have been disabled with null_frag.
    def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT VR128X:$src))),
              (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
    def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
                          _src.info128.KRCWM:$mask),
              (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, _src.info128.KRCWM:$mask, VR128X:$src)>;
    def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
                          _src.info128.KRCWM:$mask),
              (!cast<Instruction>(NAME # "Z128rrkz") _src.info128.KRCWM:$mask, VR128X:$src)>;

    def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.LdFrag addr:$src))),
              (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
    def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), (_dst.info128.VT VR128X:$src0),
                          _src.info128.KRCWM:$mask),
              (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
    def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), _dst.info128.ImmAllZerosV,
                          _src.info128.KRCWM:$mask),
              (!cast<Instruction>(NAME # "Z128rmkz") _src.info128.KRCWM:$mask, addr:$src)>;

    def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT (X86VBroadcastld64 addr:$src)))),
              (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
    def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
8516 (_dst.info128.VT VR128X:$src0), _src.info128.KRCWM:$mask), 8517 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>; 8518 def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)), 8519 _dst.info128.ImmAllZerosV, _src.info128.KRCWM:$mask), 8520 (!cast<Instruction>(NAME # "Z128rmbkz") _src.info128.KRCWM:$mask, addr:$src)>; 8521 } 8522 8523 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}", 8524 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, 8525 VR128X:$src), 0, "att">; 8526 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", 8527 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst, 8528 VK2WM:$mask, VR128X:$src), 0, "att">; 8529 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}", 8530 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst, 8531 VK2WM:$mask, VR128X:$src), 0, "att">; 8532 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}", 8533 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, 8534 i64mem:$src), 0, "att">; 8535 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|" 8536 "$dst {${mask}}, ${src}{1to2}}", 8537 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst, 8538 VK2WM:$mask, i64mem:$src), 0, "att">; 8539 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|" 8540 "$dst {${mask}} {z}, ${src}{1to2}}", 8541 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst, 8542 VK2WM:$mask, i64mem:$src), 0, "att">; 8543 8544 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}", 8545 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, 8546 VR256X:$src), 0, "att">; 8547 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|" 8548 "$dst {${mask}}, $src}", 8549 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst, 8550 VK4WM:$mask, VR256X:$src), 0, "att">; 8551 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|" 8552 "$dst {${mask}} {z}, $src}", 8553 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst, 8554 VK4WM:$mask, 
VR256X:$src), 0, "att">; 8555 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}", 8556 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, 8557 i64mem:$src), 0, "att">; 8558 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|" 8559 "$dst {${mask}}, ${src}{1to4}}", 8560 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst, 8561 VK4WM:$mask, i64mem:$src), 0, "att">; 8562 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|" 8563 "$dst {${mask}} {z}, ${src}{1to4}}", 8564 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst, 8565 VK4WM:$mask, i64mem:$src), 0, "att">; 8566} 8567 8568defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", any_sint_to_fp, sint_to_fp, 8569 X86any_VSintToFP, X86VSintToFP, 8570 SchedWriteCvtDQ2PD>, TB, XS, EVEX_CD8<32, CD8VH>; 8571 8572defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", any_sint_to_fp, sint_to_fp, 8573 X86VSintToFpRnd, SchedWriteCvtDQ2PS>, 8574 TB, EVEX_CD8<32, CD8VF>; 8575 8576defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86any_cvttp2si, 8577 X86cvttp2si, X86cvttp2siSAE, 8578 SchedWriteCvtPS2DQ>, TB, XS, EVEX_CD8<32, CD8VF>; 8579 8580defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86any_cvttp2si, 8581 X86cvttp2si, X86cvttp2siSAE, 8582 SchedWriteCvtPD2DQ>, 8583 TB, PD, REX_W, EVEX_CD8<64, CD8VF>; 8584 8585defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86any_cvttp2ui, 8586 X86cvttp2ui, X86cvttp2uiSAE, 8587 SchedWriteCvtPS2DQ>, TB, EVEX_CD8<32, CD8VF>; 8588 8589defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86any_cvttp2ui, 8590 X86cvttp2ui, X86cvttp2uiSAE, 8591 SchedWriteCvtPD2DQ>, 8592 TB, REX_W, EVEX_CD8<64, CD8VF>; 8593 8594defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", any_uint_to_fp, 8595 uint_to_fp, X86any_VUintToFP, X86VUintToFP, 8596 SchedWriteCvtDQ2PD>, TB, XS, EVEX_CD8<32, CD8VH>; 8597 8598defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", any_uint_to_fp, 8599 uint_to_fp, X86VUintToFpRnd, 8600 SchedWriteCvtDQ2PS>, TB, XD, EVEX_CD8<32, 
CD8VF>; 8601 8602defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int, X86cvtp2Int, 8603 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, TB, PD, 8604 EVEX_CD8<32, CD8VF>; 8605 8606defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int, X86cvtp2Int, 8607 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, TB, XD, 8608 REX_W, EVEX_CD8<64, CD8VF>; 8609 8610defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt, X86cvtp2UInt, 8611 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, 8612 TB, EVEX_CD8<32, CD8VF>; 8613 8614defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt, X86cvtp2UInt, 8615 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, REX_W, 8616 TB, EVEX_CD8<64, CD8VF>; 8617 8618defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int, X86cvtp2Int, 8619 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, REX_W, 8620 TB, PD, EVEX_CD8<64, CD8VF>; 8621 8622defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int, X86cvtp2Int, 8623 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, TB, PD, 8624 EVEX_CD8<32, CD8VH>; 8625 8626defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt, X86cvtp2UInt, 8627 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, REX_W, 8628 TB, PD, EVEX_CD8<64, CD8VF>; 8629 8630defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt, X86cvtp2UInt, 8631 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, TB, PD, 8632 EVEX_CD8<32, CD8VH>; 8633 8634defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86any_cvttp2si, 8635 X86cvttp2si, X86cvttp2siSAE, 8636 SchedWriteCvtPD2DQ>, REX_W, 8637 TB, PD, EVEX_CD8<64, CD8VF>; 8638 8639defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86any_cvttp2si, 8640 X86cvttp2si, X86cvttp2siSAE, 8641 SchedWriteCvtPS2DQ>, TB, PD, 8642 EVEX_CD8<32, CD8VH>; 8643 8644defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86any_cvttp2ui, 8645 X86cvttp2ui, X86cvttp2uiSAE, 8646 SchedWriteCvtPD2DQ>, REX_W, 8647 TB, PD, EVEX_CD8<64, CD8VF>; 8648 8649defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86any_cvttp2ui, 8650 X86cvttp2ui, 
X86cvttp2uiSAE, 8651 SchedWriteCvtPS2DQ>, TB, PD, 8652 EVEX_CD8<32, CD8VH>; 8653 8654defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", any_sint_to_fp, 8655 sint_to_fp, X86VSintToFpRnd, 8656 SchedWriteCvtDQ2PD>, REX_W, TB, XS, EVEX_CD8<64, CD8VF>; 8657 8658defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", any_uint_to_fp, 8659 uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PD>, 8660 REX_W, TB, XS, EVEX_CD8<64, CD8VF>; 8661 8662defm VCVTDQ2PH : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtdq2ph", any_sint_to_fp, sint_to_fp, 8663 X86any_VSintToFP, X86VMSintToFP, 8664 X86VSintToFpRnd, avx512vl_f16_info, avx512vl_i32_info, 8665 SchedWriteCvtDQ2PS, HasFP16>, 8666 T_MAP5, EVEX_CD8<32, CD8VF>; 8667 8668defm VCVTUDQ2PH : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtudq2ph", any_uint_to_fp, uint_to_fp, 8669 X86any_VUintToFP, X86VMUintToFP, 8670 X86VUintToFpRnd, avx512vl_f16_info, avx512vl_i32_info, 8671 SchedWriteCvtDQ2PS, HasFP16>, T_MAP5, XD, 8672 EVEX_CD8<32, CD8VF>; 8673 8674defm VCVTQQ2PS : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtqq2ps", any_sint_to_fp, sint_to_fp, 8675 X86any_VSintToFP, X86VMSintToFP, 8676 X86VSintToFpRnd, avx512vl_f32_info, avx512vl_i64_info, 8677 SchedWriteCvtDQ2PS>, REX_W, TB, 8678 EVEX_CD8<64, CD8VF>; 8679 8680defm VCVTUQQ2PS : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtuqq2ps", any_uint_to_fp, uint_to_fp, 8681 X86any_VUintToFP, X86VMUintToFP, 8682 X86VUintToFpRnd, avx512vl_f32_info, avx512vl_i64_info, 8683 SchedWriteCvtDQ2PS>, REX_W, TB, XD, 8684 EVEX_CD8<64, CD8VF>; 8685 8686let Predicates = [HasVLX] in { 8687 // Special patterns to allow use of X86mcvtp2Int for masking. Instruction 8688 // patterns have been disabled with null_frag. 
8689 def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))), 8690 (VCVTPD2DQZ128rr VR128X:$src)>; 8691 def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), (v4i32 VR128X:$src0), 8692 VK2WM:$mask), 8693 (VCVTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>; 8694 def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV, 8695 VK2WM:$mask), 8696 (VCVTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>; 8697 8698 def : Pat<(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))), 8699 (VCVTPD2DQZ128rm addr:$src)>; 8700 def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), (v4i32 VR128X:$src0), 8701 VK2WM:$mask), 8702 (VCVTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8703 def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV, 8704 VK2WM:$mask), 8705 (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>; 8706 8707 def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)))), 8708 (VCVTPD2DQZ128rmb addr:$src)>; 8709 def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)), 8710 (v4i32 VR128X:$src0), VK2WM:$mask), 8711 (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8712 def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)), 8713 v4i32x_info.ImmAllZerosV, VK2WM:$mask), 8714 (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>; 8715 8716 // Special patterns to allow use of X86mcvttp2si for masking. Instruction 8717 // patterns have been disabled with null_frag. 
8718 def : Pat<(v4i32 (X86any_cvttp2si (v2f64 VR128X:$src))), 8719 (VCVTTPD2DQZ128rr VR128X:$src)>; 8720 def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0), 8721 VK2WM:$mask), 8722 (VCVTTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>; 8723 def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV, 8724 VK2WM:$mask), 8725 (VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>; 8726 8727 def : Pat<(v4i32 (X86any_cvttp2si (loadv2f64 addr:$src))), 8728 (VCVTTPD2DQZ128rm addr:$src)>; 8729 def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0), 8730 VK2WM:$mask), 8731 (VCVTTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8732 def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV, 8733 VK2WM:$mask), 8734 (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>; 8735 8736 def : Pat<(v4i32 (X86any_cvttp2si (v2f64 (X86VBroadcastld64 addr:$src)))), 8737 (VCVTTPD2DQZ128rmb addr:$src)>; 8738 def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)), 8739 (v4i32 VR128X:$src0), VK2WM:$mask), 8740 (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8741 def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)), 8742 v4i32x_info.ImmAllZerosV, VK2WM:$mask), 8743 (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>; 8744 8745 // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction 8746 // patterns have been disabled with null_frag. 
  def : Pat<(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src))),
            (VCVTPD2UDQZ128rr VR128X:$src)>;
  def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v4i32 (X86cvtp2UInt (loadv2f64 addr:$src))),
            (VCVTPD2UDQZ128rm addr:$src)>;
  def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)))),
            (VCVTPD2UDQZ128rmb addr:$src)>;
  def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
                           (v4i32 VR128X:$src0), VK2WM:$mask),
            (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
                           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;

  // Special patterns to allow use of X86mcvttp2ui for masking. Instruction
  // patterns have been disabled with null_frag.
8776 def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 VR128X:$src))), 8777 (VCVTTPD2UDQZ128rr VR128X:$src)>; 8778 def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0), 8779 VK2WM:$mask), 8780 (VCVTTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>; 8781 def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV, 8782 VK2WM:$mask), 8783 (VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>; 8784 8785 def : Pat<(v4i32 (X86any_cvttp2ui (loadv2f64 addr:$src))), 8786 (VCVTTPD2UDQZ128rm addr:$src)>; 8787 def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0), 8788 VK2WM:$mask), 8789 (VCVTTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8790 def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV, 8791 VK2WM:$mask), 8792 (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>; 8793 8794 def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)))), 8795 (VCVTTPD2UDQZ128rmb addr:$src)>; 8796 def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)), 8797 (v4i32 VR128X:$src0), VK2WM:$mask), 8798 (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8799 def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)), 8800 v4i32x_info.ImmAllZerosV, VK2WM:$mask), 8801 (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>; 8802} 8803 8804let Predicates = [HasDQI, HasVLX] in { 8805 def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))), 8806 (VCVTPS2QQZ128rm addr:$src)>; 8807 def : Pat<(v2i64 (vselect_mask VK2WM:$mask, 8808 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), 8809 VR128X:$src0)), 8810 (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8811 def : Pat<(v2i64 (vselect_mask VK2WM:$mask, 8812 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), 8813 v2i64x_info.ImmAllZerosV)), 8814 (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>; 8815 8816 def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))), 8817 (VCVTPS2UQQZ128rm addr:$src)>; 8818 def : Pat<(v2i64 
(vselect_mask VK2WM:$mask, 8819 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), 8820 VR128X:$src0)), 8821 (VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8822 def : Pat<(v2i64 (vselect_mask VK2WM:$mask, 8823 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), 8824 v2i64x_info.ImmAllZerosV)), 8825 (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>; 8826 8827 def : Pat<(v2i64 (X86any_cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))), 8828 (VCVTTPS2QQZ128rm addr:$src)>; 8829 def : Pat<(v2i64 (vselect_mask VK2WM:$mask, 8830 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), 8831 VR128X:$src0)), 8832 (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8833 def : Pat<(v2i64 (vselect_mask VK2WM:$mask, 8834 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), 8835 v2i64x_info.ImmAllZerosV)), 8836 (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>; 8837 8838 def : Pat<(v2i64 (X86any_cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))), 8839 (VCVTTPS2UQQZ128rm addr:$src)>; 8840 def : Pat<(v2i64 (vselect_mask VK2WM:$mask, 8841 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), 8842 VR128X:$src0)), 8843 (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8844 def : Pat<(v2i64 (vselect_mask VK2WM:$mask, 8845 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), 8846 v2i64x_info.ImmAllZerosV)), 8847 (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>; 8848} 8849 8850let Predicates = [HasVLX] in { 8851 def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))), 8852 (VCVTDQ2PDZ128rm addr:$src)>; 8853 def : Pat<(v2f64 (vselect_mask VK2WM:$mask, 8854 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))), 8855 VR128X:$src0)), 8856 (VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8857 def : Pat<(v2f64 (vselect_mask VK2WM:$mask, 8858 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))), 8859 v2f64x_info.ImmAllZerosV)), 8860 (VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>; 8861 8862 def : 
Pat<(v2f64 (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))), 8863 (VCVTUDQ2PDZ128rm addr:$src)>; 8864 def : Pat<(v2f64 (vselect_mask VK2WM:$mask, 8865 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))), 8866 VR128X:$src0)), 8867 (VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 8868 def : Pat<(v2f64 (vselect_mask VK2WM:$mask, 8869 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))), 8870 v2f64x_info.ImmAllZerosV)), 8871 (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>; 8872} 8873 8874//===----------------------------------------------------------------------===// 8875// Half precision conversion instructions 8876//===----------------------------------------------------------------------===// 8877 8878let Uses = [MXCSR], mayRaiseFPException = 1 in 8879multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src, 8880 X86MemOperand x86memop, dag ld_dag, 8881 X86FoldableSchedWrite sched> { 8882 defm rr : AVX512_maskable_split<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst), 8883 (ins _src.RC:$src), "vcvtph2ps", "$src", "$src", 8884 (X86any_cvtph2ps (_src.VT _src.RC:$src)), 8885 (X86cvtph2ps (_src.VT _src.RC:$src))>, 8886 T8, PD, Sched<[sched]>; 8887 defm rm : AVX512_maskable_split<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst), 8888 (ins x86memop:$src), "vcvtph2ps", "$src", "$src", 8889 (X86any_cvtph2ps (_src.VT ld_dag)), 8890 (X86cvtph2ps (_src.VT ld_dag))>, 8891 T8, PD, Sched<[sched.Folded]>; 8892} 8893 8894multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src, 8895 X86FoldableSchedWrite sched> { 8896 let Uses = [MXCSR] in 8897 defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst), 8898 (ins _src.RC:$src), "vcvtph2ps", 8899 "{sae}, $src", "$src, {sae}", 8900 (X86cvtph2psSAE (_src.VT _src.RC:$src))>, 8901 T8, PD, EVEX_B, Sched<[sched]>; 8902} 8903 8904let Predicates = [HasAVX512] in 8905 defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, 8906 (load addr:$src), 
WriteCvtPH2PSZ>, 8907 avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>, 8908 EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>; 8909 8910let Predicates = [HasVLX] in { 8911 defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem, 8912 (load addr:$src), WriteCvtPH2PSY>, EVEX, EVEX_V256, 8913 EVEX_CD8<32, CD8VH>; 8914 defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem, 8915 (bitconvert (v2i64 (X86vzload64 addr:$src))), 8916 WriteCvtPH2PS>, EVEX, EVEX_V128, 8917 EVEX_CD8<32, CD8VH>; 8918 8919 // Pattern match vcvtph2ps of a scalar i64 load. 8920 def : Pat<(v4f32 (X86any_cvtph2ps (v8i16 (bitconvert 8921 (v2i64 (scalar_to_vector (loadi64 addr:$src))))))), 8922 (VCVTPH2PSZ128rm addr:$src)>; 8923} 8924 8925multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src, 8926 X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> { 8927let ExeDomain = GenericDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { 8928 def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst), 8929 (ins _src.RC:$src1, i32u8imm:$src2), 8930 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", 8931 [(set _dest.RC:$dst, 8932 (X86any_cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>, 8933 Sched<[RR]>; 8934 let Constraints = "$src0 = $dst" in 8935 def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst), 8936 (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), 8937 "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", 8938 [(set _dest.RC:$dst, 8939 (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2), 8940 _dest.RC:$src0, _src.KRCWM:$mask))]>, 8941 Sched<[RR]>, EVEX_K; 8942 def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst), 8943 (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), 8944 "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}", 8945 [(set _dest.RC:$dst, 8946 (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2), 8947 _dest.ImmAllZerosV, 
_src.KRCWM:$mask))]>, 8948 Sched<[RR]>, EVEX_KZ; 8949 let hasSideEffects = 0, mayStore = 1 in { 8950 def mr : AVX512AIi8<0x1D, MRMDestMem, (outs), 8951 (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2), 8952 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 8953 Sched<[MR]>; 8954 def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs), 8955 (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), 8956 "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>, 8957 EVEX_K, Sched<[MR]>; 8958 } 8959} 8960} 8961 8962multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src, 8963 SchedWrite Sched> { 8964 let hasSideEffects = 0, Uses = [MXCSR] in { 8965 def rrb : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst), 8966 (ins _src.RC:$src1, i32u8imm:$src2), 8967 "vcvtps2ph\t{$src2, {sae}, $src1, $dst|$dst, $src1, {sae}, $src2}", 8968 [(set _dest.RC:$dst, 8969 (X86cvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>, 8970 EVEX_B, Sched<[Sched]>; 8971 let Constraints = "$src0 = $dst" in 8972 def rrbk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst), 8973 (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), 8974 "vcvtps2ph\t{$src2, {sae}, $src1, $dst {${mask}}|$dst {${mask}}, $src1, {sae}, $src2}", 8975 [(set _dest.RC:$dst, 8976 (X86mcvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2), 8977 _dest.RC:$src0, _src.KRCWM:$mask))]>, 8978 EVEX_B, Sched<[Sched]>, EVEX_K; 8979 def rrbkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst), 8980 (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), 8981 "vcvtps2ph\t{$src2, {sae}, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, {sae}, $src2}", 8982 [(set _dest.RC:$dst, 8983 (X86mcvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2), 8984 _dest.ImmAllZerosV, _src.KRCWM:$mask))]>, 8985 EVEX_B, Sched<[Sched]>, EVEX_KZ; 8986} 8987} 8988 8989let Predicates = [HasAVX512] in { 8990 defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem, 8991 
WriteCvtPS2PHZ, WriteCvtPS2PHZSt>, 8992 avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>, 8993 EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>; 8994 8995 def : Pat<(store (v16i16 (X86any_cvtps2ph VR512:$src1, timm:$src2)), addr:$dst), 8996 (VCVTPS2PHZmr addr:$dst, VR512:$src1, timm:$src2)>; 8997} 8998 8999let Predicates = [HasVLX] in { 9000 defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem, 9001 WriteCvtPS2PHY, WriteCvtPS2PHYSt>, 9002 EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>; 9003 defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem, 9004 WriteCvtPS2PH, WriteCvtPS2PHSt>, 9005 EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>; 9006 9007 def : Pat<(store (f64 (extractelt 9008 (bc_v2f64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))), 9009 (iPTR 0))), addr:$dst), 9010 (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>; 9011 def : Pat<(store (i64 (extractelt 9012 (bc_v2i64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))), 9013 (iPTR 0))), addr:$dst), 9014 (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>; 9015 def : Pat<(store (v8i16 (X86any_cvtps2ph VR256X:$src1, timm:$src2)), addr:$dst), 9016 (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, timm:$src2)>; 9017} 9018 9019// Unordered/Ordered scalar fp compare with Sae and set EFLAGS 9020multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _, 9021 string OpcodeStr, Domain d, 9022 X86FoldableSchedWrite sched = WriteFComX> { 9023 let ExeDomain = d, hasSideEffects = 0, Uses = [MXCSR] in 9024 def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2), 9025 !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>, 9026 EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>; 9027} 9028 9029let Defs = [EFLAGS], Predicates = [HasAVX512] in { 9030 defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", SSEPackedSingle>, 9031 AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>; 9032 defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", SSEPackedDouble>, 9033 
AVX512PDIi8Base, REX_W, EVEX_CD8<64, CD8VT1>; 9034 defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", SSEPackedSingle>, 9035 AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>; 9036 defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", SSEPackedDouble>, 9037 AVX512PDIi8Base, REX_W, EVEX_CD8<64, CD8VT1>; 9038} 9039 9040let Defs = [EFLAGS], Predicates = [HasAVX512] in { 9041 defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86any_fcmp, f32, f32mem, loadf32, 9042 "ucomiss", SSEPackedSingle>, TB, EVEX, VEX_LIG, 9043 EVEX_CD8<32, CD8VT1>; 9044 defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86any_fcmp, f64, f64mem, loadf64, 9045 "ucomisd", SSEPackedDouble>, TB, PD, EVEX, 9046 VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>; 9047 defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, X86strict_fcmps, f32, f32mem, loadf32, 9048 "comiss", SSEPackedSingle>, TB, EVEX, VEX_LIG, 9049 EVEX_CD8<32, CD8VT1>; 9050 defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, X86strict_fcmps, f64, f64mem, loadf64, 9051 "comisd", SSEPackedDouble>, TB, PD, EVEX, 9052 VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>; 9053 let isCodeGenOnly = 1 in { 9054 defm VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem, 9055 sse_load_f32, "ucomiss", SSEPackedSingle>, TB, EVEX, VEX_LIG, 9056 EVEX_CD8<32, CD8VT1>; 9057 defm VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem, 9058 sse_load_f64, "ucomisd", SSEPackedDouble>, TB, PD, EVEX, 9059 VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>; 9060 9061 defm VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem, 9062 sse_load_f32, "comiss", SSEPackedSingle>, TB, EVEX, VEX_LIG, 9063 EVEX_CD8<32, CD8VT1>; 9064 defm VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem, 9065 sse_load_f64, "comisd", SSEPackedDouble>, TB, PD, EVEX, 9066 VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>; 9067 } 9068} 9069 9070let Defs = [EFLAGS], Predicates = [HasFP16] in { 9071 defm VUCOMISHZ : avx512_ord_cmp_sae<0x2E, v8f16x_info, "vucomish", 9072 SSEPackedSingle>, AVX512PSIi8Base, T_MAP5, 
9073 EVEX_CD8<16, CD8VT1>; 9074 defm VCOMISHZ : avx512_ord_cmp_sae<0x2F, v8f16x_info, "vcomish", 9075 SSEPackedSingle>, AVX512PSIi8Base, T_MAP5, 9076 EVEX_CD8<16, CD8VT1>; 9077 defm VUCOMISHZ : sse12_ord_cmp<0x2E, FR16X, X86any_fcmp, f16, f16mem, loadf16, 9078 "ucomish", SSEPackedSingle>, T_MAP5, EVEX, 9079 VEX_LIG, EVEX_CD8<16, CD8VT1>; 9080 defm VCOMISHZ : sse12_ord_cmp<0x2F, FR16X, X86strict_fcmps, f16, f16mem, loadf16, 9081 "comish", SSEPackedSingle>, T_MAP5, EVEX, 9082 VEX_LIG, EVEX_CD8<16, CD8VT1>; 9083 let isCodeGenOnly = 1 in { 9084 defm VUCOMISHZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v8f16, shmem, 9085 sse_load_f16, "ucomish", SSEPackedSingle>, 9086 T_MAP5, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>; 9087 9088 defm VCOMISHZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v8f16, shmem, 9089 sse_load_f16, "comish", SSEPackedSingle>, 9090 T_MAP5, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>; 9091 } 9092} 9093 9094/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd, rcpsh, rsqrtsh 9095multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode, 9096 X86FoldableSchedWrite sched, X86VectorVTInfo _, 9097 Predicate prd = HasAVX512> { 9098 let Predicates = [prd], ExeDomain = _.ExeDomain in { 9099 defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 9100 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 9101 "$src2, $src1", "$src1, $src2", 9102 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>, 9103 EVEX, VVVV, VEX_LIG, Sched<[sched]>; 9104 defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 9105 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr, 9106 "$src2, $src1", "$src1, $src2", 9107 (OpNode (_.VT _.RC:$src1), 9108 (_.ScalarIntMemFrags addr:$src2))>, EVEX, VVVV, VEX_LIG, 9109 Sched<[sched.Folded, sched.ReadAfterFold]>; 9110} 9111} 9112 9113defm VRCPSHZ : avx512_fp14_s<0x4D, "vrcpsh", X86rcp14s, SchedWriteFRcp.Scl, 9114 f16x_info, HasFP16>, EVEX_CD8<16, CD8VT1>, 9115 T_MAP6, PD; 9116defm VRSQRTSHZ : avx512_fp14_s<0x4F, "vrsqrtsh", 
X86rsqrt14s, 9117 SchedWriteFRsqrt.Scl, f16x_info, HasFP16>, 9118 EVEX_CD8<16, CD8VT1>, T_MAP6, PD; 9119let Uses = [MXCSR] in { 9120defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl, 9121 f32x_info>, EVEX_CD8<32, CD8VT1>, 9122 T8, PD; 9123defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl, 9124 f64x_info>, REX_W, EVEX_CD8<64, CD8VT1>, 9125 T8, PD; 9126defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s, 9127 SchedWriteFRsqrt.Scl, f32x_info>, 9128 EVEX_CD8<32, CD8VT1>, T8, PD; 9129defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s, 9130 SchedWriteFRsqrt.Scl, f64x_info>, REX_W, 9131 EVEX_CD8<64, CD8VT1>, T8, PD; 9132} 9133 9134/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd 9135multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode, 9136 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 9137 let ExeDomain = _.ExeDomain in { 9138 defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 9139 (ins _.RC:$src), OpcodeStr, "$src", "$src", 9140 (_.VT (OpNode _.RC:$src))>, EVEX, T8, PD, 9141 Sched<[sched]>; 9142 defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 9143 (ins _.MemOp:$src), OpcodeStr, "$src", "$src", 9144 (OpNode (_.VT 9145 (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8, PD, 9146 Sched<[sched.Folded, sched.ReadAfterFold]>; 9147 defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 9148 (ins _.ScalarMemOp:$src), OpcodeStr, 9149 "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr, 9150 (OpNode (_.VT 9151 (_.BroadcastLdFrag addr:$src)))>, 9152 EVEX, T8, PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 9153 } 9154} 9155 9156multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode, 9157 X86SchedWriteWidths sched> { 9158 let Uses = [MXCSR] in { 9159 defm 14PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"), OpNode, sched.ZMM, 9160 v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>; 9161 defm 14PDZ : avx512_fp14_p<opc, 
!strconcat(OpcodeStr, "14pd"), OpNode, sched.ZMM, 9162 v8f64_info>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>; 9163 } 9164 let Predicates = [HasFP16] in 9165 defm PHZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"), OpNode, sched.ZMM, 9166 v32f16_info>, EVEX_V512, T_MAP6, EVEX_CD8<16, CD8VF>; 9167 9168 // Define only if AVX512VL feature is present. 9169 let Predicates = [HasVLX], Uses = [MXCSR] in { 9170 defm 14PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"), 9171 OpNode, sched.XMM, v4f32x_info>, 9172 EVEX_V128, EVEX_CD8<32, CD8VF>; 9173 defm 14PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"), 9174 OpNode, sched.YMM, v8f32x_info>, 9175 EVEX_V256, EVEX_CD8<32, CD8VF>; 9176 defm 14PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"), 9177 OpNode, sched.XMM, v2f64x_info>, 9178 EVEX_V128, REX_W, EVEX_CD8<64, CD8VF>; 9179 defm 14PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"), 9180 OpNode, sched.YMM, v4f64x_info>, 9181 EVEX_V256, REX_W, EVEX_CD8<64, CD8VF>; 9182 } 9183 let Predicates = [HasFP16, HasVLX] in { 9184 defm PHZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"), 9185 OpNode, sched.XMM, v8f16x_info>, 9186 EVEX_V128, T_MAP6, EVEX_CD8<16, CD8VF>; 9187 defm PHZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"), 9188 OpNode, sched.YMM, v16f16x_info>, 9189 EVEX_V256, T_MAP6, EVEX_CD8<16, CD8VF>; 9190 } 9191} 9192 9193defm VRSQRT : avx512_fp14_p_vl_all<0x4E, "vrsqrt", X86rsqrt14, SchedWriteFRsqrt>; 9194defm VRCP : avx512_fp14_p_vl_all<0x4C, "vrcp", X86rcp14, SchedWriteFRcp>; 9195 9196/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd 9197multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _, 9198 SDNode OpNode, SDNode OpNodeSAE, 9199 X86FoldableSchedWrite sched> { 9200 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in { 9201 defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 9202 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 9203 "$src2, $src1", "$src1, $src2", 9204 (OpNode (_.VT _.RC:$src1), (_.VT 
_.RC:$src2))>, 9205 Sched<[sched]>, SIMD_EXC; 9206 9207 defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 9208 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 9209 "{sae}, $src2, $src1", "$src1, $src2, {sae}", 9210 (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2))>, 9211 EVEX_B, Sched<[sched]>; 9212 9213 defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 9214 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr, 9215 "$src2, $src1", "$src1, $src2", 9216 (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2))>, 9217 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 9218 } 9219} 9220 9221multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode, 9222 SDNode OpNodeSAE, X86FoldableSchedWrite sched> { 9223 defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE, 9224 sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG, T8, PD, EVEX, VVVV; 9225 defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE, 9226 sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, REX_W, T8, PD, EVEX, VVVV; 9227} 9228 9229multiclass avx512_vgetexpsh<bits<8> opc, string OpcodeStr, SDNode OpNode, 9230 SDNode OpNodeSAE, X86FoldableSchedWrite sched> { 9231 let Predicates = [HasFP16] in 9232 defm SHZ : avx512_fp28_s<opc, OpcodeStr#"sh", f16x_info, OpNode, OpNodeSAE, sched>, 9233 EVEX_CD8<16, CD8VT1>, T_MAP6, PD, EVEX, VVVV; 9234} 9235 9236let Predicates = [HasERI] in { 9237 defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, X86rcp28SAEs, 9238 SchedWriteFRcp.Scl>; 9239 defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, X86rsqrt28SAEs, 9240 SchedWriteFRsqrt.Scl>; 9241} 9242 9243defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs, 9244 SchedWriteFRnd.Scl>, 9245 avx512_vgetexpsh<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs, 9246 SchedWriteFRnd.Scl>; 9247/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd 9248 9249multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 9250 SDNode OpNode, 
X86FoldableSchedWrite sched> { 9251 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { 9252 defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 9253 (ins _.RC:$src), OpcodeStr, "$src", "$src", 9254 (OpNode (_.VT _.RC:$src))>, 9255 Sched<[sched]>; 9256 9257 defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 9258 (ins _.MemOp:$src), OpcodeStr, "$src", "$src", 9259 (OpNode (_.VT 9260 (bitconvert (_.LdFrag addr:$src))))>, 9261 Sched<[sched.Folded, sched.ReadAfterFold]>; 9262 9263 defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 9264 (ins _.ScalarMemOp:$src), OpcodeStr, 9265 "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr, 9266 (OpNode (_.VT 9267 (_.BroadcastLdFrag addr:$src)))>, 9268 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 9269 } 9270} 9271multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, 9272 SDNode OpNode, X86FoldableSchedWrite sched> { 9273 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in 9274 defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 9275 (ins _.RC:$src), OpcodeStr, 9276 "{sae}, $src", "$src, {sae}", 9277 (OpNode (_.VT _.RC:$src))>, 9278 EVEX_B, Sched<[sched]>; 9279} 9280 9281multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode, 9282 SDNode OpNodeSAE, X86SchedWriteWidths sched> { 9283 defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>, 9284 avx512_fp28_p_sae<opc, OpcodeStr#"ps", v16f32_info, OpNodeSAE, sched.ZMM>, 9285 T8, PD, EVEX_V512, EVEX_CD8<32, CD8VF>; 9286 defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>, 9287 avx512_fp28_p_sae<opc, OpcodeStr#"pd", v8f64_info, OpNodeSAE, sched.ZMM>, 9288 T8, PD, EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>; 9289} 9290 9291multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr, 9292 SDNode OpNode, X86SchedWriteWidths sched> { 9293 // Define only if AVX512VL feature is present. 
9294 let Predicates = [HasVLX] in { 9295 defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode, 9296 sched.XMM>, 9297 EVEX_V128, T8, PD, EVEX_CD8<32, CD8VF>; 9298 defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode, 9299 sched.YMM>, 9300 EVEX_V256, T8, PD, EVEX_CD8<32, CD8VF>; 9301 defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode, 9302 sched.XMM>, 9303 EVEX_V128, REX_W, T8, PD, EVEX_CD8<64, CD8VF>; 9304 defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode, 9305 sched.YMM>, 9306 EVEX_V256, REX_W, T8, PD, EVEX_CD8<64, CD8VF>; 9307 } 9308} 9309 9310multiclass avx512_vgetexp_fp16<bits<8> opc, string OpcodeStr, SDNode OpNode, 9311 SDNode OpNodeSAE, X86SchedWriteWidths sched> { 9312 let Predicates = [HasFP16] in 9313 defm PHZ : avx512_fp28_p<opc, OpcodeStr#"ph", v32f16_info, OpNode, sched.ZMM>, 9314 avx512_fp28_p_sae<opc, OpcodeStr#"ph", v32f16_info, OpNodeSAE, sched.ZMM>, 9315 T_MAP6, PD, EVEX_V512, EVEX_CD8<16, CD8VF>; 9316 let Predicates = [HasFP16, HasVLX] in { 9317 defm PHZ128 : avx512_fp28_p<opc, OpcodeStr#"ph", v8f16x_info, OpNode, sched.XMM>, 9318 EVEX_V128, T_MAP6, PD, EVEX_CD8<16, CD8VF>; 9319 defm PHZ256 : avx512_fp28_p<opc, OpcodeStr#"ph", v16f16x_info, OpNode, sched.YMM>, 9320 EVEX_V256, T_MAP6, PD, EVEX_CD8<16, CD8VF>; 9321 } 9322} 9323let Predicates = [HasERI] in { 9324 defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, X86rsqrt28SAE, 9325 SchedWriteFRsqrt>, EVEX; 9326 defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, X86rcp28SAE, 9327 SchedWriteFRcp>, EVEX; 9328 defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, X86exp2SAE, 9329 SchedWriteFAdd>, EVEX; 9330} 9331defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE, 9332 SchedWriteFRnd>, 9333 avx512_vgetexp_fp16<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE, 9334 SchedWriteFRnd>, 9335 avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexp, 9336 SchedWriteFRnd>, EVEX; 9337 9338multiclass avx512_sqrt_packed_round<bits<8> opc, 
string OpcodeStr, 9339 X86FoldableSchedWrite sched, X86VectorVTInfo _>{ 9340 let ExeDomain = _.ExeDomain in 9341 defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 9342 (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc", 9343 (_.VT (X86fsqrtRnd _.RC:$src, (i32 timm:$rc)))>, 9344 EVEX, EVEX_B, EVEX_RC, Sched<[sched]>; 9345} 9346 9347multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr, 9348 X86FoldableSchedWrite sched, X86VectorVTInfo _>{ 9349 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { 9350 defm r: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst), 9351 (ins _.RC:$src), OpcodeStr, "$src", "$src", 9352 (_.VT (any_fsqrt _.RC:$src)), 9353 (_.VT (fsqrt _.RC:$src))>, EVEX, 9354 Sched<[sched]>; 9355 defm m: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst), 9356 (ins _.MemOp:$src), OpcodeStr, "$src", "$src", 9357 (any_fsqrt (_.VT (_.LdFrag addr:$src))), 9358 (fsqrt (_.VT (_.LdFrag addr:$src)))>, EVEX, 9359 Sched<[sched.Folded, sched.ReadAfterFold]>; 9360 defm mb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst), 9361 (ins _.ScalarMemOp:$src), OpcodeStr, 9362 "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr, 9363 (any_fsqrt (_.VT (_.BroadcastLdFrag addr:$src))), 9364 (fsqrt (_.VT (_.BroadcastLdFrag addr:$src)))>, 9365 EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 9366 } 9367} 9368 9369let Uses = [MXCSR], mayRaiseFPException = 1 in 9370multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr, 9371 X86SchedWriteSizes sched> { 9372 let Predicates = [HasFP16] in 9373 defm PHZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"), 9374 sched.PH.ZMM, v32f16_info>, 9375 EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>; 9376 let Predicates = [HasFP16, HasVLX] in { 9377 defm PHZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"), 9378 sched.PH.XMM, v8f16x_info>, 9379 EVEX_V128, T_MAP5, EVEX_CD8<16, CD8VF>; 9380 defm PHZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"), 
9381 sched.PH.YMM, v16f16x_info>, 9382 EVEX_V256, T_MAP5, EVEX_CD8<16, CD8VF>; 9383 } 9384 defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"), 9385 sched.PS.ZMM, v16f32_info>, 9386 EVEX_V512, TB, EVEX_CD8<32, CD8VF>; 9387 defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"), 9388 sched.PD.ZMM, v8f64_info>, 9389 EVEX_V512, REX_W, TB, PD, EVEX_CD8<64, CD8VF>; 9390 // Define only if AVX512VL feature is present. 9391 let Predicates = [HasVLX] in { 9392 defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"), 9393 sched.PS.XMM, v4f32x_info>, 9394 EVEX_V128, TB, EVEX_CD8<32, CD8VF>; 9395 defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"), 9396 sched.PS.YMM, v8f32x_info>, 9397 EVEX_V256, TB, EVEX_CD8<32, CD8VF>; 9398 defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"), 9399 sched.PD.XMM, v2f64x_info>, 9400 EVEX_V128, REX_W, TB, PD, EVEX_CD8<64, CD8VF>; 9401 defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"), 9402 sched.PD.YMM, v4f64x_info>, 9403 EVEX_V256, REX_W, TB, PD, EVEX_CD8<64, CD8VF>; 9404 } 9405} 9406 9407let Uses = [MXCSR] in 9408multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr, 9409 X86SchedWriteSizes sched> { 9410 let Predicates = [HasFP16] in 9411 defm PHZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ph"), 9412 sched.PH.ZMM, v32f16_info>, 9413 EVEX_V512, T_MAP5, EVEX_CD8<16, CD8VF>; 9414 defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"), 9415 sched.PS.ZMM, v16f32_info>, 9416 EVEX_V512, TB, EVEX_CD8<32, CD8VF>; 9417 defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"), 9418 sched.PD.ZMM, v8f64_info>, 9419 EVEX_V512, REX_W, TB, PD, EVEX_CD8<64, CD8VF>; 9420} 9421 9422multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched, 9423 X86VectorVTInfo _, string Name, Predicate prd = HasAVX512> { 9424 let ExeDomain = _.ExeDomain, Predicates = [prd] in { 9425 defm r_Int : AVX512_maskable_scalar<opc, 
MRMSrcReg, _, (outs _.RC:$dst), 9426 (ins _.RC:$src1, _.RC:$src2), OpcodeStr, 9427 "$src2, $src1", "$src1, $src2", 9428 (X86fsqrts (_.VT _.RC:$src1), 9429 (_.VT _.RC:$src2))>, 9430 Sched<[sched]>, SIMD_EXC; 9431 defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 9432 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr, 9433 "$src2, $src1", "$src1, $src2", 9434 (X86fsqrts (_.VT _.RC:$src1), 9435 (_.ScalarIntMemFrags addr:$src2))>, 9436 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 9437 let Uses = [MXCSR] in 9438 defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 9439 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr, 9440 "$rc, $src2, $src1", "$src1, $src2, $rc", 9441 (X86fsqrtRnds (_.VT _.RC:$src1), 9442 (_.VT _.RC:$src2), 9443 (i32 timm:$rc))>, 9444 EVEX_B, EVEX_RC, Sched<[sched]>; 9445 9446 let isCodeGenOnly = 1, hasSideEffects = 0 in { 9447 def r : I<opc, MRMSrcReg, (outs _.FRC:$dst), 9448 (ins _.FRC:$src1, _.FRC:$src2), 9449 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 9450 Sched<[sched]>, SIMD_EXC; 9451 let mayLoad = 1 in 9452 def m : I<opc, MRMSrcMem, (outs _.FRC:$dst), 9453 (ins _.FRC:$src1, _.ScalarMemOp:$src2), 9454 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 9455 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 9456 } 9457 } 9458 9459 let Predicates = [prd] in { 9460 def : Pat<(_.EltVT (any_fsqrt _.FRC:$src)), 9461 (!cast<Instruction>(Name#Zr) 9462 (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>; 9463 } 9464 9465 let Predicates = [prd, OptForSize] in { 9466 def : Pat<(_.EltVT (any_fsqrt (load addr:$src))), 9467 (!cast<Instruction>(Name#Zm) 9468 (_.EltVT (IMPLICIT_DEF)), addr:$src)>; 9469 } 9470} 9471 9472multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr, 9473 X86SchedWriteSizes sched> { 9474 defm SHZ : avx512_sqrt_scalar<opc, OpcodeStr#"sh", sched.PH.Scl, f16x_info, NAME#"SH", HasFP16>, 9475 EVEX_CD8<16, CD8VT1>, EVEX, VVVV, T_MAP5, XS; 9476 defm SSZ : 
avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">, 9477 EVEX_CD8<32, CD8VT1>, EVEX, VVVV, TB, XS; 9478 defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">, 9479 EVEX_CD8<64, CD8VT1>, EVEX, VVVV, TB, XD, REX_W; 9480} 9481 9482defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, 9483 avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>; 9484 9485defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG; 9486 9487multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr, 9488 X86FoldableSchedWrite sched, X86VectorVTInfo _> { 9489 let ExeDomain = _.ExeDomain in { 9490 defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 9491 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr, 9492 "$src3, $src2, $src1", "$src1, $src2, $src3", 9493 (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2), 9494 (i32 timm:$src3)))>, 9495 Sched<[sched]>, SIMD_EXC; 9496 9497 let Uses = [MXCSR] in 9498 defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 9499 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr, 9500 "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3", 9501 (_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), 9502 (i32 timm:$src3)))>, EVEX_B, 9503 Sched<[sched]>; 9504 9505 defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 9506 (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3), 9507 OpcodeStr, 9508 "$src3, $src2, $src1", "$src1, $src2, $src3", 9509 (_.VT (X86RndScales _.RC:$src1, 9510 (_.ScalarIntMemFrags addr:$src2), (i32 timm:$src3)))>, 9511 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 9512 9513 let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in { 9514 def r : I<opc, MRMSrcReg, (outs _.FRC:$dst), 9515 (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3), 9516 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 9517 []>, 
Sched<[sched]>, SIMD_EXC; 9518 9519 let mayLoad = 1 in 9520 def m : I<opc, MRMSrcMem, (outs _.FRC:$dst), 9521 (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3), 9522 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 9523 []>, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 9524 } 9525 } 9526 9527 let Predicates = [HasAVX512] in { 9528 def : Pat<(X86any_VRndScale _.FRC:$src1, timm:$src2), 9529 (_.EltVT (!cast<Instruction>(NAME#r) (_.EltVT (IMPLICIT_DEF)), 9530 _.FRC:$src1, timm:$src2))>; 9531 } 9532 9533 let Predicates = [HasAVX512, OptForSize] in { 9534 def : Pat<(X86any_VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2), 9535 (_.EltVT (!cast<Instruction>(NAME#m) (_.EltVT (IMPLICIT_DEF)), 9536 addr:$src1, timm:$src2))>; 9537 } 9538} 9539 9540let Predicates = [HasFP16] in 9541defm VRNDSCALESHZ : avx512_rndscale_scalar<0x0A, "vrndscalesh", 9542 SchedWriteFRnd.Scl, f16x_info>, 9543 AVX512PSIi8Base, TA, EVEX, VVVV, 9544 EVEX_CD8<16, CD8VT1>; 9545 9546defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless", 9547 SchedWriteFRnd.Scl, f32x_info>, 9548 AVX512AIi8Base, EVEX, VVVV, VEX_LIG, 9549 EVEX_CD8<32, CD8VT1>; 9550 9551defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd", 9552 SchedWriteFRnd.Scl, f64x_info>, 9553 REX_W, AVX512AIi8Base, EVEX, VVVV, VEX_LIG, 9554 EVEX_CD8<64, CD8VT1>; 9555 9556multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move, 9557 dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP, 9558 dag OutMask, Predicate BasePredicate> { 9559 let Predicates = [BasePredicate] in { 9560 def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask, 9561 (OpNode (extractelt _.VT:$src2, (iPTR 0))), 9562 (extractelt _.VT:$dst, (iPTR 0))))), 9563 (!cast<Instruction>("V"#OpcPrefix#r_Intk) 9564 _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>; 9565 9566 def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask, 9567 (OpNode (extractelt _.VT:$src2, (iPTR 0))), 9568 ZeroFP))), 9569 
(!cast<Instruction>("V"#OpcPrefix#r_Intkz) 9570 OutMask, _.VT:$src2, _.VT:$src1)>; 9571 } 9572} 9573 9574defm : avx512_masked_scalar<fsqrt, "SQRTSHZ", X86Movsh, 9575 (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v8f16x_info, 9576 fp16imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasFP16>; 9577defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss, 9578 (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info, 9579 fp32imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>; 9580defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd, 9581 (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info, 9582 fp64imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>; 9583 9584 9585//------------------------------------------------- 9586// Integer truncate and extend operations 9587//------------------------------------------------- 9588 9589multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode, 9590 SDPatternOperator MaskNode, 9591 X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo, 9592 X86VectorVTInfo DestInfo, X86MemOperand x86memop> { 9593 let ExeDomain = DestInfo.ExeDomain in { 9594 def rr : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst), 9595 (ins SrcInfo.RC:$src), 9596 OpcodeStr # "\t{$src, $dst|$dst, $src}", 9597 [(set DestInfo.RC:$dst, 9598 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))))]>, 9599 EVEX, Sched<[sched]>; 9600 let Constraints = "$src0 = $dst" in 9601 def rrk : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst), 9602 (ins DestInfo.RC:$src0, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src), 9603 OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", 9604 [(set DestInfo.RC:$dst, 9605 (MaskNode (SrcInfo.VT SrcInfo.RC:$src), 9606 (DestInfo.VT DestInfo.RC:$src0), 9607 SrcInfo.KRCWM:$mask))]>, 9608 EVEX, EVEX_K, Sched<[sched]>; 9609 def rrkz : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst), 9610 (ins SrcInfo.KRCWM:$mask, SrcInfo.RC:$src), 9611 OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, 
$src}",
                    // Zero-masking variant: elements whose mask bit is clear are
                    // zeroed in the destination.
                    [(set DestInfo.RC:$dst,
                          (DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
                           DestInfo.ImmAllZerosV, SrcInfo.KRCWM:$mask)))]>,
                    EVEX, EVEX_KZ, Sched<[sched]>;
  }

  // Memory forms: plain and merge-masked truncating stores. No ISel patterns
  // here; the store lowering is supplied by avx512_trunc_mr_lowering below.
  let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
    def mr : AVX512XS8I<opc, MRMDestMem, (outs),
               (ins x86memop:$dst, SrcInfo.RC:$src),
               OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
               EVEX, Sched<[sched.Folded]>;

    def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
               (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
               OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>,
               EVEX, EVEX_K, Sched<[sched.Folded]>;
  }//mayStore = 1, hasSideEffects = 0
}

// Selects the memory forms defined above for (masked) truncating-store
// PatFrags, locating the mr/mrk instruction records by name.
multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
                                    PatFrag truncFrag, PatFrag mtruncFrag,
                                    string Name> {

  def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
            (!cast<Instruction>(Name#SrcInfo.ZSuffix#mr)
                                addr:$dst, SrcInfo.RC:$src)>;

  def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst,
                        SrcInfo.KRCWM:$mask),
            (!cast<Instruction>(Name#SrcInfo.ZSuffix#mrk)
                                addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
}

// Instantiates the 128/256/512-bit variants of a truncate instruction.
// The 128/256-bit forms additionally require VLX on top of 'prd'; separate
// SDNode/mask-node parameters allow the narrow forms to use the in-vector
// ("_invec") variants of the node.
multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
                        SDNode OpNode256, SDNode OpNode512,
                        SDPatternOperator MaskNode128,
                        SDPatternOperator MaskNode256,
                        SDPatternOperator MaskNode512,
                        X86SchedWriteWidths sched,
                        AVX512VLVectorVTInfo VTSrcInfo,
                        X86VectorVTInfo DestInfoZ128,
                        X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
                        X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
                        X86MemOperand x86memopZ, PatFrag truncFrag,
                        PatFrag mtruncFrag, Predicate prd = HasAVX512>{

  let Predicates = [HasVLX, prd] in {
    defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched.XMM,
                                   VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
               avx512_trunc_mr_lowering<VTSrcInfo.info128, truncFrag,
                                        mtruncFrag, NAME>, EVEX_V128;

    defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched.YMM,
                                   VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
               avx512_trunc_mr_lowering<VTSrcInfo.info256, truncFrag,
                                        mtruncFrag, NAME>, EVEX_V256;
  }
  let Predicates = [prd] in
    defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched.ZMM,
                                VTSrcInfo.info512, DestInfoZ, x86memopZ>,
            avx512_trunc_mr_lowering<VTSrcInfo.info512, truncFrag,
                                     mtruncFrag, NAME>, EVEX_V512;
}

// vXi64 -> vXi8 truncate: no source width fills a full v16i8, so every
// variant uses the in-vector node and a v16i8 destination.
multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr,
                           X86SchedWriteWidths sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode,
                          InVecMaskNode, InVecMaskNode, InVecMaskNode, sched,
                          avx512vl_i64_info, v16i8x_info, v16i8x_info,
                          v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
}

// vXi64 -> vXi16 truncate: only the 512-bit source fills a full v8i16.
multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86SchedWriteWidths sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
                          InVecMaskNode, InVecMaskNode, MaskNode, sched,
                          avx512vl_i64_info, v8i16x_info, v8i16x_info,
                          v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
}

// vXi64 -> vXi32 truncate: the 256/512-bit sources fill a whole destination
// vector, so they can use the plain node.
multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86SchedWriteWidths sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
                          InVecMaskNode, MaskNode, MaskNode,
sched,
                          avx512vl_i64_info, v4i32x_info, v4i32x_info,
                          v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
}

// vXi32 -> vXi8 truncate: no source width fills a full v16i8, so all
// destinations are v16i8 with only the low elements defined.
multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86SchedWriteWidths sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
                          InVecMaskNode, InVecMaskNode, MaskNode, sched,
                          avx512vl_i32_info, v16i8x_info, v16i8x_info,
                          v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
}

// vXi32 -> vXi16 truncate.
multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86SchedWriteWidths sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
                          InVecMaskNode, MaskNode, MaskNode, sched,
                          avx512vl_i32_info, v8i16x_info, v8i16x_info,
                          v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
}

// vXi16 -> vXi8 truncate; gated on BWI via the trailing HasBWI predicate.
multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDPatternOperator MaskNode,
                           X86SchedWriteWidths sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode,
                           SDPatternOperator InVecMaskNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
                          InVecMaskNode, MaskNode, MaskNode, sched,
                          avx512vl_i16_info, v16i8x_info, v16i8x_info,
                          v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
}

// Plain, signed-saturating (S) and unsigned-saturating (US) truncates.
defm VPMOVQB    : avx512_trunc_qb<0x32, "vpmovqb",
                                  SchedWriteVecTruncate, truncstorevi8,
                                  masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
defm VPMOVSQB   : avx512_trunc_qb<0x22, "vpmovsqb",
                                  SchedWriteVecTruncate, truncstore_s_vi8,
                                  masked_truncstore_s_vi8, X86vtruncs,
                                  X86vmtruncs>;
defm VPMOVUSQB  : avx512_trunc_qb<0x12, "vpmovusqb",
                                  SchedWriteVecTruncate, truncstore_us_vi8,
                                  masked_truncstore_us_vi8, X86vtruncus, X86vmtruncus>;

defm VPMOVQW    : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc,
                                  SchedWriteVecTruncate, truncstorevi16,
                                  masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
defm VPMOVSQW   : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, select_truncs,
                                  SchedWriteVecTruncate, truncstore_s_vi16,
                                  masked_truncstore_s_vi16, X86vtruncs,
                                  X86vmtruncs>;
defm VPMOVUSQW  : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
                                  select_truncus, SchedWriteVecTruncate,
                                  truncstore_us_vi16, masked_truncstore_us_vi16,
                                  X86vtruncus, X86vmtruncus>;

defm VPMOVQD    : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc,
                                  SchedWriteVecTruncate, truncstorevi32,
                                  masked_truncstorevi32, X86vtrunc, X86vmtrunc>;
defm VPMOVSQD   : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, select_truncs,
                                  SchedWriteVecTruncate, truncstore_s_vi32,
                                  masked_truncstore_s_vi32, X86vtruncs,
                                  X86vmtruncs>;
defm VPMOVUSQD  : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
                                  select_truncus, SchedWriteVecTruncate,
                                  truncstore_us_vi32, masked_truncstore_us_vi32,
                                  X86vtruncus, X86vmtruncus>;

defm VPMOVDB    : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc,
                                  SchedWriteVecTruncate, truncstorevi8,
                                  masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
defm VPMOVSDB   : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs,
                                  SchedWriteVecTruncate, truncstore_s_vi8,
                                  masked_truncstore_s_vi8, X86vtruncs,
                                  X86vmtruncs>;
defm VPMOVUSDB  : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus,
                                  select_truncus, SchedWriteVecTruncate,
                                  truncstore_us_vi8, masked_truncstore_us_vi8,
                                  X86vtruncus, X86vmtruncus>;

defm VPMOVDW    : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc,
                                  SchedWriteVecTruncate, truncstorevi16,
                                  masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
defm VPMOVSDW   : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs,
                                  SchedWriteVecTruncate, truncstore_s_vi16,
                                  masked_truncstore_s_vi16, X86vtruncs,
                                  X86vmtruncs>;
defm VPMOVUSDW  : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
                                  select_truncus, SchedWriteVecTruncate,
                                  truncstore_us_vi16, masked_truncstore_us_vi16,
                                  X86vtruncus, X86vmtruncus>;

defm VPMOVWB    : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc,
                                  SchedWriteVecTruncate, truncstorevi8,
                                  masked_truncstorevi8, X86vtrunc,
                                  X86vmtrunc>;
defm VPMOVSWB   : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs,
                                  SchedWriteVecTruncate, truncstore_s_vi8,
                                  masked_truncstore_s_vi8, X86vtruncs,
                                  X86vmtruncs>;
defm VPMOVUSWB  : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
                                  select_truncus, SchedWriteVecTruncate,
                                  truncstore_us_vi8, masked_truncstore_us_vi8,
                                  X86vtruncus, X86vmtruncus>;

// Without VLX, widen the 256-bit source through a ZMM register and use the
// 512-bit truncate, then extract the XMM result.
let Predicates = [HasAVX512, NoVLX] in {
def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
         (v8i16 (EXTRACT_SUBREG
                 (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                              VR256X:$src, sub_ymm)))), sub_xmm))>;
def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
         (v4i32 (EXTRACT_SUBREG
                 (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                            VR256X:$src, sub_ymm)))), sub_xmm))>;
}

let Predicates = [HasBWI, NoVLX] in {
def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
         (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
                                             VR256X:$src, sub_ymm))), sub_xmm))>;
}

// Without BWI we can't use vXi16/vXi8 vselect so we have to use vmtrunc nodes.
multiclass mtrunc_lowering<string InstrName, SDNode OpNode,
                           X86VectorVTInfo DestInfo,
                           X86VectorVTInfo SrcInfo> {
  // Merge-masked truncate: preserved destination elements come from $src0.
  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
                                 DestInfo.RC:$src0,
                                 SrcInfo.KRCWM:$mask)),
            (!cast<Instruction>(InstrName#"rrk") DestInfo.RC:$src0,
                                                 SrcInfo.KRCWM:$mask,
                                                 SrcInfo.RC:$src)>;

  // Zero-masked truncate.
  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
                                 DestInfo.ImmAllZerosV,
                                 SrcInfo.KRCWM:$mask)),
            (!cast<Instruction>(InstrName#"rrkz") SrcInfo.KRCWM:$mask,
                                                  SrcInfo.RC:$src)>;
}

let Predicates = [HasVLX] in {
defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>;
defm : mtrunc_lowering<"VPMOVSDWZ256", X86vmtruncs, v8i16x_info, v8i32x_info>;
defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>;
}

let Predicates = [HasAVX512] in {
defm : mtrunc_lowering<"VPMOVDWZ", X86vmtrunc, v16i16x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVSDWZ", X86vmtruncs, v16i16x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>;

defm : mtrunc_lowering<"VPMOVDBZ", X86vmtrunc, v16i8x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVSDBZ", X86vmtruncs, v16i8x_info, v16i32_info>;
defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>;

defm : mtrunc_lowering<"VPMOVQWZ", X86vmtrunc, v8i16x_info, v8i64_info>;
defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>;
defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>;
}

// Register and memory forms of a sign/zero-extend instruction; masking
// variants are generated by AVX512_maskable.
multiclass avx512_pmovx_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
              X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
              X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
  let ExeDomain = DestInfo.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
                    (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
                    (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
                  EVEX, Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                  (ins x86memop:$src), OpcodeStr ,"$src", "$src",
                  (DestInfo.VT (LdFrag addr:$src))>,
                EVEX, Sched<[sched.Folded]>;
  }
}

// byte -> word extend. Variants that consume only part of the source register
// use the in-vector node (InVecNode); those that consume it all use OpNode.
multiclass avx512_pmovx_bw<bits<8> opc, string OpcodeStr,
          SDNode OpNode, SDNode InVecNode, string ExtTy,
          X86SchedWriteWidths sched,
          PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasBWI] in {
    defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v8i16x_info,
                     v16i8x_info, i64mem, LdFrag, InVecNode>,
                     EVEX_CD8<8, CD8VH>, T8, PD, EVEX_V128, WIG;

    defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v16i16x_info,
                     v16i8x_info, i128mem, LdFrag, OpNode>,
                     EVEX_CD8<8, CD8VH>, T8, PD, EVEX_V256, WIG;
  }
  let Predicates = [HasBWI] in {
    defm Z   : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v32i16_info,
                     v32i8x_info, i256mem, LdFrag, OpNode>,
                     EVEX_CD8<8, CD8VH>, T8, PD, EVEX_V512, WIG;
  }
}

// byte -> dword extend.
multiclass avx512_pmovx_bd<bits<8> opc, string OpcodeStr,
          SDNode OpNode, SDNode InVecNode, string ExtTy,
          X86SchedWriteWidths sched,
          PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v4i32x_info,
                     v16i8x_info, i32mem, LdFrag, InVecNode>,
                     EVEX_CD8<8, CD8VQ>, T8, PD, EVEX_V128, WIG;

    defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v8i32x_info,
                     v16i8x_info, i64mem, LdFrag, InVecNode>,
                     EVEX_CD8<8, CD8VQ>, T8, PD, EVEX_V256, WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z   : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v16i32_info,
                     v16i8x_info, i128mem, LdFrag, OpNode>,
                     EVEX_CD8<8, CD8VQ>, T8, PD, EVEX_V512, WIG;
  }
}

// byte -> qword extend; every width consumes only part of a v16i8 source, so
// only the in-vector node is needed.
multiclass avx512_pmovx_bq<bits<8> opc, string OpcodeStr,
          SDNode InVecNode, string ExtTy,
          X86SchedWriteWidths sched,
          PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
                     v16i8x_info, i16mem, LdFrag, InVecNode>,
                     EVEX_CD8<8, CD8VO>, T8, PD, EVEX_V128, WIG;

    defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
                     v16i8x_info, i32mem, LdFrag, InVecNode>,
                     EVEX_CD8<8, CD8VO>, T8, PD, EVEX_V256, WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z   : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
                     v16i8x_info, i64mem, LdFrag, InVecNode>,
                     EVEX_CD8<8, CD8VO>, T8, PD, EVEX_V512, WIG;
  }
}

// word -> dword extend.
multiclass avx512_pmovx_wd<bits<8> opc, string OpcodeStr,
          SDNode OpNode, SDNode InVecNode, string ExtTy,
          X86SchedWriteWidths sched,
          PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v4i32x_info,
                     v8i16x_info, i64mem, LdFrag, InVecNode>,
                     EVEX_CD8<16, CD8VH>, T8, PD, EVEX_V128, WIG;

    defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v8i32x_info,
                     v8i16x_info, i128mem, LdFrag, OpNode>,
                     EVEX_CD8<16, CD8VH>, T8, PD, EVEX_V256, WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z   : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v16i32_info,
                     v16i16x_info, i256mem, LdFrag, OpNode>,
                     EVEX_CD8<16, CD8VH>, T8, PD, EVEX_V512, WIG;
  }
}

// word -> qword extend.
multiclass avx512_pmovx_wq<bits<8> opc, string OpcodeStr,
          SDNode OpNode, SDNode InVecNode, string ExtTy,
          X86SchedWriteWidths sched,
          PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
                     v8i16x_info, i32mem, LdFrag, InVecNode>,
                     EVEX_CD8<16, CD8VQ>, T8, PD, EVEX_V128, WIG;

    defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
                     v8i16x_info, i64mem, LdFrag, InVecNode>,
                     EVEX_CD8<16, CD8VQ>, T8, PD, EVEX_V256, WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z   : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
                     v8i16x_info, i128mem, LdFrag, OpNode>,
                     EVEX_CD8<16, CD8VQ>, T8, PD, EVEX_V512, WIG;
  }
}

// dword -> qword extend; note: no WIG, and CD8VH tuple.
multiclass avx512_pmovx_dq<bits<8> opc, string OpcodeStr,
          SDNode OpNode, SDNode InVecNode, string ExtTy,
          X86SchedWriteWidths sched,
          PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {

  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
                     v4i32x_info, i64mem, LdFrag, InVecNode>,
                     EVEX_CD8<32, CD8VH>, T8, PD, EVEX_V128;

    defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
                     v4i32x_info, i128mem, LdFrag, OpNode>,
                     EVEX_CD8<32, CD8VH>, T8, PD, EVEX_V256;
  }
  let Predicates = [HasAVX512] in {
    defm Z   : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
                     v8i32x_info, i256mem, LdFrag, OpNode>,
                     EVEX_CD8<32, CD8VH>, T8, PD, EVEX_V512;
  }
}

defm VPMOVZXBW : avx512_pmovx_bw<0x30, "vpmovzxbw", zext, zext_invec, "z", SchedWriteVecExtend>;
defm VPMOVZXBD : avx512_pmovx_bd<0x31, "vpmovzxbd", zext, zext_invec, "z", SchedWriteVecExtend>;
defm VPMOVZXBQ : avx512_pmovx_bq<0x32, "vpmovzxbq", zext_invec, "z", SchedWriteVecExtend>;
defm VPMOVZXWD : avx512_pmovx_wd<0x33, "vpmovzxwd", zext, zext_invec, "z", SchedWriteVecExtend>;
defm VPMOVZXWQ : avx512_pmovx_wq<0x34, "vpmovzxwq", zext, zext_invec, "z", SchedWriteVecExtend>;
defm VPMOVZXDQ : avx512_pmovx_dq<0x35, "vpmovzxdq", zext, zext_invec, "z", SchedWriteVecExtend>;

defm VPMOVSXBW:
avx512_pmovx_bw<0x20, "vpmovsxbw", sext, sext_invec, "s", SchedWriteVecExtend>;
defm VPMOVSXBD: avx512_pmovx_bd<0x21, "vpmovsxbd", sext, sext_invec, "s", SchedWriteVecExtend>;
defm VPMOVSXBQ: avx512_pmovx_bq<0x22, "vpmovsxbq", sext_invec, "s", SchedWriteVecExtend>;
defm VPMOVSXWD: avx512_pmovx_wd<0x23, "vpmovsxwd", sext, sext_invec, "s", SchedWriteVecExtend>;
defm VPMOVSXWQ: avx512_pmovx_wq<0x24, "vpmovsxwq", sext, sext_invec, "s", SchedWriteVecExtend>;
defm VPMOVSXDQ: avx512_pmovx_dq<0x25, "vpmovsxdq", sext, sext_invec, "s", SchedWriteVecExtend>;


// Patterns that we also need any extend versions of. aext_vector_inreg
// is currently legalized to zext_vector_inreg.
multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
  // 256-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
  def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
            (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
  }

  let Predicates = [HasVLX] in {
  def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
            (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;

  def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
            (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
  }

  // 512-bit patterns
  let Predicates = [HasBWI] in {
  def : Pat<(v32i16 (ExtOp (loadv32i8 addr:$src))),
            (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
  }
  let Predicates = [HasAVX512] in {
  def : Pat<(v16i32 (ExtOp (loadv16i8 addr:$src))),
            (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
  def : Pat<(v16i32 (ExtOp (loadv16i16 addr:$src))),
            (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;

  def : Pat<(v8i64 (ExtOp (loadv8i16 addr:$src))),
            (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;

  def : Pat<(v8i64 (ExtOp (loadv8i32 addr:$src))),
            (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
  }
}

// In addition to the full-vector-load patterns above, fold narrow scalar
// loads (bitcast into a vector) into the in-vector extend instructions.
multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
                                 SDNode InVecOp> :
  AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
  // 128-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  }
  let Predicates = [HasVLX] in {
  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;

  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def :
Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  }
  // 256-bit patterns
  let Predicates = [HasVLX] in {
  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  // FIX: an f64 load must be wrapped in a v2f64 scalar_to_vector (as in every
  // sibling loadf64 pattern in this multiclass), not v2i64 — the element type
  // of scalar_to_vector has to match the loaded scalar type.
  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;

  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;

  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  // FIX: likewise v2f64 for the f64-typed load.
  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  }
  // 512-bit patterns
  let Predicates = [HasAVX512] in {
  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  }
}

defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;

// Without BWI we can't do a trunc from v16i16 to v16i8.
// DAG combine can merge
// ext+trunc aggressively making it impossible to legalize the DAG to this
// pattern directly.
let Predicates = [HasAVX512, NoBWI] in {
def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
def: Pat<(v16i8 (trunc (loadv16i16 addr:$src))),
         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
}

//===----------------------------------------------------------------------===//
// GATHER - SCATTER Operations

// FIXME: Improve scheduling of gather/scatter instructions.
// Gather loads into $dst under $mask and writes the consumed mask back through
// $mask_wb; $dst is tied to the pass-through $src1 and marked earlyclobber.
multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
  let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
      ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
  def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
            (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
            !strconcat(OpcodeStr#_.Suffix,
                       "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
            []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
            Sched<[WriteLoad, WriteVecMaskedGatherWriteback]>;
}

// 64-bit-element gathers (pd/q), with dword (D) and qword (Q) index forms.
multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
                              AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512,
                                      vy512xmem>, EVEX_V512, REX_W;
  defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info512,
                                      vz512mem>, EVEX_V512, REX_W;
let Predicates = [HasVLX] in {
  defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
                                        vx256xmem>, EVEX_V256, REX_W;
  defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info256,
                                        vy256xmem>, EVEX_V256, REX_W;
  defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
                                        vx128xmem>, EVEX_V128, REX_W;
  defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
                                        vx128xmem>, EVEX_V128, REX_W;
}
}

// 32-bit-element gathers (ps/d). Qword-indexed forms carry half as many data
// elements as index elements, hence the narrower data infos.
multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
                              AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512, vz512mem>,
                                      EVEX_V512;
  defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info256, vz256mem>,
                                      EVEX_V512;
let Predicates = [HasVLX] in {
  defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
                                        vy256xmem>, EVEX_V256;
  defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info128,
                                        vy128xmem>, EVEX_V256;
  defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
                                        vx128xmem>, EVEX_V128;
  defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
                                        vx64xmem, VK2WM>, EVEX_V128;
}
}


defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
               avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;

defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
                avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;

// Scatter stores $src under $mask and writes the consumed mask back through
// $mask_wb.
multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {

let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain,
    hasSideEffects = 0 in

  def mr : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
            (ins memop:$dst, MaskRC:$mask, _.RC:$src),
            !strconcat(OpcodeStr#_.Suffix,
                       "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
            []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
            Sched<[WriteStore]>;
}

// 64-bit-element scatters (pd/q); mirrors avx512_gather_q_pd above.
multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
                               AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512,
                                      vy512xmem>, EVEX_V512, REX_W;
  defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info512,
                                      vz512mem>, EVEX_V512, REX_W;
let Predicates = [HasVLX] in {
  defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
                                         vx256xmem>, EVEX_V256, REX_W;
  defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info256,
                                         vy256xmem>, EVEX_V256, REX_W;
  defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
                                         vx128xmem>, EVEX_V128, REX_W;
  defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
                                         vx128xmem>, EVEX_V128, REX_W;
}
}

// 32-bit-element scatters (ps/d); mirrors avx512_gather_d_ps above.
multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
                               AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512, vz512mem>,
                                      EVEX_V512;
  defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info256, vz256mem>,
                                      EVEX_V512;
let Predicates = [HasVLX] in {
  defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
                                         vy256xmem>, EVEX_V256;
  defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
                                         vy128xmem>, EVEX_V256;
  defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
                                         vx128xmem>, EVEX_V128;
  defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
                                         vx64xmem, VK2WM>, EVEX_V128;
}
}

defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
                avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;

defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
                 avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;

// prefetch
multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
                                          RegisterClass KRC, X86MemOperand memop> {
  let Predicates = [HasPFI], mayLoad
= 1, mayStore = 1 in
  // Prefetches only touch memory; there is no register result and no pattern.
  def m  : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
             !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
             EVEX, EVEX_K, Sched<[WriteLoad]>;
}

defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
                     VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
                     VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
                     VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
                     VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
                     VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
                     VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
                     VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
                     VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;

// vpmovm2*: expand a mask register into a vector — each destination element
// is the sign-extension of its mask bit (all-ones or all-zeros).
multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr, SchedWrite Sched> {
def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
                  !strconcat(OpcodeStr#Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
                  [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
                  EVEX, Sched<[Sched]>;
}

multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
                                 string OpcodeStr, Predicate prd> {
let Predicates = [prd] in
  defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr, WriteVecMoveZ>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr, WriteVecMoveY>, EVEX_V256;
    defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr, WriteVecMoveX>, EVEX_V128;
  }
}

defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , REX_W;
defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , REX_W;

// vpmov*2m: each mask bit is set from the sign bit of the corresponding
// vector element, matched here as the signed compare 0 > x.
multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
    def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
                        !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                        [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
                        EVEX, Sched<[WriteMove]>;
}

// Use 512bit version to implement 128/256 bit in case NoVLX.
multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
                                           X86VectorVTInfo _,
                                           string Name> {

  def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
            (_.KVT (COPY_TO_REGCLASS
                     (!cast<Instruction>(Name#"Zrr")
                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                      _.RC:$src, _.SubRegIdx)),
                   _.KRC))>;
}

multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
                                         AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
    defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
                                            EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
                                              EVEX_V256;
    defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
                                              EVEX_V128;
  }
  // Without VLX, widen through the 512-bit instruction instead.
  let Predicates = [prd, NoVLX] in {
    defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
    defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
  }
}

defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
                                              avx512vl_i8_info, HasBWI>;
defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
                                              avx512vl_i16_info, HasBWI>, REX_W;
defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
                                              avx512vl_i32_info, HasDQI>;
defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
                                              avx512vl_i64_info, HasDQI>, REX_W;

// Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
// is available, but BWI is not. We can't handle this in lowering because
// a target independent DAG combine likes to combine sext and trunc.
let Predicates = [HasDQI, NoBWI] in {
  def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
            (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
  def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
            (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
}

let Predicates = [HasDQI, NoBWI, HasVLX] in {
  def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
            (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - COMPRESS and EXPAND
//

// Register form (with masking) plus plain/masked store forms. The register
// pattern is null; compress ISel is supplied by the lowering multiclass below.
multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
                                        string OpcodeStr, X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
              (null_frag)>, AVX5128IBase,
              Sched<[sched]>;

  let mayStore = 1, hasSideEffects = 0 in
  def mr : AVX5128I<opc, MRMDestMem, (outs),
              (ins _.MemOp:$dst, _.RC:$src),
              OpcodeStr # "\t{$src, $dst|$dst, $src}",
              []>, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[sched.Folded]>;

  def mrk : AVX5128I<opc, MRMDestMem, (outs),
              (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
              OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
              []>,
              EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[sched.Folded]>;
}

// Selects the store and masked register forms for the compress nodes.
multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
  def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#mrk)
                                addr:$dst, _.KRCWM:$mask, _.RC:$src)>;

  def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
            (!cast<Instruction>(Name#_.ZSuffix#rrk)
                                _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
  def :
Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask), 10438 (!cast<Instruction>(Name#_.ZSuffix#rrkz) 10439 _.KRCWM:$mask, _.RC:$src)>; 10440} 10441 10442multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr, 10443 X86FoldableSchedWrite sched, 10444 AVX512VLVectorVTInfo VTInfo, 10445 Predicate Pred = HasAVX512> { 10446 let Predicates = [Pred] in 10447 defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>, 10448 compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512; 10449 10450 let Predicates = [Pred, HasVLX] in { 10451 defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>, 10452 compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256; 10453 defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>, 10454 compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128; 10455 } 10456} 10457 10458// FIXME: Is there a better scheduler class for VPCOMPRESS? 10459defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", WriteVarShuffle256, 10460 avx512vl_i32_info>, EVEX; 10461defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", WriteVarShuffle256, 10462 avx512vl_i64_info>, EVEX, REX_W; 10463defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", WriteVarShuffle256, 10464 avx512vl_f32_info>, EVEX; 10465defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", WriteVarShuffle256, 10466 avx512vl_f64_info>, EVEX, REX_W; 10467 10468// expand 10469multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _, 10470 string OpcodeStr, X86FoldableSchedWrite sched> { 10471 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 10472 (ins _.RC:$src1), OpcodeStr, "$src1", "$src1", 10473 (null_frag)>, AVX5128IBase, 10474 Sched<[sched]>; 10475 10476 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), 10477 (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1", 10478 (null_frag)>, 10479 AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>, 10480 
Sched<[sched.Folded, sched.ReadAfterFold]>; 10481} 10482 10483multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> { 10484 10485 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)), 10486 (!cast<Instruction>(Name#_.ZSuffix#rmkz) 10487 _.KRCWM:$mask, addr:$src)>; 10488 10489 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)), 10490 (!cast<Instruction>(Name#_.ZSuffix#rmkz) 10491 _.KRCWM:$mask, addr:$src)>; 10492 10493 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, 10494 (_.VT _.RC:$src0))), 10495 (!cast<Instruction>(Name#_.ZSuffix#rmk) 10496 _.RC:$src0, _.KRCWM:$mask, addr:$src)>; 10497 10498 def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask), 10499 (!cast<Instruction>(Name#_.ZSuffix#rrk) 10500 _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>; 10501 def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask), 10502 (!cast<Instruction>(Name#_.ZSuffix#rrkz) 10503 _.KRCWM:$mask, _.RC:$src)>; 10504} 10505 10506multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr, 10507 X86FoldableSchedWrite sched, 10508 AVX512VLVectorVTInfo VTInfo, 10509 Predicate Pred = HasAVX512> { 10510 let Predicates = [Pred] in 10511 defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>, 10512 expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512; 10513 10514 let Predicates = [Pred, HasVLX] in { 10515 defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>, 10516 expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256; 10517 defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>, 10518 expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128; 10519 } 10520} 10521 10522// FIXME: Is there a better scheduler class for VPEXPAND? 
defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256,
                                      avx512vl_i32_info>, EVEX;
defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256,
                                      avx512vl_i64_info>, EVEX, REX_W;
defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256,
                                      avx512vl_f32_info>, EVEX;
defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256,
                                      avx512vl_f64_info>, EVEX, REX_W;

//handle instruction reg_vec1 = op(reg_vec,imm)
//                              op(mem_vec,imm)
//                              op(broadcast(eltVt),imm)
//all instruction created with FROUND_CURRENT
multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr,
                                      SDPatternOperator OpNode,
                                      SDPatternOperator MaskOpNode,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rri : AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
                      (OpNode (_.VT _.RC:$src1), (i32 timm:$src2)),
                      (MaskOpNode (_.VT _.RC:$src1), (i32 timm:$src2))>,
                      Sched<[sched]>;
  defm rmi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.MemOp:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
                    (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                            (i32 timm:$src2)),
                    (MaskOpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                (i32 timm:$src2))>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmbi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr#_.Suffix, "$src2, ${src1}"#_.BroadcastStr,
                    "${src1}"#_.BroadcastStr#", $src2",
                    (OpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
                            (i32 timm:$src2)),
                    (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
                                (i32 timm:$src2))>, EVEX_B,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                            SDNode OpNode, X86FoldableSchedWrite sched,
                            X86VectorVTInfo _> {
  // SAE form suppresses exceptions, so no mayRaiseFPException here.
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr#_.Suffix, "$src2, {sae}, $src1",
                      "$src1, {sae}, $src2",
                      (OpNode (_.VT _.RC:$src1),
                              (i32 timm:$src2))>,
                      EVEX_B, Sched<[sched]>;
}

multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
            AVX512VLVectorVTInfo _, bits<8> opc, SDPatternOperator OpNode,
            SDPatternOperator MaskOpNode, SDNode OpNodeSAE, X86SchedWriteWidths sched,
            Predicate prd>{
  let Predicates = [prd] in {
    defm Z    : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
                                           sched.ZMM, _.info512>,
                avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE,
                                               sched.ZMM, _.info512>, EVEX_V512;
  }
  // {sae} is only encodable at 512 bits, so VLX forms get the plain variant.
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
                                           sched.XMM, _.info128>, EVEX_V128;
    defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
                                           sched.YMM, _.info256>, EVEX_V256;
  }
}

//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
//                               op(reg_vec2,mem_vec,imm)
//                               op(reg_vec2,broadcast(eltVt),imm)
//all instruction created with FROUND_CURRENT
multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 timm:$src3))>,
                      Sched<[sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (bitconvert (_.LdFrag addr:$src2))),
                            (i32 timm:$src3))>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                    OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
                    "$src1, ${src2}"#_.BroadcastStr#", $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (_.BroadcastLdFrag addr:$src2)),
                            (i32 timm:$src3))>, EVEX_B,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

//handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
//                               op(reg_vec2,mem_vec,imm)
multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                  X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo,
                  X86VectorVTInfo SrcInfo>{
  // DestInfo and SrcInfo may differ (e.g. vdbpsadbw: i8 sources, i16 result).
  let ExeDomain = DestInfo.ExeDomain, ImmT = Imm8 in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
                  (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                               (SrcInfo.VT SrcInfo.RC:$src2),
                               (i8 timm:$src3)))>,
                  Sched<[sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                             (SrcInfo.VT (bitconvert
                                                (SrcInfo.LdFrag addr:$src2))),
                             (i8 timm:$src3)))>,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

//handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
//                               op(reg_vec2,mem_vec,imm)
//                               op(reg_vec2,broadcast(eltVt),imm)
multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, X86VectorVTInfo _>:
  avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{

  let ExeDomain = _.ExeDomain, ImmT = Imm8 in
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
                "$src1, ${src2}"#_.BroadcastStr#", $src3",
                (OpNode (_.VT _.RC:$src1),
                        (_.VT (_.BroadcastLdFrag addr:$src2)),
                        (i8 timm:$src3))>, EVEX_B,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
}

//handle scalar instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
//                                      op(reg_vec2,mem_scalar,imm)
multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 timm:$src3))>,
                      Sched<[sched]>;
  defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.ScalarIntMemFrags addr:$src2),
                            (i32 timm:$src3))>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

//handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode, X86FoldableSchedWrite sched,
                                    X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, {sae}, $src2, $src1",
                      "$src1, $src2, {sae}, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 timm:$src3))>,
                      EVEX_B, Sched<[sched]>;
}

//handle scalar instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                    X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
  defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, {sae}, $src2, $src1",
                      "$src1, $src2, {sae}, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 timm:$src3))>,
                      EVEX_B, Sched<[sched]>;
}

multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
            AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
            SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
  let Predicates = [prd] in {
    defm Z    : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
                avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, sched.ZMM, _.info512>,
                                  EVEX_V512;

  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                                  EVEX_V128;
    defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                                  EVEX_V256;
  }
}

multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
                   X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo,
                   AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
  let Predicates = [Pred] in {
    defm Z : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512,
                                SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX, VVVV;
  }
  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128,
                                   SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX, VVVV;
    defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256,
                                   SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX, VVVV;
  }
}

multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
                bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched,
                Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in {
    defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
                             EVEX_V512;
  }
  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                                EVEX_V128;
    defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                                EVEX_V256;
  }
}

multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
                  X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
                  SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
             avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeSAE, sched.XMM, _>;
  }
}

multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
                    bits<8> opcPs, bits<8> opcPd, SDPatternOperator OpNode,
                    SDPatternOperator MaskOpNode, SDNode OpNodeSAE,
                    X86SchedWriteWidths sched, Predicate prd>{
  // Note: the FP16 (PH) form reuses the PS opcode and is gated on HasFP16,
  // not on prd.
  defm PH : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f16_info,
                            opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, HasFP16>,
                            AVX512PSIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
  defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
                            opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
                            AVX512AIi8Base, EVEX, EVEX_CD8<32, CD8VF>;
  defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
                            opcPd, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
                            AVX512AIi8Base, EVEX, EVEX_CD8<64, CD8VF>, REX_W;
}

defm VREDUCE   : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
                              X86VReduce, X86VReduce, X86VReduceSAE,
                              SchedWriteFRnd, HasDQI>;
defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
                              X86any_VRndScale, X86VRndScale, X86VRndScaleSAE,
                              SchedWriteFRnd, HasAVX512>;
defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
                              X86VGetMant, X86VGetMant, X86VGetMantSAE,
                              SchedWriteFRnd, HasAVX512>;

defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
                                                0x50, X86VRange, X86VRangeSAE,
                                                SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
                                                0x50, X86VRange, X86VRangeSAE,
                                                SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;

defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
      f64x_info, 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W;
defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
      0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>;

defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W;
defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>;
defm VREDUCESH: avx512_common_fp_sae_scalar_imm<"vreducesh", f16x_info,
      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasFP16>,
      AVX512PSIi8Base, TA, VEX_LIG, EVEX, VVVV, EVEX_CD8<16, CD8VT1>;

defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
      AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W;
defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
      AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>;
defm VGETMANTSH: avx512_common_fp_sae_scalar_imm<"vgetmantsh", f16x_info,
      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasFP16>,
      AVX512PSIi8Base, TA, VEX_LIG, EVEX, VVVV, EVEX_CD8<16, CD8VT1>;

// 128-bit-lane shuffles (vshuff/i32x4, vshuff/i64x2). CastInfo gives the
// element type the shuffle node is expressed in; the result is bitcast back
// to the instruction's own type.
multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
                                          X86FoldableSchedWrite sched,
                                          X86VectorVTInfo _,
                                          X86VectorVTInfo CastInfo> {
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (_.VT (bitconvert
                         (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
                                                  (i8 timm:$src3)))))>,
                  Sched<[sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                (_.VT
                 (bitconvert
                  (CastInfo.VT (X86Shuf128 _.RC:$src1,
                                           (CastInfo.LdFrag addr:$src2),
                                           (i8 timm:$src3)))))>,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                    OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
                    "$src1, ${src2}"#_.BroadcastStr#", $src3",
                    (_.VT
                     (bitconvert
                      (CastInfo.VT
                       (X86Shuf128 _.RC:$src1,
                                   (_.BroadcastLdFrag addr:$src2),
                                   (i8 timm:$src3)))))>, EVEX_B,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

// No 128-bit form: shuffling 128-bit lanes needs at least two lanes.
multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched,
                                   AVX512VLVectorVTInfo _,
                                   AVX512VLVectorVTInfo CastInfo, bits<8> opc>{
  let Predicates = [HasAVX512] in
  defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
                                          _.info512, CastInfo.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in
  defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
                                             _.info256, CastInfo.info256>, EVEX_V256;
}

defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
      avx512vl_f32_info, avx512vl_f64_info, 0x23>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;
defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
      avx512vl_f64_info, avx512vl_f64_info, 0x23>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;
defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
      avx512vl_i32_info, avx512vl_i64_info, 0x43>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>;
defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
      avx512vl_i64_info, avx512vl_i64_info, 0x43>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W;

// valignd/valignq: concatenate two sources and shift right by imm elements.
multiclass avx512_valign<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 timm:$src3)))>,
                  Sched<[sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                (_.VT (X86VAlign _.RC:$src1,
                                 (bitconvert (_.LdFrag addr:$src2)),
                                 (i8 timm:$src3)))>,
                Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                   OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
                   "$src1, ${src2}"#_.BroadcastStr#", $src3",
                   (X86VAlign _.RC:$src1,
                              (_.VT (_.BroadcastLdFrag addr:$src2)),
                              (i8 timm:$src3))>, EVEX_B,
                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched,
                                AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z    : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>,
                                AVX512AIi8Base, EVEX, VVVV, EVEX_V512;
  }
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>,
                                AVX512AIi8Base, EVEX, VVVV, EVEX_V128;
    // We can't really override the 256-bit version so change it back to unset.
    defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
                                AVX512AIi8Base, EVEX, VVVV, EVEX_V256;
  }
}

defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle,
                                   avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle,
                                   avx512vl_i64_info>, EVEX_CD8<64, CD8VF>,
                                   REX_W;

defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
                                         SchedWriteShuffle, avx512vl_i8_info,
                                         avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;

// Fragments to help convert valignq into masked valignd. Or valignq/valignd
// into vpalignr.
// Each XForm scales the element-count immediate by the ratio of element
// sizes (q->d: x2, q->byte: x8, d->byte: x4).
def ValignqImm32XForm : SDNodeXForm<timm, [{
  return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
}]>;
def ValignqImm8XForm : SDNodeXForm<timm, [{
  return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
}]>;
def ValigndImm8XForm : SDNodeXForm<timm, [{
  return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
}]>;

// Select a masked align in the "To" element type for an align expressed in
// the "From" element type, rescaling the immediate with ImmXForm.
multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
                                        X86VectorVTInfo From, X86VectorVTInfo To,
                                        SDNodeXForm ImmXForm> {
  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                                 (bitconvert
                                  (From.VT (OpNode From.RC:$src1, From.RC:$src2,
                                                   timm:$src3))),
                                 To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
                                                  To.RC:$src1, To.RC:$src2,
                                                  (ImmXForm timm:$src3))>;

  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                                 (bitconvert
                                  (From.VT (OpNode From.RC:$src1, From.RC:$src2,
                                                   timm:$src3))),
                                 To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
                                                   To.RC:$src1, To.RC:$src2,
                                                   (ImmXForm timm:$src3))>;

  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                                 (bitconvert
                                  (From.VT (OpNode From.RC:$src1,
                                                   (From.LdFrag addr:$src2),
                                                   timm:$src3))),
                                 To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
                                                  To.RC:$src1, addr:$src2,
                                                  (ImmXForm timm:$src3))>;

  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                                 (bitconvert
                                  (From.VT (OpNode From.RC:$src1,
                                                   (From.LdFrag addr:$src2),
                                                   timm:$src3))),
                                 To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
                                                   To.RC:$src1, addr:$src2,
                                                   (ImmXForm timm:$src3))>;
}

// Adds broadcast-memory patterns on top of avx512_vpalign_mask_lowering; the
// broadcast is expressed in the To type even though the align node is in the
// From type, hence the inner bitconverts.
multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
                                           X86VectorVTInfo From,
                                           X86VectorVTInfo To,
                                           SDNodeXForm ImmXForm> :
      avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
  def : Pat<(From.VT (OpNode From.RC:$src1,
                             (bitconvert (To.VT (To.BroadcastLdFrag addr:$src2))),
                             timm:$src3)),
            (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
                                                  (ImmXForm timm:$src3))>;

  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                                 (bitconvert
                                  (From.VT (OpNode From.RC:$src1,
                                                   (bitconvert
                                                    (To.VT (To.BroadcastLdFrag addr:$src2))),
                                                   timm:$src3))),
                                 To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
                                                   To.RC:$src1, addr:$src2,
                                                   (ImmXForm timm:$src3))>;

  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
                                 (bitconvert
                                  (From.VT (OpNode From.RC:$src1,
                                                   (bitconvert
                                                    (To.VT (To.BroadcastLdFrag addr:$src2))),
                                                   timm:$src3))),
                                 To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
                                                    To.RC:$src1, addr:$src2,
                                                    (ImmXForm timm:$src3))>;
}

let Predicates = [HasAVX512] in {
  // For 512-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
                                         v16i32_info, ValignqImm32XForm>;
}

let Predicates = [HasVLX] in {
  // For 128-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
                                         v4i32x_info, ValignqImm32XForm>;
  // For 256-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
                                         v8i32x_info, ValignqImm32XForm>;
}

let Predicates = [HasVLX, HasBWI] in {
  // We can turn 128 and 256 bit VALIGND/VALIGNQ into VPALIGNR.
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
                                      v16i8x_info, ValignqImm8XForm>;
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
                                      v16i8x_info, ValigndImm8XForm>;
}

defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
                SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>,
                EVEX_CD8<8, CD8VF>;

// Generic unary vector op: register and full-width memory forms.
multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                    (ins _.RC:$src1), OpcodeStr,
                    "$src1", "$src1",
                    (_.VT (OpNode (_.VT _.RC:$src1)))>, EVEX, AVX5128IBase,
                    Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.MemOp:$src1), OpcodeStr,
                  "$src1", "$src1",
                  (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1)))))>,
            EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
            Sched<[sched.Folded]>;
  }
}

// As avx512_unary_rm, plus the broadcast-memory form.
multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> :
           avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.ScalarMemOp:$src1), OpcodeStr,
                  "${src1}"#_.BroadcastStr,
                  "${src1}"#_.BroadcastStr,
                  (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src1))))>,
             EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded]>;
}

multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched,
                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
                EVEX_V128;
  }
}

multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo,
                               Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
                EVEX_V128;
  }
}

// D/Q element widths support embedded broadcast; B/W (below) do not.
multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd> {
  defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched,
                               avx512vl_i64_info, prd>, REX_W;
  defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched,
                               avx512vl_i32_info, prd>;
}
// Byte/word pair of unary ops; both ignore REX.W (WIG).
multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd> {
  defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched,
                              avx512vl_i16_info, prd>, WIG;
  defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched,
                              avx512vl_i8_info, prd>, WIG;
}

// All four element widths: b/w forms gated on HasBWI, d/q on HasAVX512.
multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched> {
  defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched,
                                    HasAVX512>,
              avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched,
                                    HasBWI>;
}

defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
                                    SchedWriteVecALU>;

// VPABS: Use 512bit version to implement 128/256 bit in case NoVLX.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v4i64 (abs VR256X:$src)),
            (EXTRACT_SUBREG
             (VPABSQZrr
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
             sub_ymm)>;
  def : Pat<(v2i64 (abs VR128X:$src)),
            (EXTRACT_SUBREG
             (VPABSQZrr
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
             sub_xmm)>;
}

// Use 512bit version to implement 128/256 bit.
// Lowers a 256/128-bit unary op to the 512-bit instruction when VLX is not
// available: widen the source via INSERT_SUBREG, run the Z form, extract back.
multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
                                 AVX512VLVectorVTInfo _, Predicate prd> {
  let Predicates = [prd, NoVLX] in {
    def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))),
              (EXTRACT_SUBREG
                (!cast<Instruction>(InstrStr # "Zrr")
                  (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
                                 _.info256.RC:$src1,
                                 _.info256.SubRegIdx)),
                _.info256.SubRegIdx)>;

    def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1))),
              (EXTRACT_SUBREG
                (!cast<Instruction>(InstrStr # "Zrr")
                  (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
                                 _.info128.RC:$src1,
                                 _.info128.SubRegIdx)),
                _.info128.SubRegIdx)>;
  }
}

defm VPLZCNT    : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
                                        SchedWriteVecIMul, HasCDI>;

// FIXME: Is there a better scheduler class for VPCONFLICT?
defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
                                        SchedWriteVecALU, HasCDI>;

// VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;

//===---------------------------------------------------------------------===//
// Counts number of ones - VPOPCNTD and VPOPCNTQ
//===---------------------------------------------------------------------===//

// FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
                                     SchedWriteVecALU, HasVPOPCNTDQ>;

defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;

//===---------------------------------------------------------------------===//
// Replicate Single FP - MOVSHDUP and MOVSLDUP
//===---------------------------------------------------------------------===//

multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86SchedWriteWidths sched> {
  defm NAME: avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
                                avx512vl_f32_info, HasAVX512>, TB, XS;
}

defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
                                  SchedWriteFShuffle>;
defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
                                  SchedWriteFShuffle>;

//===----------------------------------------------------------------------===//
// AVX-512 - MOVDDUP
//===----------------------------------------------------------------------===//

// 128-bit MOVDDUP is a broadcast of the low element, so it gets its own
// patterns (X86VBroadcast) instead of the generic X86Movddup ones.
multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                   (ins _.RC:$src), OpcodeStr, "$src", "$src",
                   (_.VT (X86VBroadcast (_.VT _.RC:$src)))>, EVEX,
                   Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                 (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
                 (_.VT (_.BroadcastLdFrag addr:$src))>,
                 EVEX, EVEX_CD8<_.EltSize, CD8VH>,
                 Sched<[sched.Folded]>;
  }
}

multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr,
                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
  defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM,
                           VTInfo.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM,
                                VTInfo.info256>, EVEX_V256;
    defm Z128 : avx512_movddup_128<opc, OpcodeStr, sched.XMM,
                                   VTInfo.info128>, EVEX_V128;
  }
}

multiclass avx512_movddup<bits<8> opc, string OpcodeStr,
                          X86SchedWriteWidths sched> {
  defm NAME: avx512_movddup_common<opc, OpcodeStr, sched,
                                   avx512vl_f64_info>, TB, XD, REX_W;
}

defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", SchedWriteFShuffle>;

// Broadcast of a scalar f64 register (and its masked forms) can be done with
// the 128-bit VMOVDDUP.
let Predicates = [HasVLX] in {
def : Pat<(v2f64 (X86VBroadcast f64:$src)),
          (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;

def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
                        (v2f64 VR128X:$src0)),
          (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
                           (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
                        immAllZerosV),
          (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
}

//===----------------------------------------------------------------------===//
// AVX-512 - Unpack Instructions
//===----------------------------------------------------------------------===//

// FP unpacks move data only; they read no registers and cannot raise FP
// exceptions despite reusing the FP binop multiclass.
let Uses = []<Register>, mayRaiseFPException = 0 in {
defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, X86Unpckh, HasAVX512,
                                 SchedWriteFShuffleSizes, 0, 1>;
defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, X86Unpckl, HasAVX512,
                                 SchedWriteFShuffleSizes>;
}

defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
                                       SchedWriteShuffle, HasBWI>;
defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
                                       SchedWriteShuffle, HasBWI>;

defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
                                       SchedWriteShuffle, HasAVX512>;
defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
                                       SchedWriteShuffle, HasAVX512>;
defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
                                        SchedWriteShuffle, HasAVX512>;
defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
                                        SchedWriteShuffle, HasAVX512>;

//===----------------------------------------------------------------------===//
// AVX-512 - Extract & Insert Integer Instructions
//===----------------------------------------------------------------------===//

// Extract-to-memory form shared by the byte and word extracts.
multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   X86VectorVTInfo _> {
  def mr : AVX512Ii8<opc, MRMDestMem, (outs),
                     (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
                     OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), timm:$src2))),
                             addr:$dst)]>,
                     EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
}

multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
                       (ins _.RC:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set GR32orGR64:$dst,
                             (X86pextrb (_.VT _.RC:$src1), timm:$src2))]>,
                       EVEX, TA, PD, Sched<[WriteVecExtract]>;

    defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TA, PD;
  }
}

multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
                       (ins _.RC:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set GR32orGR64:$dst,
                             (X86pextrw (_.VT _.RC:$src1), timm:$src2))]>,
                       EVEX, TB, PD, Sched<[WriteVecExtract]>;

    // Alternate 0x15 encoding, disassembly-only (no patterns).
    let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
    def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
                           (ins _.RC:$src1, u8imm:$src2),
                           OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                           EVEX, TA, PD, Sched<[WriteVecExtract]>;

    defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TA, PD;
  }
}

multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
                                 RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
                       (ins _.RC:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set GRC:$dst,
                           (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
                       EVEX, TA, PD, Sched<[WriteVecExtract]>;

    def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
                       (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
                       OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(store (extractelt (_.VT _.RC:$src1),
                                           imm:$src2),addr:$dst)]>,
                       EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TA, PD,
                       Sched<[WriteVecExtractSt]>;
  }
}

defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, WIG;
defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, WIG;
defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, REX_W;

// Insert-from-memory form shared by the bw and dq inserts.
multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86VectorVTInfo _, PatFrag LdFrag,
                               SDPatternOperator immoperator> {
  def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
      (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
      OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set _.RC:$dst,
          (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), immoperator:$src3)))]>,
      EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
}

multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86VectorVTInfo _, PatFrag LdFrag> {
  let Predicates = [HasBWI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
        (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set _.RC:$dst,
            (OpNode _.RC:$src1, GR32orGR64:$src2, timm:$src3))]>, EVEX, VVVV,
        Sched<[WriteVecInsert]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag, timm>;
  }
}

multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
                                X86VectorVTInfo _, RegisterClass GRC> {
  let Predicates = [HasDQI] in {
    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
        (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
        [(set _.RC:$dst,
            (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
        EVEX, VVVV, TA, PD, Sched<[WriteVecInsert]>;

    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
                                    _.ScalarLdFrag, imm>, TA, PD;
  }
}

defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
                                     extloadi8>, TA, PD, WIG;
defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
                                     extloadi16>, TB, PD, WIG;
defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, REX_W;

// Insert a mask register's bits as a byte without BWI (uses the VEX VPINSRB).
let Predicates = [HasAVX512, NoBWI] in {
  def : Pat<(X86pinsrb VR128:$src1,
                       (i32 (anyext (i8 (bitconvert v8i1:$src2)))),
                       timm:$src3),
            (VPINSRBrr VR128:$src1, (i32 (COPY_TO_REGCLASS VK8:$src2, GR32)),
                       timm:$src3)>;
}

let Predicates = [HasBWI] in {
  def : Pat<(X86pinsrb VR128:$src1, (i32 (anyext (i8 GR8:$src2))), timm:$src3),
            (VPINSRBZrr VR128:$src1, (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
                                                    GR8:$src2, sub_8bit), timm:$src3)>;
  def : Pat<(X86pinsrb VR128:$src1,
                       (i32 (anyext (i8 (bitconvert v8i1:$src2)))),
                       timm:$src3),
            (VPINSRBZrr VR128:$src1, (i32 (COPY_TO_REGCLASS VK8:$src2, GR32)),
                        timm:$src3)>;
}

// Always select FP16 instructions if available.
let Predicates = [HasBWI], AddedComplexity = -10 in {
  def : Pat<(f16 (load addr:$src)), (COPY_TO_REGCLASS (VPINSRWZrm (v8i16 (IMPLICIT_DEF)), addr:$src, 0), FR16X)>;
  def : Pat<(store f16:$src, addr:$dst), (VPEXTRWZmr addr:$dst, (v8i16 (COPY_TO_REGCLASS FR16:$src, VR128)), 0)>;
  def : Pat<(i16 (bitconvert f16:$src)), (EXTRACT_SUBREG (VPEXTRWZrr (v8i16 (COPY_TO_REGCLASS FR16X:$src, VR128X)), 0), sub_16bit)>;
  def : Pat<(f16 (bitconvert i16:$src)), (COPY_TO_REGCLASS (VPINSRWZrr (v8i16 (IMPLICIT_DEF)), (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit), 0), FR16X)>;
}

//===----------------------------------------------------------------------===//
// VSHUFPS - VSHUFPD Operations
//===----------------------------------------------------------------------===//

multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_FP>{
  defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
                                    SchedWriteFShuffle>,
             EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
             TA, EVEX, VVVV;
}

defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_f32_info>, TB;
defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_f64_info>, TB, PD, REX_W;

//===----------------------------------------------------------------------===//
// AVX-512 - Byte shift Left/Right
//===----------------------------------------------------------------------===//

multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
                               Format MRMm, string OpcodeStr,
                               X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  def ri : AVX512<opc, MRMr,
             (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 timm:$src2))))]>,
             Sched<[sched]>;
  def mi : AVX512<opc, MRMm,
           (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
           [(set _.RC:$dst,(_.VT (OpNode
                                  (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                  (i8 timm:$src2))))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
                                   Format MRMm, string OpcodeStr,
                                   X86SchedWriteWidths sched, Predicate prd>{
  let Predicates = [prd] in
    defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                 sched.ZMM, v64i8_info>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    sched.YMM, v32i8x_info>, EVEX_V256;
    defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
                                    sched.XMM, v16i8x_info>, EVEX_V128;
  }
}
defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
                                       SchedWriteShuffle, HasBWI>,
                                       AVX512PDIi8Base, EVEX, VVVV, WIG;
defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
                                       SchedWriteShuffle, HasBWI>,
                                       AVX512PDIi8Base, EVEX, VVVV, WIG;

// PSADBW: source elements are bytes, destination accumulators are qwords,
// hence separate _dst/_src VT infos.
multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
                                string OpcodeStr, X86FoldableSchedWrite sched,
                                X86VectorVTInfo _dst, X86VectorVTInfo _src> {
  let isCommutable = 1 in
  def rr : AVX512BI<opc, MRMSrcReg,
             (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _dst.RC:$dst,(_dst.VT
                                 (OpNode (_src.VT _src.RC:$src1),
                                         (_src.VT _src.RC:$src2))))]>,
             Sched<[sched]>;
  def rm : AVX512BI<opc, MRMSrcMem,
           (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
           [(set _dst.RC:$dst,(_dst.VT
                               (OpNode (_src.VT _src.RC:$src1),
                                       (_src.VT (bitconvert
                                                 (_src.LdFrag addr:$src2))))))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}

multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
                                    string OpcodeStr, X86SchedWriteWidths sched,
                                    Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM,
                                  v8i64_info, v64i8_info>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM,
                                     v4i64x_info, v32i8x_info>, EVEX_V256;
    defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM,
                                     v2i64x_info, v16i8x_info>, EVEX_V128;
  }
}

defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
                                        SchedWritePSADBW, HasBWI>, EVEX, VVVV, WIG;

// Transforms to swizzle an immediate to enable better matching when
// memory operand isn't in the right place.
// The imm8 truth table is indexed by (op0<<2)|(op1<<1)|op2, so permuting the
// operands permutes the index bits of the immediate.
def VPTERNLOG321_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 1/4 and 3/6.
  uint8_t NewImm = Imm & 0xa5;
  if (Imm & 0x02) NewImm |= 0x10;
  if (Imm & 0x10) NewImm |= 0x02;
  if (Imm & 0x08) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG213_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 2/4 and 3/5.
  uint8_t NewImm = Imm & 0xc3;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x20) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG132_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
  uint8_t Imm = N->getZExtValue();
  // Swap bits 1/2 and 5/6.
  uint8_t NewImm = Imm & 0x99;
  if (Imm & 0x02) NewImm |= 0x04;
  if (Imm & 0x04) NewImm |= 0x02;
  if (Imm & 0x20) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x20;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG231_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by moving operand 1 to the end.
  uint8_t Imm = N->getZExtValue();
  // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x04;
  if (Imm & 0x04) NewImm |= 0x10;
  if (Imm & 0x08) NewImm |= 0x40;
  if (Imm & 0x10) NewImm |= 0x02;
  if (Imm & 0x20) NewImm |= 0x08;
  if (Imm & 0x40) NewImm |= 0x20;
  return getI8Imm(NewImm, SDLoc(N));
}]>;
def VPTERNLOG312_imm8 : SDNodeXForm<timm, [{
  // Convert a VPTERNLOG immediate by moving operand 2 to the beginning.
  uint8_t Imm = N->getZExtValue();
  // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
  uint8_t NewImm = Imm & 0x81;
  if (Imm & 0x02) NewImm |= 0x10;
  if (Imm & 0x04) NewImm |= 0x02;
  if (Imm & 0x08) NewImm |= 0x20;
  if (Imm & 0x10) NewImm |= 0x04;
  if (Imm & 0x20) NewImm |= 0x40;
  if (Imm & 0x40) NewImm |= 0x08;
  return getI8Imm(NewImm, SDLoc(N));
}]>;

multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          X86FoldableSchedWrite sched, X86VectorVTInfo _,
                          string Name>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT _.RC:$src3),
                              (i8 timm:$src4)), 1, 1>,
                      AVX512AIi8Base, EVEX, VVVV, Sched<[sched]>;
  defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT (bitconvert (_.LdFrag addr:$src3))),
                              (i8 timm:$src4)), 1, 0>,
                      AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
                      OpcodeStr, "$src4, ${src3}"#_.BroadcastStr#", $src2",
                      "$src2, ${src3}"#_.BroadcastStr#", $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_.VT (_.BroadcastLdFrag addr:$src3)),
                              (i8 timm:$src4)), 1, 0>, EVEX_B,
                      AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<_.EltSize, CD8VF>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
  }// Constraints = "$src1 = $dst"

  // Additional patterns for matching passthru operand in other positions.
  // The instruction ties the passthru to $src1, so when the DAG has it
  // elsewhere the immediate is swizzled to compensate.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 timm:$src4))>;

  // Additional patterns for matching zero masking with loads in other
  // positions.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, (i8 timm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;

  // Additional patterns for matching masked loads with different
  // operand orders.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1,
                    (bitconvert (_.LdFrag addr:$src3)), (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                    _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;

  // Additional patterns for matching zero masking with broadcasts in other
  // positions.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src1,
                    (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src2, (i8 timm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG132_imm8 timm:$src4))>;

  // Additional patterns for matching masked broadcasts with different
  // operand orders.
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src2, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1,
                    (_.BroadcastLdFrag addr:$src3),
                    (i8 timm:$src4)), _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode _.RC:$src2,
                    (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (OpNode (_.BroadcastLdFrag addr:$src3),
                    _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
}

multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
  defm Z    : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM,
                             _.info512, NAME>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM,
                               _.info128, NAME>, EVEX_V128;
    defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM,
                               _.info256, NAME>, EVEX_V256;
  }
}

defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
                                        avx512vl_i32_info>;
defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
                                        avx512vl_i64_info>, REX_W;

// Patterns to implement vnot using vpternlog instead of creating all ones
// using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
// so that the result is only dependent on src0. But we use the same source
// for all operands to prevent a false dependency.
// TODO: We should maybe have a more generalized algorithm for folding to
// vpternlog.
11784let Predicates = [HasAVX512] in { 11785 def : Pat<(v64i8 (vnot VR512:$src)), 11786 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>; 11787 def : Pat<(v32i16 (vnot VR512:$src)), 11788 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>; 11789 def : Pat<(v16i32 (vnot VR512:$src)), 11790 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>; 11791 def : Pat<(v8i64 (vnot VR512:$src)), 11792 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>; 11793} 11794 11795let Predicates = [HasAVX512, NoVLX] in { 11796 def : Pat<(v16i8 (vnot VR128X:$src)), 11797 (EXTRACT_SUBREG 11798 (VPTERNLOGQZrri 11799 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 11800 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 11801 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 11802 (i8 15)), sub_xmm)>; 11803 def : Pat<(v8i16 (vnot VR128X:$src)), 11804 (EXTRACT_SUBREG 11805 (VPTERNLOGQZrri 11806 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 11807 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 11808 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 11809 (i8 15)), sub_xmm)>; 11810 def : Pat<(v4i32 (vnot VR128X:$src)), 11811 (EXTRACT_SUBREG 11812 (VPTERNLOGQZrri 11813 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 11814 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 11815 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 11816 (i8 15)), sub_xmm)>; 11817 def : Pat<(v2i64 (vnot VR128X:$src)), 11818 (EXTRACT_SUBREG 11819 (VPTERNLOGQZrri 11820 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 11821 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 11822 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm), 11823 (i8 15)), sub_xmm)>; 11824 11825 def : Pat<(v32i8 (vnot VR256X:$src)), 11826 (EXTRACT_SUBREG 11827 (VPTERNLOGQZrri 11828 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 11829 
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 11830 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 11831 (i8 15)), sub_ymm)>; 11832 def : Pat<(v16i16 (vnot VR256X:$src)), 11833 (EXTRACT_SUBREG 11834 (VPTERNLOGQZrri 11835 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 11836 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 11837 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 11838 (i8 15)), sub_ymm)>; 11839 def : Pat<(v8i32 (vnot VR256X:$src)), 11840 (EXTRACT_SUBREG 11841 (VPTERNLOGQZrri 11842 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 11843 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 11844 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 11845 (i8 15)), sub_ymm)>; 11846 def : Pat<(v4i64 (vnot VR256X:$src)), 11847 (EXTRACT_SUBREG 11848 (VPTERNLOGQZrri 11849 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 11850 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 11851 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm), 11852 (i8 15)), sub_ymm)>; 11853} 11854 11855let Predicates = [HasVLX] in { 11856 def : Pat<(v16i8 (vnot VR128X:$src)), 11857 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>; 11858 def : Pat<(v8i16 (vnot VR128X:$src)), 11859 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>; 11860 def : Pat<(v4i32 (vnot VR128X:$src)), 11861 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>; 11862 def : Pat<(v2i64 (vnot VR128X:$src)), 11863 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>; 11864 11865 def : Pat<(v32i8 (vnot VR256X:$src)), 11866 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>; 11867 def : Pat<(v16i16 (vnot VR256X:$src)), 11868 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>; 11869 def : Pat<(v8i32 (vnot VR256X:$src)), 11870 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 
15))>; 11871 def : Pat<(v4i64 (vnot VR256X:$src)), 11872 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>; 11873} 11874 11875//===----------------------------------------------------------------------===// 11876// AVX-512 - FixupImm 11877//===----------------------------------------------------------------------===// 11878 11879multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, 11880 X86FoldableSchedWrite sched, X86VectorVTInfo _, 11881 X86VectorVTInfo TblVT>{ 11882 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, 11883 Uses = [MXCSR], mayRaiseFPException = 1 in { 11884 defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 11885 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4), 11886 OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4", 11887 (X86VFixupimm (_.VT _.RC:$src1), 11888 (_.VT _.RC:$src2), 11889 (TblVT.VT _.RC:$src3), 11890 (i32 timm:$src4))>, Sched<[sched]>; 11891 defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 11892 (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4), 11893 OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4", 11894 (X86VFixupimm (_.VT _.RC:$src1), 11895 (_.VT _.RC:$src2), 11896 (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))), 11897 (i32 timm:$src4))>, 11898 Sched<[sched.Folded, sched.ReadAfterFold]>; 11899 defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 11900 (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4), 11901 OpcodeStr#_.Suffix, "$src4, ${src3}"#_.BroadcastStr#", $src2", 11902 "$src2, ${src3}"#_.BroadcastStr#", $src4", 11903 (X86VFixupimm (_.VT _.RC:$src1), 11904 (_.VT _.RC:$src2), 11905 (TblVT.VT (TblVT.BroadcastLdFrag addr:$src3)), 11906 (i32 timm:$src4))>, 11907 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 11908 } // Constraints = "$src1 = $dst" 11909} 11910 11911multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr, 11912 X86FoldableSchedWrite sched, 11913 X86VectorVTInfo _, 
X86VectorVTInfo TblVT> 11914 : avx512_fixupimm_packed<opc, OpcodeStr, sched, _, TblVT> { 11915let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, Uses = [MXCSR] in { 11916 defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 11917 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4), 11918 OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2", 11919 "$src2, $src3, {sae}, $src4", 11920 (X86VFixupimmSAE (_.VT _.RC:$src1), 11921 (_.VT _.RC:$src2), 11922 (TblVT.VT _.RC:$src3), 11923 (i32 timm:$src4))>, 11924 EVEX_B, Sched<[sched]>; 11925 } 11926} 11927 11928multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, 11929 X86FoldableSchedWrite sched, X86VectorVTInfo _, 11930 X86VectorVTInfo _src3VT> { 11931 let Constraints = "$src1 = $dst" , Predicates = [HasAVX512], 11932 ExeDomain = _.ExeDomain in { 11933 defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 11934 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4), 11935 OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4", 11936 (X86VFixupimms (_.VT _.RC:$src1), 11937 (_.VT _.RC:$src2), 11938 (_src3VT.VT _src3VT.RC:$src3), 11939 (i32 timm:$src4))>, Sched<[sched]>, SIMD_EXC; 11940 let Uses = [MXCSR] in 11941 defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), 11942 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4), 11943 OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2", 11944 "$src2, $src3, {sae}, $src4", 11945 (X86VFixupimmSAEs (_.VT _.RC:$src1), 11946 (_.VT _.RC:$src2), 11947 (_src3VT.VT _src3VT.RC:$src3), 11948 (i32 timm:$src4))>, 11949 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; 11950 defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), 11951 (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4), 11952 OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4", 11953 (X86VFixupimms (_.VT _.RC:$src1), 11954 (_.VT _.RC:$src2), 11955 (_src3VT.VT (scalar_to_vector 11956 (_src3VT.ScalarLdFrag addr:$src3))), 11957 (i32 
timm:$src4))>, 11958 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 11959 } 11960} 11961 11962multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched, 11963 AVX512VLVectorVTInfo _Vec, 11964 AVX512VLVectorVTInfo _Tbl> { 11965 let Predicates = [HasAVX512] in 11966 defm Z : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM, 11967 _Vec.info512, _Tbl.info512>, AVX512AIi8Base, 11968 EVEX, VVVV, EVEX_V512; 11969 let Predicates = [HasAVX512, HasVLX] in { 11970 defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.XMM, 11971 _Vec.info128, _Tbl.info128>, AVX512AIi8Base, 11972 EVEX, VVVV, EVEX_V128; 11973 defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.YMM, 11974 _Vec.info256, _Tbl.info256>, AVX512AIi8Base, 11975 EVEX, VVVV, EVEX_V256; 11976 } 11977} 11978 11979defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm", 11980 SchedWriteFAdd.Scl, f32x_info, v4i32x_info>, 11981 AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>; 11982defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm", 11983 SchedWriteFAdd.Scl, f64x_info, v2i64x_info>, 11984 AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W; 11985defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info, 11986 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; 11987defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info, 11988 avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, REX_W; 11989 11990// Patterns used to select SSE scalar fp arithmetic instructions from 11991// either: 11992// 11993// (1) a scalar fp operation followed by a blend 11994// 11995// The effect is that the backend no longer emits unnecessary vector 11996// insert instructions immediately after SSE scalar fp instructions 11997// like addss or mulss. 
//
// For example, given the following code:
//   __m128 foo(__m128 A, __m128 B) {
//     A[0] += B[0];
//     return A;
//   }
//
// Previously we generated:
//   addss %xmm0, %xmm1
//   movss %xmm1, %xmm0
//
// We now generate:
//   addss %xmm1, %xmm0
//
// (2) a vector packed single/double fp operation followed by a vector insert
//
// The effect is that the backend converts the packed fp instruction
// followed by a vector insert into a single SSE scalar fp instruction.
//
// For example, given the following code:
//   __m128 foo(__m128 A, __m128 B) {
//     __m128 C = A + B;
//     return (__m128) {C[0], A[1], A[2], A[3]};
//   }
//
// Previously we generated:
//   addps %xmm0, %xmm1
//   movss %xmm1, %xmm0
//
// We now generate:
//   addss %xmm1, %xmm0

// TODO: Some canonicalization in lowering would simplify the number of
// patterns we have to try to match.
12032multiclass AVX512_scalar_math_fp_patterns<SDPatternOperator Op, SDNode MaskedOp, 12033 string OpcPrefix, SDNode MoveNode, 12034 X86VectorVTInfo _, PatLeaf ZeroFP> { 12035 let Predicates = [HasAVX512] in { 12036 // extracted scalar math op with insert via movss 12037 def : Pat<(MoveNode 12038 (_.VT VR128X:$dst), 12039 (_.VT (scalar_to_vector 12040 (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))), 12041 _.FRC:$src)))), 12042 (!cast<Instruction>("V"#OpcPrefix#"Zrr_Int") _.VT:$dst, 12043 (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>; 12044 def : Pat<(MoveNode 12045 (_.VT VR128X:$dst), 12046 (_.VT (scalar_to_vector 12047 (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))), 12048 (_.ScalarLdFrag addr:$src))))), 12049 (!cast<Instruction>("V"#OpcPrefix#"Zrm_Int") _.VT:$dst, addr:$src)>; 12050 12051 // extracted masked scalar math op with insert via movss 12052 def : Pat<(MoveNode (_.VT VR128X:$src1), 12053 (scalar_to_vector 12054 (X86selects_mask VK1WM:$mask, 12055 (MaskedOp (_.EltVT 12056 (extractelt (_.VT VR128X:$src1), (iPTR 0))), 12057 _.FRC:$src2), 12058 _.FRC:$src0))), 12059 (!cast<Instruction>("V"#OpcPrefix#"Zrr_Intk") 12060 (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)), 12061 VK1WM:$mask, _.VT:$src1, 12062 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>; 12063 def : Pat<(MoveNode (_.VT VR128X:$src1), 12064 (scalar_to_vector 12065 (X86selects_mask VK1WM:$mask, 12066 (MaskedOp (_.EltVT 12067 (extractelt (_.VT VR128X:$src1), (iPTR 0))), 12068 (_.ScalarLdFrag addr:$src2)), 12069 _.FRC:$src0))), 12070 (!cast<Instruction>("V"#OpcPrefix#"Zrm_Intk") 12071 (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)), 12072 VK1WM:$mask, _.VT:$src1, addr:$src2)>; 12073 12074 // extracted masked scalar math op with insert via movss 12075 def : Pat<(MoveNode (_.VT VR128X:$src1), 12076 (scalar_to_vector 12077 (X86selects_mask VK1WM:$mask, 12078 (MaskedOp (_.EltVT 12079 (extractelt (_.VT VR128X:$src1), (iPTR 0))), 12080 _.FRC:$src2), (_.EltVT ZeroFP)))), 12081 
(!cast<I>("V"#OpcPrefix#"Zrr_Intkz") 12082 VK1WM:$mask, _.VT:$src1, 12083 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>; 12084 def : Pat<(MoveNode (_.VT VR128X:$src1), 12085 (scalar_to_vector 12086 (X86selects_mask VK1WM:$mask, 12087 (MaskedOp (_.EltVT 12088 (extractelt (_.VT VR128X:$src1), (iPTR 0))), 12089 (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))), 12090 (!cast<I>("V"#OpcPrefix#"Zrm_Intkz") VK1WM:$mask, _.VT:$src1, addr:$src2)>; 12091 } 12092} 12093 12094defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>; 12095defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>; 12096defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>; 12097defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>; 12098 12099defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>; 12100defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>; 12101defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>; 12102defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>; 12103 12104defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSH", X86Movsh, v8f16x_info, fp16imm0>; 12105defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSH", X86Movsh, v8f16x_info, fp16imm0>; 12106defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSH", X86Movsh, v8f16x_info, fp16imm0>; 12107defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSH", X86Movsh, v8f16x_info, fp16imm0>; 12108 12109multiclass AVX512_scalar_unary_math_patterns<SDPatternOperator OpNode, string OpcPrefix, 12110 SDNode Move, X86VectorVTInfo _> { 12111 let Predicates = [HasAVX512] in { 12112 def : Pat<(_.VT (Move _.VT:$dst, 12113 (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))), 12114 
(!cast<Instruction>("V"#OpcPrefix#"Zr_Int") _.VT:$dst, _.VT:$src)>; 12115 } 12116} 12117 12118defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSS", X86Movss, v4f32x_info>; 12119defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSD", X86Movsd, v2f64x_info>; 12120defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSH", X86Movsh, v8f16x_info>; 12121 12122//===----------------------------------------------------------------------===// 12123// AES instructions 12124//===----------------------------------------------------------------------===// 12125 12126multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> { 12127 let Predicates = [HasVLX, HasVAES] in { 12128 defm Z128 : AESI_binop_rm_int<Op, OpStr, 12129 !cast<Intrinsic>(IntPrefix), 12130 loadv2i64, 0, VR128X, i128mem>, 12131 EVEX, VVVV, EVEX_CD8<64, CD8VF>, EVEX_V128, WIG; 12132 defm Z256 : AESI_binop_rm_int<Op, OpStr, 12133 !cast<Intrinsic>(IntPrefix#"_256"), 12134 loadv4i64, 0, VR256X, i256mem>, 12135 EVEX, VVVV, EVEX_CD8<64, CD8VF>, EVEX_V256, WIG; 12136 } 12137 let Predicates = [HasAVX512, HasVAES] in 12138 defm Z : AESI_binop_rm_int<Op, OpStr, 12139 !cast<Intrinsic>(IntPrefix#"_512"), 12140 loadv8i64, 0, VR512, i512mem>, 12141 EVEX, VVVV, EVEX_CD8<64, CD8VF>, EVEX_V512, WIG; 12142} 12143 12144defm VAESENC : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">; 12145defm VAESENCLAST : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">; 12146defm VAESDEC : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">; 12147defm VAESDECLAST : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">; 12148 12149//===----------------------------------------------------------------------===// 12150// PCLMUL instructions - Carry less multiplication 12151//===----------------------------------------------------------------------===// 12152 12153let Predicates = [HasAVX512, HasVPCLMULQDQ] in 12154defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>, 12155 
EVEX, VVVV, EVEX_V512, EVEX_CD8<64, CD8VF>, WIG; 12156 12157let Predicates = [HasVLX, HasVPCLMULQDQ] in { 12158defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>, 12159 EVEX, VVVV, EVEX_V128, EVEX_CD8<64, CD8VF>, WIG; 12160 12161defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64, 12162 int_x86_pclmulqdq_256>, EVEX, VVVV, EVEX_V256, 12163 EVEX_CD8<64, CD8VF>, WIG; 12164} 12165 12166// Aliases 12167defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>; 12168defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>; 12169defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>; 12170 12171//===----------------------------------------------------------------------===// 12172// VBMI2 12173//===----------------------------------------------------------------------===// 12174 12175multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode, 12176 X86FoldableSchedWrite sched, X86VectorVTInfo VTI> { 12177 let Constraints = "$src1 = $dst", 12178 ExeDomain = VTI.ExeDomain in { 12179 defm r: AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst), 12180 (ins VTI.RC:$src2, VTI.RC:$src3), OpStr, 12181 "$src3, $src2", "$src2, $src3", 12182 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>, 12183 T8, PD, EVEX, VVVV, Sched<[sched]>; 12184 defm m: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst), 12185 (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr, 12186 "$src3, $src2", "$src2, $src3", 12187 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, 12188 (VTI.VT (VTI.LdFrag addr:$src3))))>, 12189 T8, PD, EVEX, VVVV, 12190 Sched<[sched.Folded, sched.ReadAfterFold]>; 12191 } 12192} 12193 12194multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode, 12195 X86FoldableSchedWrite sched, X86VectorVTInfo VTI> 12196 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> { 12197 let Constraints = "$src1 = $dst", 12198 ExeDomain = VTI.ExeDomain in 12199 defm mb: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs 
VTI.RC:$dst), 12200 (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr, 12201 "${src3}"#VTI.BroadcastStr#", $src2", 12202 "$src2, ${src3}"#VTI.BroadcastStr, 12203 (OpNode VTI.RC:$src1, VTI.RC:$src2, 12204 (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>, 12205 T8, PD, EVEX, VVVV, EVEX_B, 12206 Sched<[sched.Folded, sched.ReadAfterFold]>; 12207} 12208 12209multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode, 12210 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> { 12211 let Predicates = [HasVBMI2] in 12212 defm Z : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>, 12213 EVEX_V512; 12214 let Predicates = [HasVBMI2, HasVLX] in { 12215 defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>, 12216 EVEX_V256; 12217 defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>, 12218 EVEX_V128; 12219 } 12220} 12221 12222multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode, 12223 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> { 12224 let Predicates = [HasVBMI2] in 12225 defm Z : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>, 12226 EVEX_V512; 12227 let Predicates = [HasVBMI2, HasVLX] in { 12228 defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>, 12229 EVEX_V256; 12230 defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>, 12231 EVEX_V128; 12232 } 12233} 12234multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix, 12235 SDNode OpNode, X86SchedWriteWidths sched> { 12236 defm W : VBMI2_shift_var_rm_common<wOp, Prefix#"w", OpNode, sched, 12237 avx512vl_i16_info>, REX_W, EVEX_CD8<16, CD8VF>; 12238 defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix#"d", OpNode, sched, 12239 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; 12240 defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix#"q", OpNode, sched, 12241 avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>; 12242} 12243 12244multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, 
string Prefix, 12245 SDNode OpNode, X86SchedWriteWidths sched> { 12246 defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix#"w", sched, 12247 avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>, 12248 REX_W, EVEX_CD8<16, CD8VF>; 12249 defm D : avx512_common_3Op_imm8<Prefix#"d", avx512vl_i32_info, dqOp, 12250 OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<32, CD8VF>; 12251 defm Q : avx512_common_3Op_imm8<Prefix#"q", avx512vl_i64_info, dqOp, OpNode, 12252 sched, HasVBMI2>, AVX512AIi8Base, EVEX, VVVV, EVEX_CD8<64, CD8VF>, REX_W; 12253} 12254 12255// Concat & Shift 12256defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>; 12257defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>; 12258defm VPSHLD : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>; 12259defm VPSHRD : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>; 12260 12261// Compress 12262defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256, 12263 avx512vl_i8_info, HasVBMI2>, EVEX; 12264defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256, 12265 avx512vl_i16_info, HasVBMI2>, EVEX, REX_W; 12266// Expand 12267defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256, 12268 avx512vl_i8_info, HasVBMI2>, EVEX; 12269defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256, 12270 avx512vl_i16_info, HasVBMI2>, EVEX, REX_W; 12271 12272//===----------------------------------------------------------------------===// 12273// VNNI 12274//===----------------------------------------------------------------------===// 12275 12276let Constraints = "$src1 = $dst" in 12277multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode, 12278 X86FoldableSchedWrite sched, X86VectorVTInfo VTI, 12279 bit IsCommutable> { 12280 let ExeDomain = VTI.ExeDomain in { 12281 defm r : AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst), 
12282 (ins VTI.RC:$src2, VTI.RC:$src3), OpStr, 12283 "$src3, $src2", "$src2, $src3", 12284 (VTI.VT (OpNode VTI.RC:$src1, 12285 VTI.RC:$src2, VTI.RC:$src3)), 12286 IsCommutable, IsCommutable>, 12287 EVEX, VVVV, T8, PD, Sched<[sched]>; 12288 defm m : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst), 12289 (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr, 12290 "$src3, $src2", "$src2, $src3", 12291 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, 12292 (VTI.VT (VTI.LdFrag addr:$src3))))>, 12293 EVEX, VVVV, EVEX_CD8<32, CD8VF>, T8, PD, 12294 Sched<[sched.Folded, sched.ReadAfterFold, 12295 sched.ReadAfterFold]>; 12296 defm mb : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst), 12297 (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), 12298 OpStr, "${src3}"#VTI.BroadcastStr#", $src2", 12299 "$src2, ${src3}"#VTI.BroadcastStr, 12300 (OpNode VTI.RC:$src1, VTI.RC:$src2, 12301 (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>, 12302 EVEX, VVVV, EVEX_CD8<32, CD8VF>, EVEX_B, 12303 T8, PD, Sched<[sched.Folded, sched.ReadAfterFold, 12304 sched.ReadAfterFold]>; 12305 } 12306} 12307 12308multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode, 12309 X86SchedWriteWidths sched, bit IsCommutable> { 12310 let Predicates = [HasVNNI] in 12311 defm Z : VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info, 12312 IsCommutable>, EVEX_V512; 12313 let Predicates = [HasVNNI, HasVLX] in { 12314 defm Z256 : VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info, 12315 IsCommutable>, EVEX_V256; 12316 defm Z128 : VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info, 12317 IsCommutable>, EVEX_V128; 12318 } 12319} 12320 12321// FIXME: Is there a better scheduler class for VPDP? 
12322defm VPDPBUSD : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul, 0>; 12323defm VPDPBUSDS : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul, 0>; 12324defm VPDPWSSD : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul, 1>; 12325defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul, 1>; 12326 12327// Patterns to match VPDPWSSD from existing instructions/intrinsics. 12328let Predicates = [HasVNNI] in { 12329 def : Pat<(v16i32 (add VR512:$src1, 12330 (X86vpmaddwd_su VR512:$src2, VR512:$src3))), 12331 (VPDPWSSDZr VR512:$src1, VR512:$src2, VR512:$src3)>; 12332 def : Pat<(v16i32 (add VR512:$src1, 12333 (X86vpmaddwd_su VR512:$src2, (load addr:$src3)))), 12334 (VPDPWSSDZm VR512:$src1, VR512:$src2, addr:$src3)>; 12335} 12336let Predicates = [HasVNNI,HasVLX] in { 12337 def : Pat<(v8i32 (add VR256X:$src1, 12338 (X86vpmaddwd_su VR256X:$src2, VR256X:$src3))), 12339 (VPDPWSSDZ256r VR256X:$src1, VR256X:$src2, VR256X:$src3)>; 12340 def : Pat<(v8i32 (add VR256X:$src1, 12341 (X86vpmaddwd_su VR256X:$src2, (load addr:$src3)))), 12342 (VPDPWSSDZ256m VR256X:$src1, VR256X:$src2, addr:$src3)>; 12343 def : Pat<(v4i32 (add VR128X:$src1, 12344 (X86vpmaddwd_su VR128X:$src2, VR128X:$src3))), 12345 (VPDPWSSDZ128r VR128X:$src1, VR128X:$src2, VR128X:$src3)>; 12346 def : Pat<(v4i32 (add VR128X:$src1, 12347 (X86vpmaddwd_su VR128X:$src2, (load addr:$src3)))), 12348 (VPDPWSSDZ128m VR128X:$src1, VR128X:$src2, addr:$src3)>; 12349} 12350 12351//===----------------------------------------------------------------------===// 12352// Bit Algorithms 12353//===----------------------------------------------------------------------===// 12354 12355// FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW? 
12356defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU, 12357 avx512vl_i8_info, HasBITALG>; 12358defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU, 12359 avx512vl_i16_info, HasBITALG>, REX_W; 12360 12361defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>; 12362defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>; 12363 12364multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> { 12365 defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst), 12366 (ins VTI.RC:$src1, VTI.RC:$src2), 12367 "vpshufbitqmb", 12368 "$src2, $src1", "$src1, $src2", 12369 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1), 12370 (VTI.VT VTI.RC:$src2)), 12371 (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1), 12372 (VTI.VT VTI.RC:$src2))>, EVEX, VVVV, T8, PD, 12373 Sched<[sched]>; 12374 defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst), 12375 (ins VTI.RC:$src1, VTI.MemOp:$src2), 12376 "vpshufbitqmb", 12377 "$src2, $src1", "$src1, $src2", 12378 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1), 12379 (VTI.VT (VTI.LdFrag addr:$src2))), 12380 (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1), 12381 (VTI.VT (VTI.LdFrag addr:$src2)))>, 12382 EVEX, VVVV, EVEX_CD8<8, CD8VF>, T8, PD, 12383 Sched<[sched.Folded, sched.ReadAfterFold]>; 12384} 12385 12386multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> { 12387 let Predicates = [HasBITALG] in 12388 defm Z : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512; 12389 let Predicates = [HasBITALG, HasVLX] in { 12390 defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256; 12391 defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128; 12392 } 12393} 12394 12395// FIXME: Is there a better scheduler class for VPSHUFBITQMB? 
12396defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>; 12397 12398//===----------------------------------------------------------------------===// 12399// GFNI 12400//===----------------------------------------------------------------------===// 12401 12402multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode, 12403 X86SchedWriteWidths sched> { 12404 let Predicates = [HasGFNI, HasAVX512] in 12405 defm Z : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>, 12406 EVEX_V512; 12407 let Predicates = [HasGFNI, HasVLX] in { 12408 defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>, 12409 EVEX_V256; 12410 defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>, 12411 EVEX_V128; 12412 } 12413} 12414 12415defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb, 12416 SchedWriteVecALU>, 12417 EVEX_CD8<8, CD8VF>, T8; 12418 12419multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode, 12420 X86FoldableSchedWrite sched, X86VectorVTInfo VTI, 12421 X86VectorVTInfo BcstVTI> 12422 : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> { 12423 let ExeDomain = VTI.ExeDomain in 12424 defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst), 12425 (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3), 12426 OpStr, "$src3, ${src2}"#BcstVTI.BroadcastStr#", $src1", 12427 "$src1, ${src2}"#BcstVTI.BroadcastStr#", $src3", 12428 (OpNode (VTI.VT VTI.RC:$src1), 12429 (bitconvert (BcstVTI.VT (X86VBroadcastld64 addr:$src2))), 12430 (i8 timm:$src3))>, EVEX_B, 12431 Sched<[sched.Folded, sched.ReadAfterFold]>; 12432} 12433 12434multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode, 12435 X86SchedWriteWidths sched> { 12436 let Predicates = [HasGFNI, HasAVX512] in 12437 defm Z : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM, 12438 v64i8_info, v8i64_info>, EVEX_V512; 12439 let Predicates = [HasGFNI, HasVLX] in { 12440 
defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM, 12441 v32i8x_info, v4i64x_info>, EVEX_V256; 12442 defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM, 12443 v16i8x_info, v2i64x_info>, EVEX_V128; 12444 } 12445} 12446 12447defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb", 12448 X86GF2P8affineinvqb, SchedWriteVecIMul>, 12449 EVEX, VVVV, EVEX_CD8<8, CD8VF>, REX_W, AVX512AIi8Base; 12450defm VGF2P8AFFINEQB : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb", 12451 X86GF2P8affineqb, SchedWriteVecIMul>, 12452 EVEX, VVVV, EVEX_CD8<8, CD8VF>, REX_W, AVX512AIi8Base; 12453 12454 12455//===----------------------------------------------------------------------===// 12456// AVX5124FMAPS 12457//===----------------------------------------------------------------------===// 12458 12459let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle, 12460 Constraints = "$src1 = $dst", Uses = [MXCSR], mayRaiseFPException = 1 in { 12461defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info, 12462 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3), 12463 "v4fmaddps", "$src3, $src2", "$src2, $src3", 12464 []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>, 12465 Sched<[SchedWriteFMA.ZMM.Folded]>; 12466 12467defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info, 12468 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3), 12469 "v4fnmaddps", "$src3, $src2", "$src2, $src3", 12470 []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>, 12471 Sched<[SchedWriteFMA.ZMM.Folded]>; 12472 12473defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info, 12474 (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3), 12475 "v4fmaddss", "$src3, $src2", "$src2, $src3", 12476 []>, VEX_LIG, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VF>, 12477 Sched<[SchedWriteFMA.Scl.Folded]>; 12478 12479defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info, 12480 (outs 
VR128X:$dst), (ins VR128X:$src2, f128mem:$src3), 12481 "v4fnmaddss", "$src3, $src2", "$src2, $src3", 12482 []>, VEX_LIG, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VF>, 12483 Sched<[SchedWriteFMA.Scl.Folded]>; 12484} 12485 12486//===----------------------------------------------------------------------===// 12487// AVX5124VNNIW 12488//===----------------------------------------------------------------------===// 12489 12490let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt, 12491 Constraints = "$src1 = $dst" in { 12492defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info, 12493 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3), 12494 "vp4dpwssd", "$src3, $src2", "$src2, $src3", 12495 []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>, 12496 Sched<[SchedWriteFMA.ZMM.Folded]>; 12497 12498defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info, 12499 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3), 12500 "vp4dpwssds", "$src3, $src2", "$src2, $src3", 12501 []>, EVEX_V512, EVEX, VVVV, T8, XD, EVEX_CD8<32, CD8VQ>, 12502 Sched<[SchedWriteFMA.ZMM.Folded]>; 12503} 12504 12505let hasSideEffects = 0 in { 12506 let mayStore = 1, SchedRW = [WriteFStoreX] in 12507 def MASKPAIR16STORE : PseudoI<(outs), (ins anymem:$dst, VK16PAIR:$src), []>; 12508 let mayLoad = 1, SchedRW = [WriteFLoadX] in 12509 def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src), []>; 12510} 12511 12512//===----------------------------------------------------------------------===// 12513// VP2INTERSECT 12514//===----------------------------------------------------------------------===// 12515 12516multiclass avx512_vp2intersect_modes<X86FoldableSchedWrite sched, X86VectorVTInfo _> { 12517 def rr : I<0x68, MRMSrcReg, 12518 (outs _.KRPC:$dst), 12519 (ins _.RC:$src1, _.RC:$src2), 12520 !strconcat("vp2intersect", _.Suffix, 12521 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 12522 [(set _.KRPC:$dst, (X86vp2intersect 12523 _.RC:$src1, (_.VT 
_.RC:$src2)))]>, 12524 EVEX, VVVV, T8, XD, Sched<[sched]>; 12525 12526 def rm : I<0x68, MRMSrcMem, 12527 (outs _.KRPC:$dst), 12528 (ins _.RC:$src1, _.MemOp:$src2), 12529 !strconcat("vp2intersect", _.Suffix, 12530 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 12531 [(set _.KRPC:$dst, (X86vp2intersect 12532 _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>, 12533 EVEX, VVVV, T8, XD, EVEX_CD8<_.EltSize, CD8VF>, 12534 Sched<[sched.Folded, sched.ReadAfterFold]>; 12535 12536 def rmb : I<0x68, MRMSrcMem, 12537 (outs _.KRPC:$dst), 12538 (ins _.RC:$src1, _.ScalarMemOp:$src2), 12539 !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr, 12540 ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"), 12541 [(set _.KRPC:$dst, (X86vp2intersect 12542 _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))]>, 12543 EVEX, VVVV, T8, XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, 12544 Sched<[sched.Folded, sched.ReadAfterFold]>; 12545} 12546 12547multiclass avx512_vp2intersect<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> { 12548 let Predicates = [HasAVX512, HasVP2INTERSECT] in 12549 defm Z : avx512_vp2intersect_modes<sched.ZMM, _.info512>, EVEX_V512; 12550 12551 let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in { 12552 defm Z256 : avx512_vp2intersect_modes<sched.YMM, _.info256>, EVEX_V256; 12553 defm Z128 : avx512_vp2intersect_modes<sched.XMM, _.info128>, EVEX_V128; 12554 } 12555} 12556 12557let ExeDomain = SSEPackedInt in { 12558defm VP2INTERSECTD : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i32_info>; 12559defm VP2INTERSECTQ : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i64_info>, REX_W; 12560} 12561 12562multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr, 12563 X86SchedWriteWidths sched, 12564 AVX512VLVectorVTInfo _SrcVTInfo, 12565 AVX512VLVectorVTInfo _DstVTInfo, 12566 SDNode OpNode, Predicate prd, 12567 bit IsCommutable = 0> { 12568 let Predicates = [prd] in 12569 defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode, 12570 
_SrcVTInfo.info512, _DstVTInfo.info512, 12571 _SrcVTInfo.info512, IsCommutable>, 12572 EVEX_V512, EVEX_CD8<32, CD8VF>; 12573 let Predicates = [HasVLX, prd] in { 12574 defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode, 12575 _SrcVTInfo.info256, _DstVTInfo.info256, 12576 _SrcVTInfo.info256, IsCommutable>, 12577 EVEX_V256, EVEX_CD8<32, CD8VF>; 12578 defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode, 12579 _SrcVTInfo.info128, _DstVTInfo.info128, 12580 _SrcVTInfo.info128, IsCommutable>, 12581 EVEX_V128, EVEX_CD8<32, CD8VF>; 12582 } 12583} 12584 12585let ExeDomain = SSEPackedSingle in 12586defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16", 12587 SchedWriteCvtPD2PS, //FIXME: Should be SchedWriteCvtPS2BF 12588 avx512vl_f32_info, avx512vl_bf16_info, 12589 X86cvtne2ps2bf16, HasBF16, 0>, T8, XD; 12590 12591// Truncate Float to BFloat16 12592multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr, 12593 X86SchedWriteWidths sched> { 12594 let ExeDomain = SSEPackedSingle in { 12595 let Predicates = [HasBF16], Uses = []<Register>, mayRaiseFPException = 0 in { 12596 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16bf16x_info, v16f32_info, 12597 X86cvtneps2bf16, X86cvtneps2bf16, sched.ZMM>, EVEX_V512; 12598 } 12599 let Predicates = [HasBF16, HasVLX] in { 12600 let Uses = []<Register>, mayRaiseFPException = 0 in { 12601 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8bf16x_info, v4f32x_info, 12602 null_frag, null_frag, sched.XMM, "{1to4}", "{x}", f128mem, 12603 VK4WM>, EVEX_V128; 12604 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8bf16x_info, v8f32x_info, 12605 X86cvtneps2bf16, X86cvtneps2bf16, 12606 sched.YMM, "{1to8}", "{y}">, EVEX_V256; 12607 } 12608 } // Predicates = [HasBF16, HasVLX] 12609 } // ExeDomain = SSEPackedSingle 12610 12611 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}", 12612 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, 12613 VR128X:$src), 0>; 12614 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}", 12615 
                  (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst,
                  f128mem:$src), 0, "intel">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                  VR256X:$src), 0>;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst,
                  f256mem:$src), 0, "intel">;
}

defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16",
                                       SchedWriteCvtPD2PS>, T8, XS,
                                       EVEX_CD8<32, CD8VF>;

let Predicates = [HasBF16, HasVLX] in {
  // Special patterns to allow use of X86mcvtneps2bf16 for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v8bf16 (X86cvtneps2bf16 (v4f32 VR128X:$src))),
            (VCVTNEPS2BF16Z128rr VR128X:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), (v8bf16 VR128X:$src0),
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), v8bf16x_info.ImmAllZerosV,
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rrkz VK4WM:$mask, VR128X:$src)>;

  // Memory-source forms of the same masking patterns.
  def : Pat<(v8bf16 (X86cvtneps2bf16 (loadv4f32 addr:$src))),
            (VCVTNEPS2BF16Z128rm addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), (v8bf16 VR128X:$src0),
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), v8bf16x_info.ImmAllZerosV,
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>;

  // Broadcast-source forms.
  def : Pat<(v8bf16 (X86cvtneps2bf16 (v4f32
                                      (X86VBroadcastld32 addr:$src)))),
            (VCVTNEPS2BF16Z128rmb addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
                              (v8bf16 VR128X:$src0), VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
                              v8bf16x_info.ImmAllZerosV, VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>;

  // Map the 128/256-bit vcvtneps2bf16 intrinsics onto the instructions.
  def : Pat<(v8bf16 (int_x86_vcvtneps2bf16128 (v4f32 VR128X:$src))),
            (VCVTNEPS2BF16Z128rr VR128X:$src)>;
  def : Pat<(v8bf16 (int_x86_vcvtneps2bf16128 (loadv4f32 addr:$src))),
            (VCVTNEPS2BF16Z128rm addr:$src)>;

  def : Pat<(v8bf16 (int_x86_vcvtneps2bf16256 (v8f32 VR256X:$src))),
            (VCVTNEPS2BF16Z256rr VR256X:$src)>;
  def : Pat<(v8bf16 (int_x86_vcvtneps2bf16256 (loadv8f32 addr:$src))),
            (VCVTNEPS2BF16Z256rm addr:$src)>;

  // bf16 broadcasts reuse the 16-bit integer broadcast instructions.
  def : Pat<(v8bf16 (X86VBroadcastld16 addr:$src)),
            (VPBROADCASTWZ128rm addr:$src)>;
  def : Pat<(v16bf16 (X86VBroadcastld16 addr:$src)),
            (VPBROADCASTWZ256rm addr:$src)>;

  def : Pat<(v8bf16 (X86VBroadcast (v8bf16 VR128X:$src))),
            (VPBROADCASTWZ128rr VR128X:$src)>;
  def : Pat<(v16bf16 (X86VBroadcast (v8bf16 VR128X:$src))),
            (VPBROADCASTWZ256rr VR128X:$src)>;

  def : Pat<(v8bf16 (X86vfpround (v8f32 VR256X:$src))),
            (VCVTNEPS2BF16Z256rr VR256X:$src)>;
  def : Pat<(v8bf16 (X86vfpround (loadv8f32 addr:$src))),
            (VCVTNEPS2BF16Z256rm addr:$src)>;

  // TODO: No scalar broadcast because scalar bf16 is not a legal type yet.
}

let Predicates = [HasBF16] in {
  def : Pat<(v32bf16 (X86VBroadcastld16 addr:$src)),
            (VPBROADCASTWZrm addr:$src)>;

  def : Pat<(v32bf16 (X86VBroadcast (v8bf16 VR128X:$src))),
            (VPBROADCASTWZrr VR128X:$src)>;

  def : Pat<(v16bf16 (X86vfpround (v16f32 VR512:$src))),
            (VCVTNEPS2BF16Zrr VR512:$src)>;
  def : Pat<(v16bf16 (X86vfpround (loadv16f32 addr:$src))),
            (VCVTNEPS2BF16Zrm addr:$src)>;
  // TODO: No scalar broadcast because scalar bf16 is not a legal type yet.
}

// vdpbf16ps: dot product of bf16 pairs accumulated into f32.
// $src1 is both accumulator input and destination.
let Constraints = "$src1 = $dst" in {
multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, X86VectorVTInfo src_v> {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                               (ins src_v.RC:$src2, src_v.RC:$src3),
                               OpcodeStr, "$src3, $src2", "$src2, $src3",
                               (_.VT (OpNode _.RC:$src1, src_v.RC:$src2, src_v.RC:$src3))>,
                               EVEX, VVVV, Sched<[sched]>;

  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                               (ins src_v.RC:$src2, src_v.MemOp:$src3),
                               OpcodeStr, "$src3, $src2", "$src2, $src3",
                               (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
                                     (src_v.LdFrag addr:$src3)))>, EVEX, VVVV,
                               Sched<[sched.Folded, sched.ReadAfterFold]>;

  // Broadcast form: $src3 is a 32-bit scalar (one bf16 pair) broadcast to
  // all lanes.
  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                                (ins src_v.RC:$src2, f32mem:$src3),
                                OpcodeStr,
                                !strconcat("${src3}", _.BroadcastStr,", $src2"),
                                !strconcat("$src2, ${src3}", _.BroadcastStr),
                                (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
                                      (src_v.VT (src_v.BroadcastLdFrag addr:$src3))))>,
                                EVEX_B, EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;

}
} // Constraints = "$src1 = $dst"

// Instantiate vdpbf16ps at 512/256/128-bit widths (VLX for the narrow forms).
multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
                                 AVX512VLVectorVTInfo src_v, Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512,
                                src_v.info512>, EVEX_V512;
  }
  let Predicates = [HasVLX, prd] in {
    defm Z256 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256,
                                   src_v.info256>, EVEX_V256;
    defm Z128 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128,
                                   src_v.info128>, EVEX_V128;
  }
}

let ExeDomain = SSEPackedSingle in
defm VDPBF16PS :
           avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps, SchedWriteFMA,
                                 avx512vl_f32_info, avx512vl_bf16_info,
                                 HasBF16>, T8, XS, EVEX_CD8<32, CD8VF>;

//===----------------------------------------------------------------------===//
// AVX512FP16
//===----------------------------------------------------------------------===//

let Predicates = [HasFP16] in {
// Move word (r/m16) to Packed word
def VMOVW2SHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
                   "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, Sched<[WriteVecMoveFromGpr]>;
def VMOVWrm : AVX512<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i16mem:$src),
                   "vmovw\t{$src, $dst|$dst, $src}",
                   [(set VR128X:$dst,
                     (v8i16 (scalar_to_vector (loadi16 addr:$src))))]>,
                   T_MAP5, PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFLoad]>;

// GR16 -> f16 bitcast: widen to GR32 first since vmovw reads a 32-bit GPR.
def : Pat<(f16 (bitconvert GR16:$src)),
          (f16 (COPY_TO_REGCLASS
                (VMOVW2SHrr
                 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)),
                FR16X))>;
def : Pat<(v8i16 (scalar_to_vector (i16 GR16:$src))),
          (VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>;
def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (and GR32:$src, 0xffff)))),
          (VMOVW2SHrr GR32:$src)>;
// FIXME: We should really find a way to improve these patterns.
// Wider zero-extended insertions still lower to the 128-bit vmovw; the
// upper lanes are implicitly zeroed.
def : Pat<(v8i32 (X86vzmovl
                  (insert_subvector undef,
                                    (v4i32 (scalar_to_vector
                                            (and GR32:$src, 0xffff))),
                                    (iPTR 0)))),
          (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;
def : Pat<(v16i32 (X86vzmovl
                   (insert_subvector undef,
                                     (v4i32 (scalar_to_vector
                                             (and GR32:$src, 0xffff))),
                                     (iPTR 0)))),
          (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;

def : Pat<(v8i16 (X86vzmovl (scalar_to_vector (i16 GR16:$src)))),
          (VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>;

// AVX 128-bit movw instruction writes zeros in the high 128-bit part.
def : Pat<(v8i16 (X86vzload16 addr:$src)),
          (VMOVWrm addr:$src)>;
def : Pat<(v16i16 (X86vzload16 addr:$src)),
          (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>;

// Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
def : Pat<(v32i16 (X86vzload16 addr:$src)),
          (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>;

def : Pat<(v4i32 (scalar_to_vector (i32 (extloadi16 addr:$src)))),
          (VMOVWrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (i32 (zextloadi16 addr:$src))))),
          (VMOVWrm addr:$src)>;
def : Pat<(v8i32 (X86vzmovl
                  (insert_subvector undef,
                                    (v4i32 (scalar_to_vector
                                            (i32 (zextloadi16 addr:$src)))),
                                    (iPTR 0)))),
          (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>;
def : Pat<(v16i32 (X86vzmovl
                   (insert_subvector undef,
                                     (v4i32 (scalar_to_vector
                                             (i32 (zextloadi16 addr:$src)))),
                                     (iPTR 0)))),
          (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>;

// Move word from xmm register to r/m16
def VMOVSH2Wrr : AVX512<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
                   "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, Sched<[WriteVecMoveToGpr]>;
def VMOVWmr : AVX512<0x7E, MRMDestMem, (outs),
                   (ins
                    i16mem:$dst, VR128X:$src),
                   "vmovw\t{$src, $dst|$dst, $src}",
                   [(store (i16 (extractelt (v8i16 VR128X:$src),
                                 (iPTR 0))), addr:$dst)]>,
                   T_MAP5, PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFStore]>;

// f16 -> GR16 bitcast: extract the low 16 bits of the GPR result.
def : Pat<(i16 (bitconvert FR16X:$src)),
          (i16 (EXTRACT_SUBREG
                (VMOVSH2Wrr (COPY_TO_REGCLASS FR16X:$src, VR128X)),
                sub_16bit))>;
def : Pat<(i16 (extractelt (v8i16 VR128X:$src), (iPTR 0))),
          (i16 (EXTRACT_SUBREG (VMOVSH2Wrr VR128X:$src), sub_16bit))>;

// Allow "vmovw" to use GR64
let hasSideEffects = 0 in {
  def VMOVW64toSHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
                     "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
  def VMOVSHtoW64rr : AVX512<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
                     "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5, PD, EVEX, REX_W, Sched<[WriteVecMoveToGpr]>;
}
}

// Convert 16-bit float to i16/u16
multiclass avx512_cvtph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                          SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                          AVX512VLVectorVTInfo _Dst,
                          AVX512VLVectorVTInfo _Src,
                          X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in {
    // 512-bit form also gets the static-rounding (embedded RC) variant.
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512,
                            OpNode, MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, _Dst.info512, _Src.info512,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasFP16, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128,
                               OpNode, MaskOpNode, sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256,
                               OpNode, MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

// Convert 16-bit float to i16/u16 with truncation
multiclass avx512_cvttph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                           AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in {
    // Truncating converts get a SAE (suppress-all-exceptions) variant
    // instead of a rounding-control variant.
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512,
                            OpNode, MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, _Dst.info512, _Src.info512,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasFP16, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128,
                               OpNode, MaskOpNode, sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256,
                               OpNode, MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

defm VCVTPH2UW : avx512_cvtph2w<0x7D, "vcvtph2uw", X86cvtp2UInt, X86cvtp2UInt,
                                X86cvtp2UIntRnd, avx512vl_i16_info,
                                avx512vl_f16_info, SchedWriteCvtPD2DQ>,
                                T_MAP5, EVEX_CD8<16, CD8VF>;
defm VCVTUW2PH : avx512_cvtph2w<0x7D, "vcvtuw2ph", any_uint_to_fp, uint_to_fp,
                                X86VUintToFpRnd, avx512vl_f16_info,
                                avx512vl_i16_info, SchedWriteCvtPD2DQ>,
                                T_MAP5, XD, EVEX_CD8<16, CD8VF>;
defm VCVTTPH2W : avx512_cvttph2w<0x7C, "vcvttph2w", X86any_cvttp2si,
                                 X86cvttp2si, X86cvttp2siSAE,
                                 avx512vl_i16_info, avx512vl_f16_info,
                                 SchedWriteCvtPD2DQ>, T_MAP5, PD, EVEX_CD8<16, CD8VF>;
defm VCVTTPH2UW : avx512_cvttph2w<0x7C, "vcvttph2uw", X86any_cvttp2ui,
                                  X86cvttp2ui, X86cvttp2uiSAE,
                                  avx512vl_i16_info, avx512vl_f16_info,
                                  SchedWriteCvtPD2DQ>, T_MAP5, EVEX_CD8<16, CD8VF>;
defm VCVTPH2W : avx512_cvtph2w<0x7D, "vcvtph2w", X86cvtp2Int, X86cvtp2Int,
                               X86cvtp2IntRnd, avx512vl_i16_info,
                               avx512vl_f16_info, SchedWriteCvtPD2DQ>,
                               T_MAP5, PD, EVEX_CD8<16, CD8VF>;
defm VCVTW2PH : avx512_cvtph2w<0x7D, "vcvtw2ph", any_sint_to_fp, sint_to_fp,
                               X86VSintToFpRnd, avx512vl_f16_info,
                               avx512vl_i16_info, SchedWriteCvtPD2DQ>,
                               T_MAP5, XS, EVEX_CD8<16, CD8VF>;

// Convert Half to Signed/Unsigned Doubleword
multiclass avx512_cvtph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f16x_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasFP16, HasVLX] in {
    // 128-bit form reads only the low 4 f16 elements (64 bits of memory).
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode,
                               MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

// Convert Half to Signed/Unsigned Doubleword with truncation
multiclass avx512_cvttph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f16x_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasFP16, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode,
                               MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}


defm VCVTPH2DQ : avx512_cvtph2dq<0x5B, "vcvtph2dq", X86cvtp2Int, X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5, PD,
                                 EVEX_CD8<16, CD8VH>;
defm VCVTPH2UDQ : avx512_cvtph2dq<0x79, "vcvtph2udq", X86cvtp2UInt, X86cvtp2UInt,
                                  X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5,
                                  EVEX_CD8<16,
                                           CD8VH>;

defm VCVTTPH2DQ : avx512_cvttph2dq<0x5B, "vcvttph2dq", X86any_cvttp2si,
                                   X86cvttp2si, X86cvttp2siSAE,
                                   SchedWriteCvtPS2DQ>, T_MAP5, XS,
                                   EVEX_CD8<16, CD8VH>;

defm VCVTTPH2UDQ : avx512_cvttph2dq<0x78, "vcvttph2udq", X86any_cvttp2ui,
                                    X86cvttp2ui, X86cvttp2uiSAE,
                                    SchedWriteCvtPS2DQ>, T_MAP5,
                                    EVEX_CD8<16, CD8VH>;

// Convert Half to Signed/Unsigned Quadword
multiclass avx512_cvtph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f16x_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasFP16, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from v8f16x_info source
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode,
                               MaskOpNode, sched.XMM, "{1to2}", "", f32mem>,
                               EVEX_V128;
    // Explicitly specified broadcast string, since we take only 4 elements
    // from v8f16x_info source
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode,
                               MaskOpNode, sched.YMM, "{1to4}", "", f64mem>,
                               EVEX_V256;
  }
}

// Convert Half to Signed/Unsigned Quadword with truncation
multiclass avx512_cvttph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f16x_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasFP16,
                    HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from v8f16x_info source
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode,
                               MaskOpNode, sched.XMM, "{1to2}", "", f32mem>, EVEX_V128;
    // Explicitly specified broadcast string, since we take only 4 elements
    // from v8f16x_info source
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode,
                               MaskOpNode, sched.YMM, "{1to4}", "", f64mem>, EVEX_V256;
  }
}

defm VCVTPH2QQ : avx512_cvtph2qq<0x7B, "vcvtph2qq", X86cvtp2Int, X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5, PD,
                                 EVEX_CD8<16, CD8VQ>;

defm VCVTPH2UQQ : avx512_cvtph2qq<0x79, "vcvtph2uqq", X86cvtp2UInt, X86cvtp2UInt,
                                  X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5, PD,
                                  EVEX_CD8<16, CD8VQ>;

defm VCVTTPH2QQ : avx512_cvttph2qq<0x7A, "vcvttph2qq", X86any_cvttp2si,
                                   X86cvttp2si, X86cvttp2siSAE,
                                   SchedWriteCvtPS2DQ>, T_MAP5, PD,
                                   EVEX_CD8<16, CD8VQ>;

defm VCVTTPH2UQQ : avx512_cvttph2qq<0x78, "vcvttph2uqq", X86any_cvttp2ui,
                                    X86cvttp2ui, X86cvttp2uiSAE,
                                    SchedWriteCvtPS2DQ>, T_MAP5, PD,
                                    EVEX_CD8<16, CD8VQ>;

// Convert Signed/Unsigned Quadword to Half
multiclass avx512_cvtqq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  // We need "x"/"y"/"z" suffixes in order to distinguish between 128, 256 and
  // 512 memory forms of these instructions in the asm parser. They have the
  // same dest type - 'v8f16x_info'. We also specify the broadcast string
  // explicitly due to the same reason.
  let Predicates = [HasFP16] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8i64_info, OpNode,
                            MaskOpNode, sched.ZMM, "{1to8}", "{z}">,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8i64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasFP16, HasVLX] in {
    // null_frag: the narrow-form selection patterns live outside this
    // multiclass (see the HasFP16+HasVLX pattern block below).
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2i64x_info,
                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}",
                               i128mem, VK2WM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4i64x_info,
                               null_frag, null_frag, sched.YMM, "{1to4}", "{y}",
                               i256mem, VK4WM>, EVEX_V256;
  }

  // AT&T aliases with explicit "x" (128-bit source) suffix.
  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                  VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                  VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                  i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                  VK2WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                  VK2WM:$mask, i64mem:$src), 0, "att">;

  // AT&T aliases with explicit "y" (256-bit source) suffix.
  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                  VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
                  "$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                  VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                  i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                  VK4WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                  VK4WM:$mask, i64mem:$src), 0, "att">;

  // AT&T aliases with explicit "z" (512-bit source) suffix.
  def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Zrr") VR128X:$dst,
                  VR512:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|"
                  "$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst,
                  VK8WM:$mask, VR512:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst,
                  VK8WM:$mask, VR512:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}",
                  (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst,
                  i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to8}}",
                  (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst,
                  VK8WM:$mask, i64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to8}}",
                  (!cast<Instruction>(NAME # "Zrmbkz")
                  VR128X:$dst,
                  VK8WM:$mask, i64mem:$src), 0, "att">;
}

defm VCVTQQ2PH : avx512_cvtqq2ph<0x5B, "vcvtqq2ph", any_sint_to_fp, sint_to_fp,
                                 X86VSintToFpRnd, SchedWriteCvtDQ2PS>, REX_W, T_MAP5,
                                 EVEX_CD8<64, CD8VF>;

defm VCVTUQQ2PH : avx512_cvtqq2ph<0x7A, "vcvtuqq2ph", any_uint_to_fp, uint_to_fp,
                                  X86VUintToFpRnd, SchedWriteCvtDQ2PS>, REX_W, T_MAP5, XD,
                                  EVEX_CD8<64, CD8VF>;

// Convert half to signed/unsigned int 32/64
defm VCVTSH2SIZ: avx512_cvt_s_int_round<0x2D, f16x_info, i32x_info, X86cvts2si,
                                        X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{l}", HasFP16>,
                                        T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
defm VCVTSH2SI64Z: avx512_cvt_s_int_round<0x2D, f16x_info, i64x_info, X86cvts2si,
                                          X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{q}", HasFP16>,
                                          T_MAP5, XS, REX_W, EVEX_CD8<16, CD8VT1>;
defm VCVTSH2USIZ: avx512_cvt_s_int_round<0x79, f16x_info, i32x_info, X86cvts2usi,
                                         X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{l}", HasFP16>,
                                         T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
defm VCVTSH2USI64Z: avx512_cvt_s_int_round<0x79, f16x_info, i64x_info, X86cvts2usi,
                                           X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{q}", HasFP16>,
                                           T_MAP5, XS, REX_W, EVEX_CD8<16, CD8VT1>;

// Truncating (round-toward-zero) scalar conversions.
defm VCVTTSH2SIZ: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i32x_info,
                                   any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
                                   "{l}", HasFP16>, T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
defm VCVTTSH2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i64x_info,
                                     any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
                                     "{q}", HasFP16>, REX_W, T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
defm VCVTTSH2USIZ: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i32x_info,
                                    any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
                                    "{l}", HasFP16>, T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
defm VCVTTSH2USI64Z: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i64x_info,
                                      any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
                                      "{q}", HasFP16>, T_MAP5, XS, REX_W, EVEX_CD8<16, CD8VT1>;

let Predicates = [HasFP16] in {
  // GPR (signed/unsigned, 32/64-bit) -> scalar half conversions.
  defm VCVTSI2SHZ : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR32,
                                         v8f16x_info, i32mem, loadi32, "cvtsi2sh", "l">,
                                         T_MAP5, XS, EVEX_CD8<32, CD8VT1>;
  defm VCVTSI642SHZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR64,
                                          v8f16x_info, i64mem, loadi64, "cvtsi2sh","q">,
                                          T_MAP5, XS, REX_W, EVEX_CD8<64, CD8VT1>;
  defm VCVTUSI2SHZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR32,
                                          v8f16x_info, i32mem, loadi32,
                                          "cvtusi2sh","l">, T_MAP5, XS, EVEX_CD8<32, CD8VT1>;
  defm VCVTUSI642SHZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR64,
                                            v8f16x_info, i64mem, loadi64, "cvtusi2sh", "q">,
                                            T_MAP5, XS, REX_W, EVEX_CD8<64, CD8VT1>;
  def : InstAlias<"vcvtsi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
                  (VCVTSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;

  def : InstAlias<"vcvtusi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
                  (VCVTUSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;


  // Lower scalar int->f16 DAG nodes; the tied first operand is undefined.
  def : Pat<(f16 (any_sint_to_fp (loadi32 addr:$src))),
            (VCVTSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f16 (any_sint_to_fp (loadi64 addr:$src))),
            (VCVTSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;

  def : Pat<(f16 (any_sint_to_fp GR32:$src)),
            (VCVTSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
  def : Pat<(f16 (any_sint_to_fp GR64:$src)),
            (VCVTSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;

  def : Pat<(f16 (any_uint_to_fp (loadi32 addr:$src))),
            (VCVTUSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f16 (any_uint_to_fp (loadi64 addr:$src))),
            (VCVTUSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;

  def :
        Pat<(f16 (any_uint_to_fp GR32:$src)),
            (VCVTUSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
  def : Pat<(f16 (any_uint_to_fp GR64:$src)),
            (VCVTUSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;

  // Patterns used for matching vcvtsi2sh intrinsic sequences from clang
  // which produce unnecessary vmovsh instructions
  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR64:$src)))))),
            (VCVTSI642SHZrr_Int VR128X:$dst, GR64:$src)>;

  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi64 addr:$src))))))),
            (VCVTSI642SHZrm_Int VR128X:$dst, addr:$src)>;

  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR32:$src)))))),
            (VCVTSI2SHZrr_Int VR128X:$dst, GR32:$src)>;

  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi32 addr:$src))))))),
            (VCVTSI2SHZrm_Int VR128X:$dst, addr:$src)>;

  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR64:$src)))))),
            (VCVTUSI642SHZrr_Int VR128X:$dst, GR64:$src)>;

  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi64 addr:$src))))))),
            (VCVTUSI642SHZrm_Int VR128X:$dst, addr:$src)>;

  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR32:$src)))))),
            (VCVTUSI2SHZrr_Int VR128X:$dst, GR32:$src)>;

  def : Pat<(v8f16 (X86Movsh
                    (v8f16 VR128X:$dst),
                    (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi32 addr:$src))))))),
            (VCVTUSI2SHZrm_Int VR128X:$dst, addr:$src)>;
} // Predicates = [HasFP16]

let Predicates = [HasFP16, HasVLX] in {
  // Special patterns to allow use of X86VMSintToFP for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v8f16 (X86any_VSintToFP (v4i64 VR256X:$src))),
            (VCVTQQ2PHZ256rr VR256X:$src)>;
  def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
                           VK4WM:$mask),
            (VCVTQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
  def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
                           VK4WM:$mask),
            (VCVTQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;

  def : Pat<(v8f16 (X86any_VSintToFP (loadv4i64 addr:$src))),
            (VCVTQQ2PHZ256rm addr:$src)>;
  def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
                           VK4WM:$mask),
            (VCVTQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
                           VK4WM:$mask),
            (VCVTQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_VSintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
            (VCVTQQ2PHZ256rmb addr:$src)>;
  def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
                           (v8f16 VR128X:$src0), VK4WM:$mask),
            (VCVTQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
                           v8f16x_info.ImmAllZerosV, VK4WM:$mask),
            (VCVTQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_VSintToFP (v2i64 VR128X:$src))),
            (VCVTQQ2PHZ128rr VR128X:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;

  def : Pat<(v8f16 (X86any_VSintToFP (loadv2i64 addr:$src))),
            (VCVTQQ2PHZ128rm addr:$src)>;
  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
                           VK2WM:$mask),
            (VCVTQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
                           VK2WM:$mask),
            (VCVTQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
            (VCVTQQ2PHZ128rmb addr:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
                           (v8f16 VR128X:$src0), VK2WM:$mask),
            (VCVTQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
                           v8f16x_info.ImmAllZerosV, VK2WM:$mask),
            (VCVTQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;

  // Special patterns to allow use of X86VMUintToFP for masking. Instruction
  // patterns have been disabled with null_frag.
  def : Pat<(v8f16 (X86any_VUintToFP (v4i64 VR256X:$src))),
            (VCVTUQQ2PHZ256rr VR256X:$src)>;
  def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
                           VK4WM:$mask),
            (VCVTUQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
  def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
                           VK4WM:$mask),
            (VCVTUQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;

  def : Pat<(v8f16 (X86any_VUintToFP (loadv4i64 addr:$src))),
            (VCVTUQQ2PHZ256rm addr:$src)>;
  def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
                           VK4WM:$mask),
            (VCVTUQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
                           VK4WM:$mask),
            (VCVTUQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;

  def : Pat<(v8f16 (X86any_VUintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
            (VCVTUQQ2PHZ256rmb addr:$src)>;
  def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
                           (v8f16 VR128X:$src0), VK4WM:$mask),
            (VCVTUQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def :
Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)), 13318 v8f16x_info.ImmAllZerosV, VK4WM:$mask), 13319 (VCVTUQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>; 13320 13321 def : Pat<(v8f16 (X86any_VUintToFP (v2i64 VR128X:$src))), 13322 (VCVTUQQ2PHZ128rr VR128X:$src)>; 13323 def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0), 13324 VK2WM:$mask), 13325 (VCVTUQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>; 13326 def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV, 13327 VK2WM:$mask), 13328 (VCVTUQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>; 13329 13330 def : Pat<(v8f16 (X86any_VUintToFP (loadv2i64 addr:$src))), 13331 (VCVTUQQ2PHZ128rm addr:$src)>; 13332 def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0), 13333 VK2WM:$mask), 13334 (VCVTUQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; 13335 def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV, 13336 VK2WM:$mask), 13337 (VCVTUQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>; 13338 13339 def : Pat<(v8f16 (X86any_VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))), 13340 (VCVTUQQ2PHZ128rmb addr:$src)>; 13341 def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)), 13342 (v8f16 VR128X:$src0), VK2WM:$mask), 13343 (VCVTUQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>; 13344 def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)), 13345 v8f16x_info.ImmAllZerosV, VK2WM:$mask), 13346 (VCVTUQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>; 13347} 13348 13349let Constraints = "@earlyclobber $dst, $src1 = $dst" in { 13350 multiclass avx512_cfmaop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _, bit IsCommutable> { 13351 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 13352 (ins _.RC:$src2, _.RC:$src3), 13353 OpcodeStr, "$src3, $src2", "$src2, $src3", 13354 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), IsCommutable>, EVEX, VVVV; 13355 13356 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 13357 (ins 
_.RC:$src2, _.MemOp:$src3), 13358 OpcodeStr, "$src3, $src2", "$src2, $src3", 13359 (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>, EVEX, VVVV; 13360 13361 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), 13362 (ins _.RC:$src2, _.ScalarMemOp:$src3), 13363 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr), 13364 (_.VT (OpNode _.RC:$src2, (_.VT (_.BroadcastLdFrag addr:$src3)), _.RC:$src1))>, EVEX_B, EVEX, VVVV; 13365 } 13366} // Constraints = "@earlyclobber $dst, $src1 = $dst" 13367 13368multiclass avx512_cfmaop_round<bits<8> opc, string OpcodeStr, SDNode OpNode, 13369 X86VectorVTInfo _> { 13370 let Constraints = "@earlyclobber $dst, $src1 = $dst" in 13371 defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), 13372 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc), 13373 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", 13374 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc)))>, 13375 EVEX, VVVV, EVEX_B, EVEX_RC; 13376} 13377 13378 13379multiclass avx512_cfmaop_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, bit IsCommutable> { 13380 let Predicates = [HasFP16] in { 13381 defm Z : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v16f32_info, IsCommutable>, 13382 avx512_cfmaop_round<opc, OpcodeStr, OpNodeRnd, v16f32_info>, 13383 EVEX_V512, Sched<[WriteFMAZ]>; 13384 } 13385 let Predicates = [HasVLX, HasFP16] in { 13386 defm Z256 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v8f32x_info, IsCommutable>, EVEX_V256, Sched<[WriteFMAY]>; 13387 defm Z128 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v4f32x_info, IsCommutable>, EVEX_V128, Sched<[WriteFMAX]>; 13388 } 13389} 13390 13391multiclass avx512_cfmulop_common<bits<8> opc, string OpcodeStr, SDNode OpNode, 13392 SDNode MaskOpNode, SDNode OpNodeRnd, bit IsCommutable> { 13393 let Predicates = [HasFP16] in { 13394 defm Z : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info, 13395 WriteFMAZ, 
IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, 13396 avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, WriteFMAZ, v16f32_info, 13397 "", "@earlyclobber $dst">, EVEX_V512; 13398 } 13399 let Predicates = [HasVLX, HasFP16] in { 13400 defm Z256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info, 13401 WriteFMAY, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V256; 13402 defm Z128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info, 13403 WriteFMAX, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V128; 13404 } 13405} 13406 13407 13408let Uses = [MXCSR] in { 13409 defm VFMADDCPH : avx512_cfmaop_common<0x56, "vfmaddcph", x86vfmaddc, x86vfmaddcRnd, 1>, 13410 T_MAP6, XS, EVEX_CD8<32, CD8VF>; 13411 defm VFCMADDCPH : avx512_cfmaop_common<0x56, "vfcmaddcph", x86vfcmaddc, x86vfcmaddcRnd, 0>, 13412 T_MAP6, XD, EVEX_CD8<32, CD8VF>; 13413 13414 defm VFMULCPH : avx512_cfmulop_common<0xD6, "vfmulcph", x86vfmulc, x86vfmulc, 13415 x86vfmulcRnd, 1>, T_MAP6, XS, EVEX_CD8<32, CD8VF>; 13416 defm VFCMULCPH : avx512_cfmulop_common<0xD6, "vfcmulcph", x86vfcmulc, 13417 x86vfcmulc, x86vfcmulcRnd, 0>, T_MAP6, XD, EVEX_CD8<32, CD8VF>; 13418} 13419 13420 13421multiclass avx512_cfmaop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, 13422 bit IsCommutable> { 13423 let Predicates = [HasFP16], Constraints = "@earlyclobber $dst, $src1 = $dst" in { 13424 defm r : AVX512_maskable_3src<opc, MRMSrcReg, v4f32x_info, (outs VR128X:$dst), 13425 (ins VR128X:$src2, VR128X:$src3), OpcodeStr, 13426 "$src3, $src2", "$src2, $src3", 13427 (v4f32 (OpNode VR128X:$src2, VR128X:$src3, VR128X:$src1)), IsCommutable>, 13428 Sched<[WriteFMAX]>; 13429 defm m : AVX512_maskable_3src<opc, MRMSrcMem, v4f32x_info, (outs VR128X:$dst), 13430 (ins VR128X:$src2, ssmem:$src3), OpcodeStr, 13431 "$src3, $src2", "$src2, $src3", 13432 (v4f32 (OpNode VR128X:$src2, (sse_load_f32 addr:$src3), VR128X:$src1))>, 13433 Sched<[WriteFMAX.Folded, 
WriteFMAX.ReadAfterFold]>; 13434 defm rb : AVX512_maskable_3src<opc, MRMSrcReg, v4f32x_info, (outs VR128X:$dst), 13435 (ins VR128X:$src2, VR128X:$src3, AVX512RC:$rc), OpcodeStr, 13436 "$rc, $src3, $src2", "$src2, $src3, $rc", 13437 (v4f32 (OpNodeRnd VR128X:$src2, VR128X:$src3, VR128X:$src1, (i32 timm:$rc)))>, 13438 EVEX_B, EVEX_RC, Sched<[WriteFMAX]>; 13439 } 13440} 13441 13442multiclass avx512_cfmbinop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode, 13443 SDNode OpNodeRnd, bit IsCommutable> { 13444 let Predicates = [HasFP16] in { 13445 defm rr : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst), 13446 (ins VR128X:$src1, VR128X:$src2), OpcodeStr, 13447 "$src2, $src1", "$src1, $src2", 13448 (v4f32 (OpNode VR128X:$src1, VR128X:$src2)), 13449 IsCommutable, IsCommutable, IsCommutable, 13450 X86selects, "@earlyclobber $dst">, Sched<[WriteFMAX]>; 13451 defm rm : AVX512_maskable<opc, MRMSrcMem, f32x_info, (outs VR128X:$dst), 13452 (ins VR128X:$src1, ssmem:$src2), OpcodeStr, 13453 "$src2, $src1", "$src1, $src2", 13454 (v4f32 (OpNode VR128X:$src1, (sse_load_f32 addr:$src2))), 13455 0, 0, 0, X86selects, "@earlyclobber $dst">, 13456 Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>; 13457 defm rrb : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst), 13458 (ins VR128X:$src1, VR128X:$src2, AVX512RC:$rc), OpcodeStr, 13459 "$rc, $src2, $src1", "$src1, $src2, $rc", 13460 (OpNodeRnd (v4f32 VR128X:$src1), (v4f32 VR128X:$src2), (i32 timm:$rc)), 13461 0, 0, 0, X86selects, "@earlyclobber $dst">, 13462 EVEX_B, EVEX_RC, Sched<[WriteFMAX]>; 13463 } 13464} 13465 13466let Uses = [MXCSR] in { 13467 defm VFMADDCSHZ : avx512_cfmaop_sh_common<0x57, "vfmaddcsh", x86vfmaddcSh, x86vfmaddcShRnd, 1>, 13468 T_MAP6, XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX, VVVV; 13469 defm VFCMADDCSHZ : avx512_cfmaop_sh_common<0x57, "vfcmaddcsh", x86vfcmaddcSh, x86vfcmaddcShRnd, 0>, 13470 T_MAP6, XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX, VVVV; 13471 13472 defm VFMULCSHZ : 
avx512_cfmbinop_sh_common<0xD7, "vfmulcsh", x86vfmulcSh, x86vfmulcShRnd, 1>, 13473 T_MAP6, XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX, VVVV; 13474 defm VFCMULCSHZ : avx512_cfmbinop_sh_common<0xD7, "vfcmulcsh", x86vfcmulcSh, x86vfcmulcShRnd, 0>, 13475 T_MAP6, XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX, VVVV; 13476} 13477