//===-- VOP3PInstructions.td - Vector Instruction Definitions -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// VOP3P Classes
//===----------------------------------------------------------------------===//

// Used for FMA_MIX* and MAD_MIX* insts
// Their operands are only sort of f16 operands. Depending on
// op_sel_hi, these may be interpreted as f32. The inline immediate
// values are really f16 converted to f32, so we treat these as f16
// operands.
class VOP3P_Mix_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR,
                        bit useTiedOutput = 0> : VOP3_Profile<P, Features> {
  bit UseTiedOutput = useTiedOutput;

  dag srcs =
      (ins FP16InputMods:$src0_modifiers, VCSrc_f16:$src0,
           FP16InputMods:$src1_modifiers, VCSrc_f16:$src1,
           FP16InputMods:$src2_modifiers, VCSrc_f16:$src2);

  // FIXME: clampmod0 misbehaves with the non-default vdst_in
  // following it. For now workaround this by requiring clamp
  // in tied patterns. This should use undef_tied_input, but it
  // seems underdeveloped and doesn't apply the right register
  // class constraints.
  dag mods = !con(!if(UseTiedOutput, (ins clampmod:$clamp, VGPR_32:$vdst_in),
                                     (ins clampmod0:$clamp)),
                  (ins op_sel0:$op_sel, op_sel_hi0:$op_sel_hi));
  // We use Ins64 because that is the one which populates InOperandList
  // due to the logic in class VOP3_Pseudo
  let Ins64 = !con(srcs, mods);
  let Asm64 =
    "$vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$op_sel$op_sel_hi$clamp";
}

multiclass VOP3PInst<string OpName, VOPProfile P,
                     SDPatternOperator node = null_frag, bit HasExplicitClamp = 0> {
  def NAME : VOP3P_Pseudo<OpName, P,
                          !if (P.HasModifiers,
                               getVOP3PModPat<P, node, HasExplicitClamp>.ret,
                               getVOP3Pat<P, node>.ret)>;
}


// Non-packed instructions that use the VOP3P encoding.
// VOP3 neg/abs and VOP3P opsel/opsel_hi modifiers are allowed.
multiclass VOP3_VOP3PInst<string OpName, VOP3P_Mix_Profile P,
                          SDPatternOperator node = null_frag> {
  def NAME : VOP3P_Pseudo<OpName, P> {
    let Constraints = !if(P.UseTiedOutput, "$vdst = $vdst_in", "");
    let DisableEncoding = !if(P.UseTiedOutput, "$vdst_in", "");
  }
}

let isCommutable = 1 in {
defm V_PK_MAD_I16 : VOP3PInst<"v_pk_mad_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16_V2I16>>;
defm V_PK_MAD_U16 : VOP3PInst<"v_pk_mad_u16", VOP3_Profile<VOP_V2I16_V2I16_V2I16_V2I16>>;

let FPDPRounding = 1 in {
defm V_PK_FMA_F16 : VOP3PInst<"v_pk_fma_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16_V2F16>, any_fma>;
defm V_PK_ADD_F16 : VOP3PInst<"v_pk_add_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>, any_fadd>;
defm V_PK_MUL_F16 : VOP3PInst<"v_pk_mul_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>, any_fmul>;
} // End FPDPRounding = 1
defm V_PK_MAX_F16 : VOP3PInst<"v_pk_max_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>, fmaxnum_like>;
defm V_PK_MIN_F16 : VOP3PInst<"v_pk_min_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>, fminnum_like>;

defm V_PK_ADD_U16 : VOP3PInst<"v_pk_add_u16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, add>;
defm V_PK_ADD_I16 : VOP3PInst<"v_pk_add_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>>;
defm V_PK_MUL_LO_U16 : VOP3PInst<"v_pk_mul_lo_u16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, mul>;

defm V_PK_MIN_I16 : VOP3PInst<"v_pk_min_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, smin>;
defm V_PK_MIN_U16 : VOP3PInst<"v_pk_min_u16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, umin>;
defm V_PK_MAX_I16 : VOP3PInst<"v_pk_max_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, smax>;
defm V_PK_MAX_U16 : VOP3PInst<"v_pk_max_u16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, umax>;
}

defm V_PK_SUB_U16 : VOP3PInst<"v_pk_sub_u16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>>;
defm V_PK_SUB_I16 : VOP3PInst<"v_pk_sub_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, sub>;

defm V_PK_LSHLREV_B16 : VOP3PInst<"v_pk_lshlrev_b16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, lshl_rev>;
defm V_PK_ASHRREV_I16 : VOP3PInst<"v_pk_ashrrev_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, ashr_rev>;
defm V_PK_LSHRREV_B16 : VOP3PInst<"v_pk_lshrrev_b16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, lshr_rev>;


let SubtargetPredicate = HasVOP3PInsts in {

// Undo sub x, c -> add x, -c canonicalization since c is more likely
// an inline immediate than -c.
// The constant will be emitted as a mov, and folded later.
// TODO: We could directly encode the immediate now
def : GCNPat<
  (add (v2i16 (VOP3PMods v2i16:$src0, i32:$src0_modifiers)), NegSubInlineConstV216:$src1),
  (V_PK_SUB_U16 $src0_modifiers, $src0, SRCMODS.OP_SEL_1, NegSubInlineConstV216:$src1)
>;

// Integer operations with clamp bit set.
class VOP3PSatPat<SDPatternOperator pat, Instruction inst> : GCNPat<
  (pat (v2i16 (VOP3PMods v2i16:$src0, i32:$src0_modifiers)),
       (v2i16 (VOP3PMods v2i16:$src1, i32:$src1_modifiers))),
  (inst $src0_modifiers, $src0, $src1_modifiers, $src1, DSTCLAMP.ENABLE)
>;

def : VOP3PSatPat<uaddsat, V_PK_ADD_U16>;
def : VOP3PSatPat<saddsat, V_PK_ADD_I16>;
def : VOP3PSatPat<usubsat, V_PK_SUB_U16>;
def : VOP3PSatPat<ssubsat, V_PK_SUB_I16>;
} // End SubtargetPredicate = HasVOP3PInsts

multiclass MadFmaMixPats<SDPatternOperator fma_like,
                         Instruction mix_inst,
                         Instruction mixlo_inst,
                         Instruction mixhi_inst> {
  def : GCNPat <
    (f16 (fpround (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)),
                            (f32 (VOP3PMadMixMods f16:$src1, i32:$src1_modifiers)),
                            (f32 (VOP3PMadMixMods f16:$src2, i32:$src2_modifiers))))),
    (mixlo_inst $src0_modifiers, $src0,
                $src1_modifiers, $src1,
                $src2_modifiers, $src2,
                DSTCLAMP.NONE,
                (i32 (IMPLICIT_DEF)))
  >;

  // FIXME: Special case handling for mixhi (especially for clamp)
  // because dealing with the write to the high half of the register is
  // difficult.
  def : GCNPat <
    (build_vector f16:$elt0, (fpround (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)),
                                                (f32 (VOP3PMadMixMods f16:$src1, i32:$src1_modifiers)),
                                                (f32 (VOP3PMadMixMods f16:$src2, i32:$src2_modifiers))))),
    (v2f16 (mixhi_inst $src0_modifiers, $src0,
                       $src1_modifiers, $src1,
                       $src2_modifiers, $src2,
                       DSTCLAMP.NONE,
                       $elt0))
  >;

  def : GCNPat <
    (build_vector
      f16:$elt0,
      (AMDGPUclamp (fpround (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)),
                                      (f32 (VOP3PMadMixMods f16:$src1, i32:$src1_modifiers)),
                                      (f32 (VOP3PMadMixMods f16:$src2, i32:$src2_modifiers)))))),
    (v2f16 (mixhi_inst $src0_modifiers, $src0,
                       $src1_modifiers, $src1,
                       $src2_modifiers, $src2,
                       DSTCLAMP.ENABLE,
                       $elt0))
  >;

  def : GCNPat <
    (AMDGPUclamp (build_vector
      (fpround (fma_like (f32 (VOP3PMadMixMods f16:$lo_src0, i32:$lo_src0_modifiers)),
                         (f32 (VOP3PMadMixMods f16:$lo_src1, i32:$lo_src1_modifiers)),
                         (f32 (VOP3PMadMixMods f16:$lo_src2, i32:$lo_src2_modifiers)))),
      (fpround (fma_like (f32 (VOP3PMadMixMods f16:$hi_src0, i32:$hi_src0_modifiers)),
                         (f32 (VOP3PMadMixMods f16:$hi_src1, i32:$hi_src1_modifiers)),
                         (f32 (VOP3PMadMixMods f16:$hi_src2, i32:$hi_src2_modifiers)))))),
    (v2f16 (mixhi_inst $hi_src0_modifiers, $hi_src0,
                       $hi_src1_modifiers, $hi_src1,
                       $hi_src2_modifiers, $hi_src2,
                       DSTCLAMP.ENABLE,
                       (mixlo_inst $lo_src0_modifiers, $lo_src0,
                                   $lo_src1_modifiers, $lo_src1,
                                   $lo_src2_modifiers, $lo_src2,
                                   DSTCLAMP.ENABLE,
                                   (i32 (IMPLICIT_DEF)))))
  >;
}

let SubtargetPredicate = HasMadMixInsts in {

// These are VOP3a-like opcodes which accept no omod.
// Size of src arguments (16/32) is controlled by op_sel.
// For 16-bit src arguments their location (hi/lo) is controlled by op_sel_hi.
let isCommutable = 1, mayRaiseFPException = 0 in {
defm V_MAD_MIX_F32 : VOP3_VOP3PInst<"v_mad_mix_f32", VOP3P_Mix_Profile<VOP_F32_F16_F16_F16, VOP3_OPSEL>>;

let FPDPRounding = 1 in {
// Clamp modifier is applied after conversion to f16.
defm V_MAD_MIXLO_F16 : VOP3_VOP3PInst<"v_mad_mixlo_f16", VOP3P_Mix_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL, 1>>;

let ClampLo = 0, ClampHi = 1 in {
defm V_MAD_MIXHI_F16 : VOP3_VOP3PInst<"v_mad_mixhi_f16", VOP3P_Mix_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL, 1>>;
}
} // End FPDPRounding = 1
}

defm : MadFmaMixPats<fmad, V_MAD_MIX_F32, V_MAD_MIXLO_F16, V_MAD_MIXHI_F16>;
} // End SubtargetPredicate = HasMadMixInsts


// Essentially the same as the mad_mix versions
let SubtargetPredicate = HasFmaMixInsts in {
let isCommutable = 1 in {
defm V_FMA_MIX_F32 : VOP3_VOP3PInst<"v_fma_mix_f32", VOP3P_Mix_Profile<VOP_F32_F16_F16_F16, VOP3_OPSEL>>;

let FPDPRounding = 1 in {
// Clamp modifier is applied after conversion to f16.
defm V_FMA_MIXLO_F16 : VOP3_VOP3PInst<"v_fma_mixlo_f16", VOP3P_Mix_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL, 1>>;

let ClampLo = 0, ClampHi = 1 in {
defm V_FMA_MIXHI_F16 : VOP3_VOP3PInst<"v_fma_mixhi_f16", VOP3P_Mix_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL, 1>>;
}
} // End FPDPRounding = 1
}

defm : MadFmaMixPats<fma, V_FMA_MIX_F32, V_FMA_MIXLO_F16, V_FMA_MIXHI_F16>;
}

// Defines patterns that extract a signed 4-bit value starting at bit Idx[0],
// where Idx[1] is the matching left-shift amount (28 - Idx[0]).
foreach Idx = [[0,28],[4,24],[8,20],[12,16],[16,12],[20,8],[24,4]] in
  def ExtractSigned4bit_#Idx[0] : PatFrag<(ops node:$src),
                                          (sra (shl node:$src, (i32 Idx[1])), (i32 28))>;

// Defines a code pattern that extracts an unsigned (U = 1) or signed (U = 0)
// 4/8-bit value starting at FromBitIndex.
class Extract<int FromBitIndex, int BitMask, bit U>: PatFrag<
  (ops node:$src),
  !if (!or (!and (!eq (BitMask, 255), !eq (FromBitIndex, 24)), !eq (FromBitIndex, 28)), // last element
       !if (U, (srl node:$src, (i32 FromBitIndex)), (sra node:$src, (i32 FromBitIndex))),
       !if (!eq (FromBitIndex, 0), // first element
            !if (U, (and node:$src, (i32 BitMask)),
                 !if (!eq (BitMask, 15), (!cast<PatFrag>("ExtractSigned4bit_"#FromBitIndex) node:$src),
                                         (sext_inreg node:$src, i8))),
            !if (U, (and (srl node:$src, (i32 FromBitIndex)), (i32 BitMask)),
                 !if (!eq (BitMask, 15), (!cast<PatFrag>("ExtractSigned4bit_"#FromBitIndex) node:$src),
                      (sext_inreg (srl node:$src, (i32 FromBitIndex)), i8)))))>;


foreach Type = ["I", "U"] in
  foreach Index = 0-3 in {
    // Defines patterns that extract the Index'ed 8-bit chunk from a 32-bit
    // scalar value.
    def Type#Index#"_8bit" : Extract<!shl(Index, 3), 255, !eq (Type, "U")>;

    // Defines multiplication patterns where the multiplication is happening on each
    // Index'ed 8-bit chunk of a 32-bit scalar value.
    def Mul#Type#_Elt#Index : PatFrag<
      (ops node:$src0, node:$src1),
      (!cast<HasOneUseBinOp>(!if (!eq (Type, "I"), AMDGPUmul_i24_oneuse, AMDGPUmul_u24_oneuse))
                             (!cast<Extract>(Type#Index#"_8bit") node:$src0),
                             (!cast<Extract>(Type#Index#"_8bit") node:$src1))>;
  }

// Different variants of the dot8 patterns cause a huge increase in compile time.
// Define non-associative/non-commutative add/mul to prevent permutation in the dot8
// pattern.
def NonACAdd        : SDNode<"ISD::ADD", SDTIntBinOp>;
def NonACAdd_oneuse : HasOneUseBinOp<NonACAdd>;

def NonACAMDGPUmul_u24        : SDNode<"AMDGPUISD::MUL_U24", SDTIntBinOp>;
def NonACAMDGPUmul_u24_oneuse : HasOneUseBinOp<NonACAMDGPUmul_u24>;

def NonACAMDGPUmul_i24        : SDNode<"AMDGPUISD::MUL_I24", SDTIntBinOp>;
def NonACAMDGPUmul_i24_oneuse : HasOneUseBinOp<NonACAMDGPUmul_i24>;

foreach Type = ["I", "U"] in
  foreach Index = 0-7 in {
    // Defines patterns that extract the Index'ed 4-bit chunk from a 32-bit
    // scalar value.
    def Type#Index#"_4bit" : Extract<!shl(Index, 2), 15, !eq (Type, "U")>;

    // Defines multiplication patterns where the multiplication is happening on each
    // Index'ed 4-bit chunk of a 32-bit scalar value.
    def Mul#Type#Index#"_4bit" : PatFrag<
      (ops node:$src0, node:$src1),
      (!cast<HasOneUseBinOp>(!if (!eq (Type, "I"), NonACAMDGPUmul_i24_oneuse, NonACAMDGPUmul_u24_oneuse))
                             (!cast<Extract>(Type#Index#"_4bit") node:$src0),
                             (!cast<Extract>(Type#Index#"_4bit") node:$src1))>;
  }

class UDot2Pat<Instruction Inst> : GCNPat <
  (add (add_oneuse (AMDGPUmul_u24_oneuse (srl i32:$src0, (i32 16)),
                                         (srl i32:$src1, (i32 16))), i32:$src2),
       (AMDGPUmul_u24_oneuse (and i32:$src0, (i32 65535)),
                             (and i32:$src1, (i32 65535)))
   ),
  (Inst (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))> {
  let SubtargetPredicate = !cast<VOP_Pseudo>(Inst).SubtargetPredicate;
}

class SDot2Pat<Instruction Inst> : GCNPat <
  (add (add_oneuse (AMDGPUmul_i24_oneuse (sra i32:$src0, (i32 16)),
                                         (sra i32:$src1, (i32 16))), i32:$src2),
       (AMDGPUmul_i24_oneuse (sext_inreg i32:$src0, i16),
                             (sext_inreg i32:$src1, i16))),
  (Inst (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))> {
  let SubtargetPredicate = !cast<VOP_Pseudo>(Inst).SubtargetPredicate;
}

let IsDOT = 1 in {
let SubtargetPredicate = HasDot2Insts in {

defm V_DOT2_I32_I16 : VOP3PInst<"v_dot2_i32_i16",
  VOP3_Profile<VOP_I32_V2I16_V2I16_I32>, int_amdgcn_sdot2, 1>;
defm V_DOT2_U32_U16 : VOP3PInst<"v_dot2_u32_u16",
  VOP3_Profile<VOP_I32_V2I16_V2I16_I32>, int_amdgcn_udot2, 1>;

} // End SubtargetPredicate = HasDot2Insts

let SubtargetPredicate = HasDot7Insts in {

defm V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16",
  VOP3_Profile<VOP_F32_V2F16_V2F16_F32>,
  AMDGPUfdot2, 1/*ExplicitClamp*/>;
defm V_DOT4_U32_U8 : VOP3PInst<"v_dot4_u32_u8",
  VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot4, 1>;
defm V_DOT8_U32_U4 : VOP3PInst<"v_dot8_u32_u4",
  VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot8, 1>;

} // End SubtargetPredicate = HasDot7Insts

let SubtargetPredicate = HasDot1Insts in {

defm V_DOT4_I32_I8 : VOP3PInst<"v_dot4_i32_i8",
  VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_sdot4, 1>;
defm V_DOT8_I32_I4 : VOP3PInst<"v_dot8_i32_i4",
  VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_sdot8, 1>;

} // End SubtargetPredicate = HasDot1Insts
} // End let IsDOT = 1

def : UDot2Pat<V_DOT2_U32_U16>;
def : SDot2Pat<V_DOT2_I32_I16>;

foreach Type = ["U", "I"] in
  let SubtargetPredicate = !cast<VOP_Pseudo>("V_DOT4_"#Type#"32_"#Type#8).SubtargetPredicate in
  def : GCNPat <
    !cast<dag>(!foldl((i32 i32:$src2), [0, 1, 2, 3], lhs, y,
                      (add_oneuse lhs, (!cast<PatFrag>("Mul"#Type#"_Elt"#y) i32:$src0, i32:$src1)))),
    (!cast<VOP3P_Pseudo>("V_DOT4_"#Type#"32_"#Type#8) (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))>;
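
// For reference (illustrative only, not a definition): with Type = "U" the
// !foldl above expands to roughly
//   (add_oneuse (add_oneuse (add_oneuse (add_oneuse $src2,
//       (MulU_Elt0 $src0, $src1)), (MulU_Elt1 $src0, $src1)),
//       (MulU_Elt2 $src0, $src1)), (MulU_Elt3 $src0, $src1))
// i.e. a byte-wise dot product of $src0 and $src1 accumulated into $src2,
// which is what v_dot4_u32_u8 computes. The "I" variant uses MulI_Elt*
// (signed byte extracts) and selects to v_dot4_i32_i8.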

foreach Type = ["U", "I"] in
  let SubtargetPredicate = !cast<VOP_Pseudo>("V_DOT8_"#Type#"32_"#Type#4).SubtargetPredicate in
  def : GCNPat <
    !cast<dag>(!foldl((add_oneuse i32:$src2, (!cast<PatFrag>("Mul"#Type#"0_4bit") i32:$src0, i32:$src1)),
                      [1, 2, 3, 4, 5, 6, 7], lhs, y,
                      (NonACAdd_oneuse lhs, (!cast<PatFrag>("Mul"#Type#y#"_4bit") i32:$src0, i32:$src1)))),
    (!cast<VOP3P_Pseudo>("V_DOT8_"#Type#"32_"#Type#4) (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))>;

// Different variants of dot8 code-gen dag patterns are not generated through table-gen due to a huge increase
// in the compile time. Directly handle the pattern generated by the FE here.
foreach Type = ["U", "I"] in
  let SubtargetPredicate = !cast<VOP_Pseudo>("V_DOT8_"#Type#"32_"#Type#4).SubtargetPredicate in
  def : GCNPat <
    !cast<dag>(!foldl((add_oneuse i32:$src2, (!cast<PatFrag>("Mul"#Type#"0_4bit") i32:$src0, i32:$src1)),
                      [7, 1, 2, 3, 4, 5, 6], lhs, y,
                      (NonACAdd_oneuse lhs, (!cast<PatFrag>("Mul"#Type#y#"_4bit") i32:$src0, i32:$src1)))),
    (!cast<VOP3P_Pseudo>("V_DOT8_"#Type#"32_"#Type#4) (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))>;

def ADst_32   : VOPDstOperand<AGPR_32>;
def ADst_64   : VOPDstOperand<AReg_64>;
def ADst_128  : VOPDstOperand<AReg_128>;
def ADst_256  : VOPDstOperand<AReg_256>;
def ADst_512  : VOPDstOperand<AReg_512>;
def ADst_1024 : VOPDstOperand<AReg_1024>;
def VDst_64   : VOPDstOperand<VReg_64>;
def VDst_128  : VOPDstOperand<VReg_128>;
def VDst_256  : VOPDstOperand<VReg_256>;
def VDst_512  : VOPDstOperand<VReg_512>;
def VDst_1024 : VOPDstOperand<VReg_1024>;

def VOPProfileAccRead : VOP3_Profile<VOP_I32_I32, VOP3_MAI> {
  let Src0RC64 = ARegSrc_32;
}

def VOPProfileAccWrite : VOP3_Profile<VOP_I32_I32, VOP3_MAI> {
  let DstRC = ADst_32;
  let Src0RC64 = VISrc_b32;
}

class VOPProfileMAI<VOPProfile P, RegisterOperand _SrcRC, RegisterOperand _DstRC,
                    RegisterOperand SrcABRC = AVSrc_32>
  : VOP3_Profile<P, VOP3_MAI> {
  let DstRC = _DstRC;
  let Src0RC64 = SrcABRC;
  let Src1RC64 = SrcABRC;
  let Src2RC64 = _SrcRC;
  let HasOpSel = 0;
  let HasClamp = 0;
  let HasIntClamp = 0;
  let HasOMod = 0;
  let HasModifiers = 0;
  let Asm64 = "$vdst, $src0, $src1, $src2$cbsz$abid$blgp";
  let Ins64 = (ins Src0RC64:$src0, Src1RC64:$src1, Src2RC64:$src2, cbsz:$cbsz, abid:$abid, blgp:$blgp);
}

def VOPProfileMAI_F32_F32_X4    : VOPProfileMAI<VOP_V4F32_F32_F32_V4F32,       AISrc_128_f32,  ADst_128>;
def VOPProfileMAI_F32_F32_X16   : VOPProfileMAI<VOP_V16F32_F32_F32_V16F32,     AISrc_512_f32,  ADst_512>;
def VOPProfileMAI_F32_F32_X32   : VOPProfileMAI<VOP_V32F32_F32_F32_V32F32,     AISrc_1024_f32, ADst_1024>;
def VOPProfileMAI_I32_I32_X4    : VOPProfileMAI<VOP_V4I32_I32_I32_V4I32,       AISrc_128_b32,  ADst_128>;
def VOPProfileMAI_I32_I32_X16   : VOPProfileMAI<VOP_V16I32_I32_I32_V16I32,     AISrc_512_b32,  ADst_512>;
def VOPProfileMAI_I32_I32_X32   : VOPProfileMAI<VOP_V32I32_I32_I32_V32I32,     AISrc_1024_b32, ADst_1024>;
def VOPProfileMAI_F32_V2I16_X4  : VOPProfileMAI<VOP_V4F32_V2I16_V2I16_V4F32,   AISrc_128_b32,  ADst_128>;
def VOPProfileMAI_F32_V2I16_X16 : VOPProfileMAI<VOP_V16F32_V2I16_V2I16_V16F32, AISrc_512_b32,  ADst_512>;
def VOPProfileMAI_F32_V2I16_X32 : VOPProfileMAI<VOP_V32F32_V2I16_V2I16_V32F32, AISrc_1024_b32, ADst_1024>;
def VOPProfileMAI_F32_V4F16_X4  : VOPProfileMAI<VOP_V4F32_V4F16_V4F16_V4F32,   AISrc_128_b32,  ADst_128,  AVSrc_64>;
def VOPProfileMAI_F32_V4F16_X16 : VOPProfileMAI<VOP_V16F32_V4F16_V4F16_V16F32, AISrc_512_b32,  ADst_512,  AVSrc_64>;
def VOPProfileMAI_F32_V4F16_X32 : VOPProfileMAI<VOP_V32F32_V4F16_V4F16_V32F32, AISrc_1024_b32, ADst_1024, AVSrc_64>;
def VOPProfileMAI_F32_V4I16_X4  : VOPProfileMAI<VOP_V4F32_V4I16_V4I16_V4F32,   AISrc_128_b32,  ADst_128,  AVSrc_64>;
def VOPProfileMAI_F32_V4I16_X16 : VOPProfileMAI<VOP_V16F32_V4I16_V4I16_V16F32, AISrc_512_b32,  ADst_512,  AVSrc_64>;
def VOPProfileMAI_F32_V4I16_X32 : VOPProfileMAI<VOP_V32F32_V4I16_V4I16_V32F32, AISrc_1024_b32, ADst_1024, AVSrc_64>;
def VOPProfileMAI_F64_16X16X4F64 : VOPProfileMAI<VOP_V4F64_F64_F64_V4F64,      AISrc_256_f64,  ADst_256,  AVSrc_64>;
def VOPProfileMAI_F64_4X4X4F64   : VOPProfileMAI<VOP_F64_F64_F64_F64,          AISrc_64_f64,   ADst_64,   AVSrc_64>;

def VOPProfileMAI_F32_F32_X4_VCD     : VOPProfileMAI<VOP_V4F32_F32_F32_V4F32,       VISrc_128_f32,  VDst_128>;
def VOPProfileMAI_F32_F32_X16_VCD    : VOPProfileMAI<VOP_V16F32_F32_F32_V16F32,     VISrc_512_f32,  VDst_512>;
def VOPProfileMAI_F32_F32_X32_VCD    : VOPProfileMAI<VOP_V32F32_F32_F32_V32F32,     VISrc_1024_f32, VDst_1024>;
def VOPProfileMAI_I32_I32_X4_VCD     : VOPProfileMAI<VOP_V4I32_I32_I32_V4I32,       VISrc_128_b32,  VDst_128>;
def VOPProfileMAI_I32_I32_X16_VCD    : VOPProfileMAI<VOP_V16I32_I32_I32_V16I32,     VISrc_512_b32,  VDst_512>;
def VOPProfileMAI_I32_I32_X32_VCD    : VOPProfileMAI<VOP_V32I32_I32_I32_V32I32,     VISrc_1024_b32, VDst_1024>;
def VOPProfileMAI_F32_V2I16_X4_VCD   : VOPProfileMAI<VOP_V4F32_V2I16_V2I16_V4F32,   VISrc_128_b32,  VDst_128>;
def VOPProfileMAI_F32_V2I16_X16_VCD  : VOPProfileMAI<VOP_V16F32_V2I16_V2I16_V16F32, VISrc_512_b32,  VDst_512>;
def VOPProfileMAI_F32_V2I16_X32_VCD  : VOPProfileMAI<VOP_V32F32_V2I16_V2I16_V32F32, VISrc_1024_b32, VDst_1024>;
def VOPProfileMAI_F32_V4F16_X4_VCD   : VOPProfileMAI<VOP_V4F32_V4F16_V4F16_V4F32,   VISrc_128_b32,  VDst_128,  AVSrc_64>;
def VOPProfileMAI_F32_V4F16_X16_VCD  : VOPProfileMAI<VOP_V16F32_V4F16_V4F16_V16F32, VISrc_512_b32,  VDst_512,  AVSrc_64>;
def VOPProfileMAI_F32_V4F16_X32_VCD  : VOPProfileMAI<VOP_V32F32_V4F16_V4F16_V32F32, VISrc_1024_b32, VDst_1024, AVSrc_64>;
def VOPProfileMAI_F32_V4I16_X4_VCD   : VOPProfileMAI<VOP_V4F32_V4I16_V4I16_V4F32,   VISrc_128_b32,  VDst_128,  AVSrc_64>;
def VOPProfileMAI_F32_V4I16_X16_VCD  : VOPProfileMAI<VOP_V16F32_V4I16_V4I16_V16F32, VISrc_512_b32,  VDst_512,  AVSrc_64>;
def VOPProfileMAI_F32_V4I16_X32_VCD  : VOPProfileMAI<VOP_V32F32_V4I16_V4I16_V32F32, VISrc_1024_b32, VDst_1024, AVSrc_64>;
def VOPProfileMAI_F64_16X16X4F64_VCD : VOPProfileMAI<VOP_V4F64_F64_F64_V4F64,       VISrc_256_f64,  VDst_256,  AVSrc_64>;
def VOPProfileMAI_F64_4X4X4F64_VCD   : VOPProfileMAI<VOP_F64_F64_F64_F64,           VISrc_64_f64,   VDst_64,   AVSrc_64>;

let Predicates = [HasMAIInsts] in {

let isAsCheapAsAMove = 1, isReMaterializable = 1 in {
  defm V_ACCVGPR_READ_B32 : VOP3Inst<"v_accvgpr_read_b32", VOPProfileAccRead>;
  let isMoveImm = 1 in {
    defm V_ACCVGPR_WRITE_B32 : VOP3Inst<"v_accvgpr_write_b32", VOPProfileAccWrite>;
  } // End isMoveImm = 1
} // End isAsCheapAsAMove = 1, isReMaterializable = 1

multiclass MAIInst<string OpName, string P, SDPatternOperator node> {
  let isConvergent = 1, mayRaiseFPException = 0, ReadsModeReg = 1 in {
    // FP32 denorm mode is respected, rounding mode is not. Exceptions are not supported.
    defm "" : VOP3Inst<OpName, !cast<VOPProfileMAI>("VOPProfileMAI_" # P), node>;

    let SubtargetPredicate = isGFX90APlus, Mnemonic = OpName in
    defm _vgprcd : VOP3Inst<OpName # "_vgprcd", !cast<VOPProfileMAI>("VOPProfileMAI_" # P # "_VCD")>;
  } // End isConvergent = 1, mayRaiseFPException = 0, ReadsModeReg = 1
}

defm V_MFMA_F32_4X4X1F32    : MAIInst<"v_mfma_f32_4x4x1f32",    "F32_F32_X4",    int_amdgcn_mfma_f32_4x4x1f32>;
defm V_MFMA_F32_4X4X4F16    : MAIInst<"v_mfma_f32_4x4x4f16",    "F32_V4F16_X4",  int_amdgcn_mfma_f32_4x4x4f16>;
defm V_MFMA_I32_4X4X4I8     : MAIInst<"v_mfma_i32_4x4x4i8",     "I32_I32_X4",    int_amdgcn_mfma_i32_4x4x4i8>;
defm V_MFMA_F32_16X16X1F32  : MAIInst<"v_mfma_f32_16x16x1f32",  "F32_F32_X16",   int_amdgcn_mfma_f32_16x16x1f32>;
defm V_MFMA_F32_16X16X4F32  : MAIInst<"v_mfma_f32_16x16x4f32",  "F32_F32_X4",    int_amdgcn_mfma_f32_16x16x4f32>;
defm V_MFMA_F32_16X16X4F16  : MAIInst<"v_mfma_f32_16x16x4f16",  "F32_V4F16_X16", int_amdgcn_mfma_f32_16x16x4f16>;
defm V_MFMA_F32_16X16X16F16 : MAIInst<"v_mfma_f32_16x16x16f16", "F32_V4F16_X4",  int_amdgcn_mfma_f32_16x16x16f16>;
defm V_MFMA_I32_16X16X4I8   : MAIInst<"v_mfma_i32_16x16x4i8",   "I32_I32_X16",   int_amdgcn_mfma_i32_16x16x4i8>;
defm V_MFMA_F32_32X32X1F32  : MAIInst<"v_mfma_f32_32x32x1f32",  "F32_F32_X32",   int_amdgcn_mfma_f32_32x32x1f32>;
defm V_MFMA_F32_32X32X2F32  : MAIInst<"v_mfma_f32_32x32x2f32",  "F32_F32_X16",   int_amdgcn_mfma_f32_32x32x2f32>;
defm V_MFMA_F32_32X32X4F16  : MAIInst<"v_mfma_f32_32x32x4f16",  "F32_V4F16_X32", int_amdgcn_mfma_f32_32x32x4f16>;
defm V_MFMA_F32_32X32X8F16  : MAIInst<"v_mfma_f32_32x32x8f16",  "F32_V4F16_X16", int_amdgcn_mfma_f32_32x32x8f16>;
defm V_MFMA_I32_32X32X4I8   : MAIInst<"v_mfma_i32_32x32x4i8",   "I32_I32_X32",   int_amdgcn_mfma_i32_32x32x4i8>;
defm V_MFMA_I32_16X16X16I8  : MAIInst<"v_mfma_i32_16x16x16i8",  "I32_I32_X4",    int_amdgcn_mfma_i32_16x16x16i8>;
defm V_MFMA_I32_32X32X8I8   : MAIInst<"v_mfma_i32_32x32x8i8",   "I32_I32_X16",   int_amdgcn_mfma_i32_32x32x8i8>;
defm V_MFMA_F32_4X4X2BF16   : MAIInst<"v_mfma_f32_4x4x2bf16",   "F32_V2I16_X4",  int_amdgcn_mfma_f32_4x4x2bf16>;
defm V_MFMA_F32_16X16X2BF16 : MAIInst<"v_mfma_f32_16x16x2bf16", "F32_V2I16_X16", int_amdgcn_mfma_f32_16x16x2bf16>;
defm V_MFMA_F32_16X16X8BF16 : MAIInst<"v_mfma_f32_16x16x8bf16", "F32_V2I16_X4",  int_amdgcn_mfma_f32_16x16x8bf16>;
defm V_MFMA_F32_32X32X2BF16 : MAIInst<"v_mfma_f32_32x32x2bf16", "F32_V2I16_X32", int_amdgcn_mfma_f32_32x32x2bf16>;
defm V_MFMA_F32_32X32X4BF16 : MAIInst<"v_mfma_f32_32x32x4bf16", "F32_V2I16_X16", int_amdgcn_mfma_f32_32x32x4bf16>;

} // End Predicates = [HasMAIInsts]

let Predicates = [isGFX90APlus] in {
  defm V_MFMA_F32_32X32X4BF16_1K  : MAIInst<"v_mfma_f32_32x32x4bf16_1k",  "F32_V4I16_X32", int_amdgcn_mfma_f32_32x32x4bf16_1k>;
  defm V_MFMA_F32_16X16X4BF16_1K  : MAIInst<"v_mfma_f32_16x16x4bf16_1k",  "F32_V4I16_X16", int_amdgcn_mfma_f32_16x16x4bf16_1k>;
  defm V_MFMA_F32_4X4X4BF16_1K    : MAIInst<"v_mfma_f32_4x4x4bf16_1k",    "F32_V4I16_X4",  int_amdgcn_mfma_f32_4x4x4bf16_1k>;
  defm V_MFMA_F32_32X32X8BF16_1K  : MAIInst<"v_mfma_f32_32x32x8bf16_1k",  "F32_V4I16_X16", int_amdgcn_mfma_f32_32x32x8bf16_1k>;
  defm V_MFMA_F32_16X16X16BF16_1K : MAIInst<"v_mfma_f32_16x16x16bf16_1k", "F32_V4I16_X4",  int_amdgcn_mfma_f32_16x16x16bf16_1k>;

  defm V_MFMA_F64_16X16X4F64 : MAIInst<"v_mfma_f64_16x16x4f64", "F64_16X16X4F64", int_amdgcn_mfma_f64_16x16x4f64>;
  defm V_MFMA_F64_4X4X4F64   : MAIInst<"v_mfma_f64_4x4x4f64",   "F64_4X4X4F64",   int_amdgcn_mfma_f64_4x4x4f64>;
} // End Predicates = [isGFX90APlus]
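
// Illustrative IR mapping (assumed intrinsic signature, not defined in this
// file): a call such as
//   %d = call <4 x float> @llvm.amdgcn.mfma.f32.16x16x4f32(
//            float %a, float %b, <4 x float> %c, i32 0, i32 0, i32 0)
// selects to V_MFMA_F32_16X16X4F32 with the accumulator held in AGPRs; the
// _vgprcd variants defined above are the gfx90a forms whose C/D matrix
// operands live in VGPRs instead.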

let SubtargetPredicate = HasPackedFP32Ops, isCommutable = 1 in {
  defm V_PK_FMA_F32 : VOP3PInst<"v_pk_fma_f32", VOP3_Profile<VOP_V2F32_V2F32_V2F32_V2F32, VOP3_PACKED>, any_fma>;
  defm V_PK_MUL_F32 : VOP3PInst<"v_pk_mul_f32", VOP3_Profile<VOP_V2F32_V2F32_V2F32, VOP3_PACKED>, any_fmul>;
  defm V_PK_ADD_F32 : VOP3PInst<"v_pk_add_f32", VOP3_Profile<VOP_V2F32_V2F32_V2F32, VOP3_PACKED>, any_fadd>;
  defm V_PK_MOV_B32 : VOP3PInst<"v_pk_mov_b32", VOP3_Profile<VOP_V2I32_V2I32_V2I32, VOP3_PACKED>>;
} // End SubtargetPredicate = HasPackedFP32Ops, isCommutable = 1

def : MnemonicAlias<"v_accvgpr_read",  "v_accvgpr_read_b32">;
def : MnemonicAlias<"v_accvgpr_write", "v_accvgpr_write_b32">;

//===----------------------------------------------------------------------===//
// Begin Real Encodings
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// GFX8 (VI)
//===----------------------------------------------------------------------===//

multiclass VOP3P_Real_vi<bits<7> op> {
  def _vi : VOP3P_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
            VOP3Pe <op, !cast<VOP3_Pseudo>(NAME).Pfl> {
    let AssemblerPredicate = HasVOP3PInsts;
    let DecoderNamespace = "GFX8";
    let VOP3P = 1;
  }
}

multiclass VOP3P_Real_MAI<bits<7> op> {
  def _vi : VOP3P_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
            VOP3Pe_MAI <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl, ?> {
    let AssemblerPredicate = HasMAIInsts;
    let DecoderNamespace = "GFX8";
    let Inst{14} = ?; // op_sel_hi(2)
    let Inst{59} = ?; // op_sel_hi(0)
    let Inst{60} = ?; // op_sel_hi(1)
  }
}

multiclass VOP3P_Real_MFMA_gfx90a<bits<7> op> {
  let SubtargetPredicate = isGFX90AOnly,
      AssemblerPredicate = isGFX90AOnly, DecoderNamespace = "GFX90A" in {
  def _gfx90a_acd : VOP3P_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX90A>,
                    VOP3Pe_MAI <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl, 1>;

  def _gfx90a_vcd : VOP3P_Real<!cast<VOP3_Pseudo>(NAME # "_vgprcd" # "_e64"), SIEncodingFamily.GFX90A>,
                    VOP3Pe_MAI <op, !cast<VOP3_Pseudo>(NAME # "_vgprcd" # "_e64").Pfl, 0>;
  } // End SubtargetPredicate = isGFX90AOnly, AssemblerPredicate = isGFX90AOnly, DecoderNamespace = "GFX90A"
}

multiclass VOP3P_Real_MFMA<bits<7> op> :
  VOP3P_Real_MFMA_gfx90a <op> {
  def _vi : VOP3P_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
            VOP3Pe_MAI <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl, ?> {
    let AssemblerPredicate = HasMAIInsts;
    let DecoderNamespace = "GFX8";
  }
}

defm V_PK_MAD_I16     : VOP3P_Real_vi <0x00>;
defm V_PK_MUL_LO_U16  : VOP3P_Real_vi <0x01>;
defm V_PK_ADD_I16     : VOP3P_Real_vi <0x02>;
defm V_PK_SUB_I16     : VOP3P_Real_vi <0x03>;
defm V_PK_LSHLREV_B16 : VOP3P_Real_vi <0x04>;
defm V_PK_LSHRREV_B16 : VOP3P_Real_vi <0x05>;
defm V_PK_ASHRREV_I16 : VOP3P_Real_vi <0x06>;
defm V_PK_MAX_I16     : VOP3P_Real_vi <0x07>;
defm V_PK_MIN_I16     : VOP3P_Real_vi <0x08>;
defm V_PK_MAD_U16     : VOP3P_Real_vi <0x09>;

defm V_PK_ADD_U16     : VOP3P_Real_vi <0x0a>;
defm V_PK_SUB_U16     : VOP3P_Real_vi <0x0b>;
defm V_PK_MAX_U16     : VOP3P_Real_vi <0x0c>;
defm V_PK_MIN_U16     : VOP3P_Real_vi <0x0d>;
defm V_PK_FMA_F16     : VOP3P_Real_vi <0x0e>;
defm V_PK_ADD_F16     : VOP3P_Real_vi <0x0f>;
defm V_PK_MUL_F16     : VOP3P_Real_vi <0x10>;
defm V_PK_MIN_F16     : VOP3P_Real_vi <0x11>;
defm V_PK_MAX_F16     : VOP3P_Real_vi <0x12>;

let SubtargetPredicate = HasMadMixInsts in {
defm V_MAD_MIX_F32   : VOP3P_Real_vi <0x20>;
defm V_MAD_MIXLO_F16 : VOP3P_Real_vi <0x21>;
defm V_MAD_MIXHI_F16 : VOP3P_Real_vi <0x22>;
}

let SubtargetPredicate = HasFmaMixInsts in {
let DecoderNamespace = "GFX9_DL" in {
// The mad_mix instructions were renamed and their behaviors changed,
// but the opcode stayed the same so we need to put these in a
// different DecoderNamespace to avoid the ambiguity.
defm V_FMA_MIX_F32   : VOP3P_Real_vi <0x20>;
defm V_FMA_MIXLO_F16 : VOP3P_Real_vi <0x21>;
defm V_FMA_MIXHI_F16 : VOP3P_Real_vi <0x22>;
}
}

let SubtargetPredicate = HasDot2Insts in {

defm V_DOT2_I32_I16 : VOP3P_Real_vi <0x26>;
defm V_DOT2_U32_U16 : VOP3P_Real_vi <0x27>;

} // End SubtargetPredicate = HasDot2Insts

let SubtargetPredicate = HasDot7Insts in {

defm V_DOT2_F32_F16 : VOP3P_Real_vi <0x23>;
defm V_DOT4_U32_U8  : VOP3P_Real_vi <0x29>;
defm V_DOT8_U32_U4  : VOP3P_Real_vi <0x2b>;

} // End SubtargetPredicate = HasDot7Insts

let SubtargetPredicate = HasDot1Insts in {

defm V_DOT4_I32_I8 : VOP3P_Real_vi <0x28>;
defm V_DOT8_I32_I4 : VOP3P_Real_vi <0x2a>;

} // End SubtargetPredicate = HasDot1Insts

let SubtargetPredicate = HasMAIInsts in {

defm V_ACCVGPR_READ_B32     : VOP3P_Real_MAI <0x58>;
defm V_ACCVGPR_WRITE_B32    : VOP3P_Real_MAI <0x59>;
defm V_MFMA_F32_32X32X1F32  : VOP3P_Real_MFMA <0x40>;
defm V_MFMA_F32_16X16X1F32  : VOP3P_Real_MFMA <0x41>;
defm V_MFMA_F32_4X4X1F32    : VOP3P_Real_MFMA <0x42>;
defm V_MFMA_F32_32X32X2F32  : VOP3P_Real_MFMA <0x44>;
defm V_MFMA_F32_16X16X4F32  : VOP3P_Real_MFMA <0x45>;
defm V_MFMA_F32_32X32X4F16  : VOP3P_Real_MFMA <0x48>;
defm V_MFMA_F32_16X16X4F16  : VOP3P_Real_MFMA <0x49>;
defm V_MFMA_F32_4X4X4F16    : VOP3P_Real_MFMA <0x4a>;
defm V_MFMA_F32_32X32X8F16  : VOP3P_Real_MFMA <0x4c>;
defm V_MFMA_F32_16X16X16F16 : VOP3P_Real_MFMA <0x4d>;
defm V_MFMA_I32_32X32X4I8   : VOP3P_Real_MFMA <0x50>;
defm V_MFMA_I32_16X16X4I8   : VOP3P_Real_MFMA <0x51>;
defm V_MFMA_I32_4X4X4I8     : VOP3P_Real_MFMA <0x52>;
defm V_MFMA_I32_16X16X16I8  : VOP3P_Real_MFMA <0x55>;
defm V_MFMA_I32_32X32X8I8   : VOP3P_Real_MFMA <0x54>;
defm V_MFMA_F32_32X32X2BF16 : VOP3P_Real_MFMA <0x68>;
defm V_MFMA_F32_16X16X2BF16 : VOP3P_Real_MFMA <0x69>;
defm V_MFMA_F32_4X4X2BF16   : VOP3P_Real_MFMA <0x6b>;
defm V_MFMA_F32_32X32X4BF16 : VOP3P_Real_MFMA <0x6c>;
defm V_MFMA_F32_16X16X8BF16 : VOP3P_Real_MFMA <0x6d>;

} // End SubtargetPredicate = HasMAIInsts

defm V_MFMA_F32_32X32X4BF16_1K  : VOP3P_Real_MFMA_gfx90a <0x63>;
defm V_MFMA_F32_16X16X4BF16_1K  : VOP3P_Real_MFMA_gfx90a <0x64>;
defm V_MFMA_F32_4X4X4BF16_1K    : VOP3P_Real_MFMA_gfx90a <0x65>;
defm V_MFMA_F32_32X32X8BF16_1K  : VOP3P_Real_MFMA_gfx90a <0x66>;
defm V_MFMA_F32_16X16X16BF16_1K : VOP3P_Real_MFMA_gfx90a <0x67>;
defm V_MFMA_F64_16X16X4F64      : VOP3P_Real_MFMA_gfx90a <0x6e>;
defm V_MFMA_F64_4X4X4F64        : VOP3P_Real_MFMA_gfx90a <0x6f>;

let SubtargetPredicate = HasPackedFP32Ops in {
  defm V_PK_FMA_F32 : VOP3P_Real_vi <0x30>;
  defm V_PK_MUL_F32 : VOP3P_Real_vi <0x31>;
  defm V_PK_ADD_F32 : VOP3P_Real_vi <0x32>;
  defm V_PK_MOV_B32 : VOP3P_Real_vi <0x33>;
} // End SubtargetPredicate = HasPackedFP32Ops

//===----------------------------------------------------------------------===//
// GFX10.
//===----------------------------------------------------------------------===//

let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10", VOP3P = 1 in {
  multiclass VOP3P_Real_gfx10<bits<7> op> {
    def _gfx10 : VOP3P_Real<!cast<VOP3P_Pseudo>(NAME), SIEncodingFamily.GFX10>,
                 VOP3Pe_gfx10 <op, !cast<VOP3P_Pseudo>(NAME).Pfl>;
  }
} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10", VOP3P = 1

defm V_PK_MAD_I16     : VOP3P_Real_gfx10<0x00>;
defm V_PK_MUL_LO_U16  : VOP3P_Real_gfx10<0x01>;
defm V_PK_ADD_I16     : VOP3P_Real_gfx10<0x02>;
defm V_PK_SUB_I16     : VOP3P_Real_gfx10<0x03>;
defm V_PK_LSHLREV_B16 : VOP3P_Real_gfx10<0x04>;
defm V_PK_LSHRREV_B16 : VOP3P_Real_gfx10<0x05>;
defm V_PK_ASHRREV_I16 : VOP3P_Real_gfx10<0x06>;
defm V_PK_MAX_I16     : VOP3P_Real_gfx10<0x07>;
defm V_PK_MIN_I16     : VOP3P_Real_gfx10<0x08>;
defm V_PK_MAD_U16     : VOP3P_Real_gfx10<0x09>;
defm V_PK_ADD_U16     : VOP3P_Real_gfx10<0x0a>;
defm V_PK_SUB_U16     : VOP3P_Real_gfx10<0x0b>;
defm V_PK_MAX_U16     : VOP3P_Real_gfx10<0x0c>;
defm V_PK_MIN_U16     : VOP3P_Real_gfx10<0x0d>;
defm V_PK_FMA_F16     : VOP3P_Real_gfx10<0x0e>;
defm V_PK_ADD_F16     : VOP3P_Real_gfx10<0x0f>;
defm V_PK_MUL_F16     : VOP3P_Real_gfx10<0x10>;
defm V_PK_MIN_F16     : VOP3P_Real_gfx10<0x11>;
defm V_PK_MAX_F16     : VOP3P_Real_gfx10<0x12>;
defm V_FMA_MIX_F32    : VOP3P_Real_gfx10<0x20>;
defm V_FMA_MIXLO_F16  : VOP3P_Real_gfx10<0x21>;
defm V_FMA_MIXHI_F16  : VOP3P_Real_gfx10<0x22>;

let SubtargetPredicate = HasDot2Insts in {

defm V_DOT2_I32_I16 : VOP3P_Real_gfx10 <0x14>;
defm V_DOT2_U32_U16 : VOP3P_Real_gfx10 <0x15>;

} // End SubtargetPredicate = HasDot2Insts

let SubtargetPredicate = HasDot7Insts in {

defm V_DOT2_F32_F16 : VOP3P_Real_gfx10 <0x13>;
defm V_DOT4_U32_U8  : VOP3P_Real_gfx10 <0x17>;
defm V_DOT8_U32_U4  : VOP3P_Real_gfx10 <0x19>;

} // End SubtargetPredicate = HasDot7Insts

let SubtargetPredicate = HasDot1Insts in {

defm V_DOT4_I32_I8 : VOP3P_Real_gfx10 <0x16>;
defm V_DOT8_I32_I4 : VOP3P_Real_gfx10 <0x18>;

} // End SubtargetPredicate = HasDot1Insts