//===-- VOP3PInstructions.td - Vector Instruction Definitions -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// VOP3P Classes
//===----------------------------------------------------------------------===//

// Base profile for instructions that use the VOP3P encoding.
//   P        - underlying VOPProfile supplying the operand types.
//   Features - VOP3 feature set forwarded to VOP3_Profile.
//   HasDPP   - when set, HasExtVOP3DPP is enabled for this profile.
class VOP3P_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR,
                    bit HasDPP = 0> : VOP3_Profile<P, Features> {
  let IsVOP3P = 1;
  let HasExtVOP3DPP = HasDPP;
  // We do not want to print src modifiers for vop3p because the bits are
  // overloaded in meaning and the logic in printOperandAndFPInputMods is
  // wrong for vop3p
  let AsmVOP3Base = AsmVOP3P;
}

// Used for FMA_MIX* and MAD_MIX* insts
// Their operands are only sort of f16 operands. Depending on
// op_sel_hi, these may be interpreted as f32. The inline immediate
// values are really f16 converted to f32, so we treat these as f16
// operands.
class VOP3P_Mix_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR,
                        bit useTiedOutput = 0> : VOP3P_Profile<P, Features, 1> {
  // Set when the instruction carries a $vdst_in operand (see `mods` below).
  bit UseTiedOutput = useTiedOutput;

  // Source operands for the regular VOP3P form.
  dag srcs =
      (ins FP16InputMods:$src0_modifiers, VCSrc_f16:$src0,
           FP16InputMods:$src1_modifiers, VCSrc_f16:$src1,
           FP16InputMods:$src2_modifiers, VCSrc_f16:$src2);
  // Source operands for the DPP form (used for InsVOP3Base).
  dag dpp_srcs =
      (ins FPVRegInputMods:$src0_modifiers, VGPRSrc_32:$src0,
           FPVRegInputMods:$src1_modifiers, VRegSrc_32:$src1,
           FP16InputMods:$src2_modifiers, VCSrc_f16:$src2);

  // FIXME: Clamp0 misbehaves with the non-default vdst_in
  // following it. For now workaround this by requiring clamp
  // in tied patterns. This should use undef_tied_input, but it
  // seems underdeveloped and doesn't apply the right register
  // class constraints.
  dag mods = !con(!if(UseTiedOutput, (ins Clamp:$clamp, VGPR_32:$vdst_in),
                                     (ins Clamp0:$clamp)),
                  (ins op_sel0:$op_sel, op_sel_hi0:$op_sel_hi));
  // We use Ins64 because that is the one which populates InOperandList
  // due to the logic in class VOP3_Pseudo
  let Ins64 = !con(srcs, mods);
  let InsVOP3Base = !con(dpp_srcs, mods);
  let AsmVOP3Base =
    "$vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$op_sel$op_sel_hi$clamp";
}

// Defines the VOP3P pseudo NAME for OpName using profile P.
// The selection pattern comes from getVOP3PModPat when P.HasModifiers is set,
// otherwise from getVOP3Pat. IsDOT is forwarded twice to getVOP3PModPat.
// When P.HasExtVOP3DPP is set, a NAME#_dpp pseudo is also defined under the
// isGFX11Plus subtarget predicate.
multiclass VOP3PInst<string OpName, VOPProfile P,
                     SDPatternOperator node = null_frag, bit IsDOT = 0> {
  def NAME : VOP3P_Pseudo<OpName, P,
                          !if (P.HasModifiers,
                               getVOP3PModPat<P, node, IsDOT, IsDOT>.ret,
                               getVOP3Pat<P, node>.ret)>;
  let SubtargetPredicate = isGFX11Plus in {
    if P.HasExtVOP3DPP then
      def _dpp : VOP3_DPP_Pseudo<OpName, P> {
        let VOP3P = 1;
        let PseudoInstr = OpName #"_dpp";
      }
  } // end SubtargetPredicate = isGFX11Plus
}

// Non-packed instructions that use the VOP3P encoding.
// VOP3 neg/abs and VOP3P opsel/opsel_hi modifiers are allowed.
// Defines the pseudo NAME for a mix-profile instruction. When
// P.UseTiedOutput is set, $vdst is tied to $vdst_in and $vdst_in is excluded
// from the encoding. A NAME#_dpp pseudo with the same constraints is defined
// under isGFX11Plus when P.HasExtVOP3DPP is set.
multiclass VOP3_VOP3PInst<string OpName, VOP3P_Mix_Profile P> {
  def NAME : VOP3P_Pseudo<OpName, P> {
    let Constraints = !if(P.UseTiedOutput, "$vdst = $vdst_in", "");
    let DisableEncoding = !if(P.UseTiedOutput, "$vdst_in", "");
  }
  let SubtargetPredicate = isGFX11Plus in {
    if P.HasExtVOP3DPP then
      def _dpp : VOP3_DPP_Pseudo<OpName, P> {
        let VOP3P = 1;
        let PseudoInstr = OpName#"_dpp";
        let Constraints = !if(P.UseTiedOutput, "$vdst = $vdst_in", "");
        let DisableEncoding = !if(P.UseTiedOutput, "$vdst_in", "");
      }
  } // end SubtargetPredicate = isGFX11Plus
}

let isReMaterializable = 1 in {
let isCommutable = 1 in {
defm V_PK_MAD_I16 : VOP3PInst<"v_pk_mad_i16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16_V2I16>>;
defm V_PK_MAD_U16 : VOP3PInst<"v_pk_mad_u16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16_V2I16>, imad>;

let FPDPRounding = 1 in {
defm V_PK_FMA_F16 : VOP3PInst<"v_pk_fma_f16", VOP3P_Profile<VOP_V2F16_V2F16_V2F16_V2F16>, any_fma>;
defm V_PK_ADD_F16 : VOP3PInst<"v_pk_add_f16", VOP3P_Profile<VOP_V2F16_V2F16_V2F16>, any_fadd>;
defm V_PK_MUL_F16 : VOP3PInst<"v_pk_mul_f16", VOP3P_Profile<VOP_V2F16_V2F16_V2F16>, any_fmul>;
} // End FPDPRounding = 1
defm V_PK_MAX_F16 : VOP3PInst<"v_pk_max_f16", VOP3P_Profile<VOP_V2F16_V2F16_V2F16>, fmaxnum_like>;
defm V_PK_MIN_F16 : VOP3PInst<"v_pk_min_f16", VOP3P_Profile<VOP_V2F16_V2F16_V2F16>, fminnum_like>;

defm V_PK_ADD_U16 : VOP3PInst<"v_pk_add_u16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>, add>;
defm V_PK_ADD_I16 : VOP3PInst<"v_pk_add_i16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>>;
defm V_PK_MUL_LO_U16 : VOP3PInst<"v_pk_mul_lo_u16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>, mul>;

defm V_PK_MIN_I16 : VOP3PInst<"v_pk_min_i16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>, smin>;
defm V_PK_MIN_U16 : VOP3PInst<"v_pk_min_u16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>, umin>;
defm V_PK_MAX_I16 : VOP3PInst<"v_pk_max_i16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>, smax>;
defm V_PK_MAX_U16 : VOP3PInst<"v_pk_max_u16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>, umax>;

let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
defm V_PK_MAXIMUM_F16 : VOP3PInst<"v_pk_maximum_f16", VOP3P_Profile<VOP_V2F16_V2F16_V2F16, VOP3_PACKED>, fmaximum>;
defm V_PK_MINIMUM_F16 : VOP3PInst<"v_pk_minimum_f16", VOP3P_Profile<VOP_V2F16_V2F16_V2F16, VOP3_PACKED>, fminimum>;
} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
} // End isCommutable = 1

defm V_PK_SUB_U16 : VOP3PInst<"v_pk_sub_u16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>>;
defm V_PK_SUB_I16 : VOP3PInst<"v_pk_sub_i16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>, sub>;

defm V_PK_LSHLREV_B16 : VOP3PInst<"v_pk_lshlrev_b16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>, clshl_rev_16>;
defm V_PK_ASHRREV_I16 : VOP3PInst<"v_pk_ashrrev_i16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>, cashr_rev_16>;
defm V_PK_LSHRREV_B16 : VOP3PInst<"v_pk_lshrrev_b16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>, clshr_rev_16>;
} // End isReMaterializable = 1

let SubtargetPredicate = HasVOP3PInsts in {

// Integer operations with clamp bit set.
// Selects `pat` on two v2i16 operands to `inst` with DSTCLAMP.ENABLE.
class VOP3PSatPat<SDPatternOperator pat, Instruction inst> : GCNPat<
  (pat (v2i16 (VOP3PMods v2i16:$src0, i32:$src0_modifiers)),
       (v2i16 (VOP3PMods v2i16:$src1, i32:$src1_modifiers))),
  (inst $src0_modifiers, $src0, $src1_modifiers, $src1, DSTCLAMP.ENABLE)
>;

def : VOP3PSatPat<uaddsat, V_PK_ADD_U16>;
def : VOP3PSatPat<saddsat, V_PK_ADD_I16>;
def : VOP3PSatPat<usubsat, V_PK_SUB_U16>;
def : VOP3PSatPat<ssubsat, V_PK_SUB_I16>;
} // End SubtargetPredicate = HasVOP3PInsts

// TODO: Make sure we're doing the right thing with denormals. Note
// that FMA and MAD will differ.
//
// Selection patterns shared by the MAD_MIX and FMA_MIX families.
//   fma_like   - the three-operand node being matched (fmad or fma).
//   mix_inst   - instruction producing an f32 result.
//   mixlo_inst - instruction producing an f16 result (selected with an
//                IMPLICIT_DEF $vdst_in).
//   mixhi_inst - instruction used for the second build_vector element; the
//                first element is passed as $vdst_in.
multiclass MadFmaMixPats<SDPatternOperator fma_like,
                         Instruction mix_inst,
                         Instruction mixlo_inst,
                         Instruction mixhi_inst> {
  // At least one of the operands needs to be an fpextend of an f16
  // for this to be worthwhile, so we need three patterns here.
  // TODO: Could we use a predicate to inspect src1/2/3 instead?
  def : GCNPat <
    (f32 (fma_like (f32 (VOP3PMadMixModsExt f16:$src0, i32:$src0_mods)),
                   (f32 (VOP3PMadMixMods f16:$src1, i32:$src1_mods)),
                   (f32 (VOP3PMadMixMods f16:$src2, i32:$src2_mods)))),
    (mix_inst $src0_mods, $src0, $src1_mods, $src1, $src2_mods, $src2,
              DSTCLAMP.NONE)>;
  def : GCNPat <
    (f32 (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_mods)),
                   (f32 (VOP3PMadMixModsExt f16:$src1, i32:$src1_mods)),
                   (f32 (VOP3PMadMixMods f32:$src2, i32:$src2_mods)))),
    (mix_inst $src0_mods, $src0, $src1_mods, $src1, $src2_mods, $src2,
              DSTCLAMP.NONE)>;
  def : GCNPat <
    (f32 (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_mods)),
                   (f32 (VOP3PMadMixMods f32:$src1, i32:$src1_mods)),
                   (f32 (VOP3PMadMixModsExt f16:$src2, i32:$src2_mods)))),
    (mix_inst $src0_mods, $src0, $src1_mods, $src1, $src2_mods, $src2,
              DSTCLAMP.NONE)>;

  def : GCNPat <
    (f16 (fpround (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)),
                            (f32 (VOP3PMadMixMods f16:$src1, i32:$src1_modifiers)),
                            (f32 (VOP3PMadMixMods f16:$src2, i32:$src2_modifiers))))),
    (mixlo_inst $src0_modifiers, $src0,
                $src1_modifiers, $src1,
                $src2_modifiers, $src2,
                DSTCLAMP.NONE,
                (i32 (IMPLICIT_DEF)))
  >;

  // FIXME: Special case handling for mixhi (especially for clamp)
  // because dealing with the write to high half of the register is
  // difficult.
  def : GCNPat <
    (build_vector f16:$elt0, (f16 (fpround (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)),
                                                     (f32 (VOP3PMadMixMods f16:$src1, i32:$src1_modifiers)),
                                                     (f32 (VOP3PMadMixMods f16:$src2, i32:$src2_modifiers)))))),
    (v2f16 (mixhi_inst $src0_modifiers, $src0,
                       $src1_modifiers, $src1,
                       $src2_modifiers, $src2,
                       DSTCLAMP.NONE,
                       VGPR_32:$elt0))
  >;

  def : GCNPat <
    (build_vector
      f16:$elt0,
      (AMDGPUclamp (f16 (fpround (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)),
                                           (f32 (VOP3PMadMixMods f16:$src1, i32:$src1_modifiers)),
                                           (f32 (VOP3PMadMixMods f16:$src2, i32:$src2_modifiers))))))),
    (v2f16 (mixhi_inst $src0_modifiers, $src0,
                       $src1_modifiers, $src1,
                       $src2_modifiers, $src2,
                       DSTCLAMP.ENABLE,
                       VGPR_32:$elt0))
  >;

  // Both halves computed and clamped: mixlo feeds mixhi through $vdst_in.
  def : GCNPat <
    (AMDGPUclamp (build_vector
      (f16 (fpround (fma_like (f32 (VOP3PMadMixMods f16:$lo_src0, i32:$lo_src0_modifiers)),
                              (f32 (VOP3PMadMixMods f16:$lo_src1, i32:$lo_src1_modifiers)),
                              (f32 (VOP3PMadMixMods f16:$lo_src2, i32:$lo_src2_modifiers))))),
      (f16 (fpround (fma_like (f32 (VOP3PMadMixMods f16:$hi_src0, i32:$hi_src0_modifiers)),
                              (f32 (VOP3PMadMixMods f16:$hi_src1, i32:$hi_src1_modifiers)),
                              (f32 (VOP3PMadMixMods f16:$hi_src2, i32:$hi_src2_modifiers))))))),
    (v2f16 (mixhi_inst $hi_src0_modifiers, $hi_src0,
                       $hi_src1_modifiers, $hi_src1,
                       $hi_src2_modifiers, $hi_src2,
                       DSTCLAMP.ENABLE,
                       (mixlo_inst $lo_src0_modifiers, $lo_src0,
                                   $lo_src1_modifiers, $lo_src1,
                                   $lo_src2_modifiers, $lo_src2,
                                   DSTCLAMP.ENABLE,
                                   (i32 (IMPLICIT_DEF)))))
  >;

  // fpround(fmul) is selected with src2_modifiers = 0 and src2 = 0.
  def : GCNPat <
    (f16 (fpround (fmul (f32 (VOP3PMadMixMods f32:$src0, i32:$src0_modifiers)),
                        (f32 (VOP3PMadMixMods f32:$src1, i32:$src1_modifiers))))),
    (mixlo_inst $src0_modifiers, $src0,
                $src1_modifiers, $src1,
                (i32 0), (i32 0),
                DSTCLAMP.NONE,
                (i32 (IMPLICIT_DEF)))
  >;

  def : GCNPat <
    (build_vector f16:$elt0, (f16 (fpround (fmul (f32 (VOP3PMadMixMods f32:$src0, i32:$src0_modifiers)),
                                                 (f32 (VOP3PMadMixMods f32:$src1, i32:$src1_modifiers)))))),
    (v2f16 (mixhi_inst $src0_modifiers, $src0,
                       $src1_modifiers, $src1,
                       (i32 0), (i32 0),
                       DSTCLAMP.NONE,
                       VGPR_32:$elt0))
  >;
}

let SubtargetPredicate = HasMadMixInsts, OtherPredicates = [NoFP32Denormals] in {

// These are VOP3a-like opcodes which accept no omod.
// Size of src arguments (16/32) is controlled by op_sel_hi.
// For 16-bit src arguments their location (hi/lo) is controlled by op_sel.
let isCommutable = 1, mayRaiseFPException = 0 in {
let isReMaterializable = 1 in
defm V_MAD_MIX_F32 : VOP3_VOP3PInst<"v_mad_mix_f32", VOP3P_Mix_Profile<VOP_F32_F16_F16_F16, VOP3_OPSEL>>;

let FPDPRounding = 1 in {
// Clamp modifier is applied after conversion to f16.
defm V_MAD_MIXLO_F16 : VOP3_VOP3PInst<"v_mad_mixlo_f16", VOP3P_Mix_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL, 1>>;

let ClampLo = 0, ClampHi = 1 in {
defm V_MAD_MIXHI_F16 : VOP3_VOP3PInst<"v_mad_mixhi_f16", VOP3P_Mix_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL, 1>>;
}
} // End FPDPRounding = 1
}

defm : MadFmaMixPats<fmad, V_MAD_MIX_F32, V_MAD_MIXLO_F16, V_MAD_MIXHI_F16>;
} // End SubtargetPredicate = HasMadMixInsts, OtherPredicates = [NoFP32Denormals]


// Essentially the same as the mad_mix versions
let SubtargetPredicate = HasFmaMixInsts in {
let isCommutable = 1 in {

let isReMaterializable = 1 in
defm V_FMA_MIX_F32 : VOP3_VOP3PInst<"v_fma_mix_f32", VOP3P_Mix_Profile<VOP_F32_F16_F16_F16, VOP3_OPSEL>>;

let FPDPRounding = 1 in {
// Clamp modifier is applied after conversion to f16.
defm V_FMA_MIXLO_F16 : VOP3_VOP3PInst<"v_fma_mixlo_f16", VOP3P_Mix_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL, 1>>;

let ClampLo = 0, ClampHi = 1 in {
defm V_FMA_MIXHI_F16 : VOP3_VOP3PInst<"v_fma_mixhi_f16", VOP3P_Mix_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL, 1>>;
}
} // End FPDPRounding = 1
}

defm : MadFmaMixPats<fma, V_FMA_MIX_F32, V_FMA_MIXLO_F16, V_FMA_MIXHI_F16>;
} // End SubtargetPredicate = HasFmaMixInsts

// Defines patterns that extract a signed 4-bit field starting at bit Idx[0].
// Idx[1] is the left-shift amount (28 - Idx[0]) that moves the field to the
// top of the word before the arithmetic shift right by 28.
foreach Idx = [[0,28],[4,24],[8,20],[12,16],[16,12],[20,8],[24,4]] in
  def ExtractSigned4bit_#Idx[0] : PatFrag<(ops node:$src),
                                          (sra (shl node:$src, (i32 Idx[1])), (i32 28))>;

// Defines code pattern that extracts U(unsigned/signed) 4/8bit from FromBitIndex.
// Pattern fragment selecting one bit field of $src.
//   FromBitIndex - bit position where the field starts.
//   BitMask      - 15 for a 4-bit field, 255 for an 8-bit field.
//   U            - 1 for an unsigned (zero-extended) extract, 0 for signed.
// Three cases are distinguished: the topmost field (a single shift right),
// the field at bit 0 (mask / sign-extend without a shift), and everything in
// between (shift then mask, or the ExtractSigned4bit_* / sext_inreg form).
class Extract<int FromBitIndex, int BitMask, bit U>: PatFrag<
  (ops node:$src),
  !if (!or (!and (!eq (BitMask, 255), !eq (FromBitIndex, 24)), !eq (FromBitIndex, 28)), // last element
       !if (U, (srl node:$src, (i32 FromBitIndex)), (sra node:$src, (i32 FromBitIndex))),
       !if (!eq (FromBitIndex, 0), // first element
            !if (U, (and node:$src, (i32 BitMask)),
                 !if (!eq (BitMask, 15), (!cast<PatFrag>("ExtractSigned4bit_"#FromBitIndex) node:$src),
                                         (sext_inreg node:$src, i8))),
            !if (U, (and (srl node:$src, (i32 FromBitIndex)), (i32 BitMask)),
                 !if (!eq (BitMask, 15), (!cast<PatFrag>("ExtractSigned4bit_"#FromBitIndex) node:$src),
                      (sext_inreg (srl node:$src, (i32 FromBitIndex)), i8)))))>;


foreach Type = ["I", "U"] in
  foreach Index = 0-3 in {
    // Defines patterns that extract each Index'ed 8bit from a
    // 32bit scalar value.
    def Type#Index#"_8bit" : Extract<!shl(Index, 3), 255, !eq (Type, "U")>;

    // Defines multiplication patterns where the multiplication is happening on each
    // Index'ed 8bit of a 32bit scalar value.

    def Mul#Type#_Elt#Index : PatFrag<
      (ops node:$src0, node:$src1),
      (!cast<HasOneUseBinOp>(!if (!eq (Type, "I"), AMDGPUmul_i24_oneuse, AMDGPUmul_u24_oneuse))
                            (!cast<Extract>(Type#Index#"_8bit") node:$src0),
                            (!cast<Extract>(Type#Index#"_8bit") node:$src1))>;
  }

// Different variants of dot8 patterns cause a huge increase in the compile time.
// Define non-associative/commutative add/mul to prevent permutation in the dot8
// pattern.
def NonACAdd        : SDNode<"ISD::ADD"       , SDTIntBinOp>;
def NonACAdd_oneuse : HasOneUseBinOp<NonACAdd>;

def NonACAMDGPUmul_u24        : SDNode<"AMDGPUISD::MUL_U24"       , SDTIntBinOp>;
def NonACAMDGPUmul_u24_oneuse : HasOneUseBinOp<NonACAMDGPUmul_u24>;

def NonACAMDGPUmul_i24        : SDNode<"AMDGPUISD::MUL_I24"       , SDTIntBinOp>;
def NonACAMDGPUmul_i24_oneuse : HasOneUseBinOp<NonACAMDGPUmul_i24>;

foreach Type = ["I", "U"] in
  foreach Index = 0-7 in {
    // Defines patterns that extract each Index'ed 4bit from a
    // 32bit scalar value.
    def Type#Index#"_4bit" : Extract<!shl(Index, 2), 15, !eq (Type, "U")>;

    // Defines multiplication patterns where the multiplication is happening on each
    // Index'ed 4bit of a 32bit scalar value.
    def Mul#Type#Index#"_4bit" : PatFrag<
      (ops node:$src0, node:$src1),
      (!cast<HasOneUseBinOp>(!if (!eq (Type, "I"), NonACAMDGPUmul_i24_oneuse, NonACAMDGPUmul_u24_oneuse))
                             (!cast<Extract>(Type#Index#"_4bit") node:$src0),
                             (!cast<Extract>(Type#Index#"_4bit") node:$src1))>;
  }

// Matches src2 + hi16(src0)*hi16(src1) + lo16(src0)*lo16(src1) using unsigned
// 24-bit multiplies, and selects Inst with modifier value 8 on every source
// and clamp = 0. Predicates are inherited from Inst.
class UDot2Pat<VOP_Pseudo Inst> : GCNPat <
  (add (add_oneuse (AMDGPUmul_u24_oneuse (srl i32:$src0, (i32 16)),
                                         (srl i32:$src1, (i32 16))), i32:$src2),
       (AMDGPUmul_u24_oneuse (and i32:$src0, (i32 65535)),
                             (and i32:$src1, (i32 65535)))
   ),
  (Inst (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))> {
  let Predicates = Inst.Predicates;
}

// Signed counterpart of UDot2Pat: arithmetic shifts / sext_inreg and signed
// 24-bit multiplies.
class SDot2Pat<VOP_Pseudo Inst> : GCNPat <
  (add (add_oneuse (AMDGPUmul_i24_oneuse (sra i32:$src0, (i32 16)),
                                         (sra i32:$src1, (i32 16))), i32:$src2),
       (AMDGPUmul_i24_oneuse (sext_inreg i32:$src0, i16),
                             (sext_inreg i32:$src1, i16))),
  (Inst (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))> {
  let Predicates = Inst.Predicates;
}

let IsDOT = 1 in {
let OtherPredicates = [HasDot2Insts] in {
defm V_DOT2_I32_I16 : VOP3PInst<"v_dot2_i32_i16",
  VOP3P_Profile<VOP_I32_V2I16_V2I16_I32>, int_amdgcn_sdot2, 1>;
defm V_DOT2_U32_U16 : VOP3PInst<"v_dot2_u32_u16",
  VOP3P_Profile<VOP_I32_V2I16_V2I16_I32>, int_amdgcn_udot2, 1>;
} // End OtherPredicates = [HasDot2Insts]

let OtherPredicates = [HasDot10Insts] in
defm V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16",
  VOP3P_Profile<VOP_F32_V2F16_V2F16_F32, VOP3_REGULAR, /*HasDPP*/ 1>,
  AMDGPUfdot2, /*IsDOT*/ 1>;

let OtherPredicates = [HasDot7Insts] in {
let IsInvalidSingleUseConsumer = 1 in {
  defm V_DOT4_U32_U8  : VOP3PInst<"v_dot4_u32_u8",
    VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot4, 1>;
}
defm V_DOT8_U32_U4  : VOP3PInst<"v_dot8_u32_u4",
  VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot8, 1>;
} // End OtherPredicates = [HasDot7Insts]

let OtherPredicates = [HasDot1Insts] in {
let IsInvalidSingleUseConsumer = 1 in {
  defm V_DOT4_I32_I8  : VOP3PInst<"v_dot4_i32_i8",
    VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_sdot4, 1>;
}
defm V_DOT8_I32_I4  : VOP3PInst<"v_dot8_i32_i4",
  VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_sdot8, 1>;
} // End OtherPredicates = [HasDot1Insts]

def DOT2_BF16_Profile
  : VOP3P_Profile<VOP_F32_V2BF16_V2BF16_F32, VOP3_REGULAR, /*HasDPP*/ 1> {
  let HasSrc1Mods = 1;
}

let SubtargetPredicate = HasDot9Insts in {

defm V_DOT2_F32_BF16 : VOP3PInst<"v_dot2_f32_bf16", DOT2_BF16_Profile,
  int_amdgcn_fdot2_f32_bf16, 1>;

} // End SubtargetPredicate = HasDot9Insts

} // End let IsDOT = 1

// Defines a dot-iu instruction (no selection pattern on the pseudo itself)
// plus the separate pattern that maps intrinsic_node onto it.
multiclass VOP3PDOTIUInst <string OpName, SDPatternOperator intrinsic_node> {
  let IsDOT = 1 in
  defm NAME : VOP3PInst<OpName, VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>,
                        null_frag, 1>;
  // Dot-iu instructions consider input as signed if imod neg bits are set. Thus
  // Dot-iu Intrinsics have extra operands and require separate codegen pattern.
  def : GCNPat < (intrinsic_node (VOP3PModsNeg i32:$src0_mods), i32:$src0,
                                 (VOP3PModsNeg i32:$src1_mods), i32:$src1,
                                 i32:$src2, (i1 timm:$clamp)),
                 (!cast<Instruction>(NAME) $src0_mods, i32:$src0,
                                           $src1_mods, i32:$src1,
                                           (i32 8), i32:$src2, i1:$clamp)
  >;
}

let SubtargetPredicate = HasDot8Insts in {
defm V_DOT4_I32_IU8 : VOP3PDOTIUInst<"v_dot4_i32_iu8", int_amdgcn_sudot4>;
defm V_DOT8_I32_IU4 : VOP3PDOTIUInst<"v_dot8_i32_iu4", int_amdgcn_sudot8>;

// The plain signed dot intrinsics are selected to the iu forms with modifier
// value 9 on src0 and src1.
def : GCNPat < (int_amdgcn_sdot8 i32:$src0,
                                 i32:$src1,
                                 i32:$src2, (i1 timm:$clamp)),
               (V_DOT8_I32_IU4  (i32 9), i32:$src0,
                                (i32 9), i32:$src1, (i32 8), i32:$src2, i1:$clamp)
>;

def : GCNPat < (int_amdgcn_sdot4 i32:$src0,
                                 i32:$src1,
                                 i32:$src2, (i1 timm:$clamp)),
               (V_DOT4_I32_IU8  (i32 9), i32:$src0,
                                (i32 9), i32:$src1, (i32 8), i32:$src2, i1:$clamp)
>;
} // End SubtargetPredicate = HasDot8Insts

// Does not use opsel, no src_modifiers on src0 and src1.
// src_modifiers on src2(f32) are f32 fneg(neg_lo[2]) and f32 fabs(neg_hi[2]).
def VOP3P_DOTF8_Profile : VOP3P_Profile<VOPProfile <[f32, i32, i32, f32]>,
                                        VOP3_PACKED, 1> {
  let HasClamp = 0;
  let HasOpSel = 0;
  let HasOMod = 0;
  let IsDOT = 1;
  let HasSrc0Mods = 0;
  let HasSrc1Mods = 0;
  let HasSrc2Mods = 1;

  let InsVOP3P = (ins VSrc_b32:$src0, VSrc_b32:$src1,
                      PackedF16InputMods:$src2_modifiers, VSrc_f32:$src2,
                      neg_lo0:$neg_lo, neg_hi0:$neg_hi);

  let InsVOP3DPP8 = (ins DstRC:$old, VGPR_32:$src0, VRegSrc_32:$src1,
                         PackedF16InputMods:$src2_modifiers, VRegSrc_32:$src2,
                         neg_lo0:$neg_lo, neg_hi0:$neg_hi, dpp8:$dpp8, Dpp8FI:$fi);

  let InsVOP3DPP16 = (ins DstRC:$old, VGPR_32:$src0, VRegSrc_32:$src1,
                          PackedF16InputMods:$src2_modifiers, VRegSrc_32:$src2,
                          neg_lo0:$neg_lo, neg_hi0:$neg_hi, dpp_ctrl:$dpp_ctrl,
                          DppRowMask:$row_mask, DppBankMask:$bank_mask,
                          DppBoundCtrl:$bound_ctrl, Dpp16FI:$fi);
}

// Defines an FP8/BF8 dot4 instruction with VOP3P_DOTF8_Profile and, under
// isGFX12Plus, the pattern selecting intrinsic_node to it.
multiclass VOP3PDOTF8Inst <string OpName, SDPatternOperator intrinsic_node> {
  defm NAME : VOP3PInst<OpName, VOP3P_DOTF8_Profile, null_frag, 1>;

  let SubtargetPredicate = isGFX12Plus in
  def : GCNPat <(intrinsic_node i32:$src0, i32:$src1,
                                (VOP3Mods f32:$src2, i32:$src2_modifiers)),
                (!cast<Instruction>(NAME) i32:$src0, i32:$src1,
                                          i32:$src2_modifiers, f32:$src2)>;
}

let OtherPredicates = [HasDot11Insts] in {
defm V_DOT4_F32_FP8_BF8 : VOP3PDOTF8Inst<"v_dot4_f32_fp8_bf8", int_amdgcn_dot4_f32_fp8_bf8>;
defm V_DOT4_F32_BF8_FP8 : VOP3PDOTF8Inst<"v_dot4_f32_bf8_fp8", int_amdgcn_dot4_f32_bf8_fp8>;
defm V_DOT4_F32_FP8_FP8 : VOP3PDOTF8Inst<"v_dot4_f32_fp8_fp8", int_amdgcn_dot4_f32_fp8_fp8>;
defm V_DOT4_F32_BF8_BF8 : VOP3PDOTF8Inst<"v_dot4_f32_bf8_bf8", int_amdgcn_dot4_f32_bf8_bf8>;
} // End OtherPredicates = [HasDot11Insts]

def : UDot2Pat<V_DOT2_U32_U16>;
def : SDot2Pat<V_DOT2_I32_I16>;

// dot4: src2 + sum over the four 8-bit element products, folded left to right.
foreach Type = ["U", "I"] in
  let Predicates = !cast<VOP_Pseudo>("V_DOT4_"#Type#"32_"#Type#8).Predicates in
  def : GCNPat <
    !cast<dag>(!foldl((i32 i32:$src2), [0, 1, 2, 3], lhs, y,
                      (add_oneuse lhs, (!cast<PatFrag>("Mul"#Type#"_Elt"#y) i32:$src0, i32:$src1)))),
    (!cast<VOP3P_Pseudo>("V_DOT4_"#Type#"32_"#Type#8) (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))>;

// dot8: element 0 is added with add_oneuse, elements 1..7 with the
// non-associative/commutative add, in index order.
foreach Type = ["U", "I"] in
  let Predicates = !cast<VOP_Pseudo>("V_DOT8_"#Type#"32_"#Type#4).Predicates in
  def : GCNPat <
    !cast<dag>(!foldl((add_oneuse i32:$src2, (!cast<PatFrag>("Mul"#Type#"0_4bit") i32:$src0, i32:$src1)),
                      [1, 2, 3, 4, 5, 6, 7], lhs, y,
                      (NonACAdd_oneuse lhs, (!cast<PatFrag>("Mul"#Type#y#"_4bit") i32:$src0, i32:$src1)))),
    (!cast<VOP3P_Pseudo>("V_DOT8_"#Type#"32_"#Type#4) (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))>;

// Different variants of dot8 code-gen dag patterns are not generated through table-gen due to a huge increase
// in the compile time. Directly handle the pattern generated by the FE here.
// Matches $src2 + MulU/I0_4bit(src0, src1) and then folds in the remaining
// 4-bit products in the order 7, 1, 2, 3, 4, 5, 6.
foreach Type = ["U", "I"] in
  let Predicates = !cast<VOP_Pseudo>("V_DOT8_"#Type#"32_"#Type#4).Predicates in
  def : GCNPat <
    !cast<dag>(!foldl((add_oneuse i32:$src2, (!cast<PatFrag>("Mul"#Type#"0_4bit") i32:$src0, i32:$src1)),
                      [7, 1, 2, 3, 4, 5, 6], lhs, y,
                      (NonACAdd_oneuse lhs, (!cast<PatFrag>("Mul"#Type#y#"_4bit") i32:$src0, i32:$src1)))),
    (!cast<VOP3P_Pseudo>("V_DOT8_"#Type#"32_"#Type#4) (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))>;

// Destination operand wrappers over the AGPR_32 / AReg_* (ADst_*) and
// VReg_* (VDst_*) register classes.
def ADst_32   : VOPDstOperand<AGPR_32>;
def ADst_64   : VOPDstOperand<AReg_64>;
def ADst_128  : VOPDstOperand<AReg_128>;
def ADst_256  : VOPDstOperand<AReg_256>;
def ADst_512  : VOPDstOperand<AReg_512>;
def ADst_1024 : VOPDstOperand<AReg_1024>;
def VDst_64   : VOPDstOperand<VReg_64>;
def VDst_128  : VOPDstOperand<VReg_128>;
def VDst_256  : VOPDstOperand<VReg_256>;
def VDst_512  : VOPDstOperand<VReg_512>;
def VDst_1024 : VOPDstOperand<VReg_1024>;

// Profile for v_accvgpr_read_b32: src0 uses ARegSrc_32.
def VOPProfileAccRead : VOP3P_Profile<VOP_I32_I32, VOP3_MAI> {
  let Src0RC64 = ARegSrc_32;
}

// Profile for v_accvgpr_write_b32: dst uses ADst_32, src0 uses VCSrc_b32.
def VOPProfileAccWrite : VOP3P_Profile<VOP_I32_I32, VOP3_MAI> {
  let DstRC = ADst_32;
  let Src0RC64 = VCSrc_b32;
}

// Common profile for MAI (MFMA) instructions.
//   P       - value-type profile of the operation.
//   _SrcRC  - operand class used for src2.
//   _DstRC  - operand class used for the destination.
//   SrcABRC - operand class shared by src0 and src1 (defaults to AVSrc_32).
// Op_sel, clamp, omod and source modifiers are all disabled; the instruction
// takes cbsz/abid/blgp operands instead.
class VOPProfileMAI<VOPProfile P, RegisterOperand _SrcRC, RegisterOperand _DstRC,
                    RegisterOperand SrcABRC = AVSrc_32>
  : VOP3P_Profile<P, VOP3_MAI> {
  let DstRC = _DstRC;
  let Src0RC64 = SrcABRC;
  let Src1RC64 = SrcABRC;
  let Src2RC64 = _SrcRC;
  let HasOpSel = 0;
  let HasClamp = 0;
  let HasIntClamp = 0;
  let HasOMod = 0;
  let HasModifiers = 0;
  let AsmVOP3Base = "$vdst, $src0, $src1, $src2$cbsz$abid$blgp";
  let Ins64 = (ins Src0RC64:$src0, Src1RC64:$src1, Src2RC64:$src2, CBSZ:$cbsz, ABID:$abid, blgp:$blgp);
  let InsVOP3Base = Ins64;
  // Dst and SrcC cannot partially overlap if SrcC/Dst is bigger than 4 VGPRs.
  // We then create two versions of the instruction: with tied dst and src2
  // and with the earlyclobber flag on the dst. This is stricter than the
  // actual HW restriction. In particular earlyclobber also affects src0 and
  // src1 allocation which is not required.
  bit NoDstOverlap = !gt(DstVT.Size, 128);
}

// Profile for SMFMAC instructions.
// _DstRC is passed to VOPProfileMAI as both the src2 and the dst operand
// class, _SrcARC becomes the src0/src1 class and src1 is then overridden with
// _SrcBRC. The input list adds a VRegSrc_32 $idx operand and places $src2
// last; $src2 does not appear in the asm string.
class VOPProfileSMFMAC<VOPProfile P, RegisterOperand _DstRC,
                       RegisterOperand _SrcARC, RegisterOperand _SrcBRC>
  : VOPProfileMAI<P, _DstRC, _DstRC, _SrcARC> {
  let Src1RC64 = _SrcBRC;
  let Src2VT = DstVT;
  let Asm64 = " $vdst, $src0, $src1, $idx$cbsz$abid";
  let Outs64 = (outs DstRC:$vdst);
  let Ins64 = (ins Src0RC64:$src0, Src1RC64:$src1, VRegSrc_32:$idx, CBSZ:$cbsz, ABID:$abid, Src2RC64:$src2);
}

// MAI profiles with AISrc_* src2 and ADst_* destination operands.
def VOPProfileMAI_F32_F32_X4     : VOPProfileMAI<VOP_V4F32_F32_F32_V4F32, AISrc_128_f32, ADst_128>;
def VOPProfileMAI_F32_F32_X16    : VOPProfileMAI<VOP_V16F32_F32_F32_V16F32, AISrc_512_f32, ADst_512>;
def VOPProfileMAI_F32_F32_X32    : VOPProfileMAI<VOP_V32F32_F32_F32_V32F32, AISrc_1024_f32, ADst_1024>;
def VOPProfileMAI_I32_I32_X4     : VOPProfileMAI<VOP_V4I32_I32_I32_V4I32, AISrc_128_b32, ADst_128>;
def VOPProfileMAI_I32_I32_X16    : VOPProfileMAI<VOP_V16I32_I32_I32_V16I32, AISrc_512_b32, ADst_512>;
def VOPProfileMAI_I32_I32_X32    : VOPProfileMAI<VOP_V32I32_I32_I32_V32I32, AISrc_1024_b32, ADst_1024>;
def VOPProfileMAI_F32_V2I16_X4   : VOPProfileMAI<VOP_V4F32_V2I16_V2I16_V4F32, AISrc_128_b32, ADst_128>;
def VOPProfileMAI_F32_V2I16_X16  : VOPProfileMAI<VOP_V16F32_V2I16_V2I16_V16F32, AISrc_512_b32, ADst_512>;
def VOPProfileMAI_F32_V2I16_X32  : VOPProfileMAI<VOP_V32F32_V2I16_V2I16_V32F32, AISrc_1024_b32, ADst_1024>;
def VOPProfileMAI_F32_V4F16_X4   : VOPProfileMAI<VOP_V4F32_V4F16_V4F16_V4F32, AISrc_128_b32, ADst_128, AVSrc_64>;
def VOPProfileMAI_F32_V4F16_X16  : VOPProfileMAI<VOP_V16F32_V4F16_V4F16_V16F32, AISrc_512_b32, ADst_512, AVSrc_64>;
def VOPProfileMAI_F32_V4F16_X32  : VOPProfileMAI<VOP_V32F32_V4F16_V4F16_V32F32, AISrc_1024_b32, ADst_1024, AVSrc_64>;
def VOPProfileMAI_F32_V4I16_X4   : VOPProfileMAI<VOP_V4F32_V4I16_V4I16_V4F32, AISrc_128_b32, ADst_128, AVSrc_64>;
def VOPProfileMAI_F32_V4I16_X16  : VOPProfileMAI<VOP_V16F32_V4I16_V4I16_V16F32, AISrc_512_b32, ADst_512, AVSrc_64>;
def VOPProfileMAI_F32_V4I16_X32  : VOPProfileMAI<VOP_V32F32_V4I16_V4I16_V32F32, AISrc_1024_b32, ADst_1024, AVSrc_64>;
def VOPProfileMAI_F64_16X16X4F64 : VOPProfileMAI<VOP_V4F64_F64_F64_V4F64, AISrc_256_f64, ADst_256, AVSrc_64>;
def VOPProfileMAI_F64_4X4X4F64   : VOPProfileMAI<VOP_F64_F64_F64_F64, AISrc_64_f64, ADst_64, AVSrc_64>;
def VOPProfileMAI_I32_I64_X16    : VOPProfileMAI<VOP_V4I32_I64_I64_V4I32, AISrc_128_b32, ADst_128, AVSrc_64>;
def VOPProfileMAI_I32_I64_X32    : VOPProfileMAI<VOP_V16I32_I64_I64_V16I32, AISrc_512_b32, ADst_512, AVSrc_64>;
def VOPProfileMAI_F32_V2F32_X16  : VOPProfileMAI<VOP_V4F32_V2F32_V2F32_V4F32, AISrc_128_b32, ADst_128, AVSrc_64>;
def VOPProfileMAI_F32_V2F32_X32  : VOPProfileMAI<VOP_V16F32_V2F32_V2F32_V16F32, AISrc_512_b32, ADst_512, AVSrc_64>;
def VOPProfileMAI_F32_I64_X32    : VOPProfileMAI<VOP_V4F32_I64_I64_V4F32, AISrc_128_b32, ADst_128, AVSrc_64>;
def VOPProfileMAI_F32_I64_X16    : VOPProfileMAI<VOP_V16F32_I64_I64_V16F32, AISrc_512_b32, ADst_512, AVSrc_64>;

// "_VCD" counterparts of the profiles above: same value types, but with
// VISrc_* src2 and VDst_* destination operands.
def VOPProfileMAI_F32_F32_X4_VCD     : VOPProfileMAI<VOP_V4F32_F32_F32_V4F32, VISrc_128_f32, VDst_128>;
def VOPProfileMAI_F32_F32_X16_VCD    : VOPProfileMAI<VOP_V16F32_F32_F32_V16F32, VISrc_512_f32, VDst_512>;
def VOPProfileMAI_F32_F32_X32_VCD    : VOPProfileMAI<VOP_V32F32_F32_F32_V32F32, VISrc_1024_f32, VDst_1024>;
def VOPProfileMAI_I32_I32_X4_VCD     : VOPProfileMAI<VOP_V4I32_I32_I32_V4I32, VISrc_128_b32, VDst_128>;
def VOPProfileMAI_I32_I32_X16_VCD    : VOPProfileMAI<VOP_V16I32_I32_I32_V16I32, VISrc_512_b32, VDst_512>;
def VOPProfileMAI_I32_I32_X32_VCD    : VOPProfileMAI<VOP_V32I32_I32_I32_V32I32, VISrc_1024_b32, VDst_1024>;
def VOPProfileMAI_F32_V2I16_X4_VCD   : VOPProfileMAI<VOP_V4F32_V2I16_V2I16_V4F32, VISrc_128_b32, VDst_128>;
def VOPProfileMAI_F32_V2I16_X16_VCD  : VOPProfileMAI<VOP_V16F32_V2I16_V2I16_V16F32, VISrc_512_b32, VDst_512>;
def VOPProfileMAI_F32_V2I16_X32_VCD  : VOPProfileMAI<VOP_V32F32_V2I16_V2I16_V32F32, VISrc_1024_b32, VDst_1024>;
def VOPProfileMAI_F32_V4F16_X4_VCD   : VOPProfileMAI<VOP_V4F32_V4F16_V4F16_V4F32, VISrc_128_b32, VDst_128, AVSrc_64>;
def VOPProfileMAI_F32_V4F16_X16_VCD  : VOPProfileMAI<VOP_V16F32_V4F16_V4F16_V16F32, VISrc_512_b32, VDst_512, AVSrc_64>;
def VOPProfileMAI_F32_V4F16_X32_VCD  : VOPProfileMAI<VOP_V32F32_V4F16_V4F16_V32F32, VISrc_1024_b32, VDst_1024, AVSrc_64>;
def VOPProfileMAI_F32_V4I16_X4_VCD   : VOPProfileMAI<VOP_V4F32_V4I16_V4I16_V4F32, VISrc_128_b32, VDst_128, AVSrc_64>;
def VOPProfileMAI_F32_V4I16_X16_VCD  : VOPProfileMAI<VOP_V16F32_V4I16_V4I16_V16F32, VISrc_512_b32, VDst_512, AVSrc_64>;
def VOPProfileMAI_F32_V4I16_X32_VCD  : VOPProfileMAI<VOP_V32F32_V4I16_V4I16_V32F32, VISrc_1024_b32, VDst_1024, AVSrc_64>;
def VOPProfileMAI_F64_16X16X4F64_VCD : VOPProfileMAI<VOP_V4F64_F64_F64_V4F64, VISrc_256_f64, VDst_256, AVSrc_64>;
def VOPProfileMAI_F64_4X4X4F64_VCD   : VOPProfileMAI<VOP_F64_F64_F64_F64, VISrc_64_f64, VDst_64, AVSrc_64>;
def VOPProfileMAI_I32_I64_X16_VCD    : VOPProfileMAI<VOP_V4I32_I64_I64_V4I32, VISrc_128_b32, VDst_128, AVSrc_64>;
def VOPProfileMAI_I32_I64_X32_VCD    : VOPProfileMAI<VOP_V16I32_I64_I64_V16I32, VISrc_512_b32, VDst_512, AVSrc_64>;
def VOPProfileMAI_F32_V2F32_X16_VCD  : VOPProfileMAI<VOP_V4F32_V2F32_V2F32_V4F32, VISrc_128_b32, VDst_128, AVSrc_64>;
def VOPProfileMAI_F32_V2F32_X32_VCD  : VOPProfileMAI<VOP_V16F32_V2F32_V2F32_V16F32, VISrc_512_b32, VDst_512, AVSrc_64>;
def VOPProfileMAI_F32_I64_X32_VCD    : VOPProfileMAI<VOP_V4F32_I64_I64_V4F32, VISrc_128_b32, VDst_128, AVSrc_64>;
def VOPProfileMAI_F32_I64_X16_VCD    : VOPProfileMAI<VOP_V16F32_I64_I64_V16F32, VISrc_512_b32, VDst_512, AVSrc_64>;

// SMFMAC profiles: AVDst_* destination, AVSrc_64 src0 and AVSrc_128 src1.
def VOPProfileSMFMAC_F32_16X16X32_F16 : VOPProfileSMFMAC<VOP_V4F32_V4F16_V8F16_I32, AVDst_128, AVSrc_64, AVSrc_128>;
def VOPProfileSMFMAC_F32_32X32X16_F16 : VOPProfileSMFMAC<VOP_V16F32_V4F16_V8F16_I32, AVDst_512, AVSrc_64, AVSrc_128>;
def VOPProfileSMFMAC_F32_16X16X32_I16 : VOPProfileSMFMAC<VOP_V4F32_V4I16_V8I16_I32, AVDst_128, AVSrc_64, AVSrc_128>;
def VOPProfileSMFMAC_F32_32X32X16_I16 : VOPProfileSMFMAC<VOP_V16F32_V4I16_V8I16_I32, AVDst_512, AVSrc_64, AVSrc_128>;
def VOPProfileSMFMAC_I32_16X16X64_I8  : VOPProfileSMFMAC<VOP_V4I32_V2I32_V4I32_I32, AVDst_128, AVSrc_64, AVSrc_128>;
def VOPProfileSMFMAC_I32_32X32X32_I8  : VOPProfileSMFMAC<VOP_V16I32_V2I32_V4I32_I32, AVDst_512, AVSrc_64, AVSrc_128>;
def VOPProfileSMFMAC_F32_16X16X64_F8  : VOPProfileSMFMAC<VOP_V4F32_V2I32_V4I32_I32, AVDst_128, AVSrc_64, AVSrc_128>;
def VOPProfileSMFMAC_F32_32X32X32_F8  : VOPProfileSMFMAC<VOP_V16F32_V2I32_V4I32_I32, AVDst_512, AVSrc_64, AVSrc_128>;

// Mixin attached to each MFMA record by the MAIInst multiclass: IsMac is 0 on
// the regular variants and 1 on the "_mac" variants; a variant and its "_mac"
// counterpart are given the same FMAOp string.
class MFMATable <bit is_mac, string Name> {
  bit IsMac = is_mac;
  string FMAOp = Name;
}

// PatFrag over a six-operand MAI node (src0, src1, src2, cbsz, abid, blgp)
// guarded by the predicate code `pred`.
class MAIFrag<SDPatternOperator Op, code pred> : PatFrag <
  (ops node:$src0, node:$src1, node:$src2, node:$cbsz, node:$abid, node:$blgp),
  (Op $src0, $src1, $src2, $cbsz, $abid, $blgp),
  pred
>;

// Predicate bodies querying SIMachineFunctionInfo::mayNeedAGPRs(). The
// "_gisel" variants access MF with '.' instead of '->' and are installed as
// GISelPredicateCode by the fragment classes below.
defvar MayNeedAGPRs = [{
  return MF->getInfo<SIMachineFunctionInfo>()->mayNeedAGPRs();
}];

defvar MayNeedAGPRs_gisel = [{
  return MF.getInfo<SIMachineFunctionInfo>()->mayNeedAGPRs();
}];

defvar MayNotNeedAGPRs = [{
  return !MF->getInfo<SIMachineFunctionInfo>()->mayNeedAGPRs();
}];

defvar MayNotNeedAGPRs_gisel = [{
  return !MF.getInfo<SIMachineFunctionInfo>()->mayNeedAGPRs();
}];

// Matches only when mayNeedAGPRs() is true.
class AgprMAIFrag<SDPatternOperator Op> : MAIFrag<Op, MayNeedAGPRs> {
  let GISelPredicateCode = MayNeedAGPRs_gisel;
}

// Matches only when mayNeedAGPRs() is false.
class VgprMAIFrag<SDPatternOperator Op> : MAIFrag<Op, MayNotNeedAGPRs> {
  let GISelPredicateCode = MayNotNeedAGPRs_gisel;
}

let SubtargetPredicate = HasMAIInsts in {

let isAsCheapAsAMove = 1, isReMaterializable = 1 in {
  defm V_ACCVGPR_READ_B32  : VOP3Inst<"v_accvgpr_read_b32",  VOPProfileAccRead>;
  let isMoveImm = 1 in {
    defm V_ACCVGPR_WRITE_B32 : VOP3Inst<"v_accvgpr_write_b32", VOPProfileAccWrite>;
  } // End isMoveImm = 1
} // End isAsCheapAsAMove = 1, isReMaterializable = 1

// Base record for every MAI instruction. Opcode, is_dgemm and is_gfx940_xdl
// are the columns exported through MAIInstInfoTable.
class MAIInst<string OpName, VOPProfile P, SDPatternOperator node>
  : VOP3InstBase<OpName, P, node> {
  Instruction Opcode = !cast<Instruction>(NAME);
  bit is_dgemm = 0;
  bit is_gfx940_xdl = 0;
}

// Emits the variants of one MFMA instruction for profile "VOPProfileMAI_" # P:
//   _e64            - uses the base profile; pattern via AgprMAIFrag.
//   _vgprcd_e64     - uses the "_VCD" profile; pattern via VgprMAIFrag;
//                     isGFX90APlus only.
// When the profile sets NoDstOverlap, the two variants above are marked
// "@earlyclobber $vdst" and get no selection pattern (null_frag); instead the
// tied ("$vdst = $src2") variants below are emitted and carry the patterns:
//   _mac_e64, _mac_vgprcd_e64
multiclass MAIInst<string OpName, string P, SDPatternOperator node> {
  defvar NoDstOverlap = !cast<VOPProfileMAI>("VOPProfileMAI_" # P).NoDstOverlap;

  let isConvergent = 1, mayRaiseFPException = 0, ReadsModeReg = 1 in {
    // FP32 denorm mode is respected, rounding mode is not. Exceptions are not supported.
    let Constraints = !if(NoDstOverlap, "@earlyclobber $vdst", "") in {
      def _e64 : MAIInst<OpName, !cast<VOPProfileMAI>("VOPProfileMAI_" # P),
                         !if(!or(NoDstOverlap, !eq(node, null_frag)), null_frag, AgprMAIFrag<node>)>,
                 MFMATable<0, NAME # "_e64">;

      let SubtargetPredicate = isGFX90APlus, Mnemonic = OpName in
      def _vgprcd_e64 : MAIInst<OpName # "_vgprcd", !cast<VOPProfileMAI>("VOPProfileMAI_" # P # "_VCD"),
                                !if(!or(NoDstOverlap, !eq(node, null_frag)), null_frag, VgprMAIFrag<node>)>,
                        MFMATable<0, NAME # "_vgprcd_e64">;
    }

    if NoDstOverlap then {
      let Constraints = !if(NoDstOverlap, "$vdst = $src2", ""),
          isConvertibleToThreeAddress = NoDstOverlap,
          Mnemonic = OpName in {
        def "_mac_e64" : MAIInst<OpName # "_mac", !cast<VOPProfileMAI>("VOPProfileMAI_" # P),
                                 !if(!eq(node, null_frag), null_frag, AgprMAIFrag<node>)>,
                         MFMATable<1, NAME # "_e64">;

        let SubtargetPredicate = isGFX90APlus in
        def _mac_vgprcd_e64 : MAIInst<OpName # "_mac_vgprcd", !cast<VOPProfileMAI>("VOPProfileMAI_" # P # "_VCD"),
                                      !if(!eq(node, null_frag), null_frag, VgprMAIFrag<node>)>,
                              MFMATable<1, NAME # "_vgprcd_e64">;
      }
    }
  } // End isConvergent = 1, mayRaiseFPException = 0, ReadsModeReg = 1
}

defm V_MFMA_F32_4X4X1F32    : MAIInst<"v_mfma_f32_4x4x1f32", "F32_F32_X4", int_amdgcn_mfma_f32_4x4x1f32>;
defm V_MFMA_F32_16X16X1F32  : MAIInst<"v_mfma_f32_16x16x1f32", "F32_F32_X16", int_amdgcn_mfma_f32_16x16x1f32>;
defm V_MFMA_F32_16X16X4F32  : MAIInst<"v_mfma_f32_16x16x4f32", "F32_F32_X4", int_amdgcn_mfma_f32_16x16x4f32>;
defm V_MFMA_F32_32X32X1F32  : MAIInst<"v_mfma_f32_32x32x1f32", "F32_F32_X32", int_amdgcn_mfma_f32_32x32x1f32>;
defm V_MFMA_F32_32X32X2F32  : MAIInst<"v_mfma_f32_32x32x2f32", "F32_F32_X16", int_amdgcn_mfma_f32_32x32x2f32>;

let is_gfx940_xdl = 1 in {
defm V_MFMA_F32_4X4X4F16    : MAIInst<"v_mfma_f32_4x4x4f16", "F32_V4F16_X4", int_amdgcn_mfma_f32_4x4x4f16>;
defm V_MFMA_I32_4X4X4I8     : MAIInst<"v_mfma_i32_4x4x4i8", "I32_I32_X4", int_amdgcn_mfma_i32_4x4x4i8>;
defm V_MFMA_F32_16X16X4F16  : MAIInst<"v_mfma_f32_16x16x4f16", "F32_V4F16_X16", int_amdgcn_mfma_f32_16x16x4f16>;
defm V_MFMA_F32_16X16X16F16 : MAIInst<"v_mfma_f32_16x16x16f16", "F32_V4F16_X4", int_amdgcn_mfma_f32_16x16x16f16>;
defm V_MFMA_I32_16X16X4I8   : MAIInst<"v_mfma_i32_16x16x4i8", "I32_I32_X16", int_amdgcn_mfma_i32_16x16x4i8>;
defm V_MFMA_F32_32X32X4F16  : MAIInst<"v_mfma_f32_32x32x4f16", "F32_V4F16_X32", int_amdgcn_mfma_f32_32x32x4f16>;
defm V_MFMA_F32_32X32X8F16  : MAIInst<"v_mfma_f32_32x32x8f16", "F32_V4F16_X16", int_amdgcn_mfma_f32_32x32x8f16>;
defm V_MFMA_I32_32X32X4I8   : MAIInst<"v_mfma_i32_32x32x4i8", "I32_I32_X32", int_amdgcn_mfma_i32_32x32x4i8>;
}

let Predicates = [isGFX908orGFX90A] in {
defm V_MFMA_I32_16X16X16I8  : MAIInst<"v_mfma_i32_16x16x16i8", "I32_I32_X4", int_amdgcn_mfma_i32_16x16x16i8>;
defm V_MFMA_I32_32X32X8I8   : MAIInst<"v_mfma_i32_32x32x8i8", "I32_I32_X16", int_amdgcn_mfma_i32_32x32x8i8>;
defm V_MFMA_F32_4X4X2BF16   : MAIInst<"v_mfma_f32_4x4x2bf16", "F32_V2I16_X4", int_amdgcn_mfma_f32_4x4x2bf16>;
defm V_MFMA_F32_16X16X2BF16 : MAIInst<"v_mfma_f32_16x16x2bf16", "F32_V2I16_X16", int_amdgcn_mfma_f32_16x16x2bf16>;
defm V_MFMA_F32_16X16X8BF16 : MAIInst<"v_mfma_f32_16x16x8bf16", "F32_V2I16_X4", int_amdgcn_mfma_f32_16x16x8bf16>;
defm V_MFMA_F32_32X32X2BF16 : MAIInst<"v_mfma_f32_32x32x2bf16", "F32_V2I16_X32", int_amdgcn_mfma_f32_32x32x2bf16>;
defm V_MFMA_F32_32X32X4BF16 : MAIInst<"v_mfma_f32_32x32x4bf16", "F32_V2I16_X16", int_amdgcn_mfma_f32_32x32x4bf16>;
}

} // End SubtargetPredicate = HasMAIInsts

let Predicates = [isGFX90APlus] in {
  let is_gfx940_xdl = 1 in {
  defm V_MFMA_F32_32X32X4BF16_1K  : MAIInst<"v_mfma_f32_32x32x4bf16_1k", "F32_V4I16_X32", int_amdgcn_mfma_f32_32x32x4bf16_1k>;
  defm V_MFMA_F32_16X16X4BF16_1K  : MAIInst<"v_mfma_f32_16x16x4bf16_1k", "F32_V4I16_X16", int_amdgcn_mfma_f32_16x16x4bf16_1k>;
  defm V_MFMA_F32_4X4X4BF16_1K    : MAIInst<"v_mfma_f32_4x4x4bf16_1k", "F32_V4I16_X4", int_amdgcn_mfma_f32_4x4x4bf16_1k>;
  defm V_MFMA_F32_32X32X8BF16_1K  : MAIInst<"v_mfma_f32_32x32x8bf16_1k", "F32_V4I16_X16", int_amdgcn_mfma_f32_32x32x8bf16_1k>;
  defm V_MFMA_F32_16X16X16BF16_1K : MAIInst<"v_mfma_f32_16x16x16bf16_1k", "F32_V4I16_X4", int_amdgcn_mfma_f32_16x16x16bf16_1k>;
  }

  let is_dgemm = 1 in {
  defm V_MFMA_F64_16X16X4F64      : MAIInst<"v_mfma_f64_16x16x4f64", "F64_16X16X4F64", int_amdgcn_mfma_f64_16x16x4f64>;
  defm V_MFMA_F64_4X4X4F64        : MAIInst<"v_mfma_f64_4x4x4f64", "F64_4X4X4F64", int_amdgcn_mfma_f64_4x4x4f64>;
  }
} // End Predicates = [isGFX90APlus]

let SubtargetPredicate = isGFX940Plus, is_gfx940_xdl = 1 in {
  defm V_MFMA_I32_32X32X16I8       : MAIInst<"v_mfma_i32_32x32x16i8", "I32_I64_X32", int_amdgcn_mfma_i32_32x32x16_i8>;
  defm V_MFMA_I32_16X16X32I8       : MAIInst<"v_mfma_i32_16x16x32i8", "I32_I64_X16", int_amdgcn_mfma_i32_16x16x32_i8>;
  defm V_MFMA_F32_16X16X8XF32      : MAIInst<"v_mfma_f32_16x16x8xf32", "F32_V2F32_X16", int_amdgcn_mfma_f32_16x16x8_xf32>;
  defm V_MFMA_F32_32X32X4XF32      : MAIInst<"v_mfma_f32_32x32x4xf32", "F32_V2F32_X32", int_amdgcn_mfma_f32_32x32x4_xf32>;
  defm V_MFMA_F32_16X16X32_BF8_BF8 : MAIInst<"v_mfma_f32_16x16x32_bf8_bf8", "F32_I64_X32", int_amdgcn_mfma_f32_16x16x32_bf8_bf8>;
  defm V_MFMA_F32_16X16X32_BF8_FP8 : MAIInst<"v_mfma_f32_16x16x32_bf8_fp8", "F32_I64_X32", int_amdgcn_mfma_f32_16x16x32_bf8_fp8>;
  defm V_MFMA_F32_16X16X32_FP8_BF8 : MAIInst<"v_mfma_f32_16x16x32_fp8_bf8", "F32_I64_X32", int_amdgcn_mfma_f32_16x16x32_fp8_bf8>;
  defm V_MFMA_F32_16X16X32_FP8_FP8 : MAIInst<"v_mfma_f32_16x16x32_fp8_fp8", "F32_I64_X32", int_amdgcn_mfma_f32_16x16x32_fp8_fp8>;
  defm V_MFMA_F32_32X32X16_BF8_BF8 : MAIInst<"v_mfma_f32_32x32x16_bf8_bf8", "F32_I64_X16", int_amdgcn_mfma_f32_32x32x16_bf8_bf8>;
  defm V_MFMA_F32_32X32X16_BF8_FP8 : MAIInst<"v_mfma_f32_32x32x16_bf8_fp8", "F32_I64_X16", int_amdgcn_mfma_f32_32x32x16_bf8_fp8>;
  defm V_MFMA_F32_32X32X16_FP8_BF8 : MAIInst<"v_mfma_f32_32x32x16_fp8_bf8", "F32_I64_X16", int_amdgcn_mfma_f32_32x32x16_fp8_bf8>;
  defm V_MFMA_F32_32X32X16_FP8_FP8 : MAIInst<"v_mfma_f32_32x32x16_fp8_fp8", "F32_I64_X16", int_amdgcn_mfma_f32_32x32x16_fp8_fp8>;
} // End SubtargetPredicate = isGFX940Plus, is_gfx940_xdl = 1

// Emits the single _e64 record of an SMFMAC instruction for profile
// "VOPProfileSMFMAC_" # P. $vdst is tied to $src2 and $src2 is excluded from
// the encoding.
multiclass SMFMACInst<string OpName, string P, SDPatternOperator node> {
  let Constraints = "$vdst = $src2", DisableEncoding = "$src2",
      isConvergent = 1, mayRaiseFPException = 0, ReadsModeReg = 1, is_gfx940_xdl = 1 in {
    def _e64 : MAIInst<OpName, !cast<VOPProfileSMFMAC>("VOPProfileSMFMAC_" # P), node>;
  }
}

let SubtargetPredicate = isGFX940Plus in {
defm V_SMFMAC_F32_16X16X32_F16     : SMFMACInst<"v_smfmac_f32_16x16x32_f16", "F32_16X16X32_F16", int_amdgcn_smfmac_f32_16x16x32_f16>;
defm V_SMFMAC_F32_32X32X16_F16     : SMFMACInst<"v_smfmac_f32_32x32x16_f16", "F32_32X32X16_F16", int_amdgcn_smfmac_f32_32x32x16_f16>;
defm V_SMFMAC_F32_16X16X32_BF16    : SMFMACInst<"v_smfmac_f32_16x16x32_bf16", "F32_16X16X32_I16", int_amdgcn_smfmac_f32_16x16x32_bf16>;
defm V_SMFMAC_F32_32X32X16_BF16    : SMFMACInst<"v_smfmac_f32_32x32x16_bf16", "F32_32X32X16_I16", int_amdgcn_smfmac_f32_32x32x16_bf16>;
defm V_SMFMAC_I32_16X16X64_I8      : SMFMACInst<"v_smfmac_i32_16x16x64_i8", "I32_16X16X64_I8", int_amdgcn_smfmac_i32_16x16x64_i8>;
defm V_SMFMAC_I32_32X32X32_I8      : SMFMACInst<"v_smfmac_i32_32x32x32_i8", "I32_32X32X32_I8", int_amdgcn_smfmac_i32_32x32x32_i8>;
defm V_SMFMAC_F32_16X16X64_BF8_BF8 : SMFMACInst<"v_smfmac_f32_16x16x64_bf8_bf8", "F32_16X16X64_F8", int_amdgcn_smfmac_f32_16x16x64_bf8_bf8>;
defm V_SMFMAC_F32_16X16X64_BF8_FP8 : SMFMACInst<"v_smfmac_f32_16x16x64_bf8_fp8", "F32_16X16X64_F8", int_amdgcn_smfmac_f32_16x16x64_bf8_fp8>;
defm V_SMFMAC_F32_16X16X64_FP8_BF8 : SMFMACInst<"v_smfmac_f32_16x16x64_fp8_bf8", "F32_16X16X64_F8", int_amdgcn_smfmac_f32_16x16x64_fp8_bf8>;
defm V_SMFMAC_F32_16X16X64_FP8_FP8 : SMFMACInst<"v_smfmac_f32_16x16x64_fp8_fp8", "F32_16X16X64_F8", int_amdgcn_smfmac_f32_16x16x64_fp8_fp8>;
defm V_SMFMAC_F32_32X32X32_BF8_BF8 : SMFMACInst<"v_smfmac_f32_32x32x32_bf8_bf8", "F32_32X32X32_F8", int_amdgcn_smfmac_f32_32x32x32_bf8_bf8>;
defm V_SMFMAC_F32_32X32X32_BF8_FP8 : SMFMACInst<"v_smfmac_f32_32x32x32_bf8_fp8", "F32_32X32X32_F8", int_amdgcn_smfmac_f32_32x32x32_bf8_fp8>;
defm V_SMFMAC_F32_32X32X32_FP8_BF8 : SMFMACInst<"v_smfmac_f32_32x32x32_fp8_bf8", "F32_32X32X32_F8", int_amdgcn_smfmac_f32_32x32x32_fp8_bf8>;
defm V_SMFMAC_F32_32X32X32_FP8_FP8 : SMFMACInst<"v_smfmac_f32_32x32x32_fp8_fp8", "F32_32X32X32_F8", int_amdgcn_smfmac_f32_32x32x32_fp8_fp8>;
}

// Searchable table over every MAIInst record, keyed by Opcode and exposing
// the is_dgemm / is_gfx940_xdl flags; the lookup is named
// getMAIInstInfoHelper.
def MAIInstInfoTable : GenericTable {
  let FilterClass = "MAIInst";
  let CppTypeName = "MAIInstInfo";
  let Fields = [
    "Opcode", "is_dgemm", "is_gfx940_xdl"
  ];

  let PrimaryKey = ["Opcode"];
  let PrimaryKeyName = "getMAIInstInfoHelper";
}

let isCommutable = 1, isReMaterializable = 1 in {
  let SubtargetPredicate = HasPackedFP32Ops in {
    defm V_PK_FMA_F32 : VOP3PInst<"v_pk_fma_f32", VOP3P_Profile<VOP_V2F32_V2F32_V2F32_V2F32, VOP3_PACKED>, any_fma>;
    defm V_PK_MUL_F32 : VOP3PInst<"v_pk_mul_f32", VOP3P_Profile<VOP_V2F32_V2F32_V2F32, VOP3_PACKED>, any_fmul>;
    defm V_PK_ADD_F32 : VOP3PInst<"v_pk_add_f32", VOP3P_Profile<VOP_V2F32_V2F32_V2F32, VOP3_PACKED>, any_fadd>;
  } // End SubtargetPredicate = HasPackedFP32Ops

  let SubtargetPredicate = HasPkMovB32 in
  defm V_PK_MOV_B32 : VOP3PInst<"v_pk_mov_b32", VOP3P_Profile<VOP_V2I32_V2I32_V2I32, VOP3_PACKED>>;
} // End isCommutable = 1, isReMaterializable = 1

def : AMDGPUMnemonicAlias<"v_accvgpr_read", "v_accvgpr_read_b32">;
def : AMDGPUMnemonicAlias<"v_accvgpr_write", "v_accvgpr_write_b32">;

// Profile for WMMA instructions.
//   Suffix     - "_w32" selects the 256-bit dst/src2 operands, any other
//                value the 128-bit ones.
//   _Src01RC64 - operand class shared by src0 and src1.
//   _HasClamp / _HasOpSel - forwarded to HasClamp / HasOpSel.
class VOPProfileWMMA<VOPProfile P, string Suffix, RegisterOperand _Src01RC64, bit _HasClamp, bit _HasOpSel> : VOP3P_Profile<P> {
  let DstRC = !if(!eq(Suffix, "_w32"), VDst_256, VDst_128);
  let Src0RC64 = _Src01RC64;
  let Src1RC64 = _Src01RC64;
  let Src2RC64 = !if(!eq(Suffix, "_w32"), VISrc_256_f64, VISrc_128_f32);
  let HasClamp = _HasClamp;
  let HasOpSel = _HasOpSel;
  let IsPacked = 1;
  let IsWMMA = 1;
}

def VOP_V8F32_V16F16_V16F16_V8F32    : VOPProfile <[v8f32, v16f16, v16f16, v8f32]>;
def VOP_V8F32_V16I16_V16I16_V8F32    : VOPProfile <[v8f32, v16i16, v16i16, v8f32]>;
def VOP_V16F16_V16F16_V16F16_V16F16  : VOPProfile <[v16f16, v16f16, v16f16, v16f16]>;
def VOP_V16I16_V16I16_V16I16_V16I16  : VOPProfile <[v16i16, v16i16, v16i16, v16i16]>;
def VOP_V8I32_V4I32_V4I32_V8I32      : VOPProfile <[v8i32, v4i32, v4i32, v8i32]>;
def VOP_V8I32_V2I32_V2I32_V8I32      : VOPProfile <[v8i32, v2i32, v2i32, v8i32]>;

def VOP_V4F32_V16F16_V16F16_V4F32    : VOPProfile <[v4f32, v16f16, v16f16, v4f32]>;
def VOP_V4F32_V16I16_V16I16_V4F32    : VOPProfile <[v4f32, v16i16, v16i16, v4f32]>;
def VOP_V8F16_V16F16_V16F16_V8F16    : VOPProfile <[v8f16, v16f16, v16f16, v8f16]>;
def VOP_V8I16_V16I16_V16I16_V8I16    : VOPProfile <[v8i16, v16i16, v16i16, v8i16]>;
def VOP_V4I32_V4I32_V4I32_V4I32      : VOPProfile <[v4i32, v4i32, v4i32, v4i32]>;
def VOP_V4I32_V2I32_V2I32_V4I32      : VOPProfile <[v4i32, v2i32, v2i32, v4i32]>;


// Two-bit WMMA flavour selector: bit 0 enables clamp, bit 1 enables op_sel.
class WMMAType <bits<2> val> {
  bit hasClamp = val{0};
  bit hasOpsel = val{1};
}

def WMMARegular : WMMAType<0b00>;
def WMMAUIClamp : WMMAType<0b01>;
def WMMAOpSel   : WMMAType<0b10>;

// Selection pattern for the regular flavour: all three sources are matched
// through VOP3PMods and their modifiers are forwarded to the instruction.
class WMMARegularPat<Instruction Inst, SDPatternOperator node, VOPProfile P> :
  GCNPat < (P.DstVT (node
                      (P.Src0VT (VOP3PMods P.Src0VT:$src0, i32:$src0_modifiers)),
                      (P.Src1VT (VOP3PMods P.Src1VT:$src1, i32:$src1_modifiers)),
                      (P.Src2VT (VOP3PMods P.Src2VT:$src2, i32:$src2_modifiers))
           )),
           (P.DstVT (Inst i32:$src0_modifiers, P.Src0VT:$src0, i32:$src1_modifiers, P.Src1VT:$src1, $src2_modifiers, P.Src2VT:$src2))
>;

// Selection pattern for the op_sel flavour: the node's trailing operand is
// matched through WMMAOpSelVOP3PMods and becomes src2_modifiers; the src0 and
// src1 modifiers are fixed to the constant 8.
class WMMAOpSelPat<Instruction Inst, SDPatternOperator node, VOPProfile P> :
  GCNPat < (P.DstVT (node
                      (P.Src0VT P.Src0VT:$src0),
                      (P.Src1VT P.Src1VT:$src1),
                      (P.Src2VT P.Src2VT:$src2), (WMMAOpSelVOP3PMods i32:$src2_modifiers)
           )),
           (P.DstVT (Inst (i32 8), P.Src0VT:$src0, (i32 8), P.Src1VT:$src1, i32:$src2_modifiers, P.Src2VT:$src2))
>;

// Selection pattern for the clamp flavour: the node carries a leading operand
// before each of src0 and src1, matched through VOP3PModsNeg into the
// corresponding modifiers, plus a trailing immediate clamp bit; the src2
// modifiers are fixed to the constant 8.
class WMMAUIClampPat<Instruction Inst, SDPatternOperator node, VOPProfile P> :
  GCNPat < (P.DstVT (node
                      (VOP3PModsNeg i32:$src0_modifiers), (P.Src0VT P.Src0VT:$src0),
                      (VOP3PModsNeg i32:$src1_modifiers), (P.Src1VT P.Src1VT:$src1),
                      (P.Src2VT P.Src2VT:$src2), (i1 timm:$clamp)
           )),
           (P.DstVT (Inst i32:$src0_modifiers, P.Src0VT:$src0, i32:$src1_modifiers, P.Src1VT:$src1, (i32 8), P.Src2VT:$src2, i1:$clamp))
>;

// Pairs the two-address and three-address pseudo of one WMMA instruction.
class WMMAOpcodeMapping<Instruction TwoAddr, Instruction ThreeAddr> {
  Instruction Opcode2Addr = TwoAddr;
  Instruction Opcode3Addr = ThreeAddr;
  Predicate WaveSizePredicate;
}

// Enum over all VOP3P_Pseudo records; used as the column type of the mapping
// tables below.
def WMMAOpcode : GenericEnum {
  let FilterClass = "VOP3P_Pseudo";
}

// Shared layout of the two lookup tables built from WMMAOpcodeMapping.
class WMMAMappingTable : GenericTable {
  let FilterClass = "WMMAOpcodeMapping";
  let CppTypeName = "WMMAOpcodeMappingInfo";
  let Fields = ["Opcode2Addr", "Opcode3Addr"];
  string TypeOf_Opcode2Addr = "WMMAOpcode";
  string TypeOf_Opcode3Addr = "WMMAOpcode";
}

// Lookup keyed by the two-address opcode.
def WMMAOpcode2AddrMappingTable : WMMAMappingTable {
  let PrimaryKey = ["Opcode2Addr"];
  let PrimaryKeyName = "getWMMAMappingInfoFrom2AddrOpcode";
}

// Lookup keyed by the three-address opcode.
def WMMAOpcode3AddrMappingTable : WMMAMappingTable {
  let PrimaryKey = ["Opcode3Addr"];
  let PrimaryKeyName = "getWMMAMappingInfoFrom3AddrOpcode";
}

// The WMMA instruction has extra constraints:
// Matrices A and B cannot overlap with D. C cannot partially overlap with D,
// but it is OK for them to be the same (which is a typical case).
//
// We implement it as follows:
// 1) Map the intrinsic to the pseudo where D is tied to C ($vdst = $src2).
// 2) The pass twoaddressinstruction checks if src2 is live and if that is the case
//    it converts the default pseudo to the pseudo where src2 is not the same as vdst.
// 3) @earlyclobber on the destination satisfies the constraint during RA.

// Generator for the GFX11 WMMA pseudos.
//   Suffix              - appended to the def name and to the pseudo name
//                         passed to VOP3P_Pseudo (e.g. "_w32").
//   Instr               - assembly mnemonic.
//   P / node            - value-type profile and the intrinsic to select from.
//   _Src01RC64          - register operand passed to VOPProfileWMMA together
//                         with P and Suffix.
//   Type                - WMMAType; its hasClamp/hasOpsel fields feed the
//                         profile and it picks which selection pattern is used.
//   convertibleTo3Addr  - when set, a _threeaddr pseudo and a
//                         WMMAOpcodeMapping record are emitted as well.
multiclass WMMAInst<string Suffix, string Instr, VOPProfile P,
                    SDPatternOperator node = null_frag,
                    RegisterOperand _Src01RC64 = VRegSrc_256,
                    WMMAType Type, bit convertibleTo3Addr> {

  defvar PseudoProfile =
      VOPProfileWMMA<P, Suffix, _Src01RC64, Type.hasClamp, Type.hasOpsel>;

  // vdst is always early-clobber; the two-address form additionally ties it
  // to src2.
  defvar Constraints2Addr = "@earlyclobber $vdst,$vdst = $src2";
  defvar Constraints3Addr = "@earlyclobber $vdst";

  let Mnemonic = Instr, mayRaiseFPException = 0, ReadsModeReg = 0,
      Constraints = Constraints2Addr,
      isConvertibleToThreeAddress = convertibleTo3Addr in
  def _twoaddr # Suffix : VOP3P_Pseudo<Instr # Suffix, PseudoProfile>;

  if convertibleTo3Addr then {
    let Mnemonic = Instr, mayRaiseFPException = 0, ReadsModeReg = 0,
        Constraints = Constraints3Addr,
        SchedRW = [Write32Bit, Write32Bit] in
    def _threeaddr # Suffix : VOP3P_Pseudo<Instr # Suffix, PseudoProfile>;

    def : WMMAOpcodeMapping<!cast<Instruction>(NAME # _twoaddr # Suffix),
                            !cast<Instruction>(NAME # _threeaddr # Suffix)>;
  }

  // Selection always targets the two-address pseudo; the pattern class is
  // chosen by Type.
  let SubtargetPredicate = isGFX11Only in {
    if !eq(Type, WMMAOpSel) then {
      def : WMMAOpSelPat<!cast<Instruction>(NAME # _twoaddr # Suffix), node, P>;
    } else if !eq(Type, WMMAUIClamp) then {
      def : WMMAUIClampPat<!cast<Instruction>(NAME # _twoaddr # Suffix), node, P>;
    } else {
      def : WMMARegularPat<!cast<Instruction>(NAME # _twoaddr # Suffix), node, P>;
    }
  }
}



// GFX11 WMMA instantiations, wave32.
let WaveSizePredicate = isWave32 in {
  defm V_WMMA_F32_16X16X16_F16 :
    WMMAInst<"_w32", "v_wmma_f32_16x16x16_f16", VOP_V8F32_V16F16_V16F16_V8F32,
             int_amdgcn_wmma_f32_16x16x16_f16, VRegSrc_256, WMMARegular, 1>;
  defm V_WMMA_F32_16X16X16_BF16 :
    WMMAInst<"_w32", "v_wmma_f32_16x16x16_bf16", VOP_V8F32_V16I16_V16I16_V8F32,
             int_amdgcn_wmma_f32_16x16x16_bf16, VRegSrc_256, WMMARegular, 1>;
  defm V_WMMA_F16_16X16X16_F16 :
    WMMAInst<"_w32", "v_wmma_f16_16x16x16_f16", VOP_V16F16_V16F16_V16F16_V16F16,
             int_amdgcn_wmma_f16_16x16x16_f16, VRegSrc_256, WMMAOpSel, 1>;
  defm V_WMMA_BF16_16X16X16_BF16 :
    WMMAInst<"_w32", "v_wmma_bf16_16x16x16_bf16", VOP_V16I16_V16I16_V16I16_V16I16,
             int_amdgcn_wmma_bf16_16x16x16_bf16, VRegSrc_256, WMMAOpSel, 1>;
  defm V_WMMA_F16_16X16X16_F16_TIED :
    WMMAInst<"_w32", "v_wmma_f16_16x16x16_f16", VOP_V16F16_V16F16_V16F16_V16F16,
             int_amdgcn_wmma_f16_16x16x16_f16_tied, VRegSrc_256, WMMAOpSel, 0>;
  defm V_WMMA_BF16_16X16X16_BF16_TIED :
    WMMAInst<"_w32", "v_wmma_bf16_16x16x16_bf16", VOP_V16I16_V16I16_V16I16_V16I16,
             int_amdgcn_wmma_bf16_16x16x16_bf16_tied, VRegSrc_256, WMMAOpSel, 0>;
  defm V_WMMA_I32_16X16X16_IU8 :
    WMMAInst<"_w32", "v_wmma_i32_16x16x16_iu8", VOP_V8I32_V4I32_V4I32_V8I32,
             int_amdgcn_wmma_i32_16x16x16_iu8, VRegSrc_128, WMMAUIClamp, 1>;
  defm V_WMMA_I32_16X16X16_IU4 :
    WMMAInst<"_w32", "v_wmma_i32_16x16x16_iu4", VOP_V8I32_V2I32_V2I32_V8I32,
             int_amdgcn_wmma_i32_16x16x16_iu4, VRegSrc_64, WMMAUIClamp, 1>;
}

// GFX11 WMMA instantiations, wave64.
let WaveSizePredicate = isWave64 in {
  defm V_WMMA_F32_16X16X16_F16 :
    WMMAInst<"_w64", "v_wmma_f32_16x16x16_f16", VOP_V4F32_V16F16_V16F16_V4F32,
             int_amdgcn_wmma_f32_16x16x16_f16, VRegSrc_256, WMMARegular, 1>;
  defm V_WMMA_F32_16X16X16_BF16 :
    WMMAInst<"_w64", "v_wmma_f32_16x16x16_bf16", VOP_V4F32_V16I16_V16I16_V4F32,
             int_amdgcn_wmma_f32_16x16x16_bf16, VRegSrc_256, WMMARegular, 1>;
  defm V_WMMA_F16_16X16X16_F16 :
    WMMAInst<"_w64", "v_wmma_f16_16x16x16_f16", VOP_V8F16_V16F16_V16F16_V8F16,
             int_amdgcn_wmma_f16_16x16x16_f16, VRegSrc_256, WMMAOpSel, 1>;
  defm V_WMMA_BF16_16X16X16_BF16 :
    WMMAInst<"_w64", "v_wmma_bf16_16x16x16_bf16", VOP_V8I16_V16I16_V16I16_V8I16,
             int_amdgcn_wmma_bf16_16x16x16_bf16, VRegSrc_256, WMMAOpSel, 1>;
  defm V_WMMA_F16_16X16X16_F16_TIED :
    WMMAInst<"_w64", "v_wmma_f16_16x16x16_f16", VOP_V8F16_V16F16_V16F16_V8F16,
             int_amdgcn_wmma_f16_16x16x16_f16_tied, VRegSrc_256, WMMAOpSel, 0>;
  defm V_WMMA_BF16_16X16X16_BF16_TIED :
    WMMAInst<"_w64", "v_wmma_bf16_16x16x16_bf16", VOP_V8I16_V16I16_V16I16_V8I16,
             int_amdgcn_wmma_bf16_16x16x16_bf16_tied, VRegSrc_256, WMMAOpSel, 0>;
  defm V_WMMA_I32_16X16X16_IU8 :
    WMMAInst<"_w64", "v_wmma_i32_16x16x16_iu8", VOP_V4I32_V4I32_V4I32_V4I32,
             int_amdgcn_wmma_i32_16x16x16_iu8, VRegSrc_128, WMMAUIClamp, 1>;
  defm V_WMMA_I32_16X16X16_IU4 :
    WMMAInst<"_w64", "v_wmma_i32_16x16x16_iu4", VOP_V4I32_V2I32_V2I32_V4I32,
             int_amdgcn_wmma_i32_16x16x16_iu4, VRegSrc_64, WMMAUIClamp, 1>;

}

// Profile shared by the WMMA and SWMMAC pseudos defined further down.
//   ArgTy       - value types [D, A, B, C].
//   _IsSWMMAC   - 1 selects the SWMMAC operand layout, 0 the WMMA layout.
//   _IndexType  - 0, 8 or 16; selects which index_key operand (if any) is
//                 present.
//   _IsIU       - integer (iu4/iu8) A/B matrices.
//   _IsFP8BF8   - fp8/bf8 A/B matrices.
// The class derives the pseudo operand list, the asm string and the dag
// fragments from which the isel patterns are assembled.
class VOP3PWMMA_Profile<list<ValueType> ArgTy, bit _IsSWMMAC, int _IndexType,
                        bit _IsIU, bit _IsFP8BF8>
    : VOP3P_Profile<VOPProfile<ArgTy>> {
  bit IsIU = _IsIU;
  bit IsFP8BF8 = _IsFP8BF8;
  // Neither IU nor FP8/BF8.
  bit IsF16BF16 = !and(!not(IsIU), !not(IsFP8BF8));

  int IndexType = _IndexType;

  let IsPacked = 1;
  let IsWMMA = !not(_IsSWMMAC);
  let IsSWMMAC = _IsSWMMAC;

  // Classification of the A/B element type (from ArgTy[1]) and of the C type
  // (from ArgTy[3]).
  bit IsAB_F16 = !and(IsF16BF16, ArgTy[1].isFP);
  bit IsAB_BF16 = !and(IsF16BF16, isIntType<ArgTy[1]>.ret);
  bit IsC_F32 = !or(!eq(ArgTy[3], v8f32), !eq(ArgTy[3], v4f32));
  bit IsC_BF16 = !or(!eq(ArgTy[3], v8i16), !eq(ArgTy[3], v4i16));
  bit IsC_F16 = !or(!eq(ArgTy[3], v8f16), !eq(ArgTy[3], v4f16));

  // Which neg_lo / neg_hi bits are meaningful for src0/src1 ("01") and for
  // src2 ("2"); the *Any bits decide whether the operand exists at all.
  bit NegLo01 = !or(IsF16BF16, IsIU);
  bit NegLo2 = !and(!or(IsF16BF16, IsFP8BF8), IsWMMA);
  bit NegHi01 = IsF16BF16;
  bit NegHi2 = !and(!or(IsF16BF16, IsFP8BF8), IsWMMA);
  bit NegLoAny = !or(NegLo01, NegLo2);
  bit NegHiAny = !or(NegHi01, NegHi2);

  // Register operands are looked up by name from the bit size of each type.
  // For SWMMAC src2 uses the destination operand class; otherwise a VISrc_*
  // operand whose suffix follows the C element type.
  let DstRC = !cast<RegisterOperand>("VDst_"#ArgTy[0].Size);
  let Src0RC64 = !cast<RegisterOperand>("VRegSrc_"#ArgTy[1].Size);
  let Src1RC64 = !cast<RegisterOperand>("VRegSrc_"#ArgTy[2].Size);
  let Src2RC64 = !if(IsSWMMAC, DstRC,
                     !cast<RegisterOperand>("VISrc_"#ArgTy[3].Size#
                                            !cond(IsC_F32: "_f32",
                                                  IsC_F16: "_f16",
                                                  IsC_BF16: "_bf16",
                                                  1: "_b32")));

  // For f16 and bf16 matrices A and B, each element can be modified by
  // fneg (neg_lo, neg_hi = 1). For iu4 and iu8 matrices A and B, neg_lo is
  // overloaded to select signedness: neg_lo = 0 means unsigned (u4/u8, zext),
  // neg_lo = 1 means signed (i4/i8, sext). For f16, bf16 and f32 matrix C,
  // each element can be modified by fneg (neg_lo = 1) or fabs (neg_hi = 1).

  // Opcode             | src0/src1 - matrix A/B | src2 - matrix C or Index
  // ---------------------------------------------------------------------------
  // wmma f32_f16       | both neg_lo,neg_hi = 1 | neg_lo = 1  neg C(f32)
  // wmma f32_bf16      | neg A/B (f16 or bf16)  | neg_hi = 1  abs C(f32)
  // ---------------------------------------------------------------------------
  // wmma f16_f16       | both neg_lo,neg_hi = 1 | neg_lo = 1 neg C(f16 or bf16)
  // wmma bf16_bf16     | neg A/B (f16 or bf16)  | neg_hi = 1 abs C(f16 or bf16)
  // ---------------------------------------------------------------------------
  // wmma i32_iu8/iu4   | neg_lo = 0 u4/u8(zext) | not allowed for
  //                    | neg_lo = 1 i4/i8(sext) | i32 matrices
  // ---------------------------------------------------------------------------
  // wmma f32_fp8/bf8   | not allowed for        | neg_lo = 1  neg C(f32)
  // (4 instructions)   | f8 and bf8 matrices    | neg_hi = 1  abs C(f32)
  // ---------------------------------------------------------------------------
  // swmmac f32_f16     | both neg_lo,neg_hi = 1 | not allowed for sparse matrix
  // swmmac f32_bf16    | neg A/B (f16 or bf16)  | A Index - matrix C is in dst
  // ---------------------------------------------------------------------------
  // swmmac f16_f16     | both neg_lo,neg_hi = 1 | not allowed for sparse matrix
  // swmmac bf16_bf16   | neg A/B (f16 or bf16)  | A Index - matrix C is in dst
  // ---------------------------------------------------------------------------
  // swmmac i32_iu8/iu4 | neg_lo = 0 u4/u8(zext) | not allowed for sparse matrix
  //                    | neg_lo = 1 i4/i8(sext) | A Index - matrix C is in dst
  // ---------------------------------------------------------------------------
  // swmmac f32_fp8/bf8 | not allowed for        | not allowed for sparse matrix
  // (4 instructions)   | f8 and bf8 matrices    | A Index - matrix C is in dst

  // pseudo

  // fp8/bf8 wmmas have no src0/src1 modifiers, iu wmmas use neg_lo only, and
  // f16/bf16 wmmas use both neg_lo and neg_hi. iu wmmas (C is i32) have no
  // src2 modifiers; the remaining wmmas (f16, bf16 and fp8/bf8) use neg_lo and
  // neg_hi for C (C is f32, f16 or bf16). swmmac uses index_key and has no
  // src2 modifiers.

  dag Src0Mods = !if(IsFP8BF8, (ins), (ins PackedF16InputMods:$src0_modifiers));
  dag Src1Mods = !if(IsFP8BF8, (ins), (ins PackedF16InputMods:$src1_modifiers));
  dag Src2Mods = !if(IsIU, (ins), (ins PackedF16InputMods:$src2_modifiers));
  dag IndexKey = !cond(!eq(IndexType, 0) : (ins),
                       !eq(IndexType, 8) : (ins IndexKey8bit:$index_key_8bit),
                       !eq(IndexType, 16): (ins IndexKey16bit:$index_key_16bit));
  dag Clamp = !if(IsIU, (ins Clamp0:$clamp), (ins));
  // There is deliberately no arm for "neg_hi without neg_lo".
  dag Neg = !cond(!and(NegLoAny, NegHiAny) : (ins neg_lo0:$neg_lo, neg_hi0:$neg_hi),
                  !and(NegLoAny, !not(NegHiAny)) : (ins neg_lo0:$neg_lo),
                  !and(!not(NegLoAny), !not(NegHiAny)) : (ins));

  // WMMA:   [mods0] src0 [mods1] src1 [mods2] src2            [clamp] [neg]
  // SWMMAC: [mods0] src0 [mods1] src1 srcTiedDef src2 [index] [clamp] [neg]
  let InsVOP3P = !con(Src0Mods, (ins Src0RC64:$src0), Src1Mods, (ins Src1RC64:$src1),
                      !cond(IsWMMA : !con(Src2Mods, (ins Src2RC64:$src2)),
                            IsSWMMAC : !con((ins DstRC:$srcTiedDef), (ins VRegSrc_32:$src2), IndexKey)),
                      Clamp, Neg);

  // asm

  string IndexKeyAsm = !cond(!eq(IndexType, 0) : "",
                             !eq(IndexType, 8) : "$index_key_8bit",
                             !eq(IndexType, 16) : "$index_key_16bit");
  string ClampAsm = !if(IsIU, "$clamp", "");
  string NegAsm = !cond(!and(NegLoAny, NegHiAny) : "$neg_lo$neg_hi",
                        !and(NegLoAny, !not(NegHiAny)) : "$neg_lo",
                        !and(!not(NegLoAny), !not(NegHiAny)) : "");

  let AsmVOP3P = "$vdst, $src0, $src1, $src2"#IndexKeyAsm#NegAsm#ClampAsm;

  // isel patterns

  // Each *InPat fragment is the intrinsic-side operand list and each *OutPat
  // fragment the matching instruction-side operand list; they are
  // concatenated below into complete patterns.
  dag Src0InPat = !cond(IsAB_F16 : (ins (Src0VT (WMMAModsF16Neg Src0VT:$src0, i32:$src0_modifiers))),
                        IsAB_BF16 : (ins Src0VT:$src0),
                        IsIU : (ins (VOP3PModsNeg i32:$src0_modifiers), Src0VT:$src0),
                        IsFP8BF8 : (ins Src0VT:$src0));
  dag Src0OutPat = !cond(IsAB_F16 : (ins i32:$src0_modifiers, Src0VT:$src0),
                         IsAB_BF16 : (ins (i32 8), Src0VT:$src0),
                         IsIU : (ins i32:$src0_modifiers, Src0VT:$src0),
                         IsFP8BF8 : (ins Src0VT:$src0));
  dag Src1InPat = !cond(IsAB_F16 : (ins (Src1VT (WMMAModsF16Neg Src1VT:$src1, i32:$src1_modifiers))),
                        IsAB_BF16 : (ins Src1VT:$src1),
                        IsIU : (ins (VOP3PModsNeg i32:$src1_modifiers), Src1VT:$src1),
                        IsFP8BF8 : (ins Src1VT:$src1));
  dag Src1OutPat = !cond(IsAB_F16 : (ins i32:$src1_modifiers, Src1VT:$src1),
                         IsAB_BF16 : (ins (i32 8), Src1VT:$src1),
                         IsIU : (ins i32:$src1_modifiers, Src1VT:$src1),
                         IsFP8BF8 : (ins Src1VT:$src1));
  dag Src2InPatWmma = !cond(IsC_F32 : (ins (Src2VT (WMMAModsF32NegAbs Src2VT:$src2, i32:$src2_modifiers))),
                            IsC_F16 : (ins (Src2VT (WMMAModsF16NegAbs Src2VT:$src2, i32:$src2_modifiers))),
                            IsC_BF16 : (ins Src2VT:$src2),
                            IsIU : (ins Src2VT:$src2),
                            IsSWMMAC : (ins));
  dag Src2OutPatWmma = !cond(IsC_F32 : (ins i32:$src2_modifiers, Src2VT:$src2),
                             IsC_F16 : (ins i32:$src2_modifiers, Src2VT:$src2),
                             IsC_BF16 : (ins (i32 8), Src2VT:$src2),
                             IsIU : (ins Src2VT:$src2),
                             IsSWMMAC : (ins));
  dag ClampPat = !if(IsIU, (ins i1:$clamp), (ins));
  dag IndexInPat = !cond(!eq(IndexType, 0) : (ins i32:$src2),
                         !eq(IndexType, 8) : (ins (i32 (SWMMACIndex8 i32:$src2, i32:$index_key_8bit))),
                         !eq(IndexType, 16): (ins (i32 (SWMMACIndex16 i32:$src2, i32:$index_key_16bit))));
  dag IndexOutPat = !cond(!eq(IndexType, 0) : (ins i32:$src2),
                          !eq(IndexType, 8) : (ins i32:$src2, i32:$index_key_8bit),
                          !eq(IndexType, 16): (ins i32:$src2, i32:$index_key_16bit));
  dag Src2InlineInPat = (ins (Src2VT (WMMAVISrc Src2VT:$src2)));
  dag Src2InlineOutPat = !con(!if(IsIU, (ins), (ins (i32 8))), (ins Src2VT:$src2));


  dag WmmaInPat = !con(Src0InPat, Src1InPat, Src2InPatWmma, ClampPat);
  dag WmmaOutPat = !con(Src0OutPat, Src1OutPat, Src2OutPatWmma, ClampPat);

  dag SwmmacInPat = !con(Src0InPat, Src1InPat, (ins Src2VT:$srcTiedDef), IndexInPat, ClampPat);
  dag SwmmacOutPat = !con(Src0OutPat, Src1OutPat, (ins Src2VT:$srcTiedDef), IndexOutPat, ClampPat);

  // The wmma pattern whose src2 is an inline immediate selects the _threeaddr
  // pseudo; _twoaddr cannot be used because an immediate would violate the
  // "src2 tied to vdst" constraint.
  dag WmmaInlineInPat = !con(Src0InPat, Src1InPat, Src2InlineInPat, ClampPat);
  dag WmmaInlineOutPat = !con(Src0OutPat, Src1OutPat, Src2InlineOutPat, ClampPat);
}

// Emits the _twoaddr / _threeaddr pseudo pair for one WMMA instruction that
// uses a VOP3PWMMA_Profile, plus the WMMAOpcodeMapping record linking them.
// Both pseudos share the mnemonic Instr and the PseudoInstr name
// Instr#PseudoInstrSuffix.
multiclass WMMAInstGFX12<string Instr, VOP3PWMMA_Profile WMMAProfile, string PseudoInstrSuffix> {
  let Mnemonic = Instr, mayRaiseFPException = 0, ReadsModeReg = 0,
      PseudoInstr = Instr#PseudoInstrSuffix in {
    // vdst tied to src2.
    let Constraints = "@earlyclobber $vdst,$vdst = $src2",
        isConvertibleToThreeAddress = 1 in
    def _twoaddr : VOP3P_Pseudo<Instr, WMMAProfile>;

    // vdst independent of src2.
    let Constraints = "@earlyclobber $vdst",
        SchedRW = [Write32Bit, Write32Bit] in
    def _threeaddr : VOP3P_Pseudo<Instr, WMMAProfile>;
  }

  def : WMMAOpcodeMapping<!cast<Instruction>(NAME # _twoaddr),
                          !cast<Instruction>(NAME # _threeaddr)>;
}

// Emits the single _twoaddr pseudo of one SWMMAC instruction. vdst is tied to
// the $srcTiedDef operand and the assembler uses the cvtSWMMAC converter.
multiclass SWMMACInstGFX12<string Instr, VOP3PWMMA_Profile WMMAProfile, string PseudoInstrSuffix> {
  let Mnemonic = Instr,
      PseudoInstr = Instr#PseudoInstrSuffix,
      mayRaiseFPException = 0,
      ReadsModeReg = 0,
      AsmMatchConverter = "cvtSWMMAC",
      Constraints = "@earlyclobber $vdst,$vdst = $srcTiedDef" in
  def _twoaddr : VOP3P_Pseudo<Instr, WMMAProfile>;
}

// The first argument of a profile lists the types of matrices D, A, B and C
// (D = A * B + C) as used by LLVM IR; the types are vectors of matrix
// elements.
// wave32:
// For 16x16 matrices, lanes 0 to 31 hold 8 matrix elements each;
// for 16x32 matrices 16 elements, and for 16x64 matrices 32 elements.
// wave64:
// Each lane holds half as many elements as in wave32, with the exception of
// 16x16_iu4: lanes 0-31 hold 8xi4 and the remaining lanes are ignored.

// General idea of the element distribution differences:
// wave32: lane n holds 8 matrix elements
// wave64: lane n holds the first 4, lane n+32 holds the other 4 elements

// Index size: for every 2 elements in a lane, 4 bits of index are needed.

// Non-standard types (iu8, iu4, fp8, bf8) are packed into vectors of i32s.
// Their original type is given in the comment on the right and refers to
// A and B.

// Profile arguments: <[D, A, B, C] types, IsSWMMAC, IndexType, IsIU, IsFP8BF8>.

// WMMA profiles, wave32.
def F32_F16_WMMA_w32    : VOP3PWMMA_Profile<[v8f32, v8f16, v8f16, v8f32], 0, 0, 0, 0>;
def F32_BF16_WMMA_w32   : VOP3PWMMA_Profile<[v8f32, v8i16, v8i16, v8f32], 0, 0, 0, 0>;
def F16_F16_WMMA_w32    : VOP3PWMMA_Profile<[v8f16, v8f16, v8f16, v8f16], 0, 0, 0, 0>;
def BF16_BF16_WMMA_w32  : VOP3PWMMA_Profile<[v8i16, v8i16, v8i16, v8i16], 0, 0, 0, 0>;
def I32_IU8_WMMA_w32    : VOP3PWMMA_Profile<[v8i32, v2i32, v2i32, v8i32], 0, 0, 1, 0>; // 8xi8
def I32_IU4X16_WMMA_w32 : VOP3PWMMA_Profile<[v8i32,   i32,   i32, v8i32], 0, 0, 1, 0>; // 8xi4
def F32_FP8BF8_WMMA_w32 : VOP3PWMMA_Profile<[v8f32, v2i32, v2i32, v8f32], 0, 0, 0, 1>; // 8xf8
def I32_IU4X32_WMMA_w32 : VOP3PWMMA_Profile<[v8i32, v2i32, v2i32, v8i32], 0, 0, 1, 0>; // 16xi4

// WMMA profiles, wave64.
def F32_F16_WMMA_w64    : VOP3PWMMA_Profile<[v4f32, v4f16, v4f16, v4f32], 0, 0, 0, 0>;
def F32_BF16_WMMA_w64   : VOP3PWMMA_Profile<[v4f32, v4i16, v4i16, v4f32], 0, 0, 0, 0>;
def F16_F16_WMMA_w64    : VOP3PWMMA_Profile<[v4f16, v4f16, v4f16, v4f16], 0, 0, 0, 0>;
def BF16_BF16_WMMA_w64  : VOP3PWMMA_Profile<[v4i16, v4i16, v4i16, v4i16], 0, 0, 0, 0>;
def I32_IU8_WMMA_w64    : VOP3PWMMA_Profile<[v4i32,   i32,   i32, v4i32], 0, 0, 1, 0>; // 4xi8
def I32_IU4X16_WMMA_w64 : VOP3PWMMA_Profile<[v4i32,   i32,   i32, v4i32], 0, 0, 1, 0>; // 8xi4 *
def F32_FP8BF8_WMMA_w64 : VOP3PWMMA_Profile<[v4f32,   i32,   i32, v4f32], 0, 0, 0, 1>; // 4xf8
def I32_IU4X32_WMMA_w64 : VOP3PWMMA_Profile<[v4i32,   i32,   i32, v4i32], 0, 0, 1, 0>; // 8xi4

// SWMMAC profiles, wave32.
def F32_F16_SWMMAC_w32    : VOP3PWMMA_Profile<[v8f32, v8f16, v16f16, v8f32], 1, 16, 0, 0>;
def F32_BF16_SWMMAC_w32   : VOP3PWMMA_Profile<[v8f32, v8i16, v16i16, v8f32], 1, 16, 0, 0>;
def F16_F16_SWMMAC_w32    : VOP3PWMMA_Profile<[v8f16, v8f16, v16f16, v8f16], 1, 16, 0, 0>;
def BF16_BF16_SWMMAC_w32  : VOP3PWMMA_Profile<[v8i16, v8i16, v16i16, v8i16], 1, 16, 0, 0>;
def I32_IU8_SWMMAC_w32    : VOP3PWMMA_Profile<[v8i32, v2i32,  v4i32, v8i32], 1, 16, 1, 0>; // 8xi8, 16xi8
def I32_IU4X32_SWMMAC_w32 : VOP3PWMMA_Profile<[v8i32,   i32,  v2i32, v8i32], 1, 16, 1, 0>; // 8xi4, 16xi4
def I32_IU4X64_SWMMAC_w32 : VOP3PWMMA_Profile<[v8i32, v2i32,  v4i32, v8i32], 1,  0, 1, 0>; // 16xi4, 32xi4 **
def F32_FP8BF8_SWMMAC_w32 : VOP3PWMMA_Profile<[v8f32, v2i32,  v4i32, v8f32], 1, 16, 0, 1>; // 8xf8, 16xf8

// SWMMAC profiles, wave64.
def F32_F16_SWMMAC_w64    : VOP3PWMMA_Profile<[v4f32, v4f16, v8f16, v4f32], 1,  8, 0, 0>;
def F32_BF16_SWMMAC_w64   : VOP3PWMMA_Profile<[v4f32, v4i16, v8i16, v4f32], 1,  8, 0, 0>;
def F16_F16_SWMMAC_w64    : VOP3PWMMA_Profile<[v4f16, v4f16, v8f16, v4f16], 1,  8, 0, 0>;
def BF16_BF16_SWMMAC_w64  : VOP3PWMMA_Profile<[v4i16, v4i16, v8i16, v4i16], 1,  8, 0, 0>;
def I32_IU8_SWMMAC_w64    : VOP3PWMMA_Profile<[v4i32,   i32, v2i32, v4i32], 1,  8, 1, 0>; // 4xi8, 8xi8
def I32_IU4X32_SWMMAC_w64 : VOP3PWMMA_Profile<[v4i32,   i32,   i32, v4i32], 1, 16, 1, 0>; // 8xi4, 8xi4 ***
def I32_IU4X64_SWMMAC_w64 : VOP3PWMMA_Profile<[v4i32,   i32, v2i32, v4i32], 1, 16, 1, 0>; // 8xi4, 16xi4
def F32_FP8BF8_SWMMAC_w64 : VOP3PWMMA_Profile<[v4f32,   i32, v2i32, v4f32], 1,  8, 0, 1>; // 4xf8, 8xf8

// *   IU4X16_WMMA_w64: lanes 0-31 hold 8xi4, the remaining lanes are ignored.
// **  IU4X64_SWMMAC_w32: the index is i32, index_key is not used.
// *** IU4X32_SWMMAC_w64: for matrix A, lanes 0-31 hold 8xi4 and the remaining
//                        lanes are ignored; the index is i16. Matrix B uses
//                        all lanes.

// Pseudo instantiations, wave32.
let WaveSizePredicate = isWave32 in {
defm V_WMMA_F32_16X16X16_F16_w32     : WMMAInstGFX12<"v_wmma_f32_16x16x16_f16",     F32_F16_WMMA_w32,    "_w32">;
defm V_WMMA_F32_16X16X16_BF16_w32    : WMMAInstGFX12<"v_wmma_f32_16x16x16_bf16",    F32_BF16_WMMA_w32,   "_w32">;
defm V_WMMA_F16_16X16X16_F16_w32     : WMMAInstGFX12<"v_wmma_f16_16x16x16_f16",     F16_F16_WMMA_w32,    "_w32">;
defm V_WMMA_BF16_16X16X16_BF16_w32   : WMMAInstGFX12<"v_wmma_bf16_16x16x16_bf16",   BF16_BF16_WMMA_w32,  "_w32">;
defm V_WMMA_I32_16X16X16_IU8_w32     : WMMAInstGFX12<"v_wmma_i32_16x16x16_iu8",     I32_IU8_WMMA_w32,    "_w32">;
defm V_WMMA_I32_16X16X16_IU4_w32     : WMMAInstGFX12<"v_wmma_i32_16x16x16_iu4",     I32_IU4X16_WMMA_w32, "_w32">;
defm V_WMMA_F32_16X16X16_FP8_FP8_w32 : WMMAInstGFX12<"v_wmma_f32_16x16x16_fp8_fp8", F32_FP8BF8_WMMA_w32, "_w32">;
defm V_WMMA_F32_16X16X16_FP8_BF8_w32 : WMMAInstGFX12<"v_wmma_f32_16x16x16_fp8_bf8", F32_FP8BF8_WMMA_w32, "_w32">;
defm V_WMMA_F32_16X16X16_BF8_FP8_w32 : WMMAInstGFX12<"v_wmma_f32_16x16x16_bf8_fp8", F32_FP8BF8_WMMA_w32, "_w32">;
defm V_WMMA_F32_16X16X16_BF8_BF8_w32 : WMMAInstGFX12<"v_wmma_f32_16x16x16_bf8_bf8", F32_FP8BF8_WMMA_w32, "_w32">;
defm V_WMMA_I32_16X16X32_IU4_w32     : WMMAInstGFX12<"v_wmma_i32_16x16x32_iu4",     I32_IU4X32_WMMA_w32, "_w32">;

defm V_SWMMAC_F32_16X16X32_F16_w32     : SWMMACInstGFX12<"v_swmmac_f32_16x16x32_f16",     F32_F16_SWMMAC_w32,    "_w32">;
defm V_SWMMAC_F32_16X16X32_BF16_w32    : SWMMACInstGFX12<"v_swmmac_f32_16x16x32_bf16",    F32_BF16_SWMMAC_w32,   "_w32">;
defm V_SWMMAC_F16_16X16X32_F16_w32     : SWMMACInstGFX12<"v_swmmac_f16_16x16x32_f16",     F16_F16_SWMMAC_w32,    "_w32">;
defm V_SWMMAC_BF16_16X16X32_BF16_w32   : SWMMACInstGFX12<"v_swmmac_bf16_16x16x32_bf16",   BF16_BF16_SWMMAC_w32,  "_w32">;
defm V_SWMMAC_I32_16X16X32_IU8_w32     : SWMMACInstGFX12<"v_swmmac_i32_16x16x32_iu8",     I32_IU8_SWMMAC_w32,    "_w32">;
defm V_SWMMAC_I32_16X16X32_IU4_w32     : SWMMACInstGFX12<"v_swmmac_i32_16x16x32_iu4",     I32_IU4X32_SWMMAC_w32, "_w32">;
defm V_SWMMAC_I32_16X16X64_IU4_w32     : SWMMACInstGFX12<"v_swmmac_i32_16x16x64_iu4",     I32_IU4X64_SWMMAC_w32, "_w32">;
defm V_SWMMAC_F32_16X16X32_FP8_FP8_w32 : SWMMACInstGFX12<"v_swmmac_f32_16x16x32_fp8_fp8", F32_FP8BF8_SWMMAC_w32, "_w32">;
defm V_SWMMAC_F32_16X16X32_FP8_BF8_w32 : SWMMACInstGFX12<"v_swmmac_f32_16x16x32_fp8_bf8", F32_FP8BF8_SWMMAC_w32, "_w32">;
defm V_SWMMAC_F32_16X16X32_BF8_FP8_w32 : SWMMACInstGFX12<"v_swmmac_f32_16x16x32_bf8_fp8", F32_FP8BF8_SWMMAC_w32, "_w32">;
defm V_SWMMAC_F32_16X16X32_BF8_BF8_w32 : SWMMACInstGFX12<"v_swmmac_f32_16x16x32_bf8_bf8", F32_FP8BF8_SWMMAC_w32, "_w32">;
}

// Pseudo instantiations, wave64.
let WaveSizePredicate = isWave64 in {
defm V_WMMA_F32_16X16X16_F16_w64     : WMMAInstGFX12<"v_wmma_f32_16x16x16_f16",     F32_F16_WMMA_w64,    "_w64">;
defm V_WMMA_F32_16X16X16_BF16_w64    : WMMAInstGFX12<"v_wmma_f32_16x16x16_bf16",    F32_BF16_WMMA_w64,   "_w64">;
defm V_WMMA_F16_16X16X16_F16_w64     : WMMAInstGFX12<"v_wmma_f16_16x16x16_f16",     F16_F16_WMMA_w64,    "_w64">;
defm V_WMMA_BF16_16X16X16_BF16_w64   : WMMAInstGFX12<"v_wmma_bf16_16x16x16_bf16",   BF16_BF16_WMMA_w64,  "_w64">;
defm V_WMMA_I32_16X16X16_IU8_w64     : WMMAInstGFX12<"v_wmma_i32_16x16x16_iu8",     I32_IU8_WMMA_w64,    "_w64">;
defm V_WMMA_I32_16X16X16_IU4_w64     : WMMAInstGFX12<"v_wmma_i32_16x16x16_iu4",     I32_IU4X16_WMMA_w64, "_w64">;
defm V_WMMA_F32_16X16X16_FP8_FP8_w64 : WMMAInstGFX12<"v_wmma_f32_16x16x16_fp8_fp8", F32_FP8BF8_WMMA_w64, "_w64">;
defm V_WMMA_F32_16X16X16_FP8_BF8_w64 : WMMAInstGFX12<"v_wmma_f32_16x16x16_fp8_bf8", F32_FP8BF8_WMMA_w64, "_w64">;
defm V_WMMA_F32_16X16X16_BF8_FP8_w64 : WMMAInstGFX12<"v_wmma_f32_16x16x16_bf8_fp8", F32_FP8BF8_WMMA_w64, "_w64">;
defm V_WMMA_F32_16X16X16_BF8_BF8_w64 : WMMAInstGFX12<"v_wmma_f32_16x16x16_bf8_bf8", F32_FP8BF8_WMMA_w64, "_w64">;
defm V_WMMA_I32_16X16X32_IU4_w64     : WMMAInstGFX12<"v_wmma_i32_16x16x32_iu4",     I32_IU4X32_WMMA_w64, "_w64">;

defm V_SWMMAC_F32_16X16X32_F16_w64     : SWMMACInstGFX12<"v_swmmac_f32_16x16x32_f16",     F32_F16_SWMMAC_w64,    "_w64">;
defm V_SWMMAC_F32_16X16X32_BF16_w64    : SWMMACInstGFX12<"v_swmmac_f32_16x16x32_bf16",    F32_BF16_SWMMAC_w64,   "_w64">;
defm V_SWMMAC_F16_16X16X32_F16_w64     : SWMMACInstGFX12<"v_swmmac_f16_16x16x32_f16",     F16_F16_SWMMAC_w64,    "_w64">;
defm V_SWMMAC_BF16_16X16X32_BF16_w64   : SWMMACInstGFX12<"v_swmmac_bf16_16x16x32_bf16",   BF16_BF16_SWMMAC_w64,  "_w64">;
defm V_SWMMAC_I32_16X16X32_IU8_w64     : SWMMACInstGFX12<"v_swmmac_i32_16x16x32_iu8",     I32_IU8_SWMMAC_w64,    "_w64">;
defm V_SWMMAC_I32_16X16X32_IU4_w64     : SWMMACInstGFX12<"v_swmmac_i32_16x16x32_iu4",     I32_IU4X32_SWMMAC_w64, "_w64">;
defm V_SWMMAC_I32_16X16X64_IU4_w64     : SWMMACInstGFX12<"v_swmmac_i32_16x16x64_iu4",     I32_IU4X64_SWMMAC_w64, "_w64">;
defm V_SWMMAC_F32_16X16X32_FP8_FP8_w64 : SWMMACInstGFX12<"v_swmmac_f32_16x16x32_fp8_fp8", F32_FP8BF8_SWMMAC_w64, "_w64">;
defm V_SWMMAC_F32_16X16X32_FP8_BF8_w64 : SWMMACInstGFX12<"v_swmmac_f32_16x16x32_fp8_bf8", F32_FP8BF8_SWMMAC_w64, "_w64">;
defm V_SWMMAC_F32_16X16X32_BF8_FP8_w64 : SWMMACInstGFX12<"v_swmmac_f32_16x16x32_bf8_fp8", F32_FP8BF8_SWMMAC_w64, "_w64">;
defm V_SWMMAC_F32_16X16X32_BF8_BF8_w64 : SWMMACInstGFX12<"v_swmmac_f32_16x16x32_bf8_bf8", F32_FP8BF8_SWMMAC_w64, "_w64">;
}

// IsGFX11OpselIntrinsic: the f16_f16 and bf16_bf16 intrinsics have an
// immediate operand that controls opsel. It is used by gfx11 and was removed
// in gfx12, where the operand must be 0.
// Selection patterns for one WMMA intrinsic `node`. Two GCNPats are emitted:
//  * P.WmmaInPat       -> P.WmmaOutPat       on the instruction Inst#"_twoaddr";
//  * P.WmmaInlineInPat -> P.WmmaInlineOutPat on Inst#"_threeaddr", with
//    AddedComplexity = 4.
// When IsGFX11OpselIntrinsic is set, a trailing constant-0 operand is appended
// to the matched input dag.
multiclass WMMAPat<string Inst, SDPatternOperator node, VOP3PWMMA_Profile P, bit IsGFX11OpselIntrinsic = 0> {
  def : GCNPat <(P.DstVT !setdagop(!con(P.WmmaInPat, !if(IsGFX11OpselIntrinsic, (ins 0), (ins))), node)),
                (P.DstVT !setdagop(P.WmmaOutPat, !cast<Instruction>(Inst#"_twoaddr")))>;
  let AddedComplexity = 4 in
  def : GCNPat <(P.DstVT !setdagop(!con(P.WmmaInlineInPat, !if(IsGFX11OpselIntrinsic, (ins 0), (ins))), node)),
                (P.DstVT !setdagop(P.WmmaInlineOutPat, !cast<Instruction>(Inst#"_threeaddr")))>;
}

// Single selection pattern for an SWMMAC intrinsic: P.SwmmacInPat on `node`
// is rewritten to P.SwmmacOutPat on `Inst`.
class SWMMACPat<Instruction Inst, SDPatternOperator node, VOP3PWMMA_Profile P> :
  GCNPat <(P.DstVT !setdagop(P.SwmmacInPat, node)),
          (P.DstVT !setdagop(P.SwmmacOutPat, Inst))>;

// Same pattern as SWMMACPat, with WaveSizePredicate fixed to isWave64.
class SWMMACPat_w64<Instruction Inst, SDPatternOperator node, VOP3PWMMA_Profile P> :
  GCNPat <(P.DstVT !setdagop(P.SwmmacInPat, node)),
          (P.DstVT !setdagop(P.SwmmacOutPat, Inst))>{
    let WaveSizePredicate = isWave64;
  }

// GFX12 wave32 patterns.
let WaveSizePredicate = isWave32, SubtargetPredicate = isGFX12Plus in {
  defm : WMMAPat<"V_WMMA_F32_16X16X16_F16_w32", int_amdgcn_wmma_f32_16x16x16_f16, F32_F16_WMMA_w32>;
  defm : WMMAPat<"V_WMMA_F32_16X16X16_BF16_w32", int_amdgcn_wmma_f32_16x16x16_bf16, F32_BF16_WMMA_w32>;
  defm : WMMAPat<"V_WMMA_F16_16X16X16_F16_w32", int_amdgcn_wmma_f16_16x16x16_f16, F16_F16_WMMA_w32,1>;
  defm : WMMAPat<"V_WMMA_BF16_16X16X16_BF16_w32", int_amdgcn_wmma_bf16_16x16x16_bf16, BF16_BF16_WMMA_w32,1>;
  defm : WMMAPat<"V_WMMA_I32_16X16X16_IU8_w32", int_amdgcn_wmma_i32_16x16x16_iu8, I32_IU8_WMMA_w32>;
  defm : WMMAPat<"V_WMMA_I32_16X16X16_IU4_w32", int_amdgcn_wmma_i32_16x16x16_iu4, I32_IU4X16_WMMA_w32>;
  defm : WMMAPat<"V_WMMA_F32_16X16X16_FP8_FP8_w32", int_amdgcn_wmma_f32_16x16x16_fp8_fp8, F32_FP8BF8_WMMA_w32>;
  defm : WMMAPat<"V_WMMA_F32_16X16X16_FP8_BF8_w32", int_amdgcn_wmma_f32_16x16x16_fp8_bf8, F32_FP8BF8_WMMA_w32>;
  defm : WMMAPat<"V_WMMA_F32_16X16X16_BF8_FP8_w32", int_amdgcn_wmma_f32_16x16x16_bf8_fp8, F32_FP8BF8_WMMA_w32>;
  defm : WMMAPat<"V_WMMA_F32_16X16X16_BF8_BF8_w32", int_amdgcn_wmma_f32_16x16x16_bf8_bf8, F32_FP8BF8_WMMA_w32>;
  defm : WMMAPat<"V_WMMA_I32_16X16X32_IU4_w32", int_amdgcn_wmma_i32_16x16x32_iu4, I32_IU4X32_WMMA_w32>;

  def : SWMMACPat<V_SWMMAC_F32_16X16X32_F16_w32_twoaddr, int_amdgcn_swmmac_f32_16x16x32_f16, F32_F16_SWMMAC_w32>;
  def : SWMMACPat<V_SWMMAC_F32_16X16X32_BF16_w32_twoaddr, int_amdgcn_swmmac_f32_16x16x32_bf16, F32_BF16_SWMMAC_w32>;
  def : SWMMACPat<V_SWMMAC_F16_16X16X32_F16_w32_twoaddr, int_amdgcn_swmmac_f16_16x16x32_f16, F16_F16_SWMMAC_w32>;
  def : SWMMACPat<V_SWMMAC_BF16_16X16X32_BF16_w32_twoaddr, int_amdgcn_swmmac_bf16_16x16x32_bf16, BF16_BF16_SWMMAC_w32>;
  def : SWMMACPat<V_SWMMAC_I32_16X16X32_IU8_w32_twoaddr, int_amdgcn_swmmac_i32_16x16x32_iu8, I32_IU8_SWMMAC_w32>;
  def : SWMMACPat<V_SWMMAC_I32_16X16X32_IU4_w32_twoaddr, int_amdgcn_swmmac_i32_16x16x32_iu4, I32_IU4X32_SWMMAC_w32>;
  // NOTE(review): this spelled-out pattern has the same shape as
  // SWMMACPat<V_SWMMAC_I32_16X16X64_IU4_w32_twoaddr,
  //           int_amdgcn_swmmac_i32_16x16x64_iu4, I32_IU4X64_SWMMAC_w32>,
  // which is what the wave64 block below uses; presumably it could be folded
  // into the helper class - confirm before changing.
  def : GCNPat <(I32_IU4X64_SWMMAC_w32.DstVT !setdagop(I32_IU4X64_SWMMAC_w32.SwmmacInPat, int_amdgcn_swmmac_i32_16x16x64_iu4)),
                (I32_IU4X64_SWMMAC_w32.DstVT !setdagop(I32_IU4X64_SWMMAC_w32.SwmmacOutPat, V_SWMMAC_I32_16X16X64_IU4_w32_twoaddr))>;
  def : SWMMACPat<V_SWMMAC_F32_16X16X32_FP8_FP8_w32_twoaddr, int_amdgcn_swmmac_f32_16x16x32_fp8_fp8, F32_FP8BF8_SWMMAC_w32>;
  def : SWMMACPat<V_SWMMAC_F32_16X16X32_FP8_BF8_w32_twoaddr, int_amdgcn_swmmac_f32_16x16x32_fp8_bf8, F32_FP8BF8_SWMMAC_w32>;
  def : SWMMACPat<V_SWMMAC_F32_16X16X32_BF8_FP8_w32_twoaddr, int_amdgcn_swmmac_f32_16x16x32_bf8_fp8, F32_FP8BF8_SWMMAC_w32>;
  def : SWMMACPat<V_SWMMAC_F32_16X16X32_BF8_BF8_w32_twoaddr, int_amdgcn_swmmac_f32_16x16x32_bf8_bf8, F32_FP8BF8_SWMMAC_w32>;
}

// GFX12 wave64 patterns.
let WaveSizePredicate = isWave64, SubtargetPredicate = isGFX12Plus in {
  defm : WMMAPat<"V_WMMA_F32_16X16X16_F16_w64", int_amdgcn_wmma_f32_16x16x16_f16, F32_F16_WMMA_w64>;
  defm : WMMAPat<"V_WMMA_F32_16X16X16_BF16_w64", int_amdgcn_wmma_f32_16x16x16_bf16, F32_BF16_WMMA_w64>;
  defm : WMMAPat<"V_WMMA_F16_16X16X16_F16_w64", int_amdgcn_wmma_f16_16x16x16_f16, F16_F16_WMMA_w64,1>;
  defm : WMMAPat<"V_WMMA_BF16_16X16X16_BF16_w64", int_amdgcn_wmma_bf16_16x16x16_bf16, BF16_BF16_WMMA_w64,1>;
  defm : WMMAPat<"V_WMMA_I32_16X16X16_IU8_w64", int_amdgcn_wmma_i32_16x16x16_iu8, I32_IU8_WMMA_w64>;
  defm : WMMAPat<"V_WMMA_I32_16X16X16_IU4_w64", int_amdgcn_wmma_i32_16x16x16_iu4, I32_IU4X16_WMMA_w64>;
  defm : WMMAPat<"V_WMMA_F32_16X16X16_FP8_FP8_w64", int_amdgcn_wmma_f32_16x16x16_fp8_fp8, F32_FP8BF8_WMMA_w64>;
  defm : WMMAPat<"V_WMMA_F32_16X16X16_FP8_BF8_w64", int_amdgcn_wmma_f32_16x16x16_fp8_bf8, F32_FP8BF8_WMMA_w64>;
  defm : WMMAPat<"V_WMMA_F32_16X16X16_BF8_FP8_w64", int_amdgcn_wmma_f32_16x16x16_bf8_fp8, F32_FP8BF8_WMMA_w64>;
  defm : WMMAPat<"V_WMMA_F32_16X16X16_BF8_BF8_w64", int_amdgcn_wmma_f32_16x16x16_bf8_bf8, F32_FP8BF8_WMMA_w64>;
  defm : WMMAPat<"V_WMMA_I32_16X16X32_IU4_w64", int_amdgcn_wmma_i32_16x16x32_iu4, I32_IU4X32_WMMA_w64>;

  def : SWMMACPat<V_SWMMAC_F32_16X16X32_F16_w64_twoaddr, int_amdgcn_swmmac_f32_16x16x32_f16, F32_F16_SWMMAC_w64>;
  def : SWMMACPat<V_SWMMAC_F32_16X16X32_BF16_w64_twoaddr, int_amdgcn_swmmac_f32_16x16x32_bf16, F32_BF16_SWMMAC_w64>;
  def : SWMMACPat<V_SWMMAC_F16_16X16X32_F16_w64_twoaddr, int_amdgcn_swmmac_f16_16x16x32_f16, F16_F16_SWMMAC_w64>;
  def : SWMMACPat<V_SWMMAC_BF16_16X16X32_BF16_w64_twoaddr, int_amdgcn_swmmac_bf16_16x16x32_bf16, BF16_BF16_SWMMAC_w64>;
  def : SWMMACPat<V_SWMMAC_I32_16X16X32_IU8_w64_twoaddr, int_amdgcn_swmmac_i32_16x16x32_iu8, I32_IU8_SWMMAC_w64>;
  def : SWMMACPat<V_SWMMAC_I32_16X16X32_IU4_w64_twoaddr, int_amdgcn_swmmac_i32_16x16x32_iu4, I32_IU4X32_SWMMAC_w64>;
  def : SWMMACPat<V_SWMMAC_I32_16X16X64_IU4_w64_twoaddr, int_amdgcn_swmmac_i32_16x16x64_iu4, I32_IU4X64_SWMMAC_w64>;
  def : SWMMACPat<V_SWMMAC_F32_16X16X32_FP8_FP8_w64_twoaddr, int_amdgcn_swmmac_f32_16x16x32_fp8_fp8, F32_FP8BF8_SWMMAC_w64>;
  def : SWMMACPat<V_SWMMAC_F32_16X16X32_FP8_BF8_w64_twoaddr, int_amdgcn_swmmac_f32_16x16x32_fp8_bf8, F32_FP8BF8_SWMMAC_w64>;
  def : SWMMACPat<V_SWMMAC_F32_16X16X32_BF8_FP8_w64_twoaddr, int_amdgcn_swmmac_f32_16x16x32_bf8_fp8, F32_FP8BF8_SWMMAC_w64>;
  def : SWMMACPat<V_SWMMAC_F32_16X16X32_BF8_BF8_w64_twoaddr, int_amdgcn_swmmac_f32_16x16x32_bf8_bf8, F32_FP8BF8_SWMMAC_w64>;
}


//===----------------------------------------------------------------------===//
// Begin Real Encodings
//===----------------------------------------------------------------------===//

// Real DPP16 encoding built from the DPP pseudo `ps`. Side-effect, Defs/Uses,
// scheduling, predicate and IsPacked settings are copied from the pseudo;
// AssemblerPredicate is set to HasDPP16.
class VOP3P_DPP16<bits<7> op, VOP_DPP_Pseudo ps, int subtarget,
                  string opName = ps.OpName>
    : VOP3P_DPP<op, opName, ps.Pfl, 1>, SIMCInstr<ps.PseudoInstr, subtarget> {
  let hasSideEffects = ps.hasSideEffects;
  let Defs = ps.Defs;
  let SchedRW = ps.SchedRW;
  let Uses = ps.Uses;
  let AssemblerPredicate = HasDPP16;
  let SubtargetPredicate = ps.SubtargetPredicate;
  let OtherPredicates = ps.OtherPredicates;
  let IsPacked = ps.IsPacked;
}

// Real DPP8 encoding built from the pseudo `ps`; copies the same fields as
// VOP3P_DPP16 but leaves AssemblerPredicate for the user to set.
class VOP3P_DPP8_Base<bits<7> op, VOP_Pseudo ps, string opName = ps.OpName>
    : VOP3P_DPP8<op, opName, ps.Pfl> {
  let hasSideEffects = ps.hasSideEffects;
  let Defs = ps.Defs;
  let SchedRW = ps.SchedRW;
  let Uses = ps.Uses;
  let SubtargetPredicate = ps.SubtargetPredicate;
  let OtherPredicates = ps.OtherPredicates;
  let IsPacked = ps.IsPacked;
}

//===----------------------------------------------------------------------===//
// GFX11, GFX12
//===----------------------------------------------------------------------===//

// Defines NAME#Gen.Suffix as the real instruction for the pseudo named
// backing_ps_name, using the shared GFX11/GFX12 VOP3P encoding with opcode
// `op`. Note that the default asmName is taken from the pseudo called NAME,
// not from backing_ps_name.
multiclass VOP3P_Real_Base<GFXGen Gen, bits<7> op, string backing_ps_name = NAME,
                      string asmName = !cast<VOP3P_Pseudo>(NAME).Mnemonic> {
  def Gen.Suffix :
    VOP3P_Real_Gen<!cast<VOP3P_Pseudo>(backing_ps_name), Gen, asmName>,
    VOP3Pe_gfx11_gfx12<op, !cast<VOP3P_Pseudo>(backing_ps_name).Pfl>;
}

// GFX11/GFX12 VOP3P encoding with the modifier bits overridden for
// WMMA/SWMMAC according to the WMMA profile WMMAP.
class VOP3PeWmma<bits<7> op, VOPProfile P, VOP3PWMMA_Profile WMMAP>
    : VOP3Pe_gfx11_gfx12<op, P>{
  // opsel: carries the index key (8-bit key uses two bits, 16-bit key one),
  // or zero when WMMAP.IndexType is 0.
  let Inst{11} = !cond(!eq(WMMAP.IndexType, 0)  : 0,
                       !eq(WMMAP.IndexType, 8)  : index_key_8bit{0},
                       !eq(WMMAP.IndexType, 16) : index_key_16bit{0});
  let Inst{12} = !if(!eq(WMMAP.IndexType, 8), index_key_8bit{1}, 0);
  let Inst{13} = 0;
  // opsel_hi: always set.
  let Inst{59} = 1;
  let Inst{60} = 1;
  let Inst{14} = 1;
  // neg_lo: taken from bit 0 of the source modifiers when the profile
  // enables it, otherwise zero.
  let Inst{61} = !if(WMMAP.NegLo01, src0_modifiers{0}, 0);
  let Inst{62} = !if(WMMAP.NegLo01, src1_modifiers{0}, 0);
  let Inst{63} = !if(WMMAP.NegLo2, src2_modifiers{0}, 0);
  // neg_hi: taken from bit 1 of the source modifiers when the profile
  // enables it, otherwise zero.
  let Inst{8}  = !if(WMMAP.NegHi01, src0_modifiers{1}, 0);
  let Inst{9}  = !if(WMMAP.NegHi01, src1_modifiers{1}, 0);
  let Inst{10} = !if(WMMAP.NegHi2, src2_modifiers{1}, 0);
  // clamp: only encoded when WMMAP.IsIU is set.
  let Inst{15} = !if(WMMAP.IsIU, clamp{0}, 0);
}

// Like VOP3P_Real_Base, but encodes through VOP3PeWmma with profile WMMAP.
multiclass VOP3P_WMMA_Real_Base<GFXGen Gen, bits<7> op, VOP3PWMMA_Profile WMMAP,
                                string backing_ps_name = NAME,
                                string asmName = !cast<VOP3P_Pseudo>(NAME).Mnemonic> {
  def Gen.Suffix :
    VOP3P_Real_Gen<!cast<VOP3P_Pseudo>(backing_ps_name), Gen, asmName>,
    VOP3PeWmma<op, !cast<VOP3P_Pseudo>(backing_ps_name).Pfl, WMMAP>;
}

// GFX12 wave32 real for the NAME#"_twoaddr" pseudo, decoded in namespace
// "GFX12".
multiclass VOP3P_Real_WMMA_gfx12 <bits<7> op, VOP3PWMMA_Profile WMMAP> {
  let WaveSizePredicate = isWave32, DecoderNamespace = "GFX12" in {
    defm _twoaddr : VOP3P_WMMA_Real_Base <GFX12Gen, op, WMMAP>;
  }
}

// GFX12 wave64 real for the NAME#"_twoaddr" pseudo, decoded in namespace
// "GFX12W64".
multiclass VOP3P_Real_WMMA_gfx12w64 <bits<7> op, VOP3PWMMA_Profile WMMAP> {
  let WaveSizePredicate = isWave64, DecoderNamespace = "GFX12W64" in {
    defm _twoaddr : VOP3P_WMMA_Real_Base <GFX12Gen, op, WMMAP>;
  }
}

defm V_WMMA_F32_16X16X16_F16_w32     : VOP3P_Real_WMMA_gfx12 <0x040, F32_F16_WMMA_w32>;
defm V_WMMA_F32_16X16X16_BF16_w32    : VOP3P_Real_WMMA_gfx12 <0x041, F32_BF16_WMMA_w32>;
defm V_WMMA_F16_16X16X16_F16_w32     : VOP3P_Real_WMMA_gfx12 <0x042, F16_F16_WMMA_w32>;
defm V_WMMA_BF16_16X16X16_BF16_w32   : VOP3P_Real_WMMA_gfx12 <0x043, BF16_BF16_WMMA_w32>;
defm V_WMMA_I32_16X16X16_IU8_w32     : VOP3P_Real_WMMA_gfx12 <0x044, I32_IU8_WMMA_w32>;
defm V_WMMA_I32_16X16X16_IU4_w32     : VOP3P_Real_WMMA_gfx12 <0x045, I32_IU4X16_WMMA_w32>;
defm V_WMMA_F32_16X16X16_FP8_FP8_w32 : VOP3P_Real_WMMA_gfx12 <0x046, F32_FP8BF8_WMMA_w32>;
defm V_WMMA_F32_16X16X16_FP8_BF8_w32 : VOP3P_Real_WMMA_gfx12 <0x047, F32_FP8BF8_WMMA_w32>;
defm V_WMMA_F32_16X16X16_BF8_FP8_w32 : VOP3P_Real_WMMA_gfx12 <0x048, F32_FP8BF8_WMMA_w32>;
defm V_WMMA_F32_16X16X16_BF8_BF8_w32 : VOP3P_Real_WMMA_gfx12 <0x049, F32_FP8BF8_WMMA_w32>;
defm V_WMMA_I32_16X16X32_IU4_w32     : VOP3P_Real_WMMA_gfx12 <0x04a, I32_IU4X32_WMMA_w32>;

defm V_WMMA_F32_16X16X16_F16_w64     : VOP3P_Real_WMMA_gfx12w64 <0x040, F32_F16_WMMA_w64>;
defm V_WMMA_F32_16X16X16_BF16_w64    : VOP3P_Real_WMMA_gfx12w64 <0x041, F32_BF16_WMMA_w64>;
defm V_WMMA_F16_16X16X16_F16_w64     : VOP3P_Real_WMMA_gfx12w64 <0x042, F16_F16_WMMA_w64>;
defm V_WMMA_BF16_16X16X16_BF16_w64   : VOP3P_Real_WMMA_gfx12w64 <0x043, BF16_BF16_WMMA_w64>;
defm V_WMMA_I32_16X16X16_IU8_w64     : VOP3P_Real_WMMA_gfx12w64 <0x044, I32_IU8_WMMA_w64>;
defm V_WMMA_I32_16X16X16_IU4_w64     : VOP3P_Real_WMMA_gfx12w64 <0x045, I32_IU4X16_WMMA_w64>;
defm V_WMMA_F32_16X16X16_FP8_FP8_w64 : VOP3P_Real_WMMA_gfx12w64 <0x046, F32_FP8BF8_WMMA_w64>;
defm V_WMMA_F32_16X16X16_FP8_BF8_w64 : VOP3P_Real_WMMA_gfx12w64 <0x047, F32_FP8BF8_WMMA_w64>;
defm V_WMMA_F32_16X16X16_BF8_FP8_w64 : VOP3P_Real_WMMA_gfx12w64 <0x048, F32_FP8BF8_WMMA_w64>;
defm V_WMMA_F32_16X16X16_BF8_BF8_w64 : VOP3P_Real_WMMA_gfx12w64 <0x049, F32_FP8BF8_WMMA_w64>;
defm V_WMMA_I32_16X16X32_IU4_w64     : VOP3P_Real_WMMA_gfx12w64 <0x04a, I32_IU4X32_WMMA_w64>;

// GFX12 SWMMAC real encodings (wave32, then wave64).
defm V_SWMMAC_F32_16X16X32_F16_w32     : VOP3P_Real_WMMA_gfx12 <0x050, F32_F16_SWMMAC_w32>;
defm V_SWMMAC_F32_16X16X32_BF16_w32    : VOP3P_Real_WMMA_gfx12 <0x051, F32_BF16_SWMMAC_w32>;
defm V_SWMMAC_F16_16X16X32_F16_w32     : VOP3P_Real_WMMA_gfx12 <0x052, F16_F16_SWMMAC_w32>;
defm V_SWMMAC_BF16_16X16X32_BF16_w32   : VOP3P_Real_WMMA_gfx12 <0x053, BF16_BF16_SWMMAC_w32>;
defm V_SWMMAC_I32_16X16X32_IU8_w32     : VOP3P_Real_WMMA_gfx12 <0x054, I32_IU8_SWMMAC_w32>;
defm V_SWMMAC_I32_16X16X32_IU4_w32     : VOP3P_Real_WMMA_gfx12 <0x055, I32_IU4X32_SWMMAC_w32>;
defm V_SWMMAC_I32_16X16X64_IU4_w32     : VOP3P_Real_WMMA_gfx12 <0x056, I32_IU4X64_SWMMAC_w32>;
defm V_SWMMAC_F32_16X16X32_FP8_FP8_w32 : VOP3P_Real_WMMA_gfx12 <0x057, F32_FP8BF8_SWMMAC_w32>;
defm V_SWMMAC_F32_16X16X32_FP8_BF8_w32 : VOP3P_Real_WMMA_gfx12 <0x058, F32_FP8BF8_SWMMAC_w32>;
defm V_SWMMAC_F32_16X16X32_BF8_FP8_w32 : VOP3P_Real_WMMA_gfx12 <0x059, F32_FP8BF8_SWMMAC_w32>;
defm V_SWMMAC_F32_16X16X32_BF8_BF8_w32 : VOP3P_Real_WMMA_gfx12 <0x05a, F32_FP8BF8_SWMMAC_w32>;

defm V_SWMMAC_F32_16X16X32_F16_w64     : VOP3P_Real_WMMA_gfx12w64 <0x050, F32_F16_SWMMAC_w64>;
defm V_SWMMAC_F32_16X16X32_BF16_w64    : VOP3P_Real_WMMA_gfx12w64 <0x051, F32_BF16_SWMMAC_w64>;
defm V_SWMMAC_F16_16X16X32_F16_w64     : VOP3P_Real_WMMA_gfx12w64 <0x052, F16_F16_SWMMAC_w64>;
defm V_SWMMAC_BF16_16X16X32_BF16_w64   : VOP3P_Real_WMMA_gfx12w64 <0x053, BF16_BF16_SWMMAC_w64>;
defm V_SWMMAC_I32_16X16X32_IU8_w64     : VOP3P_Real_WMMA_gfx12w64 <0x054, I32_IU8_SWMMAC_w64>;
defm V_SWMMAC_I32_16X16X32_IU4_w64     : VOP3P_Real_WMMA_gfx12w64 <0x055, I32_IU4X32_SWMMAC_w64>;
defm V_SWMMAC_I32_16X16X64_IU4_w64     : VOP3P_Real_WMMA_gfx12w64 <0x056, I32_IU4X64_SWMMAC_w64>;
defm V_SWMMAC_F32_16X16X32_FP8_FP8_w64 : VOP3P_Real_WMMA_gfx12w64 <0x057, F32_FP8BF8_SWMMAC_w64>;
defm V_SWMMAC_F32_16X16X32_FP8_BF8_w64 : VOP3P_Real_WMMA_gfx12w64 <0x058, F32_FP8BF8_SWMMAC_w64>;
defm V_SWMMAC_F32_16X16X32_BF8_FP8_w64 : VOP3P_Real_WMMA_gfx12w64 <0x059, F32_FP8BF8_SWMMAC_w64>;
defm V_SWMMAC_F32_16X16X32_BF8_BF8_w64 : VOP3P_Real_WMMA_gfx12w64 <0x05a, F32_FP8BF8_SWMMAC_w64>;

// Real instruction whose assembly name differs from its backing pseudo.
// AsmString is rebuilt from asmName plus the pseudo's operand string, and a
// mnemonic alias from the pseudo's mnemonic to asmName is registered under
// the generation's assembler predicate.
multiclass VOP3P_Real_with_name<GFXGen Gen, bits<7> op,
                                string backing_ps_name = NAME,
                                string asmName = !cast<VOP3P_Pseudo>(NAME).Mnemonic> {
  defvar ps = !cast<VOP3P_Pseudo>(backing_ps_name);
  let AsmString = asmName # ps.AsmOperands in
  def Gen.Suffix :
    VOP3P_Real_Gen<ps, Gen, asmName>,
    VOP3Pe_gfx11_gfx12<op, ps.Pfl>;

  def : AMDGPUMnemonicAlias<ps.Mnemonic, asmName> {
    let AssemblerPredicate = Gen.AssemblerPredicate;
  }
}

// DPP16 real, named NAME#"_dpp"#Gen.Suffix, built from the pseudo
// backing_ps_name#"_dpp". AsmString comes from the profile's AsmVOP3DPP16.
multiclass VOP3P_Real_dpp<GFXGen Gen, bits<7> op, string backing_ps_name = NAME,
                          string asmName = !cast<VOP3P_Pseudo>(NAME).Mnemonic> {
  defvar ps = !cast<VOP3P_Pseudo>(backing_ps_name);
  def _dpp#Gen.Suffix
      : VOP3P_DPP16<op, !cast<VOP_DPP_Pseudo>(backing_ps_name #"_dpp"),
                    Gen.Subtarget> {
    let AsmString = asmName #ps.Pfl.AsmVOP3DPP16;
    let DecoderNamespace = Gen.DecoderNamespace;
    let AssemblerPredicate = Gen.AssemblerPredicate;
  }
}

// DPP8 real, named NAME#"_dpp8"#Gen.Suffix, built directly from the pseudo
// backing_ps_name. AsmString comes from the profile's AsmVOP3DPP8.
multiclass VOP3P_Real_dpp8<GFXGen Gen, bits<7> op, string backing_ps_name = NAME,
                           string asmName = !cast<VOP3P_Pseudo>(NAME).Mnemonic> {
  defvar ps = !cast<VOP3P_Pseudo>(backing_ps_name);
  def _dpp8#Gen.Suffix : VOP3P_DPP8_Base<op, ps> {
    let AsmString = asmName #ps.Pfl.AsmVOP3DPP8;
    let DecoderNamespace = Gen.DecoderNamespace;
    let AssemblerPredicate = Gen.AssemblerPredicate;
  }
}

// Base, DPP16 and DPP8 reals for one opcode.
multiclass VOP3P_Realtriple<GFXGen Gen, bits<7> op, string backing_ps_name = NAME,
                            string asmName = !cast<VOP3P_Pseudo>(NAME).Mnemonic>
    : VOP3P_Real_Base<Gen, op, backing_ps_name, asmName>,
      VOP3P_Real_dpp<Gen, op, backing_ps_name, asmName>,
      VOP3P_Real_dpp8<Gen, op, backing_ps_name, asmName>;

//===----------------------------------------------------------------------===//
// GFX12
//===----------------------------------------------------------------------===//

multiclass VOP3P_Real_gfx12<bits<7> op> : VOP3P_Real_Base<GFX12Gen, op>;

multiclass VOP3P_Real_with_name_gfx12<bits<7> op,
                                      string backing_ps_name = NAME,
                                      string asmName = !cast<VOP3P_Pseudo>(NAME).Mnemonic> :
  VOP3P_Real_with_name<GFX12Gen, op, backing_ps_name, asmName>;

// Backed by the V_PK_MIN_F16 / V_PK_MAX_F16 pseudos, with new assembly names.
defm V_PK_MIN_NUM_F16 : VOP3P_Real_with_name_gfx12<0x1b, "V_PK_MIN_F16", "v_pk_min_num_f16">;
defm V_PK_MAX_NUM_F16 : VOP3P_Real_with_name_gfx12<0x1c, "V_PK_MAX_F16", "v_pk_max_num_f16">;

defm V_PK_MINIMUM_F16 : VOP3P_Real_gfx12<0x1d>;
defm V_PK_MAXIMUM_F16 : VOP3P_Real_gfx12<0x1e>;

defm V_DOT4_F32_FP8_BF8 : VOP3P_Realtriple<GFX12Gen, 0x24>;
defm V_DOT4_F32_BF8_FP8 : VOP3P_Realtriple<GFX12Gen, 0x25>;
defm V_DOT4_F32_FP8_FP8 : VOP3P_Realtriple<GFX12Gen, 0x26>;
defm V_DOT4_F32_BF8_BF8 : VOP3P_Realtriple<GFX12Gen, 0x27>;

//===----------------------------------------------------------------------===//
// GFX11
//===----------------------------------------------------------------------===//

// Same opcode emitted for both the GFX11 and GFX12 generations.
multiclass VOP3P_Real_gfx11_gfx12<bits<7> op> :
   VOP3P_Real_Base<GFX11Gen, op>, VOP3P_Real_Base<GFX12Gen, op>;

defm V_DOT4_I32_IU8  : VOP3P_Real_gfx11_gfx12<0x16>;
defm V_DOT8_I32_IU4  : VOP3P_Real_gfx11_gfx12<0x18>;
defm V_DOT2_F32_BF16 : VOP3P_Real_gfx11_gfx12<0x1a>;

// GFX11 WMMA reals for the _twoaddr_w32 and _twoaddr_w64 pseudos; the wave64
// variant is decoded in its own "GFX11W64" namespace.
multiclass VOP3P_Real_WMMA <bits<7> op> {
  let WaveSizePredicate = isWave32, DecoderNamespace = "GFX11" in {
    defm _twoaddr_w32 : VOP3P_Real_Base <GFX11Gen, op>;
  }
  let WaveSizePredicate = isWave64, DecoderNamespace = "GFX11W64" in {
    defm _twoaddr_w64 : VOP3P_Real_Base <GFX11Gen, op>;
  }
}

defm V_WMMA_F32_16X16X16_F16   : VOP3P_Real_WMMA <0x040>;
defm V_WMMA_F32_16X16X16_BF16  : VOP3P_Real_WMMA <0x041>;
defm V_WMMA_F16_16X16X16_F16   : VOP3P_Real_WMMA <0x042>;
defm V_WMMA_BF16_16X16X16_BF16 : VOP3P_Real_WMMA <0x043>;
defm V_WMMA_I32_16X16X16_IU8   : VOP3P_Real_WMMA <0x044>;
defm V_WMMA_I32_16X16X16_IU4   : VOP3P_Real_WMMA <0x045>;

//===----------------------------------------------------------------------===//
// GFX8 (VI)
//===----------------------------------------------------------------------===//

// VI-family real for the VOP3 pseudo NAME, decoded in namespace "GFX8".
multiclass VOP3P_Real_vi<bits<7> op> {
  def _vi : VOP3P_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
            VOP3Pe <op, !cast<VOP3_Pseudo>(NAME).Pfl> {
    let AssemblerPredicate = HasVOP3PInsts;
    let DecoderNamespace = "GFX8";
    let VOP3P = 1;
  }
}

// VI-family real for the MAI pseudo NAME#"_e64". The op_sel_hi bits are left
// unset (`?`) in the encoding.
multiclass VOP3P_Real_MAI<bits<7> op> {
  def _vi : VOP3P_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
            VOP3Pe_MAI <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl, ?> {
    let AssemblerPredicate = HasMAIInsts;
    let DecoderNamespace = "GFX8";
    let Inst{14} = ?; // op_sel_hi(2)
    let Inst{59} = ?; // op_sel_hi(0)
    let Inst{60} = ?; // op_sel_hi(1)
  }
}

// GFX90A MFMA reals: one for the NAME#"_e64" pseudo (_gfx90a_acd, last
// VOP3Pe_MAI argument 1) and one for NAME#"_vgprcd_e64" (_gfx90a_vcd, last
// argument 0).
let Constraints = "" in {
multiclass VOP3P_Real_MFMA_gfx90a<bits<7> op> {
  let SubtargetPredicate = isGFX90AOnly,
      AssemblerPredicate = isGFX90AOnly, DecoderNamespace = "GFX90A" in {
  def _gfx90a_acd : VOP3P_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX90A>,
             VOP3Pe_MAI <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl, 1>;

  def _gfx90a_vcd : VOP3P_Real<!cast<VOP3_Pseudo>(NAME # "_vgprcd" # "_e64"), SIEncodingFamily.GFX90A>,
             VOP3Pe_MAI <op, !cast<VOP3_Pseudo>(NAME # "_vgprcd" # "_e64").Pfl, 0>;
  } // End SubtargetPredicate = isGFX90AOnly, AssemblerPredicate = isGFX90AOnly, DecoderNamespace = "GFX90A"
}
}

// Assembler aliases for a GFX940 MFMA instruction. When NameTo differs from
// NameFrom, the spelling NameTo (followed by the pseudo's operand string) is
// accepted for both the Op#"_gfx940_acd" and Op#"_gfx940_vcd" reals. NameFrom
// is only used for that comparison.
multiclass VOP3P_Real_MFMA_gfx940_aliases<string NameFrom, string NameTo, string Op,
                                          VOP3_Pseudo PS_ACD = !cast<VOP3_Pseudo>(Op # "_e64"),
                                          VOP3_Pseudo PS_VCD = !cast<VOP3_Pseudo>(Op # "_vgprcd" # "_e64"),
                                          VOPProfile Pfl_ACD = PS_ACD.Pfl,
                                          VOPProfile Pfl_VCD = PS_VCD.Pfl> {
  if !ne(NameFrom, NameTo) then {
    def : InstAlias <NameTo # " " # PS_ACD.AsmOperands,
                     (!cast<VOP3P_Real>(Op # "_gfx940_acd") Pfl_ACD.DstRC:$vdst,
                         Pfl_ACD.Src0RC64:$src0, Pfl_ACD.Src1RC64:$src1, Pfl_ACD.Src2RC64:$src2,
                         CBSZ:$cbsz, ABID:$abid, blgp:$blgp)>, PredicateControl;
    def : InstAlias <NameTo # " " # PS_VCD.AsmOperands,
                     (!cast<VOP3P_Real>(Op # "_gfx940_vcd") Pfl_VCD.DstRC:$vdst,
                         Pfl_VCD.Src0RC64:$src0, Pfl_VCD.Src1RC64:$src1, Pfl_VCD.Src2RC64:$src2,
                         CBSZ:$cbsz, ABID:$abid, blgp:$blgp)>, PredicateControl;
  }
}

multiclass VOP3P_Real_MFMA_gfx940<bits<7> op, string Name = !cast<VOP3_Pseudo>(NAME#"_e64").Mnemonic,
                                  VOP3_Pseudo PS_ACD = !cast<VOP3_Pseudo>(NAME # "_e64"),
                                  VOP3_Pseudo PS_VCD = !cast<VOP3_Pseudo>(NAME # "_vgprcd" # "_e64")> {
  let AssemblerPredicate = isGFX940Plus,
      DecoderNamespace = "GFX940",
      AsmString = Name # PS_ACD.AsmOperands, Constraints = "" in {
  def _gfx940_acd : VOP3P_Real<PS_ACD, SIEncodingFamily.GFX940>,
                    VOP3Pe_MAI <op, PS_ACD.Pfl, 1>;

  def _gfx940_vcd : VOP3P_Real<PS_VCD, SIEncodingFamily.GFX940>,
                    VOP3Pe_MAI <op, PS_VCD.Pfl, 0>;
  } // End AssemblerPredicate = isGFX940Plus, DecoderNamespace = "GFX940"

  let SubtargetPredicate = isGFX940Plus in {
    defm : VOP3P_Real_MFMA_gfx940_aliases<Name, PS_ACD.Mnemonic, NAME>;

    if !ne(!subst("_1k", "", PS_ACD.Mnemonic), PS_ACD.Mnemonic) then
    defm : VOP3P_Real_MFMA_gfx940_aliases<Name, !subst("_1k", "", PS_ACD.Mnemonic), NAME>;
  }
16465f757f3fSDimitry Andric} 164781ad6265SDimitry Andric 16485f757f3fSDimitry Andricmulticlass VOP3P_Real_MFMA_vi<bits<7> op> { 1649e8d8bef9SDimitry Andric def _vi : VOP3P_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>, 1650fe6060f1SDimitry Andric VOP3Pe_MAI <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl, ?> { 16515f757f3fSDimitry Andric let SubtargetPredicate = isGFX8GFX9NotGFX90A; 1652480093f4SDimitry Andric let AssemblerPredicate = HasMAIInsts; 16530b57cec5SDimitry Andric let DecoderNamespace = "GFX8"; 165481ad6265SDimitry Andric let Constraints = ""; 16550b57cec5SDimitry Andric } 16560b57cec5SDimitry Andric} 165781ad6265SDimitry Andric 16585f757f3fSDimitry Andricmulticlass VOP3P_Real_MFMA_vi_gfx90a<bits<7> op> : 16595f757f3fSDimitry Andric VOP3P_Real_MFMA_gfx90a <op>, 16605f757f3fSDimitry Andric VOP3P_Real_MFMA_vi <op>; 16615f757f3fSDimitry Andric 16625f757f3fSDimitry Andricmulticlass VOP3P_Real_MFMA<bits<7> op, string GFX940Name = !cast<VOP3_Pseudo>(NAME#"_e64").Mnemonic> : 16635f757f3fSDimitry Andric VOP3P_Real_MFMA_vi_gfx90a <op>, 16645f757f3fSDimitry Andric VOP3P_Real_MFMA_gfx940 <op, GFX940Name>; 16655f757f3fSDimitry Andric 166681ad6265SDimitry Andricmulticlass VOP3P_Real_SMFMAC<bits<7> op, string alias> { 166781ad6265SDimitry Andric def _gfx940 : VOP3P_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>, 166881ad6265SDimitry Andric VOP3Pe_SMFMAC <op> { 166981ad6265SDimitry Andric let AssemblerPredicate = isGFX940Plus; 167081ad6265SDimitry Andric let DecoderNamespace = "GFX8"; 167181ad6265SDimitry Andric } 1672*0fca6ea1SDimitry Andric def : AMDGPUMnemonicAlias<alias, !cast<VOP3_Pseudo>(NAME#"_e64").Mnemonic> { 1673*0fca6ea1SDimitry Andric let AssemblerPredicate = isGFX940Plus; 1674*0fca6ea1SDimitry Andric } 167504eeddc0SDimitry Andric} 16760b57cec5SDimitry Andric 16775f757f3fSDimitry Andriclet SubtargetPredicate = isGFX8GFX9 in { 1678e8d8bef9SDimitry Andricdefm V_PK_MAD_I16 : VOP3P_Real_vi <0x00>; 1679e8d8bef9SDimitry Andricdefm 
V_PK_MUL_LO_U16 : VOP3P_Real_vi <0x01>; 1680e8d8bef9SDimitry Andricdefm V_PK_ADD_I16 : VOP3P_Real_vi <0x02>; 1681e8d8bef9SDimitry Andricdefm V_PK_SUB_I16 : VOP3P_Real_vi <0x03>; 1682e8d8bef9SDimitry Andricdefm V_PK_LSHLREV_B16 : VOP3P_Real_vi <0x04>; 1683e8d8bef9SDimitry Andricdefm V_PK_LSHRREV_B16 : VOP3P_Real_vi <0x05>; 1684e8d8bef9SDimitry Andricdefm V_PK_ASHRREV_I16 : VOP3P_Real_vi <0x06>; 1685e8d8bef9SDimitry Andricdefm V_PK_MAX_I16 : VOP3P_Real_vi <0x07>; 1686e8d8bef9SDimitry Andricdefm V_PK_MIN_I16 : VOP3P_Real_vi <0x08>; 1687e8d8bef9SDimitry Andricdefm V_PK_MAD_U16 : VOP3P_Real_vi <0x09>; 16880b57cec5SDimitry Andric 1689e8d8bef9SDimitry Andricdefm V_PK_ADD_U16 : VOP3P_Real_vi <0x0a>; 1690e8d8bef9SDimitry Andricdefm V_PK_SUB_U16 : VOP3P_Real_vi <0x0b>; 1691e8d8bef9SDimitry Andricdefm V_PK_MAX_U16 : VOP3P_Real_vi <0x0c>; 1692e8d8bef9SDimitry Andricdefm V_PK_MIN_U16 : VOP3P_Real_vi <0x0d>; 1693e8d8bef9SDimitry Andricdefm V_PK_FMA_F16 : VOP3P_Real_vi <0x0e>; 1694e8d8bef9SDimitry Andricdefm V_PK_ADD_F16 : VOP3P_Real_vi <0x0f>; 1695e8d8bef9SDimitry Andricdefm V_PK_MUL_F16 : VOP3P_Real_vi <0x10>; 1696e8d8bef9SDimitry Andricdefm V_PK_MIN_F16 : VOP3P_Real_vi <0x11>; 1697e8d8bef9SDimitry Andricdefm V_PK_MAX_F16 : VOP3P_Real_vi <0x12>; 16980b57cec5SDimitry Andric 16995f757f3fSDimitry Andriclet OtherPredicates = [HasMadMixInsts] in { 1700e8d8bef9SDimitry Andricdefm V_MAD_MIX_F32 : VOP3P_Real_vi <0x20>; 1701e8d8bef9SDimitry Andricdefm V_MAD_MIXLO_F16 : VOP3P_Real_vi <0x21>; 1702e8d8bef9SDimitry Andricdefm V_MAD_MIXHI_F16 : VOP3P_Real_vi <0x22>; 17030b57cec5SDimitry Andric} 17040b57cec5SDimitry Andric 17055f757f3fSDimitry Andriclet OtherPredicates = [HasFmaMixInsts], 17065f757f3fSDimitry Andric DecoderNamespace = "GFX9_DL" in { 17070b57cec5SDimitry Andric// The mad_mix instructions were renamed and their behaviors changed, 17080b57cec5SDimitry Andric// but the opcode stayed the same so we need to put these in a 17090b57cec5SDimitry Andric// different DecoderNamespace to 
avoid the ambiguity. 1710e8d8bef9SDimitry Andricdefm V_FMA_MIX_F32 : VOP3P_Real_vi <0x20>; 1711e8d8bef9SDimitry Andricdefm V_FMA_MIXLO_F16 : VOP3P_Real_vi <0x21>; 1712e8d8bef9SDimitry Andricdefm V_FMA_MIXHI_F16 : VOP3P_Real_vi <0x22>; 17130b57cec5SDimitry Andric} 17140b57cec5SDimitry Andric 1715e8d8bef9SDimitry Andricdefm V_DOT2_I32_I16 : VOP3P_Real_vi <0x26>; 1716e8d8bef9SDimitry Andricdefm V_DOT2_U32_U16 : VOP3P_Real_vi <0x27>; 1717fe6060f1SDimitry Andric 1718fe6060f1SDimitry Andricdefm V_DOT2_F32_F16 : VOP3P_Real_vi <0x23>; 1719e8d8bef9SDimitry Andricdefm V_DOT4_U32_U8 : VOP3P_Real_vi <0x29>; 1720e8d8bef9SDimitry Andricdefm V_DOT8_U32_U4 : VOP3P_Real_vi <0x2b>; 17210b57cec5SDimitry Andric 1722e8d8bef9SDimitry Andricdefm V_DOT4_I32_I8 : VOP3P_Real_vi <0x28>; 1723e8d8bef9SDimitry Andricdefm V_DOT8_I32_I4 : VOP3P_Real_vi <0x2a>; 17245f757f3fSDimitry Andric} // End SubtargetPredicate = isGFX8GFX9 17250b57cec5SDimitry Andric 17265f757f3fSDimitry Andriclet OtherPredicates = [HasMAIInsts] in { 17270b57cec5SDimitry Andric 1728e8d8bef9SDimitry Andricdefm V_ACCVGPR_READ_B32 : VOP3P_Real_MAI <0x58>; 1729e8d8bef9SDimitry Andricdefm V_ACCVGPR_WRITE_B32 : VOP3P_Real_MAI <0x59>; 173081ad6265SDimitry Andricdefm V_MFMA_F32_32X32X1F32 : VOP3P_Real_MFMA <0x40, "v_mfma_f32_32x32x1_2b_f32">; 173181ad6265SDimitry Andricdefm V_MFMA_F32_16X16X1F32 : VOP3P_Real_MFMA <0x41, "v_mfma_f32_16x16x1_4b_f32">; 173281ad6265SDimitry Andricdefm V_MFMA_F32_4X4X1F32 : VOP3P_Real_MFMA <0x42, "v_mfma_f32_4x4x1_16b_f32">; 173381ad6265SDimitry Andricdefm V_MFMA_F32_32X32X2F32 : VOP3P_Real_MFMA <0x44, "v_mfma_f32_32x32x2_f32">; 173481ad6265SDimitry Andricdefm V_MFMA_F32_16X16X4F32 : VOP3P_Real_MFMA <0x45, "v_mfma_f32_16x16x4_f32">; 173581ad6265SDimitry Andricdefm V_MFMA_F32_32X32X4F16 : VOP3P_Real_MFMA <0x48, "v_mfma_f32_32x32x4_2b_f16">; 173681ad6265SDimitry Andricdefm V_MFMA_F32_16X16X4F16 : VOP3P_Real_MFMA <0x49, "v_mfma_f32_16x16x4_4b_f16">; 173781ad6265SDimitry Andricdefm V_MFMA_F32_4X4X4F16 : 
VOP3P_Real_MFMA <0x4a, "v_mfma_f32_4x4x4_16b_f16">; 173881ad6265SDimitry Andricdefm V_MFMA_F32_32X32X8F16 : VOP3P_Real_MFMA <0x4c, "v_mfma_f32_32x32x8_f16">; 173981ad6265SDimitry Andricdefm V_MFMA_F32_16X16X16F16 : VOP3P_Real_MFMA <0x4d, "v_mfma_f32_16x16x16_f16">; 174081ad6265SDimitry Andricdefm V_MFMA_I32_32X32X4I8 : VOP3P_Real_MFMA <0x50, "v_mfma_i32_32x32x4_2b_i8">; 174181ad6265SDimitry Andricdefm V_MFMA_I32_16X16X4I8 : VOP3P_Real_MFMA <0x51, "v_mfma_i32_16x16x4_4b_i8">; 174281ad6265SDimitry Andricdefm V_MFMA_I32_4X4X4I8 : VOP3P_Real_MFMA <0x52, "v_mfma_i32_4x4x4_16b_i8">; 174381ad6265SDimitry Andric 17445f757f3fSDimitry Andricdefm V_MFMA_I32_16X16X16I8 : VOP3P_Real_MFMA_vi_gfx90a <0x55>; 17455f757f3fSDimitry Andricdefm V_MFMA_I32_32X32X8I8 : VOP3P_Real_MFMA_vi_gfx90a <0x54>; 17465f757f3fSDimitry Andricdefm V_MFMA_F32_32X32X2BF16 : VOP3P_Real_MFMA_vi_gfx90a <0x68>; 17475f757f3fSDimitry Andricdefm V_MFMA_F32_16X16X2BF16 : VOP3P_Real_MFMA_vi_gfx90a <0x69>; 17485f757f3fSDimitry Andricdefm V_MFMA_F32_4X4X2BF16 : VOP3P_Real_MFMA_vi_gfx90a <0x6b>; 17495f757f3fSDimitry Andricdefm V_MFMA_F32_32X32X4BF16 : VOP3P_Real_MFMA_vi_gfx90a <0x6c>; 17505f757f3fSDimitry Andricdefm V_MFMA_F32_16X16X8BF16 : VOP3P_Real_MFMA_vi_gfx90a <0x6d>; 17510b57cec5SDimitry Andric 17525f757f3fSDimitry Andric} // End OtherPredicates = [HasMAIInsts] 17530b57cec5SDimitry Andric 1754fe6060f1SDimitry Andricdefm V_MFMA_F32_32X32X4BF16_1K : VOP3P_Real_MFMA_gfx90a <0x63>; 1755fe6060f1SDimitry Andricdefm V_MFMA_F32_16X16X4BF16_1K : VOP3P_Real_MFMA_gfx90a <0x64>; 1756fe6060f1SDimitry Andricdefm V_MFMA_F32_4X4X4BF16_1K : VOP3P_Real_MFMA_gfx90a <0x65>; 1757fe6060f1SDimitry Andricdefm V_MFMA_F32_32X32X8BF16_1K : VOP3P_Real_MFMA_gfx90a <0x66>; 1758fe6060f1SDimitry Andricdefm V_MFMA_F32_16X16X16BF16_1K : VOP3P_Real_MFMA_gfx90a <0x67>; 1759fe6060f1SDimitry Andricdefm V_MFMA_F64_16X16X4F64 : VOP3P_Real_MFMA_gfx90a <0x6e>; 1760fe6060f1SDimitry Andricdefm V_MFMA_F64_4X4X4F64 : VOP3P_Real_MFMA_gfx90a <0x6f>; 

// GFX940 MFMA encodings. Where a string is given it is the GFX940 mnemonic;
// otherwise the pseudo's own mnemonic is used (the multiclass default).
defm V_MFMA_I32_32X32X16I8       : VOP3P_Real_MFMA_gfx940 <0x56, "v_mfma_i32_32x32x16_i8">;
defm V_MFMA_I32_16X16X32I8       : VOP3P_Real_MFMA_gfx940 <0x57, "v_mfma_i32_16x16x32_i8">;
defm V_MFMA_F32_16X16X8XF32      : VOP3P_Real_MFMA_gfx940 <0x3e, "v_mfma_f32_16x16x8_xf32">;
defm V_MFMA_F32_32X32X4XF32      : VOP3P_Real_MFMA_gfx940 <0x3f, "v_mfma_f32_32x32x4_xf32">;
defm V_MFMA_F32_16X16X32_BF8_BF8 : VOP3P_Real_MFMA_gfx940 <0x70>;
defm V_MFMA_F32_16X16X32_BF8_FP8 : VOP3P_Real_MFMA_gfx940 <0x71>;
defm V_MFMA_F32_16X16X32_FP8_BF8 : VOP3P_Real_MFMA_gfx940 <0x72>;
defm V_MFMA_F32_16X16X32_FP8_FP8 : VOP3P_Real_MFMA_gfx940 <0x73>;
defm V_MFMA_F32_32X32X16_BF8_BF8 : VOP3P_Real_MFMA_gfx940 <0x74>;
defm V_MFMA_F32_32X32X16_BF8_FP8 : VOP3P_Real_MFMA_gfx940 <0x75>;
defm V_MFMA_F32_32X32X16_FP8_BF8 : VOP3P_Real_MFMA_gfx940 <0x76>;
defm V_MFMA_F32_32X32X16_FP8_FP8 : VOP3P_Real_MFMA_gfx940 <0x77>;

// The *_1K pseudos are given new GFX940 mnemonics.
defm V_MFMA_F32_32X32X4BF16_1K  : VOP3P_Real_MFMA_gfx940 <0x5d, "v_mfma_f32_32x32x4_2b_bf16">;
defm V_MFMA_F32_16X16X4BF16_1K  : VOP3P_Real_MFMA_gfx940 <0x5e, "v_mfma_f32_16x16x4_4b_bf16">;
defm V_MFMA_F32_4X4X4BF16_1K    : VOP3P_Real_MFMA_gfx940 <0x5f, "v_mfma_f32_4x4x4_16b_bf16">;
defm V_MFMA_F32_32X32X8BF16_1K  : VOP3P_Real_MFMA_gfx940 <0x60, "v_mfma_f32_32x32x8_bf16">;
defm V_MFMA_F32_16X16X16BF16_1K : VOP3P_Real_MFMA_gfx940 <0x61, "v_mfma_f32_16x16x16_bf16">;

defm V_MFMA_F64_16X16X4F64 : VOP3P_Real_MFMA_gfx940 <0x6e, "v_mfma_f64_16x16x4_f64">;
defm V_MFMA_F64_4X4X4F64 : VOP3P_Real_MFMA_gfx940 <0x6f, "v_mfma_f64_4x4x4_4b_f64">;

// SMFMAC encodings; the string is registered as a mnemonic alias for the
// pseudo's mnemonic.
defm V_SMFMAC_F32_16X16X32_F16     : VOP3P_Real_SMFMAC <0x62, "v_smfmac_f32_16x16x32f16">;
defm V_SMFMAC_F32_32X32X16_F16     : VOP3P_Real_SMFMAC <0x64, "v_smfmac_f32_32x32x16f16">;
defm V_SMFMAC_F32_16X16X32_BF16    : VOP3P_Real_SMFMAC <0x66, "v_smfmac_f32_16x16x32bf16">;
defm V_SMFMAC_F32_32X32X16_BF16    : VOP3P_Real_SMFMAC <0x68, "v_smfmac_f32_32x32x16bf16">;
defm V_SMFMAC_I32_16X16X64_I8      : VOP3P_Real_SMFMAC <0x6a, "v_smfmac_i32_16x16x64i8">;
defm V_SMFMAC_I32_32X32X32_I8      : VOP3P_Real_SMFMAC <0x6c, "v_smfmac_i32_32x32x32i8">;
defm V_SMFMAC_F32_16X16X64_BF8_BF8 : VOP3P_Real_SMFMAC <0x78, "v_smfmac_f32_16x16x64bf8bf8">;
defm V_SMFMAC_F32_16X16X64_BF8_FP8 : VOP3P_Real_SMFMAC <0x79, "v_smfmac_f32_16x16x64bf8fp8">;
defm V_SMFMAC_F32_16X16X64_FP8_BF8 : VOP3P_Real_SMFMAC <0x7a, "v_smfmac_f32_16x16x64fp8bf8">;
defm V_SMFMAC_F32_16X16X64_FP8_FP8 : VOP3P_Real_SMFMAC <0x7b, "v_smfmac_f32_16x16x64fp8fp8">;
defm V_SMFMAC_F32_32X32X32_BF8_BF8 : VOP3P_Real_SMFMAC <0x7c, "v_smfmac_f32_32x32x32bf8bf8">;
defm V_SMFMAC_F32_32X32X32_BF8_FP8 : VOP3P_Real_SMFMAC <0x7d, "v_smfmac_f32_32x32x32bf8fp8">;
defm V_SMFMAC_F32_32X32X32_FP8_BF8 : VOP3P_Real_SMFMAC <0x7e, "v_smfmac_f32_32x32x32fp8bf8">;
defm V_SMFMAC_F32_32X32X32_FP8_FP8 : VOP3P_Real_SMFMAC <0x7f, "v_smfmac_f32_32x32x32fp8fp8">;

// Packed 32-bit operations on the VI encoding.
defm V_PK_FMA_F32 : VOP3P_Real_vi <0x30>;
defm V_PK_MUL_F32 : VOP3P_Real_vi <0x31>;
defm V_PK_ADD_F32 : VOP3P_Real_vi <0x32>;
defm V_PK_MOV_B32 : VOP3P_Real_vi <0x33>;

//===----------------------------------------------------------------------===//
// GFX10.
//===----------------------------------------------------------------------===//

// Defines <NAME>_gfx10: the VOP3Pe_gfx10 encoding of the VOP3P_Pseudo called
// NAME for SIEncodingFamily.GFX10.
let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10", VOP3P = 1 in {
  multiclass VOP3P_Real_gfx10<bits<7> op> {
    def _gfx10 : VOP3P_Real<!cast<VOP3P_Pseudo>(NAME), SIEncodingFamily.GFX10>,
                 VOP3Pe_gfx10 <op, !cast<VOP3P_Pseudo>(NAME).Pfl>;
  }
} // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10", VOP3P = 1

// Same opcode on GFX10 and GFX11.
multiclass VOP3P_Real_gfx10_gfx11<bits<7> op> :
  VOP3P_Real_gfx10<op>, VOP3P_Real_Base<GFX11Gen, op>;

// Same opcode on GFX10, GFX11 and GFX12.
multiclass VOP3P_Real_gfx10_gfx11_gfx12<bits<7> op> :
  VOP3P_Real_gfx10_gfx11<op>, VOP3P_Real_Base<GFX12Gen, op>;

// Same opcode on GFX10, GFX11 and GFX12, using VOP3P_Realtriple (instead of
// VOP3P_Real_Base) for the GFX11 and GFX12 records.
multiclass VOP3P_Real_gfx10_gfx11_gfx12_Triple<bits<7> op> :
  VOP3P_Real_gfx10<op>, VOP3P_Realtriple<GFX11Gen, op>,
  VOP3P_Realtriple<GFX12Gen, op>;

defm V_PK_MAD_I16     : VOP3P_Real_gfx10_gfx11_gfx12<0x00>;
defm V_PK_MUL_LO_U16  : VOP3P_Real_gfx10_gfx11_gfx12<0x01>;
defm V_PK_ADD_I16     : VOP3P_Real_gfx10_gfx11_gfx12<0x02>;
defm V_PK_SUB_I16     : VOP3P_Real_gfx10_gfx11_gfx12<0x03>;
defm V_PK_LSHLREV_B16 : VOP3P_Real_gfx10_gfx11_gfx12<0x04>;
defm V_PK_LSHRREV_B16 : VOP3P_Real_gfx10_gfx11_gfx12<0x05>;
defm V_PK_ASHRREV_I16 : VOP3P_Real_gfx10_gfx11_gfx12<0x06>;
defm V_PK_MAX_I16     : VOP3P_Real_gfx10_gfx11_gfx12<0x07>;
defm V_PK_MIN_I16     : VOP3P_Real_gfx10_gfx11_gfx12<0x08>;
defm V_PK_MAD_U16     : VOP3P_Real_gfx10_gfx11_gfx12<0x09>;
defm V_PK_ADD_U16     : VOP3P_Real_gfx10_gfx11_gfx12<0x0a>;
defm V_PK_SUB_U16     : VOP3P_Real_gfx10_gfx11_gfx12<0x0b>;
defm V_PK_MAX_U16     : VOP3P_Real_gfx10_gfx11_gfx12<0x0c>;
defm V_PK_MIN_U16     : VOP3P_Real_gfx10_gfx11_gfx12<0x0d>;
defm V_PK_FMA_F16     : VOP3P_Real_gfx10_gfx11_gfx12<0x0e>;
defm V_PK_ADD_F16     : VOP3P_Real_gfx10_gfx11_gfx12<0x0f>;
defm V_PK_MUL_F16     : VOP3P_Real_gfx10_gfx11_gfx12<0x10>;
// No GFX12 record is requested for these two opcodes here.
defm V_PK_MIN_F16     : VOP3P_Real_gfx10_gfx11<0x11>;
defm V_PK_MAX_F16     : VOP3P_Real_gfx10_gfx11<0x12>;
defm V_FMA_MIX_F32    : VOP3P_Real_gfx10_gfx11_gfx12_Triple<0x20>;
defm V_FMA_MIXLO_F16  : VOP3P_Real_gfx10_gfx11_gfx12_Triple<0x21>;
defm V_FMA_MIXHI_F16  : VOP3P_Real_gfx10_gfx11_gfx12_Triple<0x22>;

defm V_DOT2_I32_I16 : VOP3P_Real_gfx10 <0x14>;
defm V_DOT2_U32_U16 : VOP3P_Real_gfx10 <0x15>;

defm V_DOT2_F32_F16 : VOP3P_Real_gfx10_gfx11_gfx12_Triple<0x13>;
defm V_DOT4_U32_U8  : VOP3P_Real_gfx10_gfx11_gfx12<0x17>;
defm V_DOT8_U32_U4  : VOP3P_Real_gfx10_gfx11_gfx12<0x19>;

defm V_DOT4_I32_I8 : VOP3P_Real_gfx10 <0x16>;
defm V_DOT8_I32_I4 : VOP3P_Real_gfx10 <0x18>;