//===-- VOP3PInstructions.td - Vector Instruction Definitions -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// VOP3P Classes
//===----------------------------------------------------------------------===//

// Wraps a VOPProfile as a VOP3 profile with IsVOP3P set.
//   P        - the underlying operand/result profile.
//   Features - forwarded unchanged to VOP3_Profile.
//   HasDPP   - copied into HasExtVOP3DPP; the instruction multiclasses in this
//              file test that field to decide whether to emit a _dpp pseudo.
class VOP3P_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR,
                    bit HasDPP = 0> : VOP3_Profile<P, Features> {
  let IsVOP3P = 1;
  let HasExtVOP3DPP = HasDPP;
  // We do not want to print src modifiers for vop3p because the bits are
  // overloaded in meaning and the logic in printOperandAndFPInputMods is
  // wrong for vop3p
  let AsmVOP3Base = AsmVOP3P;
}

// Used for FMA_MIX* and MAD_MIX* insts
// Their operands are only sort of f16 operands. Depending on
// op_sel_hi, these may be interpreted as f32. The inline immediate
// values are really f16 converted to f32, so we treat these as f16
// operands.
// Profile for the mix instructions.
//   useTiedOutput - when set, the operand list gains a VGPR_32:$vdst_in input
//                   and uses clampmod instead of clampmod0 (see the FIXME
//                   below). Users of this profile read UseTiedOutput to add
//                   the matching tie constraint.
// DPP is always enabled for these profiles (HasDPP = 1 in the base class).
class VOP3P_Mix_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR,
                        bit useTiedOutput = 0> : VOP3P_Profile<P, Features, 1> {
  bit UseTiedOutput = useTiedOutput;

  // Source operands for the regular VOP3P form.
  dag srcs =
        (ins FP16InputMods:$src0_modifiers, VCSrc_f16:$src0,
             FP16InputMods:$src1_modifiers, VCSrc_f16:$src1,
             FP16InputMods:$src2_modifiers, VCSrc_f16:$src2);
  // Source operands for the DPP form; src0/src1 use register-only operand
  // classes, src2 is the same as in the regular form.
  dag dpp_srcs =
        (ins FPVRegInputMods:$src0_modifiers, VGPRSrc_32:$src0,
             FPVRegInputMods:$src1_modifiers, VRegSrc_32:$src1,
             FP16InputMods:$src2_modifiers, VCSrc_f16:$src2);

  // FIXME: clampmod0 misbehaves with the non-default vdst_in
  // following it. For now workaround this by requiring clamp
  // in tied patterns. This should use undef_tied_input, but it
  // seems underdeveloped and doesn't apply the right register
  // class constraints.
  dag mods = !con(!if(UseTiedOutput, (ins clampmod:$clamp, VGPR_32:$vdst_in),
                      (ins clampmod0:$clamp)),
                  (ins op_sel0:$op_sel, op_sel_hi0:$op_sel_hi));
  // We use Ins64 because that is the one which populates InOperandList
  // due to the logic in class VOP3_Pseudo
  let Ins64 = !con(srcs, mods);
  let InsVOP3Base = !con(dpp_srcs, mods);
  let AsmVOP3Base =
    "$vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$op_sel$op_sel_hi$clamp";
}

// Defines the VOP3P pseudo NAME and, when the profile sets HasExtVOP3DPP, a
// NAME_dpp pseudo under the isGFX11Plus subtarget predicate.
//   OpName - assembly mnemonic.
//   P      - instruction profile.
//   node   - selection DAG operator to match; null_frag (the default) is
//            passed straight through to the pattern helper.
//   IsDOT  - forwarded twice to getVOP3PModPat when P.HasModifiers is set.
multiclass VOP3PInst<string OpName, VOPProfile P,
                     SDPatternOperator node = null_frag, bit IsDOT = 0> {
  def NAME : VOP3P_Pseudo<OpName, P,
                          !if (P.HasModifiers,
                               getVOP3PModPat<P, node, IsDOT, IsDOT>.ret,
                               getVOP3Pat<P, node>.ret)>;
  let SubtargetPredicate = isGFX11Plus in {
    if P.HasExtVOP3DPP then
      def _dpp : VOP3_DPP_Pseudo<OpName, P> {
        let VOP3P = 1;
        let PseudoInstr = OpName #"_dpp";
      }
  } // end SubtargetPredicate = isGFX11Plus
}

// Non-packed instructions that use the VOP3P encoding.
// VOP3 neg/abs and VOP3P opsel/opsel_hi modifiers are allowed.
// Defines NAME (and NAME_dpp under isGFX11Plus when P.HasExtVOP3DPP is set)
// for a mix profile. When P.UseTiedOutput is set, $vdst is tied to $vdst_in
// and $vdst_in is excluded from the encoding; otherwise both strings are
// empty.
multiclass VOP3_VOP3PInst<string OpName, VOP3P_Mix_Profile P> {
  def NAME : VOP3P_Pseudo<OpName, P> {
    let Constraints = !if(P.UseTiedOutput, "$vdst = $vdst_in", "");
    let DisableEncoding = !if(P.UseTiedOutput, "$vdst_in", "");
  }
  let SubtargetPredicate = isGFX11Plus in {
    if P.HasExtVOP3DPP then
      def _dpp : VOP3_DPP_Pseudo<OpName, P> {
        let VOP3P = 1;
        let PseudoInstr = OpName#"_dpp";
        let Constraints = !if(P.UseTiedOutput, "$vdst = $vdst_in", "");
        let DisableEncoding = !if(P.UseTiedOutput, "$vdst_in", "");
      }
  } // end SubtargetPredicate = isGFX11Plus
}

let isReMaterializable = 1 in {
let isCommutable = 1 in {
defm V_PK_MAD_I16 : VOP3PInst<"v_pk_mad_i16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16_V2I16>>;
defm V_PK_MAD_U16 : VOP3PInst<"v_pk_mad_u16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16_V2I16>>;

let FPDPRounding = 1 in {
defm V_PK_FMA_F16 : VOP3PInst<"v_pk_fma_f16", VOP3P_Profile<VOP_V2F16_V2F16_V2F16_V2F16>, any_fma>;
defm V_PK_ADD_F16 : VOP3PInst<"v_pk_add_f16", VOP3P_Profile<VOP_V2F16_V2F16_V2F16>, any_fadd>;
defm V_PK_MUL_F16 : VOP3PInst<"v_pk_mul_f16", VOP3P_Profile<VOP_V2F16_V2F16_V2F16>, any_fmul>;
} // End FPDPRounding = 1
defm V_PK_MAX_F16 : VOP3PInst<"v_pk_max_f16", VOP3P_Profile<VOP_V2F16_V2F16_V2F16>, fmaxnum_like>;
defm V_PK_MIN_F16 : VOP3PInst<"v_pk_min_f16", VOP3P_Profile<VOP_V2F16_V2F16_V2F16>, fminnum_like>;

defm V_PK_ADD_U16 : VOP3PInst<"v_pk_add_u16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>, add>;
defm V_PK_ADD_I16 : VOP3PInst<"v_pk_add_i16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>>;
defm V_PK_MUL_LO_U16 : VOP3PInst<"v_pk_mul_lo_u16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>, mul>;

defm V_PK_MIN_I16 : VOP3PInst<"v_pk_min_i16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>, smin>;
defm V_PK_MIN_U16 : VOP3PInst<"v_pk_min_u16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>, umin>;
defm V_PK_MAX_I16 : VOP3PInst<"v_pk_max_i16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>, smax>;
defm V_PK_MAX_U16 : VOP3PInst<"v_pk_max_u16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>, umax>;

let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
defm V_PK_MAXIMUM_F16 : VOP3PInst<"v_pk_maximum_f16", VOP3P_Profile<VOP_V2F16_V2F16_V2F16>, fmaximum>;
defm V_PK_MINIMUM_F16 : VOP3PInst<"v_pk_minimum_f16", VOP3P_Profile<VOP_V2F16_V2F16_V2F16>, fminimum>;
} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
} // End isCommutable = 1

defm V_PK_SUB_U16 : VOP3PInst<"v_pk_sub_u16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>>;
defm V_PK_SUB_I16 : VOP3PInst<"v_pk_sub_i16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>, sub>;

defm V_PK_LSHLREV_B16 : VOP3PInst<"v_pk_lshlrev_b16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>, clshl_rev_16>;
defm V_PK_ASHRREV_I16 : VOP3PInst<"v_pk_ashrrev_i16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>, cashr_rev_16>;
defm V_PK_LSHRREV_B16 : VOP3PInst<"v_pk_lshrrev_b16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>, clshr_rev_16>;
} // End isReMaterializable = 1

let SubtargetPredicate = HasVOP3PInsts in {

// Integer operations with clamp bit set.
// Selects `inst` with DSTCLAMP.ENABLE for the saturating v2i16 operator `pat`.
class VOP3PSatPat<SDPatternOperator pat, Instruction inst> : GCNPat<
  (pat (v2i16 (VOP3PMods v2i16:$src0, i32:$src0_modifiers)),
       (v2i16 (VOP3PMods v2i16:$src1, i32:$src1_modifiers))),
  (inst $src0_modifiers, $src0, $src1_modifiers, $src1, DSTCLAMP.ENABLE)
>;

def : VOP3PSatPat<uaddsat, V_PK_ADD_U16>;
def : VOP3PSatPat<saddsat, V_PK_ADD_I16>;
def : VOP3PSatPat<usubsat, V_PK_SUB_U16>;
def : VOP3PSatPat<ssubsat, V_PK_SUB_I16>;
} // End SubtargetPredicate = HasVOP3PInsts

// TODO: Make sure we're doing the right thing with denormals. Note
// that FMA and MAD will differ.
//
// Selection patterns shared by the mad_mix and fma_mix families.
//   fma_like   - the three-operand DAG operator to match.
//   mix_inst   - instruction producing the f32 result.
//   mixlo_inst - instruction used for the fpround-to-f16 result; its last
//                operand is IMPLICIT_DEF here.
//   mixhi_inst - instruction used for the second build_vector element; its
//                last operand is the first element (or a mixlo_inst result).
multiclass MadFmaMixPats<SDPatternOperator fma_like,
                         Instruction mix_inst,
                         Instruction mixlo_inst,
                         Instruction mixhi_inst> {
  // At least one of the operands needs to be an fpextend of an f16
  // for this to be worthwhile, so we need three patterns here.
  // TODO: Could we use a predicate to inspect src1/2/3 instead?
  def : GCNPat <
    (f32 (fma_like (f32 (VOP3PMadMixModsExt f16:$src0, i32:$src0_mods)),
                   (f32 (VOP3PMadMixMods f16:$src1, i32:$src1_mods)),
                   (f32 (VOP3PMadMixMods f16:$src2, i32:$src2_mods)))),
    (mix_inst $src0_mods, $src0, $src1_mods, $src1, $src2_mods, $src2,
              DSTCLAMP.NONE)>;
  def : GCNPat <
    (f32 (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_mods)),
                   (f32 (VOP3PMadMixModsExt f16:$src1, i32:$src1_mods)),
                   (f32 (VOP3PMadMixMods f32:$src2, i32:$src2_mods)))),
    (mix_inst $src0_mods, $src0, $src1_mods, $src1, $src2_mods, $src2,
              DSTCLAMP.NONE)>;
  def : GCNPat <
    (f32 (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_mods)),
                   (f32 (VOP3PMadMixMods f32:$src1, i32:$src1_mods)),
                   (f32 (VOP3PMadMixModsExt f16:$src2, i32:$src2_mods)))),
    (mix_inst $src0_mods, $src0, $src1_mods, $src1, $src2_mods, $src2,
              DSTCLAMP.NONE)>;

  // Rounded f16 result: select the mixlo form with an undefined tied input.
  def : GCNPat <
    (f16 (fpround (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)),
                            (f32 (VOP3PMadMixMods f16:$src1, i32:$src1_modifiers)),
                            (f32 (VOP3PMadMixMods f16:$src2, i32:$src2_modifiers))))),
    (mixlo_inst $src0_modifiers, $src0,
                $src1_modifiers, $src1,
                $src2_modifiers, $src2,
                DSTCLAMP.NONE,
                (i32 (IMPLICIT_DEF)))
  >;

  // FIXME: Special case handling for mixhi (especially for clamp)
  // because dealing with the write to high half of the register is
  // difficult.
  def : GCNPat <
    (build_vector f16:$elt0, (f16 (fpround (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)),
                                                     (f32 (VOP3PMadMixMods f16:$src1, i32:$src1_modifiers)),
                                                     (f32 (VOP3PMadMixMods f16:$src2, i32:$src2_modifiers)))))),
    (v2f16 (mixhi_inst $src0_modifiers, $src0,
                       $src1_modifiers, $src1,
                       $src2_modifiers, $src2,
                       DSTCLAMP.NONE,
                       VGPR_32:$elt0))
  >;

  // Same as above with the second element clamped.
  def : GCNPat <
    (build_vector
      f16:$elt0,
      (AMDGPUclamp (f16 (fpround (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)),
                                           (f32 (VOP3PMadMixMods f16:$src1, i32:$src1_modifiers)),
                                           (f32 (VOP3PMadMixMods f16:$src2, i32:$src2_modifiers))))))),
    (v2f16 (mixhi_inst $src0_modifiers, $src0,
                       $src1_modifiers, $src1,
                       $src2_modifiers, $src2,
                       DSTCLAMP.ENABLE,
                       VGPR_32:$elt0))
  >;

  // Clamp of a vector whose two elements are both rounded fma_like results:
  // mixlo produces the first element, and its result feeds mixhi as the tied
  // input.
  def : GCNPat <
    (AMDGPUclamp (build_vector
      (f16 (fpround (fma_like (f32 (VOP3PMadMixMods f16:$lo_src0, i32:$lo_src0_modifiers)),
                              (f32 (VOP3PMadMixMods f16:$lo_src1, i32:$lo_src1_modifiers)),
                              (f32 (VOP3PMadMixMods f16:$lo_src2, i32:$lo_src2_modifiers))))),
      (f16 (fpround (fma_like (f32 (VOP3PMadMixMods f16:$hi_src0, i32:$hi_src0_modifiers)),
                              (f32 (VOP3PMadMixMods f16:$hi_src1, i32:$hi_src1_modifiers)),
                              (f32 (VOP3PMadMixMods f16:$hi_src2, i32:$hi_src2_modifiers))))))),
    (v2f16 (mixhi_inst $hi_src0_modifiers, $hi_src0,
                       $hi_src1_modifiers, $hi_src1,
                       $hi_src2_modifiers, $hi_src2,
                       DSTCLAMP.ENABLE,
                       (mixlo_inst $lo_src0_modifiers, $lo_src0,
                                   $lo_src1_modifiers, $lo_src1,
                                   $lo_src2_modifiers, $lo_src2,
                                   DSTCLAMP.ENABLE,
                                   (i32 (IMPLICIT_DEF)))))
  >;

  // Rounded fmul: select the mix instruction with (i32 0), (i32 0) in the
  // src2 modifier/value slots.
  def : GCNPat <
    (f16 (fpround (fmul (f32 (VOP3PMadMixMods f32:$src0, i32:$src0_modifiers)),
                        (f32 (VOP3PMadMixMods f32:$src1, i32:$src1_modifiers))))),
    (mixlo_inst $src0_modifiers, $src0,
                $src1_modifiers, $src1,
                (i32 0), (i32 0),
                DSTCLAMP.NONE,
                (i32 (IMPLICIT_DEF)))
  >;

  def : GCNPat <
    (build_vector f16:$elt0, (f16 (fpround (fmul (f32 (VOP3PMadMixMods f32:$src0, i32:$src0_modifiers)),
                                                 (f32 (VOP3PMadMixMods f32:$src1, i32:$src1_modifiers)))))),
    (v2f16 (mixhi_inst $src0_modifiers, $src0,
                       $src1_modifiers, $src1,
                       (i32 0), (i32 0),
                       DSTCLAMP.NONE,
                       VGPR_32:$elt0))
  >;
}

let SubtargetPredicate = HasMadMixInsts, OtherPredicates = [NoFP32Denormals] in {

// These are VOP3a-like opcodes which accept no omod.
// Size of src arguments (16/32) is controlled by op_sel.
// For 16-bit src arguments their location (hi/lo) are controlled by op_sel_hi.
// NOTE(review): the comment on VOP3P_Mix_Profile says op_sel_hi decides
// whether a source is read as f32, which disagrees with the two lines above.
// Confirm the roles of op_sel/op_sel_hi against the ISA documentation.
let isCommutable = 1, mayRaiseFPException = 0 in {
let isReMaterializable = 1 in
defm V_MAD_MIX_F32 : VOP3_VOP3PInst<"v_mad_mix_f32", VOP3P_Mix_Profile<VOP_F32_F16_F16_F16, VOP3_OPSEL>>;

let FPDPRounding = 1 in {
// Clamp modifier is applied after conversion to f16.
defm V_MAD_MIXLO_F16 : VOP3_VOP3PInst<"v_mad_mixlo_f16", VOP3P_Mix_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL, 1>>;

let ClampLo = 0, ClampHi = 1 in {
defm V_MAD_MIXHI_F16 : VOP3_VOP3PInst<"v_mad_mixhi_f16", VOP3P_Mix_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL, 1>>;
}
} // End FPDPRounding = 1
}

defm : MadFmaMixPats<fmad, V_MAD_MIX_F32, V_MAD_MIXLO_F16, V_MAD_MIXHI_F16>;
} // End SubtargetPredicate = HasMadMixInsts, OtherPredicates = [NoFP32Denormals]


// Essentially the same as the mad_mix versions
let SubtargetPredicate = HasFmaMixInsts in {
let isCommutable = 1 in {

let isReMaterializable = 1 in
defm V_FMA_MIX_F32 : VOP3_VOP3PInst<"v_fma_mix_f32", VOP3P_Mix_Profile<VOP_F32_F16_F16_F16, VOP3_OPSEL>>;

let FPDPRounding = 1 in {
// Clamp modifier is applied after conversion to f16.
defm V_FMA_MIXLO_F16 : VOP3_VOP3PInst<"v_fma_mixlo_f16", VOP3P_Mix_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL, 1>>;

let ClampLo = 0, ClampHi = 1 in {
defm V_FMA_MIXHI_F16 : VOP3_VOP3PInst<"v_fma_mixhi_f16", VOP3P_Mix_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL, 1>>;
}
} // End FPDPRounding = 1
}

defm : MadFmaMixPats<fma, V_FMA_MIX_F32, V_FMA_MIXLO_F16, V_FMA_MIXHI_F16>;
}

// Defines patterns that extract signed 4bit from each Idx[0].
// Each pair is [bit index, left-shift amount]; the value is shifted left by
// Idx[1] and then arithmetically shifted right by 28.
foreach Idx = [[0,28],[4,24],[8,20],[12,16],[16,12],[20,8],[24,4]] in
  def ExtractSigned4bit_#Idx[0] : PatFrag<(ops node:$src),
                                          (sra (shl node:$src, (i32 Idx[1])), (i32 28))>;

// Defines code pattern that extracts U(unsigned/signed) 4/8bit from FromBitIndex.
//   FromBitIndex - bit position of the field's least significant bit.
//   BitMask      - 255 for an 8-bit field, 15 for a 4-bit field.
//   U            - 1 for an unsigned extract, 0 for a signed one.
// The topmost field (index 24 with mask 255, or index 28) needs only a right
// shift; the field at index 0 needs no shift; every other field is shifted
// and then masked or sign-extended.
class Extract<int FromBitIndex, int BitMask, bit U>: PatFrag<
  (ops node:$src),
  !if (!or (!and (!eq (BitMask, 255), !eq (FromBitIndex, 24)), !eq (FromBitIndex, 28)), // last element
       !if (U, (srl node:$src, (i32 FromBitIndex)), (sra node:$src, (i32 FromBitIndex))),
       !if (!eq (FromBitIndex, 0), // first element
            !if (U, (and node:$src, (i32 BitMask)),
                 !if (!eq (BitMask, 15), (!cast<PatFrag>("ExtractSigned4bit_"#FromBitIndex) node:$src),
                      (sext_inreg node:$src, i8))),
            !if (U, (and (srl node:$src, (i32 FromBitIndex)), (i32 BitMask)),
                 !if (!eq (BitMask, 15), (!cast<PatFrag>("ExtractSigned4bit_"#FromBitIndex) node:$src),
                      (sext_inreg (srl node:$src, (i32 FromBitIndex)), i8)))))>;


foreach Type = ["I", "U"] in
  foreach Index = 0-3 in {
    // Defines patterns that extract each Index'ed 8bit from an unsigned
    // 32bit scalar value;
    def Type#Index#"_8bit" : Extract<!shl(Index, 3), 255, !eq (Type, "U")>;

    // Defines multiplication patterns where the multiplication is happening on each
    // Index'ed 8bit of a 32bit scalar value.

    def Mul#Type#_Elt#Index : PatFrag<
      (ops node:$src0, node:$src1),
      (!cast<HasOneUseBinOp>(!if (!eq (Type, "I"), AMDGPUmul_i24_oneuse, AMDGPUmul_u24_oneuse))
                            (!cast<Extract>(Type#Index#"_8bit") node:$src0),
                            (!cast<Extract>(Type#Index#"_8bit") node:$src1))>;
  }

// Different variants of dot8 patterns cause a huge increase in the compile time.
// Define non-associative/commutative add/mul to prevent permutation in the dot8
// pattern.
def NonACAdd : SDNode<"ISD::ADD" , SDTIntBinOp>;
def NonACAdd_oneuse : HasOneUseBinOp<NonACAdd>;

def NonACAMDGPUmul_u24 : SDNode<"AMDGPUISD::MUL_U24" , SDTIntBinOp>;
def NonACAMDGPUmul_u24_oneuse : HasOneUseBinOp<NonACAMDGPUmul_u24>;

def NonACAMDGPUmul_i24 : SDNode<"AMDGPUISD::MUL_I24" , SDTIntBinOp>;
def NonACAMDGPUmul_i24_oneuse : HasOneUseBinOp<NonACAMDGPUmul_i24>;

foreach Type = ["I", "U"] in
  foreach Index = 0-7 in {
    // Defines patterns that extract each Index'ed 4bit from an unsigned
    // 32bit scalar value;
    def Type#Index#"_4bit" : Extract<!shl(Index, 2), 15, !eq (Type, "U")>;

    // Defines multiplication patterns where the multiplication is happening on each
    // Index'ed 4bit of a 32bit scalar value.
    def Mul#Type#Index#"_4bit" : PatFrag<
      (ops node:$src0, node:$src1),
      (!cast<HasOneUseBinOp>(!if (!eq (Type, "I"), NonACAMDGPUmul_i24_oneuse, NonACAMDGPUmul_u24_oneuse))
                            (!cast<Extract>(Type#Index#"_4bit") node:$src0),
                            (!cast<Extract>(Type#Index#"_4bit") node:$src1))>;
  }

// Matches src2 + hi16(src0)*hi16(src1) + lo16(src0)*lo16(src1) built from
// unsigned 24-bit multiplies and selects Inst for it. The pattern inherits
// Inst's predicates.
class UDot2Pat<VOP_Pseudo Inst> : GCNPat <
  (add (add_oneuse (AMDGPUmul_u24_oneuse (srl i32:$src0, (i32 16)),
                                         (srl i32:$src1, (i32 16))), i32:$src2),
       (AMDGPUmul_u24_oneuse (and i32:$src0, (i32 65535)),
                             (and i32:$src1, (i32 65535)))
  ),
  (Inst (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))> {
  let Predicates = Inst.Predicates;
}

// Signed counterpart of UDot2Pat: uses sra / sext_inreg and signed 24-bit
// multiplies.
class SDot2Pat<VOP_Pseudo Inst> : GCNPat <
  (add (add_oneuse (AMDGPUmul_i24_oneuse (sra i32:$src0, (i32 16)),
                                         (sra i32:$src1, (i32 16))), i32:$src2),
       (AMDGPUmul_i24_oneuse (sext_inreg i32:$src0, i16),
                             (sext_inreg i32:$src1, i16))),
  (Inst (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))> {
  let Predicates = Inst.Predicates;
}

let IsDOT = 1 in {
let OtherPredicates = [HasDot2Insts] in {
defm V_DOT2_I32_I16 : VOP3PInst<"v_dot2_i32_i16",
  VOP3P_Profile<VOP_I32_V2I16_V2I16_I32>, int_amdgcn_sdot2, 1>;
defm V_DOT2_U32_U16 : VOP3PInst<"v_dot2_u32_u16",
  VOP3P_Profile<VOP_I32_V2I16_V2I16_I32>, int_amdgcn_udot2, 1>;
} // End OtherPredicates = [HasDot2Insts]

let OtherPredicates = [HasDot10Insts] in
defm V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16",
  VOP3P_Profile<VOP_F32_V2F16_V2F16_F32, VOP3_REGULAR, /*HasDPP*/ 1>,
  AMDGPUfdot2, 1/*ExplicitClamp*/>;

let OtherPredicates = [HasDot7Insts] in {
defm V_DOT4_U32_U8 : VOP3PInst<"v_dot4_u32_u8",
  VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot4, 1>;
defm V_DOT8_U32_U4 : VOP3PInst<"v_dot8_u32_u4",
  VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot8, 1>;
} // End OtherPredicates = [HasDot7Insts]

let OtherPredicates = [HasDot1Insts] in {
defm V_DOT4_I32_I8 : VOP3PInst<"v_dot4_i32_i8",
  VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_sdot4, 1>;
defm V_DOT8_I32_I4 : VOP3PInst<"v_dot8_i32_i4",
  VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_sdot8, 1>;
} // End OtherPredicates = [HasDot1Insts]

def DOT2_BF16_Profile
  : VOP3P_Profile<VOP_F32_V2I16_V2I16_F32, VOP3_REGULAR, /*HasDPP*/ 1> {
  let HasSrc1Mods = 1;
}

let SubtargetPredicate = HasDot9Insts in {

defm V_DOT2_F32_BF16 : VOP3PInst<"v_dot2_f32_bf16", DOT2_BF16_Profile,
  int_amdgcn_fdot2_f32_bf16, 1>;

} // End SubtargetPredicate = HasDot9Insts

} // End let IsDOT = 1

// Defines a dot-iu instruction with no built-in selection pattern (null_frag)
// plus a separate pattern mapping intrinsic_node onto it.
multiclass VOP3PDOTIUInst <string OpName, SDPatternOperator intrinsic_node> {
  let IsDOT = 1 in
  defm NAME : VOP3PInst<OpName, VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>,
                        null_frag, 1>;
  // Dot-iu instructions consider input as signed if imod neg bits are set. Thus
  // Dot-iu Intrinsics have extra operands and require separate codegen pattern.
  def : GCNPat < (intrinsic_node (VOP3PModsNeg i32:$src0_mods), i32:$src0,
                                 (VOP3PModsNeg i32:$src1_mods), i32:$src1,
                                 i32:$src2, (i1 timm:$clamp)),
                 (!cast<Instruction>(NAME) $src0_mods, i32:$src0,
                                           $src1_mods, i32:$src1,
                                           (i32 8), i32:$src2, i1:$clamp)
  >;
}

let SubtargetPredicate = HasDot8Insts in {
defm V_DOT4_I32_IU8 : VOP3PDOTIUInst<"v_dot4_i32_iu8", int_amdgcn_sudot4>;
defm V_DOT8_I32_IU4 : VOP3PDOTIUInst<"v_dot8_i32_iu4", int_amdgcn_sudot8>;

// Select the sdot8/sdot4 intrinsics onto the iu instructions, passing (i32 9)
// as the src0 and src1 modifier operands.
def : GCNPat < (int_amdgcn_sdot8 i32:$src0,
                                 i32:$src1,
                                 i32:$src2, (i1 timm:$clamp)),
               (V_DOT8_I32_IU4 (i32 9), i32:$src0,
                               (i32 9), i32:$src1, (i32 8), i32:$src2, i1:$clamp)
>;

def : GCNPat < (int_amdgcn_sdot4 i32:$src0,
                                 i32:$src1,
                                 i32:$src2, (i1 timm:$clamp)),
               (V_DOT4_I32_IU8 (i32 9), i32:$src0,
                               (i32 9), i32:$src1, (i32 8), i32:$src2, i1:$clamp)
>;
} // End SubtargetPredicate = HasDot8Insts

// Does not use opsel, no src_modifiers on src0 and src1.
// src_modifiers on src2(f32) are f32 fneg(neg_lo[2]) and f32 fabs(neg_hi[2]).
def VOP3P_DOTF8_Profile : VOP3P_Profile<VOPProfile <[f32, i32, i32, f32]>,
                                        VOP3_PACKED, 1> {
  let HasClamp = 0;
  let HasOpSel = 0;
  let HasOMod = 0;
  let IsDOT = 1;
  let HasSrc0Mods = 0;
  let HasSrc1Mods = 0;
  let HasSrc2Mods = 1;

  let InsVOP3P = (ins VSrc_b32:$src0, VSrc_b32:$src1,
                      PackedF16InputMods:$src2_modifiers, VSrc_f32:$src2,
                      neg_lo0:$neg_lo, neg_hi0:$neg_hi);

  let InsVOP3DPP8 = (ins DstRC:$old, VGPR_32:$src0, VRegSrc_32:$src1,
                         PackedF16InputMods:$src2_modifiers, VRegSrc_32:$src2,
                         neg_lo0:$neg_lo, neg_hi0:$neg_hi, dpp8:$dpp8, FI:$fi);

  let InsVOP3DPP16 = (ins DstRC:$old, VGPR_32:$src0, VRegSrc_32:$src1,
                          PackedF16InputMods:$src2_modifiers, VRegSrc_32:$src2,
                          neg_lo0:$neg_lo, neg_hi0:$neg_hi, dpp_ctrl:$dpp_ctrl,
                          row_mask:$row_mask, bank_mask:$bank_mask,
                          bound_ctrl:$bound_ctrl, FI:$fi);
}

// Defines one dot4 instruction built on VOP3P_DOTF8_Profile under the
// instantiating name, plus the GFX12+ pattern that selects it from
// `intrinsic_node`.
multiclass VOP3PDOTF8Inst <string OpName, SDPatternOperator intrinsic_node> {
  // The instruction itself carries no pattern (null_frag); selection is done
  // by the explicit GCNPat below.
  defm NAME : VOP3PInst<OpName, VOP3P_DOTF8_Profile, null_frag, 1>;

  // Only src2 is matched through VOP3Mods; src0 and src1 are forwarded as-is.
  let SubtargetPredicate = isGFX12Plus in
  def : GCNPat <
    (intrinsic_node i32:$src0, i32:$src1,
                    (VOP3Mods f32:$src2, i32:$src2_modifiers)),
    (!cast<Instruction>(NAME) i32:$src0, i32:$src1,
                              i32:$src2_modifiers, f32:$src2)
  >;
}

defm V_DOT4_F32_FP8_BF8 : VOP3PDOTF8Inst<"v_dot4_f32_fp8_bf8", int_amdgcn_dot4_f32_fp8_bf8>;
defm V_DOT4_F32_BF8_FP8 : VOP3PDOTF8Inst<"v_dot4_f32_bf8_fp8", int_amdgcn_dot4_f32_bf8_fp8>;
defm V_DOT4_F32_FP8_FP8 : VOP3PDOTF8Inst<"v_dot4_f32_fp8_fp8", int_amdgcn_dot4_f32_fp8_fp8>;
defm V_DOT4_F32_BF8_BF8 : VOP3PDOTF8Inst<"v_dot4_f32_bf8_bf8", int_amdgcn_dot4_f32_bf8_bf8>;

def : UDot2Pat<V_DOT2_U32_U16>;
def : SDot2Pat<V_DOT2_I32_I16>;

// dot4: fold the four per-element products (Mul<Type>_Elt0..3) onto $src2 with
// one-use adds and select the matching V_DOT4_<Type>32_<Type>8 pseudo. The
// pattern inherits the Predicates of that pseudo.
// NOTE(review): the (i32 8) written into every *_modifiers slot is presumably
// the default VOP3P source-modifier encoding -- confirm against SISrcMods.
foreach Type = ["U", "I"] in {
  defvar Dot4Name = "V_DOT4_"#Type#"32_"#Type#8;

  let Predicates = !cast<VOP_Pseudo>(Dot4Name).Predicates in
  def : GCNPat <
    !cast<dag>(!foldl((i32 i32:$src2), [0, 1, 2, 3], lhs, y,
                      (add_oneuse lhs,
                                  (!cast<PatFrag>("Mul"#Type#"_Elt"#y) i32:$src0, i32:$src1)))),
    (!cast<VOP3P_Pseudo>(Dot4Name)
        (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))
  >;
}

// dot8: the innermost add (element 0 plus $src2) uses add_oneuse; elements
// 1..7 are then chained on in ascending order with NonACAdd_oneuse.
foreach Type = ["U", "I"] in {
  defvar Dot8Name = "V_DOT8_"#Type#"32_"#Type#4;

  let Predicates = !cast<VOP_Pseudo>(Dot8Name).Predicates in
  def : GCNPat <
    !cast<dag>(!foldl((add_oneuse i32:$src2,
                                  (!cast<PatFrag>("Mul"#Type#"0_4bit") i32:$src0, i32:$src1)),
                      [1, 2, 3, 4, 5, 6, 7], lhs, y,
                      (NonACAdd_oneuse lhs,
                                       (!cast<PatFrag>("Mul"#Type#y#"_4bit") i32:$src0, i32:$src1)))),
    (!cast<VOP3P_Pseudo>(Dot8Name)
        (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))
  >;
}

// Different variants of dot8 code-gen dag patterns are not generated through
// table-gen due to a huge increase in the compile time. Directly handle the
// pattern generated by the FE here (same chain as above, but with element 7
// added right after element 0).
foreach Type = ["U", "I"] in {
  defvar Dot8Name = "V_DOT8_"#Type#"32_"#Type#4;

  let Predicates = !cast<VOP_Pseudo>(Dot8Name).Predicates in
  def : GCNPat <
    !cast<dag>(!foldl((add_oneuse i32:$src2,
                                  (!cast<PatFrag>("Mul"#Type#"0_4bit") i32:$src0, i32:$src1)),
                      [7, 1, 2, 3, 4, 5, 6], lhs, y,
                      (NonACAdd_oneuse lhs,
                                       (!cast<PatFrag>("Mul"#Type#y#"_4bit") i32:$src0, i32:$src1)))),
    (!cast<VOP3P_Pseudo>(Dot8Name)
        (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))
  >;
}

// Destination operands over the AGPR (ADst_*) and VGPR (VDst_*) register
// classes; the numeric suffix follows the register class name.
def ADst_32   : VOPDstOperand<AGPR_32>;
def ADst_64   : VOPDstOperand<AReg_64>;
def ADst_128  : VOPDstOperand<AReg_128>;
def ADst_256  : VOPDstOperand<AReg_256>;
def ADst_512  : VOPDstOperand<AReg_512>;
def ADst_1024 : VOPDstOperand<AReg_1024>;
def VDst_64   : VOPDstOperand<VReg_64>;
def VDst_128  : VOPDstOperand<VReg_128>;
def VDst_256  : VOPDstOperand<VReg_256>;
def VDst_512  : VOPDstOperand<VReg_512>;
def VDst_1024 : VOPDstOperand<VReg_1024>;

// Profile for v_accvgpr_read_b32: src0 comes from ARegSrc_32.
def VOPProfileAccRead : VOP3P_Profile<VOP_I32_I32, VOP3_MAI> {
  let Src0RC64 = ARegSrc_32;
}

// Profile for v_accvgpr_write_b32: the destination is ADst_32 and src0 is
// VCSrc_b32.
def VOPProfileAccWrite : VOP3P_Profile<VOP_I32_I32, VOP3_MAI> {
  let DstRC    = ADst_32;
  let Src0RC64 = VCSrc_b32;
}

// Common profile for the MAI (MFMA) instructions.
//   P       - value-type profile of the operation.
//   _SrcRC  - operand class used for src2.
//   _DstRC  - operand class used for the destination.
//   SrcABRC - operand class shared by src0 and src1 (AVSrc_32 by default).
// OpSel, clamp, integer clamp, omod and source modifiers are all disabled;
// the instruction takes cbsz/abid/blgp operands instead.
class VOPProfileMAI<VOPProfile P, RegisterOperand _SrcRC, RegisterOperand _DstRC,
                    RegisterOperand SrcABRC = AVSrc_32>
  : VOP3P_Profile<P, VOP3_MAI> {
  let DstRC    = _DstRC;
  let Src0RC64 = SrcABRC;
  let Src1RC64 = SrcABRC;
  let Src2RC64 = _SrcRC;

  let HasOpSel     = 0;
  let HasClamp     = 0;
  let HasIntClamp  = 0;
  let HasOMod      = 0;
  let HasModifiers = 0;

  let AsmVOP3Base = "$vdst, $src0, $src1, $src2$cbsz$abid$blgp";
  let Ins64 = (ins Src0RC64:$src0, Src1RC64:$src1, Src2RC64:$src2,
                   cbsz:$cbsz, abid:$abid, blgp:$blgp);
  let InsVOP3Base = Ins64;

  // Dst and SrcC cannot partially overlap if SrcC/Dst is bigger than 4 VGPRs.
  // We then create two versions of the instruction: with tied dst and src2
  // and with the earlyclobber flag on the dst. This is stricter than the
  // actual HW restriction. In particular earlyclobber also affects src0 and
  // src1 allocation which is not required.
  bit NoDstOverlap = !gt(DstVT.Size, 128);
}

// Profile for the SMFMAC instructions, layered on VOPProfileMAI.
// _DstRC is used for both src2 and the destination; src0 uses _SrcARC and
// src1 is overridden to _SrcBRC. Src2VT is forced to the destination type.
// The input list adds a VRegSrc_32 $idx operand and drops blgp; $src2 is
// listed last and does not appear in the asm string.
class VOPProfileSMFMAC<VOPProfile P, RegisterOperand _DstRC,
                       RegisterOperand _SrcARC, RegisterOperand _SrcBRC>
  : VOPProfileMAI<P, _DstRC, _DstRC, _SrcARC> {
  let Src1RC64 = _SrcBRC;
  let Src2VT   = DstVT;
  let Asm64    = " $vdst, $src0, $src1, $idx$cbsz$abid";
  let Outs64   = (outs DstRC:$vdst);
  let Ins64    = (ins Src0RC64:$src0, Src1RC64:$src1, VRegSrc_32:$idx,
                      cbsz:$cbsz, abid:$abid, Src2RC64:$src2);
}

// MAI profiles whose src2 and destination use AGPR operand classes
// (AISrc_* / ADst_*). Each has a "_VCD" counterpart using VISrc_* / VDst_*.
def VOPProfileMAI_F32_F32_X4      : VOPProfileMAI<VOP_V4F32_F32_F32_V4F32,       AISrc_128_f32,  ADst_128>;
def VOPProfileMAI_F32_F32_X16     : VOPProfileMAI<VOP_V16F32_F32_F32_V16F32,     AISrc_512_f32,  ADst_512>;
def VOPProfileMAI_F32_F32_X32     : VOPProfileMAI<VOP_V32F32_F32_F32_V32F32,     AISrc_1024_f32, ADst_1024>;
def VOPProfileMAI_I32_I32_X4      : VOPProfileMAI<VOP_V4I32_I32_I32_V4I32,       AISrc_128_b32,  ADst_128>;
def VOPProfileMAI_I32_I32_X16     : VOPProfileMAI<VOP_V16I32_I32_I32_V16I32,     AISrc_512_b32,  ADst_512>;
def VOPProfileMAI_I32_I32_X32     : VOPProfileMAI<VOP_V32I32_I32_I32_V32I32,     AISrc_1024_b32, ADst_1024>;
def VOPProfileMAI_F32_V2I16_X4    : VOPProfileMAI<VOP_V4F32_V2I16_V2I16_V4F32,   AISrc_128_b32,  ADst_128>;
def VOPProfileMAI_F32_V2I16_X16   : VOPProfileMAI<VOP_V16F32_V2I16_V2I16_V16F32, AISrc_512_b32,  ADst_512>;
def VOPProfileMAI_F32_V2I16_X32   : VOPProfileMAI<VOP_V32F32_V2I16_V2I16_V32F32, AISrc_1024_b32, ADst_1024>;
def VOPProfileMAI_F32_V4F16_X4    : VOPProfileMAI<VOP_V4F32_V4F16_V4F16_V4F32,   AISrc_128_b32,  ADst_128,  AVSrc_64>;
def VOPProfileMAI_F32_V4F16_X16   : VOPProfileMAI<VOP_V16F32_V4F16_V4F16_V16F32, AISrc_512_b32,  ADst_512,  AVSrc_64>;
def VOPProfileMAI_F32_V4F16_X32   : VOPProfileMAI<VOP_V32F32_V4F16_V4F16_V32F32, AISrc_1024_b32, ADst_1024, AVSrc_64>;
def VOPProfileMAI_F32_V4I16_X4    : VOPProfileMAI<VOP_V4F32_V4I16_V4I16_V4F32,   AISrc_128_b32,  ADst_128,  AVSrc_64>;
def VOPProfileMAI_F32_V4I16_X16   : VOPProfileMAI<VOP_V16F32_V4I16_V4I16_V16F32, AISrc_512_b32,  ADst_512,  AVSrc_64>;
def VOPProfileMAI_F32_V4I16_X32   : VOPProfileMAI<VOP_V32F32_V4I16_V4I16_V32F32, AISrc_1024_b32, ADst_1024, AVSrc_64>;
def VOPProfileMAI_F64_16X16X4F64  : VOPProfileMAI<VOP_V4F64_F64_F64_V4F64,       AISrc_256_f64,  ADst_256,  AVSrc_64>;
def VOPProfileMAI_F64_4X4X4F64    : VOPProfileMAI<VOP_F64_F64_F64_F64,           AISrc_64_f64,   ADst_64,   AVSrc_64>;
def VOPProfileMAI_I32_I64_X16     : VOPProfileMAI<VOP_V4I32_I64_I64_V4I32,       AISrc_128_b32,  ADst_128,  AVSrc_64>;
def VOPProfileMAI_I32_I64_X32     : VOPProfileMAI<VOP_V16I32_I64_I64_V16I32,     AISrc_512_b32,  ADst_512,  AVSrc_64>;
def VOPProfileMAI_F32_V2F32_X16   : VOPProfileMAI<VOP_V4F32_V2F32_V2F32_V4F32,   AISrc_128_b32,  ADst_128,  AVSrc_64>;
def VOPProfileMAI_F32_V2F32_X32   : VOPProfileMAI<VOP_V16F32_V2F32_V2F32_V16F32, AISrc_512_b32,  ADst_512,  AVSrc_64>;
// Note: for the F32_I64 pair the X32 profile is the 4-element (128-bit) one
// and the X16 profile is the 16-element (512-bit) one.
def VOPProfileMAI_F32_I64_X32     : VOPProfileMAI<VOP_V4F32_I64_I64_V4F32,       AISrc_128_b32,  ADst_128,  AVSrc_64>;
def VOPProfileMAI_F32_I64_X16     : VOPProfileMAI<VOP_V16F32_I64_I64_V16F32,     AISrc_512_b32,  ADst_512,  AVSrc_64>;

// "_VCD" MAI profiles: same value types as the profiles without the suffix,
// but src2 and the destination use VGPR operand classes (VISrc_* / VDst_*).
def VOPProfileMAI_F32_F32_X4_VCD     : VOPProfileMAI<VOP_V4F32_F32_F32_V4F32,       VISrc_128_f32,  VDst_128>;
def VOPProfileMAI_F32_F32_X16_VCD    : VOPProfileMAI<VOP_V16F32_F32_F32_V16F32,     VISrc_512_f32,  VDst_512>;
def VOPProfileMAI_F32_F32_X32_VCD    : VOPProfileMAI<VOP_V32F32_F32_F32_V32F32,     VISrc_1024_f32, VDst_1024>;
def VOPProfileMAI_I32_I32_X4_VCD     : VOPProfileMAI<VOP_V4I32_I32_I32_V4I32,       VISrc_128_b32,  VDst_128>;
def VOPProfileMAI_I32_I32_X16_VCD    : VOPProfileMAI<VOP_V16I32_I32_I32_V16I32,     VISrc_512_b32,  VDst_512>;
def VOPProfileMAI_I32_I32_X32_VCD    : VOPProfileMAI<VOP_V32I32_I32_I32_V32I32,     VISrc_1024_b32, VDst_1024>;
def VOPProfileMAI_F32_V2I16_X4_VCD   : VOPProfileMAI<VOP_V4F32_V2I16_V2I16_V4F32,   VISrc_128_b32,  VDst_128>;
def VOPProfileMAI_F32_V2I16_X16_VCD  : VOPProfileMAI<VOP_V16F32_V2I16_V2I16_V16F32, VISrc_512_b32,  VDst_512>;
def VOPProfileMAI_F32_V2I16_X32_VCD  : VOPProfileMAI<VOP_V32F32_V2I16_V2I16_V32F32, VISrc_1024_b32, VDst_1024>;
def VOPProfileMAI_F32_V4F16_X4_VCD   : VOPProfileMAI<VOP_V4F32_V4F16_V4F16_V4F32,   VISrc_128_b32,  VDst_128,  AVSrc_64>;
def VOPProfileMAI_F32_V4F16_X16_VCD  : VOPProfileMAI<VOP_V16F32_V4F16_V4F16_V16F32, VISrc_512_b32,  VDst_512,  AVSrc_64>;
def VOPProfileMAI_F32_V4F16_X32_VCD  : VOPProfileMAI<VOP_V32F32_V4F16_V4F16_V32F32, VISrc_1024_b32, VDst_1024, AVSrc_64>;
def VOPProfileMAI_F32_V4I16_X4_VCD   : VOPProfileMAI<VOP_V4F32_V4I16_V4I16_V4F32,   VISrc_128_b32,  VDst_128,  AVSrc_64>;
def VOPProfileMAI_F32_V4I16_X16_VCD  : VOPProfileMAI<VOP_V16F32_V4I16_V4I16_V16F32, VISrc_512_b32,  VDst_512,  AVSrc_64>;
def VOPProfileMAI_F32_V4I16_X32_VCD  : VOPProfileMAI<VOP_V32F32_V4I16_V4I16_V32F32, VISrc_1024_b32, VDst_1024, AVSrc_64>;
def VOPProfileMAI_F64_16X16X4F64_VCD : VOPProfileMAI<VOP_V4F64_F64_F64_V4F64,       VISrc_256_f64,  VDst_256,  AVSrc_64>;
def VOPProfileMAI_F64_4X4X4F64_VCD   : VOPProfileMAI<VOP_F64_F64_F64_F64,           VISrc_64_f64,   VDst_64,   AVSrc_64>;
def VOPProfileMAI_I32_I64_X16_VCD    : VOPProfileMAI<VOP_V4I32_I64_I64_V4I32,       VISrc_128_b32,  VDst_128,  AVSrc_64>;
def VOPProfileMAI_I32_I64_X32_VCD    : VOPProfileMAI<VOP_V16I32_I64_I64_V16I32,     VISrc_512_b32,  VDst_512,  AVSrc_64>;
def VOPProfileMAI_F32_V2F32_X16_VCD  : VOPProfileMAI<VOP_V4F32_V2F32_V2F32_V4F32,   VISrc_128_b32,  VDst_128,  AVSrc_64>;
def VOPProfileMAI_F32_V2F32_X32_VCD  : VOPProfileMAI<VOP_V16F32_V2F32_V2F32_V16F32, VISrc_512_b32,  VDst_512,  AVSrc_64>;
def VOPProfileMAI_F32_I64_X32_VCD    : VOPProfileMAI<VOP_V4F32_I64_I64_V4F32,       VISrc_128_b32,  VDst_128,  AVSrc_64>;
def VOPProfileMAI_F32_I64_X16_VCD    : VOPProfileMAI<VOP_V16F32_I64_I64_V16F32,     VISrc_512_b32,  VDst_512,  AVSrc_64>;

// SMFMAC profiles. All of them take a 64-bit src0 (AVSrc_64) and a 128-bit
// src1 (AVSrc_128); the destination is AVDst_128 or AVDst_512.
def VOPProfileSMFMAC_F32_16X16X32_F16 : VOPProfileSMFMAC<VOP_V4F32_V4F16_V8F16_I32,  AVDst_128, AVSrc_64, AVSrc_128>;
def VOPProfileSMFMAC_F32_32X32X16_F16 : VOPProfileSMFMAC<VOP_V16F32_V4F16_V8F16_I32, AVDst_512, AVSrc_64, AVSrc_128>;
def VOPProfileSMFMAC_F32_16X16X32_I16 : VOPProfileSMFMAC<VOP_V4F32_V4I16_V8I16_I32,  AVDst_128, AVSrc_64, AVSrc_128>;
def VOPProfileSMFMAC_F32_32X32X16_I16 : VOPProfileSMFMAC<VOP_V16F32_V4I16_V8I16_I32, AVDst_512, AVSrc_64, AVSrc_128>;
def VOPProfileSMFMAC_I32_16X16X64_I8  : VOPProfileSMFMAC<VOP_V4I32_V2I32_V4I32_I32,  AVDst_128, AVSrc_64, AVSrc_128>;
def VOPProfileSMFMAC_I32_32X32X32_I8  : VOPProfileSMFMAC<VOP_V16I32_V2I32_V4I32_I32, AVDst_512, AVSrc_64, AVSrc_128>;
def VOPProfileSMFMAC_F32_16X16X64_F8  : VOPProfileSMFMAC<VOP_V4F32_V2I32_V4I32_I32,  AVDst_128, AVSrc_64, AVSrc_128>;
def VOPProfileSMFMAC_F32_32X32X32_F8  : VOPProfileSMFMAC<VOP_V16F32_V2I32_V4I32_I32, AVDst_512, AVSrc_64, AVSrc_128>;

// Mix-in attached to every MFMA def: records whether the def is the "mac"
// (dst tied to src2) form and the name string stored in FMAOp.
class MFMATable <bit is_mac, string Name> {
  bit    IsMac = is_mac;
  string FMAOp = Name;
}

// PatFrag that forwards the six MAI operands unchanged to `Op` and gates the
// match on the C++ predicate `pred`.
class MAIFrag<SDPatternOperator Op, code pred> : PatFrag <
  (ops node:$src0, node:$src1, node:$src2, node:$cbsz, node:$abid, node:$blgp),
  (Op $src0, $src1, $src2, $cbsz, $abid, $blgp),
  pred
>;

// Predicate bodies querying SIMachineFunctionInfo::mayNeedAGPRs(). Two
// spellings are needed: the plain versions dereference MF with `->`, the
// *_gisel versions (used as GISelPredicateCode) access it with `.`.
defvar MayNeedAGPRs = [{
  return MF->getInfo<SIMachineFunctionInfo>()->mayNeedAGPRs();
}];

defvar MayNeedAGPRs_gisel = [{
  return MF.getInfo<SIMachineFunctionInfo>()->mayNeedAGPRs();
}];

defvar MayNotNeedAGPRs = [{
  return !MF->getInfo<SIMachineFunctionInfo>()->mayNeedAGPRs();
}];

defvar MayNotNeedAGPRs_gisel = [{
  return !MF.getInfo<SIMachineFunctionInfo>()->mayNeedAGPRs();
}];

// MAIFrag that matches only when the function may need AGPRs.
class AgprMAIFrag<SDPatternOperator Op> : MAIFrag<Op, MayNeedAGPRs> {
  let GISelPredicateCode = MayNeedAGPRs_gisel;
}

// MAIFrag that matches only when the function is known not to need AGPRs.
class VgprMAIFrag<SDPatternOperator Op> : MAIFrag<Op, MayNotNeedAGPRs> {
  let GISelPredicateCode = MayNotNeedAGPRs_gisel;
}

let SubtargetPredicate = HasMAIInsts in {

let isAsCheapAsAMove = 1, isReMaterializable = 1 in {
  defm V_ACCVGPR_READ_B32  : VOP3Inst<"v_accvgpr_read_b32",  VOPProfileAccRead>;
  let isMoveImm = 1 in {
    defm V_ACCVGPR_WRITE_B32 : VOP3Inst<"v_accvgpr_write_b32", VOPProfileAccWrite>;
  } // End isMoveImm = 1
} // End isAsCheapAsAMove = 1, isReMaterializable = 1

// Base record for a single MAI instruction. The extra fields are exported
// through MAIInstInfoTable, keyed by Opcode.
class MAIInst<string OpName, VOPProfile P, SDPatternOperator node>
  : VOP3InstBase<OpName, P, node> {
  Instruction Opcode = !cast<Instruction>(NAME);
  bit is_dgemm = 0;
  bit is_gfx940_xdl = 0;
}

// Defines the family of pseudos for one MFMA operation.
//   OpName       - assembler mnemonic.
//   P            - profile suffix; "VOPProfileMAI_" # P is the AGPR-destination
//                  profile and the same name with "_VCD" appended is the
//                  VGPR-destination one.
//   node         - intrinsic to select from, or null_frag for no pattern.
//   NoDstOverlap - defaults to the profile's NoDstOverlap bit.
//
// Always defined:
//   _e64, _vgprcd_e64 (GFX90A+) - earlyclobber on $vdst when NoDstOverlap is
//                                 set; in that case they carry no pattern.
// Only when NoDstOverlap is set:
//   _mac_e64, _mac_vgprcd_e64 (GFX90A+) - $vdst tied to $src2, convertible to
//                                 three-address form; these carry the pattern.
// The mac and non-mac defs share the same MFMATable name and differ only in
// IsMac.
multiclass MAIInst<string OpName, string P, SDPatternOperator node,
                   bit NoDstOverlap = !cast<VOPProfileMAI>("VOPProfileMAI_" # P).NoDstOverlap> {
  defvar AgprProfile = !cast<VOPProfileMAI>("VOPProfileMAI_" # P);
  defvar VgprProfile = !cast<VOPProfileMAI>("VOPProfileMAI_" # P # "_VCD");

  let isConvergent = 1, mayRaiseFPException = 0, ReadsModeReg = 1 in {
    // FP32 denorm mode is respected, rounding mode is not. Exceptions are not supported.
    let Constraints = !if(NoDstOverlap, "@earlyclobber $vdst", "") in {
      def _e64 : MAIInst<OpName, AgprProfile,
                         !if(!or(NoDstOverlap, !eq(node, null_frag)), null_frag, AgprMAIFrag<node>)>,
                 MFMATable<0, NAME # "_e64">;

      let SubtargetPredicate = isGFX90APlus, Mnemonic = OpName in
      def _vgprcd_e64 : MAIInst<OpName # "_vgprcd", VgprProfile,
                                !if(!or(NoDstOverlap, !eq(node, null_frag)), null_frag, VgprMAIFrag<node>)>,
                        MFMATable<0, NAME # "_vgprcd_e64">;
    }

    if NoDstOverlap then {
      // NoDstOverlap is known to be true inside this branch, so the tied
      // constraint and the three-address flag are written as constants.
      let Constraints = "$vdst = $src2",
          isConvertibleToThreeAddress = 1,
          Mnemonic = OpName in {
        def "_mac_e64" : MAIInst<OpName # "_mac", AgprProfile,
                                 !if(!eq(node, null_frag), null_frag, AgprMAIFrag<node>)>,
                         MFMATable<1, NAME # "_e64">;

        let SubtargetPredicate = isGFX90APlus in
        def _mac_vgprcd_e64 : MAIInst<OpName # "_mac_vgprcd", VgprProfile,
                                      !if(!eq(node, null_frag), null_frag, VgprMAIFrag<node>)>,
                              MFMATable<1, NAME # "_vgprcd_e64">;
      }
    }
  } // End isConvergent = 1, mayRaiseFPException = 0, ReadsModeReg = 1
}

defm V_MFMA_F32_4X4X1F32    : MAIInst<"v_mfma_f32_4x4x1f32",    "F32_F32_X4",    int_amdgcn_mfma_f32_4x4x1f32>;
defm V_MFMA_F32_16X16X1F32  : MAIInst<"v_mfma_f32_16x16x1f32",  "F32_F32_X16",   int_amdgcn_mfma_f32_16x16x1f32>;
defm V_MFMA_F32_16X16X4F32  : MAIInst<"v_mfma_f32_16x16x4f32",  "F32_F32_X4",    int_amdgcn_mfma_f32_16x16x4f32>;
defm V_MFMA_F32_32X32X1F32  : MAIInst<"v_mfma_f32_32x32x1f32",  "F32_F32_X32",   int_amdgcn_mfma_f32_32x32x1f32>;
defm V_MFMA_F32_32X32X2F32  : MAIInst<"v_mfma_f32_32x32x2f32",  "F32_F32_X16",   int_amdgcn_mfma_f32_32x32x2f32>;

let is_gfx940_xdl = 1 in {
defm V_MFMA_F32_4X4X4F16    : MAIInst<"v_mfma_f32_4x4x4f16",    "F32_V4F16_X4",  int_amdgcn_mfma_f32_4x4x4f16>;
defm V_MFMA_I32_4X4X4I8     : MAIInst<"v_mfma_i32_4x4x4i8",     "I32_I32_X4",    int_amdgcn_mfma_i32_4x4x4i8>;
defm V_MFMA_F32_16X16X4F16  : MAIInst<"v_mfma_f32_16x16x4f16",  "F32_V4F16_X16", int_amdgcn_mfma_f32_16x16x4f16>;
defm V_MFMA_F32_16X16X16F16 : MAIInst<"v_mfma_f32_16x16x16f16", "F32_V4F16_X4",  int_amdgcn_mfma_f32_16x16x16f16>;
defm V_MFMA_I32_16X16X4I8   : MAIInst<"v_mfma_i32_16x16x4i8",   "I32_I32_X16",   int_amdgcn_mfma_i32_16x16x4i8>;
defm V_MFMA_F32_32X32X4F16  : MAIInst<"v_mfma_f32_32x32x4f16",  "F32_V4F16_X32", int_amdgcn_mfma_f32_32x32x4f16>;
defm V_MFMA_F32_32X32X8F16  : MAIInst<"v_mfma_f32_32x32x8f16",  "F32_V4F16_X16", int_amdgcn_mfma_f32_32x32x8f16>;
defm V_MFMA_I32_32X32X4I8   : MAIInst<"v_mfma_i32_32x32x4i8",   "I32_I32_X32",   int_amdgcn_mfma_i32_32x32x4i8>;
}

let Predicates = [isGFX908orGFX90A] in {
defm V_MFMA_I32_16X16X16I8  : MAIInst<"v_mfma_i32_16x16x16i8",  "I32_I32_X4",    int_amdgcn_mfma_i32_16x16x16i8>;
defm V_MFMA_I32_32X32X8I8   : MAIInst<"v_mfma_i32_32x32x8i8",   "I32_I32_X16",   int_amdgcn_mfma_i32_32x32x8i8>;
defm V_MFMA_F32_4X4X2BF16   : MAIInst<"v_mfma_f32_4x4x2bf16",   "F32_V2I16_X4",  int_amdgcn_mfma_f32_4x4x2bf16>;
defm V_MFMA_F32_16X16X2BF16 : MAIInst<"v_mfma_f32_16x16x2bf16", "F32_V2I16_X16", int_amdgcn_mfma_f32_16x16x2bf16>;
defm V_MFMA_F32_16X16X8BF16 : MAIInst<"v_mfma_f32_16x16x8bf16", "F32_V2I16_X4",  int_amdgcn_mfma_f32_16x16x8bf16>;
defm V_MFMA_F32_32X32X2BF16 : MAIInst<"v_mfma_f32_32x32x2bf16", "F32_V2I16_X32", int_amdgcn_mfma_f32_32x32x2bf16>;
defm V_MFMA_F32_32X32X4BF16 : MAIInst<"v_mfma_f32_32x32x4bf16", "F32_V2I16_X16", int_amdgcn_mfma_f32_32x32x4bf16>;
}

} // End SubtargetPredicate = HasMAIInsts

let Predicates = [isGFX90APlus] in {
  let is_gfx940_xdl = 1 in {
  defm V_MFMA_F32_32X32X4BF16_1K  : MAIInst<"v_mfma_f32_32x32x4bf16_1k",  "F32_V4I16_X32", int_amdgcn_mfma_f32_32x32x4bf16_1k>;
  defm V_MFMA_F32_16X16X4BF16_1K  : MAIInst<"v_mfma_f32_16x16x4bf16_1k",  "F32_V4I16_X16", int_amdgcn_mfma_f32_16x16x4bf16_1k>;
  defm V_MFMA_F32_4X4X4BF16_1K    : MAIInst<"v_mfma_f32_4x4x4bf16_1k",    "F32_V4I16_X4",  int_amdgcn_mfma_f32_4x4x4bf16_1k>;
  defm V_MFMA_F32_32X32X8BF16_1K  : MAIInst<"v_mfma_f32_32x32x8bf16_1k",  "F32_V4I16_X16", int_amdgcn_mfma_f32_32x32x8bf16_1k>;
  defm V_MFMA_F32_16X16X16BF16_1K : MAIInst<"v_mfma_f32_16x16x16bf16_1k", "F32_V4I16_X4",  int_amdgcn_mfma_f32_16x16x16bf16_1k>;
  }

  let is_dgemm = 1 in {
  defm V_MFMA_F64_16X16X4F64      : MAIInst<"v_mfma_f64_16x16x4f64",      "F64_16X16X4F64", int_amdgcn_mfma_f64_16x16x4f64>;
  defm V_MFMA_F64_4X4X4F64        : MAIInst<"v_mfma_f64_4x4x4f64",        "F64_4X4X4F64",   int_amdgcn_mfma_f64_4x4x4f64>;
  }
} // End Predicates = [isGFX90APlus]

let SubtargetPredicate = isGFX940Plus, is_gfx940_xdl = 1 in {
  defm V_MFMA_I32_32X32X16I8       : MAIInst<"v_mfma_i32_32x32x16i8",       "I32_I64_X32",   int_amdgcn_mfma_i32_32x32x16_i8>;
  defm V_MFMA_I32_16X16X32I8       : MAIInst<"v_mfma_i32_16x16x32i8",       "I32_I64_X16",   int_amdgcn_mfma_i32_16x16x32_i8>;
  defm V_MFMA_F32_16X16X8XF32      : MAIInst<"v_mfma_f32_16x16x8xf32",      "F32_V2F32_X16", int_amdgcn_mfma_f32_16x16x8_xf32>;
  defm V_MFMA_F32_32X32X4XF32      : MAIInst<"v_mfma_f32_32x32x4xf32",      "F32_V2F32_X32", int_amdgcn_mfma_f32_32x32x4_xf32>;
  defm V_MFMA_F32_16X16X32_BF8_BF8 : MAIInst<"v_mfma_f32_16x16x32_bf8_bf8", "F32_I64_X32",   int_amdgcn_mfma_f32_16x16x32_bf8_bf8>;
  defm V_MFMA_F32_16X16X32_BF8_FP8 : MAIInst<"v_mfma_f32_16x16x32_bf8_fp8", "F32_I64_X32",   int_amdgcn_mfma_f32_16x16x32_bf8_fp8>;
  defm V_MFMA_F32_16X16X32_FP8_BF8 : MAIInst<"v_mfma_f32_16x16x32_fp8_bf8", "F32_I64_X32",   int_amdgcn_mfma_f32_16x16x32_fp8_bf8>;
  defm V_MFMA_F32_16X16X32_FP8_FP8 : MAIInst<"v_mfma_f32_16x16x32_fp8_fp8", "F32_I64_X32",   int_amdgcn_mfma_f32_16x16x32_fp8_fp8>;
  defm V_MFMA_F32_32X32X16_BF8_BF8 : MAIInst<"v_mfma_f32_32x32x16_bf8_bf8", "F32_I64_X16",   int_amdgcn_mfma_f32_32x32x16_bf8_bf8>;
  defm V_MFMA_F32_32X32X16_BF8_FP8 : MAIInst<"v_mfma_f32_32x32x16_bf8_fp8", "F32_I64_X16",   int_amdgcn_mfma_f32_32x32x16_bf8_fp8>;
  defm V_MFMA_F32_32X32X16_FP8_BF8 : MAIInst<"v_mfma_f32_32x32x16_fp8_bf8", "F32_I64_X16",   int_amdgcn_mfma_f32_32x32x16_fp8_bf8>;
  defm V_MFMA_F32_32X32X16_FP8_FP8 : MAIInst<"v_mfma_f32_32x32x16_fp8_fp8", "F32_I64_X16",   int_amdgcn_mfma_f32_32x32x16_fp8_fp8>;
} // End SubtargetPredicate = isGFX940Plus, is_gfx940_xdl = 1

// Defines the single _e64 pseudo of one SMFMAC operation. $vdst is always
// tied to $src2, and $src2 is excluded from the encoding.
multiclass SMFMACInst<string OpName, string P, SDPatternOperator node> {
  let Constraints = "$vdst = $src2", DisableEncoding = "$src2",
      isConvergent = 1, mayRaiseFPException = 0, ReadsModeReg = 1, is_gfx940_xdl = 1 in {
    def _e64 : MAIInst<OpName, !cast<VOPProfileSMFMAC>("VOPProfileSMFMAC_" # P), node>;
  }
}

let SubtargetPredicate = isGFX940Plus in {
defm V_SMFMAC_F32_16X16X32_F16     : SMFMACInst<"v_smfmac_f32_16x16x32_f16",     "F32_16X16X32_F16", int_amdgcn_smfmac_f32_16x16x32_f16>;
defm V_SMFMAC_F32_32X32X16_F16     : SMFMACInst<"v_smfmac_f32_32x32x16_f16",     "F32_32X32X16_F16", int_amdgcn_smfmac_f32_32x32x16_f16>;
defm V_SMFMAC_F32_16X16X32_BF16    : SMFMACInst<"v_smfmac_f32_16x16x32_bf16",    "F32_16X16X32_I16", int_amdgcn_smfmac_f32_16x16x32_bf16>;
defm V_SMFMAC_F32_32X32X16_BF16    : SMFMACInst<"v_smfmac_f32_32x32x16_bf16",    "F32_32X32X16_I16", int_amdgcn_smfmac_f32_32x32x16_bf16>;
defm V_SMFMAC_I32_16X16X64_I8      : SMFMACInst<"v_smfmac_i32_16x16x64_i8",      "I32_16X16X64_I8",  int_amdgcn_smfmac_i32_16x16x64_i8>;
defm V_SMFMAC_I32_32X32X32_I8      : SMFMACInst<"v_smfmac_i32_32x32x32_i8",      "I32_32X32X32_I8",  int_amdgcn_smfmac_i32_32x32x32_i8>;
defm V_SMFMAC_F32_16X16X64_BF8_BF8 : SMFMACInst<"v_smfmac_f32_16x16x64_bf8_bf8", "F32_16X16X64_F8",  int_amdgcn_smfmac_f32_16x16x64_bf8_bf8>;
defm V_SMFMAC_F32_16X16X64_BF8_FP8 : SMFMACInst<"v_smfmac_f32_16x16x64_bf8_fp8", "F32_16X16X64_F8",  int_amdgcn_smfmac_f32_16x16x64_bf8_fp8>;
defm V_SMFMAC_F32_16X16X64_FP8_BF8 : SMFMACInst<"v_smfmac_f32_16x16x64_fp8_bf8", "F32_16X16X64_F8",  int_amdgcn_smfmac_f32_16x16x64_fp8_bf8>;
defm V_SMFMAC_F32_16X16X64_FP8_FP8 : SMFMACInst<"v_smfmac_f32_16x16x64_fp8_fp8", "F32_16X16X64_F8",  int_amdgcn_smfmac_f32_16x16x64_fp8_fp8>;
defm V_SMFMAC_F32_32X32X32_BF8_BF8 : SMFMACInst<"v_smfmac_f32_32x32x32_bf8_bf8", "F32_32X32X32_F8",  int_amdgcn_smfmac_f32_32x32x32_bf8_bf8>;
defm V_SMFMAC_F32_32X32X32_BF8_FP8 : SMFMACInst<"v_smfmac_f32_32x32x32_bf8_fp8", "F32_32X32X32_F8",  int_amdgcn_smfmac_f32_32x32x32_bf8_fp8>;
defm V_SMFMAC_F32_32X32X32_FP8_BF8 : SMFMACInst<"v_smfmac_f32_32x32x32_fp8_bf8", "F32_32X32X32_F8",  int_amdgcn_smfmac_f32_32x32x32_fp8_bf8>;
defm V_SMFMAC_F32_32X32X32_FP8_FP8 : SMFMACInst<"v_smfmac_f32_32x32x32_fp8_fp8", "F32_32X32X32_F8",  int_amdgcn_smfmac_f32_32x32x32_fp8_fp8>;
}

// Searchable table over every record deriving from class MAIInst, exposing
// the is_dgemm and is_gfx940_xdl bits keyed by opcode.
def MAIInstInfoTable : GenericTable {
  let FilterClass = "MAIInst";
  let CppTypeName = "MAIInstInfo";
  let Fields = [
    "Opcode", "is_dgemm", "is_gfx940_xdl"
  ];

  let PrimaryKey = ["Opcode"];
  let PrimaryKeyName = "getMAIInstInfoHelper";
}

let isCommutable = 1, isReMaterializable = 1 in {
  let SubtargetPredicate = HasPackedFP32Ops in {
    defm V_PK_FMA_F32 : VOP3PInst<"v_pk_fma_f32", VOP3P_Profile<VOP_V2F32_V2F32_V2F32_V2F32, VOP3_PACKED>, any_fma>;
    defm V_PK_MUL_F32 : VOP3PInst<"v_pk_mul_f32", VOP3P_Profile<VOP_V2F32_V2F32_V2F32, VOP3_PACKED>, any_fmul>;
    defm V_PK_ADD_F32 : VOP3PInst<"v_pk_add_f32", VOP3P_Profile<VOP_V2F32_V2F32_V2F32, VOP3_PACKED>, any_fadd>;
  } // End SubtargetPredicate = HasPackedFP32Ops

  let SubtargetPredicate = HasPkMovB32 in
  defm V_PK_MOV_B32 : VOP3PInst<"v_pk_mov_b32", VOP3P_Profile<VOP_V2I32_V2I32_V2I32, VOP3_PACKED>>;
} // End isCommutable = 1, isReMaterializable = 1

def : MnemonicAlias<"v_accvgpr_read", "v_accvgpr_read_b32">;
def : MnemonicAlias<"v_accvgpr_write", "v_accvgpr_write_b32">;

// Profile for the WMMA instructions.
//   Suffix     - "_w32" selects the 256-bit destination/src2 operand classes,
//                any other value selects the 128-bit ones.
//   _Src01RC64 - operand class shared by src0 and src1.
//   _HasClamp, _HasOpSel - forwarded to HasClamp / HasOpSel.
class VOPProfileWMMA<VOPProfile P, string Suffix, RegisterOperand _Src01RC64,
                     bit _HasClamp, bit _HasOpSel> : VOP3P_Profile<P> {
  let DstRC    = !if(!eq(Suffix, "_w32"), VDst_256, VDst_128);
  let Src0RC64 = _Src01RC64;
  let Src1RC64 = _Src01RC64;
  let Src2RC64 = !if(!eq(Suffix, "_w32"), VISrc_256_f64, VISrc_128_f32);
  let HasClamp = _HasClamp;
  let HasOpSel = _HasOpSel;
  let IsPacked = 1;
  let IsWMMA   = 1;
}

// Value-type profiles with a 256-bit (8 x 32-bit or 16 x 16-bit) accumulator.
def VOP_V8F32_V16F16_V16F16_V8F32    : VOPProfile <[v8f32,  v16f16, v16f16, v8f32]>;
def VOP_V8F32_V16I16_V16I16_V8F32    : VOPProfile <[v8f32,  v16i16, v16i16, v8f32]>;
def VOP_V16F16_V16F16_V16F16_V16F16  : VOPProfile <[v16f16, v16f16, v16f16, v16f16]>;
def VOP_V16I16_V16I16_V16I16_V16I16  : VOPProfile <[v16i16, v16i16, v16i16, v16i16]>;
def VOP_V8I32_V4I32_V4I32_V8I32      : VOPProfile <[v8i32,  v4i32,  v4i32,  v8i32]>;
def VOP_V8I32_V2I32_V2I32_V8I32      : VOPProfile <[v8i32,  v2i32,  v2i32,  v8i32]>;

// Value-type profiles with a 128-bit (4 x 32-bit or 8 x 16-bit) accumulator.
def VOP_V4F32_V16F16_V16F16_V4F32    : VOPProfile <[v4f32,  v16f16, v16f16, v4f32]>;
def VOP_V4F32_V16I16_V16I16_V4F32    : VOPProfile <[v4f32,  v16i16, v16i16, v4f32]>;
def VOP_V8F16_V16F16_V16F16_V8F16    : VOPProfile <[v8f16,  v16f16, v16f16, v8f16]>;
def VOP_V8I16_V16I16_V16I16_V8I16    : VOPProfile <[v8i16,  v16i16, v16i16, v8i16]>;
def VOP_V4I32_V4I32_V4I32_V4I32      : VOPProfile <[v4i32,  v4i32,  v4i32,  v4i32]>;
def VOP_V4I32_V2I32_V2I32_V4I32      : VOPProfile <[v4i32,  v2i32,  v2i32,  v4i32]>;


// Flavor of a WMMA instruction, encoded in two bits:
// bit 0 = has clamp, bit 1 = has op_sel.
class WMMAType <bits<2> val> {
  bit hasClamp = val{0};
  bit hasOpsel = val{1};
}

def WMMARegular : WMMAType<0b00>;
def WMMAUIClamp : WMMAType<0b01>;
def WMMAOpSel   : WMMAType<0b10>;

// Regular flavor: all three sources are matched through VOP3PMods.
class WMMARegularPat<Instruction Inst, SDPatternOperator node, VOPProfile P> :
  GCNPat <
    (P.DstVT (node
      (P.Src0VT (VOP3PMods P.Src0VT:$src0, i32:$src0_modifiers)),
      (P.Src1VT (VOP3PMods P.Src1VT:$src1, i32:$src1_modifiers)),
      (P.Src2VT (VOP3PMods P.Src2VT:$src2, i32:$src2_modifiers))
    )),
    (P.DstVT (Inst i32:$src0_modifiers, P.Src0VT:$src0,
                   i32:$src1_modifiers, P.Src1VT:$src1,
                   $src2_modifiers, P.Src2VT:$src2))
>;

// OpSel flavor: the node carries a fourth operand that WMMAOpSelVOP3PMods
// turns into $src2_modifiers; src0/src1 modifiers are the constant (i32 8).
class WMMAOpSelPat<Instruction Inst, SDPatternOperator node, VOPProfile P> :
  GCNPat <
    (P.DstVT (node
      (P.Src0VT P.Src0VT:$src0),
      (P.Src1VT P.Src1VT:$src1),
      (P.Src2VT P.Src2VT:$src2), (WMMAOpSelVOP3PMods i32:$src2_modifiers)
    )),
    (P.DstVT (Inst (i32 8), P.Src0VT:$src0,
                   (i32 8), P.Src1VT:$src1,
                   i32:$src2_modifiers, P.Src2VT:$src2))
>;

// UI-clamp flavor: src0 and src1 are each preceded in the node by an operand
// that VOP3PModsNeg turns into the corresponding modifiers; the node ends
// with an immediate clamp bit. The src2 modifiers are the constant (i32 8).
class WMMAUIClampPat<Instruction Inst, SDPatternOperator node, VOPProfile P> :
  GCNPat <
    (P.DstVT (node
      (VOP3PModsNeg i32:$src0_modifiers), (P.Src0VT P.Src0VT:$src0),
      (VOP3PModsNeg i32:$src1_modifiers), (P.Src1VT P.Src1VT:$src1),
      (P.Src2VT P.Src2VT:$src2), (i1 timm:$clamp)
    )),
    (P.DstVT (Inst i32:$src0_modifiers, P.Src0VT:$src0,
                   i32:$src1_modifiers, P.Src1VT:$src1,
                   (i32 8), P.Src2VT:$src2, i1:$clamp))
>;

// Pairs the two-address form of a WMMA pseudo with its three-address form.
class WMMAOpcodeMapping<Instruction TwoAddr, Instruction ThreeAddr> {
  Instruction Opcode2Addr = TwoAddr;
  Instruction Opcode3Addr = ThreeAddr;
  Predicate WaveSizePredicate;
}

def WMMAOpcode : GenericEnum {
  let FilterClass = "VOP3P_Pseudo";
}

// Shared description of the two lookup tables below; each one only picks a
// different primary key.
class WMMAMappingTable : GenericTable {
  let FilterClass = "WMMAOpcodeMapping";
  let CppTypeName = "WMMAOpcodeMappingInfo";
  let Fields = ["Opcode2Addr", "Opcode3Addr"];
  string TypeOf_Opcode2Addr = "WMMAOpcode";
  string TypeOf_Opcode3Addr = "WMMAOpcode";
}

def WMMAOpcode2AddrMappingTable : WMMAMappingTable {
  let PrimaryKey = ["Opcode2Addr"];
  let PrimaryKeyName = "getWMMAMappingInfoFrom2AddrOpcode";
}

def WMMAOpcode3AddrMappingTable : WMMAMappingTable {
  let PrimaryKey = ["Opcode3Addr"];
  let PrimaryKeyName = "getWMMAMappingInfoFrom3AddrOpcode";
}

// The WMMA instruction has extra constraints:
// Matrices A and B cannot overlap with D. C cannot partially overlap with D,
// but it is OK for them to be the same (which is a typical case).
//
// We implement it as follows:
// 1) Map the intrinsic to the pseudo where D is tied to C ($vdst = $src2).
// 2) The pass twoaddressinstruction checks if src2 is live and if that is the case
//    it converts the default pseudo to the pseudo where src2 is not the same as vdst.
// 3) @earlyclobber on the destination satisfies the constraint during RA.

// Defines the GFX11 WMMA pseudos for one instruction / wave-size combination.
//
//   Suffix             - appended to both the record names and the pseudo's
//                        opcode name (e.g. "_w32").
//   Instr              - assembler mnemonic.
//   P                  - value-type profile handed to VOPProfileWMMA and to the
//                        selection pattern.
//   node               - operator matched by the selection pattern.
//   _Src01RC64         - register operand forwarded to VOPProfileWMMA.
//   Type               - WMMAType; supplies hasClamp/hasOpsel and chooses which
//                        pattern class is instantiated.
//   convertibleTo3Addr - when set, a `_threeaddr` pseudo and a
//                        WMMAOpcodeMapping record are emitted as well.
multiclass WMMAInst<string Suffix, string Instr, VOPProfile P, SDPatternOperator node = null_frag, RegisterOperand _Src01RC64 = VRegSrc_256, WMMAType Type, bit convertibleTo3Addr> {

  defvar WMMAConstraints2Addr = "@earlyclobber $vdst,$vdst = $src2";
  defvar WMMAConstraints3Addr = "@earlyclobber $vdst";

  defvar WMMAProfile = VOPProfileWMMA<P, Suffix, _Src01RC64, Type.hasClamp, Type.hasOpsel>;

  // Two-address form: $vdst is tied to $src2.
  let Mnemonic = Instr, mayRaiseFPException = 0, ReadsModeReg = 0 in {
    let Constraints = WMMAConstraints2Addr, isConvertibleToThreeAddress = convertibleTo3Addr in {
      def _twoaddr # Suffix : VOP3P_Pseudo<Instr # Suffix, WMMAProfile>;
    }
  }

  // Three-address form plus the 2-addr <-> 3-addr mapping row; only emitted
  // when the instruction may be converted.
  if convertibleTo3Addr then {
    let Mnemonic = Instr, mayRaiseFPException = 0, ReadsModeReg = 0 in {
      let Constraints = WMMAConstraints3Addr, SchedRW = [Write32Bit, Write32Bit] in {
        def _threeaddr # Suffix : VOP3P_Pseudo<Instr # Suffix, WMMAProfile>;
      }
    }
    def : WMMAOpcodeMapping<!cast<Instruction>(NAME # _twoaddr # Suffix),
                            !cast<Instruction>(NAME # _threeaddr # Suffix)>;
  }

  // Selection always targets the two-address pseudo; the pattern class is
  // picked from Type.
  let SubtargetPredicate = isGFX11Only in {
    if !eq(Type, WMMAOpSel) then {
      def : WMMAOpSelPat<!cast<Instruction>(NAME # _twoaddr # Suffix), node, P>;
    } else if !eq(Type, WMMAUIClamp) then {
      def : WMMAUIClampPat<!cast<Instruction>(NAME # _twoaddr # Suffix), node, P>;
    } else {
      def : WMMARegularPat<!cast<Instruction>(NAME # _twoaddr # Suffix), node, P>;
    }
  }
}



let WaveSizePredicate = isWave32 in {
  defm V_WMMA_F32_16X16X16_F16        : WMMAInst<"_w32", "v_wmma_f32_16x16x16_f16",   VOP_V8F32_V16F16_V16F16_V8F32,   int_amdgcn_wmma_f32_16x16x16_f16,        VRegSrc_256, WMMARegular, 1>;
  defm V_WMMA_F32_16X16X16_BF16       : WMMAInst<"_w32", "v_wmma_f32_16x16x16_bf16",  VOP_V8F32_V16I16_V16I16_V8F32,   int_amdgcn_wmma_f32_16x16x16_bf16,       VRegSrc_256, WMMARegular, 1>;
  defm V_WMMA_F16_16X16X16_F16        : WMMAInst<"_w32", "v_wmma_f16_16x16x16_f16",   VOP_V16F16_V16F16_V16F16_V16F16, int_amdgcn_wmma_f16_16x16x16_f16,        VRegSrc_256, WMMAOpSel,   1>;
  defm V_WMMA_BF16_16X16X16_BF16      : WMMAInst<"_w32", "v_wmma_bf16_16x16x16_bf16", VOP_V16I16_V16I16_V16I16_V16I16, int_amdgcn_wmma_bf16_16x16x16_bf16,      VRegSrc_256, WMMAOpSel,   1>;
  defm V_WMMA_F16_16X16X16_F16_TIED   : WMMAInst<"_w32", "v_wmma_f16_16x16x16_f16",   VOP_V16F16_V16F16_V16F16_V16F16, int_amdgcn_wmma_f16_16x16x16_f16_tied,   VRegSrc_256, WMMAOpSel,   0>;
  defm V_WMMA_BF16_16X16X16_BF16_TIED : WMMAInst<"_w32", "v_wmma_bf16_16x16x16_bf16", VOP_V16I16_V16I16_V16I16_V16I16, int_amdgcn_wmma_bf16_16x16x16_bf16_tied, VRegSrc_256, WMMAOpSel,   0>;
  defm V_WMMA_I32_16X16X16_IU8        : WMMAInst<"_w32", "v_wmma_i32_16x16x16_iu8",   VOP_V8I32_V4I32_V4I32_V8I32,     int_amdgcn_wmma_i32_16x16x16_iu8,        VRegSrc_128, WMMAUIClamp, 1>;
  defm V_WMMA_I32_16X16X16_IU4        : WMMAInst<"_w32", "v_wmma_i32_16x16x16_iu4",   VOP_V8I32_V2I32_V2I32_V8I32,     int_amdgcn_wmma_i32_16x16x16_iu4,        VRegSrc_64,  WMMAUIClamp, 1>;
}

let WaveSizePredicate = isWave64 in {
  defm V_WMMA_F32_16X16X16_F16        : WMMAInst<"_w64", "v_wmma_f32_16x16x16_f16",   VOP_V4F32_V16F16_V16F16_V4F32, int_amdgcn_wmma_f32_16x16x16_f16,        VRegSrc_256, WMMARegular, 1>;
  defm V_WMMA_F32_16X16X16_BF16       : WMMAInst<"_w64", "v_wmma_f32_16x16x16_bf16",  VOP_V4F32_V16I16_V16I16_V4F32, int_amdgcn_wmma_f32_16x16x16_bf16,       VRegSrc_256, WMMARegular, 1>;
  defm V_WMMA_F16_16X16X16_F16        : WMMAInst<"_w64", "v_wmma_f16_16x16x16_f16",   VOP_V8F16_V16F16_V16F16_V8F16, int_amdgcn_wmma_f16_16x16x16_f16,        VRegSrc_256, WMMAOpSel,   1>;
  defm V_WMMA_BF16_16X16X16_BF16      : WMMAInst<"_w64", "v_wmma_bf16_16x16x16_bf16", VOP_V8I16_V16I16_V16I16_V8I16, int_amdgcn_wmma_bf16_16x16x16_bf16,      VRegSrc_256, WMMAOpSel,   1>;
  defm V_WMMA_F16_16X16X16_F16_TIED   : WMMAInst<"_w64", "v_wmma_f16_16x16x16_f16",   VOP_V8F16_V16F16_V16F16_V8F16, int_amdgcn_wmma_f16_16x16x16_f16_tied,   VRegSrc_256, WMMAOpSel,   0>;
  defm V_WMMA_BF16_16X16X16_BF16_TIED : WMMAInst<"_w64", "v_wmma_bf16_16x16x16_bf16", VOP_V8I16_V16I16_V16I16_V8I16, int_amdgcn_wmma_bf16_16x16x16_bf16_tied, VRegSrc_256, WMMAOpSel,   0>;
  defm V_WMMA_I32_16X16X16_IU8        : WMMAInst<"_w64", "v_wmma_i32_16x16x16_iu8",   VOP_V4I32_V4I32_V4I32_V4I32,   int_amdgcn_wmma_i32_16x16x16_iu8,        VRegSrc_128, WMMAUIClamp, 1>;
  defm V_WMMA_I32_16X16X16_IU4        : WMMAInst<"_w64", "v_wmma_i32_16x16x16_iu4",   VOP_V4I32_V2I32_V2I32_V4I32,   int_amdgcn_wmma_i32_16x16x16_iu4,        VRegSrc_64,  WMMAUIClamp, 1>;

}

// Operand/asm/pattern profile shared by the GFX12 WMMA and SWMMAC pseudos.
//
//   ArgTy      - value types of matrices [D, A, B, C] (D = A * B + C).
//   _IsSWMMAC  - 1 for the sparse (swmmac) form, 0 for wmma.
//   _IndexType - 0, 8 or 16: selects no index key, $index_key_8bit or
//                $index_key_16bit.
//   _IsIU      - integer (iu4/iu8) flavour: adds $clamp, drops src2 modifiers.
//   _IsFP8BF8  - fp8/bf8 flavour: drops src0/src1 modifiers.
class VOP3PWMMA_Profile<list<ValueType> ArgTy, bit _IsSWMMAC, int _IndexType,
                        bit _IsIU, bit _IsFP8BF8>
    : VOP3P_Profile<VOPProfile<ArgTy>> {
  bit IsIU = _IsIU;
  bit IsFP8BF8 = _IsFP8BF8;
  // Anything that is neither IU nor FP8/BF8 is treated as f16/bf16.
  bit IsF16BF16 = !not(!or(IsIU, IsFP8BF8));

  int IndexType = _IndexType;

  let IsPacked = 1;
  let IsWMMA = !not(_IsSWMMAC);
  let IsSWMMAC = _IsSWMMAC;

  bit IsAB_F16 = !and(IsF16BF16, ArgTy[1].isFP);
  bit IsAB_BF16 = !and(IsF16BF16, isIntType<ArgTy[1]>.ret);
  bit IsC_F32 = !or(!eq(ArgTy[3], v8f32), !eq(ArgTy[3], v4f32));
  bit IsC_BF16 = !or(!eq(ArgTy[3], v8i16), !eq(ArgTy[3], v4i16));
  bit IsC_F16 = !or(!eq(ArgTy[3], v8f16), !eq(ArgTy[3], v4f16));

  bit NegLo01 = !or(IsF16BF16, IsIU);
  bit NegLo2 = !and(!or(IsF16BF16, IsFP8BF8), IsWMMA);
  bit NegHi01 = IsF16BF16;
  bit NegHi2 = !and(!or(IsF16BF16, IsFP8BF8), IsWMMA);
  bit NegLoAny = !or(NegLo01, NegLo2);
  bit NegHiAny = !or(NegHi01, NegHi2);

  // Destination register class, chosen from the type of D.
  let DstRC = !cond(!eq(ArgTy[0], v8f32): VDst_256,
                    !eq(ArgTy[0], v8i32): VDst_256,
                    !eq(ArgTy[0], v8f16): VDst_128,
                    !eq(ArgTy[0], v8i16): VDst_128,
                    !eq(ArgTy[0], v4f32): VDst_128,
                    !eq(ArgTy[0], v4i32): VDst_128,
                    !eq(ArgTy[0], v4f16): VDst_64,
                    !eq(ArgTy[0], v4i16): VDst_64);
  // src0 register class, chosen from the type of A.
  let Src0RC64 = !cond(!eq(ArgTy[1], v8f16): VRegSrc_128,
                       !eq(ArgTy[1], v4f16): VRegSrc_64,
                       !eq(ArgTy[1], v4i16): VRegSrc_64,
                       !eq(ArgTy[1], v8i16): VRegSrc_128,
                       !eq(ArgTy[1], v4i32): VRegSrc_128,
                       !eq(ArgTy[1], v2i32): VRegSrc_64,
                       !eq(ArgTy[1], i32)  : VRegSrc_32);
  // src1 register class, chosen from the type of B. Every case must test
  // ArgTy[2]: testing ArgTy[1] for the v4i16/v4f16 cases only worked because
  // A and B happen to share a type (or an earlier case matches) in all current
  // profiles.
  let Src1RC64 = !cond(!eq(ArgTy[2], v16f16): VRegSrc_256,
                       !eq(ArgTy[2], v16i16): VRegSrc_256,
                       !eq(ArgTy[2], v8f16): VRegSrc_128,
                       !eq(ArgTy[2], v8i16): VRegSrc_128,
                       !eq(ArgTy[2], v4i32): VRegSrc_128,
                       !eq(ArgTy[2], v4i16): VRegSrc_64,
                       !eq(ArgTy[2], v4f16): VRegSrc_64,
                       !eq(ArgTy[2], v2i32): VRegSrc_64,
                       !eq(ArgTy[2], i32)  : VRegSrc_32);
  // src2 register class: DstRC for swmmac, otherwise chosen from the type of C.
  let Src2RC64 = !if(IsSWMMAC, DstRC,
                     !cond(!eq(ArgTy[3], v8f32): VISrc_256_f32,
                           !eq(ArgTy[3], v8i32): VISrc_256_b32,
                           !eq(ArgTy[3], v8f16): VISrc_128_f16,
                           !eq(ArgTy[3], v8i16): VISrc_128_f32, // bf16
                           !eq(ArgTy[3], v4f16): VISrc_64_f16,
                           !eq(ArgTy[3], v4i16): VISrc_64_b32,
                           !eq(ArgTy[3], v4i32): VISrc_128_b32,
                           !eq(ArgTy[3], v4f32): VISrc_128_f32));

  // For f16 and bf16 matrices A and B, each element can be modified by
  // fneg(neg_lo,neg_hi = 1). For iu4 and iu8 matrices A and B neg_lo is
  // overloaded to mean unsigned/signed: neg_lo = 0 (u4 and u8) unsigned(zext)
  // neg_lo = 1 (i4 and i8) signed(sext). For f16, bf16 and f32 matrix C each
  // element can be modified by fneg(neg_lo = 1) or fabs(neg_hi = 1).

  // Opcode             | src0/src1 - matrix A/B | src2 - matrix C or Index
  // ---------------------------------------------------------------------------
  // wmma f32_f16       | both neg_lo,neg_hi = 1 | neg_lo = 1  neg C(f32)
  // wmma f32_bf16      | neg A/B (f16 or bf16)  | neg_hi = 1  abs C(f32)
  // ---------------------------------------------------------------------------
  // wmma f16_f16       | both neg_lo,neg_hi = 1 | neg_lo = 1 neg C(f16 or bf16)
  // wmma bf16_bf16     | neg A/B (f16 or bf16)  | neg_hi = 1 abs C(f16 or bf16)
  // ---------------------------------------------------------------------------
  // wmma i32_iu8/iu4   | neg_lo = 0 u4/u8(zext) | not allowed for
  //                    | neg_lo = 1 i4/i8(sext) | i32 matrices
  // ---------------------------------------------------------------------------
  // wmma f32_fp8/bf8   | not allowed for        | neg_lo = 1  neg C(f32)
  // (4 instructions)   | f8 and bf8 matrices    | neg_hi = 1  abs C(f32)
  // ---------------------------------------------------------------------------
  // swmmac f32_f16     | both neg_lo,neg_hi = 1 | not allowed for sparse matrix
  // swmmac f32_bf16    | neg A/B (f16 or bf16)  | A Index - matrix C is in dst
  // ---------------------------------------------------------------------------
  // swmmac f16_f16     | both neg_lo,neg_hi = 1 | not allowed for sparse matrix
  // swmmac bf16_bf16   | neg A/B (f16 or bf16)  | A Index - matrix C is in dst
  // ---------------------------------------------------------------------------
  // swmmac i32_iu8/iu4 | neg_lo = 0 u4/u8(zext) | not allowed for sparse matrix
  //                    | neg_lo = 1 i4/i8(sext) | A Index - matrix C is in dst
  // ---------------------------------------------------------------------------
  // swmmac f32_fp8/bf8 | not allowed for        | not allowed for sparse matrix
  // (4 instructions)   | f8 and bf8 matrices    | A Index - matrix C is in dst

  // pseudo

  // fp8bf8 wmmas don't use src (0 and 1) modifiers, iu use neg_lo, f16 and bf16
  // use neg_lo and neg_hi. iu wmmas (C is i32) don't use src 2 modifiers,
  // remaining wmmas(f16, bf16 and f8bf8) use neg_lo and neg_hi for C (C is f32
  // f16 or bf16). swmmac use index_key and don't use src 2 modifiers.

  dag Src0Mods = !if(IsFP8BF8, (ins), (ins PackedF16InputMods:$src0_modifiers));
  dag Src1Mods = !if(IsFP8BF8, (ins), (ins PackedF16InputMods:$src1_modifiers));
  dag Src2Mods = !if(IsIU, (ins), (ins PackedF16InputMods:$src2_modifiers));
  dag IndexKey = !cond(!eq(IndexType, 0) : (ins),
                       !eq(IndexType, 8) : (ins IndexKey8bit:$index_key_8bit),
                       !eq(IndexType, 16): (ins IndexKey16bit:$index_key_16bit));
  dag Clamp = !if(IsIU, (ins clampmod0:$clamp), (ins));
  dag Neg = !cond(!and(NegLoAny, NegHiAny)             : (ins neg_lo0:$neg_lo, neg_hi0:$neg_hi),
                  !and(NegLoAny, !not(NegHiAny))       : (ins neg_lo0:$neg_lo),
                  !and(!not(NegLoAny), !not(NegHiAny)) : (ins));

  // Operand list: src0, src1, then either (src2) for wmma or
  // (srcTiedDef, src2-as-index, index key) for swmmac, then clamp / neg.
  let InsVOP3P = !con(Src0Mods, (ins Src0RC64:$src0), Src1Mods, (ins Src1RC64:$src1),
                      !cond(IsWMMA   : !con(Src2Mods, (ins Src2RC64:$src2)),
                            IsSWMMAC : !con((ins DstRC:$srcTiedDef), (ins VRegSrc_32:$src2), IndexKey)),
                      Clamp, Neg);

  // asm

  string IndexKeyAsm = !cond(!eq(IndexType, 0)  : "",
                             !eq(IndexType, 8)  : "$index_key_8bit",
                             !eq(IndexType, 16) : "$index_key_16bit");
  string ClampAsm = !if(IsIU, "$clamp", "");
  string NegAsm = !cond(!and(NegLoAny, NegHiAny)             : "$neg_lo$neg_hi",
                        !and(NegLoAny, !not(NegHiAny))       : "$neg_lo",
                        !and(!not(NegLoAny), !not(NegHiAny)) : "");

  let AsmVOP3P = "$vdst, $src0, $src1, $src2"#IndexKeyAsm#NegAsm#ClampAsm;

  // isel patterns

  dag Src0InPat  = !cond(IsAB_F16  : (ins (Src0VT (WMMAModsF16Neg Src0VT:$src0, i32:$src0_modifiers))),
                         IsAB_BF16 : (ins Src0VT:$src0),
                         IsIU      : (ins (VOP3PModsNeg i32:$src0_modifiers), Src0VT:$src0),
                         IsFP8BF8  : (ins Src0VT:$src0));
  dag Src0OutPat = !cond(IsAB_F16  : (ins i32:$src0_modifiers, Src0VT:$src0),
                         IsAB_BF16 : (ins (i32 8), Src0VT:$src0),
                         IsIU      : (ins i32:$src0_modifiers, Src0VT:$src0),
                         IsFP8BF8  : (ins Src0VT:$src0));
  dag Src1InPat  = !cond(IsAB_F16  : (ins (Src1VT (WMMAModsF16Neg Src1VT:$src1, i32:$src1_modifiers))),
                         IsAB_BF16 : (ins Src1VT:$src1),
                         IsIU      : (ins (VOP3PModsNeg i32:$src1_modifiers), Src1VT:$src1),
                         IsFP8BF8  : (ins Src1VT:$src1));
  dag Src1OutPat = !cond(IsAB_F16  : (ins i32:$src1_modifiers, Src1VT:$src1),
                         IsAB_BF16 : (ins (i32 8), Src1VT:$src1),
                         IsIU      : (ins i32:$src1_modifiers, Src1VT:$src1),
                         IsFP8BF8  : (ins Src1VT:$src1));
  dag Src2InPatWmma  = !cond(IsC_F32  : (ins (Src2VT (WMMAModsF32NegAbs Src2VT:$src2, i32:$src2_modifiers))),
                             IsC_F16  : (ins (Src2VT (WMMAModsF16NegAbs Src2VT:$src2, i32:$src2_modifiers))),
                             IsC_BF16 : (ins Src2VT:$src2),
                             IsIU     : (ins Src2VT:$src2),
                             IsSWMMAC : (ins));
  dag Src2OutPatWmma = !cond(IsC_F32  : (ins i32:$src2_modifiers, Src2VT:$src2),
                             IsC_F16  : (ins i32:$src2_modifiers, Src2VT:$src2),
                             IsC_BF16 : (ins (i32 8), Src2VT:$src2),
                             IsIU     : (ins Src2VT:$src2),
                             IsSWMMAC : (ins));
  dag ClampPat = !if(IsIU, (ins i1:$clamp), (ins));
  dag IndexInPat  = !cond(!eq(IndexType, 0) : (ins i32:$src2),
                          !eq(IndexType, 8) : (ins (i32 (SWMMACIndex8 i32:$src2, i32:$index_key_8bit))),
                          !eq(IndexType, 16): (ins (i32 (SWMMACIndex16 i32:$src2, i32:$index_key_16bit))));
  dag IndexOutPat = !cond(!eq(IndexType, 0) : (ins i32:$src2),
                          !eq(IndexType, 8) : (ins i32:$src2, i32:$index_key_8bit),
                          !eq(IndexType, 16): (ins i32:$src2, i32:$index_key_16bit));
  dag Src2InlineInPat = (ins (Src2VT (WMMAVISrc Src2VT:$src2)));
  dag Src2InlineOutPat = !con(!if(IsIU, (ins), (ins (i32 8))), (ins Src2VT:$src2));


  dag WmmaInPat  = !con(Src0InPat, Src1InPat, Src2InPatWmma, ClampPat);
  dag WmmaOutPat = !con(Src0OutPat, Src1OutPat, Src2OutPatWmma, ClampPat);

  dag SwmmacInPat  = !con(Src0InPat, Src1InPat, (ins Src2VT:$srcTiedDef), IndexInPat, ClampPat);
  dag SwmmacOutPat = !con(Src0OutPat, Src1OutPat, (ins Src2VT:$srcTiedDef), IndexOutPat, ClampPat);

  // wmma pattern where src2 is inline imm uses _threeaddr pseudo,
  // can't use _twoaddr since it would violate src2 tied to vdst constraint.
  dag WmmaInlineInPat  = !con(Src0InPat, Src1InPat, Src2InlineInPat, ClampPat);
  dag WmmaInlineOutPat = !con(Src0OutPat, Src1OutPat, Src2InlineOutPat, ClampPat);
}

// GFX12 WMMA: emits `_twoaddr` ($vdst tied to $src2) and `_threeaddr` pseudos
// that share mnemonic and PseudoInstr (Instr # PseudoInstrSuffix), and records
// the pair in a WMMAOpcodeMapping.
multiclass WMMAInstGFX12<string Instr, VOP3PWMMA_Profile WMMAProfile, string PseudoInstrSuffix> {
  let Mnemonic = Instr, mayRaiseFPException = 0, ReadsModeReg = 0 in {
    let Constraints = "@earlyclobber $vdst,$vdst = $src2", isConvertibleToThreeAddress = 1 in
    def _twoaddr : VOP3P_Pseudo<Instr, WMMAProfile>{
      let PseudoInstr = Instr#PseudoInstrSuffix;
    }

    let Constraints = "@earlyclobber $vdst", SchedRW = [Write32Bit, Write32Bit] in
    def _threeaddr : VOP3P_Pseudo<Instr, WMMAProfile>{
      let PseudoInstr = Instr#PseudoInstrSuffix;
    }

  }
  def : WMMAOpcodeMapping<!cast<Instruction>(NAME # _twoaddr),
                          !cast<Instruction>(NAME # _threeaddr)>;
}

// GFX12 SWMMAC: only a `_twoaddr` pseudo exists; $vdst is tied to the explicit
// $srcTiedDef operand and assembly matching goes through cvtSWMMAC.
multiclass SWMMACInstGFX12<string Instr, VOP3PWMMA_Profile WMMAProfile, string PseudoInstrSuffix> {
  def _twoaddr : VOP3P_Pseudo<Instr, WMMAProfile>{
    let Mnemonic = Instr;
    let PseudoInstr = Instr#PseudoInstrSuffix;
    let mayRaiseFPException = 0;
    let ReadsModeReg = 0;
    let AsmMatchConverter = "cvtSWMMAC";

    let Constraints = "@earlyclobber $vdst,$vdst = $srcTiedDef";
  }
}

// First argument in Profile is types for matrices D, A, B and C (D = A * B + C)
// as used by llvm ir, types are vectors(with matrix elements)
// wave32:
// For 16x16 matrices, lanes 0 to 31 will have 8 matrix elts,
// for 16 x 32 16 elts and for 16 x 64 lanes have 32 elts.
// wave64:
// lanes will have half the size of elements in lanes compared to wave32 with
// exception of 16x16_iu4: lanes0-31 will have 8xi4, remaining lanes are ignored

// general idea on element distribution differences:
// wave32: lane n has 8 matrix elements
// wave64: lane n has first 4, lane n+32 has other 4 elements

// index size, for each 2 elements in lane you need 4bits in index

// Non-standard types (iu8, iu4, fp8, bf8) will be packed in vectors of i32s.
// Original type for them is in comment on the right and refers to A and B.

// GFX12 profile instances. Template arguments are, in order:
//   <[D, A, B, C] types, _IsSWMMAC, _IndexType, _IsIU, _IsFP8BF8>

// --- wmma, wave32 ---
def F32_F16_WMMA_w32       : VOP3PWMMA_Profile<[v8f32, v8f16, v8f16, v8f32], 0, 0, 0, 0>;
def F32_BF16_WMMA_w32      : VOP3PWMMA_Profile<[v8f32, v8i16, v8i16, v8f32], 0, 0, 0, 0>;
def F16_F16_WMMA_w32       : VOP3PWMMA_Profile<[v8f16, v8f16, v8f16, v8f16], 0, 0, 0, 0>;
def BF16_BF16_WMMA_w32     : VOP3PWMMA_Profile<[v8i16, v8i16, v8i16, v8i16], 0, 0, 0, 0>;
def I32_IU8_WMMA_w32       : VOP3PWMMA_Profile<[v8i32, v2i32, v2i32, v8i32], 0, 0, 1, 0>; // 8xi8
def I32_IU4X16_WMMA_w32    : VOP3PWMMA_Profile<[v8i32,   i32,   i32, v8i32], 0, 0, 1, 0>; // 8xi4
def F32_FP8BF8_WMMA_w32    : VOP3PWMMA_Profile<[v8f32, v2i32, v2i32, v8f32], 0, 0, 0, 1>; // 8xf8
def I32_IU4X32_WMMA_w32    : VOP3PWMMA_Profile<[v8i32, v2i32, v2i32, v8i32], 0, 0, 1, 0>; // 16xi4

// --- wmma, wave64 ---
def F32_F16_WMMA_w64       : VOP3PWMMA_Profile<[v4f32, v4f16, v4f16, v4f32], 0, 0, 0, 0>;
def F32_BF16_WMMA_w64      : VOP3PWMMA_Profile<[v4f32, v4i16, v4i16, v4f32], 0, 0, 0, 0>;
def F16_F16_WMMA_w64       : VOP3PWMMA_Profile<[v4f16, v4f16, v4f16, v4f16], 0, 0, 0, 0>;
def BF16_BF16_WMMA_w64     : VOP3PWMMA_Profile<[v4i16, v4i16, v4i16, v4i16], 0, 0, 0, 0>;
def I32_IU8_WMMA_w64       : VOP3PWMMA_Profile<[v4i32,   i32,   i32, v4i32], 0, 0, 1, 0>; // 4xi8
def I32_IU4X16_WMMA_w64    : VOP3PWMMA_Profile<[v4i32,   i32,   i32, v4i32], 0, 0, 1, 0>; // 8xi4 *
def F32_FP8BF8_WMMA_w64    : VOP3PWMMA_Profile<[v4f32,   i32,   i32, v4f32], 0, 0, 0, 1>; // 4xf8
def I32_IU4X32_WMMA_w64    : VOP3PWMMA_Profile<[v4i32,   i32,   i32, v4i32], 0, 0, 1, 0>; // 8xi4

// --- swmmac, wave32 ---
def F32_F16_SWMMAC_w32     : VOP3PWMMA_Profile<[v8f32, v8f16, v16f16, v8f32], 1, 16, 0, 0>;
def F32_BF16_SWMMAC_w32    : VOP3PWMMA_Profile<[v8f32, v8i16, v16i16, v8f32], 1, 16, 0, 0>;
def F16_F16_SWMMAC_w32     : VOP3PWMMA_Profile<[v8f16, v8f16, v16f16, v8f16], 1, 16, 0, 0>;
def BF16_BF16_SWMMAC_w32   : VOP3PWMMA_Profile<[v8i16, v8i16, v16i16, v8i16], 1, 16, 0, 0>;
def I32_IU8_SWMMAC_w32     : VOP3PWMMA_Profile<[v8i32, v2i32,  v4i32, v8i32], 1, 16, 1, 0>; // 8xi8, 16xi8
def I32_IU4X32_SWMMAC_w32  : VOP3PWMMA_Profile<[v8i32,   i32,  v2i32, v8i32], 1, 16, 1, 0>; // 8xi4, 16xi4
def I32_IU4X64_SWMMAC_w32  : VOP3PWMMA_Profile<[v8i32, v2i32,  v4i32, v8i32], 1,  0, 1, 0>; // 16xi4, 32xi4 **
def F32_FP8BF8_SWMMAC_w32  : VOP3PWMMA_Profile<[v8f32, v2i32,  v4i32, v8f32], 1, 16, 0, 1>; // 8xf8, 16xf8

// --- swmmac, wave64 ---
def F32_F16_SWMMAC_w64     : VOP3PWMMA_Profile<[v4f32, v4f16, v8f16, v4f32], 1,  8, 0, 0>;
def F32_BF16_SWMMAC_w64    : VOP3PWMMA_Profile<[v4f32, v4i16, v8i16, v4f32], 1,  8, 0, 0>;
def F16_F16_SWMMAC_w64     : VOP3PWMMA_Profile<[v4f16, v4f16, v8f16, v4f16], 1,  8, 0, 0>;
def BF16_BF16_SWMMAC_w64   : VOP3PWMMA_Profile<[v4i16, v4i16, v8i16, v4i16], 1,  8, 0, 0>;
def I32_IU8_SWMMAC_w64     : VOP3PWMMA_Profile<[v4i32,   i32, v2i32, v4i32], 1,  8, 1, 0>; // 4xi8, 8xi8
def I32_IU4X32_SWMMAC_w64  : VOP3PWMMA_Profile<[v4i32,   i32,   i32, v4i32], 1, 16, 1, 0>; // 8xi4, 8xi4 ***
def I32_IU4X64_SWMMAC_w64  : VOP3PWMMA_Profile<[v4i32,   i32, v2i32, v4i32], 1, 16, 1, 0>; // 8xi4, 16xi4
def F32_FP8BF8_SWMMAC_w64  : VOP3PWMMA_Profile<[v4f32,   i32, v2i32, v4f32], 1,  8, 0, 1>; // 4xf8, 8xf8

// *   IU4X16_WMMA_w64 lanes 0-31 will have 8xi4, remaining lanes are ignored
// **  IU4X64_SWMMAC_w32 index is i32, index_key is not used
// *** IU4X32_SWMMAC_w64 lanes 0-31 will have 8xi4 remaining lanes are ignored
//     for matrix A, index is i16; Matrix B uses all lanes

// GFX12 wave32 pseudos. The four fp8/bf8 mnemonics of each family share a
// single profile.
let WaveSizePredicate = isWave32 in {
  // Dense WMMA.
  defm V_WMMA_F32_16X16X16_F16_w32
      : WMMAInstGFX12<"v_wmma_f32_16x16x16_f16", F32_F16_WMMA_w32, "_w32">;
  defm V_WMMA_F32_16X16X16_BF16_w32
      : WMMAInstGFX12<"v_wmma_f32_16x16x16_bf16", F32_BF16_WMMA_w32, "_w32">;
  defm V_WMMA_F16_16X16X16_F16_w32
      : WMMAInstGFX12<"v_wmma_f16_16x16x16_f16", F16_F16_WMMA_w32, "_w32">;
  defm V_WMMA_BF16_16X16X16_BF16_w32
      : WMMAInstGFX12<"v_wmma_bf16_16x16x16_bf16", BF16_BF16_WMMA_w32, "_w32">;
  defm V_WMMA_I32_16X16X16_IU8_w32
      : WMMAInstGFX12<"v_wmma_i32_16x16x16_iu8", I32_IU8_WMMA_w32, "_w32">;
  defm V_WMMA_I32_16X16X16_IU4_w32
      : WMMAInstGFX12<"v_wmma_i32_16x16x16_iu4", I32_IU4X16_WMMA_w32, "_w32">;
  defm V_WMMA_F32_16X16X16_FP8_FP8_w32
      : WMMAInstGFX12<"v_wmma_f32_16x16x16_fp8_fp8", F32_FP8BF8_WMMA_w32, "_w32">;
  defm V_WMMA_F32_16X16X16_FP8_BF8_w32
      : WMMAInstGFX12<"v_wmma_f32_16x16x16_fp8_bf8", F32_FP8BF8_WMMA_w32, "_w32">;
  defm V_WMMA_F32_16X16X16_BF8_FP8_w32
      : WMMAInstGFX12<"v_wmma_f32_16x16x16_bf8_fp8", F32_FP8BF8_WMMA_w32, "_w32">;
  defm V_WMMA_F32_16X16X16_BF8_BF8_w32
      : WMMAInstGFX12<"v_wmma_f32_16x16x16_bf8_bf8", F32_FP8BF8_WMMA_w32, "_w32">;
  defm V_WMMA_I32_16X16X32_IU4_w32
      : WMMAInstGFX12<"v_wmma_i32_16x16x32_iu4", I32_IU4X32_WMMA_w32, "_w32">;

  // Sparse SWMMAC.
  defm V_SWMMAC_F32_16X16X32_F16_w32
      : SWMMACInstGFX12<"v_swmmac_f32_16x16x32_f16", F32_F16_SWMMAC_w32, "_w32">;
  defm V_SWMMAC_F32_16X16X32_BF16_w32
      : SWMMACInstGFX12<"v_swmmac_f32_16x16x32_bf16", F32_BF16_SWMMAC_w32, "_w32">;
  defm V_SWMMAC_F16_16X16X32_F16_w32
      : SWMMACInstGFX12<"v_swmmac_f16_16x16x32_f16", F16_F16_SWMMAC_w32, "_w32">;
  defm V_SWMMAC_BF16_16X16X32_BF16_w32
      : SWMMACInstGFX12<"v_swmmac_bf16_16x16x32_bf16", BF16_BF16_SWMMAC_w32, "_w32">;
  defm V_SWMMAC_I32_16X16X32_IU8_w32
      : SWMMACInstGFX12<"v_swmmac_i32_16x16x32_iu8", I32_IU8_SWMMAC_w32, "_w32">;
  defm V_SWMMAC_I32_16X16X32_IU4_w32
      : SWMMACInstGFX12<"v_swmmac_i32_16x16x32_iu4", I32_IU4X32_SWMMAC_w32, "_w32">;
  defm V_SWMMAC_I32_16X16X64_IU4_w32
      : SWMMACInstGFX12<"v_swmmac_i32_16x16x64_iu4", I32_IU4X64_SWMMAC_w32, "_w32">;
  defm V_SWMMAC_F32_16X16X32_FP8_FP8_w32
      : SWMMACInstGFX12<"v_swmmac_f32_16x16x32_fp8_fp8", F32_FP8BF8_SWMMAC_w32, "_w32">;
  defm V_SWMMAC_F32_16X16X32_FP8_BF8_w32
      : SWMMACInstGFX12<"v_swmmac_f32_16x16x32_fp8_bf8", F32_FP8BF8_SWMMAC_w32, "_w32">;
  defm V_SWMMAC_F32_16X16X32_BF8_FP8_w32
      : SWMMACInstGFX12<"v_swmmac_f32_16x16x32_bf8_fp8", F32_FP8BF8_SWMMAC_w32, "_w32">;
  defm V_SWMMAC_F32_16X16X32_BF8_BF8_w32
      : SWMMACInstGFX12<"v_swmmac_f32_16x16x32_bf8_bf8", F32_FP8BF8_SWMMAC_w32, "_w32">;
}

// GFX12 wave64 pseudos; same mnemonics as wave32, paired with the *_w64
// profiles.
let WaveSizePredicate = isWave64 in {
  // Dense WMMA.
  defm V_WMMA_F32_16X16X16_F16_w64
      : WMMAInstGFX12<"v_wmma_f32_16x16x16_f16", F32_F16_WMMA_w64, "_w64">;
  defm V_WMMA_F32_16X16X16_BF16_w64
      : WMMAInstGFX12<"v_wmma_f32_16x16x16_bf16", F32_BF16_WMMA_w64, "_w64">;
  defm V_WMMA_F16_16X16X16_F16_w64
      : WMMAInstGFX12<"v_wmma_f16_16x16x16_f16", F16_F16_WMMA_w64, "_w64">;
  defm V_WMMA_BF16_16X16X16_BF16_w64
      : WMMAInstGFX12<"v_wmma_bf16_16x16x16_bf16", BF16_BF16_WMMA_w64, "_w64">;
  defm V_WMMA_I32_16X16X16_IU8_w64
      : WMMAInstGFX12<"v_wmma_i32_16x16x16_iu8", I32_IU8_WMMA_w64, "_w64">;
  defm V_WMMA_I32_16X16X16_IU4_w64
      : WMMAInstGFX12<"v_wmma_i32_16x16x16_iu4", I32_IU4X16_WMMA_w64, "_w64">;
  defm V_WMMA_F32_16X16X16_FP8_FP8_w64
      : WMMAInstGFX12<"v_wmma_f32_16x16x16_fp8_fp8", F32_FP8BF8_WMMA_w64, "_w64">;
  defm V_WMMA_F32_16X16X16_FP8_BF8_w64
      : WMMAInstGFX12<"v_wmma_f32_16x16x16_fp8_bf8", F32_FP8BF8_WMMA_w64, "_w64">;
  defm V_WMMA_F32_16X16X16_BF8_FP8_w64
      : WMMAInstGFX12<"v_wmma_f32_16x16x16_bf8_fp8", F32_FP8BF8_WMMA_w64, "_w64">;
  defm V_WMMA_F32_16X16X16_BF8_BF8_w64
      : WMMAInstGFX12<"v_wmma_f32_16x16x16_bf8_bf8", F32_FP8BF8_WMMA_w64, "_w64">;
  defm V_WMMA_I32_16X16X32_IU4_w64
      : WMMAInstGFX12<"v_wmma_i32_16x16x32_iu4", I32_IU4X32_WMMA_w64, "_w64">;

  // Sparse SWMMAC.
  defm V_SWMMAC_F32_16X16X32_F16_w64
      : SWMMACInstGFX12<"v_swmmac_f32_16x16x32_f16", F32_F16_SWMMAC_w64, "_w64">;
  defm V_SWMMAC_F32_16X16X32_BF16_w64
      : SWMMACInstGFX12<"v_swmmac_f32_16x16x32_bf16", F32_BF16_SWMMAC_w64, "_w64">;
  defm V_SWMMAC_F16_16X16X32_F16_w64
      : SWMMACInstGFX12<"v_swmmac_f16_16x16x32_f16", F16_F16_SWMMAC_w64, "_w64">;
  defm V_SWMMAC_BF16_16X16X32_BF16_w64
      : SWMMACInstGFX12<"v_swmmac_bf16_16x16x32_bf16", BF16_BF16_SWMMAC_w64, "_w64">;
  defm V_SWMMAC_I32_16X16X32_IU8_w64
      : SWMMACInstGFX12<"v_swmmac_i32_16x16x32_iu8", I32_IU8_SWMMAC_w64, "_w64">;
  defm V_SWMMAC_I32_16X16X32_IU4_w64
      : SWMMACInstGFX12<"v_swmmac_i32_16x16x32_iu4", I32_IU4X32_SWMMAC_w64, "_w64">;
  defm V_SWMMAC_I32_16X16X64_IU4_w64
      : SWMMACInstGFX12<"v_swmmac_i32_16x16x64_iu4", I32_IU4X64_SWMMAC_w64, "_w64">;
  defm V_SWMMAC_F32_16X16X32_FP8_FP8_w64
      : SWMMACInstGFX12<"v_swmmac_f32_16x16x32_fp8_fp8", F32_FP8BF8_SWMMAC_w64, "_w64">;
  defm V_SWMMAC_F32_16X16X32_FP8_BF8_w64
      : SWMMACInstGFX12<"v_swmmac_f32_16x16x32_fp8_bf8", F32_FP8BF8_SWMMAC_w64, "_w64">;
  defm V_SWMMAC_F32_16X16X32_BF8_FP8_w64
      : SWMMACInstGFX12<"v_swmmac_f32_16x16x32_bf8_fp8", F32_FP8BF8_SWMMAC_w64, "_w64">;
  defm V_SWMMAC_F32_16X16X32_BF8_BF8_w64
      : SWMMACInstGFX12<"v_swmmac_f32_16x16x32_bf8_bf8", F32_FP8BF8_SWMMAC_w64, "_w64">;
}

// IsGFX11OpselIntrinsic: f16_f16 and bf16_bf16 Intrinsics have imm operand that
// controls opsel. Used by gfx11, removed in gfx12 (operand must be 0).
// Selection patterns for one GFX12 WMMA intrinsic.
//   Inst  - record-name prefix; "_twoaddr" / "_threeaddr" is appended and the
//           result is looked up with !cast<Instruction>.
//   node  - the intrinsic (or other pattern operator) being matched.
//   P     - profile supplying DstVT and the Wmma*Pat dags.
//   IsGFX11OpselIntrinsic - when set, a literal 0 operand is appended to the
//           source dag, so only calls whose trailing immediate is 0 match.
multiclass WMMAPat<string Inst, SDPatternOperator node, VOP3PWMMA_Profile P, bit IsGFX11OpselIntrinsic = 0> {
  // General form: selects the "_twoaddr" instruction.
  def : GCNPat <(P.DstVT !setdagop(!con(P.WmmaInPat, !if(IsGFX11OpselIntrinsic, (ins 0), (ins))), node)),
                (P.DstVT !setdagop(P.WmmaOutPat, !cast<Instruction>(Inst#"_twoaddr")))>;
  // Form built from the profile's WmmaInline*Pat dags: selects the
  // "_threeaddr" instruction and is given a higher AddedComplexity than the
  // general form above.
  let AddedComplexity = 4 in
  def : GCNPat <(P.DstVT !setdagop(!con(P.WmmaInlineInPat, !if(IsGFX11OpselIntrinsic, (ins 0), (ins))), node)),
                (P.DstVT !setdagop(P.WmmaInlineOutPat, !cast<Instruction>(Inst#"_threeaddr")))>;
}

// Selection pattern for one SWMMAC intrinsic: the profile's SwmmacInPat with
// its operator replaced by `node` is rewritten to SwmmacOutPat with its
// operator replaced by `Inst`.
class SWMMACPat<Instruction Inst, SDPatternOperator node, VOP3PWMMA_Profile P> :
  GCNPat <(P.DstVT !setdagop(P.SwmmacInPat, node)),
          (P.DstVT !setdagop(P.SwmmacOutPat, Inst))>;

// Same pattern as SWMMACPat, but with WaveSizePredicate fixed to isWave64 in
// the record body. The pattern lists below do not use it; they wrap SWMMACPat
// in a `let WaveSizePredicate = ...` block instead.
class SWMMACPat_w64<Instruction Inst, SDPatternOperator node, VOP3PWMMA_Profile P> :
  GCNPat <(P.DstVT !setdagop(P.SwmmacInPat, node)),
          (P.DstVT !setdagop(P.SwmmacOutPat, Inst))> {
  let WaveSizePredicate = isWave64;
}

// GFX12 wave32 selection patterns.
let WaveSizePredicate = isWave32, SubtargetPredicate = isGFX12Plus in {
  defm : WMMAPat<"V_WMMA_F32_16X16X16_F16_w32", int_amdgcn_wmma_f32_16x16x16_f16, F32_F16_WMMA_w32>;
  defm : WMMAPat<"V_WMMA_F32_16X16X16_BF16_w32", int_amdgcn_wmma_f32_16x16x16_bf16, F32_BF16_WMMA_w32>;
  defm : WMMAPat<"V_WMMA_F16_16X16X16_F16_w32", int_amdgcn_wmma_f16_16x16x16_f16, F16_F16_WMMA_w32, 1>;
  defm : WMMAPat<"V_WMMA_BF16_16X16X16_BF16_w32", int_amdgcn_wmma_bf16_16x16x16_bf16, BF16_BF16_WMMA_w32, 1>;
  defm : WMMAPat<"V_WMMA_I32_16X16X16_IU8_w32", int_amdgcn_wmma_i32_16x16x16_iu8, I32_IU8_WMMA_w32>;
  defm : WMMAPat<"V_WMMA_I32_16X16X16_IU4_w32", int_amdgcn_wmma_i32_16x16x16_iu4, I32_IU4X16_WMMA_w32>;
  defm : WMMAPat<"V_WMMA_F32_16X16X16_FP8_FP8_w32", int_amdgcn_wmma_f32_16x16x16_fp8_fp8, F32_FP8BF8_WMMA_w32>;
  defm : WMMAPat<"V_WMMA_F32_16X16X16_FP8_BF8_w32", int_amdgcn_wmma_f32_16x16x16_fp8_bf8, F32_FP8BF8_WMMA_w32>;
  defm : WMMAPat<"V_WMMA_F32_16X16X16_BF8_FP8_w32", int_amdgcn_wmma_f32_16x16x16_bf8_fp8, F32_FP8BF8_WMMA_w32>;
  defm : WMMAPat<"V_WMMA_F32_16X16X16_BF8_BF8_w32", int_amdgcn_wmma_f32_16x16x16_bf8_bf8, F32_FP8BF8_WMMA_w32>;
  defm : WMMAPat<"V_WMMA_I32_16X16X32_IU4_w32", int_amdgcn_wmma_i32_16x16x32_iu4, I32_IU4X32_WMMA_w32>;

  def : SWMMACPat<V_SWMMAC_F32_16X16X32_F16_w32_twoaddr, int_amdgcn_swmmac_f32_16x16x32_f16, F32_F16_SWMMAC_w32>;
  def : SWMMACPat<V_SWMMAC_F32_16X16X32_BF16_w32_twoaddr, int_amdgcn_swmmac_f32_16x16x32_bf16, F32_BF16_SWMMAC_w32>;
  def : SWMMACPat<V_SWMMAC_F16_16X16X32_F16_w32_twoaddr, int_amdgcn_swmmac_f16_16x16x32_f16, F16_F16_SWMMAC_w32>;
  def : SWMMACPat<V_SWMMAC_BF16_16X16X32_BF16_w32_twoaddr, int_amdgcn_swmmac_bf16_16x16x32_bf16, BF16_BF16_SWMMAC_w32>;
  def : SWMMACPat<V_SWMMAC_I32_16X16X32_IU8_w32_twoaddr, int_amdgcn_swmmac_i32_16x16x32_iu8, I32_IU8_SWMMAC_w32>;
  def : SWMMACPat<V_SWMMAC_I32_16X16X32_IU4_w32_twoaddr, int_amdgcn_swmmac_i32_16x16x32_iu4, I32_IU4X32_SWMMAC_w32>;
  // Expressed through SWMMACPat like every other entry (and like its wave64
  // twin); this expands to the same source/result dags as the hand-written
  // GCNPat it replaces.
  def : SWMMACPat<V_SWMMAC_I32_16X16X64_IU4_w32_twoaddr, int_amdgcn_swmmac_i32_16x16x64_iu4, I32_IU4X64_SWMMAC_w32>;
  def : SWMMACPat<V_SWMMAC_F32_16X16X32_FP8_FP8_w32_twoaddr, int_amdgcn_swmmac_f32_16x16x32_fp8_fp8, F32_FP8BF8_SWMMAC_w32>;
  def : SWMMACPat<V_SWMMAC_F32_16X16X32_FP8_BF8_w32_twoaddr, int_amdgcn_swmmac_f32_16x16x32_fp8_bf8, F32_FP8BF8_SWMMAC_w32>;
  def : SWMMACPat<V_SWMMAC_F32_16X16X32_BF8_FP8_w32_twoaddr, int_amdgcn_swmmac_f32_16x16x32_bf8_fp8, F32_FP8BF8_SWMMAC_w32>;
  def : SWMMACPat<V_SWMMAC_F32_16X16X32_BF8_BF8_w32_twoaddr, int_amdgcn_swmmac_f32_16x16x32_bf8_bf8, F32_FP8BF8_SWMMAC_w32>;
}

// GFX12 wave64 selection patterns.
let WaveSizePredicate = isWave64, SubtargetPredicate = isGFX12Plus in {
  defm : WMMAPat<"V_WMMA_F32_16X16X16_F16_w64", int_amdgcn_wmma_f32_16x16x16_f16, F32_F16_WMMA_w64>;
  defm : WMMAPat<"V_WMMA_F32_16X16X16_BF16_w64", int_amdgcn_wmma_f32_16x16x16_bf16, F32_BF16_WMMA_w64>;
  defm : WMMAPat<"V_WMMA_F16_16X16X16_F16_w64", int_amdgcn_wmma_f16_16x16x16_f16, F16_F16_WMMA_w64, 1>;
  defm : WMMAPat<"V_WMMA_BF16_16X16X16_BF16_w64", int_amdgcn_wmma_bf16_16x16x16_bf16, BF16_BF16_WMMA_w64, 1>;
  defm : WMMAPat<"V_WMMA_I32_16X16X16_IU8_w64", int_amdgcn_wmma_i32_16x16x16_iu8, I32_IU8_WMMA_w64>;
  defm : WMMAPat<"V_WMMA_I32_16X16X16_IU4_w64", int_amdgcn_wmma_i32_16x16x16_iu4, I32_IU4X16_WMMA_w64>;
  defm : WMMAPat<"V_WMMA_F32_16X16X16_FP8_FP8_w64", int_amdgcn_wmma_f32_16x16x16_fp8_fp8, F32_FP8BF8_WMMA_w64>;
  defm : WMMAPat<"V_WMMA_F32_16X16X16_FP8_BF8_w64", int_amdgcn_wmma_f32_16x16x16_fp8_bf8, F32_FP8BF8_WMMA_w64>;
  defm : WMMAPat<"V_WMMA_F32_16X16X16_BF8_FP8_w64", int_amdgcn_wmma_f32_16x16x16_bf8_fp8, F32_FP8BF8_WMMA_w64>;
  defm : WMMAPat<"V_WMMA_F32_16X16X16_BF8_BF8_w64", int_amdgcn_wmma_f32_16x16x16_bf8_bf8, F32_FP8BF8_WMMA_w64>;
  defm : WMMAPat<"V_WMMA_I32_16X16X32_IU4_w64", int_amdgcn_wmma_i32_16x16x32_iu4, I32_IU4X32_WMMA_w64>;

  def : SWMMACPat<V_SWMMAC_F32_16X16X32_F16_w64_twoaddr, int_amdgcn_swmmac_f32_16x16x32_f16, F32_F16_SWMMAC_w64>;
  def : SWMMACPat<V_SWMMAC_F32_16X16X32_BF16_w64_twoaddr, int_amdgcn_swmmac_f32_16x16x32_bf16, F32_BF16_SWMMAC_w64>;
  def : SWMMACPat<V_SWMMAC_F16_16X16X32_F16_w64_twoaddr, int_amdgcn_swmmac_f16_16x16x32_f16, F16_F16_SWMMAC_w64>;
  def : SWMMACPat<V_SWMMAC_BF16_16X16X32_BF16_w64_twoaddr, int_amdgcn_swmmac_bf16_16x16x32_bf16, BF16_BF16_SWMMAC_w64>;
  def : SWMMACPat<V_SWMMAC_I32_16X16X32_IU8_w64_twoaddr, int_amdgcn_swmmac_i32_16x16x32_iu8, I32_IU8_SWMMAC_w64>;
  def : SWMMACPat<V_SWMMAC_I32_16X16X32_IU4_w64_twoaddr, int_amdgcn_swmmac_i32_16x16x32_iu4, I32_IU4X32_SWMMAC_w64>;
  def : SWMMACPat<V_SWMMAC_I32_16X16X64_IU4_w64_twoaddr, int_amdgcn_swmmac_i32_16x16x64_iu4, I32_IU4X64_SWMMAC_w64>;
  def : SWMMACPat<V_SWMMAC_F32_16X16X32_FP8_FP8_w64_twoaddr, int_amdgcn_swmmac_f32_16x16x32_fp8_fp8, F32_FP8BF8_SWMMAC_w64>;
  def : SWMMACPat<V_SWMMAC_F32_16X16X32_FP8_BF8_w64_twoaddr, int_amdgcn_swmmac_f32_16x16x32_fp8_bf8, F32_FP8BF8_SWMMAC_w64>;
  def : SWMMACPat<V_SWMMAC_F32_16X16X32_BF8_FP8_w64_twoaddr, int_amdgcn_swmmac_f32_16x16x32_bf8_fp8, F32_FP8BF8_SWMMAC_w64>;
  def : SWMMACPat<V_SWMMAC_F32_16X16X32_BF8_BF8_w64_twoaddr, int_amdgcn_swmmac_f32_16x16x32_bf8_bf8, F32_FP8BF8_SWMMAC_w64>;
}


//===----------------------------------------------------------------------===//
// Begin Real Encodings
//===----------------------------------------------------------------------===//

// Real VOP3P DPP16 instruction built from the DPP pseudo `ps`.
// Scheduling and side-effect fields are copied from the pseudo; both the
// assembler and subtarget predicates are set to HasDPP16.
class VOP3P_DPP16<bits<7> op, VOP_DPP_Pseudo ps, int subtarget,
                  string opName = ps.OpName>
    : VOP3P_DPP<op, opName, ps.Pfl, 1>, SIMCInstr<ps.PseudoInstr, subtarget> {
  let hasSideEffects = ps.hasSideEffects;
  let Defs = ps.Defs;
  let SchedRW = ps.SchedRW;
  let Uses = ps.Uses;
  let AssemblerPredicate = HasDPP16;
  let SubtargetPredicate = HasDPP16;
  let OtherPredicates = ps.OtherPredicates;
}

// Real VOP3P DPP8 instruction built from the pseudo `ps`.
// Copies the same fields as VOP3P_DPP16 but sets no predicates of its own;
// users are expected to set AssemblerPredicate themselves.
class VOP3P_DPP8_Base<bits<7> op, VOP_Pseudo ps, string opName = ps.OpName>
    : VOP3P_DPP8<op, opName, ps.Pfl> {
  let hasSideEffects = ps.hasSideEffects;
  let Defs = ps.Defs;
  let SchedRW = ps.SchedRW;
  let Uses = ps.Uses;
  let OtherPredicates = ps.OtherPredicates;
}

//===----------------------------------------------------------------------===//
// GFX11, GFX12
//===----------------------------------------------------------------------===//
// Defines the real instruction NAME#Gen.Suffix for generation `Gen`.
//   op              - 7-bit opcode passed to VOP3Pe_gfx11_gfx12.
//   backing_ps_name - name of the VOP3P_Pseudo the real instruction is built
//                     from (defaults to NAME).
//   asmName         - mnemonic; the default is read from the pseudo called
//                     NAME, not from backing_ps_name, so a caller that
//                     overrides backing_ps_name will normally pass asmName too.
multiclass VOP3P_Real_Base<GFXGen Gen, bits<7> op, string backing_ps_name = NAME,
                           string asmName = !cast<VOP3P_Pseudo>(NAME).Mnemonic> {
  def Gen.Suffix :
    VOP3P_Real_Gen<!cast<VOP3P_Pseudo>(backing_ps_name), Gen, asmName>,
    VOP3Pe_gfx11_gfx12<op, !cast<VOP3P_Pseudo>(backing_ps_name).Pfl>;
}

// VOP3Pe_gfx11_gfx12 encoding with the modifier bits overridden according to
// the WMMA profile `WMMAP`. Each group of bits is either driven by an operand
// or forced to a constant, depending on what the profile enables.
class VOP3PeWmma<bits<7> op, VOPProfile P, VOP3PWMMA_Profile WMMAP>
    : VOP3Pe_gfx11_gfx12<op, P> {
  // opsel: carries the index key. IndexType 0 encodes nothing, 8 uses two
  // bits of index_key_8bit, 16 uses one bit of index_key_16bit. The !cond has
  // no catch-all arm, so any other IndexType is a TableGen error.
  let Inst{11} = !cond(!eq(WMMAP.IndexType, 0)  : 0,
                       !eq(WMMAP.IndexType, 8)  : index_key_8bit{0},
                       !eq(WMMAP.IndexType, 16) : index_key_16bit{0});
  let Inst{12} = !if(!eq(WMMAP.IndexType, 8), index_key_8bit{1}, 0);
  let Inst{13} = 0;
  // opsel_hi: all three bits are hard-wired to 1.
  let Inst{59} = 1;
  let Inst{60} = 1;
  let Inst{14} = 1;
  // neg_lo: bit 0 of each source modifier, only when the profile enables it
  // (NegLo01 gates src0 and src1 together, NegLo2 gates src2).
  let Inst{61} = !if(WMMAP.NegLo01, src0_modifiers{0}, 0);
  let Inst{62} = !if(WMMAP.NegLo01, src1_modifiers{0}, 0);
  let Inst{63} = !if(WMMAP.NegLo2, src2_modifiers{0}, 0);
  // neg_hi: bit 1 of each source modifier, gated the same way by
  // NegHi01 / NegHi2.
  let Inst{8}  = !if(WMMAP.NegHi01, src0_modifiers{1}, 0);
  let Inst{9}  = !if(WMMAP.NegHi01, src1_modifiers{1}, 0);
  let Inst{10} = !if(WMMAP.NegHi2, src2_modifiers{1}, 0);
  // clamp: only encoded for profiles with IsIU set.
  let Inst{15} = !if(WMMAP.IsIU, clamp{0}, 0);
}
// Like VOP3P_Real_Base, but the encoding comes from VOP3PeWmma so that the
// modifier bits follow the WMMA profile `WMMAP`.
// As in VOP3P_Real_Base, the default asmName is read from the pseudo called
// NAME rather than from backing_ps_name.
multiclass VOP3P_WMMA_Real_Base<GFXGen Gen, bits<7> op, VOP3PWMMA_Profile WMMAP,
                                string backing_ps_name = NAME,
                                string asmName = !cast<VOP3P_Pseudo>(NAME).Mnemonic> {
  def Gen.Suffix :
    VOP3P_Real_Gen<!cast<VOP3P_Pseudo>(backing_ps_name), Gen, asmName>,
    VOP3PeWmma<op, !cast<VOP3P_Pseudo>(backing_ps_name).Pfl, WMMAP>;
}

// GFX12 real encoding of the "_twoaddr" pseudo, wave32 flavour
// (decoder namespace "GFX12").
multiclass VOP3P_Real_WMMA_gfx12 <bits<7> op, VOP3PWMMA_Profile WMMAP> {
  let WaveSizePredicate = isWave32, DecoderNamespace = "GFX12" in {
    defm _twoaddr : VOP3P_WMMA_Real_Base <GFX12Gen, op, WMMAP>;
  }
}

// GFX12 real encoding of the "_twoaddr" pseudo, wave64 flavour. The opcodes
// are shared with wave32, so these records are placed in a separate decoder
// namespace ("WMMAGFX12").
multiclass VOP3P_Real_WMMA_gfx12w64 <bits<7> op, VOP3PWMMA_Profile WMMAP> {
  let WaveSizePredicate = isWave64, DecoderNamespace = "WMMAGFX12" in {
    defm _twoaddr : VOP3P_WMMA_Real_Base <GFX12Gen, op, WMMAP>;
  }
}

// WMMA real encodings, wave32: opcodes 0x040-0x04a.
defm V_WMMA_F32_16X16X16_F16_w32     : VOP3P_Real_WMMA_gfx12 <0x040, F32_F16_WMMA_w32>;
defm V_WMMA_F32_16X16X16_BF16_w32    : VOP3P_Real_WMMA_gfx12 <0x041, F32_BF16_WMMA_w32>;
defm V_WMMA_F16_16X16X16_F16_w32     : VOP3P_Real_WMMA_gfx12 <0x042, F16_F16_WMMA_w32>;
defm V_WMMA_BF16_16X16X16_BF16_w32   : VOP3P_Real_WMMA_gfx12 <0x043, BF16_BF16_WMMA_w32>;
defm V_WMMA_I32_16X16X16_IU8_w32     : VOP3P_Real_WMMA_gfx12 <0x044, I32_IU8_WMMA_w32>;
defm V_WMMA_I32_16X16X16_IU4_w32     : VOP3P_Real_WMMA_gfx12 <0x045, I32_IU4X16_WMMA_w32>;
defm V_WMMA_F32_16X16X16_FP8_FP8_w32 : VOP3P_Real_WMMA_gfx12 <0x046, F32_FP8BF8_WMMA_w32>;
defm V_WMMA_F32_16X16X16_FP8_BF8_w32 : VOP3P_Real_WMMA_gfx12 <0x047, F32_FP8BF8_WMMA_w32>;
defm V_WMMA_F32_16X16X16_BF8_FP8_w32 : VOP3P_Real_WMMA_gfx12 <0x048, F32_FP8BF8_WMMA_w32>;
defm V_WMMA_F32_16X16X16_BF8_BF8_w32 : VOP3P_Real_WMMA_gfx12 <0x049, F32_FP8BF8_WMMA_w32>;
defm V_WMMA_I32_16X16X32_IU4_w32     : VOP3P_Real_WMMA_gfx12 <0x04a, I32_IU4X32_WMMA_w32>;

// WMMA real encodings, wave64: same opcodes as the wave32 list.
defm V_WMMA_F32_16X16X16_F16_w64     : VOP3P_Real_WMMA_gfx12w64 <0x040, F32_F16_WMMA_w64>;
defm V_WMMA_F32_16X16X16_BF16_w64    : VOP3P_Real_WMMA_gfx12w64 <0x041, F32_BF16_WMMA_w64>;
defm V_WMMA_F16_16X16X16_F16_w64     : VOP3P_Real_WMMA_gfx12w64 <0x042, F16_F16_WMMA_w64>;
defm V_WMMA_BF16_16X16X16_BF16_w64   : VOP3P_Real_WMMA_gfx12w64 <0x043, BF16_BF16_WMMA_w64>;
defm V_WMMA_I32_16X16X16_IU8_w64     : VOP3P_Real_WMMA_gfx12w64 <0x044, I32_IU8_WMMA_w64>;
defm V_WMMA_I32_16X16X16_IU4_w64     : VOP3P_Real_WMMA_gfx12w64 <0x045, I32_IU4X16_WMMA_w64>;
defm V_WMMA_F32_16X16X16_FP8_FP8_w64 : VOP3P_Real_WMMA_gfx12w64 <0x046, F32_FP8BF8_WMMA_w64>;
defm V_WMMA_F32_16X16X16_FP8_BF8_w64 : VOP3P_Real_WMMA_gfx12w64 <0x047, F32_FP8BF8_WMMA_w64>;
defm V_WMMA_F32_16X16X16_BF8_FP8_w64 : VOP3P_Real_WMMA_gfx12w64 <0x048, F32_FP8BF8_WMMA_w64>;
defm V_WMMA_F32_16X16X16_BF8_BF8_w64 : VOP3P_Real_WMMA_gfx12w64 <0x049, F32_FP8BF8_WMMA_w64>;
defm V_WMMA_I32_16X16X32_IU4_w64     : VOP3P_Real_WMMA_gfx12w64 <0x04a, I32_IU4X32_WMMA_w64>;

// SWMMAC real encodings, wave32: opcodes 0x050-0x05a.
defm V_SWMMAC_F32_16X16X32_F16_w32     : VOP3P_Real_WMMA_gfx12 <0x050, F32_F16_SWMMAC_w32>;
defm V_SWMMAC_F32_16X16X32_BF16_w32    : VOP3P_Real_WMMA_gfx12 <0x051, F32_BF16_SWMMAC_w32>;
defm V_SWMMAC_F16_16X16X32_F16_w32     : VOP3P_Real_WMMA_gfx12 <0x052, F16_F16_SWMMAC_w32>;
defm V_SWMMAC_BF16_16X16X32_BF16_w32   : VOP3P_Real_WMMA_gfx12 <0x053, BF16_BF16_SWMMAC_w32>;
defm V_SWMMAC_I32_16X16X32_IU8_w32     : VOP3P_Real_WMMA_gfx12 <0x054, I32_IU8_SWMMAC_w32>;
defm V_SWMMAC_I32_16X16X32_IU4_w32     : VOP3P_Real_WMMA_gfx12 <0x055, I32_IU4X32_SWMMAC_w32>;
defm V_SWMMAC_I32_16X16X64_IU4_w32     : VOP3P_Real_WMMA_gfx12 <0x056, I32_IU4X64_SWMMAC_w32>;
defm V_SWMMAC_F32_16X16X32_FP8_FP8_w32 : VOP3P_Real_WMMA_gfx12 <0x057, F32_FP8BF8_SWMMAC_w32>;
defm V_SWMMAC_F32_16X16X32_FP8_BF8_w32 : VOP3P_Real_WMMA_gfx12 <0x058, F32_FP8BF8_SWMMAC_w32>;
defm V_SWMMAC_F32_16X16X32_BF8_FP8_w32 : VOP3P_Real_WMMA_gfx12 <0x059, F32_FP8BF8_SWMMAC_w32>;
defm V_SWMMAC_F32_16X16X32_BF8_BF8_w32 : VOP3P_Real_WMMA_gfx12 <0x05a, F32_FP8BF8_SWMMAC_w32>;

// SWMMAC real encodings, wave64: same opcodes as the wave32 list.
defm V_SWMMAC_F32_16X16X32_F16_w64     : VOP3P_Real_WMMA_gfx12w64 <0x050, F32_F16_SWMMAC_w64>;
defm V_SWMMAC_F32_16X16X32_BF16_w64    : VOP3P_Real_WMMA_gfx12w64 <0x051, F32_BF16_SWMMAC_w64>;
defm V_SWMMAC_F16_16X16X32_F16_w64     : VOP3P_Real_WMMA_gfx12w64 <0x052, F16_F16_SWMMAC_w64>;
defm V_SWMMAC_BF16_16X16X32_BF16_w64   : VOP3P_Real_WMMA_gfx12w64 <0x053, BF16_BF16_SWMMAC_w64>;
defm V_SWMMAC_I32_16X16X32_IU8_w64     : VOP3P_Real_WMMA_gfx12w64 <0x054, I32_IU8_SWMMAC_w64>;
defm V_SWMMAC_I32_16X16X32_IU4_w64     : VOP3P_Real_WMMA_gfx12w64 <0x055, I32_IU4X32_SWMMAC_w64>;
defm V_SWMMAC_I32_16X16X64_IU4_w64     : VOP3P_Real_WMMA_gfx12w64 <0x056, I32_IU4X64_SWMMAC_w64>;
defm V_SWMMAC_F32_16X16X32_FP8_FP8_w64 : VOP3P_Real_WMMA_gfx12w64 <0x057, F32_FP8BF8_SWMMAC_w64>;
defm V_SWMMAC_F32_16X16X32_FP8_BF8_w64 : VOP3P_Real_WMMA_gfx12w64 <0x058, F32_FP8BF8_SWMMAC_w64>;
defm V_SWMMAC_F32_16X16X32_BF8_FP8_w64 : VOP3P_Real_WMMA_gfx12w64 <0x059, F32_FP8BF8_SWMMAC_w64>;
defm V_SWMMAC_F32_16X16X32_BF8_BF8_w64 : VOP3P_Real_WMMA_gfx12w64 <0x05a, F32_FP8BF8_SWMMAC_w64>;

// Real instruction whose mnemonic (asmName) differs from the mnemonic of the
// pseudo it is built from. AsmString is rebuilt from asmName plus the
// pseudo's operand string, and a MnemonicAlias from the pseudo's mnemonic to
// asmName is attached, restricted to Gen.AssemblerPredicate.
multiclass VOP3P_Real_with_name<GFXGen Gen, bits<7> op,
                                string backing_ps_name = NAME,
                                string asmName = !cast<VOP3P_Pseudo>(NAME).Mnemonic> {
  defvar ps = !cast<VOP3P_Pseudo>(backing_ps_name);
  let AsmString = asmName # ps.AsmOperands in
  def Gen.Suffix :
    VOP3P_Real_Gen<ps, Gen, asmName>,
    VOP3Pe_gfx11_gfx12<op, ps.Pfl>,
    MnemonicAlias<ps.Mnemonic, asmName>, Requires<[Gen.AssemblerPredicate]>;
}

// DPP16 real instruction NAME#"_dpp"#Gen.Suffix, built from the pseudo named
// backing_ps_name#"_dpp". The assembly string uses the VOP3 pseudo's profile.
multiclass VOP3P_Real_dpp<GFXGen Gen, bits<7> op, string backing_ps_name = NAME,
                          string asmName = !cast<VOP3P_Pseudo>(NAME).Mnemonic> {
  defvar ps = !cast<VOP3P_Pseudo>(backing_ps_name);
  def _dpp#Gen.Suffix
      : VOP3P_DPP16<op, !cast<VOP_DPP_Pseudo>(backing_ps_name #"_dpp"),
                    Gen.Subtarget> {
    let AsmString = asmName #ps.Pfl.AsmVOP3DPP16;
    let DecoderNamespace = "DPP"#Gen.DecoderNamespace;
    // Overrides the HasDPP16 assembler predicate set by VOP3P_DPP16.
    let AssemblerPredicate = Gen.AssemblerPredicate;
  }
}

// DPP8 real instruction NAME#"_dpp8"#Gen.Suffix, built directly from the
// VOP3P pseudo named backing_ps_name.
multiclass VOP3P_Real_dpp8<GFXGen Gen, bits<7> op, string backing_ps_name = NAME,
                           string asmName = !cast<VOP3P_Pseudo>(NAME).Mnemonic> {
  defvar ps = !cast<VOP3P_Pseudo>(backing_ps_name);
  def _dpp8#Gen.Suffix : VOP3P_DPP8_Base<op, ps> {
    let AsmString = asmName #ps.Pfl.AsmVOP3DPP8;
    let DecoderNamespace = "DPP8"#Gen.DecoderNamespace;
    let AssemblerPredicate = Gen.AssemblerPredicate;
  }
}

// Plain VOP3P real plus its DPP16 and DPP8 variants.
multiclass VOP3P_Realtriple<GFXGen Gen, bits<7> op, string backing_ps_name = NAME,
                            string asmName = !cast<VOP3P_Pseudo>(NAME).Mnemonic>
    : VOP3P_Real_Base<Gen, op, backing_ps_name, asmName>,
      VOP3P_Real_dpp<Gen, op, backing_ps_name, asmName>,
      VOP3P_Real_dpp8<Gen, op, backing_ps_name, asmName>;

//===----------------------------------------------------------------------===//
// GFX12
//===----------------------------------------------------------------------===//

multiclass VOP3P_Real_gfx12<bits<7> op> : VOP3P_Real_Base<GFX12Gen, op>;

multiclass VOP3P_Real_with_name_gfx12<bits<7> op,
                                      string backing_ps_name = NAME,
                                      string asmName = !cast<VOP3P_Pseudo>(NAME).Mnemonic> :
  VOP3P_Real_with_name<GFX12Gen, op, backing_ps_name, asmName>;

// These two are built from the V_PK_MIN_F16 / V_PK_MAX_F16 pseudos and given
// the mnemonics v_pk_min_num_f16 / v_pk_max_num_f16.
defm V_PK_MIN_NUM_F16 : VOP3P_Real_with_name_gfx12<0x1b, "V_PK_MIN_F16", "v_pk_min_num_f16">;
defm V_PK_MAX_NUM_F16 : VOP3P_Real_with_name_gfx12<0x1c, "V_PK_MAX_F16", "v_pk_max_num_f16">;

defm V_PK_MINIMUM_F16 : VOP3P_Real_gfx12<0x1d>;
defm V_PK_MAXIMUM_F16 : VOP3P_Real_gfx12<0x1e>;

defm V_DOT4_F32_FP8_BF8 : VOP3P_Realtriple<GFX12Gen, 0x24>;
defm V_DOT4_F32_BF8_FP8 : VOP3P_Realtriple<GFX12Gen, 0x25>;
defm V_DOT4_F32_FP8_FP8 : VOP3P_Realtriple<GFX12Gen, 0x26>;
defm V_DOT4_F32_BF8_BF8 : VOP3P_Realtriple<GFX12Gen, 0x27>;

//===----------------------------------------------------------------------===//
// GFX11
//===----------------------------------------------------------------------===//

// Emits the same opcode for both the GFX11 and the GFX12 generation.
multiclass VOP3P_Real_gfx11_gfx12<bits<7> op> :
  VOP3P_Real_Base<GFX11Gen, op>, VOP3P_Real_Base<GFX12Gen, op>;

defm V_DOT4_I32_IU8  : VOP3P_Real_gfx11_gfx12<0x16>;
defm V_DOT8_I32_IU4  : VOP3P_Real_gfx11_gfx12<0x18>;
defm V_DOT2_F32_BF16 : VOP3P_Real_gfx11_gfx12<0x1a>;

// GFX11 WMMA real encodings for the "_twoaddr_w32" and "_twoaddr_w64"
// pseudos. Both use the same opcode, so the wave64 records go into their own
// decoder namespace ("WMMAGFX11").
multiclass VOP3P_Real_WMMA <bits<7> op> {
  let WaveSizePredicate = isWave32, DecoderNamespace = "GFX11" in {
    defm _twoaddr_w32 : VOP3P_Real_Base <GFX11Gen, op>;
  }
  let WaveSizePredicate = isWave64, DecoderNamespace = "WMMAGFX11" in {
    defm _twoaddr_w64 : VOP3P_Real_Base <GFX11Gen, op>;
  }
}

defm V_WMMA_F32_16X16X16_F16   : VOP3P_Real_WMMA <0x040>;
defm V_WMMA_F32_16X16X16_BF16  : VOP3P_Real_WMMA <0x041>;
defm V_WMMA_F16_16X16X16_F16   : VOP3P_Real_WMMA <0x042>;
defm V_WMMA_BF16_16X16X16_BF16 : VOP3P_Real_WMMA <0x043>;
defm V_WMMA_I32_16X16X16_IU8   : VOP3P_Real_WMMA <0x044>;
defm V_WMMA_I32_16X16X16_IU4   : VOP3P_Real_WMMA <0x045>;

//===----------------------------------------------------------------------===//
// GFX8 (VI)
//===----------------------------------------------------------------------===//

// VI real encoding of the VOP3P pseudo called NAME.
multiclass VOP3P_Real_vi<bits<7> op> {
  def _vi : VOP3P_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
            VOP3Pe <op, !cast<VOP3_Pseudo>(NAME).Pfl> {
    let AssemblerPredicate = HasVOP3PInsts;
    let DecoderNamespace = "GFX8";
    let VOP3P = 1;
  }
}

// VI real encoding of the MAI pseudo NAME#"_e64". The op_sel_hi bits and the
// third VOP3Pe_MAI argument are left unset (`?`).
multiclass VOP3P_Real_MAI<bits<7> op> {
  def _vi : VOP3P_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
            VOP3Pe_MAI <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl, ?> {
    let AssemblerPredicate = HasMAIInsts;
    let DecoderNamespace = "GFX8";
    let Inst{14} = ?; // op_sel_hi(2)
    let Inst{59} = ?; // op_sel_hi(0)
    let Inst{60} = ?; // op_sel_hi(1)
  }
}

// GFX90A MFMA real encodings: "_acd" is built from NAME#"_e64" with the last
// VOP3Pe_MAI argument set to 1, "_vcd" from NAME#"_vgprcd_e64" with it set
// to 0.
let Constraints = "" in {
multiclass VOP3P_Real_MFMA_gfx90a<bits<7> op> {
  let SubtargetPredicate = isGFX90AOnly,
      AssemblerPredicate = isGFX90AOnly, DecoderNamespace = "GFX90A" in {
  def _gfx90a_acd : VOP3P_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX90A>,
             VOP3Pe_MAI <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl, 1>;

  def _gfx90a_vcd : VOP3P_Real<!cast<VOP3_Pseudo>(NAME # "_vgprcd" # "_e64"), SIEncodingFamily.GFX90A>,
             VOP3Pe_MAI <op, !cast<VOP3_Pseudo>(NAME # "_vgprcd" # "_e64").Pfl, 0>;
  } // End SubtargetPredicate = isGFX90AOnly, AssemblerPredicate = isGFX90AOnly, DecoderNamespace = "GFX90A"
}
} // End Constraints = ""

// Adds InstAliases that let the mnemonic NameTo assemble to the gfx940 real
// instructions Op#"_gfx940_acd" / Op#"_gfx940_vcd". Nothing is emitted when
// NameFrom and NameTo are the same string.
multiclass VOP3P_Real_MFMA_gfx940_aliases<string NameFrom, string NameTo, string Op,
                                          VOP3_Pseudo PS_ACD = !cast<VOP3_Pseudo>(Op # "_e64"),
                                          VOP3_Pseudo PS_VCD = !cast<VOP3_Pseudo>(Op # "_vgprcd" # "_e64"),
                                          VOPProfile Pfl_ACD = PS_ACD.Pfl,
                                          VOPProfile Pfl_VCD = PS_VCD.Pfl> {
  if !ne(NameFrom, NameTo) then {
    def : InstAlias <NameTo # " " # PS_ACD.AsmOperands,
                     (!cast<VOP3P_Real>(Op # "_gfx940_acd") Pfl_ACD.DstRC:$vdst,
                         Pfl_ACD.Src0RC64:$src0, Pfl_ACD.Src1RC64:$src1, Pfl_ACD.Src2RC64:$src2,
                         cbsz:$cbsz, abid:$abid, blgp:$blgp)>, PredicateControl;
    def : InstAlias <NameTo # " " # PS_VCD.AsmOperands,
                     (!cast<VOP3P_Real>(Op # "_gfx940_vcd") Pfl_VCD.DstRC:$vdst,
                         Pfl_VCD.Src0RC64:$src0, Pfl_VCD.Src1RC64:$src1, Pfl_VCD.Src2RC64:$src2,
                         cbsz:$cbsz, abid:$abid, blgp:$blgp)>, PredicateControl;
  }
}

// GFX940 MFMA real encodings ("_gfx940_acd" / "_gfx940_vcd") printed with the
// mnemonic `Name`, plus aliases from the pseudo's own mnemonic and, when that
// mnemonic contains "_1k", from the mnemonic with "_1k" removed.
multiclass VOP3P_Real_MFMA_gfx940<bits<7> op, string Name = !cast<VOP3_Pseudo>(NAME#"_e64").Mnemonic,
                                  VOP3_Pseudo PS_ACD = !cast<VOP3_Pseudo>(NAME # "_e64"),
                                  VOP3_Pseudo PS_VCD = !cast<VOP3_Pseudo>(NAME # "_vgprcd" # "_e64")> {
  // NOTE(review): the _vcd record below also gets PS_ACD.AsmOperands as its
  // operand string; presumably both pseudos share the same operand syntax --
  // confirm.
  let SubtargetPredicate = isGFX940Plus,
      DecoderNamespace = "GFX940",
      AsmString = Name # PS_ACD.AsmOperands, Constraints = "" in {
  def _gfx940_acd : VOP3P_Real<PS_ACD, SIEncodingFamily.GFX940>,
                    VOP3Pe_MAI <op, PS_ACD.Pfl, 1>;

  def _gfx940_vcd : VOP3P_Real<PS_VCD, SIEncodingFamily.GFX940>,
                    VOP3Pe_MAI <op, PS_VCD.Pfl, 0>;
  } // End SubtargetPredicate = isGFX940Plus, DecoderNamespace = "GFX940", AsmString, Constraints = ""

  let SubtargetPredicate = isGFX940Plus in {
  defm : VOP3P_Real_MFMA_gfx940_aliases<Name, PS_ACD.Mnemonic, NAME>;

  if !ne(!subst("_1k", "", PS_ACD.Mnemonic), PS_ACD.Mnemonic) then
  defm : VOP3P_Real_MFMA_gfx940_aliases<Name, !subst("_1k", "", PS_ACD.Mnemonic), NAME>;
  } // End SubtargetPredicate = isGFX940Plus
}

multiclass VOP3P_Real_MFMA_vi<bits<7> op> {
1660e8d8bef9SDimitry Andric def _vi : VOP3P_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>, 1661fe6060f1SDimitry Andric VOP3Pe_MAI <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl, ?> { 16625f757f3fSDimitry Andric let SubtargetPredicate = isGFX8GFX9NotGFX90A; 1663480093f4SDimitry Andric let AssemblerPredicate = HasMAIInsts; 16640b57cec5SDimitry Andric let DecoderNamespace = "GFX8"; 166581ad6265SDimitry Andric let Constraints = ""; 16660b57cec5SDimitry Andric } 16670b57cec5SDimitry Andric} 166881ad6265SDimitry Andric 16695f757f3fSDimitry Andricmulticlass VOP3P_Real_MFMA_vi_gfx90a<bits<7> op> : 16705f757f3fSDimitry Andric VOP3P_Real_MFMA_gfx90a <op>, 16715f757f3fSDimitry Andric VOP3P_Real_MFMA_vi <op>; 16725f757f3fSDimitry Andric 16735f757f3fSDimitry Andricmulticlass VOP3P_Real_MFMA<bits<7> op, string GFX940Name = !cast<VOP3_Pseudo>(NAME#"_e64").Mnemonic> : 16745f757f3fSDimitry Andric VOP3P_Real_MFMA_vi_gfx90a <op>, 16755f757f3fSDimitry Andric VOP3P_Real_MFMA_gfx940 <op, GFX940Name>; 16765f757f3fSDimitry Andric 167781ad6265SDimitry Andricmulticlass VOP3P_Real_SMFMAC<bits<7> op, string alias> { 167881ad6265SDimitry Andric def _gfx940 : VOP3P_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>, 167981ad6265SDimitry Andric VOP3Pe_SMFMAC <op> { 168081ad6265SDimitry Andric let AssemblerPredicate = isGFX940Plus; 168181ad6265SDimitry Andric let DecoderNamespace = "GFX8"; 168281ad6265SDimitry Andric } 168381ad6265SDimitry Andric def : MnemonicAlias<alias, !cast<VOP3_Pseudo>(NAME#"_e64").Mnemonic>; 168404eeddc0SDimitry Andric} 16850b57cec5SDimitry Andric 16865f757f3fSDimitry Andriclet SubtargetPredicate = isGFX8GFX9 in { 1687e8d8bef9SDimitry Andricdefm V_PK_MAD_I16 : VOP3P_Real_vi <0x00>; 1688e8d8bef9SDimitry Andricdefm V_PK_MUL_LO_U16 : VOP3P_Real_vi <0x01>; 1689e8d8bef9SDimitry Andricdefm V_PK_ADD_I16 : VOP3P_Real_vi <0x02>; 1690e8d8bef9SDimitry Andricdefm V_PK_SUB_I16 : VOP3P_Real_vi <0x03>; 1691e8d8bef9SDimitry Andricdefm V_PK_LSHLREV_B16 : VOP3P_Real_vi 
<0x04>; 1692e8d8bef9SDimitry Andricdefm V_PK_LSHRREV_B16 : VOP3P_Real_vi <0x05>; 1693e8d8bef9SDimitry Andricdefm V_PK_ASHRREV_I16 : VOP3P_Real_vi <0x06>; 1694e8d8bef9SDimitry Andricdefm V_PK_MAX_I16 : VOP3P_Real_vi <0x07>; 1695e8d8bef9SDimitry Andricdefm V_PK_MIN_I16 : VOP3P_Real_vi <0x08>; 1696e8d8bef9SDimitry Andricdefm V_PK_MAD_U16 : VOP3P_Real_vi <0x09>; 16970b57cec5SDimitry Andric 1698e8d8bef9SDimitry Andricdefm V_PK_ADD_U16 : VOP3P_Real_vi <0x0a>; 1699e8d8bef9SDimitry Andricdefm V_PK_SUB_U16 : VOP3P_Real_vi <0x0b>; 1700e8d8bef9SDimitry Andricdefm V_PK_MAX_U16 : VOP3P_Real_vi <0x0c>; 1701e8d8bef9SDimitry Andricdefm V_PK_MIN_U16 : VOP3P_Real_vi <0x0d>; 1702e8d8bef9SDimitry Andricdefm V_PK_FMA_F16 : VOP3P_Real_vi <0x0e>; 1703e8d8bef9SDimitry Andricdefm V_PK_ADD_F16 : VOP3P_Real_vi <0x0f>; 1704e8d8bef9SDimitry Andricdefm V_PK_MUL_F16 : VOP3P_Real_vi <0x10>; 1705e8d8bef9SDimitry Andricdefm V_PK_MIN_F16 : VOP3P_Real_vi <0x11>; 1706e8d8bef9SDimitry Andricdefm V_PK_MAX_F16 : VOP3P_Real_vi <0x12>; 17070b57cec5SDimitry Andric 17085f757f3fSDimitry Andriclet OtherPredicates = [HasMadMixInsts] in { 1709e8d8bef9SDimitry Andricdefm V_MAD_MIX_F32 : VOP3P_Real_vi <0x20>; 1710e8d8bef9SDimitry Andricdefm V_MAD_MIXLO_F16 : VOP3P_Real_vi <0x21>; 1711e8d8bef9SDimitry Andricdefm V_MAD_MIXHI_F16 : VOP3P_Real_vi <0x22>; 17120b57cec5SDimitry Andric} 17130b57cec5SDimitry Andric 17145f757f3fSDimitry Andriclet OtherPredicates = [HasFmaMixInsts], 17155f757f3fSDimitry Andric DecoderNamespace = "GFX9_DL" in { 17160b57cec5SDimitry Andric// The mad_mix instructions were renamed and their behaviors changed, 17170b57cec5SDimitry Andric// but the opcode stayed the same so we need to put these in a 17180b57cec5SDimitry Andric// different DecoderNamespace to avoid the ambiguity. 
1719e8d8bef9SDimitry Andricdefm V_FMA_MIX_F32 : VOP3P_Real_vi <0x20>; 1720e8d8bef9SDimitry Andricdefm V_FMA_MIXLO_F16 : VOP3P_Real_vi <0x21>; 1721e8d8bef9SDimitry Andricdefm V_FMA_MIXHI_F16 : VOP3P_Real_vi <0x22>; 17220b57cec5SDimitry Andric} 17230b57cec5SDimitry Andric 1724e8d8bef9SDimitry Andricdefm V_DOT2_I32_I16 : VOP3P_Real_vi <0x26>; 1725e8d8bef9SDimitry Andricdefm V_DOT2_U32_U16 : VOP3P_Real_vi <0x27>; 1726fe6060f1SDimitry Andric 1727fe6060f1SDimitry Andricdefm V_DOT2_F32_F16 : VOP3P_Real_vi <0x23>; 1728e8d8bef9SDimitry Andricdefm V_DOT4_U32_U8 : VOP3P_Real_vi <0x29>; 1729e8d8bef9SDimitry Andricdefm V_DOT8_U32_U4 : VOP3P_Real_vi <0x2b>; 17300b57cec5SDimitry Andric 1731e8d8bef9SDimitry Andricdefm V_DOT4_I32_I8 : VOP3P_Real_vi <0x28>; 1732e8d8bef9SDimitry Andricdefm V_DOT8_I32_I4 : VOP3P_Real_vi <0x2a>; 17335f757f3fSDimitry Andric} // End SubtargetPredicate = isGFX8GFX9 17340b57cec5SDimitry Andric 17355f757f3fSDimitry Andriclet OtherPredicates = [HasMAIInsts] in { 17360b57cec5SDimitry Andric 1737e8d8bef9SDimitry Andricdefm V_ACCVGPR_READ_B32 : VOP3P_Real_MAI <0x58>; 1738e8d8bef9SDimitry Andricdefm V_ACCVGPR_WRITE_B32 : VOP3P_Real_MAI <0x59>; 173981ad6265SDimitry Andricdefm V_MFMA_F32_32X32X1F32 : VOP3P_Real_MFMA <0x40, "v_mfma_f32_32x32x1_2b_f32">; 174081ad6265SDimitry Andricdefm V_MFMA_F32_16X16X1F32 : VOP3P_Real_MFMA <0x41, "v_mfma_f32_16x16x1_4b_f32">; 174181ad6265SDimitry Andricdefm V_MFMA_F32_4X4X1F32 : VOP3P_Real_MFMA <0x42, "v_mfma_f32_4x4x1_16b_f32">; 174281ad6265SDimitry Andricdefm V_MFMA_F32_32X32X2F32 : VOP3P_Real_MFMA <0x44, "v_mfma_f32_32x32x2_f32">; 174381ad6265SDimitry Andricdefm V_MFMA_F32_16X16X4F32 : VOP3P_Real_MFMA <0x45, "v_mfma_f32_16x16x4_f32">; 174481ad6265SDimitry Andricdefm V_MFMA_F32_32X32X4F16 : VOP3P_Real_MFMA <0x48, "v_mfma_f32_32x32x4_2b_f16">; 174581ad6265SDimitry Andricdefm V_MFMA_F32_16X16X4F16 : VOP3P_Real_MFMA <0x49, "v_mfma_f32_16x16x4_4b_f16">; 174681ad6265SDimitry Andricdefm V_MFMA_F32_4X4X4F16 : VOP3P_Real_MFMA <0x4a, 
"v_mfma_f32_4x4x4_16b_f16">; 174781ad6265SDimitry Andricdefm V_MFMA_F32_32X32X8F16 : VOP3P_Real_MFMA <0x4c, "v_mfma_f32_32x32x8_f16">; 174881ad6265SDimitry Andricdefm V_MFMA_F32_16X16X16F16 : VOP3P_Real_MFMA <0x4d, "v_mfma_f32_16x16x16_f16">; 174981ad6265SDimitry Andricdefm V_MFMA_I32_32X32X4I8 : VOP3P_Real_MFMA <0x50, "v_mfma_i32_32x32x4_2b_i8">; 175081ad6265SDimitry Andricdefm V_MFMA_I32_16X16X4I8 : VOP3P_Real_MFMA <0x51, "v_mfma_i32_16x16x4_4b_i8">; 175181ad6265SDimitry Andricdefm V_MFMA_I32_4X4X4I8 : VOP3P_Real_MFMA <0x52, "v_mfma_i32_4x4x4_16b_i8">; 175281ad6265SDimitry Andric 17535f757f3fSDimitry Andricdefm V_MFMA_I32_16X16X16I8 : VOP3P_Real_MFMA_vi_gfx90a <0x55>; 17545f757f3fSDimitry Andricdefm V_MFMA_I32_32X32X8I8 : VOP3P_Real_MFMA_vi_gfx90a <0x54>; 17555f757f3fSDimitry Andricdefm V_MFMA_F32_32X32X2BF16 : VOP3P_Real_MFMA_vi_gfx90a <0x68>; 17565f757f3fSDimitry Andricdefm V_MFMA_F32_16X16X2BF16 : VOP3P_Real_MFMA_vi_gfx90a <0x69>; 17575f757f3fSDimitry Andricdefm V_MFMA_F32_4X4X2BF16 : VOP3P_Real_MFMA_vi_gfx90a <0x6b>; 17585f757f3fSDimitry Andricdefm V_MFMA_F32_32X32X4BF16 : VOP3P_Real_MFMA_vi_gfx90a <0x6c>; 17595f757f3fSDimitry Andricdefm V_MFMA_F32_16X16X8BF16 : VOP3P_Real_MFMA_vi_gfx90a <0x6d>; 17600b57cec5SDimitry Andric 17615f757f3fSDimitry Andric} // End OtherPredicates = [HasMAIInsts] 17620b57cec5SDimitry Andric 1763fe6060f1SDimitry Andricdefm V_MFMA_F32_32X32X4BF16_1K : VOP3P_Real_MFMA_gfx90a <0x63>; 1764fe6060f1SDimitry Andricdefm V_MFMA_F32_16X16X4BF16_1K : VOP3P_Real_MFMA_gfx90a <0x64>; 1765fe6060f1SDimitry Andricdefm V_MFMA_F32_4X4X4BF16_1K : VOP3P_Real_MFMA_gfx90a <0x65>; 1766fe6060f1SDimitry Andricdefm V_MFMA_F32_32X32X8BF16_1K : VOP3P_Real_MFMA_gfx90a <0x66>; 1767fe6060f1SDimitry Andricdefm V_MFMA_F32_16X16X16BF16_1K : VOP3P_Real_MFMA_gfx90a <0x67>; 1768fe6060f1SDimitry Andricdefm V_MFMA_F64_16X16X4F64 : VOP3P_Real_MFMA_gfx90a <0x6e>; 1769fe6060f1SDimitry Andricdefm V_MFMA_F64_4X4X4F64 : VOP3P_Real_MFMA_gfx90a <0x6f>; 1770fe6060f1SDimitry Andric 
// GFX940-only MFMA encodings. When a string is given it is the mnemonic used
// for the GFX940 records; otherwise the pseudo's own mnemonic is kept.
defm V_MFMA_I32_32X32X16I8       : VOP3P_Real_MFMA_gfx940<0x56, "v_mfma_i32_32x32x16_i8">;
defm V_MFMA_I32_16X16X32I8       : VOP3P_Real_MFMA_gfx940<0x57, "v_mfma_i32_16x16x32_i8">;
defm V_MFMA_F32_16X16X8XF32      : VOP3P_Real_MFMA_gfx940<0x3e, "v_mfma_f32_16x16x8_xf32">;
defm V_MFMA_F32_32X32X4XF32      : VOP3P_Real_MFMA_gfx940<0x3f, "v_mfma_f32_32x32x4_xf32">;
defm V_MFMA_F32_16X16X32_BF8_BF8 : VOP3P_Real_MFMA_gfx940<0x70>;
defm V_MFMA_F32_16X16X32_BF8_FP8 : VOP3P_Real_MFMA_gfx940<0x71>;
defm V_MFMA_F32_16X16X32_FP8_BF8 : VOP3P_Real_MFMA_gfx940<0x72>;
defm V_MFMA_F32_16X16X32_FP8_FP8 : VOP3P_Real_MFMA_gfx940<0x73>;
defm V_MFMA_F32_32X32X16_BF8_BF8 : VOP3P_Real_MFMA_gfx940<0x74>;
defm V_MFMA_F32_32X32X16_BF8_FP8 : VOP3P_Real_MFMA_gfx940<0x75>;
defm V_MFMA_F32_32X32X16_FP8_BF8 : VOP3P_Real_MFMA_gfx940<0x76>;
defm V_MFMA_F32_32X32X16_FP8_FP8 : VOP3P_Real_MFMA_gfx940<0x77>;

// The *_1K bf16 pseudos also have GFX90A records (VOP3P_Real_MFMA_gfx90a,
// opcodes 0x63-0x67); on GFX940 they use opcodes 0x5d-0x61 and new mnemonics.
defm V_MFMA_F32_32X32X4BF16_1K   : VOP3P_Real_MFMA_gfx940<0x5d, "v_mfma_f32_32x32x4_2b_bf16">;
defm V_MFMA_F32_16X16X4BF16_1K   : VOP3P_Real_MFMA_gfx940<0x5e, "v_mfma_f32_16x16x4_4b_bf16">;
defm V_MFMA_F32_4X4X4BF16_1K     : VOP3P_Real_MFMA_gfx940<0x5f, "v_mfma_f32_4x4x4_16b_bf16">;
defm V_MFMA_F32_32X32X8BF16_1K   : VOP3P_Real_MFMA_gfx940<0x60, "v_mfma_f32_32x32x8_bf16">;
defm V_MFMA_F32_16X16X16BF16_1K  : VOP3P_Real_MFMA_gfx940<0x61, "v_mfma_f32_16x16x16_bf16">;

// The f64 pseudos keep the opcodes of their GFX90A records (0x6e / 0x6f).
defm V_MFMA_F64_16X16X4F64       : VOP3P_Real_MFMA_gfx940<0x6e, "v_mfma_f64_16x16x4_f64">;
defm V_MFMA_F64_4X4X4F64         : VOP3P_Real_MFMA_gfx940<0x6f, "v_mfma_f64_4x4x4_4b_f64">;

// Sparse MFMA (SMFMAC). The string is an extra mnemonic registered as a
// MnemonicAlias for the pseudo's mnemonic.
// NOTE(review): the aliases look like the spelling without an underscore
// before the element type - confirm against the pseudo definitions.
defm V_SMFMAC_F32_16X16X32_F16     : VOP3P_Real_SMFMAC<0x62, "v_smfmac_f32_16x16x32f16">;
defm V_SMFMAC_F32_32X32X16_F16     : VOP3P_Real_SMFMAC<0x64, "v_smfmac_f32_32x32x16f16">;
defm V_SMFMAC_F32_16X16X32_BF16    : VOP3P_Real_SMFMAC<0x66, "v_smfmac_f32_16x16x32bf16">;
defm V_SMFMAC_F32_32X32X16_BF16    : VOP3P_Real_SMFMAC<0x68, "v_smfmac_f32_32x32x16bf16">;
defm V_SMFMAC_I32_16X16X64_I8      : VOP3P_Real_SMFMAC<0x6a, "v_smfmac_i32_16x16x64i8">;
defm V_SMFMAC_I32_32X32X32_I8      : VOP3P_Real_SMFMAC<0x6c, "v_smfmac_i32_32x32x32i8">;
defm V_SMFMAC_F32_16X16X64_BF8_BF8 : VOP3P_Real_SMFMAC<0x78, "v_smfmac_f32_16x16x64bf8bf8">;
defm V_SMFMAC_F32_16X16X64_BF8_FP8 : VOP3P_Real_SMFMAC<0x79, "v_smfmac_f32_16x16x64bf8fp8">;
defm V_SMFMAC_F32_16X16X64_FP8_BF8 : VOP3P_Real_SMFMAC<0x7a, "v_smfmac_f32_16x16x64fp8bf8">;
defm V_SMFMAC_F32_16X16X64_FP8_FP8 : VOP3P_Real_SMFMAC<0x7b, "v_smfmac_f32_16x16x64fp8fp8">;
defm V_SMFMAC_F32_32X32X32_BF8_BF8 : VOP3P_Real_SMFMAC<0x7c, "v_smfmac_f32_32x32x32bf8bf8">;
defm V_SMFMAC_F32_32X32X32_BF8_FP8 : VOP3P_Real_SMFMAC<0x7d, "v_smfmac_f32_32x32x32bf8fp8">;
defm V_SMFMAC_F32_32X32X32_FP8_BF8 : VOP3P_Real_SMFMAC<0x7e, "v_smfmac_f32_32x32x32fp8bf8">;
defm V_SMFMAC_F32_32X32X32_FP8_FP8 : VOP3P_Real_SMFMAC<0x7f, "v_smfmac_f32_32x32x32fp8fp8">;

// Packed f32 operations, VI-family encoding. These sit outside any `let`
// block here, so no subtarget predicate is added at this point.
defm V_PK_FMA_F32 : VOP3P_Real_vi<0x30>;
defm V_PK_MUL_F32 : VOP3P_Real_vi<0x31>;
defm V_PK_ADD_F32 : VOP3P_Real_vi<0x32>;
// Packed 32-bit move, VI-family encoding; like the V_PK_*_F32 entries it is
// instantiated outside any `let` block.
defm V_PK_MOV_B32 : VOP3P_Real_vi<0x33>;

//===----------------------------------------------------------------------===//
// GFX10.
//===----------------------------------------------------------------------===//

// Defines "<NAME>_gfx10": the GFX10-only real instruction for the pseudo
// called NAME, encoded with VOP3Pe_gfx10 using the pseudo's profile.
let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10", VOP3P = 1 in {
  multiclass VOP3P_Real_gfx10<bits<7> op> {
    def _gfx10 : VOP3P_Real<!cast<VOP3P_Pseudo>(NAME), SIEncodingFamily.GFX10>,
                 VOP3Pe_gfx10<op, !cast<VOP3P_Pseudo>(NAME).Pfl>;
  }
} // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10", VOP3P = 1

// Same opcode on GFX10 and GFX11.
multiclass VOP3P_Real_gfx10_gfx11<bits<7> op> :
  VOP3P_Real_gfx10<op>, VOP3P_Real_Base<GFX11Gen, op>;

// Same opcode on GFX10, GFX11 and GFX12.
multiclass VOP3P_Real_gfx10_gfx11_gfx12<bits<7> op> :
  VOP3P_Real_gfx10_gfx11<op>, VOP3P_Real_Base<GFX12Gen, op>;

// Same opcode on GFX10, GFX11 and GFX12, using VOP3P_Realtriple (rather than
// VOP3P_Real_Base) for the GFX11 and GFX12 records.
multiclass VOP3P_Real_gfx10_gfx11_gfx12_Triple<bits<7> op> :
  VOP3P_Real_gfx10<op>, VOP3P_Realtriple<GFX11Gen, op>,
  VOP3P_Realtriple<GFX12Gen, op>;

defm V_PK_MAD_I16     : VOP3P_Real_gfx10_gfx11_gfx12<0x00>;
defm V_PK_MUL_LO_U16  : VOP3P_Real_gfx10_gfx11_gfx12<0x01>;
defm V_PK_ADD_I16     : VOP3P_Real_gfx10_gfx11_gfx12<0x02>;
defm V_PK_SUB_I16     : VOP3P_Real_gfx10_gfx11_gfx12<0x03>;
defm V_PK_LSHLREV_B16 : VOP3P_Real_gfx10_gfx11_gfx12<0x04>;
defm V_PK_LSHRREV_B16 : VOP3P_Real_gfx10_gfx11_gfx12<0x05>;
defm V_PK_ASHRREV_I16 : VOP3P_Real_gfx10_gfx11_gfx12<0x06>;
defm V_PK_MAX_I16     : VOP3P_Real_gfx10_gfx11_gfx12<0x07>;
defm V_PK_MIN_I16     : VOP3P_Real_gfx10_gfx11_gfx12<0x08>;
defm V_PK_MAD_U16     : VOP3P_Real_gfx10_gfx11_gfx12<0x09>;
defm V_PK_ADD_U16     : VOP3P_Real_gfx10_gfx11_gfx12<0x0a>;
defm V_PK_SUB_U16     : VOP3P_Real_gfx10_gfx11_gfx12<0x0b>;
defm V_PK_MAX_U16     : VOP3P_Real_gfx10_gfx11_gfx12<0x0c>;
defm V_PK_MIN_U16     : VOP3P_Real_gfx10_gfx11_gfx12<0x0d>;
defm V_PK_FMA_F16     : VOP3P_Real_gfx10_gfx11_gfx12<0x0e>;
defm V_PK_ADD_F16     : VOP3P_Real_gfx10_gfx11_gfx12<0x0f>;
defm V_PK_MUL_F16     : VOP3P_Real_gfx10_gfx11_gfx12<0x10>;
// No GFX12 record under these names: the GFX12 section instantiates
// V_PK_MIN_NUM_F16 / V_PK_MAX_NUM_F16 (opcodes 0x1b / 0x1c) and passes
// "V_PK_MIN_F16" / "V_PK_MAX_F16" as what appears to be the backing pseudo.
defm V_PK_MIN_F16     : VOP3P_Real_gfx10_gfx11<0x11>;
defm V_PK_MAX_F16     : VOP3P_Real_gfx10_gfx11<0x12>;
defm V_FMA_MIX_F32    : VOP3P_Real_gfx10_gfx11_gfx12_Triple<0x20>;
defm V_FMA_MIXLO_F16  : VOP3P_Real_gfx10_gfx11_gfx12_Triple<0x21>;
defm V_FMA_MIXHI_F16  : VOP3P_Real_gfx10_gfx11_gfx12_Triple<0x22>;

// GFX10-only dot products.
defm V_DOT2_I32_I16 : VOP3P_Real_gfx10<0x14>;
defm V_DOT2_U32_U16 : VOP3P_Real_gfx10<0x15>;

defm V_DOT2_F32_F16 : VOP3P_Real_gfx10_gfx11_gfx12_Triple<0x13>;
defm V_DOT4_U32_U8  : VOP3P_Real_gfx10_gfx11_gfx12<0x17>;
defm V_DOT8_U32_U4  : VOP3P_Real_gfx10_gfx11_gfx12<0x19>;

// GFX10-only; opcodes 0x16 / 0x18 are used by V_DOT4_I32_IU8 / V_DOT8_I32_IU4
// on GFX11 and GFX12 (see the GFX11 section).
defm V_DOT4_I32_I8  : VOP3P_Real_gfx10<0x16>;
defm V_DOT8_I32_I4  : VOP3P_Real_gfx10<0x18>;