xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP3PInstructions.td (revision 8bcb0991864975618c09697b1aca10683346d9f0)
10b57cec5SDimitry Andric//===-- VOP3PInstructions.td - Vector Instruction Definitions -------------===//
20b57cec5SDimitry Andric//
30b57cec5SDimitry Andric// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric// See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric//
70b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric
90b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
100b57cec5SDimitry Andric// VOP3P Classes
110b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
120b57cec5SDimitry Andric
// Pseudo-instruction class for VOP3P operations.
//   OpName - instruction name string, forwarded to VOP3P_Pseudo.
//   P      - operand profile.
//   node   - DAG operator used to build the selection pattern
//            (defaults to null_frag).
// The pattern list comes from getVOP3PModPat when P.HasModifiers is set and
// from getVOP3Pat otherwise.
130b57cec5SDimitry Andricclass VOP3PInst<string OpName, VOPProfile P, SDPatternOperator node = null_frag> :
140b57cec5SDimitry Andric  VOP3P_Pseudo<OpName, P,
150b57cec5SDimitry Andric    !if(P.HasModifiers, getVOP3PModPat<P, node>.ret, getVOP3Pat<P, node>.ret)
160b57cec5SDimitry Andric>;
170b57cec5SDimitry Andric
180b57cec5SDimitry Andric// Non-packed instructions that use the VOP3P encoding.
190b57cec5SDimitry Andric// VOP3 neg/abs and VOP3P opsel/opsel_hi modifiers are allowed.
//
//   UseTiedOutput - when set, an extra VGPR_32 input $vdst_in is appended
//                   after $clamp, constrained to be the same register as
//                   $vdst, and excluded from the encoding.
//   node          - accepted for interface symmetry; it is not referenced
//                   anywhere in this class body, so no selection pattern is
//                   attached here.
200b57cec5SDimitry Andricclass VOP3_VOP3PInst<string OpName, VOPProfile P, bit UseTiedOutput = 0,
210b57cec5SDimitry Andric                     SDPatternOperator node = null_frag> :
220b57cec5SDimitry Andric  VOP3P_Pseudo<OpName, P> {
230b57cec5SDimitry Andric  // These operands are only sort of f16 operands. Depending on
240b57cec5SDimitry Andric  // op_sel_hi, these may be interpreted as f32. The inline immediate
250b57cec5SDimitry Andric  // values are really f16 converted to f32, so we treat these as f16
260b57cec5SDimitry Andric  // operands.
  // Operand order: three (modifiers, source) pairs, clamp, the optional
  // tied input, then op_sel and op_sel_hi.
270b57cec5SDimitry Andric  let InOperandList =
280b57cec5SDimitry Andric    !con(
290b57cec5SDimitry Andric      !con(
300b57cec5SDimitry Andric        (ins FP16InputMods:$src0_modifiers, VCSrc_f16:$src0,
310b57cec5SDimitry Andric             FP16InputMods:$src1_modifiers, VCSrc_f16:$src1,
320b57cec5SDimitry Andric             FP16InputMods:$src2_modifiers, VCSrc_f16:$src2,
330b57cec5SDimitry Andric             clampmod:$clamp),
340b57cec5SDimitry Andric         !if(UseTiedOutput, (ins VGPR_32:$vdst_in), (ins))),
350b57cec5SDimitry Andric         (ins op_sel:$op_sel, op_sel_hi:$op_sel_hi));
360b57cec5SDimitry Andric
  // Both settings collapse to the empty string when there is no tied output.
370b57cec5SDimitry Andric  let Constraints = !if(UseTiedOutput, "$vdst = $vdst_in", "");
380b57cec5SDimitry Andric  let DisableEncoding = !if(UseTiedOutput, "$vdst_in", "");
  // Only the *_modifiers operands are named for the sources in the asm
  // string; $vdst_in never appears in it.
390b57cec5SDimitry Andric  let AsmOperands =
400b57cec5SDimitry Andric    " $vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$op_sel$op_sel_hi$clamp";
410b57cec5SDimitry Andric}
420b57cec5SDimitry Andric
// Packed 16-bit instructions that are marked commutable. Definitions that
// pass no DAG operator (the two MADs and V_PK_ADD_I16) get the null_frag
// default of VOP3PInst.
430b57cec5SDimitry Andriclet isCommutable = 1 in {
440b57cec5SDimitry Andricdef V_PK_MAD_I16 : VOP3PInst<"v_pk_mad_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16_V2I16>>;
450b57cec5SDimitry Andricdef V_PK_MAD_U16 : VOP3PInst<"v_pk_mad_u16", VOP3_Profile<VOP_V2I16_V2I16_V2I16_V2I16>>;
460b57cec5SDimitry Andric
// FMA, add and mul additionally set FPDPRounding; max/min below do not.
470b57cec5SDimitry Andriclet FPDPRounding = 1 in {
480b57cec5SDimitry Andricdef V_PK_FMA_F16 : VOP3PInst<"v_pk_fma_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16_V2F16>, fma>;
490b57cec5SDimitry Andricdef V_PK_ADD_F16 : VOP3PInst<"v_pk_add_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>, fadd>;
500b57cec5SDimitry Andricdef V_PK_MUL_F16 : VOP3PInst<"v_pk_mul_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>, fmul>;
510b57cec5SDimitry Andric} // End FPDPRounding = 1
520b57cec5SDimitry Andricdef V_PK_MAX_F16 : VOP3PInst<"v_pk_max_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>, fmaxnum_like>;
530b57cec5SDimitry Andricdef V_PK_MIN_F16 : VOP3PInst<"v_pk_min_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>, fminnum_like>;
540b57cec5SDimitry Andric
// The generic `add` node is attached to the U16 variant only.
550b57cec5SDimitry Andricdef V_PK_ADD_U16 : VOP3PInst<"v_pk_add_u16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, add>;
560b57cec5SDimitry Andricdef V_PK_ADD_I16 : VOP3PInst<"v_pk_add_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>>;
570b57cec5SDimitry Andricdef V_PK_MUL_LO_U16 : VOP3PInst<"v_pk_mul_lo_u16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, mul>;
580b57cec5SDimitry Andric
590b57cec5SDimitry Andricdef V_PK_MIN_I16 : VOP3PInst<"v_pk_min_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, smin>;
600b57cec5SDimitry Andricdef V_PK_MIN_U16 : VOP3PInst<"v_pk_min_u16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, umin>;
610b57cec5SDimitry Andricdef V_PK_MAX_I16 : VOP3PInst<"v_pk_max_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, smax>;
620b57cec5SDimitry Andricdef V_PK_MAX_U16 : VOP3PInst<"v_pk_max_u16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, umax>;
630b57cec5SDimitry Andric} // End isCommutable = 1
640b57cec5SDimitry Andric
// Packed subtract and "rev" shift instructions; these are defined outside
// the isCommutable group. The generic `sub` node is attached to the I16
// variant only; V_PK_SUB_U16 is given no DAG operator here.
650b57cec5SDimitry Andricdef V_PK_SUB_U16 : VOP3PInst<"v_pk_sub_u16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>>;
660b57cec5SDimitry Andricdef V_PK_SUB_I16 : VOP3PInst<"v_pk_sub_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, sub>;
670b57cec5SDimitry Andric
680b57cec5SDimitry Andricdef V_PK_LSHLREV_B16 : VOP3PInst<"v_pk_lshlrev_b16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, lshl_rev>;
690b57cec5SDimitry Andricdef V_PK_ASHRREV_I16 : VOP3PInst<"v_pk_ashrrev_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, ashr_rev>;
700b57cec5SDimitry Andricdef V_PK_LSHRREV_B16 : VOP3PInst<"v_pk_lshrrev_b16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, lshr_rev>;
710b57cec5SDimitry Andric
720b57cec5SDimitry Andric
730b57cec5SDimitry Andric// Undo sub x, c -> add x, -c canonicalization since c is more likely
740b57cec5SDimitry Andric// an inline immediate than -c.
750b57cec5SDimitry Andric// The constant will be emitted as a mov, and folded later.
760b57cec5SDimitry Andric// TODO: We could directly encode the immediate now
//
// Matches a v2i16 add whose second operand satisfies NegSubInlineConstV216
// and emits V_PK_SUB_U16. $src0 keeps the modifiers and clamp captured by
// VOP3PMods0; $src1 is given SRCMODS.OP_SEL_1 as its modifier operand.
770b57cec5SDimitry Andricdef : GCNPat<
780b57cec5SDimitry Andric  (add (v2i16 (VOP3PMods0 v2i16:$src0, i32:$src0_modifiers, i1:$clamp)), NegSubInlineConstV216:$src1),
790b57cec5SDimitry Andric  (V_PK_SUB_U16 $src0_modifiers, $src0, SRCMODS.OP_SEL_1, NegSubInlineConstV216:$src1, $clamp)
800b57cec5SDimitry Andric>;
810b57cec5SDimitry Andric
// Selection patterns shared by the mad_mix and fma_mix instruction families.
//   fma_like   - three-operand DAG operator being matched.
//   mix_inst   - f32-result instruction; not referenced by any pattern in
//                this multiclass body.
//   mixlo_inst - instruction used for the low / scalar f16 result.
//   mixhi_inst - instruction used for the high f16 element of a v2f16.
// Every source is matched through VOP3PMadMixMods, and the instruction
// operands are emitted as (modifiers, source) pairs followed by the clamp
// setting and a final value operand.
820b57cec5SDimitry Andricmulticlass MadFmaMixPats<SDPatternOperator fma_like,
830b57cec5SDimitry Andric                         Instruction mix_inst,
840b57cec5SDimitry Andric                         Instruction mixlo_inst,
850b57cec5SDimitry Andric                         Instruction mixhi_inst> {
  // Scalar f16 result: fpround(fma_like(...)) -> mixlo_inst, unclamped, with
  // IMPLICIT_DEF as the final operand.
860b57cec5SDimitry Andric  def : GCNPat <
870b57cec5SDimitry Andric    (f16 (fpround (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)),
880b57cec5SDimitry Andric                            (f32 (VOP3PMadMixMods f16:$src1, i32:$src1_modifiers)),
890b57cec5SDimitry Andric                            (f32 (VOP3PMadMixMods f16:$src2, i32:$src2_modifiers))))),
900b57cec5SDimitry Andric    (mixlo_inst $src0_modifiers, $src0,
910b57cec5SDimitry Andric                $src1_modifiers, $src1,
920b57cec5SDimitry Andric                $src2_modifiers, $src2,
930b57cec5SDimitry Andric                DSTCLAMP.NONE,
940b57cec5SDimitry Andric                (i32 (IMPLICIT_DEF)))
950b57cec5SDimitry Andric  >;
960b57cec5SDimitry Andric
970b57cec5SDimitry Andric  // FIXME: Special case handling for mixhi (especially for clamp)
980b57cec5SDimitry Andric  // because dealing with the write to high half of the register is
990b57cec5SDimitry Andric  // difficult.
  // build_vector(elt0, fpround(fma_like(...))) -> mixhi_inst, unclamped, with
  // $elt0 passed as the final operand.
1000b57cec5SDimitry Andric  def : GCNPat <
1010b57cec5SDimitry Andric    (build_vector f16:$elt0, (fpround (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)),
1020b57cec5SDimitry Andric                                                (f32 (VOP3PMadMixMods f16:$src1, i32:$src1_modifiers)),
1030b57cec5SDimitry Andric                                                (f32 (VOP3PMadMixMods f16:$src2, i32:$src2_modifiers))))),
1040b57cec5SDimitry Andric    (v2f16 (mixhi_inst $src0_modifiers, $src0,
1050b57cec5SDimitry Andric                       $src1_modifiers, $src1,
1060b57cec5SDimitry Andric                       $src2_modifiers, $src2,
1070b57cec5SDimitry Andric                       DSTCLAMP.NONE,
1080b57cec5SDimitry Andric                       $elt0))
1090b57cec5SDimitry Andric  >;
1100b57cec5SDimitry Andric
  // Same shape as the previous pattern, but the second element is wrapped
  // in AMDGPUclamp, so mixhi_inst is emitted with DSTCLAMP.ENABLE.
1110b57cec5SDimitry Andric  def : GCNPat <
1120b57cec5SDimitry Andric    (build_vector
1130b57cec5SDimitry Andric      f16:$elt0,
1140b57cec5SDimitry Andric      (AMDGPUclamp (fpround (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)),
1150b57cec5SDimitry Andric                                      (f32 (VOP3PMadMixMods f16:$src1, i32:$src1_modifiers)),
1160b57cec5SDimitry Andric                                      (f32 (VOP3PMadMixMods f16:$src2, i32:$src2_modifiers)))))),
1170b57cec5SDimitry Andric    (v2f16 (mixhi_inst $src0_modifiers, $src0,
1180b57cec5SDimitry Andric                       $src1_modifiers, $src1,
1190b57cec5SDimitry Andric                       $src2_modifiers, $src2,
1200b57cec5SDimitry Andric                       DSTCLAMP.ENABLE,
1210b57cec5SDimitry Andric                       $elt0))
1220b57cec5SDimitry Andric  >;
1230b57cec5SDimitry Andric
  // AMDGPUclamp applied to a whole build_vector whose two elements are both
  // fpround(fma_like(...)): emit a clamped mixlo_inst for the first element
  // and feed its result as the final operand of a clamped mixhi_inst for the
  // second.
1240b57cec5SDimitry Andric  def : GCNPat <
1250b57cec5SDimitry Andric    (AMDGPUclamp (build_vector
1260b57cec5SDimitry Andric      (fpround (fma_like (f32 (VOP3PMadMixMods f16:$lo_src0, i32:$lo_src0_modifiers)),
1270b57cec5SDimitry Andric                         (f32 (VOP3PMadMixMods f16:$lo_src1, i32:$lo_src1_modifiers)),
1280b57cec5SDimitry Andric                         (f32 (VOP3PMadMixMods f16:$lo_src2, i32:$lo_src2_modifiers)))),
1290b57cec5SDimitry Andric      (fpround (fma_like (f32 (VOP3PMadMixMods f16:$hi_src0, i32:$hi_src0_modifiers)),
1300b57cec5SDimitry Andric                         (f32 (VOP3PMadMixMods f16:$hi_src1, i32:$hi_src1_modifiers)),
1310b57cec5SDimitry Andric                         (f32 (VOP3PMadMixMods f16:$hi_src2, i32:$hi_src2_modifiers)))))),
1320b57cec5SDimitry Andric    (v2f16 (mixhi_inst $hi_src0_modifiers, $hi_src0,
1330b57cec5SDimitry Andric                       $hi_src1_modifiers, $hi_src1,
1340b57cec5SDimitry Andric                       $hi_src2_modifiers, $hi_src2,
1350b57cec5SDimitry Andric                       DSTCLAMP.ENABLE,
1360b57cec5SDimitry Andric                       (mixlo_inst $lo_src0_modifiers, $lo_src0,
1370b57cec5SDimitry Andric                                   $lo_src1_modifiers, $lo_src1,
1380b57cec5SDimitry Andric                                   $lo_src2_modifiers, $lo_src2,
1390b57cec5SDimitry Andric                                   DSTCLAMP.ENABLE,
1400b57cec5SDimitry Andric                                   (i32 (IMPLICIT_DEF)))))
1410b57cec5SDimitry Andric  >;
1420b57cec5SDimitry Andric}
1430b57cec5SDimitry Andric
1440b57cec5SDimitry Andriclet SubtargetPredicate = HasMadMixInsts in {
1450b57cec5SDimitry Andric// These are VOP3a-like opcodes which accept no omod.
1460b57cec5SDimitry Andric// Size of src arguments (16/32) is controlled by op_sel.
1470b57cec5SDimitry Andric// For 16-bit src arguments their location (hi/lo) are controlled by op_sel_hi.
// NOTE(review): the comment inside VOP3_VOP3PInst says the f16-vs-f32
// interpretation depends on op_sel_hi, which is the opposite assignment to
// the two lines above - confirm against the ISA documentation which of
// op_sel / op_sel_hi selects the size and which selects the half.
1480b57cec5SDimitry Andriclet isCommutable = 1 in {
1490b57cec5SDimitry Andricdef V_MAD_MIX_F32 : VOP3_VOP3PInst<"v_mad_mix_f32", VOP3_Profile<VOP_F32_F16_F16_F16, VOP3_OPSEL>>;
1500b57cec5SDimitry Andric
1510b57cec5SDimitry Andriclet FPDPRounding = 1 in {
1520b57cec5SDimitry Andric// Clamp modifier is applied after conversion to f16.
// The trailing `1` is UseTiedOutput: the lo/hi variants take a $vdst_in
// operand tied to $vdst.
1530b57cec5SDimitry Andricdef V_MAD_MIXLO_F16 : VOP3_VOP3PInst<"v_mad_mixlo_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, 1>;
1540b57cec5SDimitry Andric
1550b57cec5SDimitry Andriclet ClampLo = 0, ClampHi = 1 in {
1560b57cec5SDimitry Andricdef V_MAD_MIXHI_F16 : VOP3_VOP3PInst<"v_mad_mixhi_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, 1>;
1570b57cec5SDimitry Andric} // End ClampLo = 0, ClampHi = 1
1580b57cec5SDimitry Andric} // End FPDPRounding = 1
1590b57cec5SDimitry Andric} // End isCommutable = 1
1600b57cec5SDimitry Andric
// Instantiate the shared mix patterns with `fmad` as the matched operator.
1610b57cec5SDimitry Andricdefm : MadFmaMixPats<fmad, V_MAD_MIX_F32, V_MAD_MIXLO_F16, V_MAD_MIXHI_F16>;
1620b57cec5SDimitry Andric} // End SubtargetPredicate = HasMadMixInsts
1630b57cec5SDimitry Andric
1640b57cec5SDimitry Andric
1650b57cec5SDimitry Andric// Essentially the same as the mad_mix versions
1660b57cec5SDimitry Andriclet SubtargetPredicate = HasFmaMixInsts in {
1670b57cec5SDimitry Andriclet isCommutable = 1 in {
1680b57cec5SDimitry Andricdef V_FMA_MIX_F32 : VOP3_VOP3PInst<"v_fma_mix_f32", VOP3_Profile<VOP_F32_F16_F16_F16, VOP3_OPSEL>>;
1690b57cec5SDimitry Andric
1700b57cec5SDimitry Andriclet FPDPRounding = 1 in {
1710b57cec5SDimitry Andric// Clamp modifier is applied after conversion to f16.
// The trailing `1` is UseTiedOutput: the lo/hi variants take a $vdst_in
// operand tied to $vdst.
1720b57cec5SDimitry Andricdef V_FMA_MIXLO_F16 : VOP3_VOP3PInst<"v_fma_mixlo_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, 1>;
1730b57cec5SDimitry Andric
1740b57cec5SDimitry Andriclet ClampLo = 0, ClampHi = 1 in {
1750b57cec5SDimitry Andricdef V_FMA_MIXHI_F16 : VOP3_VOP3PInst<"v_fma_mixhi_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, 1>;
1760b57cec5SDimitry Andric} // End ClampLo = 0, ClampHi = 1
1770b57cec5SDimitry Andric} // End FPDPRounding = 1
1780b57cec5SDimitry Andric} // End isCommutable = 1
1790b57cec5SDimitry Andric
// Instantiate the shared mix patterns with `fma` as the matched operator.
1800b57cec5SDimitry Andricdefm : MadFmaMixPats<fma, V_FMA_MIX_F32, V_FMA_MIXLO_F16, V_FMA_MIXHI_F16>;
1810b57cec5SDimitry Andric} // End SubtargetPredicate = HasFmaMixInsts
1820b57cec5SDimitry Andric
1830b57cec5SDimitry Andric// Defines patterns that extract signed 4bit from each Idx[0].
// Each Idx is [bit offset, left-shift amount], and the two always sum to 28:
// the value is shifted left by Idx[1] and then arithmetically shifted right
// by 28. Offset 28 has no entry in this list.
1840b57cec5SDimitry Andricforeach Idx = [[0,28],[4,24],[8,20],[12,16],[16,12],[20,8],[24,4]] in
1850b57cec5SDimitry Andric  def ExtractSigned4bit_#Idx[0] : PatFrag<(ops node:$src),
1860b57cec5SDimitry Andric                                          (sra (shl node:$src, (i32 Idx[1])), (i32 28))>;
1870b57cec5SDimitry Andric
1880b57cec5SDimitry Andric// Defines code pattern that extracts U(unsigned/signed) 4/8bit from FromBitIndex.
//   FromBitIndex - bit offset of the field within the 32-bit source.
//   BitMask      - 255 for an 8-bit field, 15 for a 4-bit field.
//   U            - 1 for an unsigned extract, 0 for a signed one.
// Three shapes are produced:
//   * last field (offset 24 with mask 255, or offset 28): a single srl (U)
//     or sra (signed) by FromBitIndex;
//   * first field (offset 0): `and` with BitMask (U), ExtractSigned4bit_0
//     (signed 4-bit) or sext_inreg ... i8 (signed 8-bit);
//   * any other field: srl + and (U), ExtractSigned4bit_<offset> (signed
//     4-bit) or sext_inreg of the srl'ed value (signed 8-bit).
1890b57cec5SDimitry Andricclass Extract<int FromBitIndex, int BitMask, bit U>: PatFrag<
1900b57cec5SDimitry Andric  (ops node:$src),
1910b57cec5SDimitry Andric  !if (!or (!and (!eq (BitMask, 255), !eq (FromBitIndex, 24)), !eq (FromBitIndex, 28)), // last element
1920b57cec5SDimitry Andric       !if (U, (srl node:$src, (i32 FromBitIndex)), (sra node:$src, (i32 FromBitIndex))),
1930b57cec5SDimitry Andric       !if (!eq (FromBitIndex, 0), // first element
1940b57cec5SDimitry Andric            !if (U, (and node:$src, (i32 BitMask)),
1950b57cec5SDimitry Andric                 !if (!eq (BitMask, 15), (!cast<PatFrag>("ExtractSigned4bit_"#FromBitIndex) node:$src),
1960b57cec5SDimitry Andric                                         (sext_inreg node:$src, i8))),
1970b57cec5SDimitry Andric            !if (U, (and (srl node:$src, (i32 FromBitIndex)), (i32 BitMask)),
1980b57cec5SDimitry Andric                 !if (!eq (BitMask, 15), (!cast<PatFrag>("ExtractSigned4bit_"#FromBitIndex) node:$src),
1990b57cec5SDimitry Andric                      (sext_inreg (srl node:$src, (i32 FromBitIndex)), i8)))))>;
2000b57cec5SDimitry Andric
2010b57cec5SDimitry Andric
// For each signedness ("I"/"U") and byte index 0..3, define the 8-bit
// extract fragment (<Type><Index>_8bit) and the matching one-use 24-bit
// multiply fragment (Mul<Type>_Elt<Index>).
2020b57cec5SDimitry Andricforeach Type = ["I", "U"] in
2030b57cec5SDimitry Andric  foreach Index = 0-3 in {
2040b57cec5SDimitry Andric    // Defines patterns that extract each Index'ed 8bit from an unsigned
2050b57cec5SDimitry Andric    // 32bit scalar value;
    // !shl(Index, 3) converts the byte index into a bit offset (Index * 8).
2060b57cec5SDimitry Andric    def #Type#Index#"_8bit" : Extract<!shl(Index, 3), 255, !if (!eq (Type, "U"), 1, 0)>;
2070b57cec5SDimitry Andric
2080b57cec5SDimitry Andric    // Defines multiplication patterns where the multiplication is happening on each
2090b57cec5SDimitry Andric    // Index'ed 8bit of a 32bit scalar value.
2100b57cec5SDimitry Andric
    // "I" uses AMDGPUmul_i24_oneuse, "U" uses AMDGPUmul_u24_oneuse; both
    // operands are the same Index'ed byte of $src0 and $src1.
2110b57cec5SDimitry Andric    def Mul#Type#_Elt#Index : PatFrag<
2120b57cec5SDimitry Andric      (ops node:$src0, node:$src1),
2130b57cec5SDimitry Andric      (!cast<HasOneUseBinOp>(!if (!eq (Type, "I"), AMDGPUmul_i24_oneuse, AMDGPUmul_u24_oneuse))
2140b57cec5SDimitry Andric                            (!cast<Extract>(#Type#Index#"_8bit") node:$src0),
2150b57cec5SDimitry Andric                            (!cast<Extract>(#Type#Index#"_8bit") node:$src1))>;
2160b57cec5SDimitry Andric  }
2170b57cec5SDimitry Andric
2180b57cec5SDimitry Andric// Different variants of dot8 patterns cause a huge increase in the compile time.
2190b57cec5SDimitry Andric// Define non-associative/commutative add/mul to prevent permutation in the dot8
2200b57cec5SDimitry Andric// pattern.
// Each SDNode below names its opcode with only a type profile and no
// property list; the *_oneuse records wrap it in HasOneUseBinOp.
2210b57cec5SDimitry Andricdef NonACAdd        : SDNode<"ISD::ADD"       , SDTIntBinOp>;
2220b57cec5SDimitry Andricdef NonACAdd_oneuse : HasOneUseBinOp<NonACAdd>;
2230b57cec5SDimitry Andric
2240b57cec5SDimitry Andricdef NonACAMDGPUmul_u24        : SDNode<"AMDGPUISD::MUL_U24"       , SDTIntBinOp>;
2250b57cec5SDimitry Andricdef NonACAMDGPUmul_u24_oneuse : HasOneUseBinOp<NonACAMDGPUmul_u24>;
2260b57cec5SDimitry Andric
2270b57cec5SDimitry Andricdef NonACAMDGPUmul_i24        : SDNode<"AMDGPUISD::MUL_I24"       , SDTIntBinOp>;
2280b57cec5SDimitry Andricdef NonACAMDGPUmul_i24_oneuse : HasOneUseBinOp<NonACAMDGPUmul_i24>;
2290b57cec5SDimitry Andric
// For each signedness ("I"/"U") and nibble index 0..7, define the 4-bit
// extract fragment (<Type><Index>_4bit) and the matching one-use multiply
// fragment (Mul<Type><Index>_4bit) built on the NonAC* multiply nodes.
2300b57cec5SDimitry Andricforeach Type = ["I", "U"] in
2310b57cec5SDimitry Andric  foreach Index = 0-7 in {
2320b57cec5SDimitry Andric    // Defines patterns that extract each Index'ed 4bit from an unsigned
2330b57cec5SDimitry Andric    // 32bit scalar value;
    // !shl(Index, 2) converts the nibble index into a bit offset (Index * 4).
2340b57cec5SDimitry Andric    def #Type#Index#"_4bit" : Extract<!shl(Index, 2), 15, !if (!eq (Type, "U"), 1, 0)>;
2350b57cec5SDimitry Andric
2360b57cec5SDimitry Andric    // Defines multiplication patterns where the multiplication is happening on each
2370b57cec5SDimitry Andric    // Index'ed 4bit of a 32bit scalar value.
2380b57cec5SDimitry Andric    def Mul#Type#Index#"_4bit" : PatFrag<
2390b57cec5SDimitry Andric      (ops node:$src0, node:$src1),
2400b57cec5SDimitry Andric      (!cast<HasOneUseBinOp>(!if (!eq (Type, "I"), NonACAMDGPUmul_i24_oneuse, NonACAMDGPUmul_u24_oneuse))
2410b57cec5SDimitry Andric                             (!cast<Extract>(#Type#Index#"_4bit") node:$src0),
2420b57cec5SDimitry Andric                             (!cast<Extract>(#Type#Index#"_4bit") node:$src1))>;
2430b57cec5SDimitry Andric  }
2440b57cec5SDimitry Andric
// Matches the open-coded unsigned 2-element dot product
//   ((src0 >> 16) * (src1 >> 16) + src2) + (src0 & 0xffff) * (src1 & 0xffff)
// built from one-use u24 multiplies and a one-use inner add, and selects
// Inst for it. Each source is passed with the modifier value (i32 8) and
// the final operand is (i1 0).
// NOTE(review): 8 is presumably the op_sel_hi source-modifier bit and the
// trailing i1 the clamp operand - confirm against SRCMODS / the dot profile.
// The pattern inherits Inst's own SubtargetPredicate.
2450b57cec5SDimitry Andricclass UDot2Pat<Instruction Inst> : GCNPat <
2460b57cec5SDimitry Andric  (add (add_oneuse (AMDGPUmul_u24_oneuse (srl i32:$src0, (i32 16)),
2470b57cec5SDimitry Andric                                         (srl i32:$src1, (i32 16))), i32:$src2),
2480b57cec5SDimitry Andric       (AMDGPUmul_u24_oneuse (and i32:$src0, (i32 65535)),
2490b57cec5SDimitry Andric                             (and i32:$src1, (i32 65535)))
2500b57cec5SDimitry Andric   ),
2510b57cec5SDimitry Andric  (Inst (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))> {
2520b57cec5SDimitry Andric  let SubtargetPredicate = !cast<VOP_Pseudo>(Inst).SubtargetPredicate;
2530b57cec5SDimitry Andric}
2540b57cec5SDimitry Andric
// Signed counterpart of UDot2Pat: the high halves are taken with sra by 16,
// the low halves with sext_inreg ... i16, and the multiplies are one-use i24
// multiplies. Selects Inst with modifier value (i32 8) on every source and
// (i1 0) as the final operand.
// NOTE(review): 8 is presumably the op_sel_hi source-modifier bit and the
// trailing i1 the clamp operand - confirm against SRCMODS / the dot profile.
// The pattern inherits Inst's own SubtargetPredicate.
2550b57cec5SDimitry Andricclass SDot2Pat<Instruction Inst> : GCNPat <
2560b57cec5SDimitry Andric  (add (add_oneuse (AMDGPUmul_i24_oneuse (sra i32:$src0, (i32 16)),
2570b57cec5SDimitry Andric                                         (sra i32:$src1, (i32 16))), i32:$src2),
2580b57cec5SDimitry Andric       (AMDGPUmul_i24_oneuse (sext_inreg i32:$src0, i16),
2590b57cec5SDimitry Andric                             (sext_inreg i32:$src1, i16))),
2600b57cec5SDimitry Andric  (Inst (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))> {
2610b57cec5SDimitry Andric  let SubtargetPredicate = !cast<VOP_Pseudo>(Inst).SubtargetPredicate;
2620b57cec5SDimitry Andric}
2630b57cec5SDimitry Andric
// Dot-product instructions. All of them set IsDOT; they are split between
// two subtarget predicates (HasDot2Insts and HasDot1Insts). None is given a
// DAG operator here.
264*8bcb0991SDimitry Andriclet IsDOT = 1 in {
2650b57cec5SDimitry Andriclet SubtargetPredicate = HasDot2Insts in {
2660b57cec5SDimitry Andric
2670b57cec5SDimitry Andricdef V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16", VOP3_Profile<VOP_F32_V2F16_V2F16_F32>>;
2680b57cec5SDimitry Andricdef V_DOT2_I32_I16 : VOP3PInst<"v_dot2_i32_i16", VOP3_Profile<VOP_I32_V2I16_V2I16_I32>>;
2690b57cec5SDimitry Andricdef V_DOT2_U32_U16 : VOP3PInst<"v_dot2_u32_u16", VOP3_Profile<VOP_I32_V2I16_V2I16_I32>>;
2700b57cec5SDimitry Andricdef V_DOT4_U32_U8  : VOP3PInst<"v_dot4_u32_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>>;
2710b57cec5SDimitry Andricdef V_DOT8_U32_U4  : VOP3PInst<"v_dot8_u32_u4", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>>;
2720b57cec5SDimitry Andric
2730b57cec5SDimitry Andric} // End SubtargetPredicate = HasDot2Insts
2740b57cec5SDimitry Andric
2750b57cec5SDimitry Andriclet SubtargetPredicate = HasDot1Insts in {
2760b57cec5SDimitry Andric
2770b57cec5SDimitry Andricdef V_DOT4_I32_I8  : VOP3PInst<"v_dot4_i32_i8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>>;
2780b57cec5SDimitry Andricdef V_DOT8_I32_I4  : VOP3PInst<"v_dot8_i32_i4", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>>;
2790b57cec5SDimitry Andric
2800b57cec5SDimitry Andric} // End SubtargetPredicate = HasDot1Insts
281*8bcb0991SDimitry Andric} // End let IsDOT = 1
2820b57cec5SDimitry Andric
// Selects dot_inst for a direct use of dot_op (an intrinsic or target node).
//   dot_op   - operator taking three sources and an i1 clamp flag.
//   dot_inst - instruction to emit; its profile (Pfl) supplies the source
//              value types and its SubtargetPredicate gates the pattern.
// src0 is matched through VOP3PMods0 and src1/src2 through VOP3PMods; the
// clamp flag is forwarded via as_i1imm.
2830b57cec5SDimitry Andricmulticlass DotPats<SDPatternOperator dot_op,
2840b57cec5SDimitry Andric                   VOP3PInst dot_inst> {
2850b57cec5SDimitry Andric  let SubtargetPredicate = dot_inst.SubtargetPredicate in
2860b57cec5SDimitry Andric  def : GCNPat <
2870b57cec5SDimitry Andric    (dot_op (dot_inst.Pfl.Src0VT (VOP3PMods0 dot_inst.Pfl.Src0VT:$src0, i32:$src0_modifiers)),
2880b57cec5SDimitry Andric            (dot_inst.Pfl.Src1VT (VOP3PMods dot_inst.Pfl.Src1VT:$src1, i32:$src1_modifiers)),
2890b57cec5SDimitry Andric            (dot_inst.Pfl.Src2VT (VOP3PMods dot_inst.Pfl.Src2VT:$src2, i32:$src2_modifiers)), i1:$clamp),
2900b57cec5SDimitry Andric    (dot_inst $src0_modifiers, $src0, $src1_modifiers, $src1, $src2_modifiers, $src2, (as_i1imm $clamp))>;
2910b57cec5SDimitry Andric}
2920b57cec5SDimitry Andric
// Direct operator -> instruction patterns for every dot instruction.
2930b57cec5SDimitry Andricdefm : DotPats<AMDGPUfdot2, V_DOT2_F32_F16>;
2940b57cec5SDimitry Andricdefm : DotPats<int_amdgcn_sdot2, V_DOT2_I32_I16>;
2950b57cec5SDimitry Andricdefm : DotPats<int_amdgcn_udot2, V_DOT2_U32_U16>;
2960b57cec5SDimitry Andricdefm : DotPats<int_amdgcn_sdot4, V_DOT4_I32_I8>;
2970b57cec5SDimitry Andricdefm : DotPats<int_amdgcn_udot4, V_DOT4_U32_U8>;
2980b57cec5SDimitry Andricdefm : DotPats<int_amdgcn_sdot8, V_DOT8_I32_I4>;
2990b57cec5SDimitry Andricdefm : DotPats<int_amdgcn_udot8, V_DOT8_U32_U4>;
3000b57cec5SDimitry Andric
// Open-coded multiply/add forms of the 2-element integer dot products.
3010b57cec5SDimitry Andricdef : UDot2Pat<V_DOT2_U32_U16>;
3020b57cec5SDimitry Andricdef : SDot2Pat<V_DOT2_I32_I16>;
3030b57cec5SDimitry Andric
// Open-coded dot4: starting from $src2, !foldl wraps one add_oneuse per
// element around Mul<Type>_Elt0 .. Mul<Type>_Elt3 of ($src0, $src1), and the
// resulting chain selects V_DOT4_<Type>32_<Type>8. The predicate is taken
// from that instruction.
3040b57cec5SDimitry Andricforeach Type = ["U", "I"] in
3050b57cec5SDimitry Andric  let SubtargetPredicate = !cast<VOP_Pseudo>("V_DOT4_"#Type#"32_"#Type#8).SubtargetPredicate in
3060b57cec5SDimitry Andric  def : GCNPat <
3070b57cec5SDimitry Andric    !cast<dag>(!foldl((i32 i32:$src2), [0, 1, 2, 3], lhs, y,
3080b57cec5SDimitry Andric                      (add_oneuse lhs, (!cast<PatFrag>("Mul"#Type#"_Elt"#y) i32:$src0, i32:$src1)))),
3090b57cec5SDimitry Andric    (!cast<VOP3PInst>("V_DOT4_"#Type#"32_"#Type#8) (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))>;
3100b57cec5SDimitry Andric
// Open-coded dot8: the innermost node is add_oneuse($src2, Mul<Type>0_4bit);
// !foldl then wraps NonACAdd_oneuse around it for nibbles 1..7 in ascending
// order, and the chain selects V_DOT8_<Type>32_<Type>4. The predicate is
// taken from that instruction.
3110b57cec5SDimitry Andricforeach Type = ["U", "I"] in
3120b57cec5SDimitry Andric  let SubtargetPredicate = !cast<VOP_Pseudo>("V_DOT8_"#Type#"32_"#Type#4).SubtargetPredicate in
3130b57cec5SDimitry Andric  def : GCNPat <
3140b57cec5SDimitry Andric    !cast<dag>(!foldl((add_oneuse i32:$src2, (!cast<PatFrag>("Mul"#Type#"0_4bit") i32:$src0, i32:$src1)),
3150b57cec5SDimitry Andric                      [1, 2, 3, 4, 5, 6, 7], lhs, y,
3160b57cec5SDimitry Andric                      (NonACAdd_oneuse lhs, (!cast<PatFrag>("Mul"#Type#y#"_4bit") i32:$src0, i32:$src1)))),
3170b57cec5SDimitry Andric    (!cast<VOP3PInst>("V_DOT8_"#Type#"32_"#Type#4) (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))>;
3180b57cec5SDimitry Andric
3190b57cec5SDimitry Andric// Different variants of dot8 code-gen dag patterns are not generated through table-gen due to a huge increase
3200b57cec5SDimitry Andric// in the compile time. Directly handle the pattern generated by the FE here.
// Identical to the preceding dot8 pattern except for the fold order: nibble
// 7 is added immediately after nibble 0, followed by nibbles 1..6.
3210b57cec5SDimitry Andricforeach Type = ["U", "I"] in
3220b57cec5SDimitry Andric  let SubtargetPredicate = !cast<VOP_Pseudo>("V_DOT8_"#Type#"32_"#Type#4).SubtargetPredicate in
3230b57cec5SDimitry Andric  def : GCNPat <
3240b57cec5SDimitry Andric    !cast<dag>(!foldl((add_oneuse i32:$src2, (!cast<PatFrag>("Mul"#Type#"0_4bit") i32:$src0, i32:$src1)),
3250b57cec5SDimitry Andric                      [7, 1, 2, 3, 4, 5, 6], lhs, y,
3260b57cec5SDimitry Andric                      (NonACAdd_oneuse lhs, (!cast<PatFrag>("Mul"#Type#y#"_4bit") i32:$src0, i32:$src1)))),
3270b57cec5SDimitry Andric    (!cast<VOP3PInst>("V_DOT8_"#Type#"32_"#Type#4) (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))>;
3280b57cec5SDimitry Andric
// Destination operands over the AGPR_32 / AReg_128 / AReg_512 / AReg_1024
// register classes, used as DstRC by the profiles that follow.
3290b57cec5SDimitry Andricdef ADst_32   : VOPDstOperand<AGPR_32>;
3300b57cec5SDimitry Andricdef ADst_128  : VOPDstOperand<AReg_128>;
3310b57cec5SDimitry Andricdef ADst_512  : VOPDstOperand<AReg_512>;
3320b57cec5SDimitry Andricdef ADst_1024 : VOPDstOperand<AReg_1024>;
3330b57cec5SDimitry Andric
// Profile for v_accvgpr_read_b32: an i32 -> i32 VOP3_MAI profile whose src0
// operand class is overridden to ARegSrc_32; the destination class is left
// at the profile default.
3340b57cec5SDimitry Andricdef VOPProfileAccRead : VOP3_Profile<VOP_I32_I32, VOP3_MAI> {
3350b57cec5SDimitry Andric  let Src0RC64 = ARegSrc_32;
3360b57cec5SDimitry Andric}
3370b57cec5SDimitry Andric
// Profile for v_accvgpr_write_b32: an i32 -> i32 VOP3_MAI profile whose
// destination is ADst_32 and whose src0 operand class is VISrc_b32.
3380b57cec5SDimitry Andricdef VOPProfileAccWrite : VOP3_Profile<VOP_I32_I32, VOP3_MAI> {
3390b57cec5SDimitry Andric  let DstRC = ADst_32;
3400b57cec5SDimitry Andric  let Src0RC64 = VISrc_b32;
3410b57cec5SDimitry Andric}
3420b57cec5SDimitry Andric
// Common profile for the v_mfma_* instructions.
//   P       - base type profile.
//   _SrcRC  - operand class used for src2.
//   _DstRC  - destination operand class.
//   SrcABRC - operand class shared by src0 and src1 (defaults to AVSrc_32).
// op_sel, clamp and source modifiers are all disabled; the 64-bit operand
// list and asm string are replaced so that the three sources are followed
// by the cbsz, abid and blgp operands.
3430b57cec5SDimitry Andricclass VOPProfileMAI<VOPProfile P, RegisterOperand _SrcRC, RegisterOperand _DstRC,
3440b57cec5SDimitry Andric                    RegisterOperand SrcABRC = AVSrc_32>
3450b57cec5SDimitry Andric  : VOP3_Profile<P, VOP3_MAI> {
3460b57cec5SDimitry Andric  let DstRC = _DstRC;
3470b57cec5SDimitry Andric  let Src0RC64 = SrcABRC;
3480b57cec5SDimitry Andric  let Src1RC64 = SrcABRC;
3490b57cec5SDimitry Andric  let Src2RC64 = _SrcRC;
3500b57cec5SDimitry Andric  let HasOpSel = 0;
3510b57cec5SDimitry Andric  let HasClamp = 0;
3520b57cec5SDimitry Andric  let HasModifiers = 0;
3530b57cec5SDimitry Andric  let Asm64 = " $vdst, $src0, $src1, $src2$cbsz$abid$blgp";
3540b57cec5SDimitry Andric  let Ins64 = (ins Src0RC64:$src0, Src1RC64:$src1, Src2RC64:$src2, cbsz:$cbsz, abid:$abid, blgp:$blgp);
3550b57cec5SDimitry Andric}
3560b57cec5SDimitry Andric
// Concrete MAI profiles, named <result>_<A/B element type>_X<N>, where N
// (4, 16 or 32) matches the element count of the result/src2 vector type.
// Columns: type profile, src2 operand class, destination operand class and,
// for the V4F16 variants only, AVSrc_64 as the src0/src1 operand class.
3570b57cec5SDimitry Andricdef VOPProfileMAI_F32_F32_X4    : VOPProfileMAI<VOP_V4F32_F32_F32_V4F32,       AISrc_128_f32,  ADst_128>;
3580b57cec5SDimitry Andricdef VOPProfileMAI_F32_F32_X16   : VOPProfileMAI<VOP_V16F32_F32_F32_V16F32,     AISrc_512_f32,  ADst_512>;
3590b57cec5SDimitry Andricdef VOPProfileMAI_F32_F32_X32   : VOPProfileMAI<VOP_V32F32_F32_F32_V32F32,     AISrc_1024_f32, ADst_1024>;
3600b57cec5SDimitry Andricdef VOPProfileMAI_I32_I32_X4    : VOPProfileMAI<VOP_V4I32_I32_I32_V4I32,       AISrc_128_b32,  ADst_128>;
3610b57cec5SDimitry Andricdef VOPProfileMAI_I32_I32_X16   : VOPProfileMAI<VOP_V16I32_I32_I32_V16I32,     AISrc_512_b32,  ADst_512>;
3620b57cec5SDimitry Andricdef VOPProfileMAI_I32_I32_X32   : VOPProfileMAI<VOP_V32I32_I32_I32_V32I32,     AISrc_1024_b32, ADst_1024>;
3630b57cec5SDimitry Andricdef VOPProfileMAI_F32_V2I16_X4  : VOPProfileMAI<VOP_V4F32_V2I16_V2I16_V4F32,   AISrc_128_b32,  ADst_128>;
3640b57cec5SDimitry Andricdef VOPProfileMAI_F32_V2I16_X16 : VOPProfileMAI<VOP_V16F32_V2I16_V2I16_V16F32, AISrc_512_b32,  ADst_512>;
3650b57cec5SDimitry Andricdef VOPProfileMAI_F32_V2I16_X32 : VOPProfileMAI<VOP_V32F32_V2I16_V2I16_V32F32, AISrc_1024_b32, ADst_1024>;
3660b57cec5SDimitry Andricdef VOPProfileMAI_F32_V4F16_X4  : VOPProfileMAI<VOP_V4F32_V4F16_V4F16_V4F32,   AISrc_128_b32,  ADst_128,  AVSrc_64>;
3670b57cec5SDimitry Andricdef VOPProfileMAI_F32_V4F16_X16 : VOPProfileMAI<VOP_V16F32_V4F16_V4F16_V16F32, AISrc_512_b32,  ADst_512,  AVSrc_64>;
3680b57cec5SDimitry Andricdef VOPProfileMAI_F32_V4F16_X32 : VOPProfileMAI<VOP_V32F32_V4F16_V4F16_V32F32, AISrc_1024_b32, ADst_1024, AVSrc_64>;
3690b57cec5SDimitry Andric
// AccVGPR move and MFMA instructions, all gated on HasMAIInsts. Note that
// this group sets the `Predicates` list rather than `SubtargetPredicate`.
3700b57cec5SDimitry Andriclet Predicates = [HasMAIInsts] in {
3710b57cec5SDimitry Andricdef V_ACCVGPR_READ_B32  : VOP3Inst<"v_accvgpr_read_b32",  VOPProfileAccRead>;
3720b57cec5SDimitry Andricdef V_ACCVGPR_WRITE_B32 : VOP3Inst<"v_accvgpr_write_b32", VOPProfileAccWrite> {
3730b57cec5SDimitry Andric  let isMoveImm = 1;
3740b57cec5SDimitry Andric}
3750b57cec5SDimitry Andric
// Each MFMA instruction is selected directly from the int_amdgcn_mfma_*
// intrinsic of the same name and is marked convergent.
3760b57cec5SDimitry Andriclet isConvergent = 1 in {
3770b57cec5SDimitry Andricdef V_MFMA_F32_4X4X1F32    : VOP3Inst<"v_mfma_f32_4x4x1f32",    VOPProfileMAI_F32_F32_X4,    int_amdgcn_mfma_f32_4x4x1f32>;
3780b57cec5SDimitry Andricdef V_MFMA_F32_4X4X4F16    : VOP3Inst<"v_mfma_f32_4x4x4f16",    VOPProfileMAI_F32_V4F16_X4,  int_amdgcn_mfma_f32_4x4x4f16>;
3790b57cec5SDimitry Andricdef V_MFMA_I32_4X4X4I8     : VOP3Inst<"v_mfma_i32_4x4x4i8",     VOPProfileMAI_I32_I32_X4,    int_amdgcn_mfma_i32_4x4x4i8>;
3800b57cec5SDimitry Andricdef V_MFMA_F32_4X4X2BF16   : VOP3Inst<"v_mfma_f32_4x4x2bf16",   VOPProfileMAI_F32_V2I16_X4,  int_amdgcn_mfma_f32_4x4x2bf16>;
3810b57cec5SDimitry Andricdef V_MFMA_F32_16X16X1F32  : VOP3Inst<"v_mfma_f32_16x16x1f32",  VOPProfileMAI_F32_F32_X16,   int_amdgcn_mfma_f32_16x16x1f32>;
3820b57cec5SDimitry Andricdef V_MFMA_F32_16X16X4F32  : VOP3Inst<"v_mfma_f32_16x16x4f32",  VOPProfileMAI_F32_F32_X4,    int_amdgcn_mfma_f32_16x16x4f32>;
3830b57cec5SDimitry Andricdef V_MFMA_F32_16X16X4F16  : VOP3Inst<"v_mfma_f32_16x16x4f16",  VOPProfileMAI_F32_V4F16_X16, int_amdgcn_mfma_f32_16x16x4f16>;
3840b57cec5SDimitry Andricdef V_MFMA_F32_16X16X16F16 : VOP3Inst<"v_mfma_f32_16x16x16f16", VOPProfileMAI_F32_V4F16_X4,  int_amdgcn_mfma_f32_16x16x16f16>;
3850b57cec5SDimitry Andricdef V_MFMA_I32_16X16X4I8   : VOP3Inst<"v_mfma_i32_16x16x4i8",   VOPProfileMAI_I32_I32_X16,   int_amdgcn_mfma_i32_16x16x4i8>;
3860b57cec5SDimitry Andricdef V_MFMA_I32_16X16X16I8  : VOP3Inst<"v_mfma_i32_16x16x16i8",  VOPProfileMAI_I32_I32_X4,    int_amdgcn_mfma_i32_16x16x16i8>;
3870b57cec5SDimitry Andricdef V_MFMA_F32_16X16X2BF16 : VOP3Inst<"v_mfma_f32_16x16x2bf16", VOPProfileMAI_F32_V2I16_X16, int_amdgcn_mfma_f32_16x16x2bf16>;
3880b57cec5SDimitry Andricdef V_MFMA_F32_16X16X8BF16 : VOP3Inst<"v_mfma_f32_16x16x8bf16", VOPProfileMAI_F32_V2I16_X4,  int_amdgcn_mfma_f32_16x16x8bf16>;
3890b57cec5SDimitry Andricdef V_MFMA_F32_32X32X1F32  : VOP3Inst<"v_mfma_f32_32x32x1f32",  VOPProfileMAI_F32_F32_X32,   int_amdgcn_mfma_f32_32x32x1f32>;
3900b57cec5SDimitry Andricdef V_MFMA_F32_32X32X2F32  : VOP3Inst<"v_mfma_f32_32x32x2f32",  VOPProfileMAI_F32_F32_X16,   int_amdgcn_mfma_f32_32x32x2f32>;
3910b57cec5SDimitry Andricdef V_MFMA_F32_32X32X4F16  : VOP3Inst<"v_mfma_f32_32x32x4f16",  VOPProfileMAI_F32_V4F16_X32, int_amdgcn_mfma_f32_32x32x4f16>;
3920b57cec5SDimitry Andricdef V_MFMA_F32_32X32X8F16  : VOP3Inst<"v_mfma_f32_32x32x8f16",  VOPProfileMAI_F32_V4F16_X16, int_amdgcn_mfma_f32_32x32x8f16>;
3930b57cec5SDimitry Andricdef V_MFMA_I32_32X32X4I8   : VOP3Inst<"v_mfma_i32_32x32x4i8",   VOPProfileMAI_I32_I32_X32,   int_amdgcn_mfma_i32_32x32x4i8>;
3940b57cec5SDimitry Andricdef V_MFMA_I32_32X32X8I8   : VOP3Inst<"v_mfma_i32_32x32x8i8",   VOPProfileMAI_I32_I32_X16,   int_amdgcn_mfma_i32_32x32x8i8>;
3950b57cec5SDimitry Andricdef V_MFMA_F32_32X32X2BF16 : VOP3Inst<"v_mfma_f32_32x32x2bf16", VOPProfileMAI_F32_V2I16_X32, int_amdgcn_mfma_f32_32x32x2bf16>;
3960b57cec5SDimitry Andricdef V_MFMA_F32_32X32X4BF16 : VOP3Inst<"v_mfma_f32_32x32x4bf16", VOPProfileMAI_F32_V2I16_X16, int_amdgcn_mfma_f32_32x32x4bf16>;
3970b57cec5SDimitry Andric} // End isConvergent = 1
3980b57cec5SDimitry Andric
3990b57cec5SDimitry Andric} // End Predicates = [HasMAIInsts]
4000b57cec5SDimitry Andric
// Accept the AccVGPR move mnemonics without the _b32 suffix.
4010b57cec5SDimitry Andricdef : MnemonicAlias<"v_accvgpr_read",  "v_accvgpr_read_b32">;
4020b57cec5SDimitry Andricdef : MnemonicAlias<"v_accvgpr_write", "v_accvgpr_write_b32">;
4030b57cec5SDimitry Andric
// Defines the "<NAME>_vi" real instruction for the pseudo called NAME.
//   op - 10-bit opcode passed to the VOP3Pe encoding class.
// The record uses the VI encoding family, is assembled only when
// HasVOP3PInsts holds, and is placed in the "GFX8" decoder namespace.
4040b57cec5SDimitry Andricmulticlass VOP3P_Real_vi<bits<10> op> {
4050b57cec5SDimitry Andric  def _vi : VOP3P_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
4060b57cec5SDimitry Andric            VOP3Pe <op, !cast<VOP3_Pseudo>(NAME).Pfl> {
4070b57cec5SDimitry Andric    let AssemblerPredicates = [HasVOP3PInsts];
4080b57cec5SDimitry Andric    let DecoderNamespace = "GFX8";
4090b57cec5SDimitry Andric  }
4100b57cec5SDimitry Andric}
4110b57cec5SDimitry Andric
// Real (encoded) MAI instruction for the VI encoding family.
//
// Same shape as VOP3P_Real_vi, but the encoding comes from VOP3Pe_MAI and the
// assembler predicate is HasMAIInsts. `defm FOO : VOP3P_Real_MAI<op>` produces
// `FOO_vi` from the pseudo named FOO.
//   op - 10-bit opcode passed to the VOP3Pe_MAI encoding class.
multiclass VOP3P_Real_MAI<bits<10> op> {
  def _vi : VOP3P_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
            VOP3Pe_MAI <op, !cast<VOP3_Pseudo>(NAME).Pfl> {
    let AssemblerPredicates = [HasMAIInsts];
    let DecoderNamespace = "GFX8";
  }
}
4190b57cec5SDimitry Andric
// VI-family real encodings for the packed (V_PK_*) pseudos.
// Opcodes run consecutively from 0x380 through 0x392.
defm V_PK_MAD_I16 : VOP3P_Real_vi <0x380>;
defm V_PK_MUL_LO_U16 : VOP3P_Real_vi <0x381>;
defm V_PK_ADD_I16 : VOP3P_Real_vi <0x382>;
defm V_PK_SUB_I16 : VOP3P_Real_vi <0x383>;
defm V_PK_LSHLREV_B16 : VOP3P_Real_vi <0x384>;
defm V_PK_LSHRREV_B16 : VOP3P_Real_vi <0x385>;
defm V_PK_ASHRREV_I16 : VOP3P_Real_vi <0x386>;
defm V_PK_MAX_I16 : VOP3P_Real_vi <0x387>;
defm V_PK_MIN_I16 : VOP3P_Real_vi <0x388>;
defm V_PK_MAD_U16 : VOP3P_Real_vi <0x389>;

defm V_PK_ADD_U16 : VOP3P_Real_vi <0x38a>;
defm V_PK_SUB_U16 : VOP3P_Real_vi <0x38b>;
defm V_PK_MAX_U16 : VOP3P_Real_vi <0x38c>;
defm V_PK_MIN_U16 : VOP3P_Real_vi <0x38d>;
defm V_PK_FMA_F16 : VOP3P_Real_vi <0x38e>;
defm V_PK_ADD_F16 : VOP3P_Real_vi <0x38f>;
defm V_PK_MUL_F16 : VOP3P_Real_vi <0x390>;
defm V_PK_MIN_F16 : VOP3P_Real_vi <0x391>;
defm V_PK_MAX_F16 : VOP3P_Real_vi <0x392>;
4400b57cec5SDimitry Andric
4410b57cec5SDimitry Andric
// VI-family real encodings for the mad_mix instructions (opcodes 0x3a0-0x3a2).
// The fma_mix reals defined under HasFmaMixInsts reuse these opcode values.
let SubtargetPredicate = HasMadMixInsts in {
defm V_MAD_MIX_F32 : VOP3P_Real_vi <0x3a0>;
defm V_MAD_MIXLO_F16 : VOP3P_Real_vi <0x3a1>;
defm V_MAD_MIXHI_F16 : VOP3P_Real_vi <0x3a2>;
} // End SubtargetPredicate = HasMadMixInsts
4470b57cec5SDimitry Andric
// VI-family real encodings for the fma_mix instructions (opcodes 0x3a0-0x3a2,
// the same values used by the V_MAD_MIX* reals).
let SubtargetPredicate = HasFmaMixInsts in {
let DecoderNamespace = "GFX9_DL" in {
// The mad_mix instructions were renamed and their behaviors changed,
// but the opcode stayed the same so we need to put these in a
// different DecoderNamespace to avoid the ambiguity.
defm V_FMA_MIX_F32 : VOP3P_Real_vi <0x3a0>;
defm V_FMA_MIXLO_F16 : VOP3P_Real_vi <0x3a1>;
defm V_FMA_MIXHI_F16 : VOP3P_Real_vi <0x3a2>;
} // End DecoderNamespace = "GFX9_DL"
} // End SubtargetPredicate = HasFmaMixInsts
4580b57cec5SDimitry Andric
4590b57cec5SDimitry Andric
// VI-family real encodings for the dot-product instructions gated on
// HasDot2Insts. Opcodes 0x3a8 and 0x3aa are not assigned here; they are used
// by the HasDot1Insts reals (V_DOT4_I32_I8, V_DOT8_I32_I4).
let SubtargetPredicate = HasDot2Insts in {

defm V_DOT2_F32_F16 : VOP3P_Real_vi <0x3a3>;
defm V_DOT2_I32_I16 : VOP3P_Real_vi <0x3a6>;
defm V_DOT2_U32_U16 : VOP3P_Real_vi <0x3a7>;
defm V_DOT4_U32_U8  : VOP3P_Real_vi <0x3a9>;
defm V_DOT8_U32_U4  : VOP3P_Real_vi <0x3ab>;

} // End SubtargetPredicate = HasDot2Insts
4690b57cec5SDimitry Andric
// VI-family real encodings for the signed 8-bit / 4-bit dot-product
// instructions gated on HasDot1Insts.
let SubtargetPredicate = HasDot1Insts in {

defm V_DOT4_I32_I8  : VOP3P_Real_vi <0x3a8>;
defm V_DOT8_I32_I4  : VOP3P_Real_vi <0x3aa>;

} // End SubtargetPredicate = HasDot1Insts
4760b57cec5SDimitry Andric
// Real encodings for the MAI instructions: the two accvgpr moves followed by
// the MFMA variants, all instantiated through VOP3P_Real_MAI. The opcode
// values are not contiguous; each one is given explicitly.
let SubtargetPredicate = HasMAIInsts in {

defm V_ACCVGPR_READ_B32  : VOP3P_Real_MAI <0x3d8>;
defm V_ACCVGPR_WRITE_B32 : VOP3P_Real_MAI <0x3d9>;
defm V_MFMA_F32_32X32X1F32  : VOP3P_Real_MAI <0x3c0>;
defm V_MFMA_F32_16X16X1F32  : VOP3P_Real_MAI <0x3c1>;
defm V_MFMA_F32_4X4X1F32    : VOP3P_Real_MAI <0x3c2>;
defm V_MFMA_F32_32X32X2F32  : VOP3P_Real_MAI <0x3c4>;
defm V_MFMA_F32_16X16X4F32  : VOP3P_Real_MAI <0x3c5>;
defm V_MFMA_F32_32X32X4F16  : VOP3P_Real_MAI <0x3c8>;
defm V_MFMA_F32_16X16X4F16  : VOP3P_Real_MAI <0x3c9>;
defm V_MFMA_F32_4X4X4F16    : VOP3P_Real_MAI <0x3ca>;
defm V_MFMA_F32_32X32X8F16  : VOP3P_Real_MAI <0x3cc>;
defm V_MFMA_F32_16X16X16F16 : VOP3P_Real_MAI <0x3cd>;
defm V_MFMA_I32_32X32X4I8   : VOP3P_Real_MAI <0x3d0>;
defm V_MFMA_I32_16X16X4I8   : VOP3P_Real_MAI <0x3d1>;
defm V_MFMA_I32_4X4X4I8     : VOP3P_Real_MAI <0x3d2>;
defm V_MFMA_I32_32X32X8I8   : VOP3P_Real_MAI <0x3d4>;
defm V_MFMA_I32_16X16X16I8  : VOP3P_Real_MAI <0x3d5>;
defm V_MFMA_F32_32X32X2BF16 : VOP3P_Real_MAI <0x3e8>;
defm V_MFMA_F32_16X16X2BF16 : VOP3P_Real_MAI <0x3e9>;
defm V_MFMA_F32_4X4X2BF16   : VOP3P_Real_MAI <0x3eb>;
defm V_MFMA_F32_32X32X4BF16 : VOP3P_Real_MAI <0x3ec>;
defm V_MFMA_F32_16X16X8BF16 : VOP3P_Real_MAI <0x3ed>;

} // End SubtargetPredicate = HasMAIInsts
5030b57cec5SDimitry Andric
5040b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
5050b57cec5SDimitry Andric// GFX10.
5060b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
5070b57cec5SDimitry Andric
// Real (encoded) VOP3P instruction for the GFX10 encoding family.
//
// `defm FOO : VOP3P_Real_gfx10<op>` produces the record `FOO_gfx10` from the
// pseudo named FOO. Unlike the VI multiclasses, the pseudo is cast to
// VOP3P_Pseudo here rather than VOP3_Pseudo.
//   op - 10-bit opcode passed to the VOP3Pe_gfx10 encoding class.
let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
  multiclass VOP3P_Real_gfx10<bits<10> op> {
    def _gfx10 : VOP3P_Real<!cast<VOP3P_Pseudo>(NAME), SIEncodingFamily.GFX10>,
                 VOP3Pe_gfx10 <op, !cast<VOP3P_Pseudo>(NAME).Pfl>;
  }
} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"
5140b57cec5SDimitry Andric
// GFX10 real encodings. The packed (V_PK_*) instructions take opcodes
// 0x000-0x012; the fma_mix instructions take 0x020-0x022.
defm V_PK_MAD_I16     : VOP3P_Real_gfx10<0x000>;
defm V_PK_MUL_LO_U16  : VOP3P_Real_gfx10<0x001>;
defm V_PK_ADD_I16     : VOP3P_Real_gfx10<0x002>;
defm V_PK_SUB_I16     : VOP3P_Real_gfx10<0x003>;
defm V_PK_LSHLREV_B16 : VOP3P_Real_gfx10<0x004>;
defm V_PK_LSHRREV_B16 : VOP3P_Real_gfx10<0x005>;
defm V_PK_ASHRREV_I16 : VOP3P_Real_gfx10<0x006>;
defm V_PK_MAX_I16     : VOP3P_Real_gfx10<0x007>;
defm V_PK_MIN_I16     : VOP3P_Real_gfx10<0x008>;
defm V_PK_MAD_U16     : VOP3P_Real_gfx10<0x009>;
defm V_PK_ADD_U16     : VOP3P_Real_gfx10<0x00a>;
defm V_PK_SUB_U16     : VOP3P_Real_gfx10<0x00b>;
defm V_PK_MAX_U16     : VOP3P_Real_gfx10<0x00c>;
defm V_PK_MIN_U16     : VOP3P_Real_gfx10<0x00d>;
defm V_PK_FMA_F16     : VOP3P_Real_gfx10<0x00e>;
defm V_PK_ADD_F16     : VOP3P_Real_gfx10<0x00f>;
defm V_PK_MUL_F16     : VOP3P_Real_gfx10<0x010>;
defm V_PK_MIN_F16     : VOP3P_Real_gfx10<0x011>;
defm V_PK_MAX_F16     : VOP3P_Real_gfx10<0x012>;
defm V_FMA_MIX_F32    : VOP3P_Real_gfx10<0x020>;
defm V_FMA_MIXLO_F16  : VOP3P_Real_gfx10<0x021>;
defm V_FMA_MIXHI_F16  : VOP3P_Real_gfx10<0x022>;
5370b57cec5SDimitry Andric
// GFX10 real encodings for the dot-product instructions gated on
// HasDot2Insts. Opcodes 0x016 and 0x018 are not assigned here; they are used
// by the HasDot1Insts reals (V_DOT4_I32_I8, V_DOT8_I32_I4).
let SubtargetPredicate = HasDot2Insts in {

defm V_DOT2_F32_F16 : VOP3P_Real_gfx10 <0x013>;
defm V_DOT2_I32_I16 : VOP3P_Real_gfx10 <0x014>;
defm V_DOT2_U32_U16 : VOP3P_Real_gfx10 <0x015>;
defm V_DOT4_U32_U8  : VOP3P_Real_gfx10 <0x017>;
defm V_DOT8_U32_U4  : VOP3P_Real_gfx10 <0x019>;

} // End SubtargetPredicate = HasDot2Insts
5470b57cec5SDimitry Andric
// GFX10 real encodings for the signed 8-bit / 4-bit dot-product instructions
// gated on HasDot1Insts.
let SubtargetPredicate = HasDot1Insts in {

defm V_DOT4_I32_I8  : VOP3P_Real_gfx10 <0x016>;
defm V_DOT8_I32_I4  : VOP3P_Real_gfx10 <0x018>;

} // End SubtargetPredicate = HasDot1Insts
554