xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP2Instructions.td (revision a2464ee12761660f50d0b6f59f233949ebcacc87)
1//===-- VOP2Instructions.td - Vector Instruction Definitions --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9//===----------------------------------------------------------------------===//
10// VOP2 Classes
11//===----------------------------------------------------------------------===//
12
// 32-bit VOP2 encoding. Bit layout, as assigned below:
//   [8:0] src0, [16:9] src1, [24:17] vdst, [30:25] op, [31] fixed 0.
// A source/destination field is encoded as 0 when profile P reports that the
// operand is absent (HasSrc0 / HasSrc1) or the destination is not emitted
// (EmitDst).
13class VOP2e <bits<6> op, VOPProfile P> : Enc32 {
14  bits<8> vdst;
15  bits<9> src0;
16  bits<8> src1;
17
18  let Inst{8-0}   = !if(P.HasSrc0, src0, 0);
19  let Inst{16-9}  = !if(P.HasSrc1, src1, 0);
20  let Inst{24-17} = !if(P.EmitDst, vdst, 0);
21  let Inst{30-25} = op;
22  let Inst{31}    = 0x0; // encoding
23}
24
// 64-bit VOP2 encoding carrying a trailing 32-bit immediate. The low dword
// uses the same field layout as VOP2e; bits [63:32] hold `imm`.
25class VOP2_MADKe <bits<6> op, VOPProfile P> : Enc64 {
26  bits<8>  vdst;
27  bits<9>  src0;
28  bits<8>  src1;
29  bits<32> imm;
30
31  let Inst{8-0}   = !if(P.HasSrc0, src0, 0);
32  let Inst{16-9}  = !if(P.HasSrc1, src1, 0);
33  let Inst{24-17} = !if(P.EmitDst, vdst, 0);
34  let Inst{30-25} = op;
35  let Inst{31}    = 0x0; // encoding
36  let Inst{63-32} = imm;
37}
38
// SDWA form of the VOP2 encoding. The src0 slot [8:0] is fixed to 0xf9 (the
// sdwa marker); src1, vdst and op occupy the same bits as in VOP2e. Fields
// other than the ones assigned here come from the VOP_SDWAe base class.
39class VOP2_SDWAe <bits<6> op, VOPProfile P> : VOP_SDWAe <P> {
40  bits<8> vdst;
41  bits<8> src1;
42
43  let Inst{8-0}   = 0xf9; // sdwa
44  let Inst{16-9}  = !if(P.HasSrc1, src1{7-0}, 0);
45  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
46  let Inst{30-25} = op;
47  let Inst{31}    = 0x0; // encoding
48}
49
// SDWA9 form of the VOP2 encoding. Same low-dword layout as VOP2_SDWAe, but
// src1 is 9 bits wide: its low 8 bits go to [16:9] and bit 8 is stored in
// Inst{63} (src1_sgpr).
50class VOP2_SDWA9Ae <bits<6> op, VOPProfile P> : VOP_SDWA9Ae <P> {
51  bits<8> vdst;
52  bits<9> src1;
53
54  let Inst{8-0}   = 0xf9; // sdwa
55  let Inst{16-9}  = !if(P.HasSrc1, src1{7-0}, 0);
56  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
57  let Inst{30-25} = op;
58  let Inst{31}    = 0x0; // encoding
59  let Inst{63}    = !if(P.HasSrc1, src1{8}, 0); // src1_sgpr
60}
61
// Pseudo instruction for the 32-bit VOP2 form of `opName`.
//   opName  - mnemonic passed through to VOP_Pseudo.
//   P       - profile supplying the 32-bit operand lists (Outs32/Ins32) and
//             assembly operand string (Asm32).
//   pattern - optional selection pattern (empty by default).
//   suffix  - name suffix passed to VOP_Pseudo; defaults to "_e32".
62class VOP2_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], string suffix = "_e32"> :
63  VOP_Pseudo <opName, suffix, P, P.Outs32, P.Ins32, "", pattern> {
64
65  let AsmOperands = P.Asm32;
66
67  let Size = 4;
68  let mayLoad = 0;
69  let mayStore = 0;
70  let hasSideEffects = 0;
71
  // The mode register is considered read whenever the destination or src0
  // type is a floating-point type.
72  let ReadsModeReg = !or(isFloatType<P.DstVT>.ret, isFloatType<P.Src0VT>.ret);
73
  // By default an instruction may raise an FP exception exactly when it
  // reads the mode register; individual defs override this with `let`.
74  let mayRaiseFPException = ReadsModeReg;
75
76  let VOP2 = 1;
77  let VALU = 1;
  // EXEC is always an implicit use; MODE is added only for mode readers.
78  let Uses = !if(ReadsModeReg, [MODE, EXEC], [EXEC]);
79
80  let AsmVariantName = AMDGPUAsmVariants.Default;
81}
82
// Real (encodable) counterpart of the VOP2 pseudo `ps` for the encoding
// family `EncodingFamily`. Operand lists and the assembly string are taken
// from the pseudo, and the flags listed below are copied from it so that the
// real instruction stays in sync with its pseudo.
class VOP2_Real <VOP2_Pseudo ps, int EncodingFamily> :
  VOP_Real <ps>,
  InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
  SIMCInstr <ps.PseudoInstr, EncodingFamily> {

  let VALU = 1;
  let VOP2 = 1;
  let isPseudo = 0;
  let isCodeGenOnly = 0;

  // copy relevant pseudo op flags
  // (Constraints and DisableEncoding were previously assigned twice from the
  // same ps fields; each is now copied exactly once.)
  let SubtargetPredicate = ps.SubtargetPredicate;
  let OtherPredicates    = ps.OtherPredicates;
  let AsmMatchConverter  = ps.AsmMatchConverter;
  let AsmVariantName     = ps.AsmVariantName;
  let Constraints        = ps.Constraints;
  let DisableEncoding    = ps.DisableEncoding;
  let TSFlags            = ps.TSFlags;
  let UseNamedOperandTable = ps.UseNamedOperandTable;
  let Uses                 = ps.Uses;
  let Defs                 = ps.Defs;
  let SchedRW              = ps.SchedRW;
  let mayLoad              = ps.mayLoad;
  let mayStore             = ps.mayStore;
}
111
// SDWA pseudo for a VOP2 instruction; selects "cvtSdwaVOP2" as the asm match
// converter. VOP2bInst / VOP2eInst override the converter for their variants.
112class VOP2_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
113  VOP_SDWA_Pseudo <OpName, P, pattern> {
114  let AsmMatchConverter = "cvtSdwaVOP2";
115}
116
// DPP pseudo for a VOP2 instruction. Adds nothing to VOP_DPP_Pseudo; it only
// gives the VOP2 DPP variants a distinct class name.
117class VOP2_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
118  VOP_DPP_Pseudo <OpName, P, pattern> {
119}
120
121
// Builds the selection pattern list (`ret`) used for the _e64 form of a
// two-source instruction.
//  - P.HasModifiers: src0 is matched through VOP3Mods0 (binding
//    $src0_modifiers and $clamp, plus $omod when P.HasOMod) and src1 through
//    VOP3Mods (binding $src1_modifiers).
//  - otherwise: a plain (node src0, src1) pattern.
122class getVOP2Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
123  list<dag> ret = !if(P.HasModifiers,
124    [(set P.DstVT:$vdst,
125      (node (P.Src0VT
126              !if(P.HasOMod,
127                  (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod),
128                  (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))),
129            (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
130    [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]);
131}
132
// Defines the <NAME>_e32 pseudo for `opName`.
//   node        - DAG operator for the selection pattern (null_frag = none).
//   revOp       - mnemonic of the operand-reversed partner; defaults to
//                 opName itself, in which case Commutable_REV is told this
//                 def is the original (second argument !eq(revOp, opName)).
//   GFX9Renamed - forwarded to the renamedInGFX9 field.
133multiclass VOP2Inst_e32<string opName,
134                        VOPProfile P,
135                        SDPatternOperator node = null_frag,
136                        string revOp = opName,
137                        bit GFX9Renamed = 0> {
138  let renamedInGFX9 = GFX9Renamed in {
139    def _e32 : VOP2_Pseudo <opName, P, VOPPatOrNull<node,P>.ret>,
140               Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
141  } // End renamedInGFX9 = GFX9Renamed
142}
143
// Defines the <NAME>_e64 pseudo (a VOP3_Pseudo) for `opName`, with the
// selection pattern produced by getVOP2Pat64. Parameters have the same
// meaning as in VOP2Inst_e32.
144multiclass VOP2Inst_e64<string opName,
145                        VOPProfile P,
146                        SDPatternOperator node = null_frag,
147                        string revOp = opName,
148                        bit GFX9Renamed = 0> {
149  let renamedInGFX9 = GFX9Renamed in {
150    def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
151               Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
152  } // End renamedInGFX9 = GFX9Renamed
153}
154
// Defines the <NAME>_sdwa pseudo for `opName`, but only when the profile has
// P.HasExtSDWA set: the foreach over BoolToList<...>.ret is used as a
// conditional, so the def is skipped when the list is empty.
155multiclass VOP2Inst_sdwa<string opName,
156                         VOPProfile P,
157                         bit GFX9Renamed = 0> {
158  let renamedInGFX9 = GFX9Renamed in {
159    foreach _ = BoolToList<P.HasExtSDWA>.ret in
160      def _sdwa : VOP2_SDWA_Pseudo <opName, P>;
161  } // End renamedInGFX9 = GFX9Renamed
162}
163
// Defines every variant of a regular VOP2 instruction: _e32, _e64 and _sdwa
// via the inherited multiclasses, plus _dpp here when P.HasExtDPP is set.
// Only the _e32 and _e64 variants receive a selection pattern for `node`.
164multiclass VOP2Inst<string opName,
165                    VOPProfile P,
166                    SDPatternOperator node = null_frag,
167                    string revOp = opName,
168                    bit GFX9Renamed = 0> :
169    VOP2Inst_e32<opName, P, node, revOp, GFX9Renamed>,
170    VOP2Inst_e64<opName, P, node, revOp, GFX9Renamed>,
171    VOP2Inst_sdwa<opName, P, GFX9Renamed> {
172  let renamedInGFX9 = GFX9Renamed in {
173    foreach _ = BoolToList<P.HasExtDPP>.ret in
174      def _dpp  : VOP2_DPP_Pseudo <opName, P>;
175  } // End renamedInGFX9 = GFX9Renamed
176}
177
// Defines the variants of a VOP2 instruction that writes a carry-out
// ("VOP2b").
//   useSGPRInput - when set (by default: when the profile has three source
//                  args) VCC is also an implicit use of the _e32, _sdwa and
//                  _dpp variants.
// The _e32/_sdwa/_dpp variants implicitly define VCC. The _e64 variant sits
// outside that scope and therefore does not get the implicit VCC use/def.
// All variants share SchedRW = [Write32Bit, WriteSALU].
177multiclass VOP2bInst <string opName,
178                      VOPProfile P,
179                      SDPatternOperator node = null_frag,
180                      string revOp = opName,
181                      bit GFX9Renamed = 0,
182                      bit useSGPRInput = !eq(P.NumSrcArgs, 3)> {
183  let renamedInGFX9 = GFX9Renamed in {
184    let SchedRW = [Write32Bit, WriteSALU] in {
185      let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in {
186        def _e32 : VOP2_Pseudo <opName, P, VOPPatOrNull<node,P>.ret>,
187                   Commutable_REV<revOp#"_e32", !eq(revOp, opName)> {
188          let usesCustomInserter = true;
189        }
190
191        foreach _ = BoolToList<P.HasExtSDWA>.ret in
192          def _sdwa  : VOP2_SDWA_Pseudo <opName, P> {
193            let AsmMatchConverter = "cvtSdwaVOP2b";
194          }
195        foreach _ = BoolToList<P.HasExtDPP>.ret in
196          def _dpp  : VOP2_DPP_Pseudo <opName, P>;
197      } // End Uses = ..., Defs = [VCC]
198
199      def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
200                 Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
201    } // End SchedRW = [Write32Bit, WriteSALU]
202  } // End renamedInGFX9 = GFX9Renamed
203}
205
// Assembler alias for a VOP2b instruction. The alias string is OpName
// followed by the pseudo's Asm32 operand string with every "vcc" replaced by
// `opnd`; it maps onto `inst` with the vdst/src0/src1 operands.
205class VOP2bInstAlias <VOP2_Pseudo ps, Instruction inst,
206                      string OpName, string opnd> :
207  InstAlias <OpName#" "#!subst("vcc", opnd, ps.Pfl.Asm32),
208             (inst ps.Pfl.DstRC:$vdst, ps.Pfl.Src0RC32:$src0,
209                   ps.Pfl.Src1RC32:$src1)>,
210  PredicateControl {
211}
213
// Emits the two wave-size-specific aliases for a VOP2b instruction: the
// carry operand is spelled "vcc_lo" under isWave32 and "vcc" under isWave64.
multiclass VOP2bInstAliases<VOP2_Pseudo ps, VOP2_Real inst, string OpName> {
  let WaveSizePredicate = isWave32 in
  def : VOP2bInstAlias<ps, inst, OpName, "vcc_lo">;

  let WaveSizePredicate = isWave64 in
  def : VOP2bInstAlias<ps, inst, OpName, "vcc">;
}
222
// Defines the variants of a VOP2 instruction that reads a carry/condition
// input ("VOP2e").
//   useSGPRInput - when set (by default: when the profile has three source
//                  args) VCC is an implicit use of the _e32, _sdwa and _dpp
//                  variants.
// Note that `node` is only used for the _e64 pattern; the _e32 pseudo is
// defined without a selection pattern. Only _e64 is marked rematerializable.
222multiclass VOP2eInst <string opName,
223                      VOPProfile P,
224                      SDPatternOperator node = null_frag,
225                      string revOp = opName,
226                      bit useSGPRInput = !eq(P.NumSrcArgs, 3)> {
227
228  let SchedRW = [Write32Bit] in {
229    let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]) in {
230      def _e32 : VOP2_Pseudo <opName, P>,
231                 Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
232
233      foreach _ = BoolToList<P.HasExtSDWA>.ret in
234        def _sdwa : VOP2_SDWA_Pseudo <opName, P> {
235          let AsmMatchConverter = "cvtSdwaVOP2e";
236        }
237
238      foreach _ = BoolToList<P.HasExtDPP>.ret in
239        def _dpp  : VOP2_DPP_Pseudo <opName, P>;
240    } // End Uses = ...
241
242    def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
243               Commutable_REV<revOp#"_e64", !eq(revOp, opName)> {
244      let isReMaterializable = 1;
245    }
246  } // End SchedRW = [Write32Bit]
247}
249
// Assembler alias for a VOP2e instruction: the pseudo's mnemonic and Asm32
// operand string with ", <opnd>" appended, mapped onto `inst` with the
// vdst/src0/src1 operands.
250class VOP2eInstAlias <VOP2_Pseudo ps, Instruction inst, string opnd = ""> :
251  InstAlias <ps.OpName#" "#ps.Pfl.Asm32#", "#opnd,
252             (inst ps.Pfl.DstRC:$vdst, ps.Pfl.Src0RC32:$src0,
253                   ps.Pfl.Src1RC32:$src1)>, PredicateControl;
254
// Assembler alias built from a VOP3 pseudo's mnemonic and Asm64 operand
// string, mapped onto `inst` with vdst, sdst, src0, src1 and clamp operands.
255class VOP2e64InstAlias <VOP3_Pseudo ps, Instruction inst> :
256  InstAlias <ps.OpName#" "#ps.Pfl.Asm64,
257             (inst ps.Pfl.DstRC:$vdst, VOPDstS64orS32:$sdst,
258                   ps.Pfl.Src0RC32:$src0, ps.Pfl.Src1RC32:$src1, clampmod:$clamp)>,
259  PredicateControl;
260
// Emits the two wave-size-specific aliases for a VOP2e instruction: the
// trailing operand is spelled "vcc_lo" under isWave32 and "vcc" under
// isWave64.
multiclass VOP2eInstAliases<VOP2_Pseudo ps, VOP2_Real inst> {
  let WaveSizePredicate = isWave32 in
  def : VOP2eInstAlias<ps, inst, "vcc_lo">;

  let WaveSizePredicate = isWave64 in
  def : VOP2eInstAlias<ps, inst, "vcc">;
}
269
// Profile for the "madak"-style instructions, whose 32-bit form takes
// (src0, src1, imm) in that order. The immediate operand type and the src0
// operand class are chosen by the value type's size: the f32 variants for
// 32-bit types, the f16 variants otherwise.
270class VOP_MADAK <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
271  field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm);
272  field dag Ins32 = !if(!eq(vt.Size, 32),
273                        (ins VSrc_f32_Deferred:$src0, VGPR_32:$src1, ImmOpType:$imm),
274                        (ins VSrc_f16_Deferred:$src0, VGPR_32:$src1, ImmOpType:$imm));
275  field string Asm32 = "$vdst, $src0, $src1, $imm";
276  field bit HasExt = 0;
277  let IsSingle = 1;
278}
279
280def VOP_MADAK_F16 : VOP_MADAK <f16>;
281def VOP_MADAK_F32 : VOP_MADAK <f32>;
282
// Profile for the "madmk"-style instructions, whose 32-bit form takes
// (src0, imm, src1) in that order. As in VOP_MADAK, both the immediate
// operand type and the src0 operand class follow the value type's size, so
// the f16 instantiation uses VSrc_f16_Deferred rather than the f32 class.
class VOP_MADMK <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
  field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm);
  field dag Ins32 = !if(!eq(vt.Size, 32),
                        (ins VSrc_f32_Deferred:$src0, ImmOpType:$imm, VGPR_32:$src1),
                        (ins VSrc_f16_Deferred:$src0, ImmOpType:$imm, VGPR_32:$src1));
  field string Asm32 = "$vdst, $src0, $imm, $src1";
  field bit HasExt = 0;
  let IsSingle = 1;
}

def VOP_MADMK_F16 : VOP_MADMK <f16>;
def VOP_MADMK_F32 : VOP_MADMK <f32>;
293
// Wraps the register class returned by getVregSrcForVT<VT> in a
// RegisterOperand and exposes it as `ret`.
294class getRegisterOperandForVT<ValueType VT> {
295  RegisterOperand ret = RegisterOperand<getVregSrcForVT<VT>.ret>;
296}
297
// Profile for multiply-accumulate style instructions.
//   vt0 - type of the destination and of src2.
//   vt1 - type of src0 and src1 (defaults to vt0).
// Every input list below carries a $src2 operand, while HasSrc2 and
// HasSrc2Mods are forced to 0. The instruction definitions that use this
// profile tie $src2 to $vdst via Constraints.
298// FIXME: Remove src2_modifiers. It isn't used, so is wasting memory
299// and processing time but it makes it easier to convert to mad.
300class VOP_MAC <ValueType vt0, ValueType vt1=vt0> : VOPProfile <[vt0, vt1, vt1, vt0]> {
301  let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, getVregSrcForVT<Src2VT>.ret:$src2);
302  let Ins64 = getIns64<Src0RC64, Src1RC64, getRegisterOperandForVT<Src2VT>.ret, 3,
303                       0, HasModifiers, HasModifiers, HasOMod,
304                       Src0Mod, Src1Mod, Src2Mod>.ret;
305  let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
306                    Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
307                    getVregSrcForVT<Src2VT>.ret:$src2, // stub argument
308                    dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
309                    bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
  // DPP16 takes the same operands as DPP plus a trailing $fi operand.
310  let InsDPP16 = !con(InsDPP, (ins FI:$fi));
311  let InsDPP8 = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
312                     Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
313                     getVregSrcForVT<Src2VT>.ret:$src2, // stub argument
314                     dpp8:$dpp8, FI:$fi);
315  let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
316                     Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
317                     getVregSrcForVT<Src2VT>.ret:$src2, // stub argument
318                     clampmod:$clamp, omod:$omod,
319                     dst_sel:$dst_sel, dst_unused:$dst_unused,
320                     src0_sel:$src0_sel, src1_sel:$src1_sel);
  // The assembly strings are generated for two sources, so $src2 is not
  // printed.
321  let Asm32 = getAsm32<1, 2, vt0>.ret;
322  let Asm64 = getAsm64<1, 2, 0, HasModifiers, HasOMod, vt0>.ret;
323  let AsmDPP = getAsmDPP<1, 2, HasModifiers, vt0>.ret;
324  let AsmDPP16 = getAsmDPP16<1, 2, HasModifiers, vt0>.ret;
325  let AsmDPP8 = getAsmDPP8<1, 2, 0, vt0>.ret;
326  let AsmSDWA = getAsmSDWA<1, 2, vt0>.ret;
327  let AsmSDWA9 = getAsmSDWA9<1, 1, 2, vt0>.ret;
328  let HasSrc2 = 0;
329  let HasSrc2Mods = 0;
330
331  let HasExt = 1;
332  let HasExtDPP = 1;
333  let HasExtSDWA = 1;
334  let HasExtSDWA9 = 0;
335  let TieRegDPP = "$src2";
336}
337
// Concrete VOP_MAC profiles. The legacy f32 profile disables the DPP
// extension; the f64 profile disables SDWA and enables 64-bit DPP.
338def VOP_MAC_F16 : VOP_MAC <f16>;
339def VOP_MAC_F32 : VOP_MAC <f32>;
340let HasExtDPP = 0 in
341def VOP_MAC_LEGACY_F32 : VOP_MAC <f32>;
342let HasExtSDWA = 0, HasExt64BitDPP = 1 in
343def VOP_MAC_F64 : VOP_MAC <f64>;
344
// VOP_MAC variant used by the accumulating dot-product instructions: clamp,
// the SDWA extension, op_sel and the packed flag are all turned off.
344class VOP_DOT_ACC<ValueType vt0, ValueType vt1> : VOP_MAC<vt0, vt1> {
345  let HasClamp = 0;
346  let HasExtSDWA = 0;
347  let HasOpSel = 0;
348  let IsPacked = 0;
349}
351
// Concrete VOP_DOT_ACC profiles. The f32/v2f16 profile overrides the DPP
// source-modifier operand types with FPVRegInputMods; the i32 profile turns
// on source modifiers for src0 and src1.
351def VOP_DOT_ACC_F32_V2F16 : VOP_DOT_ACC<f32, v2f16> {
352  let Src0ModDPP = FPVRegInputMods;
353  let Src1ModDPP = FPVRegInputMods;
354}
355
356def VOP_DOT_ACC_I32_I32   : VOP_DOT_ACC<i32, i32> {
357  let HasSrc0Mods = 1;
358  let HasSrc1Mods = 1;
359}
361
361// Write out to vcc or arbitrary SGPR.
// In the 32-bit, SDWA and DPP assembly strings the carry-out is spelled as a
// literal "vcc"; only the 64-bit form has an explicit $sdst output operand.
362def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped], 0, /*EnableClamp=*/1> {
363  let Asm32 = "$vdst, vcc, $src0, $src1";
364  let Asm64 = "$vdst, $sdst, $src0, $src1$clamp";
365  let AsmSDWA = "$vdst, vcc, $src0_modifiers, $src1_modifiers$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
366  let AsmSDWA9 = "$vdst, vcc, $src0_modifiers, $src1_modifiers$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
367  let AsmDPP = "$vdst, vcc, $src0, $src1 $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
368  let AsmDPP8 = "$vdst, vcc, $src0, $src1 $dpp8$fi";
369  let AsmDPP16 = AsmDPP#"$fi";
370  let Outs32 = (outs DstRC:$vdst);
371  let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
372}
374
374// Write out to vcc or arbitrary SGPR and read in from vcc or
375// arbitrary SGPR.
// Both carry-out and carry-in are spelled as a literal "vcc" in the 32-bit,
// SDWA and DPP assembly strings; only the 64-bit form names them as operands
// ($sdst and $src2).
376def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], 0, /*EnableClamp=*/1> {
377  let Asm32 = "$vdst, vcc, $src0, $src1, vcc";
378  let Asm64 = "$vdst, $sdst, $src0, $src1, $src2$clamp";
379  let AsmSDWA = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
380  let AsmSDWA9 = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
381  let AsmDPP = "$vdst, vcc, $src0, $src1, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
382  let AsmDPP8 = "$vdst, vcc, $src0, $src1, vcc $dpp8$fi";
383  let AsmDPP16 = AsmDPP#"$fi";
384  let Outs32 = (outs DstRC:$vdst);
385  let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
386
387  // Suppress src2 implied by type since the 32-bit encoding uses an
388  // implicit VCC use.
389  let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);
390
  // The SDWA and DPP operand lists likewise carry no src2 operand.
391  let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
392                     Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
393                     clampmod:$clamp,
394                     dst_sel:$dst_sel, dst_unused:$dst_unused,
395                     src0_sel:$src0_sel, src1_sel:$src1_sel);
396
397  let InsDPP = (ins DstRCDPP:$old,
398                    Src0DPP:$src0,
399                    Src1DPP:$src1,
400                    dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
401                    bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
402  let InsDPP16 = !con(InsDPP, (ins FI:$fi));
403
404  let HasExt = 1;
405  let HasExtDPP = 1;
406  let HasExtSDWA = 1;
407  let HasExtSDWA9 = 1;
408}
410
410// Read in from vcc or arbitrary SGPR.
// The 32-bit assembly string omits the condition operand entirely, the SDWA
// and DPP strings spell it as a literal "vcc", and only the 64-bit string
// names it as $src2.
411def VOP2e_I32_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], /*EnableF32SrcMods=*/1> {
412  let Asm32 = "$vdst, $src0, $src1";
413  let Asm64 = "$vdst, $src0_modifiers, $src1_modifiers, $src2";
414  let AsmSDWA = "$vdst, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
415  let AsmSDWA9 = "$vdst, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
416  let AsmDPP = "$vdst, $src0, $src1, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
417  let AsmDPP8 = "$vdst, $src0, $src1, vcc $dpp8$fi";
418  let AsmDPP16 = AsmDPP#"$fi";
419
420  let Outs32 = (outs DstRC:$vdst);
421  let Outs64 = (outs DstRC:$vdst);
422
423  // Suppress src2 implied by type since the 32-bit encoding uses an
424  // implicit VCC use.
425  let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);
426
427  let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
428                     Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
429                     clampmod:$clamp,
430                     dst_sel:$dst_sel, dst_unused:$dst_unused,
431                     src0_sel:$src0_sel, src1_sel:$src1_sel);
432
433  let InsDPP = (ins DstRCDPP:$old,
434                    Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
435                    Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
436                    dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
437                    bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
438  let InsDPP16 = !con(InsDPP, (ins FI:$fi));
439
440  let HasExt = 1;
441  let HasExtDPP = 1;
442  let HasExtSDWA = 1;
443  let HasExtSDWA9 = 1;
444}
446
// Profile for v_readlane_b32: the result is written to an SReg_32, src0 is a
// VRegOrLds_32 and src1 an SCSrc_b32. The 64-bit operand lists and assembly
// string are the same as the 32-bit ones, and all extensions are disabled.
446def VOP_READLANE : VOPProfile<[i32, i32, i32]> {
447  let Outs32 = (outs SReg_32:$vdst);
448  let Outs64 = Outs32;
449  let Ins32 = (ins VRegOrLds_32:$src0, SCSrc_b32:$src1);
450  let Ins64 = Ins32;
451  let Asm32 = " $vdst, $src0, $src1";
452  let Asm64 = Asm32;
453
454  let HasExt = 0;
455  let HasExtDPP = 0;
456  let HasExt64BitDPP = 0;
457  let HasExtSDWA = 0;
458  let HasExtSDWA9 = 0;
459}
461
// Profile for v_writelane_b32: both sources are SCSrc_b32 and the result is a
// VGPR_32. The extra $vdst_in input is not printed in the assembly string;
// the instruction definition ties it to $vdst. HasSrc2 is forced to 0 even
// though the profile's type list has a third source type.
461def VOP_WRITELANE : VOPProfile<[i32, i32, i32, i32]> {
462  let Outs32 = (outs VGPR_32:$vdst);
463  let Outs64 = Outs32;
464  let Ins32 = (ins SCSrc_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in);
465  let Ins64 = Ins32;
466  let Asm32 = " $vdst, $src0, $src1";
467  let Asm64 = Asm32;
468  let HasSrc2 = 0;
469  let HasSrc2Mods = 0;
470
471  let HasExt = 0;
472  let HasExtDPP = 0;
473  let HasExt64BitDPP = 0;
474  let HasExtSDWA = 0;
475  let HasExtSDWA9 = 0;
476}
478
479//===----------------------------------------------------------------------===//
480// VOP2 Instructions
481//===----------------------------------------------------------------------===//
482
// v_cndmask_b32 is defined without a selection node. v_madmk_f32 is a
// pattern-less, rematerializable pseudo gated on HasMadMacF32Insts.
482defm V_CNDMASK_B32 : VOP2eInst <"v_cndmask_b32", VOP2e_I32_I32_I32_I1>;
483let SubtargetPredicate = HasMadMacF32Insts, isReMaterializable = 1 in
484def V_MADMK_F32 : VOP2_Pseudo <"v_madmk_f32", VOP_MADMK_F32, []>;
486
// Commutable 32-bit VOP2 instructions. For the "rev" opcodes the fourth
// template argument names the non-reversed partner mnemonic.
486let isCommutable = 1 in {
487let isReMaterializable = 1 in {
488defm V_ADD_F32 : VOP2Inst <"v_add_f32", VOP_F32_F32_F32, any_fadd>;
489defm V_SUB_F32 : VOP2Inst <"v_sub_f32", VOP_F32_F32_F32, any_fsub>;
490defm V_SUBREV_F32 : VOP2Inst <"v_subrev_f32", VOP_F32_F32_F32, null_frag, "v_sub_f32">;
491defm V_MUL_LEGACY_F32 : VOP2Inst <"v_mul_legacy_f32", VOP_F32_F32_F32, AMDGPUfmul_legacy>;
492defm V_MUL_F32 : VOP2Inst <"v_mul_f32", VOP_F32_F32_F32, any_fmul>;
493defm V_MUL_I32_I24 : VOP2Inst <"v_mul_i32_i24", VOP_I32_I32_I32_ARITH, AMDGPUmul_i24>;
494defm V_MUL_HI_I32_I24 : VOP2Inst <"v_mul_hi_i32_i24", VOP_I32_I32_I32, AMDGPUmulhi_i24>;
495defm V_MUL_U32_U24 : VOP2Inst <"v_mul_u32_u24", VOP_I32_I32_I32_ARITH, AMDGPUmul_u24>;
496defm V_MUL_HI_U32_U24 : VOP2Inst <"v_mul_hi_u32_u24", VOP_I32_I32_I32, AMDGPUmulhi_u24>;
497defm V_MIN_F32 : VOP2Inst <"v_min_f32", VOP_F32_F32_F32, fminnum_like>;
498defm V_MAX_F32 : VOP2Inst <"v_max_f32", VOP_F32_F32_F32, fmaxnum_like>;
499defm V_MIN_I32 : VOP2Inst <"v_min_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smin>;
500defm V_MAX_I32 : VOP2Inst <"v_max_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smax>;
501defm V_MIN_U32 : VOP2Inst <"v_min_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umin>;
502defm V_MAX_U32 : VOP2Inst <"v_max_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umax>;
503defm V_LSHRREV_B32 : VOP2Inst <"v_lshrrev_b32", VOP_I32_I32_I32, clshr_rev_32, "v_lshr_b32">;
504defm V_ASHRREV_I32 : VOP2Inst <"v_ashrrev_i32", VOP_I32_I32_I32, cashr_rev_32, "v_ashr_i32">;
505defm V_LSHLREV_B32 : VOP2Inst <"v_lshlrev_b32", VOP_I32_I32_I32, clshl_rev_32, "v_lshl_b32">;
506defm V_AND_B32 : VOP2Inst <"v_and_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, and>;
507defm V_OR_B32 : VOP2Inst <"v_or_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, or>;
508defm V_XOR_B32 : VOP2Inst <"v_xor_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, xor>;
509} // End isReMaterializable = 1
510
// The mac/madak instructions override the default derived from the float
// operand types and are marked as not raising FP exceptions.
511let mayRaiseFPException = 0 in {
512let OtherPredicates = [HasMadMacF32Insts] in {
// $src2 is tied to the destination and excluded from the encoding.
513let Constraints = "$vdst = $src2", DisableEncoding="$src2",
514    isConvertibleToThreeAddress = 1 in {
515defm V_MAC_F32 : VOP2Inst <"v_mac_f32", VOP_MAC_F32>;
516
517let SubtargetPredicate = isGFX6GFX7GFX10 in
518defm V_MAC_LEGACY_F32 : VOP2Inst <"v_mac_legacy_f32", VOP_MAC_LEGACY_F32>;
519} // End Constraints = "$vdst = $src2", DisableEncoding="$src2",
520  //     isConvertibleToThreeAddress = 1
521
522let isReMaterializable = 1 in
523def V_MADAK_F32 : VOP2_Pseudo <"v_madak_f32", VOP_MADAK_F32, []>;
524} // End OtherPredicates = [HasMadMacF32Insts]
525} // End mayRaiseFPException = 0
526
527// No patterns so that the scalar instructions are always selected.
528// The scalar versions will be replaced with vector when needed later.
529defm V_ADD_CO_U32 : VOP2bInst <"v_add_co_u32", VOP2b_I32_I1_I32_I32, null_frag, "v_add_co_u32", 1>;
530defm V_SUB_CO_U32 : VOP2bInst <"v_sub_co_u32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_co_u32", 1>;
531defm V_SUBREV_CO_U32 : VOP2bInst <"v_subrev_co_u32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_co_u32", 1>;
532defm V_ADDC_U32 : VOP2bInst <"v_addc_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_addc_u32", 1>;
533defm V_SUBB_U32 : VOP2bInst <"v_subb_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32", 1>;
534defm V_SUBBREV_U32 : VOP2bInst <"v_subbrev_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32", 1>;
535
536
537let SubtargetPredicate = HasAddNoCarryInsts, isReMaterializable = 1 in {
538defm V_ADD_U32 : VOP2Inst <"v_add_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_add_u32", 1>;
539defm V_SUB_U32 : VOP2Inst <"v_sub_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32", 1>;
540defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32", 1>;
541} // End SubtargetPredicate = HasAddNoCarryInsts, isReMaterializable = 1
542
543} // End isCommutable = 1
545
545// These are special and do not read the exec mask.
// The empty Uses list overrides the implicit uses that VOP2_Pseudo would
// otherwise set; both instructions are also marked convergent.
546let isConvergent = 1, Uses = []<Register> in {
547def V_READLANE_B32 : VOP2_Pseudo<"v_readlane_b32", VOP_READLANE,
548  [(set i32:$vdst, (int_amdgcn_readlane i32:$src0, i32:$src1))]>;
549
550let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
551def V_WRITELANE_B32 : VOP2_Pseudo<"v_writelane_b32", VOP_WRITELANE,
552  [(set i32:$vdst, (int_amdgcn_writelane i32:$src0, i32:$src1, i32:$vdst_in))]>;
553} // End $vdst = $vdst_in, DisableEncoding $vdst_in
554} // End isConvergent = 1, Uses = []
556
// Rematerializable VOP2 instructions. Those using VOP_NO_EXT<...> profiles
// are wrapped so that, as the name suggests, no extension variants are
// requested (NOTE(review): VOP_NO_EXT is defined elsewhere; confirm).
556let isReMaterializable = 1 in {
557defm V_BFM_B32 : VOP2Inst <"v_bfm_b32", VOP_NO_EXT<VOP_I32_I32_I32>>;
558defm V_BCNT_U32_B32 : VOP2Inst <"v_bcnt_u32_b32", VOP_NO_EXT<VOP_I32_I32_I32>, add_ctpop>;
559defm V_MBCNT_LO_U32_B32 : VOP2Inst <"v_mbcnt_lo_u32_b32", VOP_NO_EXT<VOP_I32_I32_I32>, int_amdgcn_mbcnt_lo>;
560defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_NO_EXT<VOP_I32_I32_I32>, int_amdgcn_mbcnt_hi>;
561defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_NO_EXT<VOP_F32_F32_I32>, AMDGPUldexp>;
562
// The pknorm conversions have float sources but are explicitly marked as
// neither reading the mode register nor raising FP exceptions.
563let ReadsModeReg = 0, mayRaiseFPException = 0 in {
564defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_NO_EXT<VOP_V2I16_F32_F32>, AMDGPUpknorm_i16_f32>;
565defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_NO_EXT<VOP_V2I16_F32_F32>, AMDGPUpknorm_u16_f32>;
566} // End ReadsModeReg = 0, mayRaiseFPException = 0
567
568defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <"v_cvt_pkrtz_f16_f32", VOP_NO_EXT<VOP_V2F16_F32_F32>, AMDGPUpkrtz_f16_f32>;
569defm V_CVT_PK_U16_U32 : VOP2Inst <"v_cvt_pk_u16_u32", VOP_NO_EXT<VOP_V2I16_I32_I32>, AMDGPUpk_u16_u32>;
570defm V_CVT_PK_I16_I32 : VOP2Inst <"v_cvt_pk_i16_i32", VOP_NO_EXT<VOP_V2I16_I32_I32>, AMDGPUpk_i16_i32>;
571
572
573let SubtargetPredicate = isGFX6GFX7 in {
574defm V_MIN_LEGACY_F32 : VOP2Inst <"v_min_legacy_f32", VOP_F32_F32_F32, AMDGPUfmin_legacy>;
575defm V_MAX_LEGACY_F32 : VOP2Inst <"v_max_legacy_f32", VOP_F32_F32_F32, AMDGPUfmax_legacy>;
576} // End SubtargetPredicate = isGFX6GFX7
577
// Non-reversed shifts; these are the partners named as revOp by the
// V_*REV_* shift definitions.
578let isCommutable = 1 in {
579let SubtargetPredicate = isGFX6GFX7 in {
580defm V_LSHR_B32 : VOP2Inst <"v_lshr_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, csrl_32>;
581defm V_ASHR_I32 : VOP2Inst <"v_ashr_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, csra_32>;
582defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, cshl_32>;
583} // End SubtargetPredicate = isGFX6GFX7
584} // End isCommutable = 1
585} // End isReMaterializable = 1
587
// Defined without a selection pattern and outside the rematerializable scope.
587defm V_CVT_PKACCUM_U8_F32 : VOP2Inst <"v_cvt_pkaccum_u8_f32", VOP_NO_EXT<VOP_I32_F32_I32>>; // TODO: set "Uses = dst"
589
// Selects `Inst` for the divergent fragment of binary operator `Op`.
// If Inst's Commutable_REV record marks it as the original opcode, the
// operands are passed in source order; otherwise (a "rev" opcode) they are
// swapped.
589class DivergentBinOp<SDPatternOperator Op, VOP_Pseudo Inst> :
590  GCNPat<
591      (getDivergentFrag<Op>.ret Inst.Pfl.Src0VT:$src0, Inst.Pfl.Src1VT:$src1),
592      !if(!cast<Commutable_REV>(Inst).IsOrig,
593        (Inst $src0, $src1),
594        (Inst $src1, $src0)
595      )
596  >;
598
// Same as DivergentBinOp, but for instructions that take one more operand
// after the two sources; it is always passed as 0 here (presumably the clamp
// bit, going by the class name - confirm against the instruction profile).
598class DivergentClampingBinOp<SDPatternOperator Op, VOP_Pseudo Inst> :
599  GCNPat<
600      (getDivergentFrag<Op>.ret Inst.Pfl.Src0VT:$src0, Inst.Pfl.Src1VT:$src1),
601      !if(!cast<Commutable_REV>(Inst).IsOrig,
602        (Inst $src0, $src1, 0),
603        (Inst $src1, $src0, 0)
604      )
605  >;
607
// Divergent shifts select the reversed-operand opcodes, so DivergentBinOp
// swaps the operands for them.
606def : DivergentBinOp<csrl_32, V_LSHRREV_B32_e64>;
607def : DivergentBinOp<csra_32, V_ASHRREV_I32_e64>;
608def : DivergentBinOp<cshl_32, V_LSHLREV_B32_e64>;
609
// Divergent add/sub use the no-carry opcodes when available...
610let SubtargetPredicate = HasAddNoCarryInsts in {
611  def : DivergentClampingBinOp<add, V_ADD_U32_e64>;
612  def : DivergentClampingBinOp<sub, V_SUB_U32_e64>;
613}
614
// ...and the carry-out opcodes on GFX6-GFX9.
615let SubtargetPredicate = isGFX6GFX7GFX8GFX9, Predicates = [isGFX6GFX7GFX8GFX9] in {
616def : DivergentClampingBinOp<add, V_ADD_CO_U32_e64>;
617def : DivergentClampingBinOp<sub, V_SUB_CO_U32_e64>;
618}
619
620def : DivergentBinOp<adde, V_ADDC_U32_e32>;
621def : DivergentBinOp<sube, V_SUBB_U32_e32>;
624
// Selects a divergent 64-bit binary `Op` as two applications of the 32-bit
// instruction `Inst`: one on the sub0 halves and one on the sub1 halves of
// the sources, recombined into a VReg_64 with REG_SEQUENCE.
623class divergent_i64_BinOp <SDPatternOperator Op, Instruction Inst> :
624  GCNPat<
625      (getDivergentFrag<Op>.ret i64:$src0, i64:$src1),
626      (REG_SEQUENCE VReg_64,
627        (Inst
628          (i32 (EXTRACT_SUBREG $src0, sub0)),
629          (i32 (EXTRACT_SUBREG $src1, sub0))
630        ), sub0,
631        (Inst
632          (i32 (EXTRACT_SUBREG $src0, sub1)),
633          (i32 (EXTRACT_SUBREG $src1, sub1))
634        ), sub1
635      )
636  >;
639
// 64-bit divergent bitwise operations, split into two 32-bit halves.
638def :  divergent_i64_BinOp <and, V_AND_B32_e64>;
639def :  divergent_i64_BinOp <or,  V_OR_B32_e64>;
640def :  divergent_i64_BinOp <xor, V_XOR_B32_e64>;
643
// 16-bit VOP2 instructions, all gated on Has16BitInsts.
642let SubtargetPredicate = Has16BitInsts in {
643
644let FPDPRounding = 1 in {
// The madmk/madak f16 pseudos pass an empty name suffix instead of the
// default "_e32".
645def V_MADMK_F16 : VOP2_Pseudo <"v_madmk_f16", VOP_MADMK_F16, [], "">;
646defm V_LDEXP_F16 : VOP2Inst <"v_ldexp_f16", VOP_F16_F16_I32, AMDGPUldexp>;
647} // End FPDPRounding = 1
648
649defm V_LSHLREV_B16 : VOP2Inst <"v_lshlrev_b16", VOP_I16_I16_I16, clshl_rev_16>;
650defm V_LSHRREV_B16 : VOP2Inst <"v_lshrrev_b16", VOP_I16_I16_I16, clshr_rev_16>;
651defm V_ASHRREV_I16 : VOP2Inst <"v_ashrrev_i16", VOP_I16_I16_I16, cashr_rev_16>;
652
653let isCommutable = 1 in {
654let FPDPRounding = 1 in {
655defm V_ADD_F16 : VOP2Inst <"v_add_f16", VOP_F16_F16_F16, any_fadd>;
656defm V_SUB_F16 : VOP2Inst <"v_sub_f16", VOP_F16_F16_F16, any_fsub>;
657defm V_SUBREV_F16 : VOP2Inst <"v_subrev_f16", VOP_F16_F16_F16, null_frag, "v_sub_f16">;
658defm V_MUL_F16 : VOP2Inst <"v_mul_f16", VOP_F16_F16_F16, any_fmul>;
659
660let mayRaiseFPException = 0 in {
661def V_MADAK_F16 : VOP2_Pseudo <"v_madak_f16", VOP_MADAK_F16, [], "">;
662} // End mayRaiseFPException = 0
663
664} // End FPDPRounding = 1
665defm V_ADD_U16 : VOP2Inst <"v_add_u16", VOP_I16_I16_I16_ARITH, add>;
666defm V_SUB_U16 : VOP2Inst <"v_sub_u16" , VOP_I16_I16_I16_ARITH, sub>;
667defm V_SUBREV_U16 : VOP2Inst <"v_subrev_u16", VOP_I16_I16_I16_ARITH, null_frag, "v_sub_u16">;
668defm V_MUL_LO_U16 : VOP2Inst <"v_mul_lo_u16", VOP_I16_I16_I16, mul>;
669defm V_MAX_F16 : VOP2Inst <"v_max_f16", VOP_F16_F16_F16, fmaxnum_like>;
670defm V_MIN_F16 : VOP2Inst <"v_min_f16", VOP_F16_F16_F16, fminnum_like>;
671defm V_MAX_U16 : VOP2Inst <"v_max_u16", VOP_I16_I16_I16, umax>;
672defm V_MAX_I16 : VOP2Inst <"v_max_i16", VOP_I16_I16_I16, smax>;
673defm V_MIN_U16 : VOP2Inst <"v_min_u16", VOP_I16_I16_I16, umin>;
674defm V_MIN_I16 : VOP2Inst <"v_min_i16", VOP_I16_I16_I16, smin>;
675
// $src2 is tied to the destination and excluded from the encoding.
676let Constraints = "$vdst = $src2", DisableEncoding="$src2",
677    isConvertibleToThreeAddress = 1 in {
678defm V_MAC_F16 : VOP2Inst <"v_mac_f16", VOP_MAC_F16>;
679} // End Constraints = "$vdst = $src2", DisableEncoding="$src2",
  //     isConvertibleToThreeAddress = 1
680} // End isCommutable = 1
681
682} // End SubtargetPredicate = Has16BitInsts
685
// Instructions and patterns gated on HasDLInsts: v_xnor_b32 with its
// divergent selection patterns, and v_fmac_f32.
684let SubtargetPredicate = HasDLInsts in {
685
686let isReMaterializable = 1 in
687defm V_XNOR_B32 : VOP2Inst <"v_xnor_b32", VOP_I32_I32_I32, xnor>;
688
// Divergent not(xor(a, b)) with a single-use xor -> v_xnor.
689def : GCNPat<
690  (i32 (DivergentUnaryFrag<not> (xor_oneuse i32:$src0, i32:$src1))),
691  (i32 (V_XNOR_B32_e64 $src0, $src1))
692>;
693
// Divergent xor(not(a), b) -> v_xnor.
694def : GCNPat<
695  (i32 (DivergentBinFrag<xor_oneuse> (not i32:$src0), i32:$src1)),
696  (i32 (V_XNOR_B32_e64 $src0, $src1))
697>;
698
// 64-bit forms of the two patterns above: one v_xnor per 32-bit half (sub0,
// sub1), recombined into a VReg_64.
699def : GCNPat<
700  (i64 (DivergentUnaryFrag<not> (xor_oneuse i64:$src0, i64:$src1))),
701  (REG_SEQUENCE VReg_64, (i32 (V_XNOR_B32_e64
702                            (i32 (EXTRACT_SUBREG $src0, sub0)),
703                            (i32 (EXTRACT_SUBREG $src1, sub0)))), sub0,
704                     (i32 (V_XNOR_B32_e64
705                            (i32 (EXTRACT_SUBREG $src0, sub1)),
706                            (i32 (EXTRACT_SUBREG $src1, sub1)))), sub1)
707>;
708
709def : GCNPat<
710  (i64 (DivergentBinFrag<xor_oneuse> (not i64:$src0), i64:$src1)),
711  (REG_SEQUENCE VReg_64, (i32 (V_XNOR_B32_e64
712                            (i32 (EXTRACT_SUBREG $src0, sub0)),
713                            (i32 (EXTRACT_SUBREG $src1, sub0)))), sub0,
714                     (i32 (V_XNOR_B32_e64
715                            (i32 (EXTRACT_SUBREG $src0, sub1)),
716                            (i32 (EXTRACT_SUBREG $src1, sub1)))), sub1)
717>;
718
// $src2 is tied to the destination and excluded from the encoding.
719let Constraints = "$vdst = $src2",
720    DisableEncoding = "$src2",
721    isConvertibleToThreeAddress = 1,
722    isCommutable = 1 in
723defm V_FMAC_F32 : VOP2Inst <"v_fmac_f32", VOP_MAC_F32>;
724
725} // End SubtargetPredicate = HasDLInsts
728
// v_fmac_legacy_f32, gated on HasFmaLegacy32. It uses the legacy MAC profile
// (no DPP extension); $src2 is tied to the destination and excluded from the
// encoding.
727let SubtargetPredicate = HasFmaLegacy32 in {
728
729let Constraints = "$vdst = $src2",
730    DisableEncoding = "$src2",
731    isConvertibleToThreeAddress = 1,
732    isCommutable = 1 in
733defm V_FMAC_LEGACY_F32 : VOP2Inst <"v_fmac_legacy_f32", VOP_MAC_LEGACY_F32>;
734
735} // End SubtargetPredicate = HasFmaLegacy32
738
// v_fmac_f64, gated on isGFX90APlus and scheduled as WriteDoubleAdd. $src2 is
// tied to the destination and excluded from the encoding.
737let SubtargetPredicate = isGFX90APlus,
738    Constraints = "$vdst = $src2",
739    DisableEncoding="$src2",
740    isConvertibleToThreeAddress = 1,
741    isCommutable = 1,
742    SchedRW = [WriteDoubleAdd] in
743defm V_FMAC_F64 : VOP2Inst <"v_fmac_f64", VOP_MAC_F64>;
746
// Accumulating dot-product instructions. Each is gated on its own HasDotN
// subtarget predicate; all tie $src2 to the destination and set IsDOT.
745let Constraints = "$vdst = $src2",
746      DisableEncoding="$src2",
747      isConvertibleToThreeAddress = 1,
748      isCommutable = 1,
749      IsDOT = 1 in {
750  let SubtargetPredicate = HasDot5Insts in
751    defm V_DOT2C_F32_F16 : VOP2Inst<"v_dot2c_f32_f16", VOP_DOT_ACC_F32_V2F16>;
752  let SubtargetPredicate = HasDot6Insts in
753    defm V_DOT4C_I32_I8  : VOP2Inst<"v_dot4c_i32_i8",  VOP_DOT_ACC_I32_I32>;
754
755  let SubtargetPredicate = HasDot4Insts in
756    defm V_DOT2C_I32_I16 : VOP2Inst<"v_dot2c_i32_i16", VOP_DOT_ACC_I32_I32>;
757  let SubtargetPredicate = HasDot3Insts in
758    defm V_DOT8C_I32_I4  : VOP2Inst<"v_dot8c_i32_i4",  VOP_DOT_ACC_I32_I32>;
759} // End Constraints = "$vdst = $src2", DisableEncoding="$src2",
  //     isConvertibleToThreeAddress = 1, isCommutable = 1, IsDOT = 1
762
// Select the accumulating VOP2 dot instructions (e32 form) for the dot
// node/intrinsics when the clamp operand is DSTCLAMP.NONE.  Each pattern
// carries the same predicate as the instruction it selects.
let AddedComplexity = 30 in {
  let SubtargetPredicate = HasDot5Insts in
  def : GCNPat <
    (f32 (AMDGPUfdot2 v2f16:$src0, v2f16:$src1, f32:$src2,
                      (i1 DSTCLAMP.NONE))),
    (f32 (V_DOT2C_F32_F16_e32 $src0, $src1, $src2))
  >;

  let SubtargetPredicate = HasDot6Insts in
  def : GCNPat <
    (i32 (int_amdgcn_sdot4 i32:$src0, i32:$src1, i32:$src2,
                           (i1 DSTCLAMP.NONE))),
    (i32 (V_DOT4C_I32_I8_e32 $src0, $src1, $src2))
  >;

  let SubtargetPredicate = HasDot4Insts in
  def : GCNPat <
    (i32 (int_amdgcn_sdot2 v2i16:$src0, v2i16:$src1, i32:$src2,
                           (i1 DSTCLAMP.NONE))),
    (i32 (V_DOT2C_I32_I16_e32 $src0, $src1, $src2))
  >;

  let SubtargetPredicate = HasDot3Insts in
  def : GCNPat <
    (i32 (int_amdgcn_sdot8 i32:$src0, i32:$src1, i32:$src2,
                           (i1 DSTCLAMP.NONE))),
    (i32 (V_DOT8C_I32_I4_e32 $src0, $src1, $src2))
  >;
} // End AddedComplexity = 30
789
// F32 FMA pseudos built on the MADMK/MADAK profiles.  Both are
// rematerializable; only the AK form is marked commutable.
let SubtargetPredicate = HasFmaakFmamkF32Insts, isReMaterializable = 1 in {
  def V_FMAMK_F32 : VOP2_Pseudo<"v_fmamk_f32", VOP_MADMK_F32, [], "">;

  let isCommutable = 1 in {
    def V_FMAAK_F32 : VOP2_Pseudo<"v_fmaak_f32", VOP_MADAK_F32, [], "">;
  }
} // End SubtargetPredicate = HasFmaakFmamkF32Insts, isReMaterializable = 1
796
// F16 FMA family, available under isGFX10Plus.
let SubtargetPredicate = isGFX10Plus in {

  // MADMK/MADAK-profile pseudos; only the AK form is commutable.
  let FPDPRounding = 1 in {
    def V_FMAMK_F16 : VOP2_Pseudo<"v_fmamk_f16", VOP_MADMK_F16, [], "">;

    let isCommutable = 1 in {
      def V_FMAAK_F16 : VOP2_Pseudo<"v_fmaak_f16", VOP_MADAK_F16, [], "">;
    }
  } // End FPDPRounding = 1

  // Accumulating form: $src2 is tied to $vdst and not encoded.
  let isCommutable = 1, isConvertibleToThreeAddress = 1,
      Constraints = "$vdst = $src2", DisableEncoding = "$src2" in
  defm V_FMAC_F16 : VOP2Inst<"v_fmac_f16", VOP_MAC_F16>;

} // End SubtargetPredicate = isGFX10Plus
814
// Packed F16 FMAC, gated by HasPkFmacF16Inst.
let SubtargetPredicate = HasPkFmacF16Inst in
defm V_PK_FMAC_F16 : VOP2Inst<"v_pk_fmac_f16", VOP_V2F16_V2F16_V2F16>;
818
// Note: on GFX8 and GFX9, 16-bit instructions write 0 to the high 16 bits of
// the result; on GFX10+ the high 16 bits are preserved.
//
// Folds a zero-extension of the 16-bit operation `op` into `inst` itself:
//   * zext to i32 uses the result of `inst` directly;
//   * zext to i64 places `inst` in sub0 and a zero V_MOV_B32 in sub1.
multiclass Arithmetic_i16_0Hi_Pats <SDPatternOperator op, Instruction inst> {
  def : GCNPat <
    (i32 (zext (op i16:$src0, i16:$src1))),
    (inst VSrc_b16:$src0, VSrc_b16:$src1)
  >;

  def : GCNPat <
    (i64 (zext (op i16:$src0, i16:$src1))),
    (REG_SEQUENCE VReg_64, (inst $src0, $src1), sub0,
                           (V_MOV_B32_e32 (i32 0)), sub1)
  >;
}
835
// Extends an i1 to i16 through `ext` by selecting between the constants 0 and
// 1 with V_CNDMASK_B32, using $src as the select operand.
class ZExt_i16_i1_Pat <SDNode ext> : GCNPat <
  (i16 (ext i1:$src)),
  (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
                     /*src1mod*/(i32 0), /*src1*/(i32 1),
                     $src)
>;
842
// and/or/xor on i16 and v2i16 values are selected to the 32-bit VALU
// bitwise instructions.
foreach vt = [i16, v2i16] in {
  def : GCNPat <(and vt:$src0, vt:$src1),
                (V_AND_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)>;

  def : GCNPat <(or vt:$src0, vt:$src1),
                (V_OR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)>;

  def : GCNPat <(xor vt:$src0, vt:$src1),
                (V_XOR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)>;
} // End foreach vt = [i16, v2i16]
859
let Predicates = [Has16BitInsts] in {

  // Undo the `sub x, c -> add x, -c` canonicalization, since c is more likely
  // to be an inline immediate than -c.
  // TODO: Also do for 64-bit.
  def : GCNPat <
    (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)),
    (V_SUB_U16_e64 VSrc_b16:$src0, NegSubInlineIntConst16:$src1)
  >;

  // Patterns that fold a zero-extension into the 16-bit instruction.  The
  // inner Predicates list replaces the outer one for these records.
  let Predicates = [Has16BitInsts, isGFX7GFX8GFX9] in {

    def : GCNPat <
      (i32 (zext (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)))),
      (V_SUB_U16_e64 VSrc_b16:$src0, NegSubInlineIntConst16:$src1)
    >;

    defm : Arithmetic_i16_0Hi_Pats<add,          V_ADD_U16_e64>;
    defm : Arithmetic_i16_0Hi_Pats<mul,          V_MUL_LO_U16_e64>;
    defm : Arithmetic_i16_0Hi_Pats<sub,          V_SUB_U16_e64>;
    defm : Arithmetic_i16_0Hi_Pats<smin,         V_MIN_I16_e64>;
    defm : Arithmetic_i16_0Hi_Pats<smax,         V_MAX_I16_e64>;
    defm : Arithmetic_i16_0Hi_Pats<umin,         V_MIN_U16_e64>;
    defm : Arithmetic_i16_0Hi_Pats<umax,         V_MAX_U16_e64>;
    defm : Arithmetic_i16_0Hi_Pats<clshl_rev_16, V_LSHLREV_B16_e64>;
    defm : Arithmetic_i16_0Hi_Pats<clshr_rev_16, V_LSHRREV_B16_e64>;
    defm : Arithmetic_i16_0Hi_Pats<cashr_rev_16, V_ASHRREV_I16_e64>;
  } // End Predicates = [Has16BitInsts, isGFX7GFX8GFX9]

  // i1 -> i16 extensions: zext/anyext select 0 or 1, sext selects 0 or -1.
  def : ZExt_i16_i1_Pat<zext>;
  def : ZExt_i16_i1_Pat<anyext>;

  def : GCNPat <
    (i16 (sext i1:$src)),
    (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
                       /*src1mod*/(i32 0), /*src1*/(i32 -1),
                       $src)
  >;

} // End Predicates = [Has16BitInsts]
900
901
// Unsigned saturating add/sub are selected to the plain add/sub instruction
// with the clamp bit set.
let SubtargetPredicate = HasIntClamp in {
  def : VOPBinOpClampPat<uaddsat, V_ADD_CO_U32_e64, i32>;
  def : VOPBinOpClampPat<usubsat, V_SUB_CO_U32_e64, i32>;
} // End SubtargetPredicate = HasIntClamp

// AddedComplexity = 1: prefer the no-carry form over the form with carry-out.
let SubtargetPredicate = HasAddNoCarryInsts, OtherPredicates = [HasIntClamp],
    AddedComplexity = 1 in {
  def : VOPBinOpClampPat<uaddsat, V_ADD_U32_e64, i32>;
  def : VOPBinOpClampPat<usubsat, V_SUB_U32_e64, i32>;
} // End SubtargetPredicate = HasAddNoCarryInsts

let SubtargetPredicate = Has16BitInsts, OtherPredicates = [HasIntClamp] in {
  def : VOPBinOpClampPat<uaddsat, V_ADD_U16_e64, i16>;
  def : VOPBinOpClampPat<usubsat, V_SUB_U16_e64, i16>;
} // End SubtargetPredicate = Has16BitInsts
919
920//===----------------------------------------------------------------------===//
921// Target-specific instruction encodings.
922//===----------------------------------------------------------------------===//
923
// VOP2 instruction in DPP form.  The low 32 bits follow the VOP2 layout with
// the src0 slot (Inst{8-0}) fixed to 0xfa; scheduling and side-effect
// properties are copied from the pseudo `ps`.
class VOP2_DPP <bits<6> op, VOP2_DPP_Pseudo ps,
                string opName = ps.OpName, VOPProfile p = ps.Pfl,
                bit IsDPP16 = 0>
    : VOP_DPP<opName, p, IsDPP16> {
  // Properties inherited from the pseudo instruction.
  let Defs           = ps.Defs;
  let Uses           = ps.Uses;
  let SchedRW        = ps.SchedRW;
  let hasSideEffects = ps.hasSideEffects;

  bits<8> vdst;
  bits<8> src1;

  let Inst{31}    = 0x0;
  let Inst{30-25} = op;
  let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0);
  let Inst{16-9}  = !if(p.HasSrc1, src1{7-0}, 0);
  let Inst{8-0}   = 0xfa;
}
941
// VOP2_DPP instantiated with IsDPP16 = 1 and gated on HasDPP16.  Any
// OtherPredicates of the pseudo are carried over.
class Base_VOP2_DPP16 <bits<6> op, VOP2_DPP_Pseudo ps,
                       string opName = ps.OpName, VOPProfile p = ps.Pfl>
    : VOP2_DPP<op, ps, opName, p, 1> {
  let SubtargetPredicate = HasDPP16;
  let AssemblerPredicate = HasDPP16;
  let OtherPredicates    = ps.OtherPredicates;
}
949
// Base_VOP2_DPP16 that is additionally registered as the GFX10 MC instruction
// of the pseudo (SIEncodingFamily.GFX10).
class VOP2_DPP16 <bits<6> op, VOP2_DPP_Pseudo ps,
                  string opName = ps.OpName, VOPProfile p = ps.Pfl>
    : Base_VOP2_DPP16<op, ps, opName, p>,
      SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX10>;
954
// VOP2 instruction in DPP8 form.  Same VOP2 bit layout as the other variants,
// except that the src0 slot (Inst{8-0}) is filled from `fi`, which is not
// declared in this class.
// NOTE(review): `fi` presumably comes from VOP_DPP8 - confirm.
class VOP2_DPP8 <bits<6> op, VOP2_Pseudo ps, VOPProfile p = ps.Pfl>
    : VOP_DPP8<ps.OpName, p> {
  // Properties inherited from the pseudo instruction.
  let Defs            = ps.Defs;
  let Uses            = ps.Uses;
  let SchedRW         = ps.SchedRW;
  let hasSideEffects  = ps.hasSideEffects;
  let OtherPredicates = ps.OtherPredicates;

  bits<8> vdst;
  bits<8> src1;

  let Inst{31}    = 0x0;
  let Inst{30-25} = op;
  let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0);
  let Inst{16-9}  = !if(p.HasSrc1, src1{7-0}, 0);
  let Inst{8-0}   = fi;
}
974
975//===----------------------------------------------------------------------===//
976// GFX10.
977//===----------------------------------------------------------------------===//
978
979let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
980  //===------------------------------- VOP2 -------------------------------===//
981  multiclass VOP2Only_Real_MADK_gfx10<bits<6> op> {
982    def _gfx10 :
983      VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.GFX10>,
984      VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
985  }
986  multiclass VOP2Only_Real_MADK_gfx10_with_name<bits<6> op, string opName,
987                                                string asmName> {
988    def _gfx10 :
989        VOP2_Real<!cast<VOP2_Pseudo>(opName), SIEncodingFamily.GFX10>,
990        VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(opName).Pfl> {
991      VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName);
992      let AsmString = asmName # ps.AsmOperands;
993    }
994  }
995  multiclass VOP2_Real_e32_gfx10<bits<6> op> {
996    def _e32_gfx10 :
997      VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX10>,
998      VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>;
999  }
1000  multiclass VOP2_Real_e64_gfx10<bits<6> op> {
1001    def _e64_gfx10 :
1002      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>,
1003      VOP3e_gfx10<{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
1004  }
1005  multiclass VOP2_Real_sdwa_gfx10<bits<6> op> {
1006    foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in
1007    def _sdwa_gfx10 :
1008      VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
1009      VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl> {
1010      let DecoderNamespace = "SDWA10";
1011    }
1012  }
1013  multiclass VOP2_Real_dpp_gfx10<bits<6> op> {
1014    foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
1015    def _dpp_gfx10 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")> {
1016      let DecoderNamespace = "SDWA10";
1017    }
1018  }
1019  multiclass VOP2_Real_dpp8_gfx10<bits<6> op> {
1020    foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
1021    def _dpp8_gfx10 : VOP2_DPP8<op, !cast<VOP2_Pseudo>(NAME#"_e32")> {
1022      let DecoderNamespace = "DPP8";
1023    }
1024  }
1025
1026  //===------------------------- VOP2 (with name) -------------------------===//
1027  multiclass VOP2_Real_e32_gfx10_with_name<bits<6> op, string opName,
1028                                           string asmName> {
1029    def _e32_gfx10 :
1030      VOP2_Real<!cast<VOP2_Pseudo>(opName#"_e32"), SIEncodingFamily.GFX10>,
1031      VOP2e<op{5-0}, !cast<VOP2_Pseudo>(opName#"_e32").Pfl> {
1032        VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32");
1033        let AsmString = asmName # ps.AsmOperands;
1034      }
1035  }
1036  multiclass VOP2_Real_e64_gfx10_with_name<bits<6> op, string opName,
1037                                           string asmName> {
1038    def _e64_gfx10 :
1039      VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.GFX10>,
1040      VOP3e_gfx10<{0, 1, 0, 0, op{5-0}},
1041                  !cast<VOP3_Pseudo>(opName#"_e64").Pfl> {
1042        VOP3_Pseudo ps = !cast<VOP3_Pseudo>(opName#"_e64");
1043        let AsmString = asmName # ps.AsmOperands;
1044      }
1045  }
1046  let DecoderNamespace = "SDWA10" in {
1047    multiclass VOP2_Real_sdwa_gfx10_with_name<bits<6> op, string opName,
1048                                              string asmName> {
1049      foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9>.ret in
1050      def _sdwa_gfx10 :
1051        VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
1052        VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
1053          VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
1054          let AsmString = asmName # ps.AsmOperands;
1055        }
1056    }
1057    multiclass VOP2_Real_dpp_gfx10_with_name<bits<6> op, string opName,
1058                                             string asmName> {
1059      foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
1060      def _dpp_gfx10 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp")> {
1061        VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32");
1062        let AsmString = asmName # ps.Pfl.AsmDPP16;
1063      }
1064    }
1065    multiclass VOP2_Real_dpp8_gfx10_with_name<bits<6> op, string opName,
1066                                              string asmName> {
1067      foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
1068      def _dpp8_gfx10 : VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
1069        VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32");
1070        let AsmString = asmName # ps.Pfl.AsmDPP8;
1071        let DecoderNamespace = "DPP8";
1072      }
1073    }
1074  } // End DecoderNamespace = "SDWA10"
1075
1076  //===------------------------------ VOP2be ------------------------------===//
1077  multiclass VOP2be_Real_e32_gfx10<bits<6> op, string opName, string asmName> {
1078    def _e32_gfx10 :
1079      VOP2_Real<!cast<VOP2_Pseudo>(opName#"_e32"), SIEncodingFamily.GFX10>,
1080      VOP2e<op{5-0}, !cast<VOP2_Pseudo>(opName#"_e32").Pfl> {
1081        VOP2_Pseudo Ps = !cast<VOP2_Pseudo>(opName#"_e32");
1082        let AsmString = asmName # !subst(", vcc", "", Ps.AsmOperands);
1083      }
1084  }
1085  multiclass VOP2be_Real_e64_gfx10<bits<6> op, string opName, string asmName> {
1086    def _e64_gfx10 :
1087      VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.GFX10>,
1088      VOP3be_gfx10<{0, 1, 0, 0, op{5-0}},
1089                   !cast<VOP3_Pseudo>(opName#"_e64").Pfl> {
1090        VOP3_Pseudo Ps = !cast<VOP3_Pseudo>(opName#"_e64");
1091        let AsmString = asmName # Ps.AsmOperands;
1092      }
1093  }
1094  multiclass VOP2be_Real_sdwa_gfx10<bits<6> op, string opName, string asmName> {
1095    foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9>.ret in
1096    def _sdwa_gfx10 :
1097      VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
1098      VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
1099        VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
1100        let AsmString = asmName # !subst(", vcc", "", Ps.AsmOperands);
1101        let DecoderNamespace = "SDWA10";
1102      }
1103    foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9>.ret in
1104    def _sdwa_w32_gfx10 :
1105      Base_VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
1106      VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
1107        VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
1108        let AsmString = asmName # !subst("vcc", "vcc_lo", Ps.AsmOperands);
1109        let isAsmParserOnly = 1;
1110        let DecoderNamespace = "SDWA10";
1111        let WaveSizePredicate = isWave32;
1112      }
1113    foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9>.ret in
1114    def _sdwa_w64_gfx10 :
1115      Base_VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
1116      VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
1117        VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
1118        let AsmString = asmName # Ps.AsmOperands;
1119        let isAsmParserOnly = 1;
1120        let DecoderNamespace = "SDWA10";
1121        let WaveSizePredicate = isWave64;
1122      }
1123  }
1124  multiclass VOP2be_Real_dpp_gfx10<bits<6> op, string opName, string asmName> {
1125    foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
1126    def _dpp_gfx10 :
1127      VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> {
1128        string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
1129        let AsmString = asmName # !subst(", vcc", "", AsmDPP);
1130        let DecoderNamespace = "SDWA10";
1131      }
1132    foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
1133    def _dpp_w32_gfx10 :
1134      Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> {
1135        string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
1136        let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP);
1137        let isAsmParserOnly = 1;
1138        let WaveSizePredicate = isWave32;
1139      }
1140    foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
1141    def _dpp_w64_gfx10 :
1142      Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> {
1143        string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
1144        let AsmString = asmName # AsmDPP;
1145        let isAsmParserOnly = 1;
1146        let WaveSizePredicate = isWave64;
1147      }
1148  }
1149  multiclass VOP2be_Real_dpp8_gfx10<bits<6> op, string opName, string asmName> {
1150    foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
1151    def _dpp8_gfx10 :
1152      VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
1153        string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
1154        let AsmString = asmName # !subst(", vcc", "", AsmDPP8);
1155        let DecoderNamespace = "DPP8";
1156      }
1157    foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
1158    def _dpp8_w32_gfx10 :
1159      VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
1160        string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
1161        let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP8);
1162        let isAsmParserOnly = 1;
1163        let WaveSizePredicate = isWave32;
1164      }
1165    foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
1166    def _dpp8_w64_gfx10 :
1167      VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
1168        string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
1169        let AsmString = asmName # AsmDPP8;
1170        let isAsmParserOnly = 1;
1171        let WaveSizePredicate = isWave64;
1172      }
1173  }
1174
1175  //===----------------------------- VOP3Only -----------------------------===//
1176  multiclass VOP3Only_Real_gfx10<bits<10> op> {
1177    def _e64_gfx10 :
1178      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>,
1179      VOP3e_gfx10<op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> {
1180        let IsSingle = 1;
1181      }
1182  }
1183
1184  //===---------------------------- VOP3beOnly ----------------------------===//
1185  multiclass VOP3beOnly_Real_gfx10<bits<10> op> {
1186    def _e64_gfx10 :
1187      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>,
1188      VOP3be_gfx10<op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> {
1189        let IsSingle = 1;
1190      }
1191  }
1192} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"
1193
// All GFX10 encodings (e32, e64, sdwa, dpp, dpp8) of a carry-using VOP2
// instruction.
multiclass VOP2be_Real_gfx10 <bits<6> op, string opName, string asmName>
    : VOP2be_Real_e32_gfx10<op, opName, asmName>,
      VOP2be_Real_e64_gfx10<op, opName, asmName>,
      VOP2be_Real_sdwa_gfx10<op, opName, asmName>,
      VOP2be_Real_dpp_gfx10<op, opName, asmName>,
      VOP2be_Real_dpp8_gfx10<op, opName, asmName>;
1200
// Plain e32/e64 encodings combined with the VOP2be-style sdwa/dpp/dpp8
// variants.
multiclass VOP2e_Real_gfx10 <bits<6> op, string opName, string asmName>
    : VOP2_Real_e32_gfx10<op>,
      VOP2_Real_e64_gfx10<op>,
      VOP2be_Real_sdwa_gfx10<op, opName, asmName>,
      VOP2be_Real_dpp_gfx10<op, opName, asmName>,
      VOP2be_Real_dpp8_gfx10<op, opName, asmName>;
1207
// All GFX10 encodings (e32, e64, sdwa, dpp, dpp8) of an ordinary VOP2
// instruction named NAME.
multiclass VOP2_Real_gfx10 <bits<6> op>
    : VOP2_Real_e32_gfx10<op>,
      VOP2_Real_e64_gfx10<op>,
      VOP2_Real_sdwa_gfx10<op>,
      VOP2_Real_dpp_gfx10<op>,
      VOP2_Real_dpp8_gfx10<op>;
1211
// All GFX10 encodings of the pseudo family `opName`, printed and parsed under
// the mnemonic `asmName`.
multiclass VOP2_Real_gfx10_with_name <bits<6> op, string opName,
                                      string asmName>
    : VOP2_Real_e32_gfx10_with_name<op, opName, asmName>,
      VOP2_Real_e64_gfx10_with_name<op, opName, asmName>,
      VOP2_Real_sdwa_gfx10_with_name<op, opName, asmName>,
      VOP2_Real_dpp_gfx10_with_name<op, opName, asmName>,
      VOP2_Real_dpp8_gfx10_with_name<op, opName, asmName>;
1219
// NB: Same opcode as v_mac_legacy_f32; it is placed in its own decoder
// namespace, "GFX10_B".
let DecoderNamespace = "GFX10_B" in {
  defm V_FMAC_LEGACY_F32 : VOP2_Real_gfx10<0x006>;
}

// GFX10 VOP2 opcode assignments.
defm V_XNOR_B32   : VOP2_Real_gfx10<0x01e>;
defm V_FMAC_F32   : VOP2_Real_gfx10<0x02b>;
defm V_FMAMK_F32  : VOP2Only_Real_MADK_gfx10<0x02c>;
defm V_FMAAK_F32  : VOP2Only_Real_MADK_gfx10<0x02d>;
defm V_ADD_F16    : VOP2_Real_gfx10<0x032>;
defm V_SUB_F16    : VOP2_Real_gfx10<0x033>;
defm V_SUBREV_F16 : VOP2_Real_gfx10<0x034>;
defm V_MUL_F16    : VOP2_Real_gfx10<0x035>;
defm V_FMAC_F16   : VOP2_Real_gfx10<0x036>;
defm V_FMAMK_F16  : VOP2Only_Real_MADK_gfx10<0x037>;
defm V_FMAAK_F16  : VOP2Only_Real_MADK_gfx10<0x038>;
defm V_MAX_F16    : VOP2_Real_gfx10<0x039>;
defm V_MIN_F16    : VOP2_Real_gfx10<0x03a>;
defm V_LDEXP_F16  : VOP2_Real_gfx10<0x03b>;

// Only the e32 encoding is defined for v_pk_fmac_f16.
let IsSingle = 1 in
defm V_PK_FMAC_F16 : VOP2_Real_e32_gfx10<0x03c>;
1242
// VOP2 no carry-in, carry-out.  The V_*_U32 pseudos are exposed under the
// v_*_nc_u32 mnemonics.
defm V_ADD_NC_U32    : VOP2_Real_gfx10_with_name<0x025, "V_ADD_U32",
                                                 "v_add_nc_u32">;
defm V_SUB_NC_U32    : VOP2_Real_gfx10_with_name<0x026, "V_SUB_U32",
                                                 "v_sub_nc_u32">;
defm V_SUBREV_NC_U32 : VOP2_Real_gfx10_with_name<0x027, "V_SUBREV_U32",
                                                 "v_subrev_nc_u32">;

// VOP2 carry-in, carry-out.  The V_ADDC/V_SUBB/V_SUBBREV pseudos are exposed
// under the v_*_co_ci_u32 mnemonics.
defm V_ADD_CO_CI_U32    : VOP2be_Real_gfx10<0x028, "V_ADDC_U32",
                                            "v_add_co_ci_u32">;
defm V_SUB_CO_CI_U32    : VOP2be_Real_gfx10<0x029, "V_SUBB_U32",
                                            "v_sub_co_ci_u32">;
defm V_SUBREV_CO_CI_U32 : VOP2be_Real_gfx10<0x02a, "V_SUBBREV_U32",
                                            "v_subrev_co_ci_u32">;

defm V_CNDMASK_B32 : VOP2e_Real_gfx10<0x001, "V_CNDMASK_B32",
                                      "v_cndmask_b32">;
1261
// VOP3 only: these get just an _e64 encoding on GFX10.
defm V_BFM_B32            : VOP3Only_Real_gfx10<0x363>;
defm V_BCNT_U32_B32       : VOP3Only_Real_gfx10<0x364>;
defm V_MBCNT_LO_U32_B32   : VOP3Only_Real_gfx10<0x365>;
defm V_MBCNT_HI_U32_B32   : VOP3Only_Real_gfx10<0x366>;
defm V_LDEXP_F32          : VOP3Only_Real_gfx10<0x362>;
defm V_CVT_PKNORM_I16_F32 : VOP3Only_Real_gfx10<0x368>;
defm V_CVT_PKNORM_U16_F32 : VOP3Only_Real_gfx10<0x369>;
defm V_CVT_PK_U16_U32     : VOP3Only_Real_gfx10<0x36a>;
defm V_CVT_PK_I16_I32     : VOP3Only_Real_gfx10<0x36b>;

// VOP3 carry-out: these get just an _e64 encoding, in the VOP3be layout.
defm V_ADD_CO_U32    : VOP3beOnly_Real_gfx10<0x30f>;
defm V_SUB_CO_U32    : VOP3beOnly_Real_gfx10<0x310>;
defm V_SUBREV_CO_U32 : VOP3beOnly_Real_gfx10<0x319>;
1277
// Assembler aliases for the GFX10 e32 forms of v_cndmask_b32 and the
// carry-in/carry-out add/sub instructions.
let SubtargetPredicate = isGFX10Plus in {
  defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx10>;

  defm : VOP2bInstAliases<V_ADDC_U32_e32, V_ADD_CO_CI_U32_e32_gfx10,
                          "v_add_co_ci_u32">;
  defm : VOP2bInstAliases<V_SUBB_U32_e32, V_SUB_CO_CI_U32_e32_gfx10,
                          "v_sub_co_ci_u32">;
  defm : VOP2bInstAliases<V_SUBBREV_U32_e32, V_SUBREV_CO_CI_U32_e32_gfx10,
                          "v_subrev_co_ci_u32">;
} // End SubtargetPredicate = isGFX10Plus
1288
1289//===----------------------------------------------------------------------===//
1290// GFX6, GFX7, GFX10.
1291//===----------------------------------------------------------------------===//
1292
// VOP2 bits of a DPP encoding: the src0 slot (Inst{8-0}) is fixed to 0xfa,
// the DPP marker; src1, vdst and the opcode sit in their usual VOP2 fields.
class VOP2_DPPe <bits<6> op, VOP2_DPP_Pseudo ps, VOPProfile P = ps.Pfl>
    : VOP_DPPe<P> {
  bits<8> vdst;
  bits<8> src1;

  let Inst{31}    = 0x0;  // encoding
  let Inst{30-25} = op;
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{16-9}  = !if(P.HasSrc1, src1{7-0}, 0);
  let Inst{8-0}   = 0xfa; // dpp
}
1303
// Real-instruction multiclasses for the GFX6/GFX7 encoding family
// (SIEncodingFamily.SI), decoded through the "GFX6GFX7" namespace.
let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
  // Lane instruction: the pseudo is NAME itself (no _e32 suffix), VOP2e layout.
  multiclass VOP2_Lane_Real_gfx6_gfx7 <bits<6> op> {
    def _gfx6_gfx7 : VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.SI>,
                     VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
  }

  // MADK-style instruction: the pseudo is NAME itself, 64-bit VOP2_MADKe layout.
  multiclass VOP2Only_Real_MADK_gfx6_gfx7 <bits<6> op> {
    def _gfx6_gfx7 : VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.SI>,
                     VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
  }

  // e32 encoding of the pseudo opName#"_e32".
  multiclass VOP2_Real_e32_gfx6_gfx7 <bits<6> op, string opName = NAME> {
    def _e32_gfx6_gfx7 : VOP2_Real<!cast<VOP2_Pseudo>(opName#"_e32"),
                                   SIEncodingFamily.SI>,
                         VOP2e<op{5-0}, !cast<VOP2_Pseudo>(opName#"_e32").Pfl>;
  }

  // e64 encoding; the VOP3 opcode is the VOP2 opcode prefixed with {1, 0, 0}.
  multiclass VOP2_Real_e64_gfx6_gfx7 <bits<6> op, string opName = NAME> {
    def _e64_gfx6_gfx7 : VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"),
                                   SIEncodingFamily.SI>,
                         VOP3e_gfx6_gfx7<{1, 0, 0, op{5-0}},
                                         !cast<VOP3_Pseudo>(opName#"_e64").Pfl>;
  }

  // e64 encoding in the VOP3be layout, same opcode prefix as above.
  multiclass VOP2be_Real_e64_gfx6_gfx7 <bits<6> op, string opName = NAME> {
    def _e64_gfx6_gfx7 : VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"),
                                   SIEncodingFamily.SI>,
                         VOP3be_gfx6_gfx7<{1, 0, 0, op{5-0}},
                                          !cast<VOP3_Pseudo>(opName#"_e64").Pfl>;
  }
} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"
1331
// MADK-style real instructions for both GFX6/GFX7 and GFX10.
multiclass VOP2Only_Real_MADK_gfx6_gfx7_gfx10 <bits<6> op>
    : VOP2Only_Real_MADK_gfx6_gfx7<op>,
      VOP2Only_Real_MADK_gfx10<op>;
1334
// e32 and e64 GFX6/GFX7 encodings of the VOP2 instruction NAME.
multiclass VOP2_Real_gfx6_gfx7 <bits<6> op>
    : VOP2_Real_e32_gfx6_gfx7<op>,
      VOP2_Real_e64_gfx6_gfx7<op>;
1337
// Encodings for instructions sharing one VOP2 opcode on GFX6/GFX7 and GFX10.
multiclass VOP2_Real_gfx6_gfx7_gfx10 <bits<6> op>
    : VOP2_Real_gfx6_gfx7<op>,
      VOP2_Real_gfx10<op>;
1340
// GFX6/GFX7 encodings of a carry-using VOP2 instruction: the regular e32 form
// plus an e64 form in the VOP3be layout.
multiclass VOP2be_Real_gfx6_gfx7 <bits<6> op>
    : VOP2_Real_e32_gfx6_gfx7<op>,
      VOP2be_Real_e64_gfx6_gfx7<op>;
1343
// GFX6/GFX7 encodings of the pseudo family `opName`, exposed under the
// mnemonic `asmName`.  Each encoding gets `asmName` followed by the operand
// string of its own pseudo.
multiclass VOP2be_Real_gfx6_gfx7_with_name <bits<6> op, string opName,
                                            string asmName> {
  defvar e32Pseudo = !cast<VOP2_Pseudo>(opName#"_e32");
  defvar e64Pseudo = !cast<VOP3_Pseudo>(opName#"_e64");

  let AsmString = asmName # e32Pseudo.AsmOperands in
  defm "" : VOP2_Real_e32_gfx6_gfx7<op, opName>;

  let AsmString = asmName # e64Pseudo.AsmOperands in
  defm "" : VOP2be_Real_e64_gfx6_gfx7<op, opName>;
}
1357
// VOP2 opcodes defined through the GFX6/GFX7-only multiclass.
defm V_CNDMASK_B32        : VOP2_Real_gfx6_gfx7<0x000>;
defm V_MIN_LEGACY_F32     : VOP2_Real_gfx6_gfx7<0x00d>;
defm V_MAX_LEGACY_F32     : VOP2_Real_gfx6_gfx7<0x00e>;
defm V_LSHR_B32           : VOP2_Real_gfx6_gfx7<0x015>;
defm V_ASHR_I32           : VOP2_Real_gfx6_gfx7<0x017>;
defm V_LSHL_B32           : VOP2_Real_gfx6_gfx7<0x019>;
defm V_BFM_B32            : VOP2_Real_gfx6_gfx7<0x01e>;
defm V_BCNT_U32_B32       : VOP2_Real_gfx6_gfx7<0x022>;
defm V_MBCNT_LO_U32_B32   : VOP2_Real_gfx6_gfx7<0x023>;
defm V_MBCNT_HI_U32_B32   : VOP2_Real_gfx6_gfx7<0x024>;
defm V_LDEXP_F32          : VOP2_Real_gfx6_gfx7<0x02b>;
defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_gfx6_gfx7<0x02c>;
defm V_CVT_PKNORM_I16_F32 : VOP2_Real_gfx6_gfx7<0x02d>;
defm V_CVT_PKNORM_U16_F32 : VOP2_Real_gfx6_gfx7<0x02e>;
defm V_CVT_PK_U16_U32     : VOP2_Real_gfx6_gfx7<0x030>;
defm V_CVT_PK_I16_I32     : VOP2_Real_gfx6_gfx7<0x031>;
1374
// V_ADD_I32, V_SUB_I32, and V_SUBREV_I32 were renamed to *_U32 in VI, but the
// VI instructions behave the same as the SI versions.  The SI encodings are
// therefore built from the V_*_CO_U32 pseudos under the old mnemonics.
defm V_ADD_I32     : VOP2be_Real_gfx6_gfx7_with_name<0x025, "V_ADD_CO_U32",
                                                     "v_add_i32">;
defm V_SUB_I32     : VOP2be_Real_gfx6_gfx7_with_name<0x026, "V_SUB_CO_U32",
                                                     "v_sub_i32">;
defm V_SUBREV_I32  : VOP2be_Real_gfx6_gfx7_with_name<0x027, "V_SUBREV_CO_U32",
                                                     "v_subrev_i32">;
defm V_ADDC_U32    : VOP2be_Real_gfx6_gfx7<0x028>;
defm V_SUBB_U32    : VOP2be_Real_gfx6_gfx7<0x029>;
defm V_SUBBREV_U32 : VOP2be_Real_gfx6_gfx7<0x02a>;
1383
// Lane access instructions.  The v_writelane_b32 real instruction overrides
// InOperandList so that it carries the $vdst_in operand.
defm V_READLANE_B32 : VOP2_Lane_Real_gfx6_gfx7<0x001>;

let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1,
                         VGPR_32:$vdst_in) in
defm V_WRITELANE_B32 : VOP2_Lane_Real_gfx6_gfx7<0x002>;
1389
// Assembler aliases tying the shared pseudos to their GFX6/GFX7 real
// instructions, for both the e32 and e64 forms.
let SubtargetPredicate = isGFX6GFX7 in {
  // e32 forms.
  defm : VOP2eInstAliases<V_CNDMASK_B32_e32,   V_CNDMASK_B32_e32_gfx6_gfx7>;
  defm : VOP2eInstAliases<V_ADD_CO_U32_e32,    V_ADD_I32_e32_gfx6_gfx7>;
  defm : VOP2eInstAliases<V_SUB_CO_U32_e32,    V_SUB_I32_e32_gfx6_gfx7>;
  defm : VOP2eInstAliases<V_SUBREV_CO_U32_e32, V_SUBREV_I32_e32_gfx6_gfx7>;

  // e64 forms.
  def : VOP2e64InstAlias<V_ADD_CO_U32_e64,    V_ADD_I32_e64_gfx6_gfx7>;
  def : VOP2e64InstAlias<V_SUB_CO_U32_e64,    V_SUB_I32_e64_gfx6_gfx7>;
  def : VOP2e64InstAlias<V_SUBREV_CO_U32_e64, V_SUBREV_I32_e64_gfx6_gfx7>;
} // End SubtargetPredicate = isGFX6GFX7
1400
// Instructions that share one VOP2 opcode across GFX6/GFX7 and GFX10.
defm V_ADD_F32           : VOP2_Real_gfx6_gfx7_gfx10<0x003>;
defm V_SUB_F32           : VOP2_Real_gfx6_gfx7_gfx10<0x004>;
defm V_SUBREV_F32        : VOP2_Real_gfx6_gfx7_gfx10<0x005>;
defm V_MAC_LEGACY_F32    : VOP2_Real_gfx6_gfx7_gfx10<0x006>;
defm V_MUL_LEGACY_F32    : VOP2_Real_gfx6_gfx7_gfx10<0x007>;
defm V_MUL_F32           : VOP2_Real_gfx6_gfx7_gfx10<0x008>;
defm V_MUL_I32_I24       : VOP2_Real_gfx6_gfx7_gfx10<0x009>;
defm V_MUL_HI_I32_I24    : VOP2_Real_gfx6_gfx7_gfx10<0x00a>;
defm V_MUL_U32_U24       : VOP2_Real_gfx6_gfx7_gfx10<0x00b>;
defm V_MUL_HI_U32_U24    : VOP2_Real_gfx6_gfx7_gfx10<0x00c>;
defm V_MIN_F32           : VOP2_Real_gfx6_gfx7_gfx10<0x00f>;
defm V_MAX_F32           : VOP2_Real_gfx6_gfx7_gfx10<0x010>;
defm V_MIN_I32           : VOP2_Real_gfx6_gfx7_gfx10<0x011>;
defm V_MAX_I32           : VOP2_Real_gfx6_gfx7_gfx10<0x012>;
defm V_MIN_U32           : VOP2_Real_gfx6_gfx7_gfx10<0x013>;
defm V_MAX_U32           : VOP2_Real_gfx6_gfx7_gfx10<0x014>;
defm V_LSHRREV_B32       : VOP2_Real_gfx6_gfx7_gfx10<0x016>;
defm V_ASHRREV_I32       : VOP2_Real_gfx6_gfx7_gfx10<0x018>;
defm V_LSHLREV_B32       : VOP2_Real_gfx6_gfx7_gfx10<0x01a>;
defm V_AND_B32           : VOP2_Real_gfx6_gfx7_gfx10<0x01b>;
defm V_OR_B32            : VOP2_Real_gfx6_gfx7_gfx10<0x01c>;
defm V_XOR_B32           : VOP2_Real_gfx6_gfx7_gfx10<0x01d>;
defm V_MAC_F32           : VOP2_Real_gfx6_gfx7_gfx10<0x01f>;
defm V_CVT_PKRTZ_F16_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x02f>;
defm V_MADMK_F32         : VOP2Only_Real_MADK_gfx6_gfx7_gfx10<0x020>;
defm V_MADAK_F32         : VOP2Only_Real_MADK_gfx6_gfx7_gfx10<0x021>;
1427
1428//===----------------------------------------------------------------------===//
1429// GFX8, GFX9 (VI).
1430//===----------------------------------------------------------------------===//
1431
// Real-instruction multiclasses for the VI encoding family
// (SIEncodingFamily.VI), decoded through the "GFX8" namespace.
let AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8" in {

  // MADK-style instruction: the pseudo is NAME itself, 64-bit VOP2_MADKe layout.
  multiclass VOP2_Real_MADK_vi <bits<6> op> {
    def _vi : VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.VI>,
              VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
  }

  // e32 encoding of the pseudo NAME#"_e32".
  multiclass VOP2_Real_e32_vi <bits<6> op> {
    def _e32_vi : VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"),
                            SIEncodingFamily.VI>,
                  VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>;
  }

  // e64 encoding of the pseudo NAME#"_e64"; takes the full 10-bit VOP3 opcode.
  multiclass VOP2_Real_e64_vi <bits<10> op> {
    def _e64_vi : VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"),
                            SIEncodingFamily.VI>,
                  VOP3e_vi<op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }

  // Same as VOP2_Real_e64_vi, with IsSingle set on the record.
  multiclass VOP2_Real_e64only_vi <bits<10> op> {
    def _e64_vi : VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"),
                            SIEncodingFamily.VI>,
                  VOP3e_vi<op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> {
      let IsSingle = 1;
    }
  }

  // e32 plus e64; the VOP3 opcode is the VOP2 opcode prefixed with
  // {0, 1, 0, 0}.
  multiclass Base_VOP2_Real_e32e64_vi <bits<6> op>
      : VOP2_Real_e32_vi<op>,
        VOP2_Real_e64_vi<{0, 1, 0, 0, op{5-0}}>;

} // End AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8"
1464
// VI SDWA real instruction; only emitted when the profile of the _e32 pseudo
// has HasExtSDWA set.
multiclass VOP2_SDWA_Real <bits<6> op> {
  foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA>.ret in {
    def _sdwa_vi : VOP_SDWA_Real<!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
                   VOP2_SDWAe<op{5-0},
                              !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
  }
}
1471
// GFX9 SDWA real instruction; only emitted when the profile of the _e32
// pseudo has HasExtSDWA9 set.
multiclass VOP2_SDWA9_Real <bits<6> op> {
  foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in {
    def _sdwa_gfx9 : VOP_SDWA9_Real<!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
                     VOP2_SDWA9Ae<op{5-0},
                                  !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
  }
}
1478
let AssemblerPredicate = isGFX8Only in {

// Carry-using VOP2 instruction accepted by the assembler only under
// isGFX8Only.  Every encoding is built from the pseudo family `OpName` and
// uses `AsmName` followed by the operand string of its own pseudo.  The SDWA
// and DPP records are emitted only when the _e32 profile enables them.
multiclass VOP2be_Real_e32e64_vi_only <bits<6> op, string OpName,
                                       string AsmName> {
  def _e32_vi : VOP2_Real<!cast<VOP2_Pseudo>(OpName#"_e32"),
                          SIEncodingFamily.VI>,
                VOP2e<op{5-0}, !cast<VOP2_Pseudo>(OpName#"_e32").Pfl> {
    VOP2_Pseudo ps = !cast<VOP2_Pseudo>(OpName#"_e32");
    let DecoderNamespace = "GFX8";
    let AsmString = AsmName # ps.AsmOperands;
  }

  def _e64_vi : VOP3_Real<!cast<VOP3_Pseudo>(OpName#"_e64"),
                          SIEncodingFamily.VI>,
                VOP3be_vi<{0, 1, 0, 0, op{5-0}},
                          !cast<VOP3_Pseudo>(OpName#"_e64").Pfl> {
    VOP3_Pseudo ps = !cast<VOP3_Pseudo>(OpName#"_e64");
    let DecoderNamespace = "GFX8";
    let AsmString = AsmName # ps.AsmOperands;
  }

  foreach _ = BoolToList<!cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtSDWA>.ret in {
    def _sdwa_vi : VOP_SDWA_Real<!cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa")>,
                   VOP2_SDWAe<op{5-0},
                              !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa").Pfl> {
      VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa");
      let AsmString = AsmName # ps.AsmOperands;
    }
  }

  foreach _ = BoolToList<!cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtDPP>.ret in {
    def _dpp_vi : VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(OpName#"_dpp"),
                               SIEncodingFamily.VI>,
                  VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(OpName#"_dpp")> {
      VOP2_DPP_Pseudo ps = !cast<VOP2_DPP_Pseudo>(OpName#"_dpp");
      let AsmString = AsmName # ps.AsmOperands;
    }
  }
}

} // End AssemblerPredicate = isGFX8Only
1512
// Reals that are assembled only when isGFX9Only holds. They use the GFX9
// encoding family and the "GFX9" decoder namespace for e32/e64.
1513let AssemblerPredicate = isGFX9Only in {
1514
// GFX9 counterpart of VOP2be_Real_e32e64_vi_only: defines the _e32_gfx9,
// _e64_gfx9, _sdwa_gfx9 and _dpp_gfx9 reals for one opcode.
//   op      - 6-bit VOP2 opcode; the e64 form uses {0,1,0,0,op}, i.e. 0x100 + op,
//             with the VOP3be_vi encoding.
//   OpName  - name prefix of the pseudos to wrap (<OpName>_e32, _e64, _sdwa,
//             _dpp); independent of the defm name.
//   AsmName - mnemonic; each AsmString is AsmName followed by the wrapped
//             pseudo's AsmOperands.
1515multiclass VOP2be_Real_e32e64_gfx9 <bits<6> op, string OpName, string AsmName> {
1516  def _e32_gfx9 :
1517    VOP2_Real<!cast<VOP2_Pseudo>(OpName#"_e32"), SIEncodingFamily.GFX9>,
1518    VOP2e<op{5-0}, !cast<VOP2_Pseudo>(OpName#"_e32").Pfl> {
1519      VOP2_Pseudo ps = !cast<VOP2_Pseudo>(OpName#"_e32");
1520      let AsmString = AsmName # ps.AsmOperands;
1521      let DecoderNamespace = "GFX9";
1522    }
1523  def _e64_gfx9 :
1524    VOP3_Real<!cast<VOP3_Pseudo>(OpName#"_e64"), SIEncodingFamily.GFX9>,
1525    VOP3be_vi <{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(OpName#"_e64").Pfl> {
1526      VOP3_Pseudo ps = !cast<VOP3_Pseudo>(OpName#"_e64");
1527      let AsmString = AsmName # ps.AsmOperands;
1528      let DecoderNamespace = "GFX9";
1529    }
  // Guarded by the HasExtSDWA9 flag of the e32 pseudo's profile.
1530  foreach _ = BoolToList<!cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtSDWA9>.ret in
1531    def _sdwa_gfx9 :
1532      VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa")>,
1533      VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa").Pfl> {
1534        VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa");
1535        let AsmString = AsmName # ps.AsmOperands;
1536      }
  // Guarded by the HasExtDPP flag of the e32 pseudo's profile. Note that the
  // DPP real is placed in the "SDWA9" decoder namespace.
1537  foreach _ = BoolToList<!cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtDPP>.ret in
1538    def _dpp_gfx9 :
1539      VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(OpName#"_dpp"), SIEncodingFamily.GFX9>,
1540      VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(OpName#"_dpp")> {
1541        VOP2_DPP_Pseudo ps = !cast<VOP2_DPP_Pseudo>(OpName#"_dpp");
1542        let AsmString = AsmName # ps.AsmOperands;
1543        let DecoderNamespace = "SDWA9";
1544      }
1545}
1546
// Same four reals as above, but the wrapped pseudos are looked up from the
// defm name (NAME) and the pseudo's own AsmString is left untouched.
1547multiclass VOP2_Real_e32e64_gfx9 <bits<6> op> {
1548  def _e32_gfx9 :
1549    VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX9>,
1550    VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>{
1551      let DecoderNamespace = "GFX9";
1552    }
1553  def _e64_gfx9 :
1554    VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX9>,
1555    VOP3e_vi <{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> {
1556      let DecoderNamespace = "GFX9";
1557    }
  // Guarded by HasExtSDWA9; the record body is intentionally empty (no
  // field overrides).
1558  foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in
1559    def _sdwa_gfx9 :
1560      VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
1561      VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl> {
1562      }
  // Guarded by HasExtDPP; placed in the "SDWA9" decoder namespace.
1563  foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
1564    def _dpp_gfx9 :
1565      VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>,
1566      VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")> {
1567        let DecoderNamespace = "SDWA9";
1568      }
1569}
1570
1571} // End AssemblerPredicate = isGFX9Only
1572
// Full set of VI-family reals for one 6-bit VOP2 opcode: e32 + e64 (via
// Base_VOP2_Real_e32e64_vi), the _sdwa_vi and _sdwa_gfx9 reals (each guarded
// by its own profile flag), plus a _dpp_vi real when the <NAME>_e32 pseudo's
// profile has HasExtDPP set.
1573multiclass VOP2_Real_e32e64_vi <bits<6> op> :
1574  Base_VOP2_Real_e32e64_vi<op>, VOP2_SDWA_Real<op>, VOP2_SDWA9_Real<op> {
1575
1576  foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
1577    def _dpp_vi :
1578      VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.VI>,
1579      VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")>;
1580}
1581
// VOP2 opcodes 0x0-0x18. The defm name is the NAME used inside the
// multiclasses, so each line wraps the pseudos <NAME>_e32/_e64/_sdwa/_dpp
// (or the pseudo NAME itself for the MADK forms).
1582defm V_CNDMASK_B32        : VOP2_Real_e32e64_vi <0x0>;
1583defm V_ADD_F32            : VOP2_Real_e32e64_vi <0x1>;
1584defm V_SUB_F32            : VOP2_Real_e32e64_vi <0x2>;
1585defm V_SUBREV_F32         : VOP2_Real_e32e64_vi <0x3>;
// This brace-less `let` applies only to the V_MUL_LEGACY_F32 defm that
// follows it.
1586let AssemblerPredicate = isGCN3ExcludingGFX90A in
1587defm V_MUL_LEGACY_F32     : VOP2_Real_e32e64_vi <0x4>;
1588defm V_MUL_F32            : VOP2_Real_e32e64_vi <0x5>;
1589defm V_MUL_I32_I24        : VOP2_Real_e32e64_vi <0x6>;
1590defm V_MUL_HI_I32_I24     : VOP2_Real_e32e64_vi <0x7>;
1591defm V_MUL_U32_U24        : VOP2_Real_e32e64_vi <0x8>;
1592defm V_MUL_HI_U32_U24     : VOP2_Real_e32e64_vi <0x9>;
1593defm V_MIN_F32            : VOP2_Real_e32e64_vi <0xa>;
1594defm V_MAX_F32            : VOP2_Real_e32e64_vi <0xb>;
1595defm V_MIN_I32            : VOP2_Real_e32e64_vi <0xc>;
1596defm V_MAX_I32            : VOP2_Real_e32e64_vi <0xd>;
1597defm V_MIN_U32            : VOP2_Real_e32e64_vi <0xe>;
1598defm V_MAX_U32            : VOP2_Real_e32e64_vi <0xf>;
1599defm V_LSHRREV_B32        : VOP2_Real_e32e64_vi <0x10>;
1600defm V_ASHRREV_I32        : VOP2_Real_e32e64_vi <0x11>;
1601defm V_LSHLREV_B32        : VOP2_Real_e32e64_vi <0x12>;
1602defm V_AND_B32            : VOP2_Real_e32e64_vi <0x13>;
1603defm V_OR_B32             : VOP2_Real_e32e64_vi <0x14>;
1604defm V_XOR_B32            : VOP2_Real_e32e64_vi <0x15>;
1605defm V_MAC_F32            : VOP2_Real_e32e64_vi <0x16>;
// The MADK forms use the 64-bit encoding with a trailing 32-bit immediate
// (VOP2_MADKe) and have no e64/sdwa/dpp variants defined here.
1606defm V_MADMK_F32          : VOP2_Real_MADK_vi <0x17>;
1607defm V_MADAK_F32          : VOP2_Real_MADK_vi <0x18>;
1608
// Opcodes 0x19-0x1e are instantiated twice from the same pseudos (the OpName
// string argument). The first group uses the isGFX8Only multiclass and the
// mnemonics v_add_u32, v_addc_u32, ...
1609defm V_ADD_U32            : VOP2be_Real_e32e64_vi_only <0x19, "V_ADD_CO_U32",     "v_add_u32">;
1610defm V_SUB_U32            : VOP2be_Real_e32e64_vi_only <0x1a, "V_SUB_CO_U32",     "v_sub_u32">;
1611defm V_SUBREV_U32         : VOP2be_Real_e32e64_vi_only <0x1b, "V_SUBREV_CO_U32",  "v_subrev_u32">;
1612defm V_ADDC_U32           : VOP2be_Real_e32e64_vi_only <0x1c, "V_ADDC_U32",    "v_addc_u32">;
1613defm V_SUBB_U32           : VOP2be_Real_e32e64_vi_only <0x1d, "V_SUBB_U32",    "v_subb_u32">;
1614defm V_SUBBREV_U32        : VOP2be_Real_e32e64_vi_only <0x1e, "V_SUBBREV_U32", "v_subbrev_u32">;
1615
// ... and the second group uses the isGFX9Only multiclass with the *_co_*
// mnemonics for the same opcodes and pseudos.
1616defm V_ADD_CO_U32         : VOP2be_Real_e32e64_gfx9 <0x19, "V_ADD_CO_U32",     "v_add_co_u32">;
1617defm V_SUB_CO_U32         : VOP2be_Real_e32e64_gfx9 <0x1a, "V_SUB_CO_U32",     "v_sub_co_u32">;
1618defm V_SUBREV_CO_U32      : VOP2be_Real_e32e64_gfx9 <0x1b, "V_SUBREV_CO_U32",  "v_subrev_co_u32">;
1619defm V_ADDC_CO_U32        : VOP2be_Real_e32e64_gfx9 <0x1c, "V_ADDC_U32",    "v_addc_co_u32">;
1620defm V_SUBB_CO_U32        : VOP2be_Real_e32e64_gfx9 <0x1d, "V_SUBB_U32",    "v_subb_co_u32">;
1621defm V_SUBBREV_CO_U32     : VOP2be_Real_e32e64_gfx9 <0x1e, "V_SUBBREV_U32", "v_subbrev_co_u32">;
1622
// On GFX9 the names V_ADD_U32 / V_SUB_U32 / V_SUBREV_U32 wrap the pseudos of
// the same name at opcodes 0x34-0x36. The resulting records carry the _gfx9
// suffix, so they do not clash with the _vi records defined above under the
// same defm names.
1623defm V_ADD_U32            : VOP2_Real_e32e64_gfx9 <0x34>;
1624defm V_SUB_U32            : VOP2_Real_e32e64_gfx9 <0x35>;
1625defm V_SUBREV_U32         : VOP2_Real_e32e64_gfx9 <0x36>;
1626
// Instructions instantiated only in the e64 (VOP3) encoding for the VI
// family. The argument is the full 10-bit VOP3 opcode, and
// VOP2_Real_e64only_vi marks each record with IsSingle = 1.
1627defm V_BFM_B32            : VOP2_Real_e64only_vi <0x293>;
1628defm V_BCNT_U32_B32       : VOP2_Real_e64only_vi <0x28b>;
1629defm V_MBCNT_LO_U32_B32   : VOP2_Real_e64only_vi <0x28c>;
1630defm V_MBCNT_HI_U32_B32   : VOP2_Real_e64only_vi <0x28d>;
1631defm V_LDEXP_F32          : VOP2_Real_e64only_vi <0x288>;
1632defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_e64only_vi <0x1f0>;
1633defm V_CVT_PKNORM_I16_F32 : VOP2_Real_e64only_vi <0x294>;
1634defm V_CVT_PKNORM_U16_F32 : VOP2_Real_e64only_vi <0x295>;
1635defm V_CVT_PKRTZ_F16_F32  : VOP2_Real_e64only_vi <0x296>;
1636defm V_CVT_PK_U16_U32     : VOP2_Real_e64only_vi <0x297>;
1637defm V_CVT_PK_I16_I32     : VOP2_Real_e64only_vi <0x298>;
1638
// F16 / U16 / I16 / B16 instructions, VOP2 opcodes 0x1f-0x33. As in the
// 32-bit list, the MADK forms (0x24, 0x25) use VOP2_Real_MADK_vi and the
// rest use VOP2_Real_e32e64_vi.
1639defm V_ADD_F16            : VOP2_Real_e32e64_vi <0x1f>;
1640defm V_SUB_F16            : VOP2_Real_e32e64_vi <0x20>;
1641defm V_SUBREV_F16         : VOP2_Real_e32e64_vi <0x21>;
1642defm V_MUL_F16            : VOP2_Real_e32e64_vi <0x22>;
1643defm V_MAC_F16            : VOP2_Real_e32e64_vi <0x23>;
1644defm V_MADMK_F16          : VOP2_Real_MADK_vi <0x24>;
1645defm V_MADAK_F16          : VOP2_Real_MADK_vi <0x25>;
1646defm V_ADD_U16            : VOP2_Real_e32e64_vi <0x26>;
1647defm V_SUB_U16            : VOP2_Real_e32e64_vi <0x27>;
1648defm V_SUBREV_U16         : VOP2_Real_e32e64_vi <0x28>;
1649defm V_MUL_LO_U16         : VOP2_Real_e32e64_vi <0x29>;
1650defm V_LSHLREV_B16        : VOP2_Real_e32e64_vi <0x2a>;
1651defm V_LSHRREV_B16        : VOP2_Real_e32e64_vi <0x2b>;
1652defm V_ASHRREV_I16        : VOP2_Real_e32e64_vi <0x2c>;
1653defm V_MAX_F16            : VOP2_Real_e32e64_vi <0x2d>;
1654defm V_MIN_F16            : VOP2_Real_e32e64_vi <0x2e>;
1655defm V_MAX_U16            : VOP2_Real_e32e64_vi <0x2f>;
1656defm V_MAX_I16            : VOP2_Real_e32e64_vi <0x30>;
1657defm V_MIN_U16            : VOP2_Real_e32e64_vi <0x31>;
1658defm V_MIN_I16            : VOP2_Real_e32e64_vi <0x32>;
1659defm V_LDEXP_F16          : VOP2_Real_e32e64_vi <0x33>;
1660
// Assembler aliases that apply when SubtargetPredicate isGFX8GFX9 holds.
1661let SubtargetPredicate = isGFX8GFX9 in {
1662
1663// Aliases to simplify matching of floating-point instructions that
1664// are VOP2 on SI and VOP3 on VI.
// The alias accepts the plain three-operand form "name $dst, $src0, $src1"
// and expands it to the VOP3 real `inst`, passing literal 0 for every other
// operand. One extra trailing 0 is supplied when inst's profile has HasOMod.
// NOTE(review): the 0 operands are presumably the source modifiers, clamp
// and (optionally) omod - confirm against the VOP3 operand order.
1665class SI2_VI3Alias <string name, VOP3_Real inst> : InstAlias <
1666  name#" $dst, $src0, $src1",
1667  !if(inst.Pfl.HasOMod,
1668      (inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0, 0),
1669      (inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0))
1670>, PredicateControl {
1671  let UseInstAsmMatchConverter = 0;
1672  let AsmVariantName = AMDGPUAsmVariants.VOP3;
1673}
1674
// One alias per e64-only instruction defined with VOP2_Real_e64only_vi.
1675def : SI2_VI3Alias <"v_ldexp_f32", V_LDEXP_F32_e64_vi>;
1676def : SI2_VI3Alias <"v_cvt_pkaccum_u8_f32", V_CVT_PKACCUM_U8_F32_e64_vi>;
1677def : SI2_VI3Alias <"v_cvt_pknorm_i16_f32", V_CVT_PKNORM_I16_F32_e64_vi>;
1678def : SI2_VI3Alias <"v_cvt_pknorm_u16_f32", V_CVT_PKNORM_U16_F32_e64_vi>;
1679def : SI2_VI3Alias <"v_cvt_pkrtz_f16_f32", V_CVT_PKRTZ_F16_F32_e64_vi>;
1680
// VOP2eInstAliases is defined elsewhere in this file; it is given the
// V_CNDMASK_B32 pseudo and its VI real.
1681defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_vi>;
1682
1683} // End SubtargetPredicate = isGFX8GFX9
1684
// Assembler aliases for the GFX9 carry add/sub reals (the *_CO_* records
// defined with VOP2be_Real_e32e64_gfx9). Each line passes a pseudo, the
// matching _e32_gfx9 real, and the GFX9 mnemonic to VOP2bInstAliases, which
// is defined elsewhere in this file.
1685let SubtargetPredicate = isGFX9Only in {
1686
1687defm : VOP2bInstAliases<V_ADD_U32_e32,     V_ADD_CO_U32_e32_gfx9,     "v_add_co_u32">;
1688defm : VOP2bInstAliases<V_ADDC_U32_e32,    V_ADDC_CO_U32_e32_gfx9,    "v_addc_co_u32">;
1689defm : VOP2bInstAliases<V_SUB_U32_e32,     V_SUB_CO_U32_e32_gfx9,     "v_sub_co_u32">;
1690defm : VOP2bInstAliases<V_SUBB_U32_e32,    V_SUBB_CO_U32_e32_gfx9,    "v_subb_co_u32">;
1691defm : VOP2bInstAliases<V_SUBREV_U32_e32,  V_SUBREV_CO_U32_e32_gfx9,  "v_subrev_co_u32">;
1692defm : VOP2bInstAliases<V_SUBBREV_U32_e32, V_SUBBREV_CO_U32_e32_gfx9, "v_subbrev_co_u32">;
1693
1694} // End SubtargetPredicate = isGFX9Only
1695
// VI-family reals that additionally require the HasDLInsts subtarget
// predicate (opcodes 0x3b and 0x3d).
1696let SubtargetPredicate = HasDLInsts in {
1697
1698defm V_FMAC_F32 : VOP2_Real_e32e64_vi <0x3b>;
1699defm V_XNOR_B32 : VOP2_Real_e32e64_vi <0x3d>;
1700
1701} // End SubtargetPredicate = HasDLInsts
1702
// Real-instruction multiclasses for the GFX90A encoding family. Records in
// this scope get AssemblerPredicate = isGFX90APlus and, unless overridden,
// DecoderNamespace = "GFX90A". They reuse the VOP2e / VOP3e_vi encoding
// classes that the VI multiclasses use.
1703let AssemblerPredicate = isGFX90APlus, DecoderNamespace = "GFX90A" in {
  // Defines <NAME>_e32_gfx90a from the pseudo <NAME>_e32.
1704  multiclass VOP2_Real_e32_gfx90a <bits<6> op> {
1705    def _e32_gfx90a :
1706      VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX90A>,
1707      VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>;
1708  }
1709
  // Defines <NAME>_e64_gfx90a from the VOP3 pseudo <NAME>_e64; `op` is the
  // full 10-bit VOP3 opcode.
1710  multiclass VOP2_Real_e64_gfx90a <bits<10> op> {
1711    def _e64_gfx90a :
1712      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX90A>,
1713      VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
1714  }
1715
  // e32 + e64 for a 6-bit VOP2 opcode; the e64 opcode is {0,1,0,0,op},
  // i.e. 0x100 + op.
1716  multiclass Base_VOP2_Real_e32e64_gfx90a <bits<6> op> :
1717    VOP2_Real_e32_gfx90a<op>,
1718    VOP2_Real_e64_gfx90a<{0, 1, 0, 0, op{5-0}}>;
1719
  // e32 + e64, plus a _dpp_gfx90a real when the <NAME>_e32 pseudo's profile
  // has HasExtDPP set. The DPP real overrides the scope's decoder namespace
  // with "SDWA9". No SDWA real is defined by this multiclass.
1720  multiclass VOP2_Real_e32e64_gfx90a <bits<6> op> :
1721    Base_VOP2_Real_e32e64_gfx90a<op> {
1722
1723    foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
1724      def _dpp_gfx90a :
1725        VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX90A>,
1726        VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")> {
1727          let DecoderNamespace = "SDWA9";
1728        }
1729  }
1730} // End AssemblerPredicate = isGFX90APlus, DecoderNamespace = "GFX90A"
1731
// GFX90A instantiations. VOP2 opcode 0x4 is V_FMAC_F64 here; in the VI list
// the same opcode is V_MUL_LEGACY_F32, restricted there to
// isGCN3ExcludingGFX90A. For GFX90A, V_MUL_LEGACY_F32 is instantiated only
// as an e64 real with VOP3 opcode 0x2a1 and is marked IsSingle = 1.
1732let SubtargetPredicate = isGFX90APlus in {
1733  defm V_FMAC_F64       : VOP2_Real_e32e64_gfx90a <0x4>;
1734  let IsSingle = 1 in {
1735    defm V_MUL_LEGACY_F32 : VOP2_Real_e64_gfx90a <0x2a1>;
1736  }
1737} // End SubtargetPredicate = isGFX90APlus
1738
// Reals for the dot-accumulate instructions on the VI encoding family: the
// e32 real from VOP2_Real_e32_vi plus a _dpp_vi record built from VOP2_DPP.
// No e64 or SDWA real is defined, and unlike VOP2_Real_e32e64_vi the DPP
// record is not guarded by the profile's HasExtDPP flag.
1739multiclass VOP2_Real_DOT_ACC_gfx9<bits<6> op> : VOP2_Real_e32_vi<op> {
1740  def _dpp_vi : VOP2_DPP<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")>;
1741}
1742
// GFX10 reals for the dot-accumulate instructions: combines the e32, dpp and
// dpp8 multiclasses (all three are defined elsewhere in this file) for the
// same 6-bit opcode.
1743multiclass VOP2_Real_DOT_ACC_gfx10<bits<6> op> :
1744  VOP2_Real_e32_gfx10<op>,
1745  VOP2_Real_dpp_gfx10<op>,
1746  VOP2_Real_dpp8_gfx10<op>;
1747
// Dot-accumulate instantiations, grouped by the subtarget predicate each one
// requires. An instruction listed with both multiclasses gets VI-family
// reals (gfx9 multiclass) and GFX10 reals (gfx10 multiclass), each with its
// own opcode.
1748let SubtargetPredicate = HasDot5Insts in {
1749  defm V_DOT2C_F32_F16 : VOP2_Real_DOT_ACC_gfx9<0x37>;
1750  // NB: Opcode conflicts with V_DOT8C_I32_I4
1751  // This opcode exists in gfx 10.1* only
1752  defm V_DOT2C_F32_F16 : VOP2_Real_DOT_ACC_gfx10<0x02>;
1753}
1754
1755let SubtargetPredicate = HasDot6Insts in {
1756  defm V_DOT4C_I32_I8  : VOP2_Real_DOT_ACC_gfx9<0x39>;
1757  defm V_DOT4C_I32_I8  : VOP2_Real_DOT_ACC_gfx10<0x0d>;
1758}
1759
// Only VI-family reals are defined in the two groups below.
1760let SubtargetPredicate = HasDot4Insts in {
1761  defm V_DOT2C_I32_I16 : VOP2_Real_DOT_ACC_gfx9<0x38>;
1762}
1763let SubtargetPredicate = HasDot3Insts in {
1764  defm V_DOT8C_I32_I4  : VOP2_Real_DOT_ACC_gfx9<0x3a>;
1765}
1766
// V_PK_FMAC_F16 is instantiated with VOP2_Real_e32_vi only, so just the
// _e32_vi real is defined here (no e64, SDWA or DPP real), at opcode 0x3c.
1767let SubtargetPredicate = HasPkFmacF16Inst in {
1768defm V_PK_FMAC_F16 : VOP2_Real_e32_vi<0x3c>;
1769} // End SubtargetPredicate = HasPkFmacF16Inst
1770
// GFX10 reals for V_DOT8C_I32_I4. They reuse opcode 0x02, which the GFX10
// V_DOT2C_F32_F16 reals also use, and are placed in the separate
// "GFX10_B" decoder namespace.
// NOTE(review): the separate namespace is presumably what keeps the two
// encodings apart in the disassembler tables - confirm in the disassembler.
1771let SubtargetPredicate = HasDot3Insts in {
1772  // NB: Opcode conflicts with V_DOT2C_F32_F16
1773  let DecoderNamespace = "GFX10_B" in
1774  defm V_DOT8C_I32_I4 : VOP2_Real_DOT_ACC_gfx10<0x02>;
1775}
1776