xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.td (revision 5d6d6278979b9eab598a23c804d23e930d2f7268)
1//===-- SIInstrInfo.td -----------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
// Wavefront-size predicates. Each one pairs a C++ check on the subtarget's
// wavefront size with the corresponding assembler feature requirement.
def isWave32
    : Predicate<"Subtarget->getWavefrontSize() == 32">,
      AssemblerPredicate<(all_of FeatureWavefrontSize32)>;

def isWave64
    : Predicate<"Subtarget->getWavefrontSize() == 64">,
      AssemblerPredicate<(all_of FeatureWavefrontSize64)>;
13
// PredicateControl extended with two assembler-predicate fields, defaulted to
// the GFX6/GFX7 and GFX8/GFX9 predicates respectively.
class GCNPredicateControl : PredicateControl {
  // Default predicate for the "SI" assembler flavour.
  Predicate SIAssemblerPredicate = isGFX6GFX7;

  // Default predicate for the "VI" assembler flavour.
  Predicate VIAssemblerPredicate = isGFX8GFX9;
}
18
// Numeric identifiers for the encoding families.
//
// Apart from NONE, these values must stay in sync with the SIEncodingFamily
// enum in SIInstrInfo.cpp and with the column order of the getMCOpcodeGen
// table.
def SIEncodingFamily {
  int NONE   = -1;
  int SI     = 0;
  int VI     = 1;
  int SDWA   = 2;
  int SDWA9  = 3;
  int GFX80  = 4;
  int GFX9   = 5;
  int GFX10  = 6;
  int SDWA10 = 7;
  int GFX90A = 8;
  int GFX940 = 9;
  int GFX11  = 10;
  int GFX12  = 11;
}
37
38//===----------------------------------------------------------------------===//
39// SI DAG Nodes
40//===----------------------------------------------------------------------===//
41
// DAG node for AMDGPUISD::CLAMP, typed with the unary floating-point profile.
def AMDGPUclamp
    : SDNode<"AMDGPUISD::CLAMP", SDTFPUnaryOp>;
43
// Type profile shared by the SBUFFER_LOAD* nodes: one result, three operands.
// Index 0 (vdata) is left unconstrained.
def SDTSBufferLoad : SDTypeProfile<1, 3, [
  SDTCisVT<1, v4i32>,  // rsrc
  SDTCisVT<2, i32>,    // offset(imm)
  SDTCisVT<3, i32>     // cachepolicy
]>;

// SBUFFER_LOAD nodes. All of them may load and carry a memory operand.
def SIsbuffer_load
    : SDNode<"AMDGPUISD::SBUFFER_LOAD", SDTSBufferLoad,
             [SDNPMayLoad, SDNPMemOperand]>;

def SIsbuffer_load_byte
    : SDNode<"AMDGPUISD::SBUFFER_LOAD_BYTE", SDTSBufferLoad,
             [SDNPMayLoad, SDNPMemOperand]>;

def SIsbuffer_load_ubyte
    : SDNode<"AMDGPUISD::SBUFFER_LOAD_UBYTE", SDTSBufferLoad,
             [SDNPMayLoad, SDNPMemOperand]>;

def SIsbuffer_load_short
    : SDNode<"AMDGPUISD::SBUFFER_LOAD_SHORT", SDTSBufferLoad,
             [SDNPMayLoad, SDNPMemOperand]>;

def SIsbuffer_load_ushort
    : SDNode<"AMDGPUISD::SBUFFER_LOAD_USHORT", SDTSBufferLoad,
             [SDNPMayLoad, SDNPMemOperand]>;
67
// DAG node for AMDGPUISD::DS_ORDERED_COUNT: an i32 result computed from an
// i32 operand and an i16 operand. It is chained, takes a glue input, and is
// marked as both loading and storing with a memory operand.
def SIds_ordered_count : SDNode<
    "AMDGPUISD::DS_ORDERED_COUNT",
    SDTypeProfile<1, 2, [
      SDTCisVT<0, i32>,
      SDTCisVT<1, i32>,
      SDTCisVT<2, i16>
    ]>,
    [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain, SDNPInGlue]>;
72
// Type profile for two-operand floating-point atomics: the result is a
// floating-point value with the same type as operand 2, and operand 1 is a
// pointer.
def SDTAtomic2_f32 : SDTypeProfile<1, 2, [
  SDTCisSameAs<0, 2>,
  SDTCisFP<0>,
  SDTCisPtrTy<1>
]>;

// Target-specific atomic FP min/max nodes built on the profile above.
def SIatomic_fmin
    : SDNode<"AMDGPUISD::ATOMIC_LOAD_FMIN", SDTAtomic2_f32,
             [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;

def SIatomic_fmax
    : SDNode<"AMDGPUISD::ATOMIC_LOAD_FMAX", SDTAtomic2_f32,
             [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
84
// Type profile for the d16 loads, used as:
//   load_d16_{lo|hi} ptr, tied_input
// Operand 1 is the pointer; the result has the same type as operand 2 (the
// tied input).
def SIload_d16 : SDTypeProfile<1, 2, [
  SDTCisPtrTy<1>,
  SDTCisSameAs<0, 2>
]>;
90
91
// Type profile for typed buffer loads: one result (vdata, unconstrained) and
// eight operands.
def SDTtbuffer_load : SDTypeProfile<1, 8, [
  SDTCisVT<1, v4i32>,  // rsrc
  SDTCisVT<2, i32>,    // vindex(VGPR)
  SDTCisVT<3, i32>,    // voffset(VGPR)
  SDTCisVT<4, i32>,    // soffset(SGPR)
  SDTCisVT<5, i32>,    // offset(imm)
  SDTCisVT<6, i32>,    // format(imm)
  SDTCisVT<7, i32>,    // cachepolicy, swizzled buffer(imm)
  SDTCisVT<8, i1>      // idxen(imm)
]>;

// Typed buffer load nodes (regular and D16). Both are chained loads with a
// memory operand.
def SItbuffer_load
    : SDNode<"AMDGPUISD::TBUFFER_LOAD_FORMAT", SDTtbuffer_load,
             [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;

def SItbuffer_load_d16
    : SDNode<"AMDGPUISD::TBUFFER_LOAD_FORMAT_D16", SDTtbuffer_load,
             [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;
109
// Type profile for typed buffer stores: no result and nine operands. Operand
// 0 (vdata) is left unconstrained.
def SDTtbuffer_store : SDTypeProfile<0, 9, [
  SDTCisVT<1, v4i32>,  // rsrc
  SDTCisVT<2, i32>,    // vindex(VGPR)
  SDTCisVT<3, i32>,    // voffset(VGPR)
  SDTCisVT<4, i32>,    // soffset(SGPR)
  SDTCisVT<5, i32>,    // offset(imm)
  SDTCisVT<6, i32>,    // format(imm)
  SDTCisVT<7, i32>,    // cachepolicy, swizzled buffer(imm)
  SDTCisVT<8, i1>      // idxen(imm)
]>;

// Typed buffer store nodes (regular and D16). Both are chained stores with a
// memory operand.
def SItbuffer_store
    : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT", SDTtbuffer_store,
             [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;

def SItbuffer_store_d16
    : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT_D16", SDTtbuffer_store,
             [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
127
// Type profile for buffer loads: one result (vdata, unconstrained) and seven
// operands.
def SDTBufferLoad : SDTypeProfile<1, 7, [
  SDTCisVT<1, v4i32>,  // rsrc
  SDTCisVT<2, i32>,    // vindex(VGPR)
  SDTCisVT<3, i32>,    // voffset(VGPR)
  SDTCisVT<4, i32>,    // soffset(SGPR)
  SDTCisVT<5, i32>,    // offset(imm)
  SDTCisVT<6, i32>,    // cachepolicy, swizzled buffer(imm)
  SDTCisVT<7, i1>      // idxen(imm)
]>;

// Buffer load nodes. Every node is a chained load with a memory operand and
// shares the SDTBufferLoad profile; only the ISD opcode differs.
def SIbuffer_load
    : SDNode<"AMDGPUISD::BUFFER_LOAD", SDTBufferLoad,
             [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;

def SIbuffer_load_ubyte
    : SDNode<"AMDGPUISD::BUFFER_LOAD_UBYTE", SDTBufferLoad,
             [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;

def SIbuffer_load_ushort
    : SDNode<"AMDGPUISD::BUFFER_LOAD_USHORT", SDTBufferLoad,
             [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;

def SIbuffer_load_byte
    : SDNode<"AMDGPUISD::BUFFER_LOAD_BYTE", SDTBufferLoad,
             [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;

def SIbuffer_load_short
    : SDNode<"AMDGPUISD::BUFFER_LOAD_SHORT", SDTBufferLoad,
             [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;

def SIbuffer_load_format
    : SDNode<"AMDGPUISD::BUFFER_LOAD_FORMAT", SDTBufferLoad,
             [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;

def SIbuffer_load_format_tfe
    : SDNode<"AMDGPUISD::BUFFER_LOAD_FORMAT_TFE", SDTBufferLoad,
             [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;

def SIbuffer_load_format_d16
    : SDNode<"AMDGPUISD::BUFFER_LOAD_FORMAT_D16", SDTBufferLoad,
             [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
155
// Type profile for buffer stores: no result and eight operands. Operand 0
// (vdata) is left unconstrained.
def SDTBufferStore : SDTypeProfile<0, 8, [
  SDTCisVT<1, v4i32>,  // rsrc
  SDTCisVT<2, i32>,    // vindex(VGPR)
  SDTCisVT<3, i32>,    // voffset(VGPR)
  SDTCisVT<4, i32>,    // soffset(SGPR)
  SDTCisVT<5, i32>,    // offset(imm)
  SDTCisVT<6, i32>,    // cachepolicy, swizzled buffer(imm)
  SDTCisVT<7, i1>      // idxen(imm)
]>;

// Buffer store nodes. Every node is a chained store with a memory operand and
// shares the SDTBufferStore profile; only the ISD opcode differs.
def SIbuffer_store
    : SDNode<"AMDGPUISD::BUFFER_STORE", SDTBufferStore,
             [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;

def SIbuffer_store_byte
    : SDNode<"AMDGPUISD::BUFFER_STORE_BYTE", SDTBufferStore,
             [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;

def SIbuffer_store_short
    : SDNode<"AMDGPUISD::BUFFER_STORE_SHORT", SDTBufferStore,
             [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;

def SIbuffer_store_format
    : SDNode<"AMDGPUISD::BUFFER_STORE_FORMAT", SDTBufferStore,
             [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;

def SIbuffer_store_format_d16
    : SDNode<"AMDGPUISD::BUFFER_STORE_FORMAT_D16", SDTBufferStore,
             [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
180
// Produces two records per instantiation:
//   <NAME>        - an SDNode for `opcode` with one result and eight operands
//                   (operand 1, the data input, is left unconstrained);
//   <NAME>_noret  - a PatFrag over that node with HasNoUse set.
multiclass SDBufferAtomic<string opcode> {
  def "" : SDNode<
      opcode,
      SDTypeProfile<1, 8, [
        SDTCisVT<2, v4i32>,  // rsrc
        SDTCisVT<3, i32>,    // vindex(VGPR)
        SDTCisVT<4, i32>,    // voffset(VGPR)
        SDTCisVT<5, i32>,    // soffset(SGPR)
        SDTCisVT<6, i32>,    // offset(imm)
        SDTCisVT<7, i32>,    // cachepolicy(imm)
        SDTCisVT<8, i1>      // idxen(imm)
      ]>,
      [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;

  def "_noret" : PatFrag<
      (ops node:$vdata_in, node:$rsrc, node:$vindex, node:$voffset,
           node:$soffset, node:$offset, node:$cachepolicy, node:$idxen),
      // NAME resolves to the SDNode defined just above for this instantiation.
      (!cast<SDNode>(NAME) node:$vdata_in, node:$rsrc, node:$vindex,
                           node:$voffset, node:$soffset, node:$offset,
                           node:$cachepolicy, node:$idxen)> {
    let HasNoUse = true;
  }
}
202
// One SDBufferAtomic instantiation (node + _noret fragment) per buffer atomic
// opcode.
defm SIbuffer_atomic_swap
    : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_SWAP">;
defm SIbuffer_atomic_add
    : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_ADD">;
defm SIbuffer_atomic_sub
    : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_SUB">;
defm SIbuffer_atomic_smin
    : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_SMIN">;
defm SIbuffer_atomic_umin
    : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_UMIN">;
defm SIbuffer_atomic_smax
    : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_SMAX">;
defm SIbuffer_atomic_umax
    : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_UMAX">;
defm SIbuffer_atomic_and
    : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_AND">;
defm SIbuffer_atomic_or
    : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_OR">;
defm SIbuffer_atomic_xor
    : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_XOR">;
defm SIbuffer_atomic_inc
    : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_INC">;
defm SIbuffer_atomic_dec
    : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_DEC">;
defm SIbuffer_atomic_csub
    : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_CSUB">;
defm SIbuffer_atomic_fadd
    : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_FADD">;
defm SIbuffer_atomic_fadd_bf16
    : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_FADD_BF16">;
defm SIbuffer_atomic_fmin
    : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_FMIN">;
defm SIbuffer_atomic_fmax
    : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_FMAX">;
defm SIbuffer_atomic_cond_sub_u32
    : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_COND_SUB_U32">;
221
// Buffer compare-and-swap node: one result and nine operands. Operands 1 and
// 2 (named $src and $cmp by the fragment below) are left unconstrained.
def SIbuffer_atomic_cmpswap : SDNode<
    "AMDGPUISD::BUFFER_ATOMIC_CMPSWAP",
    SDTypeProfile<1, 9, [
      SDTCisVT<3, v4i32>,  // rsrc
      SDTCisVT<4, i32>,    // vindex(VGPR)
      SDTCisVT<5, i32>,    // voffset(VGPR)
      SDTCisVT<6, i32>,    // soffset(SGPR)
      SDTCisVT<7, i32>,    // offset(imm)
      SDTCisVT<8, i32>,    // cachepolicy(imm)
      SDTCisVT<9, i1>      // idxen(imm)
    ]>,
    [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;

// Fragment over SIbuffer_atomic_cmpswap with HasNoUse set.
def SIbuffer_atomic_cmpswap_noret : PatFrag<
    (ops node:$src, node:$cmp, node:$rsrc, node:$vindex, node:$voffset,
         node:$soffset, node:$offset, node:$cachepolicy, node:$idxen),
    (SIbuffer_atomic_cmpswap node:$src, node:$cmp, node:$rsrc, node:$vindex,
                             node:$voffset, node:$soffset, node:$offset,
                             node:$cachepolicy, node:$idxen)> {
  let HasNoUse = true;
}
242
// SDNode class for an atomic with no result: takes a pointer (vaddr) and a
// data operand of type `ty` (vdata). Marked as a chained load+store with a
// memory operand.
class SDGlobalAtomicNoRtn<string opcode, ValueType ty> : SDNode<
    opcode,
    SDTypeProfile<0, 2, [
      SDTCisPtrTy<0>,   // vaddr
      SDTCisVT<1, ty>   // vdata
    ]>,
    [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;
249
// AMDGPUISD::PC_ADD_REL_OFFSET: pointer-typed result with two operands of the
// same type as the result.
def SIpc_add_rel_offset : SDNode<
    "AMDGPUISD::PC_ADD_REL_OFFSET",
    SDTypeProfile<1, 2, [
      SDTCisVT<0, iPTR>,
      SDTCisSameAs<0, 1>,
      SDTCisSameAs<0, 2>
    ]>>;

// AMDGPUISD::LDS: pointer-typed result with one operand of the same type.
def SIlds : SDNode<
    "AMDGPUISD::LDS",
    SDTypeProfile<1, 1, [
      SDTCisVT<0, iPTR>,
      SDTCisSameAs<0, 1>
    ]>>;
257
// d16 load nodes. All six share the SIload_d16 profile and are chained loads
// with a memory operand; the "lo" group comes first, then the "hi" group.
def SIload_d16_lo
    : SDNode<"AMDGPUISD::LOAD_D16_LO", SIload_d16,
             [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;

def SIload_d16_lo_u8
    : SDNode<"AMDGPUISD::LOAD_D16_LO_U8", SIload_d16,
             [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;

def SIload_d16_lo_i8
    : SDNode<"AMDGPUISD::LOAD_D16_LO_I8", SIload_d16,
             [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;

def SIload_d16_hi
    : SDNode<"AMDGPUISD::LOAD_D16_HI", SIload_d16,
             [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;

def SIload_d16_hi_u8
    : SDNode<"AMDGPUISD::LOAD_D16_HI_U8", SIload_d16,
             [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;

def SIload_d16_hi_i8
    : SDNode<"AMDGPUISD::LOAD_D16_HI_I8", SIload_d16,
             [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;
287
// AMDGPUISD::DENORM_MODE: no result, one integer operand. Chained, with an
// optional glue input and a glue output.
def SIdenorm_mode : SDNode<
    "AMDGPUISD::DENORM_MODE",
    SDTypeProfile<0, 1, [SDTCisInt<0>]>,
    [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;

// FP truncation nodes for the two directed variants, both typed with the
// generic FP round profile.
def SIfptrunc_round_upward
    : SDNode<"AMDGPUISD::FPTRUNC_ROUND_UPWARD", SDTFPRoundOp>;

def SIfptrunc_round_downward
    : SDNode<"AMDGPUISD::FPTRUNC_ROUND_DOWNWARD", SDTFPRoundOp>;
300
301//===----------------------------------------------------------------------===//
302// ValueType helpers
303//===----------------------------------------------------------------------===//
304
// Helper whose `ret` bit is set when SrcVT is an integer type other than i1.
class isIntType<ValueType SrcVT> {
  bit ret = !and(SrcVT.isInteger,
                 !ne(SrcVT.Value, i1.Value));
}
308
309//===----------------------------------------------------------------------===//
310// PatFrags for global memory operations
311//===----------------------------------------------------------------------===//
312
// Expand the target FP min/max atomic nodes through binary_atomic_op_all_as,
// passing 0 as its second argument.
defm atomic_load_fmin
    : binary_atomic_op_all_as<SIatomic_fmin, 0>;
defm atomic_load_fmax
    : binary_atomic_op_all_as<SIatomic_fmax, 0>;
315
316//===----------------------------------------------------------------------===//
317// SDNodes PatFrags for loads/stores with a glue input.
318// This is for SDNodes and PatFrag for local loads and stores to
319// enable s_mov_b32 m0, -1 to be glued to the memory instructions.
320//
321// These mirror the regular load/store PatFrags and rely on special
322// processing during Select() to add the glued copy.
323//
324//===----------------------------------------------------------------------===//
325
// ISD::LOAD re-declared with an additional glue input.
def AMDGPUld_glue : SDNode<
    "ISD::LOAD", SDTLoad,
    [SDNPHasChain, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]>;

// ISD::ATOMIC_LOAD re-declared with an additional glue input.
def AMDGPUatomic_ld_glue : SDNode<
    "ISD::ATOMIC_LOAD", SDTAtomicLoad,
    [SDNPHasChain, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]>;
333
// Unindexed load through the glued load node.
def unindexedload_glue
    : PatFrag<(ops node:$ptr),
              (AMDGPUld_glue node:$ptr)> {
  let IsLoad = 1;
  let IsUnindexed = 1;
}

// Non-extending unindexed glued load.
def load_glue
    : PatFrag<(ops node:$ptr),
              (unindexedload_glue node:$ptr)> {
  let IsLoad = 1;
  let IsNonExtLoad = 1;
}
343
// Glued atomic loads, one fragment per memory width. Each fragment is marked
// atomic and differs only in its MemoryVT.
let IsAtomic = 1 in {
  def atomic_load_8_glue
      : PatFrag<(ops node:$ptr), (AMDGPUatomic_ld_glue node:$ptr)> {
    let MemoryVT = i8;
  }

  def atomic_load_16_glue
      : PatFrag<(ops node:$ptr), (AMDGPUatomic_ld_glue node:$ptr)> {
    let MemoryVT = i16;
  }

  def atomic_load_32_glue
      : PatFrag<(ops node:$ptr), (AMDGPUatomic_ld_glue node:$ptr)> {
    let MemoryVT = i32;
  }

  def atomic_load_64_glue
      : PatFrag<(ops node:$ptr), (AMDGPUatomic_ld_glue node:$ptr)> {
    let MemoryVT = i64;
  }
} // End let IsAtomic = 1
367
// Extending variants of the glued load. Every fragment in this group is a
// load; the first three select the extension kind, the remaining six pin the
// memory type to i8 or i16 on top of one of those three.
let IsLoad = 1 in {
  // Extension kind.
  def extload_glue
      : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
    let IsAnyExtLoad = 1;
  }

  def sextload_glue
      : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
    let IsSignExtLoad = 1;
  }

  def zextload_glue
      : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
    let IsZeroExtLoad = 1;
  }

  // Extension kind + memory type.
  def extloadi8_glue
      : PatFrag<(ops node:$ptr), (extload_glue node:$ptr)> {
    let MemoryVT = i8;
  }

  def zextloadi8_glue
      : PatFrag<(ops node:$ptr), (zextload_glue node:$ptr)> {
    let MemoryVT = i8;
  }

  def extloadi16_glue
      : PatFrag<(ops node:$ptr), (extload_glue node:$ptr)> {
    let MemoryVT = i16;
  }

  def zextloadi16_glue
      : PatFrag<(ops node:$ptr), (zextload_glue node:$ptr)> {
    let MemoryVT = i16;
  }

  def sextloadi8_glue
      : PatFrag<(ops node:$ptr), (sextload_glue node:$ptr)> {
    let MemoryVT = i8;
  }

  def sextloadi16_glue
      : PatFrag<(ops node:$ptr), (sextload_glue node:$ptr)> {
    let MemoryVT = i16;
  }
} // End let IsLoad = 1

412
413
// Glued load fragments restricted to the address spaces listed in
// LoadAddress_local.
let IsLoad = 1, AddressSpaces = LoadAddress_local.AddrSpaces in {
  def load_local_m0
      : PatFrag<(ops node:$ptr), (load_glue node:$ptr)> {
    let IsNonExtLoad = 1;
  }

  // 8-bit memory type.
  def extloadi8_local_m0
      : PatFrag<(ops node:$ptr), (extloadi8_glue node:$ptr)>;
  def sextloadi8_local_m0
      : PatFrag<(ops node:$ptr), (sextloadi8_glue node:$ptr)>;
  def zextloadi8_local_m0
      : PatFrag<(ops node:$ptr), (zextloadi8_glue node:$ptr)>;

  // 16-bit memory type.
  def extloadi16_local_m0
      : PatFrag<(ops node:$ptr), (extloadi16_glue node:$ptr)>;
  def sextloadi16_local_m0
      : PatFrag<(ops node:$ptr), (sextloadi16_glue node:$ptr)>;
  def zextloadi16_local_m0
      : PatFrag<(ops node:$ptr), (zextloadi16_glue node:$ptr)>;
} // End let IsLoad = 1, AddressSpaces = LoadAddress_local.AddrSpaces
427
// load_local_m0 with a minimum-alignment requirement. The requirement is
// expressed through the Aligned<N> mixin, the same way the
// store_align{8,16}_local_m0 fragments in this file do it; the resulting
// MinAlignment field value is unchanged (8 and 16).
def load_align8_local_m0
    : PatFrag<(ops node:$ptr), (load_local_m0 node:$ptr)>,
      Aligned<8> {
  let IsLoad = 1;
}

def load_align16_local_m0
    : PatFrag<(ops node:$ptr), (load_local_m0 node:$ptr)>,
      Aligned<16> {
  let IsLoad = 1;
}
439
// Glued atomic loads restricted to the address spaces listed in
// LoadAddress_local, one per memory width.
let IsAtomic = 1, AddressSpaces = LoadAddress_local.AddrSpaces in {
  def atomic_load_8_local_m0
      : PatFrag<(ops node:$ptr), (atomic_load_8_glue node:$ptr)>;
  def atomic_load_16_local_m0
      : PatFrag<(ops node:$ptr), (atomic_load_16_glue node:$ptr)>;
  def atomic_load_32_local_m0
      : PatFrag<(ops node:$ptr), (atomic_load_32_glue node:$ptr)>;
  def atomic_load_64_local_m0
      : PatFrag<(ops node:$ptr), (atomic_load_64_glue node:$ptr)>;
} // End let IsAtomic = 1, AddressSpaces = LoadAddress_local.AddrSpaces
450
451
// ISD::STORE re-declared with an additional glue input.
def AMDGPUst_glue : SDNode<
    "ISD::STORE", SDTStore,
    [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPInGlue]>;

// ISD::ATOMIC_STORE re-declared with an additional glue input.
def AMDGPUatomic_st_glue : SDNode<
    "ISD::ATOMIC_STORE", SDTAtomicStore,
    [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPInGlue]>;
459
// Store fragments built on the glued store node. Every fragment in this group
// is a store; the bodies add the unindexed / truncating / memory-type
// predicates.
let IsStore = 1 in {
  def unindexedstore_glue
      : PatFrag<(ops node:$val, node:$ptr),
                (AMDGPUst_glue node:$val, node:$ptr)> {
    let IsUnindexed = 1;
  }

  // Non-truncating store.
  def store_glue
      : PatFrag<(ops node:$val, node:$ptr),
                (unindexedstore_glue node:$val, node:$ptr)> {
    let IsTruncStore = 0;
  }

  // Truncating store, any memory type.
  def truncstore_glue
      : PatFrag<(ops node:$val, node:$ptr),
                (unindexedstore_glue node:$val, node:$ptr)> {
    let IsTruncStore = 1;
  }

  // Truncating store with an i8 memory type.
  def truncstorei8_glue
      : PatFrag<(ops node:$val, node:$ptr),
                (truncstore_glue node:$val, node:$ptr)> {
    let MemoryVT = i8;
    let IsTruncStore = 1;
  }

  // Truncating store with an i16 memory type.
  def truncstorei16_glue
      : PatFrag<(ops node:$val, node:$ptr),
                (truncstore_glue node:$val, node:$ptr)> {
    let MemoryVT = i16;
    let IsTruncStore = 1;
  }
} // End let IsStore = 1
491
// Glued store fragments restricted to the address spaces listed in
// StoreAddress_local.
let IsStore = 1, AddressSpaces = StoreAddress_local.AddrSpaces in {
  def store_local_m0
      : PatFrag<(ops node:$val, node:$ptr),
                (store_glue node:$val, node:$ptr)>;

  def truncstorei8_local_m0
      : PatFrag<(ops node:$val, node:$ptr),
                (truncstorei8_glue node:$val, node:$ptr)>;

  def truncstorei16_local_m0
      : PatFrag<(ops node:$val, node:$ptr),
                (truncstorei16_glue node:$val, node:$ptr)>;
} // End let IsStore = 1, AddressSpaces = StoreAddress_local.AddrSpaces
500
// store_local_m0 with a minimum-alignment requirement of 8 and 16
// respectively, supplied through the Aligned<N> mixin.
def store_align8_local_m0
    : PatFrag<(ops node:$value, node:$ptr),
              (store_local_m0 node:$value, node:$ptr)>,
      Aligned<8> {
  let IsStore = 1;
}

def store_align16_local_m0
    : PatFrag<(ops node:$value, node:$ptr),
              (store_local_m0 node:$value, node:$ptr)>,
      Aligned<16> {
  let IsStore = 1;
}
512
// Non-extending loads and non-truncating stores in AddrSpaces.Local whose
// memory access alignment is below 4. The same alignment test is supplied
// twice: once for SelectionDAG (PredicateCode) and once for GlobalISel
// (GISelPredicateCode).
let PredicateCode = [{return cast<MemSDNode>(N)->getAlign() < 4;}],
    GISelPredicateCode = [{return (*MI.memoperands_begin())->getAlign() < 4;}],
    AddressSpaces = [ AddrSpaces.Local ] in {

  def load_align_less_than_4_local
      : PatFrag<(ops node:$ptr), (load_local node:$ptr)> {
    let IsLoad = 1;
    let IsNonExtLoad = 1;
  }

  def load_align_less_than_4_local_m0
      : PatFrag<(ops node:$ptr), (load_local_m0 node:$ptr)> {
    let IsLoad = 1;
    let IsNonExtLoad = 1;
  }

  def store_align_less_than_4_local
      : PatFrag<(ops node:$value, node:$ptr),
                (store_local node:$value, node:$ptr)> {
    let IsStore = 1;
    let IsTruncStore = 0;
  }

  def store_align_less_than_4_local_m0
      : PatFrag<(ops node:$value, node:$ptr),
                (store_local_m0 node:$value, node:$ptr)> {
    let IsStore = 1;
    let IsTruncStore = 0;
  }
} // End let PredicateCode, GISelPredicateCode, AddressSpaces
540
// Glued atomic stores, one fragment per memory width. Each fragment is marked
// atomic and differs only in its MemoryVT.
//
// NOTE(review): the placeholders here are named ($ptr, $value), while the
// atomic_store_*_local_m0 fragments that wrap these name the same two
// positions ($val, $ptr). Operands bind by position, so this is naming only;
// which order matches SDTAtomicStore should be confirmed against its
// definition.
let IsAtomic = 1 in {
  def atomic_store_8_glue
      : PatFrag<(ops node:$ptr, node:$value),
                (AMDGPUatomic_st_glue node:$ptr, node:$value)> {
    let MemoryVT = i8;
  }

  def atomic_store_16_glue
      : PatFrag<(ops node:$ptr, node:$value),
                (AMDGPUatomic_st_glue node:$ptr, node:$value)> {
    let MemoryVT = i16;
  }

  def atomic_store_32_glue
      : PatFrag<(ops node:$ptr, node:$value),
                (AMDGPUatomic_st_glue node:$ptr, node:$value)> {
    let MemoryVT = i32;
  }

  def atomic_store_64_glue
      : PatFrag<(ops node:$ptr, node:$value),
                (AMDGPUatomic_st_glue node:$ptr, node:$value)> {
    let MemoryVT = i64;
  }
} // End let IsAtomic = 1
568
// Glued atomic stores restricted to the address spaces listed in
// StoreAddress_local, one per memory width.
let IsAtomic = 1, AddressSpaces = StoreAddress_local.AddrSpaces in {
  def atomic_store_8_local_m0
      : PatFrag<(ops node:$val, node:$ptr),
                (atomic_store_8_glue node:$val, node:$ptr)>;

  def atomic_store_16_local_m0
      : PatFrag<(ops node:$val, node:$ptr),
                (atomic_store_16_glue node:$val, node:$ptr)>;

  def atomic_store_32_local_m0
      : PatFrag<(ops node:$val, node:$ptr),
                (atomic_store_32_glue node:$val, node:$ptr)>;

  def atomic_store_64_local_m0
      : PatFrag<(ops node:$val, node:$ptr),
                (atomic_store_64_glue node:$val, node:$ptr)>;
} // End let IsAtomic = 1, AddressSpaces = StoreAddress_local.AddrSpaces
579
580
581//===----------------------------------------------------------------------===//
582// SDNodes PatFrags for a16 loads and stores with 3 components.
583// v3f16/v3i16 is widened to v4f16/v4i16, so we need to match on the memory
584// load/store size.
585//===----------------------------------------------------------------------===//
586
// Wrap buffer load/store operator `name` in a fragment that additionally
// pins the memory type to `vt`. The mubuf forms take seven (load) or eight
// (store) operands; the mtbuf forms add a $format operand after $offset.

// MUBUF load: forwards all seven operands unchanged.
class mubuf_intrinsic_load<SDPatternOperator name, ValueType vt> : PatFrag<
    (ops node:$rsrc, node:$vindex, node:$voffset, node:$soffset,
         node:$offset, node:$auxiliary, node:$idxen),
    (name node:$rsrc, node:$vindex, node:$voffset, node:$soffset,
          node:$offset, node:$auxiliary, node:$idxen)> {
  let IsLoad = 1;
  let MemoryVT = vt;
}

// MUBUF store: same as the load form with a leading $vdata operand.
class mubuf_intrinsic_store<SDPatternOperator name, ValueType vt> : PatFrag<
    (ops node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset,
         node:$offset, node:$auxiliary, node:$idxen),
    (name node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset,
          node:$offset, node:$auxiliary, node:$idxen)> {
  let IsStore = 1;
  let MemoryVT = vt;
}

// MTBUF load: forwards all eight operands unchanged.
class mtbuf_intrinsic_load<SDPatternOperator name, ValueType vt> : PatFrag<
    (ops node:$rsrc, node:$vindex, node:$voffset, node:$soffset,
         node:$offset, node:$format, node:$auxiliary, node:$idxen),
    (name node:$rsrc, node:$vindex, node:$voffset, node:$soffset,
          node:$offset, node:$format, node:$auxiliary, node:$idxen)> {
  let IsLoad = 1;
  let MemoryVT = vt;
}

// MTBUF store: same as the load form with a leading $vdata operand.
class mtbuf_intrinsic_store<SDPatternOperator name, ValueType vt> : PatFrag<
    (ops node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset,
         node:$offset, node:$format, node:$auxiliary, node:$idxen),
    (name node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset,
          node:$offset, node:$format, node:$auxiliary, node:$idxen)> {
  let IsStore = 1;
  let MemoryVT = vt;
}
622
623//===----------------------------------------------------------------------===//
624// SDNodes PatFrags for d16 loads
625//===----------------------------------------------------------------------===//
626
// Load fragment over a d16 load operator taking a pointer and a tied input.
class LoadD16Frag<SDPatternOperator op>
    : PatFrag<(ops node:$ptr, node:$tied_in),
              (op node:$ptr, node:$tied_in)> {
  let IsLoad = 1;
}

// For each address space, define the six d16 load fragments (hi/lo halves;
// plain, u8 and i8 sources), restricted to that address space's
// LoadAddress_<as> list. The u8/i8 variants additionally pin MemoryVT to i8.
foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {

  def load_d16_hi_#as : LoadD16Frag<SIload_d16_hi>;
  def load_d16_lo_#as : LoadD16Frag<SIload_d16_lo>;

  let MemoryVT = i8 in {
    def az_extloadi8_d16_hi_#as : LoadD16Frag<SIload_d16_hi_u8>;
    def sextloadi8_d16_hi_#as   : LoadD16Frag<SIload_d16_hi_i8>;
    def az_extloadi8_d16_lo_#as : LoadD16Frag<SIload_d16_lo_u8>;
    def sextloadi8_d16_lo_#as   : LoadD16Frag<SIload_d16_lo_i8>;
  } // End let MemoryVT = i8

} // End let AddressSpaces = ...
} // End foreach as
658
// Shift fragments with the operands swapped: the fragment takes
// (src1, src0) and matches the generic shift of src0 by src1.
def lshr_rev : PatFrag<(ops node:$src1, node:$src0),
                       (srl $src0, $src1)>;

def ashr_rev : PatFrag<(ops node:$src1, node:$src0),
                       (sra $src0, $src1)>;

def lshl_rev : PatFrag<(ops node:$src1, node:$src0),
                       (shl $src0, $src1)>;

// ctpop(src0) + src1
def add_ctpop : PatFrag<(ops node:$src0, node:$src1),
                        (add (ctpop $src0), $src1)>;

// ~(src0 ^ src1)
def xnor : PatFrag<(ops node:$src0, node:$src1),
                   (not (xor $src0, $src1))>;
683
// shl1_add .. shl4_add: (src0 << I) + src1, where the shift has a single use.
foreach I = [1, 2, 3, 4] in {
  def shl#I#_add
      : PatFrag<(ops node:$src0, node:$src1),
                (add (shl_oneuse $src0, (i32 I)), $src1)> {
    // FIXME: Poor substitute for disabling pattern in SelectionDAG.
    // The SelectionDAG predicate always rejects; the GlobalISel predicate
    // always accepts.
    let PredicateCode = [{return false;}];
    let GISelPredicateCode = [{return true;}];
  }
}
693
// For one atomic operation, defines:
//   <NAME>_glue       - the atomic node with a glue input. The opcode string
//                       is "<ns>::ATOMIC_<op_name>", where <ns> is AMDGPUISD
//                       when is_amdgpu is set and ISD otherwise;
//   <NAME>_local_m0*  - returning and no-return fragment families restricted
//                       to StoreAddress_local;
//   <NAME>_region_m0* - the same, restricted to StoreAddress_region.
// `tc` is the node's type profile and `IsInt` is forwarded to the fragment
// multiclasses.
multiclass SIAtomicM0Glue2<string op_name,
                           bit is_amdgpu = 0,
                           SDTypeProfile tc = SDTAtomic2,
                           bit IsInt = 1> {

  def _glue : SDNode<
      !if(is_amdgpu, "AMDGPUISD", "ISD")#"::ATOMIC_"#op_name,
      tc,
      [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]>;

  let AddressSpaces = StoreAddress_local.AddrSpaces in {
    defm _local_m0
        : binary_atomic_op<!cast<SDNode>(NAME#"_glue"), IsInt>;
    defm _local_m0
        : noret_binary_atomic_op<!cast<SDNode>(NAME#"_glue"), IsInt>;
  }

  let AddressSpaces = StoreAddress_region.AddrSpaces in {
    defm _region_m0
        : binary_atomic_op<!cast<SDNode>(NAME#"_glue"), IsInt>;
    defm _region_m0
        : noret_binary_atomic_op<!cast<SDNode>(NAME#"_glue"), IsInt>;
  }
}
715
// Integer atomics: generic ISD opcodes with the default profile.
defm atomic_load_add       : SIAtomicM0Glue2<"LOAD_ADD">;
defm atomic_load_sub       : SIAtomicM0Glue2<"LOAD_SUB">;
defm atomic_load_uinc_wrap : SIAtomicM0Glue2<"LOAD_UINC_WRAP">;
defm atomic_load_udec_wrap : SIAtomicM0Glue2<"LOAD_UDEC_WRAP">;
defm atomic_load_and       : SIAtomicM0Glue2<"LOAD_AND">;
defm atomic_load_min       : SIAtomicM0Glue2<"LOAD_MIN">;
defm atomic_load_max       : SIAtomicM0Glue2<"LOAD_MAX">;
defm atomic_load_or        : SIAtomicM0Glue2<"LOAD_OR">;
defm atomic_load_xor       : SIAtomicM0Glue2<"LOAD_XOR">;
defm atomic_load_umin      : SIAtomicM0Glue2<"LOAD_UMIN">;
defm atomic_load_umax      : SIAtomicM0Glue2<"LOAD_UMAX">;
defm atomic_swap           : SIAtomicM0Glue2<"SWAP">;

// Floating-point atomics: SDTAtomic2_f32 profile, IsInt = 0. FADD uses the
// generic ISD opcode; FMIN/FMAX use the AMDGPUISD opcodes.
defm atomic_load_fadd : SIAtomicM0Glue2<"LOAD_FADD", 0, SDTAtomic2_f32, 0>;
defm atomic_load_fmin : SIAtomicM0Glue2<"LOAD_FMIN", 1, SDTAtomic2_f32, 0>;
defm atomic_load_fmax : SIAtomicM0Glue2<"LOAD_FMAX", 1, SDTAtomic2_f32, 0>;
731
// Transforms that rebuild an immediate as a target constant of a given type.
// The "imm" forms apply to `imm` nodes, the "timm" forms to `timm` nodes.

// i1, from the zero-extended value.
def as_i1timm : SDNodeXForm<timm, [{
  const uint64_t Val = N->getZExtValue();
  return CurDAG->getTargetConstant(Val, SDLoc(N), MVT::i1);
}]>;

// i8, from the zero-extended value.
def as_i8imm : SDNodeXForm<imm, [{
  const uint64_t Val = N->getZExtValue();
  return CurDAG->getTargetConstant(Val, SDLoc(N), MVT::i8);
}]>;

// NOTE(review): despite the name, this builds an MVT::i16 constant from the
// sign-extended value, i.e. the same body as as_i16timm. Confirm whether
// users rely on the i16 type before changing it.
def as_i8timm : SDNodeXForm<timm, [{
  const int64_t Val = N->getSExtValue();
  return CurDAG->getTargetConstant(Val, SDLoc(N), MVT::i16);
}]>;

// i16, from the sign-extended value.
def as_i16imm : SDNodeXForm<imm, [{
  const int64_t Val = N->getSExtValue();
  return CurDAG->getTargetConstant(Val, SDLoc(N), MVT::i16);
}]>;

def as_i16timm : SDNodeXForm<timm, [{
  const int64_t Val = N->getSExtValue();
  return CurDAG->getTargetConstant(Val, SDLoc(N), MVT::i16);
}]>;

// i32, from the sign-extended value.
def as_i32imm : SDNodeXForm<imm, [{
  const int64_t Val = N->getSExtValue();
  return CurDAG->getTargetConstant(Val, SDLoc(N), MVT::i32);
}]>;

def as_i32timm : SDNodeXForm<timm, [{
  const int64_t Val = N->getSExtValue();
  return CurDAG->getTargetConstant(Val, SDLoc(N), MVT::i32);
}]>;

// i64, from the sign-extended value.
def as_i64imm : SDNodeXForm<imm, [{
  const int64_t Val = N->getSExtValue();
  return CurDAG->getTargetConstant(Val, SDLoc(N), MVT::i64);
}]>;
763
// Condition-code node -> i32 target constant holding the value of N->get().
def cond_as_i32imm : SDNodeXForm<cond, [{
  return CurDAG->getTargetConstant(N->get(), SDLoc(N), MVT::i32);
}]>;

// FP immediate -> i32 target constant carrying the zero-extended raw bits of
// the APFloat value.
// Copied from the AArch64 backend:
def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{
  const uint64_t Bits = N->getValueAPF().bitcastToAPInt().getZExtValue();
  return CurDAG->getTargetConstant(Bits, SDLoc(N), MVT::i32);
}]>;

// Frame index node -> i32 target frame index with the same index.
def frameindex_to_targetframeindex : SDNodeXForm<frameindex, [{
  auto FI = cast<FrameIndexSDNode>(N);
  return CurDAG->getTargetFrameIndex(FI->getIndex(), MVT::i32);
}]>;

// FP immediate -> i64 target constant carrying the zero-extended raw bits of
// the APFloat value.
// Copied from the AArch64 backend:
def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{
  const uint64_t Bits = N->getValueAPF().bitcastToAPInt().getZExtValue();
  return CurDAG->getTargetConstant(Bits, SDLoc(N), MVT::i64);
}]>;
784
// Transform that extracts bit `bitnum` of an immediate's zero-extended value
// and returns it as an i1 target constant. `bitnum` is pasted textually into
// the generated C++ as the shift amount.
class bitextract_imm<int bitnum> : SDNodeXForm<imm, [{
  uint64_t Imm = N->getZExtValue();
  unsigned Bit = (Imm >> }] # bitnum # [{ ) & 1;
  return CurDAG->getTargetConstant(Bit, SDLoc(N), MVT::i1);
}]>;
790
// i32 target immediate that fits in 16 bits, either as a signed or as an
// unsigned value. Matched values are rewritten through as_i16timm.
def SIMM16bit : TImmLeaf<i32, [{
  return isInt<16>(Imm) || isUInt<16>(Imm);
}], as_i16timm>;

// i64 immediate whose value is unchanged by masking to the low 32 bits.
def i64imm_32bit : ImmLeaf<i64, [{
  return (Imm & 0xffffffffULL) == static_cast<uint64_t>(Imm);
}]>;

// Integer immediates accepted by the per-width isInlineImmediate* helpers.
def InlineImm16 : ImmLeaf<i16, [{
  return isInlineImmediate16(Imm);
}]>;

def InlineImm32 : ImmLeaf<i32, [{
  return isInlineImmediate32(Imm);
}]>;

def InlineImm64 : ImmLeaf<i64, [{
  return isInlineImmediate64(Imm);
}]>;

// Floating-point immediates accepted by isInlineImmediate.
def InlineImmFP32 : FPImmLeaf<f32, [{
  return isInlineImmediate(Imm);
}]>;

def InlineImmFP64 : FPImmLeaf<f64, [{
  return isInlineImmediate(Imm);
}]>;

819
820
821class VGPRImm <dag frag> : PatLeaf<frag, [{
822  return isVGPRImm(N);
823}]>;
824
825def NegateImm : SDNodeXForm<imm, [{
826  return CurDAG->getConstant(-N->getSExtValue(), SDLoc(N), MVT::i32);
827}]>;
828
829// TODO: When FP inline imm values work?
// Matches i32 immediates in the range [-64, -17]. NegateImm is attached as
// the transform, so the value produced for a match lies in [17, 64].
830def NegSubInlineConst32 : ImmLeaf<i32, [{
831  return Imm < -16 && Imm >= -64;
832}], NegateImm>;
833
// Same range check and transform as NegSubInlineConst32, for i16 immediates.
// NOTE(review): NegateImm always builds an MVT::i32 constant, including for
// this i16 leaf -- confirm that is intended.
834def NegSubInlineIntConst16 : ImmLeaf<i16, [{
835  return Imm < -16 && Imm >= -64;
836}], NegateImm>;
837
// Matches i32 immediates whose value is less than 32.
// NOTE(review): only the upper bound is checked; if Imm is presented as a
// signed value, negative immediates also satisfy this predicate -- confirm
// that users of this leaf cannot see such values.
838def ShiftAmt32Imm : ImmLeaf <i32, [{
839  return Imm < 32;
840}]>;
841
// Matches an f16 value in VGPR_32 when fp16SrcZerosHighBits returns true for
// the opcode of the matched node. Judging by the names, this identifies
// producers that leave the upper 16 bits of the register zeroed -- the helper
// is defined elsewhere, so confirm there.
842def fp16_zeros_high_16bits : PatLeaf<(f16 VGPR_32:$src), [{
843  return fp16SrcZerosHighBits(N->getOpcode());
844}]>;
845
846
847//===----------------------------------------------------------------------===//
848// MUBUF/SMEM Patterns
849//===----------------------------------------------------------------------===//
850
// Masks a timm value with the cache-policy bit mask for the current
// subtarget generation (CPol::ALL for GFX12 and later, CPol::ALL_pregfx12
// otherwise) and returns the result as an i8 target constant.
851def extract_cpol : SDNodeXForm<timm, [{
852  return CurDAG->getTargetConstant(
853      N->getZExtValue() & (Subtarget->getGeneration() >= AMDGPUSubtarget::GFX12
854                               ? AMDGPU::CPol::ALL
855                               : AMDGPU::CPol::ALL_pregfx12),
856      SDLoc(N), MVT::i8);
857}]>;
858
// Tests a timm value against the swizzle bit for the current subtarget
// generation (CPol::SWZ for GFX12 and later, CPol::SWZ_pregfx12 otherwise).
// The result is collapsed to a bool first, so the returned i8 target constant
// is 1 when the bit is set and 0 otherwise.
859def extract_swz : SDNodeXForm<timm, [{
860  const bool Swizzle =
861      N->getZExtValue() & (Subtarget->getGeneration() >= AMDGPUSubtarget::GFX12
862                               ? AMDGPU::CPol::SWZ
863                               : AMDGPU::CPol::SWZ_pregfx12);
864  return CurDAG->getTargetConstant(Swizzle, SDLoc(N), MVT::i8);
865}]>;
866
// Applies the same generation-dependent cache-policy mask as extract_cpol
// (CPol::ALL for GFX12 and later, CPol::ALL_pregfx12 otherwise), then ORs in
// CPol::GLC unconditionally and returns the result as an i8 target constant.
867def extract_cpol_set_glc : SDNodeXForm<timm, [{
868  const uint32_t cpol = N->getZExtValue() & (Subtarget->getGeneration() >= AMDGPUSubtarget::GFX12
869                               ? AMDGPU::CPol::ALL
870                               : AMDGPU::CPol::ALL_pregfx12);
871  return CurDAG->getTargetConstant(cpol | AMDGPU::CPol::GLC, SDLoc(N), MVT::i8);
872}]>;
873
874//===----------------------------------------------------------------------===//
875// Custom Operands
876//===----------------------------------------------------------------------===//
877
// Custom operand of type OtherVT declared as a PC-relative operand
// (OPERAND_PCREL). It is printed with the generic printOperand method and
// uses dedicated encoder and decoder methods.
878def SOPPBrTarget : CustomOperand<OtherVT> {
879  let PrintMethod = "printOperand";
880  let EncoderMethod = "getSOPPBrEncoding";
881  let DecoderMethod = "decodeSOPPBrTarget";
882  let OperandType = "OPERAND_PCREL";
883}
884
885def si_ga : Operand<iPTR>;
886
887def InterpSlot : CustomOperand<i32>;
888
889// It appears to be necessary to create a separate operand for this to
890// be able to parse attr<num> with no space.
891def InterpAttr : CustomOperand<i32>;
892
893def InterpAttrChan : ImmOperand<i32>;
894
// i32 immediate operand with the AMDGPU operand type
// OPERAND_INLINE_SPLIT_BARRIER_INT32. It has its own decoder method and is
// printed with the generic printOperand method.
895def SplitBarrier : ImmOperand<i32> {
896  let OperandNamespace = "AMDGPU";
897  let OperandType = "OPERAND_INLINE_SPLIT_BARRIER_INT32";
898  let DecoderMethod = "decodeSplitBarrier";
899  let PrintMethod = "printOperand";
900}
901
902def VReg32OrOffClass : AsmOperandClass {
903  let Name = "VReg32OrOff";
904  let ParserMethod = "parseVReg32OrOff";
905}
906
907def SendMsg : CustomOperand<i32>;
908
909def Swizzle : CustomOperand<i16, 1>;
910
911def Endpgm : CustomOperand<i16, 1>;
912
913def SWaitCnt : CustomOperand<i32>;
914
915def DepCtr : CustomOperand<i32>;
916
917def SDelayALU : CustomOperand<i32>;
918
919include "SIInstrFormats.td"
920include "VIInstrFormats.td"
921
// Assembler operand class shared by the boolean register operands below:
// parsed with parseBoolReg and rendered with addRegOperands.
922def BoolReg : AsmOperandClass {
923  let Name = "BoolReg";
924  let ParserMethod = "parseBoolReg";
925  let RenderMethod = "addRegOperands";
926}
927
// Base for operands over the SReg_1 register class that parse as BoolReg and
// decode with decodeBoolReg.
928class BoolRC : RegisterOperand<SReg_1> {
929  let ParserMatchClass = BoolReg;
930  let DecoderMethod = "decodeBoolReg";
931}
932
// Same parser/decoder setup as BoolRC, but over the SReg_1_XEXEC register
// class instead of SReg_1.
933def SSrc_i1 : RegisterOperand<SReg_1_XEXEC> {
934  let ParserMatchClass = BoolReg;
935  let DecoderMethod = "decodeBoolReg";
936}
937
// BoolRC operand printed with printVOPDst.
938def VOPDstS64orS32 : BoolRC {
939  let PrintMethod = "printVOPDst";
940}
941
942// SCSrc_i1 is the operand for pseudo instructions only.
943// Boolean immediates shall not be exposed to codegen instructions.
// Configured like SSrc_i1 (SReg_1_XEXEC, BoolReg parser class, decodeBoolReg)
// but additionally tagged with the AMDGPU operand type OPERAND_REG_IMM_INT32.
944def SCSrc_i1 : RegisterOperand<SReg_1_XEXEC> {
945  let OperandNamespace = "AMDGPU";
946  let OperandType = "OPERAND_REG_IMM_INT32";
947  let ParserMatchClass = BoolReg;
948  let DecoderMethod = "decodeBoolReg";
949}
950
951// ===----------------------------------------------------------------------===//
952// ExpSrc* Special cases for exp src operands which are printed as
953// "off" depending on en operand.
954// ===----------------------------------------------------------------------===//
955
// The four export source operands are all VGPR_32 register operands sharing
// VReg32OrOffClass as their parser match class; they differ only in the
// per-index print method (printExpSrc0 .. printExpSrc3).
956def ExpSrc0 : RegisterOperand<VGPR_32> {
957  let PrintMethod = "printExpSrc0";
958  let ParserMatchClass = VReg32OrOffClass;
959}
960
961def ExpSrc1 : RegisterOperand<VGPR_32> {
962  let PrintMethod = "printExpSrc1";
963  let ParserMatchClass = VReg32OrOffClass;
964}
965
966def ExpSrc2 : RegisterOperand<VGPR_32> {
967  let PrintMethod = "printExpSrc2";
968  let ParserMatchClass = VReg32OrOffClass;
969}
970
971def ExpSrc3 : RegisterOperand<VGPR_32> {
972  let PrintMethod = "printExpSrc3";
973  let ParserMatchClass = VReg32OrOffClass;
974}
975
// SDWA source operand over the VS_32 register class, parameterized by value
// type. The operand type and decoder names are assembled from `vt`:
//   OperandType   = "OPERAND_REG_INLINE_C_" + ("FP" | "INT") + vt.Size
//   DecoderMethod = "decodeSDWASrc" + vt.Size
// so e.g. i32 yields OPERAND_REG_INLINE_C_INT32 / decodeSDWASrc32. All
// instances share the getSDWASrcEncoding encoder.
976class SDWASrc<ValueType vt> : RegisterOperand<VS_32> {
977  let OperandNamespace = "AMDGPU";
  // "FP" for floating-point value types, "INT" otherwise.
978  string Type = !if(vt.isFP, "FP", "INT");
979  let OperandType = "OPERAND_REG_INLINE_C_"#Type#vt.Size;
980  let DecoderMethod = "decodeSDWASrc"#vt.Size;
981  let EncoderMethod = "getSDWASrcEncoding";
982}
983
984def SDWASrc_i32 : SDWASrc<i32>;
985def SDWASrc_i16 : SDWASrc<i16>;
986def SDWASrc_f32 : SDWASrc<f32>;
987def SDWASrc_f16 : SDWASrc<f16>;
988
// BoolRC-based destination operand tagged OPERAND_SDWA_VOPC_DST, with its own
// encoder/decoder methods and printed with printVOPDst.
989def SDWAVopcDst : BoolRC {
990  let OperandNamespace = "AMDGPU";
991  let OperandType = "OPERAND_SDWA_VOPC_DST";
992  let EncoderMethod = "getSDWAVopcDstEncoding";
993  let DecoderMethod = "decodeSDWAVopcDst";
994  let PrintMethod = "printVOPDst";
995}
996
// Custom operand written in assembly as an integer with a textual prefix.
//   Type          - value type of the operand.
//   Prefix        - prefix string handed to parseIntWithPrefix.
//   Name          - operand name forwarded to CustomOperand (defaults to the
//                   name of the record being defined).
//   ConvertMethod - C++ expression passed as the last argument of
//                   parseIntWithPrefix; defaults to "nullptr".
// ParserMethod is the source text of a C++ lambda assembled by string
// concatenation. ImmTy is not declared in this class; it is presumably a
// field inherited from CustomOperand.
997class NamedIntOperand<ValueType Type, string Prefix, string Name = NAME,
998                      string ConvertMethod = "nullptr">
999    : CustomOperand<Type, 1, Name> {
1000  let ParserMethod =
1001    "[this](OperandVector &Operands) -> ParseStatus { "#
1002    "return parseIntWithPrefix(\""#Prefix#"\", Operands, "#
1003    "AMDGPUOperand::"#ImmTy#", "#ConvertMethod#"); }";
1004}
1005
// Custom i1 operand identified in assembly by the string `Id`.
//   Id   - string handed to both parseNamedBit and printNamedBit.
//   Name - operand name forwarded to CustomOperand (defaults to the name of
//          the record being defined).
// PredicateMethod, ParserMethod and PrintMethod are C++ source strings built
// by concatenation; the parser and printer are lambdas wrapping
// parseNamedBit / printNamedBit. ImmTy is not declared in this class; it is
// presumably a field inherited from CustomOperand.
1006class NamedBitOperand<string Id, string Name = NAME>
1007    : CustomOperand<i1, 1, Name> {
1008  let PredicateMethod = "isImmTy<AMDGPUOperand::"#ImmTy#">";
1009  let ParserMethod =
1010    "[this](OperandVector &Operands) -> ParseStatus { "#
1011    "return parseNamedBit(\""#Id#"\", Operands, AMDGPUOperand::"#ImmTy#"); }";
1012  let PrintMethod = "[this](const MCInst *MI, unsigned OpNo, "#
1013    "const MCSubtargetInfo &STI, raw_ostream &O) { "#
1014    "printNamedBit(MI, OpNo, O, \""#Id#"\"); }";
1015}
1016
// Wraps an existing custom operand `Op` as an operand with a default value:
// the default ops list is a single constant `Value` of Op's type. The
// wrapper reuses Op's ParserMatchClass and PrintMethod.
1017class DefaultOperand<CustomOperand Op, int Value>
1018  : OperandWithDefaultOps<Op.Type, (ops (Op.Type Value))>,
1019    CustomOperandProps<1> {
1020  let ParserMatchClass = Op.ParserMatchClass;
1021  let PrintMethod = Op.PrintMethod;
1022}
1023
// Custom i32 operand parsed by parseSDWASel.
//   Id   - string handed to parseSDWASel.
//   Name - operand name forwarded to CustomOperand (defaults to the name of
//          the record being defined).
// ParserMethod is the source text of a C++ lambda; ImmTy is presumably a
// field inherited from CustomOperand.
1024class SDWAOperand<string Id, string Name = NAME>
1025    : CustomOperand<i32, 1, Name> {
1026  let ParserMethod =
1027    "[this](OperandVector &Operands) -> ParseStatus { "#
1028    "return parseSDWASel(Operands, \""#Id#"\", AMDGPUOperand::"#ImmTy#"); }";
1029}
1030
// i32 operand with a default value of 0, parsed by
// parseOperandArrayWithPrefix.
//   Id   - prefix string handed to parseOperandArrayWithPrefix.
//   Name - operand name forwarded to CustomOperandProps (defaults to the name
//          of the record being defined).
// ParserMethod is the source text of a C++ lambda; ImmTy is presumably a
// field provided by CustomOperandProps.
1031class ArrayOperand0<string Id, string Name = NAME>
1032  : OperandWithDefaultOps<i32, (ops (i32 0))>,
1033    CustomOperandProps<1, Name> {
1034  let ParserMethod =
1035    "[this](OperandVector &Operands) -> ParseStatus { "#
1036    "return parseOperandArrayWithPrefix(\""#Id#"\", Operands, "#
1037    "AMDGPUOperand::"#ImmTy#"); }";
1038}
1039
// Offset operands. The brace-less `let ... in` below applies only to the
// single definition that follows it (flat_offset), overriding its ImmTy to
// "ImmTyOffset"; the remaining offset operands are unaffected by it.
1040let ImmTy = "ImmTyOffset" in
1041def flat_offset : CustomOperand<i32, 1, "FlatOffset">;
1042def offset : NamedIntOperand<i32, "offset", "Offset">;
1043def offset0 : NamedIntOperand<i8, "offset0", "Offset0">;
1044def offset1 : NamedIntOperand<i8, "offset1", "Offset1">;
1045
1046def gds : NamedBitOperand<"gds", "GDS">;
1047
1048def omod : CustomOperand<i32, 1, "OModSI">;
1049def omod0 : DefaultOperand<omod, 0>;
1050
1051// We need to make the cases with a default of 0 distinct from no
1052// default to help deal with some cases where the operand appears
1053// before a mandatory operand.
1054def clampmod : NamedBitOperand<"clamp", "ClampSI">;
1055def clampmod0 : DefaultOperand<clampmod, 0>;
1056def highmod : NamedBitOperand<"high", "High">;
1057
// Cache-policy operand and its variants:
//   CPol_0 / CPol_GLC1       - CPol with a default value of 0 / 1.
//   CPol_GLC                 - CPol accepted only when the GLC bit is set.
//   CPol_NonGLC              - CPol accepted only when the GLC bit is clear
//                              (the trailing 1 is an extra argument to
//                              ValuePredicatedOperand; its meaning is defined
//                              with that class, outside this section).
//   CPol_GLC_WithDefault     - CPol_GLC defaulting to (1 << CPolBit.GLC).
//   CPol_NonGLC_WithDefault  - CPol_NonGLC defaulting to 0.
1058def CPol : CustomOperand<i32, 1>;
1059def CPol_0 : DefaultOperand<CPol, 0>;
1060def CPol_GLC1 : DefaultOperand<CPol, 1>;
1061def CPol_GLC : ValuePredicatedOperand<CPol, "Op.getImm() & CPol::GLC">;
1062def CPol_NonGLC : ValuePredicatedOperand<CPol, "!(Op.getImm() & CPol::GLC)", 1>;
1063def CPol_GLC_WithDefault : DefaultOperand<CPol_GLC, !shl(1, CPolBit.GLC)>;
1064def CPol_NonGLC_WithDefault : DefaultOperand<CPol_NonGLC, 0>;
1065
1066def TFE : NamedBitOperand<"tfe">;
1067def UNorm : NamedBitOperand<"unorm">;
1068def DA : NamedBitOperand<"da">;
1069def R128A16 : CustomOperand<i1, 1>;
1070def A16 : NamedBitOperand<"a16">;
1071def D16 : NamedBitOperand<"d16">;
1072def LWE : NamedBitOperand<"lwe">;
1073def exp_compr : NamedBitOperand<"compr", "ExpCompr">;
1074def exp_vm : NamedBitOperand<"vm", "ExpVM">;
1075
1076def FORMAT : CustomOperand<i8>;
1077
1078def DMask : NamedIntOperand<i16, "dmask">;
1079def Dim : CustomOperand<i8>;
1080
1081def dst_sel : SDWAOperand<"dst_sel", "SDWADstSel">;
1082def src0_sel : SDWAOperand<"src0_sel", "SDWASrc0Sel">;
1083def src1_sel : SDWAOperand<"src1_sel", "SDWASrc1Sel">;
1084def dst_unused : CustomOperand<i32, 1, "SDWADstUnused">;
1085
1086def op_sel0 : ArrayOperand0<"op_sel", "OpSel">;
1087def op_sel_hi0 : ArrayOperand0<"op_sel_hi", "OpSelHi">;
1088def neg_lo0 : ArrayOperand0<"neg_lo", "NegLo">;
1089def neg_hi0 : ArrayOperand0<"neg_hi", "NegHi">;
1090
1091def IndexKey16bit : CustomOperand<i32, 1>;
1092def IndexKey8bit : CustomOperand<i32, 1>;
1093
1094def dpp8 : CustomOperand<i32, 0, "DPP8">;
1095def dpp_ctrl : CustomOperand<i32, 0, "DPPCtrl">;
1096
// DPP control operands. row_mask and bank_mask are defined inside the
// `let DefaultValue = "0xf"` scope, so only those two get that default.
// bound_ctrl is an i1 operand that additionally supplies a conversion lambda
// (wrapping convertDppBoundCtrl) as NamedIntOperand's ConvertMethod.
1097let DefaultValue = "0xf" in {
1098def row_mask : NamedIntOperand<i32, "row_mask", "DppRowMask">;
1099def bank_mask : NamedIntOperand<i32, "bank_mask", "DppBankMask">;
1100}
1101def bound_ctrl : NamedIntOperand<i1, "bound_ctrl", "DppBoundCtrl",
1102    "[this] (int64_t &BC) -> bool { return convertDppBoundCtrl(BC); }">;
1103def FI : NamedIntOperand<i32, "fi", "DppFI">;
1104
1105def blgp : CustomOperand<i32, 1, "BLGP">;
1106def cbsz : NamedIntOperand<i32, "cbsz", "CBSZ">;
1107def abid : NamedIntOperand<i32, "abid", "ABID">;
1108
1109def hwreg : CustomOperand<i32, 0, "Hwreg">;
1110
1111def exp_tgt : CustomOperand<i32, 0, "ExpTgt">;
1112
// i8 named-integer operands for wait counters; each takes its assembly prefix
// and its operand name as the second and third arguments.
// NOTE(review): the last record is named wait_va_vsrc, but its prefix
// ("wait_vm_vsrc") and operand name ("WaitVMVSrc") both say "vm" -- confirm
// whether the record name is intentional before relying on it.
1113def wait_vdst : NamedIntOperand<i8, "wait_vdst", "WaitVDST">;
1114def wait_exp : NamedIntOperand<i8, "wait_exp", "WaitEXP">;
1115def wait_va_vdst : NamedIntOperand<i8, "wait_va_vdst", "WaitVAVDst">;
1116def wait_va_vsrc : NamedIntOperand<i8, "wait_vm_vsrc", "WaitVMVSrc">;
1117
// Immediate operand whose operand type and print method are derived from the
// bit size of `vt`:
//   OperandType = "OPERAND_KIMM" + vt.Size
//   PrintMethod = "printU" + vt.Size + "ImmOperand"
// All sizes share the decodeOperand_KImmFP decoder. Note that the instances
// below are created with integer value types (i32, i16).
1118class KImmFPOperand<ValueType vt> : ImmOperand<vt> {
1119  let OperandNamespace = "AMDGPU";
1120  let OperandType = "OPERAND_KIMM"#vt.Size;
1121  let PrintMethod = "printU"#vt.Size#"ImmOperand";
1122  let DecoderMethod = "decodeOperand_KImmFP";
1123}
1124
1125// 32-bit VALU immediate operand that uses the constant bus.
1126def KImmFP32 : KImmFPOperand<i32>;
1127
1128// 32-bit VALU immediate operand with a 16-bit value that uses the
1129// constant bus.
1130def KImmFP16 : KImmFPOperand<i16>;
1131
// Assembler match class for a source operand with FP input modifiers.
// The class name and predicate method embed `opSize`
// ("RegOrImmWithFP<opSize>InputMods" / "isRegOrImmWithFP<opSize>InputMods");
// the parser method is the same for every size.
1132class FPInputModsMatchClass <int opSize> : AsmOperandClass {
1133  let Name = "RegOrImmWithFP"#opSize#"InputMods";
1134  let ParserMethod = "parseRegOrImmWithFPInputMods";
1135  let PredicateMethod = "isRegOrImmWithFP"#opSize#"InputMods";
1136}
1137
// Variant that keeps the inherited parser method but switches the name and
// predicate to the "RegOrInlineImm" forms.
1138class FPVCSrcInputModsMatchClass <int opSize> : FPInputModsMatchClass <opSize> {
1139  let Name = "RegOrInlineImmWithFP"#opSize#"InputMods";
1140  let PredicateMethod = "isRegOrInlineImmWithFP"#opSize#"InputMods";
1141}
1142
1143def FP16InputModsMatchClass : FPInputModsMatchClass<16>;
// 16-bit instance with its own T16 name and predicate; the parser method is
// still the one inherited from FPInputModsMatchClass.
1144def FPT16InputModsMatchClass : FPInputModsMatchClass<16> {
1145  let Name = "RegOrImmWithFPT16InputMods";
1146  let PredicateMethod = "isRegOrImmWithFPT16InputMods";
1147}
1148def FP32InputModsMatchClass : FPInputModsMatchClass<32>;
1149def FP64InputModsMatchClass : FPInputModsMatchClass<64>;
1150
1151def FP16VCSrcInputModsMatchClass : FPVCSrcInputModsMatchClass<16>;
1152def FP32VCSrcInputModsMatchClass : FPVCSrcInputModsMatchClass<32>;
1153
// Common base for input-modifier operands: an i32 operand tagged with the
// AMDGPU operand type OPERAND_INPUT_MODS, using the supplied assembler match
// class for parsing.
1154class InputMods <AsmOperandClass matchClass> : Operand <i32> {
1155  let OperandNamespace = "AMDGPU";
1156  let OperandType = "OPERAND_INPUT_MODS";
1157  let ParserMatchClass = matchClass;
1158}
1159
1160class FPInputMods <FPInputModsMatchClass matchClass> : InputMods <matchClass> {
1161  let PrintMethod = "printOperandAndFPInputMods";
1162}
1163
1164def FP16InputMods : FPInputMods<FP16InputModsMatchClass>;
1165def FPT16InputMods : FPInputMods<FPT16InputModsMatchClass>;
1166def FP32InputMods : FPInputMods<FP32InputModsMatchClass>;
1167def FP64InputMods : FPInputMods<FP64InputModsMatchClass>;
1168
1169def FP16VCSrcInputMods : FPInputMods<FP16VCSrcInputModsMatchClass>;
1170def FP32VCSrcInputMods : FPInputMods<FP32VCSrcInputModsMatchClass>;
1171
// Assembler match class for a source operand with integer input modifiers.
// The class name and predicate method embed `opSize`
// ("RegOrImmWithInt<opSize>InputMods" / "isRegOrImmWithInt<opSize>InputMods");
// the parser method is the same for every size.
1172class IntInputModsMatchClass <int opSize> : AsmOperandClass {
1173  let Name = "RegOrImmWithInt"#opSize#"InputMods";
1174  let ParserMethod = "parseRegOrImmWithIntInputMods";
1175  let PredicateMethod = "isRegOrImmWithInt"#opSize#"InputMods";
1176}
// Variant that keeps the inherited parser method but switches the name and
// predicate to the "RegOrInlineImm" forms.
1177class IntVCSrcInputModsMatchClass <int opSize> : IntInputModsMatchClass <opSize> {
1178  let Name = "RegOrInlineImmWithInt"#opSize#"InputMods";
1179  let PredicateMethod = "isRegOrInlineImmWithInt"#opSize#"InputMods";
1180}
// 16-bit instance with its own T16 name and predicate; the parser method is
// still the one inherited from IntInputModsMatchClass.
1181def IntT16InputModsMatchClass : IntInputModsMatchClass<16> {
1182  let Name = "RegOrImmWithIntT16InputMods";
1183  let PredicateMethod = "isRegOrImmWithIntT16InputMods";
1184}
1185def Int32InputModsMatchClass : IntInputModsMatchClass<32>;
1186def Int64InputModsMatchClass : IntInputModsMatchClass<64>;
1187def Int32VCSrcInputModsMatchClass : IntVCSrcInputModsMatchClass<32>;
1188
1189class IntInputMods <IntInputModsMatchClass matchClass> : InputMods <matchClass> {
1190  let PrintMethod = "printOperandAndIntInputMods";
1191}
1192def IntT16InputMods : IntInputMods<IntT16InputModsMatchClass>;
1193def Int32InputMods : IntInputMods<Int32InputModsMatchClass>;
1194def Int64InputMods : IntInputMods<Int64InputModsMatchClass>;
1195def Int32VCSrcInputMods : IntInputMods<Int32VCSrcInputModsMatchClass>;
1196
1197class OpSelModsMatchClass : AsmOperandClass {
1198  let Name = "OpSelMods";
1199  let ParserMethod = "parseRegOrImm";
1200  let PredicateMethod = "isRegOrImm";
1201}
1202
1203def IntOpSelModsMatchClass : OpSelModsMatchClass;
1204def IntOpSelMods : InputMods<IntOpSelModsMatchClass>;
1205
1206class FPSDWAInputModsMatchClass <int opSize> : AsmOperandClass {
1207  let Name = "SDWAWithFP"#opSize#"InputMods";
1208  let ParserMethod = "parseRegOrImmWithFPInputMods";
1209  let PredicateMethod = "isSDWAFP"#opSize#"Operand";
1210}
1211
1212def FP16SDWAInputModsMatchClass : FPSDWAInputModsMatchClass<16>;
1213def FP32SDWAInputModsMatchClass : FPSDWAInputModsMatchClass<32>;
1214
1215class FPSDWAInputMods <FPSDWAInputModsMatchClass matchClass> :
1216  InputMods <matchClass> {
1217  let PrintMethod = "printOperandAndFPInputMods";
1218}
1219
1220def FP16SDWAInputMods : FPSDWAInputMods<FP16SDWAInputModsMatchClass>;
1221def FP32SDWAInputMods : FPSDWAInputMods<FP32SDWAInputModsMatchClass>;
1222
1223def FPVRegInputModsMatchClass : AsmOperandClass {
1224  let Name = "VRegWithFPInputMods";
1225  let ParserMethod = "parseRegWithFPInputMods";
1226  let PredicateMethod = "isVRegWithInputMods";
1227}
1228
1229class FPT16VRegInputModsMatchClass<bit IsFake16> : AsmOperandClass {
1230  let Name = !if(IsFake16, "Fake16VRegWithFPInputMods",
1231                 "T16VRegWithFPInputMods");
1232  let ParserMethod = "parseRegWithFPInputMods";
1233  let PredicateMethod = "isT16VRegWithInputMods<" #
1234                        !if(IsFake16, "true", "false") # ">";
1235}
1236
1237def FPVRegInputMods : InputMods <FPVRegInputModsMatchClass> {
1238  let PrintMethod = "printOperandAndFPInputMods";
1239}
1240
1241class FPT16VRegInputMods<bit IsFake16>
1242    : InputMods <FPT16VRegInputModsMatchClass<IsFake16>> {
1243  let PrintMethod = "printOperandAndFPInputMods";
1244}
1245
// Assembler match class for an SDWA source with integer input modifiers.
// The class name and predicate method embed `opSize`
// ("SDWAWithInt<opSize>InputMods" / "isSDWAInt<opSize>Operand").
1246class IntSDWAInputModsMatchClass <int opSize> : AsmOperandClass {
1247  let Name = "SDWAWithInt"#opSize#"InputMods";
1248  let ParserMethod = "parseRegOrImmWithIntInputMods";
1249  let PredicateMethod = "isSDWAInt"#opSize#"Operand";
1250}
1251
1252def Int16SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<16>;
1253def Int32SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<32>;
// 32-bit instance that overrides the name and parses with plain
// parseRegOrImm; the predicate stays the inherited "isSDWAInt32Operand".
1254def Bin32SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<32> {
1255  let Name = "SDWAWithBin32InputMods";
1256  let ParserMethod = "parseRegOrImm";
1257}
1258
1259class IntSDWAInputMods <IntSDWAInputModsMatchClass matchClass> :
1260  InputMods <matchClass> {
1261  let PrintMethod = "printOperandAndIntInputMods";
1262}
1263
1264def Int16SDWAInputMods : IntSDWAInputMods<Int16SDWAInputModsMatchClass>;
1265def Int32SDWAInputMods : IntSDWAInputMods<Int32SDWAInputModsMatchClass>;
1266def Bin32SDWAInputMods : IntSDWAInputMods<Bin32SDWAInputModsMatchClass>;
1267
1268def IntVRegInputModsMatchClass : AsmOperandClass {
1269  let Name = "VRegWithIntInputMods";
1270  let ParserMethod = "parseRegWithIntInputMods";
1271  let PredicateMethod = "isVRegWithInputMods";
1272}
1273
1274class IntT16VRegInputModsMatchClass<bit IsFake16> : AsmOperandClass {
1275  let Name = !if(IsFake16, "Fake16VRegWithIntInputMods",
1276                 "T16VRegWithIntInputMods");
1277  let ParserMethod = "parseRegWithIntInputMods";
1278  let PredicateMethod = "isT16VRegWithInputMods<" #
1279                        !if(IsFake16, "true", "false") # ">";
1280}
1281
1282class IntT16VRegInputMods<bit IsFake16>
1283    : InputMods <IntT16VRegInputModsMatchClass<IsFake16>> {
1284  let PrintMethod = "printOperandAndIntInputMods";
1285}
1286
1287def IntVRegInputMods : InputMods <IntVRegInputModsMatchClass> {
1288  let PrintMethod = "printOperandAndIntInputMods";
1289}
1290
// Assembler match class for packed FP sources. Only the class name depends
// on `opSize`; parsing and the predicate use the generic parseRegOrImm /
// isRegOrImm. The size-specific predicate is left commented out.
1291class PackedFPInputModsMatchClass <int opSize> : AsmOperandClass {
1292  let Name = "PackedFP"#opSize#"InputMods";
1293  let ParserMethod = "parseRegOrImm";
1294  let PredicateMethod = "isRegOrImm";
1295//  let PredicateMethod = "isPackedFP"#opSize#"InputMods";
1296}
1297
// Integer counterpart of PackedFPInputModsMatchClass, with the same generic
// parser/predicate and the size-specific predicate likewise commented out.
1298class PackedIntInputModsMatchClass <int opSize> : AsmOperandClass {
1299  let Name = "PackedInt"#opSize#"InputMods";
1300  let ParserMethod = "parseRegOrImm";
1301  let PredicateMethod = "isRegOrImm";
1302//  let PredicateMethod = "isPackedInt"#opSize#"InputMods";
1303}
1304
1305def PackedF16InputModsMatchClass : PackedFPInputModsMatchClass<16>;
1306def PackedI16InputModsMatchClass : PackedIntInputModsMatchClass<16>;
1307
1308class PackedFPInputMods <PackedFPInputModsMatchClass matchClass> : InputMods <matchClass> {
1309//  let PrintMethod = "printPackedFPInputMods";
1310}
1311
1312class PackedIntInputMods <PackedIntInputModsMatchClass matchClass> : InputMods <matchClass> {
1313  //let PrintMethod = "printPackedIntInputMods";
1314}
1315
1316def PackedF16InputMods : PackedFPInputMods<PackedF16InputModsMatchClass>;
1317def PackedI16InputMods : PackedIntInputMods<PackedI16InputModsMatchClass>;
1318
1319//===----------------------------------------------------------------------===//
1320// Complex patterns
1321//===----------------------------------------------------------------------===//
1322
1323def DS1Addr1Offset : ComplexPattern<iPTR, 2, "SelectDS1Addr1Offset">;
1324def DS64Bit4ByteAligned : ComplexPattern<iPTR, 3, "SelectDS64Bit4ByteAligned">;
1325def DS128Bit8ByteAligned : ComplexPattern<iPTR, 3, "SelectDS128Bit8ByteAligned">;
1326
1327def MOVRELOffset : ComplexPattern<iPTR, 2, "SelectMOVRELOffset">;
1328
1329def VOP3Mods0 : ComplexPattern<untyped, 4, "SelectVOP3Mods0">;
1330
1331// Modifiers for floating point instructions.
1332def VOP3Mods  : ComplexPattern<untyped, 2, "SelectVOP3Mods">;
1333
1334// VOP3 modifiers used for instructions that do not read canonicalized
1335// floating point values (i.e. integer operations with FP source
1336// modifiers)
1337def VOP3ModsNonCanonicalizing : ComplexPattern<untyped, 2,
1338  "SelectVOP3ModsNonCanonicalizing">;
1339
1340def VOP3NoMods : ComplexPattern<untyped, 1, "SelectVOP3NoMods">;
1341
1342def VOP3OMods : ComplexPattern<untyped, 3, "SelectVOP3OMods">;
1343
1344def VOP3PMods  : ComplexPattern<untyped, 2, "SelectVOP3PMods">;
1345
1346def VOP3PModsDOT  : ComplexPattern<untyped, 2, "SelectVOP3PModsDOT">;
1347def VOP3PModsNeg  : ComplexPattern<untyped, 1, "SelectVOP3PModsNeg">;
1348def WMMAOpSelVOP3PMods  : ComplexPattern<untyped, 1, "SelectWMMAOpSelVOP3PMods">;
1349
1350def WMMAModsF32NegAbs  : ComplexPattern<untyped, 2, "SelectWMMAModsF32NegAbs">;
1351def WMMAModsF16Neg  : ComplexPattern<untyped, 2, "SelectWMMAModsF16Neg">;
1352def WMMAModsF16NegAbs  : ComplexPattern<untyped, 2, "SelectWMMAModsF16NegAbs">;
1353def WMMAVISrc  : ComplexPattern<untyped, 1, "SelectWMMAVISrc">;
1354def SWMMACIndex8  : ComplexPattern<untyped, 2, "SelectSWMMACIndex8">;
1355def SWMMACIndex16  : ComplexPattern<untyped, 2, "SelectSWMMACIndex16">;
1356
1357def VOP3OpSel  : ComplexPattern<untyped, 2, "SelectVOP3OpSel">;
1358
1359def VOP3OpSelMods  : ComplexPattern<untyped, 2, "SelectVOP3OpSelMods">;
1360
1361def VOP3PMadMixModsExt : ComplexPattern<untyped, 2, "SelectVOP3PMadMixModsExt">;
1362def VOP3PMadMixMods : ComplexPattern<untyped, 2, "SelectVOP3PMadMixMods">;
1363
1364def VINTERPMods  : ComplexPattern<untyped, 2, "SelectVINTERPMods">;
1365def VINTERPModsHi  : ComplexPattern<untyped, 2, "SelectVINTERPModsHi">;
1366
1367//===----------------------------------------------------------------------===//
1368// SI assembler operands
1369//===----------------------------------------------------------------------===//
1370
// Named integer constants for a few operands, kept as raw numeric values.
// Presumably these are the hardware encodings of the corresponding source
// operands -- confirm against the ISA documentation before relying on that.
1371def SIOperand {
1372  int ZERO = 0x80;
1373  int VCC = 0x6A;
1374  int FLAT_SCR = 0x68;
1375}
1376
1377// This should be kept in sync with SISrcMods enum
// Source-modifier flag values. NEG and ABS are single bits and NEG_ABS (3) is
// their combination. Several names intentionally share a value: NEG_HI
// aliases ABS (2), and OP_SEL_1 and DST_OP_SEL are both 8.
1378def SRCMODS {
1379  int NONE = 0;
1380  int NEG = 1;
1381  int ABS = 2;
1382  int NEG_ABS = 3;
1383
1384  int NEG_HI = ABS;
1385  int OP_SEL_0 = 4;
1386  int OP_SEL_1 = 8;
1387  int DST_OP_SEL = 8;
1388}
1389
1390def DSTCLAMP {
1391  int NONE = 0;
1392  int ENABLE = 1;
1393}
1394
1395def DSTOMOD {
1396  int NONE = 0;
1397}
1398
// Numeric ids for hardware registers. The numbering is sparse: ids 0, 8-14,
// 23-24 and 26-28 have no name here. Presumably these are the values used as
// the Reg argument of getHwRegImm -- confirm at the use sites.
1399def HWREG {
1400  int MODE = 1;
1401  int STATUS = 2;
1402  int TRAPSTS = 3;
1403  int HW_ID = 4;
1404  int GPR_ALLOC = 5;
1405  int LDS_ALLOC = 6;
1406  int IB_STS = 7;
1407  int MEM_BASES = 15;
1408  int TBA_LO = 16;
1409  int TBA_HI = 17;
1410  int TMA_LO = 18;
1411  int TMA_HI = 19;
1412  int FLAT_SCR_LO = 20;
1413  int FLAT_SCR_HI = 21;
1414  int XNACK_MASK = 22;
1415  int POPS_PACKER = 25;
1416  int SHADER_CYCLES = 29;
1417}
1418
// Packs a hardware-register reference into a 16-bit immediate:
//   ret = (Reg | (Offset << 6) | ((Size - 1) << 11)) & 0xFFFF
//   Reg    - register id, placed in the low bits.
//   Offset - shifted left by 6; defaults to 0.
//   Size   - stored as Size - 1, shifted left by 11; defaults to 32.
// The fields are not masked individually; only the combined value is
// truncated to 16 bits, so out-of-range arguments can overlap other fields.
1419class getHwRegImm<int Reg, int Offset = 0, int Size = 32> {
1420  int ret = !and(!or(Reg,
1421                     !shl(Offset, 6),
1422                     !shl(!add(Size, -1), 11)), 65535);
1423}
1424
1425//===----------------------------------------------------------------------===//
1426//
1427// SI Instruction multiclass helpers.
1428//
1429// Instructions with _32 take 32-bit operands.
1430// Instructions with _64 take 64-bit operands.
1431//
1432// VOP_* instructions can use either a 32-bit or 64-bit encoding.  The 32-bit
1433// encoding is the standard encoding, but instructions that make use of
1434// any of the instruction modifiers must use the 64-bit encoding.
1435//
1436// Instructions with _e32 use the 32-bit encoding.
1437// Instructions with _e64 use the 64-bit encoding.
1438//
1439//===----------------------------------------------------------------------===//
1440
// Mix-in that records two fields on an instruction: the name of its pseudo
// instruction (PseudoInstr) and an integer subtarget id (Subtarget).
1441class SIMCInstr <string pseudo, int subtarget> {
1442  string PseudoInstr = pseudo;
1443  int Subtarget = subtarget;
1444}
1445
1446//===----------------------------------------------------------------------===//
1447// Vector ALU classes
1448//===----------------------------------------------------------------------===//
1449
// Computes the number of source operands from the three source value types.
// The sources are examined in order and counting stops at the first one that
// is `untyped`: ret is 0 if Src0 is untyped, 1 if Src1 is, 2 if Src2 is, and
// 3 when none of them is.
1450class getNumSrcArgs<ValueType Src0, ValueType Src1, ValueType Src2> {
1451  int ret =
1452    !if (!eq(Src0.Value, untyped.Value),      0,
1453      !if (!eq(Src1.Value, untyped.Value),    1,   // VOP1
1454         !if (!eq(Src2.Value, untyped.Value), 2,   // VOP2
1455                                              3))); // VOP3
1456}
1457
1458// Returns the register class to use for the destination of VOP[123C]
1459// instructions for the given VT.
// Selection is by VT.Size, tested in the order 32, 128, 64, 16:
//   32  -> VOPDstOperand<VGPR_32>
//   128 -> VOPDstOperand<VReg_128>
//   64  -> VOPDstOperand<VReg_64>
//   16  -> op16: with IsTrue16, VOPDstOperand_t16 when IsVOP3Encoding is set
//          and VOPDstOperand_t16Lo128 otherwise; without IsTrue16,
//          VOPDstOperand<VGPR_32>.
// Every other size falls through to VOPDstS64orS32.
1460class getVALUDstForVT<ValueType VT, bit IsTrue16 = 0, bit IsVOP3Encoding = 0> {
1461  defvar op16 = !if(IsTrue16, !if (IsVOP3Encoding, VOPDstOperand_t16,
1462                                   VOPDstOperand_t16Lo128),
1463                    VOPDstOperand<VGPR_32>);
1464  RegisterOperand ret = !if(!eq(VT.Size, 32), VOPDstOperand<VGPR_32>,
1465                          !if(!eq(VT.Size, 128), VOPDstOperand<VReg_128>,
1466                            !if(!eq(VT.Size, 64), VOPDstOperand<VReg_64>,
1467                              !if(!eq(VT.Size, 16), op16,
1468                              VOPDstS64orS32)))); // else VT == i1
1469}
1470
// Destination operand selection by VT.Size, tested in the order 32, 128, 64,
// 16. Unlike getVALUDstForVT, this class takes no flags and always maps a
// 16-bit VT to VOPDstOperand<VGPR_32_Lo128>. Every other size falls through
// to VOPDstS64orS32.
1471class getVALUDstForVT_fake16<ValueType VT> {
1472  RegisterOperand ret = !if(!eq(VT.Size, 32), VOPDstOperand<VGPR_32>,
1473                          !if(!eq(VT.Size, 128), VOPDstOperand<VReg_128>,
1474                            !if(!eq(VT.Size, 64), VOPDstOperand<VReg_64>,
1475                              !if(!eq(VT.Size, 16), VOPDstOperand<VGPR_32_Lo128>,
1476                              VOPDstS64orS32)))); // else VT == i1
1477}
1478
1479// Returns the register class to use for the destination of VOP[12C]
1480// instructions with SDWA extension
// Only a 1-bit VT selects SDWAVopcDst; every other VT, whatever its size,
// gets VOPDstOperand<VGPR_32>.
1481class getSDWADstForVT<ValueType VT> {
1482  RegisterOperand ret = !if(!eq(VT.Size, 1),
1483                            SDWAVopcDst, // VOPC
1484                            VOPDstOperand<VGPR_32>); // VOP1/2 32-bit dst
1485}
1486
1487// Returns the register class to use for source 0 of VOP[12C]
1488// instructions for the given VT.
// IsTrue16 has no default and must be supplied; IsFake16 defaults to 1.
// Floating-point VTs:
//   64-bit          -> VSrc_f64
//   f16 / bf16      -> with IsTrue16: VSrcFake16_f16_Lo128 if IsFake16,
//                      else VSrcT_f16_Lo128; without IsTrue16: VSrc_f16
//   v2f16 / v2bf16  -> VSrc_v2f16
//   v4f16 / v4bf16  -> AVSrc_64 (only reachable if VT.Size is not 64, since
//                      the 64-bit test comes first)
//   anything else   -> VSrc_f32
// Integer VTs:
//   64-bit          -> VSrc_b64
//   i16             -> with IsTrue16: VSrcFake16_b16_Lo128 if IsFake16,
//                      else VSrcT_b16_Lo128; without IsTrue16: VSrc_b16
//   v2i16           -> VSrc_v2b16
//   anything else   -> VSrc_b32
1489class getVOPSrc0ForVT<ValueType VT, bit IsTrue16, bit IsFake16 = 1> {
1490  RegisterOperand ret =
1491    !if(VT.isFP,
1492      !if(!eq(VT.Size, 64),
1493         VSrc_f64,
1494         !if(!or(!eq(VT.Value, f16.Value), !eq(VT.Value, bf16.Value)),
1495            !if(IsTrue16,
1496              !if(IsFake16, VSrcFake16_f16_Lo128, VSrcT_f16_Lo128),
1497              VSrc_f16
1498            ),
1499            !if(!or(!eq(VT.Value, v2f16.Value), !eq(VT.Value, v2bf16.Value)),
1500               VSrc_v2f16,
1501               !if(!or(!eq(VT.Value, v4f16.Value), !eq(VT.Value, v4bf16.Value)),
1502                 AVSrc_64,
1503                 VSrc_f32
1504               )
1505            )
1506         )
1507       ),
1508       !if(!eq(VT.Size, 64),
1509          VSrc_b64,
1510          !if(!eq(VT.Value, i16.Value),
1511            !if(IsTrue16,
1512              !if(IsFake16, VSrcFake16_b16_Lo128, VSrcT_b16_Lo128),
1513              VSrc_b16
1514            ),
1515             !if(!eq(VT.Value, v2i16.Value),
1516                VSrc_v2b16,
1517                VSrc_b32
1518             )
1519          )
1520       )
1521    );
1522}
1523
// Scalar source operand for the given VT: SSrc_b64 when VT is 64 bits wide,
// SSrc_b32 for every other size.
1524class getSOPSrcForVT<ValueType VT> {
1525  RegisterOperand ret = !if(!eq(VT.Size, 64), SSrc_b64, SSrc_b32);
1526}
1527
1528// Returns the vreg register class to use for source operand given VT
// Selection is by VT.Size:
//   128 -> VReg_128, 96 -> VReg_96, 64 -> VReg_64
//   48  -> VReg_64 (shares the 64-bit class)
//   16  -> with IsTrue16: VGPRSrc_32_Lo128 if IsFake16, else
//          VGPRSrc_16_Lo128; without IsTrue16: VGPR_32
//   any other size -> VGPR_32
1529class getVregSrcForVT<ValueType VT, bit IsTrue16 = 0, bit IsFake16 = 0> {
1530  RegisterOperand ret =
1531      !if (!eq(VT.Size, 128), RegisterOperand<VReg_128>,
1532           !if (!eq(VT.Size, 96), RegisterOperand<VReg_96>,
1533                !if (!eq(VT.Size, 64), RegisterOperand<VReg_64>,
1534                     !if (!eq(VT.Size, 48), RegisterOperand<VReg_64>,
1535                          !if (!eq(VT.Size, 16),
1536                               !if (IsTrue16,
1537                                    !if (IsFake16, VGPRSrc_32_Lo128, VGPRSrc_16_Lo128),
1538                                    RegisterOperand<VGPR_32>),
1539                               RegisterOperand<VGPR_32>)))));
1540}
1541
// SDWA source operand for the given VT. retFlt and retInt hold the FP and
// integer candidates (the 16-bit variant when VT.Size is 16, the 32-bit
// variant otherwise); ret picks between them based on VT.isFP.
1542class getSDWASrcForVT <ValueType VT> {
1543  RegisterOperand retFlt = !if(!eq(VT.Size, 16), SDWASrc_f16, SDWASrc_f32);
1544  RegisterOperand retInt = !if(!eq(VT.Size, 16), SDWASrc_i16, SDWASrc_i32);
1545  RegisterOperand ret = !if(VT.isFP, retFlt, retInt);
1546}
1547
1548// Returns the register class to use for sources of VOP3 instructions for the
1549// given VT.
// Tests are applied in this order:
//   128-bit        -> VRegSrc_128
//   64-bit FP      -> VSrc_v2f32 for v2f32, otherwise VSrc_f64
//   64-bit integer -> VSrc_v2b32 for v2i32, otherwise VSrc_b64
//   i1             -> SSrc_i1
//   f16 / bf16     -> VSrcT_f16 with IsTrue16, otherwise VSrc_f16
//   v2f16 / v2bf16 -> VSrc_v2f16
//   v4f16 / v4bf16 -> AVSrc_64 (only reachable if VT.Size is not 64, since
//                     the 64-bit test comes first)
//   other FP       -> VSrc_f32
//   i16            -> VSrcT_b16 with IsTrue16, otherwise VSrc_b16
//   v2i16          -> VSrc_v2b16
//   other integer  -> VSrc_b32
1550class getVOP3SrcForVT<ValueType VT, bit IsTrue16 = 0> {
1551  RegisterOperand ret =
1552  !if(!eq(VT.Size, 128),
1553     VRegSrc_128,
1554     !if(!eq(VT.Size, 64),
1555        !if(VT.isFP,
1556           !if(!eq(VT.Value, v2f32.Value),
1557               VSrc_v2f32,
1558               VSrc_f64),
1559           !if(!eq(VT.Value, v2i32.Value),
1560               VSrc_v2b32,
1561           VSrc_b64)),
1562        !if(!eq(VT.Value, i1.Value),
1563           SSrc_i1,
1564           !if(VT.isFP,
1565              !if(!or(!eq(VT.Value, f16.Value), !eq(VT.Value, bf16.Value)),
1566                 !if(IsTrue16, VSrcT_f16, VSrc_f16),
1567                 !if(!or(!eq(VT.Value, v2f16.Value), !eq(VT.Value, v2bf16.Value)),
1568                    VSrc_v2f16,
1569                    !if(!or(!eq(VT.Value, v4f16.Value), !eq(VT.Value, v4bf16.Value)),
1570                      AVSrc_64,
1571                      VSrc_f32
1572                    )
1573                 )
1574              ),
1575              !if(!eq(VT.Value, i16.Value),
1576                 !if(IsTrue16, VSrcT_b16, VSrc_b16),
1577                 !if(!eq(VT.Value, v2i16.Value),
1578                    VSrc_v2b16,
1579                    VSrc_b32
1580                 )
1581              )
1582           )
1583        )
1584     )
1585  );
1586}
1587
1588// Src2 of VOP3 DPP instructions cannot be a literal
// Every result other than SSrc_i1 is a VCSrc_* operand:
//   i1             -> SSrc_i1
//   f16 / bf16     -> VCSrc_f16
//   v2f16 / v2bf16 -> VCSrc_v2f16
//   other FP       -> VCSrc_f32
//   i16            -> VCSrc_b16
//   v2i16          -> VCSrc_v2b16
//   other integer  -> VCSrc_b32
1589class getVOP3DPPSrcForVT<ValueType VT> {
1590  RegisterOperand ret =
1591      !if (!eq(VT.Value, i1.Value), SSrc_i1,
1592           !if (VT.isFP,
1593                !if (!or(!eq(VT.Value, f16.Value), !eq(VT.Value, bf16.Value)), VCSrc_f16,
1594                     !if (!or(!eq(VT.Value, v2f16.Value), !eq(VT.Value, v2bf16.Value)), VCSrc_v2f16, VCSrc_f32)),
1595                !if (!eq(VT.Value, i16.Value), VCSrc_b16,
1596                     !if (!eq(VT.Value, v2i16.Value), VCSrc_v2b16,
1597                          VCSrc_b32))));
1598}
1599
1600// Float or packed int
// ret is 1 when SrcVT is one of the value types listed below and 0 for every
// other type. The list is an explicit whitelist: the scalar FP types (f16,
// bf16, f32, f64) plus the enumerated vector types; scalar integer types are
// not included.
1601class isModifierType<ValueType SrcVT> {
1602  bit ret = !or(!eq(SrcVT.Value, f16.Value),
1603                !eq(SrcVT.Value, bf16.Value),
1604                !eq(SrcVT.Value, f32.Value),
1605                !eq(SrcVT.Value, f64.Value),
1606                !eq(SrcVT.Value, v2f16.Value),
1607                !eq(SrcVT.Value, v2i16.Value),
1608                !eq(SrcVT.Value, v2bf16.Value),
1609                !eq(SrcVT.Value, v2f32.Value),
1610                !eq(SrcVT.Value, v2i32.Value),
1611                !eq(SrcVT.Value, v4f16.Value),
1612                !eq(SrcVT.Value, v4i16.Value),
1613                !eq(SrcVT.Value, v4bf16.Value),
1614                !eq(SrcVT.Value, v4f32.Value),
1615                !eq(SrcVT.Value, v4i32.Value),
1616                !eq(SrcVT.Value, v8f16.Value),
1617                !eq(SrcVT.Value, v8i16.Value),
1618                !eq(SrcVT.Value, v8bf16.Value),
1619                !eq(SrcVT.Value, v8f32.Value),
1620                !eq(SrcVT.Value, v8i32.Value),
1621                !eq(SrcVT.Value, v16f16.Value),
1622                !eq(SrcVT.Value, v16i16.Value),
1623                !eq(SrcVT.Value, v16bf16.Value));
1624}
1625
1626// Return type of input modifiers operand for specified input operand
// Selection is by VT.Size, then by VT.isFP:
//   64-bit -> FP64InputMods / Int64InputMods
//   16-bit -> FP: FPT16InputMods with IsTrue16, else FP16InputMods
//             integer: IntT16InputMods with IsTrue16, else IntOpSelMods
//   other  -> FP32InputMods / Int32InputMods
1627class getSrcMod <ValueType VT, bit IsTrue16 = 0> {
1628  Operand ret =  !if(!eq(VT.Size, 64),
1629                     !if(VT.isFP, FP64InputMods, Int64InputMods),
1630                     !if(!eq(VT.Size, 16),
1631                         !if(VT.isFP, !if(IsTrue16, FPT16InputMods, FP16InputMods),
1632                                      !if(IsTrue16, IntT16InputMods, IntOpSelMods)),
1633                         !if(VT.isFP, FP32InputMods, Int32InputMods)));
1634}
1635
// Input-modifier operand for op_sel use: FP16InputMods when VT is f16 or
// bf16, IntOpSelMods for every other type.
1636class getOpSelMod <ValueType VT> {
1637  Operand ret = !if(!or(!eq(VT.Value, f16.Value), !eq(VT.Value, bf16.Value)),
1638                    FP16InputMods, IntOpSelMods);
1639}
1640
1641// Return type of input modifiers operand for specified input operand for DPP
// FP value types get FPVRegInputMods; all other types get IntVRegInputMods.
1642class getSrcModDPP <ValueType VT> {
1643  Operand ret = !if(VT.isFP, FPVRegInputMods, IntVRegInputMods);
1644}
1645
// Input-modifiers operand selection for the t16 DPP forms. f16/bf16 sources
// (when VT.isFP is set) and i16 sources (when it is not) use the T16
// operands parameterized by IsFake16; every other type uses the plain VGPR
// modifier operands.
class getSrcModDPP_t16 <ValueType VT, bit IsFake16 = 1> {
  Operand ret = !cond(
    // FP source that is f16 or bf16.
    !and(VT.isFP,
         !or(!eq(VT.Value, f16.Value), !eq(VT.Value, bf16.Value))):
      FPT16VRegInputMods<IsFake16>,
    // Any other FP source.
    VT.isFP:
      FPVRegInputMods,
    // Non-FP i16 source.
    !eq(VT.Value, i16.Value):
      IntT16VRegInputMods<IsFake16>,
    // Any other non-FP source.
    true:
      IntVRegInputMods);
}
1654
// Return the type of the input modifiers operand for the specified input
// operand in VOP3 DPP encodings.
class getSrcModVOP3DPP <ValueType VT> {
  Operand ret = !cond(
    // Every non-FP type shares the 32-bit integer modifiers operand.
    !not(VT.isFP): Int32VCSrcInputMods,
    // FP: f16 and bf16 use the 16-bit operand, the rest the 32-bit one.
    !or(!eq(VT.Value, f16.Value), !eq(VT.Value, bf16.Value)):
                   FP16VCSrcInputMods,
    true:          FP32VCSrcInputMods);
}
1663
// Return the type of the input modifiers operand for the specified input
// operand in SDWA encodings. f16 and bf16 share FP16SDWAInputMods; any type
// not listed explicitly falls back to Int32SDWAInputMods.
class getSrcModSDWA <ValueType VT> {
  Operand ret = !cond(
    !eq(VT.Value, f16.Value):  FP16SDWAInputMods,
    !eq(VT.Value, f32.Value):  FP32SDWAInputMods,
    !eq(VT.Value, i16.Value):  Int16SDWAInputMods,
    !eq(VT.Value, bf16.Value): FP16SDWAInputMods,
    true:                      Int32SDWAInputMods);
}
1672
// Returns the input arguments for VOP[12C] instructions with NumSrcArgs
// sources. Any count other than 1 or 2 yields an empty operand list.
class getIns32 <RegisterOperand Src0RC, RegisterOperand Src1RC, int NumSrcArgs> {
  dag ret = !cond(
    !eq(NumSrcArgs, 1): (ins Src0RC:$src0),               // VOP1
    !eq(NumSrcArgs, 2): (ins Src0RC:$src0, Src1RC:$src1), // VOP2
    true:               (ins));
}
1679
// Returns the input arguments for VOP3 instructions.
//
// The list is assembled from per-source operand groups followed by the
// output-modifier operands:
//   * NumSrcArgs selects how many sources are present (0, 1, 2, otherwise 3).
//   * HasModifiers puts a $srcN_modifiers operand in front of src0 and src1;
//     src2 only gets one when HasSrc2Mods is set as well.
//   * $omod is only ever emitted together with $clamp, and only when both
//     HasModifiers and HasOMod are set.
class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
                RegisterOperand Src2RC, int NumSrcArgs,
                bit HasClamp, bit HasModifiers, bit HasSrc2Mods, bit HasOMod,
                Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {

  // Source operand groups, with or without their modifiers operand.
  dag Src0Ins = !if(HasModifiers,
                    (ins Src0Mod:$src0_modifiers, Src0RC:$src0),
                    (ins Src0RC:$src0));
  dag Src1Ins = !if(HasModifiers,
                    (ins Src1Mod:$src1_modifiers, Src1RC:$src1),
                    (ins Src1RC:$src1));
  dag Src2Ins = !if(!and(HasModifiers, HasSrc2Mods),
                    (ins Src2Mod:$src2_modifiers, Src2RC:$src2),
                    (ins Src2RC:$src2));

  // Trailing clamp/omod operands.
  // NOTE: the two-source form with modifiers always carries $clamp, even
  // when HasClamp is 0; the one- and three-source forms honour HasClamp.
  dag OutMods = !cond(
    !and(HasModifiers, HasOMod):
      (ins clampmod0:$clamp, omod0:$omod),
    !or(HasClamp, !and(HasModifiers, !eq(NumSrcArgs, 2))):
      (ins clampmod0:$clamp),
    true:
      (ins));

  dag ret = !cond(
    // VOP1 without input operands (V_NOP, V_CLREXCP)
    !eq(NumSrcArgs, 0): (ins),
    // One source
    !eq(NumSrcArgs, 1): !con(Src0Ins, OutMods),
    // Two sources
    !eq(NumSrcArgs, 2): !con(Src0Ins, Src1Ins, OutMods),
    // NumSrcArgs == 3
    true:               !con(Src0Ins, Src1Ins, Src2Ins, OutMods));
}
1759
// Input operands shared by the VOP3 variants: the getIns64 operand list,
// optionally followed by an $op_sel operand.
class getInsVOP3Base<RegisterOperand Src0RC, RegisterOperand Src1RC,
                RegisterOperand Src2RC, int NumSrcArgs,
                bit HasClamp, bit HasModifiers, bit HasSrc2Mods, bit HasOMod,
                Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOpSel> {
  // getIns64 handles clamp and omod. There is an implicit mutex between
  // vop3p and omod.
  dag base = getIns64 <Src0RC, Src1RC, Src2RC, NumSrcArgs,
                       HasClamp, HasModifiers, HasSrc2Mods, HasOMod,
                       Src0Mod, Src1Mod, Src2Mod>.ret;
  dag opsel = (ins op_sel0:$op_sel);

  // Append $op_sel only when requested; otherwise the base list is final.
  dag ret = !if(HasOpSel, !con(base, opsel), base);
}
1771
// Input operands for VOP3P instructions: the VOP3 base list (all sources
// with modifiers, never omod), then $op_sel_hi when HasOpSel is set, then
// $neg_lo and $neg_hi, which are always present.
class getInsVOP3P <RegisterOperand Src0RC, RegisterOperand Src1RC,
                   RegisterOperand Src2RC, int NumSrcArgs, bit HasClamp, bit HasOpSel,
                   Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
  dag base = getInsVOP3Base<Src0RC, Src1RC, Src2RC, NumSrcArgs,
                            HasClamp,
                            /*HasModifiers=*/1,
                            /*HasSrc2Mods=*/1,
                            /*HasOMod=*/0,
                            Src0Mod, Src1Mod, Src2Mod, HasOpSel>.ret;

  dag vop3pOpsel = (ins op_sel_hi0:$op_sel_hi);
  dag vop3p_neg = (ins neg_lo0:$neg_lo, neg_hi0:$neg_hi);

  // VOP3P-specific tail operands.
  dag vop3pFields = !if(HasOpSel, !con(vop3pOpsel, vop3p_neg), vop3p_neg);
  dag ret = !con(base, vop3pFields);
}
1785
// Input operands for VOP3 instructions in their op_sel form. This is
// getInsVOP3Base with modifiers forced on for every source and $op_sel
// always appended.
class getInsVOP3OpSel <RegisterOperand Src0RC, RegisterOperand Src1RC,
                       RegisterOperand Src2RC, int NumSrcArgs,
                       bit HasClamp, bit HasOMod,
                       Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
  dag ret = getInsVOP3Base<Src0RC, Src1RC, Src2RC, NumSrcArgs,
                           HasClamp,
                           /*HasModifiers=*/1,
                           /*HasSrc2Mods=*/1,
                           HasOMod,
                           Src0Mod, Src1Mod, Src2Mod,
                           /*HasOpSel=*/1>.ret;
}
1795
// Source operands shared by all DPP forms: an optional $old operand followed
// by the sources. With HasModifiers set, every source present (src2
// included) is preceded by its $srcN_modifiers operand.
class getInsDPPBase <RegisterOperand OldRC, RegisterOperand Src0RC, RegisterOperand Src1RC,
                     RegisterOperand Src2RC, int NumSrcArgs, bit HasModifiers,
                     Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld> {
  // Leading $old operand, if any.
  dag OldIns = !if(HasOld, (ins OldRC:$old), (ins));

  // Per-source operand groups.
  dag Src0Ins = !if(HasModifiers,
                    (ins Src0Mod:$src0_modifiers, Src0RC:$src0),
                    (ins Src0RC:$src0));
  dag Src1Ins = !if(HasModifiers,
                    (ins Src1Mod:$src1_modifiers, Src1RC:$src1),
                    (ins Src1RC:$src1));
  dag Src2Ins = !if(HasModifiers,
                    (ins Src2Mod:$src2_modifiers, Src2RC:$src2),
                    (ins Src2RC:$src2));

  dag ret = !cond(
    // VOP1 without input operands (V_NOP): no $old either.
    !eq(NumSrcArgs, 0): (ins),
    // VOP1_DPP
    !eq(NumSrcArgs, 1): !con(OldIns, Src0Ins),
    // VOP2_DPP
    !eq(NumSrcArgs, 2): !con(OldIns, Src0Ins, Src1Ins),
    // NumSrcArgs == 3, VOP3
    true:               !con(OldIns, Src0Ins, Src1Ins, Src2Ins));
}
1837
// Input operands for DPP instructions: the getInsDPPBase operands followed
// by the DPP control operands.
class getInsDPP <RegisterOperand OldRC, RegisterOperand Src0RC, RegisterOperand Src1RC,
                 RegisterOperand Src2RC, int NumSrcArgs, bit HasModifiers,
                 Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld = 1> {
  // Control operands appended after the sources.
  dag DppCtrlIns = (ins dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                        bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);

  dag ret = !con(getInsDPPBase<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs,
                               HasModifiers, Src0Mod, Src1Mod, Src2Mod,
                               HasOld>.ret,
                 DppCtrlIns);
}
1846
// Input operands for DPP16 instructions: the getInsDPP operand list with a
// trailing $fi operand.
class getInsDPP16 <RegisterOperand OldRC, RegisterOperand Src0RC, RegisterOperand Src1RC,
                   RegisterOperand Src2RC, int NumSrcArgs, bit HasModifiers,
                   Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld = 1> {
  dag FiIns = (ins FI:$fi);

  dag ret = !con(getInsDPP<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs,
                           HasModifiers, Src0Mod, Src1Mod, Src2Mod,
                           HasOld>.ret,
                 FiIns);
}
1854
// Input operands for DPP8 instructions: the getInsDPPBase operands followed
// by $dpp8 and $fi. The DPP16 control operands are not used here.
class getInsDPP8 <RegisterOperand OldRC, RegisterOperand Src0RC, RegisterOperand Src1RC,
                  RegisterOperand Src2RC, int NumSrcArgs, bit HasModifiers,
                  Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld = 1> {
  dag Dpp8Ins = (ins dpp8:$dpp8, FI:$fi);

  dag ret = !con(getInsDPPBase<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs,
                               HasModifiers, Src0Mod, Src1Mod, Src2Mod,
                               HasOld>.ret,
                 Dpp8Ins);
}
1862
// Prefixes an already-built VOP3 operand list with the $old operand. $old is
// only added when HasOld is set and the instruction has at least one source.
class getInsVOP3DPPBase<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld> {
  dag old = (ins OldRC:$old);
  dag base = VOP3Base;
  dag ret = !if(!or(!not(HasOld), !eq(NumSrcArgs, 0)),
                base,
                !con(old, base));
}
1871
// Input operands for VOP3 DPP instructions: the getInsVOP3DPPBase operands
// followed by the DPP control operands.
class getInsVOP3DPP<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld = 1> {
  dag DppCtrlIns = (ins dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                        bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);

  dag ret = !con(getInsVOP3DPPBase<VOP3Base, OldRC, NumSrcArgs, HasOld>.ret,
                 DppCtrlIns);
}
1877
// Input operands for VOP3 DPP16 instructions: the getInsVOP3DPP operand list
// with a trailing $fi operand.
class getInsVOP3DPP16<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld = 1> {
  dag FiIns = (ins FI:$fi);

  dag ret = !con(getInsVOP3DPP<VOP3Base, OldRC, NumSrcArgs, HasOld>.ret,
                 FiIns);
}
1882
// Input operands for VOP3 DPP8 instructions: the getInsVOP3DPPBase operands
// followed by $dpp8 and $fi.
class getInsVOP3DPP8<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld = 1> {
  dag Dpp8Ins = (ins dpp8:$dpp8, FI:$fi);

  dag ret = !con(getInsVOP3DPPBase<VOP3Base, OldRC, NumSrcArgs, HasOld>.ret,
                 Dpp8Ins);
}
1887
// Input operands for SDWA instructions.
//
// Sources always carry their modifiers operand. The one-source form and the
// two-source form with a wider-than-1-bit DstVT take $clamp (plus $omod when
// HasSDWAOMod is set), then the dst_sel/dst_unused pair and the per-source
// select operands. The two-source form with a 1-bit DstVT (VOPC) takes only
// $clamp and the source selects. Zero sources, or more than two, give an
// empty list.
class getInsSDWA <RegisterOperand Src0RC, RegisterOperand Src1RC, int NumSrcArgs,
                  bit HasSDWAOMod, Operand Src0Mod, Operand Src1Mod,
                  ValueType DstVT> {

  // Source operand groups.
  dag Src0Ins = (ins Src0Mod:$src0_modifiers, Src0RC:$src0);
  dag Src1Ins = (ins Src1Mod:$src1_modifiers, Src1RC:$src1);

  // Output modifiers: clamp, optionally followed by omod.
  dag OutMods = !if(HasSDWAOMod,
                    (ins clampmod:$clamp, omod:$omod),
                    (ins clampmod:$clamp));

  // Destination select operands (absent for VOPC).
  dag DstSelIns = (ins dst_sel:$dst_sel, dst_unused:$dst_unused);

  dag ret = !cond(
    // VOP1_SDWA
    !eq(NumSrcArgs, 1):
      !con(Src0Ins, OutMods, DstSelIns, (ins src0_sel:$src0_sel)),
    // VOPC_SDWA
    !and(!eq(NumSrcArgs, 2), !eq(DstVT.Size, 1)):
      !con(Src0Ins, Src1Ins,
           (ins clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel)),
    // VOP2_SDWA
    !eq(NumSrcArgs, 2):
      !con(Src0Ins, Src1Ins, OutMods, DstSelIns,
           (ins src0_sel:$src0_sel, src1_sel:$src1_sel)),
    // VOP1 without input operands (V_NOP), or any other operand count.
    true:
      (ins));
}
1931
// Outs for DPP. A $vdst operand is produced only when the instruction has a
// destination and DstVT is wider than one bit; a 1-bit DstVT (VOPC) and
// destination-less instructions (V_NOP) get an empty list.
class getOutsDPP <bit HasDst, ValueType DstVT, RegisterOperand DstRCDPP> {
  dag ret = !if(!and(HasDst, !ne(DstVT.Size, 1)),
                (outs DstRCDPP:$vdst),
                (outs));
}
1940
// Outs for SDWA. The destination is named $sdst when DstVT is one bit wide
// and $vdst otherwise; instructions without a destination (V_NOP) get an
// empty list.
class getOutsSDWA <bit HasDst, ValueType DstVT, RegisterOperand DstRCSDWA> {
  dag ret = !cond(
    !not(HasDst):         (outs), // V_NOP
    !eq(DstVT.Size, 1):   (outs DstRCSDWA:$sdst),
    true:                 (outs DstRCSDWA:$vdst));
}
1949
// Returns the assembly string for the inputs and outputs of a VOP[12C]
// instruction. Every source is emitted with a leading ", ", so the result
// starts with a separator when HasDst is 0.
class getAsm32 <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> {
  string dst = !if(!ne(DstVT.Size, 1), "$vdst", "$sdst"); // use $sdst for VOPC
  string src0 = ", $src0";
  string src1 = ", $src1";
  string src2 = ", $src2";
  string ret = !strconcat(
    !if(HasDst, dst, ""),
    !cond(!eq(NumSrcArgs, 1): src0,
          !eq(NumSrcArgs, 2): !strconcat(src0, src1),
          !eq(NumSrcArgs, 3): !strconcat(src0, src1, src2),
          true:               ""));
}
1962
// Assembly string for one half of a VOPD instruction. XorY is appended to
// every operand name; the second source is spelled $vsrc1.
class getAsmVOPDPart <int NumSrcArgs, string XorY> {
  string dst = !strconcat("$vdst", XorY);
  string src0 = !strconcat(", $src0", XorY);
  string src1 = !strconcat(", $vsrc1", XorY);
  string ret = !strconcat(
    dst,
    !cond(!ge(NumSrcArgs, 2): !strconcat(src0, src1),
          !ge(NumSrcArgs, 1): src0,
          true:               ""));
}
1971
// Returns the assembly string for the inputs and outputs of a VOP3P
// instruction.
class getAsmVOP3P <int NumSrcArgs, bit HasModifiers,
                   bit HasClamp, bit HasOpSel> {
  string dst = "$vdst";

  // A source gets a trailing comma whenever another source follows it.
  string src0 = "$src0" # !if(!eq(NumSrcArgs, 1), "", ",");
  string src1 = !cond(!eq(NumSrcArgs, 1): "",
                      !eq(NumSrcArgs, 2): " $src1",
                      true:               " $src1,");
  string src2 = !if(!ne(NumSrcArgs, 3), "", " $src2");

  string mods = !if(HasModifiers, "$neg_lo$neg_hi", "");
  string clamp = !if(HasClamp, "$clamp", "");
  string opsel = !if(HasOpSel, "$op_sel$op_sel_hi", "");

  // Each modifier is printed as an array of bits for each operand, so
  // all operands are printed as part of src0_modifiers.
  string ret = !strconcat(dst, ", ", src0, src1, src2, opsel, mods, clamp);
}
1991
// Assembly string for the op_sel form of a VOP3 instruction. Each source is
// printed either through its $srcN_modifiers operand or as the bare $srcN
// operand, depending on the matching SrcNHasMods flag. $op_sel is always
// printed.
class getAsmVOP3OpSel <int NumSrcArgs,
                       bit HasClamp,
                       bit HasOMod,
                       bit Src0HasMods,
                       bit Src1HasMods,
                       bit Src2HasMods> {
  string dst = "$vdst";

  // Bare source spellings. A trailing comma is added whenever another source
  // follows.
  string isrc0 = "$src0" # !if(!eq(NumSrcArgs, 1), "", ",");
  string isrc1 = !cond(!eq(NumSrcArgs, 1): "",
                       !eq(NumSrcArgs, 2): " $src1",
                       true:               " $src1,");
  string isrc2 = !if(!ne(NumSrcArgs, 3), "", " $src2");

  // Source spellings that go through the modifiers operand.
  string fsrc0 = "$src0_modifiers" # !if(!eq(NumSrcArgs, 1), "", ",");
  string fsrc1 = !cond(!eq(NumSrcArgs, 1): "",
                       !eq(NumSrcArgs, 2): " $src1_modifiers",
                       true:               " $src1_modifiers,");
  string fsrc2 = !if(!ne(NumSrcArgs, 3), "", " $src2_modifiers");

  string src0 = !if(Src0HasMods, fsrc0, isrc0);
  string src1 = !if(Src1HasMods, fsrc1, isrc1);
  string src2 = !if(Src2HasMods, fsrc2, isrc2);

  string clamp = !if(HasClamp, "$clamp", "");
  string omod = !if(HasOMod, "$omod", "");
  string ret = !strconcat(dst, ", ", src0, src1, src2, "$op_sel", clamp, omod);
}
2020
// Assembly string for DPP instructions: destination, sources, then the DPP
// control operands. With HasModifiers the sources are printed through their
// $srcN_modifiers operands; without it the source list comes from getAsm32.
// The dst and args fields are also read by getAsmDPP8.
class getAsmDPP <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
  string dst = !cond(!not(HasDst):       "",
                     !eq(DstVT.Size, 1): "$sdst", // use $sdst for VOPC
                     true:               "$vdst");
  string src0 = "$src0_modifiers" # !if(!eq(NumSrcArgs, 1), "", ",");
  string src1 = !cond(!eq(NumSrcArgs, 1): "",
                      !eq(NumSrcArgs, 2): " $src1_modifiers",
                      true:               " $src1_modifiers,");
  string args = !if(HasModifiers,
                    !strconcat(", ", src0, src1),
                    getAsm32<0, NumSrcArgs, DstVT>.ret);
  string ret = !strconcat(dst, args, " $dpp_ctrl$row_mask$bank_mask$bound_ctrl");
}
2036
// Assembly string for DPP16 instructions: the getAsmDPP string with "$fi"
// appended.
class getAsmDPP16 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
  string ret = !strconcat(
    getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret,
    "$fi");
}
2040
// Assembly string for DPP8 instructions. Reuses the dst and args fields
// inherited from getAsmDPP and replaces the DPP control suffix with
// " $dpp8$fi".
class getAsmDPP8 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32>
  : getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>{
  let ret = !strconcat(dst, args, " $dpp8$fi");
}
2045
// Assembly string shared by the VOP3 variants: destination, sources (bare
// or through their modifiers operand, chosen per source), then the optional
// $op_sel, VOP3P-only modifiers, $clamp and $omod. With no sources only the
// destination is printed.
class getAsmVOP3Base <int NumSrcArgs, bit HasDst, bit HasClamp,
                       bit HasOpSel, bit HasOMod, bit IsVOP3P,
                       bit HasModifiers, bit Src0HasMods,
                       bit Src1HasMods, bit Src2HasMods, ValueType DstVT = i32> {
  string dst = !cond(!not(HasDst):       "",
                     !eq(DstVT.Size, 1): "$sdst", // use $sdst for VOPC
                     true:               "$vdst");

  // Bare source spellings. A trailing comma is added whenever another source
  // follows.
  string src0nomods = "$src0" # !if(!eq(NumSrcArgs, 1), "", ",");
  string src1nomods = !cond(!eq(NumSrcArgs, 1): "",
                            !eq(NumSrcArgs, 2): " $src1",
                            true:               " $src1,");
  string src2nomods = !if(!ne(NumSrcArgs, 3), "", " $src2");

  // Source spellings that go through the modifiers operand.
  string src0mods = "$src0_modifiers" # !if(!eq(NumSrcArgs, 1), "", ",");
  string src1mods = !cond(!eq(NumSrcArgs, 1): "",
                          !eq(NumSrcArgs, 2): " $src1_modifiers",
                          true:               " $src1_modifiers,");
  string src2mods = !if(!ne(NumSrcArgs, 3), "", " $src2_modifiers");

  string src0 = !if(Src0HasMods, src0mods, src0nomods);
  string src1 = !if(Src1HasMods, src1mods, src1nomods);
  string src2 = !if(Src2HasMods, src2mods, src2nomods);
  string opsel = !if(HasOpSel, "$op_sel", "");

  // Extra modifiers that only exist for VOP3P.
  string 3PMods = !if(!not(IsVOP3P),
                      "",
                      !strconcat(!if(HasOpSel, "$op_sel_hi", ""),
                                 !if(HasModifiers, "$neg_lo$neg_hi", "")));
  string clamp = !if(HasClamp, "$clamp", "");
  string omod = !if(HasOMod, "$omod", "");

  string ret = !if(!gt(NumSrcArgs, 0),
                   !strconcat(dst, ", ", src0, src1, src2, opsel, 3PMods,
                              clamp, omod),
                   dst);

}
2081
// Appends the DPP control operands to an existing VOP3 assembly string.
class getAsmVOP3DPP<string base> {
  string ret = !strconcat(base, " $dpp_ctrl$row_mask$bank_mask$bound_ctrl");
}
2085
// VOP3 DPP16 assembly string: the getAsmVOP3DPP string with "$fi" appended.
class getAsmVOP3DPP16<string base> {
  string ret = !strconcat(getAsmVOP3DPP<base>.ret, "$fi");
}
2089
// VOP3 DPP8 assembly string: appends " $dpp8$fi" to an existing VOP3
// assembly string.
class getAsmVOP3DPP8<string base> {
  string ret = !strconcat(base, " $dpp8$fi");
}
2093
2094
// Assembly string for SDWA instructions. Sources are always printed through
// their modifiers operands, with $clamp directly after the last source.
class getAsmSDWA <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> {
  string dst = !cond(
    !not(HasDst):       "",
    !eq(DstVT.Size, 1): " vcc", // use vcc token as dst for VOPC instructions
    true:               "$vdst");
  string src0 = "$src0_modifiers";
  string src1 = "$src1_modifiers";

  // Source list: empty for zero sources, one source, or two sources for any
  // other count.
  string args = !cond(
    !eq(NumSrcArgs, 0): "",
    !eq(NumSrcArgs, 1): !strconcat(", ", src0, "$clamp"),
    true:               !strconcat(", ", src0, ", ", src1, "$clamp"));

  // Select operands.
  string sdwa = !cond(
    !eq(NumSrcArgs, 0): "",
    !eq(NumSrcArgs, 1): " $dst_sel $dst_unused $src0_sel",
    // No dst_sel and dst_unused for VOPC
    !eq(DstVT.Size, 1): " $src0_sel $src1_sel",
    true:               " $dst_sel $dst_unused $src0_sel $src1_sel");

  string ret = !strconcat(dst, args, sdwa);
}
2122
// Assembly string for SDWA9 instructions. Unlike getAsmSDWA, the output
// modifiers ($clamp and optionally $omod) are printed with the select
// operands rather than after the sources, and a 1-bit destination is
// printed as $sdst.
class getAsmSDWA9 <bit HasDst, bit HasOMod, int NumSrcArgs,
                   ValueType DstVT = i32> {
  string dst = !cond(
    !not(HasDst):       "",
    !eq(DstVT.Size, 1): "$sdst",  // VOPC
    true:               "$vdst"); // VOP1/2
  string src0 = "$src0_modifiers";
  string src1 = "$src1_modifiers";
  string out_mods = "$clamp" # !if(HasOMod, "$omod", "");

  // Source list: empty for zero sources, one source, or two sources for any
  // other count.
  string args = !cond(
    !eq(NumSrcArgs, 0): "",
    !eq(NumSrcArgs, 1): !strconcat(", ", src0),
    true:               !strconcat(", ", src0, ", ", src1));

  // Output modifiers and select operands.
  string sdwa = !cond(
    !eq(NumSrcArgs, 0): "",
    !eq(NumSrcArgs, 1): !strconcat(out_mods, " $dst_sel $dst_unused $src0_sel"),
    // No dst_sel, dst_unused and output modifiers for VOPC
    !eq(DstVT.Size, 1): " $src0_sel $src1_sel",
    true:               !strconcat(out_mods,
                                   " $dst_sel $dst_unused $src0_sel $src1_sel"));

  string ret = !strconcat(dst, args, sdwa);
}
2150
// Set when the instruction has fewer or more than three sources and at least
// one of DstVT, Src0VT or Src1VT is 64 bits wide. Three-source instructions
// always report 0.
class getHas64BitOps <int NumSrcArgs, ValueType DstVT, ValueType Src0VT,
                      ValueType Src1VT> {
  bit ret = !and(!ne(NumSrcArgs, 3),
                 !or(!eq(DstVT.Size, 64),
                     !eq(Src0VT.Size, 64),
                     !eq(Src1VT.Size, 64)));
}
2167
// Set when the instruction can have an SDWA form: it must not have three
// sources (no SDWA for VOP3), and none of the destination, src0 or src1
// types may be 64 bits wide (no SDWA for 64-bit operands).
class getHasSDWA <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
                  ValueType Src1VT = i32> {
  bit ret = !and(!ne(NumSrcArgs, 3),     // NumSrcArgs == 3 - No SDWA for VOP3
                 !ne(DstVT.Size, 64),    // 64-bit dst
                 !ne(Src0VT.Size, 64),   // 64-bit src0
                 !ne(Src1VT.Size, 64));  // 64-bit src1
}
2184
// Set for every operand count except three: NumSrcArgs == 3 means VOP3,
// which has no DPP form here.
class getHasDPP <int NumSrcArgs> {
  bit ret = !ne(NumSrcArgs, 3);
}
2190
// Set when getHasDPP reports DPP support and getHas64BitOps reports no
// 64-bit operand.
class getHasExt32BitDPP <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
                 ValueType Src1VT = i32> {
  bit ret = !if(getHas64BitOps<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret,
                0,
                getHasDPP<NumSrcArgs>.ret);
}
2196
// Set when getHasDPP reports DPP support and getHas64BitOps reports at
// least one 64-bit operand.
class getHasExt64BitDPP <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
                 ValueType Src1VT = i32> {
  bit ret = !if(getHasDPP<NumSrcArgs>.ret,
                getHas64BitOps<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret,
                0);
}
2202
// Checks whether the instruction supports at least one of the extended
// encodings: set when either getHasDPP or getHasSDWA is set.
class getHasExt <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
                 ValueType Src1VT = i32> {
  bit ret = !if(getHasDPP<NumSrcArgs>.ret,
                1,
                getHasSDWA<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret);
}
2209
// Return an AGPR+VGPR operand class for the given VGPR register class,
// selected by RC.Size. Sizes other than 32, 64, 96, 128 and 160 map to a
// placeholder operand over VReg_1 that marks an invalid register.
class getLdStRegisterOperand<RegisterClass RC> {
  RegisterOperand ret = !cond(
    !eq(RC.Size, 32):  AVLdSt_32,
    !eq(RC.Size, 64):  AVLdSt_64,
    !eq(RC.Size, 96):  AVLdSt_96,
    !eq(RC.Size, 128): AVLdSt_128,
    !eq(RC.Size, 160): AVLdSt_160,
    true:              RegisterOperand<VReg_1>); // invalid register
}
2221
// Set when none of the destination and source types is 64 bits wide: there
// is no DPP for 64-bit operands.
class getHasVOP3DPP <ValueType DstVT = i32, ValueType Src0VT = i32,
                 ValueType Src1VT = i32, ValueType Src2VT = i32> {
  bit ret = !and(!ne(DstVT.Size, 64),   // 64-bit dst
                 !ne(Src0VT.Size, 64),  // 64-bit src0
                 !ne(Src1VT.Size, 64),  // 64-bit src1
                 !ne(Src2VT.Size, 64)); // 64-bit src2
}
2238
2239
// Named integer constants for the two pattern-generation modes.
def PatGenMode {
  // No pattern is generated.
  int NoPattern = 0;

  // A pattern is generated.
  int Pattern = 1;
}
2244
2245class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
2246
2247  field list<ValueType> ArgVT = _ArgVT;
2248  field bit EnableClamp = _EnableClamp;
2249  field bit IsTrue16 = 0;
2250  field bit IsRealTrue16 = 0;
2251
2252  field ValueType DstVT = ArgVT[0];
2253  field ValueType Src0VT = ArgVT[1];
2254  field ValueType Src1VT = ArgVT[2];
2255  field ValueType Src2VT = ArgVT[3];
2256  field RegisterOperand DstRC = getVALUDstForVT<DstVT>.ret;
2257  field RegisterOperand DstRCDPP = DstRC;
2258  field RegisterOperand DstRC64 = DstRC;
2259  field RegisterOperand DstRCVOP3DPP = DstRC64;
2260  field RegisterOperand DstRCSDWA = getSDWADstForVT<DstVT>.ret;
2261  field RegisterOperand Src0RC32 = getVOPSrc0ForVT<Src0VT, IsTrue16>.ret;
2262  field RegisterOperand Src1RC32 = getVregSrcForVT<Src1VT>.ret;
2263  field RegisterOperand Src0RC64 = getVOP3SrcForVT<Src0VT>.ret;
2264  field RegisterOperand Src1RC64 = getVOP3SrcForVT<Src1VT>.ret;
2265  field RegisterOperand Src2RC64 = getVOP3SrcForVT<Src2VT>.ret;
2266  field RegisterOperand Src0DPP = getVregSrcForVT<Src0VT>.ret;
2267  field RegisterOperand Src1DPP = getVregSrcForVT<Src1VT>.ret;
2268  field RegisterOperand Src2DPP = getVregSrcForVT<Src2VT>.ret;
2269  field RegisterOperand Src0VOP3DPP = VGPRSrc_32;
2270  field RegisterOperand Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT>.ret;
2271  field RegisterOperand Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT>.ret;
2272  field RegisterOperand Src0SDWA = getSDWASrcForVT<Src0VT>.ret;
2273  field RegisterOperand Src1SDWA = getSDWASrcForVT<Src0VT>.ret;
2274  field Operand Src0Mod = getSrcMod<Src0VT>.ret;
2275  field Operand Src1Mod = getSrcMod<Src1VT>.ret;
2276  field Operand Src2Mod = getSrcMod<Src2VT>.ret;
2277  field Operand Src0ModDPP = getSrcModDPP<Src0VT>.ret;
2278  field Operand Src1ModDPP = getSrcModDPP<Src1VT>.ret;
2279  field Operand Src2ModDPP = getSrcModDPP<Src2VT>.ret;
2280  field Operand Src0ModVOP3DPP = getSrcModDPP<Src0VT>.ret;
2281  field Operand Src1ModVOP3DPP = getSrcModDPP<Src1VT>.ret;
2282  field Operand Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT>.ret;
2283  field Operand Src0ModSDWA = getSrcModSDWA<Src0VT>.ret;
2284  field Operand Src1ModSDWA = getSrcModSDWA<Src1VT>.ret;
2285
2286
2287  field bit IsMAI = 0;
2288  field bit IsVOP3P = 0;
2289  field bit IsDOT = 0;
2290  field bit IsSingle = 0;
2291  field bit IsWMMA = 0;
2292  field bit IsSWMMAC = 0;
2293
2294  field bit IsFP8 = 0;
2295
2296  field bit HasDst = !ne(DstVT.Value, untyped.Value);
2297  field bit HasDst32 = HasDst;
2298  field bit EmitDst = HasDst; // force dst encoding, see v_movreld_b32 special case
2299  field bit EmitDstSel = EmitDst;
2300  field int NumSrcArgs = getNumSrcArgs<Src0VT, Src1VT, Src2VT>.ret;
2301  field bit HasSrc0 = !ne(Src0VT.Value, untyped.Value);
2302  field bit HasSrc1 = !ne(Src1VT.Value, untyped.Value);
2303  field bit HasSrc2 = !ne(Src2VT.Value, untyped.Value);
2304
2305  field bit HasSrc0FloatMods = Src0VT.isFP;
2306  field bit HasSrc1FloatMods = Src1VT.isFP;
2307  field bit HasSrc2FloatMods = Src2VT.isFP;
2308
2309  field bit HasSrc0IntMods = isIntType<Src0VT>.ret;
2310  field bit HasSrc1IntMods = isIntType<Src1VT>.ret;
2311  field bit HasSrc2IntMods = isIntType<Src2VT>.ret;
2312
2313  field bit HasClamp = !or(isModifierType<Src0VT>.ret, EnableClamp);
2314  field bit HasSDWAClamp = EmitDst;
2315  field bit HasFPClamp = !and(DstVT.isFP, HasClamp);
2316  field bit HasIntClamp = !if(DstVT.isFP, 0, HasClamp);
2317  field bit HasClampLo = HasClamp;
2318  field bit HasClampHi = !and(DstVT.isVector, HasClamp);
2319  field bit HasHigh = 0;
2320
2321  field bit IsPacked = Src0VT.isVector;
2322  field bit HasOpSel = IsPacked;
2323  field bit HasOMod = !if(IsVOP3P, 0, DstVT.isFP);
2324  field bit HasSDWAOMod = DstVT.isFP;
2325
2326  field bit HasModifiers = !or(isModifierType<Src0VT>.ret,
2327                               isModifierType<Src1VT>.ret,
2328                               isModifierType<Src2VT>.ret,
2329                               HasOMod);
2330
2331  field bit HasSrc0Mods = HasModifiers;
2332  field bit HasSrc1Mods = !if(HasModifiers, !or(HasSrc1FloatMods, HasSrc1IntMods), 0);
2333  field bit HasSrc2Mods = !if(HasModifiers, !or(HasSrc2FloatMods, HasSrc2IntMods), 0);
2334
2335  field bit HasExt = getHasExt<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
2336  field bit HasExtVOP3DPP = getHasVOP3DPP<DstVT, Src0VT, Src1VT, Src2VT>.ret;
2337  field bit HasExtDPP = !or(getHasDPP<NumSrcArgs>.ret, HasExtVOP3DPP);
2338  field bit HasExt32BitDPP = getHasExt32BitDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
2339  field bit HasExt64BitDPP = getHasExt64BitDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
2340  field bit HasExtSDWA = getHasSDWA<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
2341  field bit HasExtSDWA9 = HasExtSDWA;
2342  field int NeedPatGen = PatGenMode.NoPattern;
2343
2344  field Operand Src0PackedMod = !if(HasSrc0FloatMods, PackedF16InputMods, PackedI16InputMods);
2345  field Operand Src1PackedMod = !if(HasSrc1FloatMods, PackedF16InputMods, PackedI16InputMods);
2346  field Operand Src2PackedMod = !if(HasSrc2FloatMods, PackedF16InputMods, PackedI16InputMods);
2347
2348  field dag Outs = !if(HasDst,(outs DstRC:$vdst),(outs));
2349
2350  // VOP3b instructions are a special case with a second explicit
2351  // output. This is manually overridden for them.
2352  field dag Outs32 = Outs;
2353  field dag Outs64 = !if(HasDst,(outs DstRC64:$vdst),(outs));
2354  field dag OutsDPP = getOutsDPP<HasDst, DstVT, DstRCDPP>.ret;
2355  field dag OutsDPP8 = OutsDPP;
2356  field dag OutsVOP3DPP = getOutsDPP<HasDst, DstVT, DstRCVOP3DPP>.ret;
2357  field dag OutsVOP3DPP8 = OutsVOP3DPP;
2358  field dag OutsSDWA = getOutsSDWA<HasDst, DstVT, DstRCSDWA>.ret;
2359
2360  field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret;
2361  field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
2362                             HasIntClamp, HasModifiers, HasSrc2Mods,
2363                             HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret;
2364  field dag InsVOP3P = getInsVOP3P<Src0RC64, Src1RC64, Src2RC64,
2365                                   NumSrcArgs, HasClamp, HasOpSel,
2366                                   Src0PackedMod, Src1PackedMod, Src2PackedMod>.ret;
2367  field dag InsVOP3OpSel = getInsVOP3OpSel<Src0RC64, Src1RC64, Src2RC64,
2368                                NumSrcArgs, HasClamp, HasOMod,
2369                                getOpSelMod<Src0VT>.ret,
2370                                getOpSelMod<Src1VT>.ret,
2371                                getOpSelMod<Src2VT>.ret>.ret;
2372  field dag InsDPP = !if(HasExtDPP,
2373                         getInsDPP<DstRCDPP, Src0DPP, Src1DPP, Src2DPP, NumSrcArgs,
2374                                   HasModifiers, Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret,
2375                         (ins));
2376  field dag InsDPP16 = getInsDPP16<DstRCDPP, Src0DPP, Src1DPP, Src2DPP, NumSrcArgs,
2377                                   HasModifiers, Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret;
2378  field dag InsDPP8 = getInsDPP8<DstRCDPP, Src0DPP, Src1DPP, Src2DPP,
2379                                 NumSrcArgs, HasModifiers,
2380                                 Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret;
2381  defvar InsVOP3DPPBase = getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP,
2382                  Src2VOP3DPP, NumSrcArgs, HasClamp, HasModifiers, HasSrc2Mods, HasOMod,
2383                  Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP, HasOpSel>.ret;
2384  defvar InsVOP3PDPPBase = getInsVOP3P<Src0VOP3DPP, Src1VOP3DPP,
2385                  Src2VOP3DPP, NumSrcArgs, HasClamp, HasOpSel,
2386                  Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP>.ret;
2387
2388  field dag InsVOP3Base = !if(IsVOP3P, InsVOP3PDPPBase, InsVOP3DPPBase);
2389
2390  field dag InsVOP3DPP = getInsVOP3DPP<InsVOP3Base, DstRCVOP3DPP, NumSrcArgs>.ret;
2391  field dag InsVOP3DPP16 = getInsVOP3DPP16<InsVOP3Base, DstRCVOP3DPP, NumSrcArgs>.ret;
2392  field dag InsVOP3DPP8 = getInsVOP3DPP8<InsVOP3Base, DstRCVOP3DPP, NumSrcArgs>.ret;
2393  field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs,
2394                                 HasSDWAOMod, Src0ModSDWA, Src1ModSDWA,
2395                                 DstVT>.ret;
2396  field dag InsVOPDX = (ins Src0RC32:$src0X, Src1RC32:$vsrc1X);
2397  // It is a slight misnomer to use the deferred f32 operand type for non-float
2398  // operands, but this operand type will only be used if the other dual
2399  // component is FMAAK or FMAMK
2400  field dag InsVOPDXDeferred = (ins !if(!eq(Src0VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0X, VGPR_32:$vsrc1X);
2401  field dag InsVOPDY = (ins Src0RC32:$src0Y, Src1RC32:$vsrc1Y);
2402  field dag InsVOPDYDeferred = (ins !if(!eq(Src1VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0Y, VGPR_32:$vsrc1Y);
2403
2404
2405  field string Asm32 = getAsm32<HasDst, NumSrcArgs, DstVT>.ret;
2406  field string AsmDPP = !if(HasExtDPP,
2407                            getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret, "");
2408  field string AsmDPP16 = getAsmDPP16<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret;
2409  // DPP8 encoding has no fields for modifiers, and it is enforced by setting
2410  // the asm operand name via this HasModifiers flag
2411  field string AsmDPP8 = getAsmDPP8<HasDst, NumSrcArgs, 0 /*HasModifiers*/, DstVT>.ret;
2412  field string AsmVOP3Base = getAsmVOP3Base<NumSrcArgs, HasDst, HasClamp,
2413   HasOpSel, HasOMod, IsVOP3P, HasModifiers, HasModifiers, HasModifiers,
2414   HasModifiers, DstVT>.ret;
2415  field string Asm64 = AsmVOP3Base;
2416  field string AsmVOP3P = getAsmVOP3P<NumSrcArgs, HasModifiers, HasClamp, HasOpSel>.ret;
2417  field string AsmVOP3OpSel = getAsmVOP3OpSel<NumSrcArgs,
2418                                              HasClamp,
2419                                              HasOMod,
2420                                              HasSrc0FloatMods,
2421                                              HasSrc1FloatMods,
2422                                              HasSrc2FloatMods>.ret;
2423  field string AsmVOP3DPP = getAsmVOP3DPP<AsmVOP3Base>.ret;
2424  field string AsmVOP3DPP16 = getAsmVOP3DPP16<AsmVOP3Base>.ret;
2425  field string AsmVOP3DPP8 = getAsmVOP3DPP8<AsmVOP3Base>.ret;
2426  field string AsmSDWA = getAsmSDWA<HasDst, NumSrcArgs, DstVT>.ret;
2427  field string AsmSDWA9 = getAsmSDWA9<HasDst, HasSDWAOMod, NumSrcArgs, DstVT>.ret;
2428  field string AsmVOPDX = getAsmVOPDPart<NumSrcArgs, "X">.ret;
2429  field string AsmVOPDY = getAsmVOPDPart<NumSrcArgs, "Y">.ret;
2430  field string TieRegDPP = "$old";
2431}
2432
// Profile wrapper that rebuilds a VOPProfile from p.ArgVT and clears every
// extension flag on it: HasExt, HasExtDPP, HasExtVOP3DPP, HasExt32BitDPP,
// HasExt64BitDPP, HasExtSDWA and HasExtSDWA9.
// Only p.ArgVT is forwarded; any other template argument of VOPProfile (for
// example EnableClamp) takes its default here rather than p's value.
2433  class VOP_NO_EXT <VOPProfile p> : VOPProfile <p.ArgVT> {
2434  let HasExt = 0;
2435  let HasExtDPP = 0;
2436  let HasExtVOP3DPP = 0;
2437  let HasExt32BitDPP = 0;
2438  let HasExt64BitDPP = 0;
2439  let HasExtSDWA = 0;
2440  let HasExtSDWA9 = 0;
2441}
2442
// Profile wrapper that rebuilds a VOPProfile from p.ArgVT and sets its
// NeedPatGen field to `mode`. When `mode` is omitted it is
// PatGenMode.NoPattern, the same value VOPProfile assigns to NeedPatGen.
// Only p.ArgVT is forwarded from p.
2443class VOP_PAT_GEN <VOPProfile p, int mode=PatGenMode.NoPattern> : VOPProfile <p.ArgVT> {
2444  let NeedPatGen = mode;
2445}
2446
2447// VOPC_Profile_t16, VOPC_NoSdst_Profile_t16, VOPC_Class_Profile_t16,
2448// VOPC_Class_NoSdst_Profile_t16, and VOP_MAC_F16_t16 do not inherit from this
2449// class, so copy changes to this class in those profiles
//
// Rebuilds profile P (from P.ArgVT only) with IsTrue16 and IsRealTrue16 set,
// and re-selects the destination/source operand classes and source-modifier
// operands with the IsTrue16 argument set to 1 (and IsFake16 set to 0 where
// the helper takes it).
2450class VOPProfile_True16<VOPProfile P> : VOPProfile<P.ArgVT> {
2451  let IsTrue16 = 1;
2452  let IsRealTrue16 = 1;
  // 32-bit encoding and DPP operands.
2453  // Most DstVT are 16-bit, but not all.
2454  let DstRC = getVALUDstForVT<DstVT, 1 /*IsTrue16*/, 0 /*IsVOP3Encoding*/>.ret;
2456  let Src0RC32 = getVOPSrc0ForVT<Src0VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret;
2457  let Src1RC32 = getVregSrcForVT<Src1VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret;
2458  let Src0DPP = getVregSrcForVT<Src0VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret;
2459  let Src1DPP = getVregSrcForVT<Src1VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret;
2460  let Src2DPP = getVregSrcForVT<Src2VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret;
2461  let Src0ModDPP = getSrcModDPP_t16<Src0VT, 0 /*IsFake16*/>.ret;
2462  let Src1ModDPP = getSrcModDPP_t16<Src1VT, 0 /*IsFake16*/>.ret;
2463  let Src2ModDPP = getSrcModDPP_t16<Src2VT, 0 /*IsFake16*/>.ret;
2464
  // 64-bit (VOP3) encoding operands and source modifiers. This is the only
  // assignment of DstRC64: an earlier `let DstRC64 = getVALUDstForVT<DstVT>.ret;`
  // in this body was shadowed by the line below and has been dropped.
2465  let DstRC64 = getVALUDstForVT<DstVT, 1 /*IsTrue16*/, 1 /*IsVOP3Encoding*/>.ret;
2466  let Src0RC64 = getVOP3SrcForVT<Src0VT, 1 /*IsTrue16*/>.ret;
2467  let Src1RC64 = getVOP3SrcForVT<Src1VT, 1 /*IsTrue16*/>.ret;
2468  let Src2RC64 = getVOP3SrcForVT<Src2VT, 1 /*IsTrue16*/>.ret;
2469  let Src0Mod = getSrcMod<Src0VT, 1 /*IsTrue16*/>.ret;
2470  let Src1Mod = getSrcMod<Src1VT, 1 /*IsTrue16*/>.ret;
2471  let Src2Mod = getSrcMod<Src2VT, 1 /*IsTrue16*/>.ret;
2472}
2473
// Rebuilds profile P (from P.ArgVT only) with IsTrue16 set and the operand
// classes re-selected with IsTrue16 = 1 and IsFake16 = 1.
// Differences from VOPProfile_True16 visible in this file: IsRealTrue16 is
// not set, Src0RC32 and the 64-bit source classes/modifiers are not
// overridden, and DstRC comes from getVALUDstForVT_fake16.
2474class VOPProfile_Fake16<VOPProfile P> : VOPProfile<P.ArgVT> {
2475  let IsTrue16 = 1;
2476  // Most DstVT are 16-bit, but not all
2477  let DstRC = getVALUDstForVT_fake16<DstVT>.ret;
2478  let DstRC64 = getVALUDstForVT<DstVT>.ret;
2479  let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
2480  let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
2481  let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
2482  let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
  // getSrcModDPP_t16 is used without its second argument here, whereas
  // VOPProfile_True16 passes 0 /*IsFake16*/ explicitly.
  // NOTE(review): presumably the default selects the fake16 variant -- confirm
  // against the definition of getSrcModDPP_t16.
2483  let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret;
2484  let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
2485  let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
2486}
2487
// VOP profiles with 16-bit scalar operands. Each def name spells out the value
// types passed in the ArgVT list, in list order; `untyped` occupies the slots
// the name does not mention.
// NOTE(review): presumably the list order is [dst, src0, src1, src2] -- confirm
// against how VOPProfile unpacks ArgVT. The four-operand profiles below pass a
// fifth `untyped` element.
2488def VOP_F16_F16 : VOPProfile<[f16, f16, untyped, untyped]>;
2489def VOP_F16_I16 : VOPProfile <[f16, i16, untyped, untyped]>;
2490def VOP_I16_F16 : VOPProfile <[i16, f16, untyped, untyped]>;
2491def VOP_I16_I16 : VOPProfile <[i16, i16, untyped, untyped]>;
2492
2493def VOP_F16_F16_F16 : VOPProfile <[f16, f16, f16, untyped]>;
2494def VOP_F16_F16_I16 : VOPProfile <[f16, f16, i16, untyped]>;
2495def VOP_F16_F16_I32 : VOPProfile <[f16, f16, i32, untyped]>;
2496def VOP_I16_I16_I16 : VOPProfile <[i16, i16, i16, untyped]>;
// The _ARITH variant passes EnableClamp = 1, which VOPProfile ORs into
// HasClamp (HasClamp = !or(isModifierType<Src0VT>.ret, EnableClamp)).
2497def VOP_I16_I16_I16_ARITH : VOPProfile <[i16, i16, i16, untyped], /*EnableClamp=*/1>;
2498
2499def VOP_I16_I16_I16_I16 : VOPProfile <[i16, i16, i16, i16, untyped]>;
2500def VOP_F16_F16_F16_F16 : VOPProfile <[f16, f16, f16, f16, untyped]>;
2501
2502def VOP_I32_I16_I16_I32 : VOPProfile <[i32, i16, i16, i32, untyped]>;
2503def VOP_I32_I16 : VOPProfile <[i32, i16, untyped, untyped]>;
2504def VOP_I16_I32 : VOPProfile <[i16, i32, untyped, untyped]>;
2505
// VOP profiles involving two-element 16-bit vector types (v2f16 / v2i16).
// Each def name spells out the ArgVT list elements in order, with `untyped`
// in slots the name does not mention.
// Exception in this group: VOP_B32_F16_F16 is named "B32" but its first list
// element is i32.
2506def VOP_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, untyped]>;
2507def VOP_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, untyped]>;
2508def VOP_B32_F16_F16 : VOPProfile <[i32, f16, f16, untyped]>;
2509
2510def VOP_V2F16_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, v2f16]>;
2511def VOP_V2I16_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, v2i16]>;
2512def VOP_V2I16_F32_F32 : VOPProfile <[v2i16, f32, f32, untyped]>;
2513def VOP_V2I16_I32_I32 : VOPProfile <[v2i16, i32, i32, untyped]>;
2514
2515def VOP_F16_V2F16_V2F16_F16 : VOPProfile <[f16, v2f16, v2f16, f16]>;
2516def VOP_I16_V2I16_V2I16_I16 : VOPProfile <[i16, v2i16, v2i16, i16]>;
2517def VOP_F32_V2I16_V2I16_F32 : VOPProfile <[f32, v2i16, v2i16, f32]>;
2518
2519def VOP_F32_V2F16_V2F16_V2F16 : VOPProfile <[f32, v2f16, v2f16, v2f16]>;
2520
// VOP profiles over 32-bit and 64-bit scalar types (plus a few mixed ones).
// Each def name spells out the ArgVT list elements in order, with `untyped`
// in slots the name does not mention. VOP_NONE passes `untyped` in all four
// slots.
2521def VOP_NONE : VOPProfile <[untyped, untyped, untyped, untyped]>;
2522
2523def VOP_F32_F32 : VOPProfile <[f32, f32, untyped, untyped]>;
2524def VOP_F32_F64 : VOPProfile <[f32, f64, untyped, untyped]>;
2525def VOP_F32_I32 : VOPProfile <[f32, i32, untyped, untyped]>;
2526def VOP_F64_F32 : VOPProfile <[f64, f32, untyped, untyped]>;
2527def VOP_F64_F64 : VOPProfile <[f64, f64, untyped, untyped]>;
2528def VOP_F64_I32 : VOPProfile <[f64, i32, untyped, untyped]>;
2529def VOP_I32_F32 : VOPProfile <[i32, f32, untyped, untyped]>;
2530def VOP_I32_F64 : VOPProfile <[i32, f64, untyped, untyped]>;
2531def VOP_I32_I32 : VOPProfile <[i32, i32, untyped, untyped]>;
2532def VOP_F16_F32 : VOPProfile <[f16, f32, untyped, untyped]>;
2533def VOP_F32_F16 : VOPProfile <[f32, f16, untyped, untyped]>;
2534def VOP_I64_I64 : VOPProfile <[i64, i64, untyped, untyped]>;
2535
2536def VOP_F32_F32_F16 : VOPProfile <[f32, f32, f16, untyped]>;
2537def VOP_F32_F32_F32 : VOPProfile <[f32, f32, f32, untyped]>;
2538def VOP_F32_F32_I32 : VOPProfile <[f32, f32, i32, untyped]>;
2539def VOP_F64_F64_F64 : VOPProfile <[f64, f64, f64, untyped]>;
2540def VOP_F64_F64_I32 : VOPProfile <[f64, f64, i32, untyped]>;
2541def VOP_I32_F32_F32 : VOPProfile <[i32, f32, f32, untyped]>;
2542def VOP_I32_F32_I32 : VOPProfile <[i32, f32, i32, untyped]>;
2543def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>;
// The _ARITH variant passes EnableClamp = 1, which VOPProfile ORs into
// HasClamp (HasClamp = !or(isModifierType<Src0VT>.ret, EnableClamp)).
2544def VOP_I32_I32_I32_ARITH : VOPProfile <[i32, i32, i32, untyped], /*EnableClamp=*/1>;
2545def VOP_V2F16_F32_F32 : VOPProfile <[v2f16, f32, f32, untyped]>;
2546def VOP_F32_F16_F16_F16 : VOPProfile <[f32, f16, f16, f16]>;
2547
2548def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>;
2549def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>;
2550def VOP_I64_I64_I64 : VOPProfile <[i64, i64, i64, untyped]>;
2551
2552def VOP_F16_F32_F16_F32 : VOPProfile <[f16, f32, f16, f32]>;
2553def VOP_F32_F32_F16_F16 : VOPProfile <[f32, f32, f16, f16]>;
2554def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>;
2555def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>;
2556def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>;
2557def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>;
2558def VOP_I32_F32_I32_I32 : VOPProfile <[i32, f32, i32, i32]>;
2559def VOP_I64_I64_I32_I64 : VOPProfile <[i64, i64, i32, i64]>;
2560def VOP_V4I32_I64_I32_V4I32 : VOPProfile <[v4i32, i64, i32, v4i32]>;
2561
2562def VOP_F32_V2F16_V2F16_F32 : VOPProfile <[f32, v2f16, v2f16, f32]>;
2563def VOP_I32_V2I16_V2I16_I32 : VOPProfile <[i32, v2i16, v2i16, i32]>;
2564
// VOP profiles whose first and/or last list element is a wide vector type
// (v4/v16/v32 of f32 or i32, v4f64, v1f64, v2f32, v2i32). As elsewhere, each
// def name spells out the ArgVT list elements in order, with `untyped` in
// slots the name does not mention.
// NOTE(review): these look like matrix-style accumulate profiles (first and
// last types match in most of them) -- confirm against the instructions that
// reference them.
2565def VOP_V4F32_F32_F32_V4F32       : VOPProfile <[v4f32,  f32,   f32,   v4f32]>;
2566def VOP_V16F32_F32_F32_V16F32     : VOPProfile <[v16f32, f32,   f32,   v16f32]>;
2567def VOP_V32F32_F32_F32_V32F32     : VOPProfile <[v32f32, f32,   f32,   v32f32]>;
2568def VOP_V4F32_V4F16_V4F16_V4F32   : VOPProfile <[v4f32,  v4f16, v4f16, v4f32]>;
2569def VOP_V16F32_V4F16_V4F16_V16F32 : VOPProfile <[v16f32, v4f16, v4f16, v16f32]>;
2570def VOP_V32F32_V4F16_V4F16_V32F32 : VOPProfile <[v32f32, v4f16, v4f16, v32f32]>;
2571def VOP_V4F32_V2I16_V2I16_V4F32   : VOPProfile <[v4f32,  v2i16, v2i16, v4f32]>;
2572def VOP_V16F32_V2I16_V2I16_V16F32 : VOPProfile <[v16f32, v2i16, v2i16, v16f32]>;
2573def VOP_V32F32_V2I16_V2I16_V32F32 : VOPProfile <[v32f32, v2i16, v2i16, v32f32]>;
2574def VOP_V4I32_I32_I32_V4I32       : VOPProfile <[v4i32,  i32,   i32,   v4i32]>;
2575def VOP_V16I32_I32_I32_V16I32     : VOPProfile <[v16i32, i32,   i32,   v16i32]>;
2576def VOP_V32I32_I32_I32_V32I32     : VOPProfile <[v32i32, i32,   i32,   v32i32]>;
2577
2578def VOP_V4F64_F64_F64_V4F64 : VOPProfile <[v4f64, f64, f64, v4f64]>;
2579def VOP_V1F64_F64_F64_V1F64 : VOPProfile <[v1f64, f64, f64, v1f64]>;
2580
2581def VOP_V2F32_V2F32_V2F32_V2F32   : VOPProfile <[v2f32,  v2f32, v2f32, v2f32]>;
2582def VOP_V2F32_V2F32_V2F32         : VOPProfile <[v2f32,  v2f32, v2f32, untyped]>;
2583def VOP_V2I32_V2I32_V2I32         : VOPProfile <[v2i32,  v2i32, v2i32, untyped]>;
2584def VOP_V4F32_V4I16_V4I16_V4F32   : VOPProfile <[v4f32,  v4i16, v4i16, v4f32]>;
2585def VOP_V16F32_V4I16_V4I16_V16F32 : VOPProfile <[v16f32, v4i16, v4i16, v16f32]>;
2586def VOP_V32F32_V4I16_V4I16_V32F32 : VOPProfile <[v32f32, v4i16, v4i16, v32f32]>;
2587
2588def VOP_V4I32_I64_I64_V4I32       : VOPProfile <[v4i32,  i64,   i64,   v4i32]>;
2589def VOP_V16I32_I64_I64_V16I32     : VOPProfile <[v16i32, i64,   i64,   v16i32]>;
2590def VOP_V4F32_V2F32_V2F32_V4F32   : VOPProfile <[v4f32,  v2f32, v2f32, v4f32]>;
2591def VOP_V16F32_V2F32_V2F32_V16F32 : VOPProfile <[v16f32, v2f32, v2f32, v16f32]>;
2592def VOP_V4F32_I64_I64_V4F32       : VOPProfile <[v4f32,  i64,   i64,   v4f32]>;
2593def VOP_V16F32_I64_I64_V16F32     : VOPProfile <[v16f32, i64,   i64,   v16f32]>;
2594
// In the profiles below the two middle types differ in width and the last
// list element is i32.
2595def VOP_V4F32_V4F16_V8F16_I32     : VOPProfile <[v4f32,  v4f16, v8f16, i32]>;
2596def VOP_V16F32_V4F16_V8F16_I32    : VOPProfile <[v16f32, v4f16, v8f16, i32]>;
2597def VOP_V4F32_V4I16_V8I16_I32     : VOPProfile <[v4f32,  v4i16, v8i16, i32]>;
2598def VOP_V16F32_V4I16_V8I16_I32    : VOPProfile <[v16f32, v4i16, v8i16, i32]>;
2599def VOP_V4I32_V2I32_V4I32_I32     : VOPProfile <[v4i32,  v2i32, v4i32, i32]>;
2600def VOP_V16I32_V2I32_V4I32_I32    : VOPProfile <[v16i32, v2i32, v4i32, i32]>;
2601def VOP_V4F32_V2I32_V4I32_I32     : VOPProfile <[v4f32,  v2i32, v4i32, i32]>;
2602def VOP_V16F32_V2I32_V4I32_I32    : VOPProfile <[v16f32, v2i32, v4i32, i32]>;
2603
// Tag class that records a RevOp string and an IsOrig bit on a record.
// It is the FilterClass of the getCommuteOrig / getCommuteRev mappings in
// this file, which group records by RevOp and select columns on IsOrig.
2604class Commutable_REV <string revOp, bit isOrig> {
2605  string RevOp = revOp;
2606  bit IsOrig = isOrig;
2607}
2608
// Tag class that records a NoRetOp string and an IsRet bit on a record.
// It is the FilterClass of the getAtomicNoRetOp mapping in this file, which
// groups records by NoRetOp and selects columns on IsRet.
2609class AtomicNoRet <string noRetOp, bit isRet> {
2610  string NoRetOp = noRetOp;
2611  bit IsRet = isRet;
2612}
2613
2614//===----------------------------------------------------------------------===//
2615// Interpolation opcodes
2616//===----------------------------------------------------------------------===//
2617
// Register operand over class `rc` that passes "printVINTRPDst" as
// RegisterOperand's second argument (the print method name).
2618class VINTRPDstOperand <RegisterClass rc> : RegisterOperand <rc, "printVINTRPDst">;
2619
// Pseudo form of a VINTRP instruction: built on VINTRPCommon with an empty
// asm string and the given selection pattern, tagged SIMCInstr with
// SIEncodingFamily.NONE, and marked isPseudo / isCodeGenOnly.
2620class VINTRP_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
2621  VINTRPCommon <outs, ins, "", pattern>,
2622  SIMCInstr<opName, SIEncodingFamily.NONE> {
2623  let isPseudo = 1;
2624  let isCodeGenOnly = 1;
2625}
2626
2627// FIXME-GFX10: WIP.
// Real (encoded) VINTRP instruction using the VINTRPe<op> encoding class.
// It carries the asm string but no selection pattern, and is tagged SIMCInstr
// with the caller-supplied encodingFamily. VINTRP_m instantiates it for both
// SIEncodingFamily.SI and SIEncodingFamily.GFX10.
2628class VINTRP_Real_si <bits <2> op, string opName, dag outs, dag ins,
2629                      string asm, int encodingFamily> :
2630  VINTRPCommon <outs, ins, asm, []>,
2631  VINTRPe <op>,
2632  SIMCInstr<opName, encodingFamily> {
2633}
2634
// Real (encoded) VINTRP instruction using the VINTRPe_vi<op> encoding class.
// It carries the asm string but no selection pattern, is tagged SIMCInstr
// with SIEncodingFamily.VI, uses VIAssemblerPredicate as its assembler
// predicate and decodes in the "GFX8" namespace.
2635class VINTRP_Real_vi <bits <2> op, string opName, dag outs, dag ins,
2636                      string asm> :
2637  VINTRPCommon <outs, ins, asm, []>,
2638  VINTRPe_vi <op>,
2639  SIMCInstr<opName, SIEncodingFamily.VI> {
2640  let AssemblerPredicate = VIAssemblerPredicate;
2641  let DecoderNamespace = "GFX8";
2642}
2643
2644// FIXME-GFX10: WIP.
// Defines one VINTRP instruction as four records sharing NAME:
//   NAME        - the pseudo (VINTRP_Pseudo), which carries `pattern`;
//   NAME_si     - VINTRP_Real_si with SIEncodingFamily.SI, assembler predicate
//                 isGFX6GFX7, decoder namespace "GFX6GFX7";
//   NAME_vi     - VINTRP_Real_vi (predicate and namespace set by that class);
//   NAME_gfx10  - VINTRP_Real_si with SIEncodingFamily.GFX10, assembler
//                 predicate isGFX10Only, decoder namespace "GFX10".
// The real records all receive the same op, outs, ins and asm.
2645multiclass VINTRP_m <bits <2> op, dag outs, dag ins, string asm,
2646                     list<dag> pattern = []> {
2647  def "" : VINTRP_Pseudo <NAME, outs, ins, pattern>;
2648
2649  let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
2650    def _si : VINTRP_Real_si <op, NAME, outs, ins, asm, SIEncodingFamily.SI>;
2651  } // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"
2652
2653  def _vi : VINTRP_Real_vi <op, NAME, outs, ins, asm>;
2654
2655  let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
2656    def _gfx10 : VINTRP_Real_si<op, NAME, outs, ins, asm, SIEncodingFamily.GFX10>;
2657  } // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10"
2658}
2659
2660//===----------------------------------------------------------------------===//
2661// Vector instruction mappings
2662//===----------------------------------------------------------------------===//
2663
2664// Maps an opcode in e32 form to its e64 equivalent
// Rows group "VOP" records by OpName. The key column is
// (Size, VOP3) = ("4", "0") and the single value column is ("8", "1").
2665def getVOPe64 : InstrMapping {
2666  let FilterClass = "VOP";
2667  let RowFields = ["OpName"];
2668  let ColFields = ["Size", "VOP3"];
2669  let KeyCol = ["4", "0"];
2670  let ValueCols = [["8", "1"]];
2671}
2672
2673// Maps an opcode in e64 form to its e32 equivalent
// Inverse of getVOPe64: same filter class, row and column fields, with the
// key column (Size, VOP3) = ("8", "1") and the value column ("4", "0").
2674def getVOPe32 : InstrMapping {
2675  let FilterClass = "VOP";
2676  let RowFields = ["OpName"];
2677  let ColFields = ["Size", "VOP3"];
2678  let KeyCol = ["8", "1"];
2679  let ValueCols = [["4", "0"]];
2680}
2681
2682// Maps ordinary instructions to their SDWA counterparts
// Rows group "VOP" records by OpName; columns are selected on
// AsmVariantName, from key "Default" to value "SDWA".
2683def getSDWAOp : InstrMapping {
2684  let FilterClass = "VOP";
2685  let RowFields = ["OpName"];
2686  let ColFields = ["AsmVariantName"];
2687  let KeyCol = ["Default"];
2688  let ValueCols = [["SDWA"]];
2689}
2690
2691// Maps SDWA instructions to their ordinary counterparts
// Inverse of getSDWAOp: AsmVariantName key "SDWA", value "Default".
2692def getBasicFromSDWAOp : InstrMapping {
2693  let FilterClass = "VOP";
2694  let RowFields = ["OpName"];
2695  let ColFields = ["AsmVariantName"];
2696  let KeyCol = ["SDWA"];
2697  let ValueCols = [["Default"]];
2698}
2699
2700// Maps ordinary instructions to their DPP counterparts
// Rows group "VOP" records by OpName; columns are selected on
// AsmVariantName, from key "Default" to value "DPP".
2701def getDPPOp32 : InstrMapping {
2702  let FilterClass = "VOP";
2703  let RowFields = ["OpName"];
2704  let ColFields = ["AsmVariantName"];
2705  let KeyCol = ["Default"];
2706  let ValueCols = [["DPP"]];
2707}
2708
// Maps "VOP" records with AsmVariantName "VOP3" to the record of the same
// OpName whose AsmVariantName is "VOP3_DPP" (the VOP3 analogue of getDPPOp32).
2709def getDPPOp64 : InstrMapping {
2710  let FilterClass = "VOP";
2711  let RowFields = ["OpName"];
2712  let ColFields = ["AsmVariantName"];
2713  let KeyCol = ["VOP3"];
2714  let ValueCols = [["VOP3_DPP"]];
2715}
2716
2717// Maps a commuted opcode to its original version
// Rows group Commutable_REV records by RevOp; the key column is IsOrig = "0"
// and the value column is IsOrig = "1".
2718def getCommuteOrig : InstrMapping {
2719  let FilterClass = "Commutable_REV";
2720  let RowFields = ["RevOp"];
2721  let ColFields = ["IsOrig"];
2722  let KeyCol = ["0"];
2723  let ValueCols = [["1"]];
2724}
2725
2726// Maps an original opcode to its commuted version
// Inverse of getCommuteOrig: key column IsOrig = "1", value column
// IsOrig = "0".
2727def getCommuteRev : InstrMapping {
2728  let FilterClass = "Commutable_REV";
2729  let RowFields = ["RevOp"];
2730  let ColFields = ["IsOrig"];
2731  let KeyCol = ["1"];
2732  let ValueCols = [["0"]];
2733}
2734
// Maps a record tagged SIMCInstr with Subtarget == SIEncodingFamily.NONE (the
// key column) to the records sharing its PseudoInstr value, one value column
// per encoding family. The value columns are listed in the same order as the
// SIEncodingFamily fields SI (0) through GFX12 (11).
2735def getMCOpcodeGen : InstrMapping {
2736  let FilterClass = "SIMCInstr";
2737  let RowFields = ["PseudoInstr"];
2738  let ColFields = ["Subtarget"];
2739  let KeyCol = [!cast<string>(SIEncodingFamily.NONE)];
2740  // These columns must be kept in sync with the SIEncodingFamily enumeration.
2741  let ValueCols = [[!cast<string>(SIEncodingFamily.SI)],
2742                   [!cast<string>(SIEncodingFamily.VI)],
2743                   [!cast<string>(SIEncodingFamily.SDWA)],
2744                   [!cast<string>(SIEncodingFamily.SDWA9)],
2745                   // GFX80 encoding is added to work around a multiple matching
2746                   // issue for buffer instructions with unpacked d16 data. This
2747                   // does not actually change the encoding, and thus may be
2748                   // removed later.
2749                   [!cast<string>(SIEncodingFamily.GFX80)],
2750                   [!cast<string>(SIEncodingFamily.GFX9)],
2751                   [!cast<string>(SIEncodingFamily.GFX10)],
2752                   [!cast<string>(SIEncodingFamily.SDWA10)],
2753                   [!cast<string>(SIEncodingFamily.GFX90A)],
2754                   [!cast<string>(SIEncodingFamily.GFX940)],
2755                   [!cast<string>(SIEncodingFamily.GFX11)],
2756                   [!cast<string>(SIEncodingFamily.GFX12)]];
2757}
2758
2759// Get equivalent SOPK instruction.
// Rows group SOPKInstTable records by BaseCmpOp; the key column is
// IsSOPK = "0" and the value column is IsSOPK = "1".
2760def getSOPKOp : InstrMapping {
2761  let FilterClass = "SOPKInstTable";
2762  let RowFields = ["BaseCmpOp"];
2763  let ColFields = ["IsSOPK"];
2764  let KeyCol = ["0"];
2765  let ValueCols = [["1"]];
2766}
2767
// Maps a MUBUFAddr64Table record with IsAddr64 = "0" to the record of the
// same OpName with IsAddr64 = "1".
2768def getAddr64Inst : InstrMapping {
2769  let FilterClass = "MUBUFAddr64Table";
2770  let RowFields = ["OpName"];
2771  let ColFields = ["IsAddr64"];
2772  let KeyCol = ["0"];
2773  let ValueCols = [["1"]];
2774}
2775
// Same table as getAddr64Inst, but the key column and the value column are
// both IsAddr64 = "1", so a record is mapped to the IsAddr64 = "1" record of
// its own OpName row.
// NOTE(review): presumably used as an "is this an addr64 instruction" test --
// confirm against the callers.
2776def getIfAddr64Inst : InstrMapping {
2777  let FilterClass = "MUBUFAddr64Table";
2778  let RowFields = ["OpName"];
2779  let ColFields = ["IsAddr64"];
2780  let KeyCol = ["1"];
2781  let ValueCols = [["1"]];
2782}
2783
2784// Maps an atomic opcode to its returnless version.
// Rows group AtomicNoRet records by NoRetOp; the key column is IsRet = "1"
// and the value column is IsRet = "0".
2785def getAtomicNoRetOp : InstrMapping {
2786  let FilterClass = "AtomicNoRet";
2787  let RowFields = ["NoRetOp"];
2788  let ColFields = ["IsRet"];
2789  let KeyCol = ["1"];
2790  let ValueCols = [["0"]];
2791}
2792
2793// Maps a GLOBAL to its SADDR form.
// Rows group GlobalSaddrTable records by SaddrOp; the key column is
// IsSaddr = "0" and the value column is IsSaddr = "1".
2794def getGlobalSaddrOp : InstrMapping {
2795  let FilterClass = "GlobalSaddrTable";
2796  let RowFields = ["SaddrOp"];
2797  let ColFields = ["IsSaddr"];
2798  let KeyCol = ["0"];
2799  let ValueCols = [["1"]];
2800}
2801
2802// Maps a GLOBAL SADDR to its VADDR form.
// Inverse of getGlobalSaddrOp: key column IsSaddr = "1", value column
// IsSaddr = "0".
2803def getGlobalVaddrOp : InstrMapping {
2804  let FilterClass = "GlobalSaddrTable";
2805  let RowFields = ["SaddrOp"];
2806  let ColFields = ["IsSaddr"];
2807  let KeyCol = ["1"];
2808  let ValueCols = [["0"]];
2809}
2810
2811// Maps a v_cmpx opcode with sdst to opcode without sdst.
// Rows group VCMPXNoSDstTable records by NoSDstOp; the key column is
// HasSDst = "1" and the value column is HasSDst = "0".
2812def getVCMPXNoSDstOp : InstrMapping {
2813  let FilterClass = "VCMPXNoSDstTable";
2814  let RowFields = ["NoSDstOp"];
2815  let ColFields = ["HasSDst"];
2816  let KeyCol = ["1"];
2817  let ValueCols = [["0"]];
2818}
2819
2820// Maps a SOPP to a SOPP with S_NOP
// Rows group SOPPRelaxTable records by KeyName; the key column is
// IsRelaxed = "0" and the value column is IsRelaxed = "1".
2821def getSOPPWithRelaxation : InstrMapping {
2822  let FilterClass = "SOPPRelaxTable";
2823  let RowFields = ["KeyName"];
2824  let ColFields = ["IsRelaxed"];
2825  let KeyCol = ["0"];
2826  let ValueCols = [["1"]];
2827}
2828
2829// Maps flat scratch opcodes by addressing modes
// The four getFlatScratchInst<To>from<From> mappings share one table:
// FlatScratchInst records grouped by SVOp, with columns selected on Mode.
// This one maps Mode "SS" (key) to Mode "ST" (value).
2830def getFlatScratchInstSTfromSS : InstrMapping {
2831  let FilterClass = "FlatScratchInst";
2832  let RowFields = ["SVOp"];
2833  let ColFields = ["Mode"];
2834  let KeyCol = ["SS"];
2835  let ValueCols = [["ST"]];
2836}
2837
// Maps a FlatScratchInst record with Mode "SV" (key) to the record of the
// same SVOp with Mode "SS" (value).
2838def getFlatScratchInstSSfromSV : InstrMapping {
2839  let FilterClass = "FlatScratchInst";
2840  let RowFields = ["SVOp"];
2841  let ColFields = ["Mode"];
2842  let KeyCol = ["SV"];
2843  let ValueCols = [["SS"]];
2844}
2845
// Maps a FlatScratchInst record with Mode "SVS" (key) to the record of the
// same SVOp with Mode "SV" (value).
2846def getFlatScratchInstSVfromSVS : InstrMapping {
2847  let FilterClass = "FlatScratchInst";
2848  let RowFields = ["SVOp"];
2849  let ColFields = ["Mode"];
2850  let KeyCol = ["SVS"];
2851  let ValueCols = [["SV"]];
2852}
2853
// Maps a FlatScratchInst record with Mode "SS" (key) to the record of the
// same SVOp with Mode "SV" (value); the reverse direction of
// getFlatScratchInstSSfromSV.
2854def getFlatScratchInstSVfromSS : InstrMapping {
2855  let FilterClass = "FlatScratchInst";
2856  let RowFields = ["SVOp"];
2857  let ColFields = ["Mode"];
2858  let KeyCol = ["SS"];
2859  let ValueCols = [["SV"]];
2860}
2861
// Maps an MFMATable record with IsMac = "1" (key) to the record of the same
// FMAOp with IsMac = "0" (value).
// NOTE(review): going by the mapping's name, the IsMac = "0" record is
// presumably the early-clobber form -- confirm against MFMATable's users.
2862def getMFMAEarlyClobberOp : InstrMapping {
2863  let FilterClass = "MFMATable";
2864  let RowFields = ["FMAOp"];
2865  let ColFields = ["IsMac"];
2866  let KeyCol = ["1"];
2867  let ValueCols = [["0"]];
2868}
2869
2870// Maps a v_cmp instruction to its v_cmpx equivalent.
// Rows group VCMPVCMPXTable records by VCMPOp; the key column is
// IsVCMPX = "0" and the value column is IsVCMPX = "1".
2871def getVCMPXOpFromVCMP : InstrMapping {
2872  let FilterClass = "VCMPVCMPXTable";
2873  let RowFields = ["VCMPOp"];
2874  let ColFields = ["IsVCMPX"];
2875  let KeyCol = ["0"];
2876  let ValueCols = [["1"]];
2877}
2878
// Searchable table over VOPD_Component records, emitted with C++ row type
// VOPDComponentInfo. Each row holds BaseVOP, VOPDOp and CanBeVOPDX; the
// primary lookup is by BaseVOP and is named getVOPDComponentHelper.
2879def VOPDComponentTable : GenericTable {
2880  let FilterClass = "VOPD_Component";
2881  let CppTypeName = "VOPDComponentInfo";
2882  let Fields = ["BaseVOP", "VOPDOp", "CanBeVOPDX"];
2883  let PrimaryKey = ["BaseVOP"];
2884  let PrimaryKeyName = "getVOPDComponentHelper";
2885}
2886
// Secondary lookup into VOPDComponentTable keyed on VOPDOp instead of the
// table's primary key BaseVOP.
2887def getVOPDBaseFromComponent : SearchIndex {
2888  let Table = VOPDComponentTable;
2889  let Key = ["VOPDOp"];
2890}
2891
// Searchable table over VOPD_Base records, emitted with C++ row type
// VOPDInfo. Each row holds Opcode, OpX, OpY and SubTgt; the primary lookup is
// by Opcode and is named getVOPDOpcodeHelper.
2892def VOPDPairs : GenericTable {
2893  let FilterClass = "VOPD_Base";
2894  let CppTypeName = "VOPDInfo";
2895  let Fields = ["Opcode", "OpX", "OpY", "SubTgt"];
2896  let PrimaryKey = ["Opcode"];
2897  let PrimaryKeyName = "getVOPDOpcodeHelper";
2898}
2899
// Secondary lookup into VOPDPairs keyed on the (OpX, OpY, SubTgt) triple
// instead of the table's primary key Opcode.
2900def getVOPDInfoFromComponentOpcodes : SearchIndex {
2901  let Table = VOPDPairs;
2902  let Key = ["OpX", "OpY", "SubTgt"];
2903}
2904
2905include "SIInstructions.td"
2906
2907include "DSInstructions.td"
2908include "MIMGInstructions.td"
2909