xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.td (revision b64c5a0ace59af62eff52bfe110a521dc73c937b)
1//===-- SIInstrInfo.td -----------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
// Wavefront-size predicates: each pairs a C++ condition on
// Subtarget->getWavefrontSize() with an assembler predicate that requires
// the corresponding FeatureWavefrontSize* feature.
9def isWave32 : Predicate<"Subtarget->getWavefrontSize() == 32">,
10  AssemblerPredicate <(all_of FeatureWavefrontSize32)>;
11def isWave64 : Predicate<"Subtarget->getWavefrontSize() == 64">,
12  AssemblerPredicate <(all_of FeatureWavefrontSize64)>;
13
// A MnemonicAlias (From -> To, optionally limited to VariantName) that also
// inherits PredicateControl.
// NOTE(review): presumably so aliases can be gated on subtarget predicates;
// PredicateControl is defined outside this file - confirm.
14class AMDGPUMnemonicAlias<string From, string To, string VariantName = "">
15    : MnemonicAlias<From, To, VariantName>, PredicateControl;
16
// Integer identifiers for each instruction encoding family. They are read as
// SIEncodingFamily.<NAME> elsewhere in this file (e.g. by the GFX*Gen defs).
17// Except for the NONE field, this must be kept in sync with the
18// SIEncodingFamily enum in SIInstrInfo.cpp and the columns of the
19// getMCOpcodeGen table.
20def SIEncodingFamily {
21  int NONE = -1;
22  int SI = 0;
23  int VI = 1;
24  int SDWA = 2;
25  int SDWA9 = 3;
26  int GFX80 = 4;
27  int GFX9 = 5;
28  int GFX10 = 6;
29  int SDWA10 = 7;
30  int GFX90A = 8;
31  int GFX940 = 9;
32  int GFX11 = 10;
33  int GFX12 = 11;
34}
35
36//===----------------------------------------------------------------------===//
37// Subtarget info
38//===----------------------------------------------------------------------===//
39
// Bundles the per-generation settings used when defining encodings:
//   pred   - assembler predicate for the generation
//   dn     - decoder namespace string
//   suffix - string stored in Suffix (e.g. "_gfx12")
//   sub    - SIEncodingFamily value stored in Subtarget
40class GFXGen<Predicate pred, string dn, string suffix, int sub> {
41  Predicate AssemblerPredicate = pred;
42  string DecoderNamespace = dn;
43  string Suffix = suffix;
44  int Subtarget = sub;
45}
46
// One GFXGen record per generation that is described this way.
47def GFX12Gen : GFXGen<isGFX12Only, "GFX12", "_gfx12", SIEncodingFamily.GFX12>;
48def GFX11Gen : GFXGen<isGFX11Only, "GFX11", "_gfx11", SIEncodingFamily.GFX11>;
49def GFX10Gen : GFXGen<isGFX10Only, "GFX10", "_gfx10", SIEncodingFamily.GFX10>;
50
51//===----------------------------------------------------------------------===//
52// SI DAG Nodes
53//===----------------------------------------------------------------------===//
54
// DAG node for AMDGPUISD::CLAMP, typed as a floating-point unary operation.
55def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPUnaryOp>;
56
// Type profile shared by all SBUFFER_LOAD* nodes below: one result (vdata,
// left unconstrained) and three operands (rsrc, offset, cachepolicy).
57def SDTSBufferLoad : SDTypeProfile<1, 3,
58    [                    // vdata
59     SDTCisVT<1, v4i32>, // rsrc
60     SDTCisVT<2, i32>,   // offset(imm)
61     SDTCisVT<3, i32>]>; // cachepolicy
62
// The SBUFFER_LOAD node and its byte/ubyte/short/ushort variants. All of them
// may load and carry a memory operand; none is declared with SDNPHasChain.
63def SIsbuffer_load : SDNode<"AMDGPUISD::SBUFFER_LOAD", SDTSBufferLoad,
64                            [SDNPMayLoad, SDNPMemOperand]>;
65
66def SIsbuffer_load_byte : SDNode<"AMDGPUISD::SBUFFER_LOAD_BYTE", SDTSBufferLoad,
67                                 [SDNPMayLoad, SDNPMemOperand]>;
68
69def SIsbuffer_load_ubyte
70    : SDNode<"AMDGPUISD::SBUFFER_LOAD_UBYTE", SDTSBufferLoad,
71             [SDNPMayLoad, SDNPMemOperand]>;
72
73def SIsbuffer_load_short
74    : SDNode<"AMDGPUISD::SBUFFER_LOAD_SHORT", SDTSBufferLoad,
75             [SDNPMayLoad, SDNPMemOperand]>;
76
77def SIsbuffer_load_ushort
78    : SDNode<"AMDGPUISD::SBUFFER_LOAD_USHORT", SDTSBufferLoad,
79             [SDNPMayLoad, SDNPMemOperand]>;
80
// DS_ORDERED_COUNT: i32 result, an i32 operand and an i16 operand. The node is
// chained, may load and store, carries a memory operand and takes a glue input.
81def SIds_ordered_count : SDNode<"AMDGPUISD::DS_ORDERED_COUNT",
82  SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i16>]>,
83  [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain, SDNPInGlue]
84>;
85
// Profile for two-operand FP atomics: the result is floating point and has the
// same type as operand 2; operand 1 is a pointer.
86def SDTAtomic2_f32 : SDTypeProfile<1, 2, [
87  SDTCisSameAs<0,2>, SDTCisFP<0>, SDTCisPtrTy<1>
88]>;
89
// Profile for the d16 load nodes: operand 1 is a pointer and operand 2 (the
// tied input) has the same type as the result.
90// load_d16_{lo|hi} ptr, tied_input
91def SIload_d16 : SDTypeProfile<1, 2, [
92  SDTCisPtrTy<1>,
93  SDTCisSameAs<0, 2>
94]>;
95
96
// Type profile for typed-buffer loads: one result (vdata, unconstrained) and
// eight operands, listed in order in the trailing comments.
97def SDTtbuffer_load : SDTypeProfile<1, 8,
98  [                     // vdata
99   SDTCisVT<1, v4i32>,  // rsrc
100   SDTCisVT<2, i32>,    // vindex(VGPR)
101   SDTCisVT<3, i32>,    // voffset(VGPR)
102   SDTCisVT<4, i32>,    // soffset(SGPR)
103   SDTCisVT<5, i32>,    // offset(imm)
104   SDTCisVT<6, i32>,    // format(imm)
105   SDTCisVT<7, i32>,    // cachepolicy, swizzled buffer(imm)
106   SDTCisVT<8, i1>      // idxen(imm)
107  ]>;
108
// TBUFFER_LOAD_FORMAT and its D16 variant; both are chained loads with a
// memory operand.
109def SItbuffer_load :   SDNode<"AMDGPUISD::TBUFFER_LOAD_FORMAT", SDTtbuffer_load,
110                              [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;
111def SItbuffer_load_d16 : SDNode<"AMDGPUISD::TBUFFER_LOAD_FORMAT_D16",
112                                SDTtbuffer_load,
113                                [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;
114
// Type profile for typed-buffer stores: no result and nine operands. Operand 0
// (vdata) is left unconstrained; operands 1-8 mirror SDTtbuffer_load.
115def SDTtbuffer_store : SDTypeProfile<0, 9,
116    [                     // vdata
117     SDTCisVT<1, v4i32>,  // rsrc
118     SDTCisVT<2, i32>,    // vindex(VGPR)
119     SDTCisVT<3, i32>,    // voffset(VGPR)
120     SDTCisVT<4, i32>,    // soffset(SGPR)
121     SDTCisVT<5, i32>,    // offset(imm)
122     SDTCisVT<6, i32>,    // format(imm)
123     SDTCisVT<7, i32>,    // cachepolicy, swizzled buffer(imm)
124     SDTCisVT<8, i1>      // idxen(imm)
125    ]>;
126
// TBUFFER_STORE_FORMAT and its D16 variant; both are chained stores with a
// memory operand.
127def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT", SDTtbuffer_store,
128                             [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
129def SItbuffer_store_d16 : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT_D16",
130                                SDTtbuffer_store,
131                                [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
132
// Type profile shared by every BUFFER_LOAD* node below: one result (vdata,
// unconstrained) and seven operands, listed in order in the trailing comments.
133def SDTBufferLoad : SDTypeProfile<1, 7,
134    [                    // vdata
135     SDTCisVT<1, v4i32>, // rsrc
136     SDTCisVT<2, i32>,   // vindex(VGPR)
137     SDTCisVT<3, i32>,   // voffset(VGPR)
138     SDTCisVT<4, i32>,   // soffset(SGPR)
139     SDTCisVT<5, i32>,   // offset(imm)
140     SDTCisVT<6, i32>,   // cachepolicy, swizzled buffer(imm)
141     SDTCisVT<7, i1>]>;  // idxen(imm)
142
// Buffer load nodes. Every node uses SDTBufferLoad and the same property set
// (memory operand, chain, may load); they differ only in the AMDGPUISD opcode:
// plain, ubyte/ushort/byte/short, the *_TFE counterparts, and the FORMAT
// family (plain, TFE, D16).
143def SIbuffer_load : SDNode <"AMDGPUISD::BUFFER_LOAD", SDTBufferLoad,
144                            [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
145def SIbuffer_load_ubyte : SDNode <"AMDGPUISD::BUFFER_LOAD_UBYTE", SDTBufferLoad,
146                            [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
147def SIbuffer_load_ushort : SDNode <"AMDGPUISD::BUFFER_LOAD_USHORT", SDTBufferLoad,
148                            [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
149def SIbuffer_load_byte : SDNode <"AMDGPUISD::BUFFER_LOAD_BYTE", SDTBufferLoad,
150                            [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
151def SIbuffer_load_short: SDNode <"AMDGPUISD::BUFFER_LOAD_SHORT", SDTBufferLoad,
152                            [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
153def SIbuffer_load_tfe : SDNode <"AMDGPUISD::BUFFER_LOAD_TFE", SDTBufferLoad,
154                            [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
155def SIbuffer_load_ubyte_tfe : SDNode <"AMDGPUISD::BUFFER_LOAD_UBYTE_TFE", SDTBufferLoad,
156                            [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
157def SIbuffer_load_ushort_tfe : SDNode <"AMDGPUISD::BUFFER_LOAD_USHORT_TFE", SDTBufferLoad,
158                            [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
159def SIbuffer_load_byte_tfe : SDNode <"AMDGPUISD::BUFFER_LOAD_BYTE_TFE", SDTBufferLoad,
160                            [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
161def SIbuffer_load_short_tfe: SDNode <"AMDGPUISD::BUFFER_LOAD_SHORT_TFE", SDTBufferLoad,
162                            [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
163def SIbuffer_load_format : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT", SDTBufferLoad,
164                            [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
165def SIbuffer_load_format_tfe : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT_TFE", SDTBufferLoad,
166                               [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
167def SIbuffer_load_format_d16 : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT_D16",
168                                SDTBufferLoad,
169                                [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
170
// Type profile shared by every BUFFER_STORE* node below: no result and eight
// operands. Operand 0 (vdata) is left unconstrained; operands 1-7 are listed
// in the trailing comments.
171def SDTBufferStore : SDTypeProfile<0, 8,
172    [                    // vdata
173     SDTCisVT<1, v4i32>, // rsrc
174     SDTCisVT<2, i32>,   // vindex(VGPR)
175     SDTCisVT<3, i32>,   // voffset(VGPR)
176     SDTCisVT<4, i32>,   // soffset(SGPR)
177     SDTCisVT<5, i32>,   // offset(imm)
178     SDTCisVT<6, i32>,   // cachepolicy, swizzled buffer(imm)
179     SDTCisVT<7, i1>]>;  // idxen(imm)
180
// Buffer store nodes (plain, byte, short, format, format-d16). All are chained
// stores with a memory operand.
181def SIbuffer_store : SDNode <"AMDGPUISD::BUFFER_STORE", SDTBufferStore,
182                             [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
183def SIbuffer_store_byte: SDNode <"AMDGPUISD::BUFFER_STORE_BYTE",
184                         SDTBufferStore,
185                         [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
186def SIbuffer_store_short : SDNode <"AMDGPUISD::BUFFER_STORE_SHORT",
187                           SDTBufferStore,
188                           [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
189def SIbuffer_store_format : SDNode <"AMDGPUISD::BUFFER_STORE_FORMAT",
190                            SDTBufferStore,
191                            [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
192def SIbuffer_store_format_d16 : SDNode <"AMDGPUISD::BUFFER_STORE_FORMAT_D16",
193                            SDTBufferStore,
194                            [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
195
// Defines a buffer atomic DAG node plus a "_noret" pattern fragment for it.
//   opcode - the AMDGPUISD opcode string for the node.
// The record with the empty suffix is the SDNode itself: one result and eight
// operands. Index 0 is the result and index 1 the data input (both left
// unconstrained); indices 2-8 are constrained below. The node is chained, may
// load and store, and carries a memory operand.
// The "_noret" fragment matches the same node with HasNoUse set.
// NOTE(review): HasNoUse presumably restricts the match to nodes whose result
// is unused - it is defined outside this file, confirm.
196multiclass SDBufferAtomic<string opcode> {
197  def "" : SDNode <opcode,
198    SDTypeProfile<1, 8,
199         [SDTCisVT<2, v4i32>, // rsrc
200         SDTCisVT<3, i32>,   // vindex(VGPR)
201         SDTCisVT<4, i32>,   // voffset(VGPR)
202         SDTCisVT<5, i32>,   // soffset(SGPR)
203         SDTCisVT<6, i32>,   // offset(imm)
204         SDTCisVT<7, i32>,   // cachepolicy(imm)
205         SDTCisVT<8, i1>]>,  // idxen(imm)
206    [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
207  >;
208  def "_noret" : PatFrag<
209    (ops node:$vdata_in, node:$rsrc, node:$vindex, node:$voffset, node:$soffset,
210      node:$offset, node:$cachepolicy, node:$idxen),
211    (!cast<SDNode>(NAME) node:$vdata_in, node:$rsrc, node:$vindex,
212      node:$voffset, node:$soffset, node:$offset, node:$cachepolicy,
213      node:$idxen)> {
214    let HasNoUse = true;
215  }
216}
217
// One node / _noret fragment pair per buffer atomic operation.
218defm SIbuffer_atomic_swap : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SWAP">;
219defm SIbuffer_atomic_add : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_ADD">;
220defm SIbuffer_atomic_sub : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SUB">;
221defm SIbuffer_atomic_smin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SMIN">;
222defm SIbuffer_atomic_umin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_UMIN">;
223defm SIbuffer_atomic_smax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SMAX">;
224defm SIbuffer_atomic_umax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_UMAX">;
225defm SIbuffer_atomic_and : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_AND">;
226defm SIbuffer_atomic_or : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_OR">;
227defm SIbuffer_atomic_xor : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_XOR">;
228defm SIbuffer_atomic_inc : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_INC">;
229defm SIbuffer_atomic_dec : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_DEC">;
230defm SIbuffer_atomic_csub : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_CSUB">;
231defm SIbuffer_atomic_fadd : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FADD">;
232defm SIbuffer_atomic_fmin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FMIN">;
233defm SIbuffer_atomic_fmax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FMAX">;
234defm SIbuffer_atomic_cond_sub_u32 : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_COND_SUB_U32">;
235
// Buffer atomic compare-and-swap: one result and nine operands. Indices 1 and
// 2 are the $src and $cmp operands named in the fragment below (left
// unconstrained here); indices 3-9 are the rsrc/addressing/policy operands.
// The node is chained, may load and store, and carries a memory operand.
236def SIbuffer_atomic_cmpswap : SDNode <"AMDGPUISD::BUFFER_ATOMIC_CMPSWAP",
237  SDTypeProfile<1, 9,
238    [SDTCisVT<3, v4i32>, // rsrc
239     SDTCisVT<4, i32>,   // vindex(VGPR)
240     SDTCisVT<5, i32>,   // voffset(VGPR)
241     SDTCisVT<6, i32>,   // soffset(SGPR)
242     SDTCisVT<7, i32>,   // offset(imm)
243     SDTCisVT<8, i32>,   // cachepolicy(imm)
244     SDTCisVT<9, i1>]>,  // idxen(imm)
245  [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
246>;
247
// Matches SIbuffer_atomic_cmpswap with HasNoUse set, mirroring the "_noret"
// fragments produced by SDBufferAtomic.
248def SIbuffer_atomic_cmpswap_noret : PatFrag<
249  (ops node:$src, node:$cmp, node:$rsrc, node:$vindex, node:$voffset,
250    node:$soffset, node:$offset, node:$cachepolicy, node:$idxen),
251  (SIbuffer_atomic_cmpswap node:$src, node:$cmp, node:$rsrc, node:$vindex,
252    node:$voffset, node:$soffset, node:$offset, node:$cachepolicy,
253    node:$idxen)> {
254  let HasNoUse = true;
255}
256
// Base class for atomic nodes that produce no result.
//   opcode - the ISD opcode string for the node.
//   ty     - value type required of the data operand.
// Operand 0 is a pointer (vaddr) and operand 1 is the data of type `ty`.
257class SDGlobalAtomicNoRtn<string opcode, ValueType ty> : SDNode <opcode,
258  SDTypeProfile<0, 2,
259      [SDTCisPtrTy<0>,     // vaddr
260       SDTCisVT<1, ty>]>,  // vdata
261  [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
262>;
263
// PC_ADD_REL_OFFSET: pointer-typed result; both operands have the result type.
264def SIpc_add_rel_offset : SDNode<"AMDGPUISD::PC_ADD_REL_OFFSET",
265  SDTypeProfile<1, 2, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>
266>;
267
// LDS: pointer-typed result; the single operand has the result type.
268def SIlds : SDNode<"AMDGPUISD::LDS",
269  SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>]>
270>;
271
// The six d16 load nodes ({lo,hi} x {plain, u8, i8}). All share the SIload_d16
// profile (pointer plus tied input) and are chained loads with a memory
// operand; they differ only in the AMDGPUISD opcode.
272def SIload_d16_lo : SDNode<"AMDGPUISD::LOAD_D16_LO",
273  SIload_d16,
274  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
275>;
276
277def SIload_d16_lo_u8 : SDNode<"AMDGPUISD::LOAD_D16_LO_U8",
278  SIload_d16,
279  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
280>;
281
282def SIload_d16_lo_i8 : SDNode<"AMDGPUISD::LOAD_D16_LO_I8",
283  SIload_d16,
284  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
285>;
286
287def SIload_d16_hi : SDNode<"AMDGPUISD::LOAD_D16_HI",
288  SIload_d16,
289  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
290>;
291
292def SIload_d16_hi_u8 : SDNode<"AMDGPUISD::LOAD_D16_HI_U8",
293  SIload_d16,
294  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
295>;
296
297def SIload_d16_hi_i8 : SDNode<"AMDGPUISD::LOAD_D16_HI_I8",
298  SIload_d16,
299  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
300>;
301
// DENORM_MODE: no result and one integer operand. The node is chained, accepts
// an optional glue input and produces a glue output.
302def SIdenorm_mode : SDNode<"AMDGPUISD::DENORM_MODE",
303  SDTypeProfile<0 ,1, [SDTCisInt<0>]>,
304  [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]
305>;
306
// FP truncation nodes with the rounding direction encoded in the opcode
// (upward / downward); both use the generic SDTFPRoundOp profile.
307def SIfptrunc_round_upward : SDNode<"AMDGPUISD::FPTRUNC_ROUND_UPWARD",
308  SDTFPRoundOp
309>;
310
311def SIfptrunc_round_downward : SDNode<"AMDGPUISD::FPTRUNC_ROUND_DOWNWARD",
312  SDTFPRoundOp
313>;
314
315//===----------------------------------------------------------------------===//
316// ValueType helpers
317//===----------------------------------------------------------------------===//
318
// Helper that computes `ret`: true when SrcVT is an integer value type other
// than i1, false otherwise.
319class isIntType<ValueType SrcVT> {
320  bit ret = !and(SrcVT.isInteger, !ne(SrcVT.Value, i1.Value));
321}
322
323//===----------------------------------------------------------------------===//
324// SDNodes PatFrags for loads/stores with a glue input.
325// These SDNodes and PatFrags cover local loads and stores so that an
326// s_mov_b32 m0, -1 can be glued to the memory instructions.
327//
328// These mirror the regular load/store PatFrags and rely on special
329// processing during Select() to add the glued copy.
330//
331//===----------------------------------------------------------------------===//
332
// ISD::LOAD re-declared with an additional glue input (SDNPInGlue).
333def AMDGPUld_glue : SDNode <"ISD::LOAD", SDTLoad,
334  [SDNPHasChain, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
335>;
336
// ISD::ATOMIC_LOAD re-declared with an additional glue input.
337def AMDGPUatomic_ld_glue : SDNode <"ISD::ATOMIC_LOAD", SDTAtomicLoad,
338  [SDNPHasChain, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
339>;
340
// Glued load restricted to unindexed addressing.
341def unindexedload_glue : PatFrag <(ops node:$ptr), (AMDGPUld_glue node:$ptr)> {
342  let IsLoad = 1;
343  let IsUnindexed = 1;
344}
345
// Glued, unindexed, non-extending load.
346def load_glue : PatFrag <(ops node:$ptr), (unindexedload_glue node:$ptr)> {
347  let IsLoad = 1;
348  let IsNonExtLoad = 1;
349}
350
// Glued atomic loads, one fragment per memory type (i8, i16, i32, i64). Each
// wraps AMDGPUatomic_ld_glue and differs only in MemoryVT.
351def atomic_load_8_glue : PatFrag<(ops node:$ptr),
352  (AMDGPUatomic_ld_glue node:$ptr)> {
353  let IsAtomic = 1;
354  let MemoryVT = i8;
355}
356
357def atomic_load_16_glue : PatFrag<(ops node:$ptr),
358  (AMDGPUatomic_ld_glue node:$ptr)> {
359  let IsAtomic = 1;
360  let MemoryVT = i16;
361}
362
363def atomic_load_32_glue : PatFrag<(ops node:$ptr),
364  (AMDGPUatomic_ld_glue node:$ptr)> {
365  let IsAtomic = 1;
366  let MemoryVT = i32;
367}
368
369def atomic_load_64_glue : PatFrag<(ops node:$ptr),
370  (AMDGPUatomic_ld_glue node:$ptr)> {
371  let IsAtomic = 1;
372  let MemoryVT = i64;
373}
374
// Glued extending loads. The first three fragments select the extension kind
// (any / sign / zero) on top of unindexedload_glue.
375def extload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
376  let IsLoad = 1;
377  let IsAnyExtLoad = 1;
378}
379
380def sextload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
381  let IsLoad = 1;
382  let IsSignExtLoad = 1;
383}
384
385def zextload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
386  let IsLoad = 1;
387  let IsZeroExtLoad = 1;
388}
389
// The remaining fragments narrow one of the extension kinds above to a
// specific memory type (i8 or i16).
390def extloadi8_glue : PatFrag<(ops node:$ptr), (extload_glue node:$ptr)> {
391  let IsLoad = 1;
392  let MemoryVT = i8;
393}
394
395def zextloadi8_glue : PatFrag<(ops node:$ptr), (zextload_glue node:$ptr)> {
396  let IsLoad = 1;
397  let MemoryVT = i8;
398}
399
400def extloadi16_glue : PatFrag<(ops node:$ptr), (extload_glue node:$ptr)> {
401  let IsLoad = 1;
402  let MemoryVT = i16;
403}
404
405def zextloadi16_glue : PatFrag<(ops node:$ptr), (zextload_glue node:$ptr)> {
406  let IsLoad = 1;
407  let MemoryVT = i16;
408}
409
410def sextloadi8_glue : PatFrag<(ops node:$ptr), (sextload_glue node:$ptr)> {
411  let IsLoad = 1;
412  let MemoryVT = i8;
413}
414
415def sextloadi16_glue : PatFrag<(ops node:$ptr), (sextload_glue node:$ptr)> {
416  let IsLoad = 1;
417  let MemoryVT = i16;
418}
419
420
// "_local_m0" load fragments: the glued load fragments restricted to the
// address spaces in LoadAddress_local.AddrSpaces. IsLoad and AddressSpaces are
// applied to every fragment in the block by the enclosing `let`.
421let IsLoad = 1, AddressSpaces = LoadAddress_local.AddrSpaces in {
422def load_local_m0 : PatFrag<(ops node:$ptr), (load_glue node:$ptr)> {
423  let IsNonExtLoad = 1;
424}
425
426def extloadi8_local_m0 : PatFrag<(ops node:$ptr), (extloadi8_glue node:$ptr)>;
427def sextloadi8_local_m0 : PatFrag<(ops node:$ptr), (sextloadi8_glue node:$ptr)>;
428def zextloadi8_local_m0 : PatFrag<(ops node:$ptr), (zextloadi8_glue node:$ptr)>;
429
430def extloadi16_local_m0 : PatFrag<(ops node:$ptr), (extloadi16_glue node:$ptr)>;
431def sextloadi16_local_m0 : PatFrag<(ops node:$ptr), (sextloadi16_glue node:$ptr)>;
432def zextloadi16_local_m0 : PatFrag<(ops node:$ptr), (zextloadi16_glue node:$ptr)>;
433} // End let IsLoad = 1, AddressSpaces = LoadAddress_local.AddrSpaces
434
// Alignment-constrained variants of load_local_m0 (8- and 16-byte). The
// minimum alignment comes from the Aligned<N> mixin, the same way
// store_align8_local_m0 / store_align16_local_m0 in this file obtain theirs.
// NOTE(review): Aligned is declared outside this file; it is assumed to do
// nothing more than set MinAlignment to its argument - confirm.
435def load_align8_local_m0 : PatFrag<(ops node:$ptr),
436                                   (load_local_m0 node:$ptr)>,
437                           Aligned<8> {
438  let IsLoad = 1;
439}
440
441def load_align16_local_m0 : PatFrag<(ops node:$ptr),
442                                   (load_local_m0 node:$ptr)>,
443                            Aligned<16> {
444  let IsLoad = 1;
445}
446
// "_local_m0" atomic load fragments: the glued atomic loads restricted to the
// address spaces in LoadAddress_local.AddrSpaces, one per memory width.
447let IsAtomic = 1, AddressSpaces = LoadAddress_local.AddrSpaces in {
448def atomic_load_8_local_m0 : PatFrag<(ops node:$ptr),
449                                      (atomic_load_8_glue node:$ptr)>;
450def atomic_load_16_local_m0 : PatFrag<(ops node:$ptr),
451                                      (atomic_load_16_glue node:$ptr)>;
452def atomic_load_32_local_m0 : PatFrag<(ops node:$ptr),
453                                      (atomic_load_32_glue node:$ptr)>;
454def atomic_load_64_local_m0 : PatFrag<(ops node:$ptr),
455                                       (atomic_load_64_glue node:$ptr)>;
456} // End let IsAtomic = 1, AddressSpaces = LoadAddress_local.AddrSpaces
457
458
// ISD::STORE re-declared with an additional glue input (SDNPInGlue).
459def AMDGPUst_glue : SDNode <"ISD::STORE", SDTStore,
460  [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPInGlue]
461>;
462
// ISD::ATOMIC_STORE re-declared with an additional glue input.
463def AMDGPUatomic_st_glue : SDNode <"ISD::ATOMIC_STORE", SDTAtomicStore,
464  [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPInGlue]
465>;
466
// Glued store restricted to unindexed addressing.
467def unindexedstore_glue : PatFrag<(ops node:$val, node:$ptr),
468                                   (AMDGPUst_glue node:$val, node:$ptr)> {
469  let IsStore = 1;
470  let IsUnindexed = 1;
471}
472
// Glued, unindexed, non-truncating store.
473def store_glue : PatFrag<(ops node:$val, node:$ptr),
474                         (unindexedstore_glue node:$val, node:$ptr)> {
475  let IsStore = 1;
476  let IsTruncStore = 0;
477}
478
// Glued, unindexed, truncating store (any memory type).
479def truncstore_glue : PatFrag<(ops node:$val, node:$ptr),
480  (unindexedstore_glue node:$val, node:$ptr)> {
481  let IsStore = 1;
482  let IsTruncStore = 1;
483}
484
// Truncating glued stores narrowed to a specific memory type (i8, i16).
485def truncstorei8_glue : PatFrag<(ops node:$val, node:$ptr),
486                           (truncstore_glue node:$val, node:$ptr)> {
487  let IsStore = 1;
488  let MemoryVT = i8;
489  let IsTruncStore = 1;
490}
491
492def truncstorei16_glue : PatFrag<(ops node:$val, node:$ptr),
493                           (truncstore_glue node:$val, node:$ptr)> {
494  let IsStore = 1;
495  let MemoryVT = i16;
496  let IsTruncStore = 1;
497}
498
// "_local_m0" store fragments: the glued store fragments restricted to the
// address spaces in StoreAddress_local.AddrSpaces.
499let IsStore = 1, AddressSpaces = StoreAddress_local.AddrSpaces in {
500def store_local_m0 : PatFrag<(ops node:$val, node:$ptr),
501                             (store_glue node:$val, node:$ptr)>;
502def truncstorei8_local_m0 : PatFrag<(ops node:$val, node:$ptr),
503                                    (truncstorei8_glue node:$val, node:$ptr)>;
504def truncstorei16_local_m0 : PatFrag<(ops node:$val, node:$ptr),
505                                    (truncstorei16_glue node:$val, node:$ptr)>;
506} // End let IsStore = 1, AddressSpaces = StoreAddress_local.AddrSpaces
507
// Alignment-constrained variants of store_local_m0; the Aligned<N> mixin
// supplies the alignment requirement (8 and 16 respectively).
508def store_align8_local_m0 : PatFrag <(ops node:$value, node:$ptr),
509                                     (store_local_m0 node:$value, node:$ptr)>,
510                            Aligned<8> {
511  let IsStore = 1;
512}
513
514def store_align16_local_m0 : PatFrag <(ops node:$value, node:$ptr),
515                                     (store_local_m0 node:$value, node:$ptr)>,
516                            Aligned<16> {
517  let IsStore = 1;
518}
519
// Local-address-space loads/stores whose alignment is below 4. The same check
// is written twice: PredicateCode reads the alignment from the MemSDNode
// (SelectionDAG) and GISelPredicateCode reads it from the first memory operand
// of the MachineInstr (GlobalISel). Each of the four fragments covers one
// combination of {load, store} x {plain, m0-glued}.
520let PredicateCode = [{return cast<MemSDNode>(N)->getAlign() < 4;}],
521    GISelPredicateCode = [{return (*MI.memoperands_begin())->getAlign() < 4;}],
522    AddressSpaces = [ AddrSpaces.Local ] in {
523def load_align_less_than_4_local : PatFrag<(ops node:$ptr),
524                                           (load_local node:$ptr)> {
525  let IsLoad = 1;
526  let IsNonExtLoad = 1;
527}
528
529def load_align_less_than_4_local_m0 : PatFrag<(ops node:$ptr),
530                                              (load_local_m0 node:$ptr)> {
531  let IsLoad = 1;
532  let IsNonExtLoad = 1;
533}
534
535def store_align_less_than_4_local : PatFrag <(ops node:$value, node:$ptr),
536                                             (store_local node:$value, node:$ptr)> {
537  let IsStore = 1;
538  let IsTruncStore = 0;
539}
540
541def store_align_less_than_4_local_m0 : PatFrag <(ops node:$value, node:$ptr),
542                                                (store_local_m0 node:$value, node:$ptr)> {
543  let IsStore = 1;
544  let IsTruncStore = 0;
545}
546} // End let PredicateCode, GISelPredicateCode, AddressSpaces
547
// Glued atomic stores, one fragment per memory type (i8, i16, i32, i64). Each
// forwards its two operands, in order, to AMDGPUatomic_st_glue and differs
// only in MemoryVT. Operand names are positional labels; (val, ptr) mirrors
// the naming used by the atomic_store_*_local_m0 fragments that wrap these.
// NOTE(review): assumes ATOMIC_STORE takes the value before the pointer, as
// those wrappers suggest - confirm against SDTAtomicStore.
548def atomic_store_8_glue : PatFrag <
549  (ops node:$val, node:$ptr),
550  (AMDGPUatomic_st_glue node:$val, node:$ptr)> {
551  let IsAtomic = 1;
552  let MemoryVT = i8;
553}
554
555def atomic_store_16_glue : PatFrag <
556  (ops node:$val, node:$ptr),
557  (AMDGPUatomic_st_glue node:$val, node:$ptr)> {
558  let IsAtomic = 1;
559  let MemoryVT = i16;
560}
561
562def atomic_store_32_glue : PatFrag <
563  (ops node:$val, node:$ptr),
564  (AMDGPUatomic_st_glue node:$val, node:$ptr)> {
565  let IsAtomic = 1;
566  let MemoryVT = i32;
567}
568
569def atomic_store_64_glue : PatFrag <
570  (ops node:$val, node:$ptr),
571  (AMDGPUatomic_st_glue node:$val, node:$ptr)> {
572  let IsAtomic = 1;
573  let MemoryVT = i64;
574}
575
// "_local_m0" atomic store fragments: the glued atomic stores restricted to
// the address spaces in StoreAddress_local.AddrSpaces, one per memory width.
576let IsAtomic = 1, AddressSpaces = StoreAddress_local.AddrSpaces in {
577def atomic_store_8_local_m0 : PatFrag<(ops node:$val, node:$ptr),
578                                       (atomic_store_8_glue node:$val, node:$ptr)>;
579def atomic_store_16_local_m0 : PatFrag<(ops node:$val, node:$ptr),
580                                       (atomic_store_16_glue node:$val, node:$ptr)>;
581def atomic_store_32_local_m0 : PatFrag<(ops node:$val, node:$ptr),
582                                       (atomic_store_32_glue node:$val, node:$ptr)>;
583def atomic_store_64_local_m0 : PatFrag<(ops node:$val, node:$ptr),
584                                       (atomic_store_64_glue node:$val, node:$ptr)>;
585} // End let IsAtomic = 1, AddressSpaces = StoreAddress_local.AddrSpaces
586
587
588//===----------------------------------------------------------------------===//
589// SDNodes PatFrags for a16 loads and stores with 3 components.
590// v3f16/v3i16 is widened to v4f16/v4i16, so we need to match on the memory
591// load/store size.
592//===----------------------------------------------------------------------===//
593
// Wraps a buffer load operator `name` so that it only matches when the memory
// type is `vt`. Operands are forwarded unchanged.
594class mubuf_intrinsic_load<SDPatternOperator name, ValueType vt> : PatFrag <
595  (ops node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
596            node:$auxiliary, node:$idxen),
597  (name node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
598            node:$auxiliary, node:$idxen)> {
599  let IsLoad = 1;
600  let MemoryVT = vt;
601}
602
// Store counterpart of mubuf_intrinsic_load; takes the extra leading $vdata.
603class mubuf_intrinsic_store<SDPatternOperator name, ValueType vt> : PatFrag <
604  (ops node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
605            node:$auxiliary, node:$idxen),
606  (name node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
607            node:$auxiliary, node:$idxen)> {
608  let IsStore = 1;
609  let MemoryVT = vt;
610}
611
// Typed-buffer variant of mubuf_intrinsic_load; adds the $format operand.
612class mtbuf_intrinsic_load<SDPatternOperator name, ValueType vt> : PatFrag <
613  (ops node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
614            node:$format, node:$auxiliary, node:$idxen),
615  (name node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
616            node:$format, node:$auxiliary, node:$idxen)> {
617  let IsLoad = 1;
618  let MemoryVT = vt;
619}
620
// Typed-buffer variant of mubuf_intrinsic_store; adds the $format operand.
621class mtbuf_intrinsic_store<SDPatternOperator name, ValueType vt> : PatFrag <
622  (ops node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
623            node:$format, node:$auxiliary, node:$idxen),
624  (name node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
625            node:$format, node:$auxiliary, node:$idxen)> {
626  let IsStore = 1;
627  let MemoryVT = vt;
628}
629
630//===----------------------------------------------------------------------===//
631// SDNodes PatFrags for d16 loads
632//===----------------------------------------------------------------------===//
633
// Load fragment over a d16 load operator `op`, taking the pointer and the
// tied input and forwarding both unchanged.
634class LoadD16Frag <SDPatternOperator op> : PatFrag<
635  (ops node:$ptr, node:$tied_in),
636  (op node:$ptr, node:$tied_in)> {
637  let IsLoad = 1;
638}
639
// Instantiate the six d16 load fragments once per address space. The address
// space list is looked up by name ("LoadAddress_" # as), and the fragment
// names get the address-space name as a suffix. The 8-bit variants also pin
// MemoryVT to i8.
640foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
641let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {
642
643def load_d16_hi_#as : LoadD16Frag <SIload_d16_hi>;
644
645def az_extloadi8_d16_hi_#as : LoadD16Frag <SIload_d16_hi_u8> {
646  let MemoryVT = i8;
647}
648
649def sextloadi8_d16_hi_#as : LoadD16Frag <SIload_d16_hi_i8> {
650  let MemoryVT = i8;
651}
652
653def load_d16_lo_#as : LoadD16Frag <SIload_d16_lo>;
654
655def az_extloadi8_d16_lo_#as : LoadD16Frag <SIload_d16_lo_u8> {
656  let MemoryVT = i8;
657}
658
659def sextloadi8_d16_lo_#as : LoadD16Frag <SIload_d16_lo_i8> {
660  let MemoryVT = i8;
661}
662
663} // End let AddressSpaces = ...
664} // End foreach AddrSpace
665
// Shift fragments with the operands reversed: the fragment takes
// ($src1, $src0) and matches the shift of $src0 by $src1.
666def lshr_rev : PatFrag <
667  (ops node:$src1, node:$src0),
668  (srl $src0, $src1)
669>;
670
671def ashr_rev : PatFrag <
672  (ops node:$src1, node:$src0),
673  (sra $src0, $src1)
674>;
675
676def lshl_rev : PatFrag <
677  (ops node:$src1, node:$src0),
678  (shl $src0, $src1)
679>;
680
// Matches ctpop($src0) + $src1.
681def add_ctpop : PatFrag <
682  (ops node:$src0, node:$src1),
683  (add (ctpop $src0), $src1)
684>;
685
// Matches not($src0 xor $src1).
686def xnor : PatFrag <
687  (ops node:$src0, node:$src1),
688  (not (xor $src0, $src1))
689>;
690
// shl1_add .. shl4_add: match ($src0 << I) + $src1 where the shift has a
// single use. The SelectionDAG predicate always returns false and the
// GlobalISel predicate always returns true, so these fragments are effectively
// GlobalISel-only (see the FIXME).
691foreach I = 1-4 in {
692def shl#I#_add : PatFrag <
693  (ops node:$src0, node:$src1),
694  (add (shl_oneuse $src0, (i32 I)), $src1)> {
695  // FIXME: Poor substitute for disabling pattern in SelectionDAG
696  let PredicateCode = [{return false;}];
697  let GISelPredicateCode = [{return true;}];
698}
699}
700
// Defines a glued binary atomic node and its local/region pattern fragments.
//   op_name   - opcode suffix; the node opcode is "<ns>::ATOMIC_" # op_name.
//   is_amdgpu - selects the "AMDGPUISD" namespace instead of "ISD".
//   tc        - type profile for the node (defaults to SDTAtomic2).
//   IsInt     - 1 selects the integer binary_atomic_op multiclasses, 0 the
//               *_fp ones.
// Produces NAME_glue (chained, may load and store, memory operand, glue in),
// then the returning and no-return fragment families under the "_local_m0"
// and "_region_m0" suffixes, each restricted to the matching StoreAddress_*
// address-space list.
701multiclass SIAtomicM0Glue2 <string op_name, bit is_amdgpu = 0,
702                            SDTypeProfile tc = SDTAtomic2,
703                            bit IsInt = 1> {
704
705  def _glue : SDNode <
706    !if(is_amdgpu, "AMDGPUISD", "ISD")#"::ATOMIC_"#op_name, tc,
707    [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
708  >;
709
710  let AddressSpaces = StoreAddress_local.AddrSpaces in {
711
712    if IsInt then {
713      defm _local_m0 : binary_atomic_op <!cast<SDNode>(NAME#"_glue")>;
714      defm _local_m0 : noret_binary_atomic_op <!cast<SDNode>(NAME#"_glue")>;
715    } else {
716      defm _local_m0 : binary_atomic_op_fp <!cast<SDNode>(NAME#"_glue")>;
717      defm _local_m0 : noret_binary_atomic_op_fp <!cast<SDNode>(NAME#"_glue")>;
718     }
719  }
720
721  let AddressSpaces = StoreAddress_region.AddrSpaces in {
722    if IsInt then {
723      defm _region_m0 : binary_atomic_op <!cast<SDNode>(NAME#"_glue")>;
724      defm _region_m0 : noret_binary_atomic_op <!cast<SDNode>(NAME#"_glue")>;
725    } else {
726      defm _region_m0 : binary_atomic_op_fp <!cast<SDNode>(NAME#"_glue")>;
727      defm _region_m0 : noret_binary_atomic_op_fp <!cast<SDNode>(NAME#"_glue")>;
728    }
729  }
730}
731
// Integer atomics use the defaults; the three FP atomics pass the
// SDTAtomic2_f32 profile and IsInt = 0.
732defm atomic_load_add : SIAtomicM0Glue2 <"LOAD_ADD">;
733defm atomic_load_sub : SIAtomicM0Glue2 <"LOAD_SUB">;
734defm atomic_load_uinc_wrap : SIAtomicM0Glue2 <"LOAD_UINC_WRAP">;
735defm atomic_load_udec_wrap : SIAtomicM0Glue2 <"LOAD_UDEC_WRAP">;
736defm atomic_load_and : SIAtomicM0Glue2 <"LOAD_AND">;
737defm atomic_load_min : SIAtomicM0Glue2 <"LOAD_MIN">;
738defm atomic_load_max : SIAtomicM0Glue2 <"LOAD_MAX">;
739defm atomic_load_or : SIAtomicM0Glue2 <"LOAD_OR">;
740defm atomic_load_xor : SIAtomicM0Glue2 <"LOAD_XOR">;
741defm atomic_load_umin : SIAtomicM0Glue2 <"LOAD_UMIN">;
742defm atomic_load_umax : SIAtomicM0Glue2 <"LOAD_UMAX">;
743defm atomic_swap : SIAtomicM0Glue2 <"SWAP">;
744defm atomic_load_fadd : SIAtomicM0Glue2 <"LOAD_FADD", 0, SDTAtomic2_f32, 0>;
745defm atomic_load_fmin : SIAtomicM0Glue2 <"LOAD_FMIN", 0, SDTAtomic2_f32, 0>;
746defm atomic_load_fmax : SIAtomicM0Glue2 <"LOAD_FMAX", 0, SDTAtomic2_f32, 0>;
747
// Immediate transforms: each rebuilds the matched constant as a target
// constant of a fixed type. The "imm" variants match ordinary immediates, the
// "timm" variants match target immediates.

// Zero-extended value as an i1 target constant.
748def as_i1timm : SDNodeXForm<timm, [{
749  return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i1);
750}]>;
751
// Zero-extended value as an i8 target constant.
752def as_i8imm : SDNodeXForm<imm, [{
753  return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i8);
754}]>;
755
// NOTE(review): despite the i8 name this builds an MVT::i16 constant from the
// sign-extended value, i.e. the same body as as_i16timm - confirm whether
// that is intentional.
756def as_i8timm : SDNodeXForm<timm, [{
757  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
758}]>;
759
// Sign-extended value as an i16 target constant.
760def as_i16imm : SDNodeXForm<imm, [{
761  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
762}]>;
763
764def as_i16timm : SDNodeXForm<timm, [{
765  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
766}]>;
767
// Sign-extended value as an i32 target constant.
768def as_i32imm: SDNodeXForm<imm, [{
769  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32);
770}]>;
771
772def as_i32timm: SDNodeXForm<timm, [{
773  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32);
774}]>;
775
// Sign-extended value as an i64 target constant.
776def as_i64imm: SDNodeXForm<imm, [{
777  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i64);
778}]>;
779
// Condition code (N->get()) as an i32 target constant.
780def cond_as_i32imm: SDNodeXForm<cond, [{
781  return CurDAG->getTargetConstant(N->get(), SDLoc(N), MVT::i32);
782}]>;
783
// Reinterprets the bits of an FP immediate as an integer and emits them as an
// i32 target constant.
784// Copied from the AArch64 backend:
785def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{
786return CurDAG->getTargetConstant(
787  N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32);
788}]>;
789
// Converts a frame index node into a target frame index of type i32.
790def frameindex_to_targetframeindex : SDNodeXForm<frameindex, [{
791  auto FI = cast<FrameIndexSDNode>(N);
792  return CurDAG->getTargetFrameIndex(FI->getIndex(), MVT::i32);
793}]>;
794
// Same as bitcast_fpimm_to_i32 but emits an i64 target constant.
795// Copied from the AArch64 backend:
796def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{
797return CurDAG->getTargetConstant(
798  N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64);
799}]>;
800
// Extracts bit number `bitnum` of the zero-extended immediate and emits it as
// an i1 target constant. The bit index is spliced into the C++ body as text.
801class bitextract_imm<int bitnum> : SDNodeXForm<imm, [{
802  uint64_t Imm = N->getZExtValue();
803  unsigned Bit = (Imm >> }] # bitnum # [{ ) & 1;
804  return CurDAG->getTargetConstant(Bit, SDLoc(N), MVT::i1);
805}]>;
806
// i32 target immediate that fits in 16 bits when read as either a signed or
// an unsigned value; matched values are rewritten with as_i16timm.
def SIMM16bit : TImmLeaf<i32, [{
  return isUInt<16>(Imm) || isInt<16>(Imm);
}], as_i16timm>;
811
// i64 immediate whose upper 32 bits are all zero, i.e. the value is unchanged
// by masking with 0xffffffff.
def i64imm_32bit : ImmLeaf<i64, [{
  const uint64_t Bits = static_cast<uint64_t>(Imm);
  return (Bits >> 32) == 0;
}]>;
815
// Immediate leaves accepted when isInlineImmediate() returns true for the
// value: one integer (i64) and two floating-point (f32, f64) flavours.
def InlineImm64 : IntImmLeaf<i64, [{ return isInlineImmediate(Imm); }]>;

def InlineImmFP32 : FPImmLeaf<f32, [{ return isInlineImmediate(Imm); }]>;

def InlineImmFP64 : FPImmLeaf<f64, [{ return isInlineImmediate(Imm); }]>;
827
828
// Pattern leaf over `frag` that matches only when isVGPRImm(N) holds for the
// matched node.
class VGPRImm<dag frag> : PatLeaf<frag, [{ return isVGPRImm(N); }]>;
832
// Produces an i32 constant (a regular constant, not a target constant) holding
// the arithmetic negation of the immediate's sign-extended value.
def NegateImm : SDNodeXForm<imm, [{
  const int64_t Negated = -N->getSExtValue();
  return CurDAG->getConstant(Negated, SDLoc(N), MVT::i32);
}]>;
836
// Immediates in the range [-64, -17]; matched values are negated by NegateImm.
// TODO: When FP inline imm values work?
def NegSubInlineConst32 : ImmLeaf<i32, [{
  return Imm >= -64 && Imm < -16;
}], NegateImm>;

// i16 counterpart of NegSubInlineConst32 (same range, same transform).
def NegSubInlineIntConst16 : ImmLeaf<i16, [{
  return Imm >= -64 && Imm < -16;
}], NegateImm>;
845
// i32 immediate strictly below 32.
// NOTE(review): only an upper bound is checked; if Imm is a signed quantity,
// negative values satisfy the predicate as well -- confirm this is intended.
def ShiftAmt32Imm : ImmLeaf<i32, [{ return Imm < 32; }]>;
849
// f16 value in a VGPR_32 whose defining opcode is accepted by
// fp16SrcZerosHighBits().
def fp16_zeros_high_16bits : PatLeaf<(f16 VGPR_32:$src), [{
  const unsigned Opcode = N->getOpcode();
  return fp16SrcZerosHighBits(Opcode);
}]>;
853
// Floating-point source for which SITargetLowering::isCanonicalized() returns
// true. The SelectionDAG predicate queries the matched value; the GlobalISel
// predicate queries the register defined by operand 0 of the matched
// instruction.
def is_canonicalized : PatLeaf<(fAny srcvalue:$src), [{
  const auto &TLI =
      *static_cast<const SITargetLowering *>(getTargetLowering());
  return TLI.isCanonicalized(*CurDAG, SDValue(N, 0));
}]> {
  let GISelPredicateCode = [{
    const auto *Lowering = static_cast<const SITargetLowering *>(
        MF.getSubtarget().getTargetLowering());
    const MachineOperand &Def = MI.getOperand(0);
    assert(Def.isDef());
    return Lowering->isCanonicalized(Def.getReg(), MF);
  }];
}
867
868//===----------------------------------------------------------------------===//
869// MUBUF/SMEM Patterns
870//===----------------------------------------------------------------------===//
871
// Masks the target immediate down to the cache-policy bits and emits them as
// an i8 target constant. The mask is CPol::ALL on GFX12 and later
// generations, CPol::ALL_pregfx12 otherwise.
def extract_cpol : SDNodeXForm<timm, [{
  const bool IsGFX12Plus =
      Subtarget->getGeneration() >= AMDGPUSubtarget::GFX12;
  const uint64_t PolicyMask =
      IsGFX12Plus ? AMDGPU::CPol::ALL : AMDGPU::CPol::ALL_pregfx12;
  return CurDAG->getTargetConstant(N->getZExtValue() & PolicyMask, SDLoc(N),
                                   MVT::i8);
}]>;
879
// Emits an i8 target constant that is 1 when the swizzle bit of the target
// immediate is set and 0 otherwise. The bit tested is CPol::SWZ on GFX12 and
// later generations, CPol::SWZ_pregfx12 otherwise.
def extract_swz : SDNodeXForm<timm, [{
  const bool IsGFX12Plus =
      Subtarget->getGeneration() >= AMDGPUSubtarget::GFX12;
  const uint64_t SwizzleBit =
      IsGFX12Plus ? AMDGPU::CPol::SWZ : AMDGPU::CPol::SWZ_pregfx12;
  const bool Swizzle = (N->getZExtValue() & SwizzleBit) != 0;
  return CurDAG->getTargetConstant(Swizzle, SDLoc(N), MVT::i8);
}]>;
887
// Like extract_cpol, but additionally ORs CPol::GLC into the masked value
// before emitting it as an i8 target constant.
def extract_cpol_set_glc : SDNodeXForm<timm, [{
  const bool IsGFX12Plus =
      Subtarget->getGeneration() >= AMDGPUSubtarget::GFX12;
  const uint64_t PolicyMask =
      IsGFX12Plus ? AMDGPU::CPol::ALL : AMDGPU::CPol::ALL_pregfx12;
  const uint32_t Policy = N->getZExtValue() & PolicyMask;
  return CurDAG->getTargetConstant(Policy | AMDGPU::CPol::GLC, SDLoc(N),
                                   MVT::i8);
}]>;
894
895//===----------------------------------------------------------------------===//
896// Custom Operands
897//===----------------------------------------------------------------------===//
898
// Custom operand of type OtherVT, marked as PC-relative, with explicit
// printer, encoder and decoder hooks.
def SOPPBrTarget : CustomOperand<OtherVT> {
  let OperandType = "OPERAND_PCREL";
  let PrintMethod = "printOperand";
  let EncoderMethod = "getSOPPBrEncoding";
  let DecoderMethod = "decodeSOPPBrTarget";
}
905
// Plain pointer-typed operand.
def si_ga : Operand<iPTR>;

// i32 custom operands / immediate used by the Interp* operand family.
def InterpSlot : CustomOperand<i32>;

// A dedicated operand appears to be required here so that attr<num> can be
// parsed when written without a space.
def InterpAttr : CustomOperand<i32>;

def InterpAttrChan : ImmOperand<i32>;
915
// i32 immediate operand typed AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32,
// decoded by decodeSplitBarrier and printed with the generic operand printer.
def SplitBarrier : ImmOperand<i32> {
  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_INLINE_SPLIT_BARRIER_INT32";
  let PrintMethod = "printOperand";
  let DecoderMethod = "decodeSplitBarrier";
}
922
// Assembler operand class parsed by parseVReg32OrOff; used as the match class
// of the ExpSrc* operands.
def VReg32OrOffClass : AsmOperandClass {
  let Name         = "VReg32OrOff";
  let ParserMethod = "parseVReg32OrOff";
}
927
// Custom operands whose parser/printer hooks are derived from the record name
// by CustomOperand. The second template argument (where given) is the
// "optional" flag.
def SendMsg : CustomOperand<i32>;

def Swizzle : CustomOperand<i16, 1>;

def Endpgm : CustomOperand<i16, 1>;

def SWaitCnt : CustomOperand<i32>;

def DepCtr : CustomOperand<i32>;

def SDelayALU : CustomOperand<i32>;
939
940include "SIInstrFormats.td"
941include "VIInstrFormats.td"
942
// Assembler operand class for boolean registers: parsed by parseBoolReg and
// rendered into the MCInst as a register operand.
def BoolReg : AsmOperandClass {
  let Name         = "BoolReg";
  let ParserMethod = "parseBoolReg";
  let RenderMethod = "addRegOperands";
}
948
// Register operand over SReg_1 that is matched through BoolReg and decoded by
// decodeBoolReg.
class BoolRC : RegisterOperand<SReg_1> {
  let DecoderMethod    = "decodeBoolReg";
  let ParserMatchClass = BoolReg;
}
953
// Boolean source operand over SReg_1_XEXEC, using the same parser match class
// and decoder as BoolRC.
def SSrc_i1 : RegisterOperand<SReg_1_XEXEC> {
  let DecoderMethod    = "decodeBoolReg";
  let ParserMatchClass = BoolReg;
}
958
// Boolean register destination printed through printVOPDst.
def VOPDstS64orS32 : BoolRC {
  let PrintMethod = "printVOPDst";
}
962
// SCSrc_i1 is intended for pseudo instructions only: boolean immediates must
// not be exposed to codegen instructions. It shares BoolReg parsing and
// decodeBoolReg decoding with SSrc_i1, and additionally carries an AMDGPU
// operand type.
def SCSrc_i1 : RegisterOperand<SReg_1_XEXEC> {
  let ParserMatchClass = BoolReg;
  let DecoderMethod    = "decodeBoolReg";
  let OperandNamespace = "AMDGPU";
  let OperandType      = "OPERAND_REG_IMM_INT32";
}
971
//===----------------------------------------------------------------------===//
// ExpSrc* Special cases for exp src operands which are printed as
// "off" depending on en operand.
//===----------------------------------------------------------------------===//

// Defines ExpSrc0 .. ExpSrc3. Each is a VGPR_32 register operand parsed via
// VReg32OrOffClass and printed by the matching printExpSrc<N> method.
foreach Idx = 0...3 in
def ExpSrc#Idx : RegisterOperand<VGPR_32> {
  let PrintMethod = "printExpSrc" # Idx;
  let ParserMatchClass = VReg32OrOffClass;
}
996
// SDWA source operand over VS_32. The operand type and decoder name are
// derived from the value type: "FP" or "INT" according to vt.isFP, followed
// by the bit size of vt.
class SDWASrc<ValueType vt> : RegisterOperand<VS_32> {
  // "FP" for floating-point value types, "INT" otherwise.
  string Type = !if(vt.isFP, "FP", "INT");

  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_REG_INLINE_C_" # Type # vt.Size;
  let EncoderMethod = "getSDWASrcEncoding";
  let DecoderMethod = "decodeSDWASrc" # vt.Size;
}

// Concrete SDWA source operands for the 32- and 16-bit integer and
// floating-point types.
def SDWASrc_i32 : SDWASrc<i32>;
def SDWASrc_i16 : SDWASrc<i16>;
def SDWASrc_f32 : SDWASrc<f32>;
def SDWASrc_f16 : SDWASrc<f16>;
1009
// Boolean register destination with SDWA-specific operand type, encoder and
// decoder; printed through printVOPDst.
def SDWAVopcDst : BoolRC {
  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_SDWA_VOPC_DST";
  let PrintMethod = "printVOPDst";
  let EncoderMethod = "getSDWAVopcDstEncoding";
  let DecoderMethod = "decodeSDWAVopcDst";
}
1017
// Custom integer operand (optional unless stated otherwise) that is parsed by
// parseIntWithPrefix using `Prefix`, and recognised by its ImmTy.
//
// Two string fields hold C++ callables and may be overridden per operand:
//   Validator     - check applied to the parsed value (accepts everything by
//                   default).
//   ConvertMethod - callable handed to parseIntWithPrefix; by default it just
//                   forwards to Validator.
class NamedIntOperand<ValueType Type, string Prefix, bit Optional = 1,
                      string name = NAME>
    : CustomOperand<Type, Optional, name> {
  let PredicateMethod =
    "getPredicate([](const AMDGPUOperand &Op) -> bool { return Op.isImmTy(" #
    "AMDGPUOperand::" # ImmTy # "); })";
  string Validator = "[](int64_t V) { return true; }";
  string ConvertMethod = "[](int64_t &V) { return " # Validator # "(V); }";
  let ParserMethod =
    "[this](OperandVector &Operands) -> ParseStatus { return " #
    "parseIntWithPrefix(\"" # Prefix # "\", Operands, AMDGPUOperand::" #
    ImmTy # ", " # ConvertMethod # "); }";
}
1031
// Optional i1 custom operand identified in assembly by `Id`: parsed with
// parseNamedBit, printed with printNamedBit, and recognised by its ImmTy.
class NamedBitOperand<string Id, string Name = NAME>
    : CustomOperand<i1, 1, Name> {
  let PredicateMethod = "isImmTy<AMDGPUOperand::" # ImmTy # ">";
  let ParserMethod =
    "[this](OperandVector &Operands) -> ParseStatus { return " #
    "parseNamedBit(\"" # Id # "\", Operands, AMDGPUOperand::" # ImmTy #
    "); }";
  let PrintMethod =
    "[this](const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, " #
    "raw_ostream &O) { printNamedBit(MI, OpNo, O, \"" # Id # "\"); }";
}
1042
// Wraps an existing custom operand `Op` as an operand with the default value
// `Value`, reusing Op's value type, parser match class and print method.
class DefaultOperand<CustomOperand Op, int Value>
  : OperandWithDefaultOps<Op.Type, (ops (Op.Type Value))>,
    CustomOperandProps<1> {
  let PrintMethod = Op.PrintMethod;
  let ParserMatchClass = Op.ParserMatchClass;
}
1049
// Optional i32 custom operand parsed by parseSDWASel using the identifier
// `Id` and the operand's ImmTy.
class SDWAOperand<string Id, string Name = NAME>
    : CustomOperand<i32, 1, Name> {
  let ParserMethod =
    "[this](OperandVector &Operands) -> ParseStatus { return " #
    "parseSDWASel(Operands, \"" # Id # "\", AMDGPUOperand::" # ImmTy #
    "); }";
}
1056
// i32 operand defaulting to 0 that is parsed by parseOperandArrayWithPrefix
// using the identifier `Id` and the operand's ImmTy.
class ArrayOperand0<string Id, string Name = NAME>
  : OperandWithDefaultOps<i32, (ops (i32 0))>,
    CustomOperandProps<1, Name> {
  let ParserMethod =
    "[this](OperandVector &Operands) -> ParseStatus { return " #
    "parseOperandArrayWithPrefix(\"" # Id # "\", Operands, AMDGPUOperand::" #
    ImmTy # "); }";
}
1065
// Offset operands. flat_offset overrides its ImmTy to "ImmTyOffset"; Offset0
// and Offset1 restrict the parsed value with isUInt<8>.
def flat_offset : CustomOperand<i32, 1, "FlatOffset"> {
  let ImmTy = "ImmTyOffset";
}
def Offset : NamedIntOperand<i32, "offset">;
def Offset0 : NamedIntOperand<i8, "offset0"> {
  let Validator = "isUInt<8>";
}
def Offset1 : NamedIntOperand<i8, "offset1"> {
  let Validator = "isUInt<8>";
}
1073
// Named bit "gds" (operand name "GDS").
def gds : NamedBitOperand<"gds", "GDS">;

// Optional output-modifier operand (operand name "OModSI") and a variant that
// defaults to 0.
def omod : CustomOperand<i32, 1, "OModSI">;
def omod0 : DefaultOperand<omod, 0>;
1078
// Operands with a default of 0 are kept distinct from operands with no
// default; this helps in cases where the operand appears before a mandatory
// operand.
def Clamp : NamedBitOperand<"clamp">;
def Clamp0 : DefaultOperand<Clamp, 0>;

// Named bit "high" (operand name "High").
def highmod : NamedBitOperand<"high", "High">;
1085
// Cache-policy operand family:
//   CPol                      - optional i32 custom operand.
//   CPol_0 / CPol_GLC1        - CPol with default value 0 / 1.
//   CPol_GLC / CPol_NonGLC    - CPol restricted by a predicate on the GLC bit
//                               (set / clear respectively).
//   *_WithDefault             - the predicated forms with a default value:
//                               the GLC bit alone, or 0.
def CPol : CustomOperand<i32, 1>;
def CPol_0 : DefaultOperand<CPol, 0>;
def CPol_GLC1 : DefaultOperand<CPol, 1>;
def CPol_GLC : ValuePredicatedOperand<CPol, "Op.getImm() & CPol::GLC">;
def CPol_NonGLC
    : ValuePredicatedOperand<CPol, "!(Op.getImm() & CPol::GLC)", 1>;
def CPol_GLC_WithDefault : DefaultOperand<CPol_GLC, !shl(1, CPolBit.GLC)>;
def CPol_NonGLC_WithDefault : DefaultOperand<CPol_NonGLC, 0>;
1093
// Single-bit modifiers, each spelled in assembly by the quoted identifier.
// R128A16 is a plain optional i1 custom operand rather than a named bit.
def TFE : NamedBitOperand<"tfe">;
def UNorm : NamedBitOperand<"unorm">;
def DA : NamedBitOperand<"da">;
def R128A16 : CustomOperand<i1, 1>;
def A16 : NamedBitOperand<"a16">;
def D16 : NamedBitOperand<"d16">;
def LWE : NamedBitOperand<"lwe">;
def exp_compr : NamedBitOperand<"compr", "ExpCompr">;
def exp_vm : NamedBitOperand<"vm", "ExpVM">;

// i8 custom operand.
def FORMAT : CustomOperand<i8>;

// "dmask" prefixed integer and an i8 custom operand.
def DMask : NamedIntOperand<i16, "dmask">;
def Dim : CustomOperand<i8>;
1108
// SDWA selector operands; the first three are parsed through parseSDWASel
// with the quoted identifier.
def dst_sel : SDWAOperand<"dst_sel", "SDWADstSel">;
def src0_sel : SDWAOperand<"src0_sel", "SDWASrc0Sel">;
def src1_sel : SDWAOperand<"src1_sel", "SDWASrc1Sel">;
def dst_unused : CustomOperand<i32, 1, "SDWADstUnused">;

// Array-style modifier operands that default to 0.
def op_sel0 : ArrayOperand0<"op_sel", "OpSel">;
def op_sel_hi0 : ArrayOperand0<"op_sel_hi", "OpSelHi">;
def neg_lo0 : ArrayOperand0<"neg_lo", "NegLo">;
def neg_hi0 : ArrayOperand0<"neg_hi", "NegHi">;
1118
// Optional i32 index-key operands.
def IndexKey16bit : CustomOperand<i32, 1>;
def IndexKey8bit : CustomOperand<i32, 1>;

// Mandatory (non-optional) i32 DPP control operands.
def dpp8 : CustomOperand<i32, 0, "DPP8">;
def dpp_ctrl : CustomOperand<i32, 0, "DPPCtrl">;
1124
// DPP row/bank masks: prefixed integers whose default value is 0xf.
let DefaultValue = "0xf" in {
  def DppRowMask : NamedIntOperand<i32, "row_mask">;
  def DppBankMask : NamedIntOperand<i32, "bank_mask">;
}

// "bound_ctrl" value; the parsed integer is passed through
// convertDppBoundCtrl instead of the default validator.
def DppBoundCtrl : NamedIntOperand<i1, "bound_ctrl"> {
  let ConvertMethod = "[this] (int64_t &BC) -> bool { return convertDppBoundCtrl(BC); }";
}

// "fi" operands sharing the operand name "DppFI"; only the DPP8 variant
// overrides the decoder.
let DecoderMethod = "decodeDpp8FI" in
def Dpp8FI : NamedIntOperand<i32, "fi", 1, "DppFI">;
def Dpp16FI : NamedIntOperand<i32, "fi", 1, "DppFI">;
1136
// Optional i32 custom operand (operand name "BLGP").
def blgp : CustomOperand<i32, 1, "BLGP">;

// Prefixed integers validated as 3-bit and 4-bit unsigned values.
let Validator = "isUInt<3>" in
def CBSZ : NamedIntOperand<i32, "cbsz">;
let Validator = "isUInt<4>" in
def ABID : NamedIntOperand<i32, "abid">;

// Mandatory i32 custom operands.
def hwreg : CustomOperand<i32, 0, "Hwreg">;

def exp_tgt : CustomOperand<i32, 0, "ExpTgt">;
1147
// i8 prefixed-integer operands; each is validated as an unsigned value of
// the bit width named in its Validator.
let Validator = "isUInt<4>" in
def WaitVDST : NamedIntOperand<i8, "wait_vdst">;
let Validator = "isUInt<3>" in
def WaitEXP : NamedIntOperand<i8, "wait_exp">;
let Validator = "isUInt<4>" in
def WaitVAVDst : NamedIntOperand<i8, "wait_va_vdst">;
let Validator = "isUInt<1>" in
def WaitVMVSrc : NamedIntOperand<i8, "wait_vm_vsrc">;

let Validator = "isUInt<2>" in
def ByteSel : NamedIntOperand<i8, "byte_sel">;
1164
// Immediate operand whose operand type ("OPERAND_KIMM<size>") and print
// method ("printU<size>ImmOperand") are derived from the bit size of `vt`;
// decoded by decodeOperand_KImmFP.
class KImmFPOperand<ValueType vt> : ImmOperand<vt> {
  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_KIMM" # vt.Size;
  let PrintMethod = "printU" # vt.Size # "ImmOperand";
  let DecoderMethod = "decodeOperand_KImmFP";
}

// 32-bit VALU immediate operand that uses the constant bus.
def KImmFP32 : KImmFPOperand<i32>;

// 32-bit VALU immediate operand with a 16-bit value that uses the
// constant bus.
def KImmFP16 : KImmFPOperand<i16>;
1178
// Assembler match class for a register-or-immediate source with FP input
// modifiers. The class name and predicate are specialised by operand size.
class FPInputModsMatchClass<int opSize> : AsmOperandClass {
  let Name = "RegOrImmWithFP" # opSize # "InputMods";
  let PredicateMethod = "isRegOrImmWithFP" # opSize # "InputMods";
  let ParserMethod = "parseRegOrImmWithFPInputMods";
}

// Variant whose name and predicate say "RegOrInlineImm" instead of
// "RegOrImm"; the parser method is inherited unchanged.
class FPVCSrcInputModsMatchClass<int opSize> : FPInputModsMatchClass<opSize> {
  let Name = "RegOrInlineImmWithFP" # opSize # "InputMods";
  let PredicateMethod = "isRegOrInlineImmWithFP" # opSize # "InputMods";
}

def FP16InputModsMatchClass : FPInputModsMatchClass<16>;
// 16-bit class with a distinct "T16" name and predicate.
def FPT16InputModsMatchClass : FPInputModsMatchClass<16> {
  let Name = "RegOrImmWithFPT16InputMods";
  let PredicateMethod = "isRegOrImmWithFPT16InputMods";
}
def FP32InputModsMatchClass : FPInputModsMatchClass<32>;
def FP64InputModsMatchClass : FPInputModsMatchClass<64>;

// 16-bit inline-immediate variant. IsFake16 selects both the class name and
// the boolean template argument of the predicate.
class FP16VCSrcInputModsMatchClass<bit IsFake16>
    : FPVCSrcInputModsMatchClass<16> {
  let Name = !if(IsFake16,
                 "RegOrInlineImmWithFPFake16InputMods",
                 "RegOrInlineImmWithFPT16InputMods");
  let PredicateMethod = "isRegOrInlineImmWithFP16InputMods<" #
                        !if(IsFake16, "true", "false") # ">";
}
def FP32VCSrcInputModsMatchClass : FPVCSrcInputModsMatchClass<32>;
1206
// Base i32 operand for source modifiers: typed AMDGPU::OPERAND_INPUT_MODS and
// matched through the supplied assembler operand class.
class InputMods<AsmOperandClass matchClass> : Operand<i32> {
  let ParserMatchClass = matchClass;
  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_INPUT_MODS";
}

// Input-modifier operand printed by printOperandAndFPInputMods.
class FPInputMods<FPInputModsMatchClass matchClass> : InputMods<matchClass> {
  let PrintMethod = "printOperandAndFPInputMods";
}

def FP16InputMods : FPInputMods<FP16InputModsMatchClass>;
def FPT16InputMods : FPInputMods<FPT16InputModsMatchClass>;
def FP32InputMods : FPInputMods<FP32InputModsMatchClass>;
def FP64InputMods : FPInputMods<FP64InputModsMatchClass>;

// Inline-immediate flavours built on the *VCSrc* match classes.
class FP16VCSrcInputMods<bit IsFake16>
  : FPInputMods<FP16VCSrcInputModsMatchClass<IsFake16>>;
def FP32VCSrcInputMods : FPInputMods<FP32VCSrcInputModsMatchClass>;
1225
// Assembler match class for a register-or-immediate source with integer
// input modifiers; name and predicate are specialised by operand size.
class IntInputModsMatchClass<int opSize> : AsmOperandClass {
  let Name = "RegOrImmWithInt" # opSize # "InputMods";
  let PredicateMethod = "isRegOrImmWithInt" # opSize # "InputMods";
  let ParserMethod = "parseRegOrImmWithIntInputMods";
}

// Variant whose name and predicate say "RegOrInlineImm" instead of
// "RegOrImm"; the parser method is inherited unchanged.
class IntVCSrcInputModsMatchClass<int opSize>
    : IntInputModsMatchClass<opSize> {
  let Name = "RegOrInlineImmWithInt" # opSize # "InputMods";
  let PredicateMethod = "isRegOrInlineImmWithInt" # opSize # "InputMods";
}

// 16-bit class with a distinct "T16" name and predicate.
def IntT16InputModsMatchClass : IntInputModsMatchClass<16> {
  let Name = "RegOrImmWithIntT16InputMods";
  let PredicateMethod = "isRegOrImmWithIntT16InputMods";
}
def Int32InputModsMatchClass : IntInputModsMatchClass<32>;
def Int64InputModsMatchClass : IntInputModsMatchClass<64>;
def Int32VCSrcInputModsMatchClass : IntVCSrcInputModsMatchClass<32>;

// Input-modifier operand printed by printOperandAndIntInputMods.
class IntInputMods<IntInputModsMatchClass matchClass>
    : InputMods<matchClass> {
  let PrintMethod = "printOperandAndIntInputMods";
}
def IntT16InputMods : IntInputMods<IntT16InputModsMatchClass>;
def Int32InputMods : IntInputMods<Int32InputModsMatchClass>;
def Int64InputMods : IntInputMods<Int64InputModsMatchClass>;
def Int32VCSrcInputMods : IntInputMods<Int32VCSrcInputModsMatchClass>;
1250
// Match class for op_sel modifiers: a plain register-or-immediate parse and
// predicate, with no FP/int modifier handling.
class OpSelModsMatchClass : AsmOperandClass {
  let Name = "OpSelMods";
  let PredicateMethod = "isRegOrImm";
  let ParserMethod = "parseRegOrImm";
}

def IntOpSelModsMatchClass : OpSelModsMatchClass;
def IntOpSelMods : InputMods<IntOpSelModsMatchClass>;
1259
// SDWA match class with FP input modifiers; name and predicate are
// specialised by operand size.
class FPSDWAInputModsMatchClass<int opSize> : AsmOperandClass {
  let Name = "SDWAWithFP" # opSize # "InputMods";
  let PredicateMethod = "isSDWAFP" # opSize # "Operand";
  let ParserMethod = "parseRegOrImmWithFPInputMods";
}

def FP16SDWAInputModsMatchClass : FPSDWAInputModsMatchClass<16>;
def FP32SDWAInputModsMatchClass : FPSDWAInputModsMatchClass<32>;

// SDWA input-modifier operand printed by printOperandAndFPInputMods.
class FPSDWAInputMods<FPSDWAInputModsMatchClass matchClass>
    : InputMods<matchClass> {
  let PrintMethod = "printOperandAndFPInputMods";
}

def FP16SDWAInputMods : FPSDWAInputMods<FP16SDWAInputModsMatchClass>;
def FP32SDWAInputMods : FPSDWAInputMods<FP32SDWAInputModsMatchClass>;
1276
// Match class for a register with FP input modifiers, parsed by
// parseRegWithFPInputMods and checked by isVRegWithInputMods.
def FPVRegInputModsMatchClass : AsmOperandClass {
  let Name = "VRegWithFPInputMods";
  let PredicateMethod = "isVRegWithInputMods";
  let ParserMethod = "parseRegWithFPInputMods";
}

// 16-bit counterpart. IsFake16 selects both the class name and the boolean
// template argument of the predicate.
class FPT16VRegInputModsMatchClass<bit IsFake16> : AsmOperandClass {
  let Name = !if(IsFake16,
                 "Fake16VRegWithFPInputMods",
                 "T16VRegWithFPInputMods");
  let PredicateMethod = "isT16VRegWithInputMods<" #
                        !if(IsFake16, "true", "false") # ">";
  let ParserMethod = "parseRegWithFPInputMods";
}

// Operands built on the two match classes above; both are printed by
// printOperandAndFPInputMods.
def FPVRegInputMods : InputMods<FPVRegInputModsMatchClass> {
  let PrintMethod = "printOperandAndFPInputMods";
}

class FPT16VRegInputMods<bit IsFake16>
    : InputMods<FPT16VRegInputModsMatchClass<IsFake16>> {
  let PrintMethod = "printOperandAndFPInputMods";
}
1299
// SDWA match class with integer input modifiers; name and predicate are
// specialised by operand size.
class IntSDWAInputModsMatchClass<int opSize> : AsmOperandClass {
  let Name = "SDWAWithInt" # opSize # "InputMods";
  let PredicateMethod = "isSDWAInt" # opSize # "Operand";
  let ParserMethod = "parseRegOrImmWithIntInputMods";
}

def Int16SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<16>;
def Int32SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<32>;
// 32-bit variant with its own name that parses a plain register-or-immediate;
// the predicate is inherited from the 32-bit integer class.
def Bin32SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<32> {
  let Name = "SDWAWithBin32InputMods";
  let ParserMethod = "parseRegOrImm";
}

// SDWA input-modifier operand printed by printOperandAndIntInputMods.
class IntSDWAInputMods<IntSDWAInputModsMatchClass matchClass>
    : InputMods<matchClass> {
  let PrintMethod = "printOperandAndIntInputMods";
}

def Int16SDWAInputMods : IntSDWAInputMods<Int16SDWAInputModsMatchClass>;
def Int32SDWAInputMods : IntSDWAInputMods<Int32SDWAInputModsMatchClass>;
def Bin32SDWAInputMods : IntSDWAInputMods<Bin32SDWAInputModsMatchClass>;
1321
// Match class for a register with integer input modifiers, parsed by
// parseRegWithIntInputMods and checked by isVRegWithInputMods.
def IntVRegInputModsMatchClass : AsmOperandClass {
  let Name = "VRegWithIntInputMods";
  let PredicateMethod = "isVRegWithInputMods";
  let ParserMethod = "parseRegWithIntInputMods";
}

// 16-bit counterpart. IsFake16 selects both the class name and the boolean
// template argument of the predicate.
class IntT16VRegInputModsMatchClass<bit IsFake16> : AsmOperandClass {
  let Name = !if(IsFake16,
                 "Fake16VRegWithIntInputMods",
                 "T16VRegWithIntInputMods");
  let PredicateMethod = "isT16VRegWithInputMods<" #
                        !if(IsFake16, "true", "false") # ">";
  let ParserMethod = "parseRegWithIntInputMods";
}

// Operands built on the two match classes above; both are printed by
// printOperandAndIntInputMods.
class IntT16VRegInputMods<bit IsFake16>
    : InputMods<IntT16VRegInputModsMatchClass<IsFake16>> {
  let PrintMethod = "printOperandAndIntInputMods";
}

def IntVRegInputMods : InputMods<IntVRegInputModsMatchClass> {
  let PrintMethod = "printOperandAndIntInputMods";
}
1344
// Match class for packed FP sources with input modifiers; name and predicate
// are specialised by operand size.
class PackedFPInputModsMatchClass<int opSize> : AsmOperandClass {
  let Name = "PackedFP" # opSize # "InputMods";
  let PredicateMethod = "isPackedFP" # opSize # "InputMods";
  let ParserMethod = "parseRegOrImmWithFPInputMods";
}

// Match class for packed integer sources. It currently uses the generic
// register-or-immediate parser and predicate; a size-specific predicate is
// left commented out below.
class PackedIntInputModsMatchClass<int opSize> : AsmOperandClass {
  let Name = "PackedInt" # opSize # "InputMods";
  let ParserMethod = "parseRegOrImm";
  let PredicateMethod = "isRegOrImm";
//  let PredicateMethod = "isPackedInt"#opSize#"InputMods";
}

def PackedF16InputModsMatchClass : PackedFPInputModsMatchClass<16>;
def PackedI16InputModsMatchClass : PackedIntInputModsMatchClass<16>;

// Packed FP input-modifier operand printed by printOperandAndFPInputMods.
class PackedFPInputMods<PackedFPInputModsMatchClass matchClass>
    : InputMods<matchClass> {
  let PrintMethod = "printOperandAndFPInputMods";
}

// Packed integer input-modifier operand; no custom print method is set (the
// candidate below is commented out).
class PackedIntInputMods<PackedIntInputModsMatchClass matchClass>
    : InputMods<matchClass> {
  //let PrintMethod = "printPackedIntInputMods";
}

def PackedF16InputMods : PackedFPInputMods<PackedF16InputModsMatchClass>;
def PackedI16InputMods : PackedIntInputMods<PackedI16InputModsMatchClass>;
1371
1372//===----------------------------------------------------------------------===//
1373// Complex patterns
1374//===----------------------------------------------------------------------===//
1375
// Each ComplexPattern names the C++ selector routine to call and the number
// of operands that routine produces.

// DS addressing.
def DS1Addr1Offset : ComplexPattern<iPTR, 2, "SelectDS1Addr1Offset">;
def DS64Bit4ByteAligned : ComplexPattern<iPTR, 3, "SelectDS64Bit4ByteAligned">;
def DS128Bit8ByteAligned : ComplexPattern<iPTR, 3, "SelectDS128Bit8ByteAligned">;

def MOVRELOffset : ComplexPattern<iPTR, 2, "SelectMOVRELOffset">;

def VOP3Mods0 : ComplexPattern<untyped, 4, "SelectVOP3Mods0">;

// Modifiers for floating point instructions.
def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">;

// VOP3 modifiers used for instructions that do not read canonicalized
// floating point values (i.e. integer operations with FP source
// modifiers)
def VOP3ModsNonCanonicalizing
    : ComplexPattern<untyped, 2, "SelectVOP3ModsNonCanonicalizing">;

def VOP3NoMods : ComplexPattern<untyped, 1, "SelectVOP3NoMods">;

def VOP3OMods : ComplexPattern<untyped, 3, "SelectVOP3OMods">;

def VOP3PMods : ComplexPattern<untyped, 2, "SelectVOP3PMods">;

def VOP3PModsDOT : ComplexPattern<untyped, 2, "SelectVOP3PModsDOT">;
def VOP3PModsNeg : ComplexPattern<untyped, 1, "SelectVOP3PModsNeg">;
def WMMAOpSelVOP3PMods : ComplexPattern<untyped, 1, "SelectWMMAOpSelVOP3PMods">;

def WMMAModsF32NegAbs : ComplexPattern<untyped, 2, "SelectWMMAModsF32NegAbs">;
def WMMAModsF16Neg : ComplexPattern<untyped, 2, "SelectWMMAModsF16Neg">;
def WMMAModsF16NegAbs : ComplexPattern<untyped, 2, "SelectWMMAModsF16NegAbs">;
def WMMAVISrc : ComplexPattern<untyped, 1, "SelectWMMAVISrc">;
def SWMMACIndex8 : ComplexPattern<untyped, 2, "SelectSWMMACIndex8">;
def SWMMACIndex16 : ComplexPattern<untyped, 2, "SelectSWMMACIndex16">;

def VOP3OpSel : ComplexPattern<untyped, 2, "SelectVOP3OpSel">;

def VOP3OpSelMods : ComplexPattern<untyped, 2, "SelectVOP3OpSelMods">;

def VOP3PMadMixModsExt : ComplexPattern<untyped, 2, "SelectVOP3PMadMixModsExt">;
def VOP3PMadMixMods : ComplexPattern<untyped, 2, "SelectVOP3PMadMixMods">;

def VINTERPMods : ComplexPattern<untyped, 2, "SelectVINTERPMods">;
def VINTERPModsHi : ComplexPattern<untyped, 2, "SelectVINTERPModsHi">;
1419
1420//===----------------------------------------------------------------------===//
1421// SI assembler operands
1422//===----------------------------------------------------------------------===//
1423
// Named integer constants for a few assembler operand values.
def SIOperand {
  int ZERO     = 0x80;
  int VCC      = 0x6A;
  int FLAT_SCR = 0x68;
}
1429
// Source-modifier bit values. This should be kept in sync with SISrcMods enum.
// Several names intentionally share a value: NEG_HI aliases ABS, and
// OP_SEL_1 / DST_OP_SEL are both 8.
def SRCMODS {
  int NONE    = 0;
  int NEG     = 1;
  int ABS     = 2;
  int NEG_ABS = 3;

  int NEG_HI     = ABS;
  int OP_SEL_0   = 4;
  int OP_SEL_1   = 8;
  int DST_OP_SEL = 8;
}
1442
// Values for the destination clamp field.
def DSTCLAMP {
  int NONE   = 0;
  int ENABLE = 1;
}

// Values for the destination output-modifier field; only NONE is named here.
def DSTOMOD {
  int NONE = 0;
}
1451
// Numeric identifiers for hardware registers. The numbering is not
// contiguous: ids 8-14, 23-24 and 26-28 are not named here.
def HWREG {
  int MODE          = 1;
  int STATUS        = 2;
  int TRAPSTS       = 3;
  int HW_ID         = 4;
  int GPR_ALLOC     = 5;
  int LDS_ALLOC     = 6;
  int IB_STS        = 7;
  int MEM_BASES     = 15;
  int TBA_LO        = 16;
  int TBA_HI        = 17;
  int TMA_LO        = 18;
  int TMA_HI        = 19;
  int FLAT_SCR_LO   = 20;
  int FLAT_SCR_HI   = 21;
  int XNACK_MASK    = 22;
  int POPS_PACKER   = 25;
  int SHADER_CYCLES = 29;
}
1471
// Packs a hardware-register reference into a 16-bit immediate:
//   ret = (Reg | (Offset << 6) | ((Size - 1) << 11)) & 0xFFFF
// Offset defaults to 0 and Size to 32.
class getHwRegImm<int Reg, int Offset = 0, int Size = 32> {
  defvar OffsetField = !shl(Offset, 6);
  defvar SizeField   = !shl(!add(Size, -1), 11);
  int ret = !and(!or(Reg, OffsetField, SizeField), 65535);
}
1477
1478//===----------------------------------------------------------------------===//
1479//
1480// SI Instruction multiclass helpers.
1481//
1482// Instructions with _32 take 32-bit operands.
1483// Instructions with _64 take 64-bit operands.
1484//
// VOP_* instructions can use either a 32-bit or 64-bit encoding.  The 32-bit
// encoding is the standard encoding, but instructions that make use of
// any of the instruction modifiers must use the 64-bit encoding.
1488//
1489// Instructions with _e32 use the 32-bit encoding.
1490// Instructions with _e64 use the 64-bit encoding.
1491//
1492//===----------------------------------------------------------------------===//
1493
// Records, for an instruction, the name of its pseudo instruction and an
// integer subtarget identifier.
class SIMCInstr<string pseudo, int subtarget> {
  string PseudoInstr = pseudo;
  int Subtarget      = subtarget;
}
1498
1499//===----------------------------------------------------------------------===//
1500// Vector ALU classes
1501//===----------------------------------------------------------------------===//
1502
// Counts the leading source operands that are actually present: the result is
// the index of the first source whose type is `untyped`, or 3 when none is.
class getNumSrcArgs<ValueType Src0, ValueType Src1, ValueType Src2> {
  int ret = !cond(!eq(Src0.Value, untyped.Value) : 0,
                  !eq(Src1.Value, untyped.Value) : 1,  // VOP1
                  !eq(Src2.Value, untyped.Value) : 2,  // VOP2
                  1                              : 3); // VOP3
}
1510
// Returns the register operand to use for the destination of VOP[123C]
// instructions for the given VT, selected by the bit size of VT. For 16-bit
// types the choice additionally depends on IsTrue16 and, when that is set,
// on IsVOP3Encoding. Any size not listed falls back to VOPDstS64orS32.
class getVALUDstForVT<ValueType VT, bit IsTrue16 = 0, bit IsVOP3Encoding = 0> {
  defvar True16Dst = !if(IsVOP3Encoding, VOPDstOperand_t16,
                         VOPDstOperand_t16Lo128);
  defvar Dst16 = !if(IsTrue16, True16Dst, VOPDstOperand<VGPR_32>);
  RegisterOperand ret = !cond(!eq(VT.Size, 16)  : Dst16,
                              !eq(VT.Size, 32)  : VOPDstOperand<VGPR_32>,
                              !eq(VT.Size, 64)  : VOPDstOperand<VReg_64>,
                              !eq(VT.Size, 128) : VOPDstOperand<VReg_128>,
                              !eq(VT.Size, 256) : VOPDstOperand<VReg_256>,
                              1                 : VOPDstS64orS32); // else VT == i1
}
1524
// Fake16 flavour of the destination selector: 16-bit values use
// VGPR_32_Lo128, 32/64/128-bit values use the matching VGPR class, and any
// other size falls back to VOPDstS64orS32.
class getVALUDstForVT_fake16<ValueType VT> {
  RegisterOperand ret =
    !cond(!eq(VT.Size, 16)  : VOPDstOperand<VGPR_32_Lo128>,
          !eq(VT.Size, 32)  : VOPDstOperand<VGPR_32>,
          !eq(VT.Size, 64)  : VOPDstOperand<VReg_64>,
          !eq(VT.Size, 128) : VOPDstOperand<VReg_128>,
          1                 : VOPDstS64orS32); // else VT == i1
}
1532
// Returns the register operand to use for the destination of VOP[12C]
// instructions with SDWA extension: SDWAVopcDst for 1-bit results (VOPC),
// a 32-bit VGPR destination otherwise (VOP1/2).
class getSDWADstForVT<ValueType VT> {
  RegisterOperand ret = !cond(!eq(VT.Size, 1) : SDWAVopcDst,
                              1               : VOPDstOperand<VGPR_32>);
}
1540
// Returns the register operand to use for source 0 of VOP[12C] instructions
// for the given VT. For the scalar 16-bit types the operand depends on
// IsTrue16 and, when that is set, on IsFake16. Types not listed use VSrc_b32.
class getVOPSrc0ForVT<ValueType VT, bit IsTrue16, bit IsFake16 = 1> {
  defvar SrcI16 =
    !if(IsTrue16, !if(IsFake16, VSrcFake16_b16_Lo128, VSrcT_b16_Lo128),
        VSrc_b16);
  defvar SrcF16 =
    !if(IsTrue16, !if(IsFake16, VSrcFake16_f16_Lo128, VSrcT_f16_Lo128),
        VSrc_f16);
  defvar SrcBF16 =
    !if(IsTrue16, !if(IsFake16, VSrcFake16_bf16_Lo128, VSrcT_bf16_Lo128),
        VSrc_bf16);

  RegisterOperand ret =
  !cond(!eq(VT, i16)    : SrcI16,
        !eq(VT, f16)    : SrcF16,
        !eq(VT, bf16)   : SrcBF16,
        !eq(VT, i32)    : VSrc_b32,
        !eq(VT, f32)    : VSrc_f32,
        !eq(VT, i64)    : VSrc_b64,
        !eq(VT, f64)    : VSrc_f64,
        !eq(VT, v2i16)  : VSrc_v2b16,
        !eq(VT, v2f16)  : VSrc_v2f16,
        !eq(VT, v2bf16) : VSrc_v2bf16,
        !eq(VT, v4f16)  : AVSrc_64,
        !eq(VT, v4bf16) : AVSrc_64,
        1               : VSrc_b32);
}
1565
// Scalar source operand for the given VT: SSrc_b64 for 64-bit types,
// SSrc_b32 for everything else.
class getSOPSrcForVT<ValueType VT> {
  RegisterOperand ret = !cond(!eq(VT.Size, 64) : SSrc_b64,
                              1                : SSrc_b32);
}
1569
// Returns the vreg register operand to use for a source operand of the given
// VT, chosen by bit size. 48-bit values share the 64-bit register class.
// 16-bit values use a Lo128 operand only when IsTrue16 is set (IsFake16 then
// picks between the 32-bit and 16-bit Lo128 forms); everything else uses
// VGPR_32.
class getVregSrcForVT<ValueType VT, bit IsTrue16 = 0, bit IsFake16 = 0> {
  defvar Src16 = !if(IsTrue16,
                     !if(IsFake16, VGPRSrc_32_Lo128, VGPRSrc_16_Lo128),
                     RegisterOperand<VGPR_32>);
  RegisterOperand ret =
  !cond(!eq(VT.Size, 128) : RegisterOperand<VReg_128>,
        !eq(VT.Size, 96)  : RegisterOperand<VReg_96>,
        !or(!eq(VT.Size, 64),
            !eq(VT.Size, 48)) : RegisterOperand<VReg_64>,
        !eq(VT.Size, 16)  : Src16,
        1                 : RegisterOperand<VGPR_32>);
}
1582
// SDWA source operand for the given VT. retFlt / retInt hold the
// floating-point and integer candidates (16-bit form for 16-bit types,
// 32-bit form otherwise); ret picks between them according to VT.isFP.
class getSDWASrcForVT<ValueType VT> {
  RegisterOperand retFlt = !cond(!eq(VT.Size, 16) : SDWASrc_f16,
                                 1                : SDWASrc_f32);
  RegisterOperand retInt = !cond(!eq(VT.Size, 16) : SDWASrc_i16,
                                 1                : SDWASrc_i32);
  RegisterOperand ret = !if(VT.isFP, retFlt, retInt);
}
1588
// Returns the register operand to use for sources of VOP3 instructions for
// the given VT. Exact type matches are tried first; the size-based cases
// (128/96/64 bits) only apply to types not matched by name, and anything
// left over uses VSrc_b32.
class getVOP3SrcForVT<ValueType VT, bit IsTrue16 = 0> {
  defvar SrcI16  = !if(IsTrue16, VSrcT_b16, VSrc_b16);
  defvar SrcF16  = !if(IsTrue16, VSrcT_f16, VSrc_f16);
  defvar SrcBF16 = !if(IsTrue16, VSrcT_bf16, VSrc_bf16);

  RegisterOperand ret =
  !cond(!eq(VT, i1)       : SSrc_i1,
        !eq(VT, i16)      : SrcI16,
        !eq(VT, f16)      : SrcF16,
        !eq(VT, bf16)     : SrcBF16,
        !eq(VT, f32)      : VSrc_f32,
        !eq(VT, f64)      : VSrc_f64,
        !eq(VT, v2i16)    : VSrc_v2b16,
        !eq(VT, v2f16)    : VSrc_v2f16,
        !eq(VT, v2bf16)   : VSrc_v2bf16,
        !eq(VT, v2i32)    : VSrc_v2b32,
        !eq(VT, v2f32)    : VSrc_v2f32,
        !eq(VT, v4f16)    : AVSrc_64,
        !eq(VT, v4bf16)   : AVSrc_64,
        !eq(VT.Size, 128) : VRegSrc_128,
        !eq(VT.Size, 96)  : VRegSrc_96,
        !eq(VT.Size, 64)  : VSrc_b64,
        1                 : VSrc_b32);
}
1611
// Src2 of VOP3 DPP instructions cannot be a literal, so every case below
// selects a VCSrc_* operand (or SSrc_i1 for i1). Types not listed use
// VCSrc_b32.
class getVOP3DPPSrcForVT<ValueType VT> {
  RegisterOperand ret =
  !cond(!eq(VT, i1)     : SSrc_i1,
        !eq(VT, f32)    : VCSrc_f32,
        !eq(VT, i16)    : VCSrc_b16,
        !eq(VT, f16)    : VCSrc_f16,
        !eq(VT, bf16)   : VCSrc_bf16,
        !eq(VT, v2i16)  : VCSrc_v2b16,
        !eq(VT, v2f16)  : VCSrc_v2f16,
        !eq(VT, v2bf16) : VCSrc_v2bf16,
        1               : VCSrc_b32);
}
1625
// Float or packed int: `ret` is set when SrcVT is one of the scalar
// floating-point types or one of the listed 2-, 4-, 8- or 16-element vector
// types.
class isModifierType<ValueType SrcVT> {
  bit ret = !or(// Scalar floating point.
                !eq(SrcVT.Value, f16.Value),
                !eq(SrcVT.Value, bf16.Value),
                !eq(SrcVT.Value, f32.Value),
                !eq(SrcVT.Value, f64.Value),
                // Vectors of f16.
                !eq(SrcVT.Value, v2f16.Value),
                !eq(SrcVT.Value, v4f16.Value),
                !eq(SrcVT.Value, v8f16.Value),
                !eq(SrcVT.Value, v16f16.Value),
                // Vectors of bf16.
                !eq(SrcVT.Value, v2bf16.Value),
                !eq(SrcVT.Value, v4bf16.Value),
                !eq(SrcVT.Value, v8bf16.Value),
                !eq(SrcVT.Value, v16bf16.Value),
                // Vectors of i16.
                !eq(SrcVT.Value, v2i16.Value),
                !eq(SrcVT.Value, v4i16.Value),
                !eq(SrcVT.Value, v8i16.Value),
                !eq(SrcVT.Value, v16i16.Value),
                // Vectors of f32.
                !eq(SrcVT.Value, v2f32.Value),
                !eq(SrcVT.Value, v4f32.Value),
                !eq(SrcVT.Value, v8f32.Value),
                // Vectors of i32.
                !eq(SrcVT.Value, v2i32.Value),
                !eq(SrcVT.Value, v4i32.Value),
                !eq(SrcVT.Value, v8i32.Value));
}
1651
// Input-modifiers operand type for a source of value type VT.
// The choice is keyed on the bit width first (64, 16, anything else) and then
// on whether the type is floating point. IsTrue16 only affects the 16-bit
// case, where it selects the *T16InputMods operands.
class getSrcMod <ValueType VT, bit IsTrue16 = 0> {
  Operand ret = !cond(
    !eq(VT.Size, 64) :
      !if(VT.isFP, FP64InputMods, Int64InputMods),
    !eq(VT.Size, 16) :
      !if(VT.isFP,
          !if(IsTrue16, FPT16InputMods, FP16InputMods),
          !if(IsTrue16, IntT16InputMods, IntOpSelMods)),
    true :
      !if(VT.isFP, FP32InputMods, Int32InputMods));
}
1661
// Modifiers operand used with op_sel for value type VT: f16/bf16 share
// FP16InputMods, v2f16/v2bf16 share PackedF16InputMods, and every other type
// uses IntOpSelMods.
class getOpSelMod <ValueType VT> {
  Operand ret = !cond(
    !or(!eq(VT, f16), !eq(VT, bf16))     : FP16InputMods,
    !or(!eq(VT, v2f16), !eq(VT, v2bf16)) : PackedF16InputMods,
    true                                 : IntOpSelMods);
}
1669
// Input-modifiers operand type for a DPP source of value type VT:
// FPVRegInputMods for floating-point types, IntVRegInputMods otherwise.
class getSrcModDPP <ValueType VT> {
  Operand ret = !cond(VT.isFP : FPVRegInputMods,
                      true    : IntVRegInputMods);
}
1674
// 16-bit aware variant of getSrcModDPP. f16/bf16 and i16 sources get the
// T16 VReg modifiers operand (parameterised by IsFake16); all other types get
// the same operand getSrcModDPP would pick.
class getSrcModDPP_t16 <ValueType VT, bit IsFake16 = 1> {
  Operand ret = !cond(
    // Floating point: only the 16-bit scalars use the T16 operand.
    !and(VT.isFP,
         !or(!eq(VT.Value, f16.Value),
             !eq(VT.Value, bf16.Value))) : FPT16VRegInputMods<IsFake16>,
    VT.isFP                              : FPVRegInputMods,
    // Integer: only i16 uses the T16 operand.
    !eq(VT.Value, i16.Value)             : IntT16VRegInputMods<IsFake16>,
    true                                 : IntVRegInputMods);
}
1683
// Input-modifiers operand type for a VOP3 DPP source of value type VT.
// f16/bf16 use FP16VCSrcInputMods (parameterised by IsFake16), other
// floating-point types use FP32VCSrcInputMods, and non-FP types use
// Int32VCSrcInputMods.
class getSrcModVOP3DPP <ValueType VT, bit IsFake16 = 1> {
  Operand ret = !cond(
    !and(VT.isFP,
         !or(!eq(VT.Value, f16.Value),
             !eq(VT.Value, bf16.Value))) : FP16VCSrcInputMods<IsFake16>,
    VT.isFP                              : FP32VCSrcInputMods,
    true                                 : Int32VCSrcInputMods);
}
1692
// Input-modifiers operand type for an SDWA source of value type VT.
// f16 and bf16 share FP16SDWAInputMods; f32 and i16 have their own operands;
// every other type uses Int32SDWAInputMods.
class getSrcModSDWA <ValueType VT> {
  Operand ret = !cond(
    !or(!eq(VT.Value, f16.Value),
        !eq(VT.Value, bf16.Value)) : FP16SDWAInputMods,
    !eq(VT.Value, f32.Value)       : FP32SDWAInputMods,
    !eq(VT.Value, i16.Value)       : Int16SDWAInputMods,
    true                           : Int32SDWAInputMods);
}
1701
// Input operand list for VOP[12C] instructions: one source for VOP1, two for
// VOP2, and an empty list for any other source count.
class getIns32 <RegisterOperand Src0RC, RegisterOperand Src1RC, int NumSrcArgs> {
  dag ret = !cond(
    !eq(NumSrcArgs, 1) : (ins Src0RC:$src0),                // VOP1
    !eq(NumSrcArgs, 2) : (ins Src0RC:$src0, Src1RC:$src1),  // VOP2
    true               : (ins));
}
1708
// Input operand list for VOP3 instructions.
//
// The list is assembled from independent pieces rather than enumerated case by
// case:
//   * zero sources (V_NOP, V_CLREXCP) always yields an empty list;
//   * each source is preceded by its modifiers operand when HasModifiers is
//     set (src2 additionally requires HasSrc2Mods);
//   * one source gives the VOP1 form, two the VOP2 form, and any other count
//     the three-source form;
//   * the trailing clamp/omod operands follow the sources.
class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
                RegisterOperand Src2RC, int NumSrcArgs,
                bit HasClamp, bit HasModifiers, bit HasSrc2Mods, bit HasOMod,
                Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {

  // omod is only emitted on the "with modifiers" forms.
  bit EmitOMod = !and(HasModifiers, HasOMod);
  // Whenever omod is emitted the clamp operand is emitted as well, regardless
  // of HasClamp.
  bit EmitClamp = !or(HasClamp, EmitOMod);
  // src2 carries a modifiers operand only if both flags are set.
  bit EmitSrc2Mods = !and(HasModifiers, HasSrc2Mods);

  dag Src0Ops = !if(HasModifiers,
                    (ins Src0Mod:$src0_modifiers, Src0RC:$src0),
                    (ins Src0RC:$src0));
  dag Src1Ops = !if(HasModifiers,
                    (ins Src1Mod:$src1_modifiers, Src1RC:$src1),
                    (ins Src1RC:$src1));
  dag Src2Ops = !if(EmitSrc2Mods,
                    (ins Src2Mod:$src2_modifiers, Src2RC:$src2),
                    (ins Src2RC:$src2));

  dag SrcOps = !cond(
    !eq(NumSrcArgs, 1) : Src0Ops,                           // VOP1
    !eq(NumSrcArgs, 2) : !con(Src0Ops, Src1Ops),            // VOP2
    true               : !con(Src0Ops, Src1Ops, Src2Ops));  // NumSrcArgs == 3

  dag OutModOps = !con(!if(EmitClamp, (ins Clamp0:$clamp), (ins)),
                       !if(EmitOMod, (ins omod0:$omod), (ins)));

  dag ret = !if(!eq(NumSrcArgs, 0),
                // VOP1 without input operands (V_NOP, V_CLREXCP)
                (ins),
                !con(SrcOps, OutModOps));
}
1788
// Common VOP3 input list: the getIns64 operands, optionally followed by the
// op_sel operand when HasOpSel is set.
class getInsVOP3Base<RegisterOperand Src0RC, RegisterOperand Src1RC,
                RegisterOperand Src2RC, int NumSrcArgs,
                bit HasClamp, bit HasModifiers, bit HasSrc2Mods, bit HasOMod,
                Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOpSel> {
  // getIns64 handles clamp and omod. Note the implicit mutual exclusion
  // between VOP3P and omod.
  dag base = getIns64 <Src0RC, Src1RC, Src2RC, NumSrcArgs,
                       HasClamp, HasModifiers, HasSrc2Mods, HasOMod,
                       Src0Mod, Src1Mod, Src2Mod>.ret;
  dag opsel = (ins op_sel0:$op_sel);
  // Either the op_sel operand or nothing, depending on HasOpSel.
  dag OpSelOps = !if(HasOpSel, opsel, (ins));
  dag ret = !con(base, OpSelOps);
}
1800
// Input list for VOP3P instructions: the VOP3 base list (built with source
// modifiers on all operands and without omod), followed by op_sel_hi when
// HasOpSel is set, followed by neg_lo/neg_hi.
class getInsVOP3P <RegisterOperand Src0RC, RegisterOperand Src1RC,
                   RegisterOperand Src2RC, int NumSrcArgs, bit HasClamp, bit HasOpSel,
                   Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
  dag base = getInsVOP3Base<Src0RC, Src1RC, Src2RC, NumSrcArgs,
                            HasClamp,
                            /*HasModifiers=*/1,
                            /*HasSrc2Mods=*/1,
                            /*HasOMod=*/0,
                            Src0Mod, Src1Mod, Src2Mod, HasOpSel>.ret;

  dag vop3pOpsel = (ins op_sel_hi0:$op_sel_hi);
  dag vop3p_neg = (ins neg_lo0:$neg_lo, neg_hi0:$neg_hi);

  // neg_lo/neg_hi are always present; op_sel_hi only with HasOpSel.
  dag vop3pFields = !if(HasOpSel,
                        !con(vop3pOpsel, vop3p_neg),
                        vop3p_neg);
  dag ret = !con(base, vop3pFields);
}
1814
// Input list for VOP3 instructions with op_sel: the VOP3 base list built with
// source modifiers on every operand (including src2) and op_sel enabled.
class getInsVOP3OpSel <RegisterOperand Src0RC, RegisterOperand Src1RC,
                       RegisterOperand Src2RC, int NumSrcArgs,
                       bit HasClamp, bit HasOMod,
                       Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
  dag ret = getInsVOP3Base<Src0RC, Src1RC, Src2RC, NumSrcArgs,
                           HasClamp,
                           /*HasModifiers=*/1,
                           /*HasSrc2Mods=*/1,
                           HasOMod,
                           Src0Mod, Src1Mod, Src2Mod,
                           /*HasOpSel=*/1>.ret;
}
1824
// Operands shared by all DPP input lists: the optional $old operand followed
// by the sources. With HasModifiers every source (including src2) is preceded
// by its modifiers operand. Zero sources (V_NOP) yields an empty list, in
// which case $old is omitted as well.
class getInsDPPBase <RegisterOperand OldRC, RegisterOperand Src0RC, RegisterOperand Src1RC,
                     RegisterOperand Src2RC, int NumSrcArgs, bit HasModifiers,
                     Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld> {
  dag OldOps = !if(HasOld, (ins OldRC:$old), (ins));

  dag Src0Ops = !if(HasModifiers,
                    (ins Src0Mod:$src0_modifiers, Src0RC:$src0),
                    (ins Src0RC:$src0));
  dag Src1Ops = !if(HasModifiers,
                    (ins Src1Mod:$src1_modifiers, Src1RC:$src1),
                    (ins Src1RC:$src1));
  dag Src2Ops = !if(HasModifiers,
                    (ins Src2Mod:$src2_modifiers, Src2RC:$src2),
                    (ins Src2RC:$src2));

  dag SrcOps = !cond(
    !eq(NumSrcArgs, 1) : Src0Ops,                           // VOP1_DPP
    !eq(NumSrcArgs, 2) : !con(Src0Ops, Src1Ops),            // VOP2_DPP
    true               : !con(Src0Ops, Src1Ops, Src2Ops));  // NumSrcArgs == 3, VOP3

  dag ret = !if(!eq(NumSrcArgs, 0),
                // VOP1 without input operands (V_NOP)
                (ins),
                !con(OldOps, SrcOps));
}
1866
// DPP input list: the getInsDPPBase operands followed by the DPP control
// operands (dpp_ctrl, row_mask, bank_mask, bound_ctrl).
class getInsDPP <RegisterOperand OldRC, RegisterOperand Src0RC, RegisterOperand Src1RC,
                 RegisterOperand Src2RC, int NumSrcArgs, bit HasModifiers,
                 Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld = 1> {
  dag BaseOps = getInsDPPBase<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs,
                              HasModifiers, Src0Mod, Src1Mod, Src2Mod,
                              HasOld>.ret;
  dag CtrlOps = (ins dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask,
                     DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl);
  dag ret = !con(BaseOps, CtrlOps);
}
1875
// DPP16 input list: the getInsDPP operands with the $fi operand appended.
class getInsDPP16 <RegisterOperand OldRC, RegisterOperand Src0RC, RegisterOperand Src1RC,
                   RegisterOperand Src2RC, int NumSrcArgs, bit HasModifiers,
                   Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld = 1> {
  dag DPPOps = getInsDPP<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs,
                         HasModifiers, Src0Mod, Src1Mod, Src2Mod,
                         HasOld>.ret;
  dag FIOps = (ins Dpp16FI:$fi);
  dag ret = !con(DPPOps, FIOps);
}
1883
// DPP8 input list: the getInsDPPBase operands followed by the dpp8 selector
// and the $fi operand.
class getInsDPP8 <RegisterOperand OldRC, RegisterOperand Src0RC, RegisterOperand Src1RC,
                  RegisterOperand Src2RC, int NumSrcArgs, bit HasModifiers,
                  Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld = 1> {
  dag BaseOps = getInsDPPBase<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs,
                              HasModifiers, Src0Mod, Src1Mod, Src2Mod,
                              HasOld>.ret;
  dag DPP8Ops = (ins dpp8:$dpp8, Dpp8FI:$fi);
  dag ret = !con(BaseOps, DPP8Ops);
}
1891
// Prefixes an already-built VOP3 input list with the $old operand. $old is
// only added when HasOld is set and the instruction has at least one source.
class getInsVOP3DPPBase<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld> {
  dag old = (ins OldRC:$old);
  dag base = VOP3Base;
  // $old, or nothing when it is not wanted.
  dag OldOps = !if(!and(HasOld, !ne(NumSrcArgs, 0)), old, (ins));
  dag ret = !con(OldOps, base);
}
1900
// VOP3 DPP input list: the getInsVOP3DPPBase operands followed by the DPP
// control operands (dpp_ctrl, row_mask, bank_mask, bound_ctrl).
class getInsVOP3DPP<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld = 1> {
  dag BaseOps = getInsVOP3DPPBase<VOP3Base, OldRC, NumSrcArgs, HasOld>.ret;
  dag CtrlOps = (ins dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask,
                     DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl);
  dag ret = !con(BaseOps, CtrlOps);
}
1906
// VOP3 DPP16 input list: the getInsVOP3DPP operands with $fi appended.
class getInsVOP3DPP16<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld = 1> {
  dag DPPOps = getInsVOP3DPP<VOP3Base, OldRC, NumSrcArgs, HasOld>.ret;
  dag FIOps = (ins Dpp16FI:$fi);
  dag ret = !con(DPPOps, FIOps);
}
1911
// VOP3 DPP8 input list: the getInsVOP3DPPBase operands followed by the dpp8
// selector and the $fi operand.
class getInsVOP3DPP8<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld = 1> {
  dag BaseOps = getInsVOP3DPPBase<VOP3Base, OldRC, NumSrcArgs, HasOld>.ret;
  dag DPP8Ops = (ins dpp8:$dpp8, Dpp8FI:$fi);
  dag ret = !con(BaseOps, DPP8Ops);
}
1916
// Input operand list for SDWA instructions.
//   * NumSrcArgs == 0 (V_NOP) and any count other than 1 or 2: empty list.
//   * NumSrcArgs == 1 (VOP1): src0, clamp, optional omod, dst_sel, dst_unused,
//     src0_sel.
//   * NumSrcArgs == 2 with a 1-bit destination (VOPC): src0, src1, clamp,
//     src0_sel, src1_sel. No omod and no dst_sel/dst_unused.
//   * NumSrcArgs == 2 otherwise (VOP2): src0, src1, clamp, optional omod,
//     dst_sel, dst_unused, src0_sel, src1_sel.
// omod is present only when HasSDWAOMod is set.
class getInsSDWA <RegisterOperand Src0RC, RegisterOperand Src1RC, int NumSrcArgs,
                  bit HasSDWAOMod, Operand Src0Mod, Operand Src1Mod,
                  ValueType DstVT> {

  dag Src0Ops = (ins Src0Mod:$src0_modifiers, Src0RC:$src0);
  dag Src1Ops = (ins Src1Mod:$src1_modifiers, Src1RC:$src1);

  // Output modifiers used by the VOP1/VOP2 forms.
  dag OutModOps = !if(HasSDWAOMod,
                      (ins Clamp:$clamp, omod:$omod),
                      (ins Clamp:$clamp));
  dag DstSelOps = (ins dst_sel:$dst_sel, dst_unused:$dst_unused);

  dag VOP1Ops = !con(Src0Ops, OutModOps, DstSelOps,
                     (ins src0_sel:$src0_sel));
  dag VOPCOps = !con(Src0Ops, Src1Ops,
                     (ins Clamp:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel));
  dag VOP2Ops = !con(Src0Ops, Src1Ops, OutModOps, DstSelOps,
                     (ins src0_sel:$src0_sel, src1_sel:$src1_sel));

  dag ret = !cond(
    !eq(NumSrcArgs, 1) : VOP1Ops,
    !eq(NumSrcArgs, 2) : !if(!eq(DstVT.Size, 1), VOPCOps, VOP2Ops),
    // VOP1 without input operands (V_NOP), or an unsupported source count.
    true               : (ins));
}
1960
// Output operand list for DPP instructions. $vdst is produced only when the
// instruction has a destination that is not 1 bit wide; instructions without
// a destination (V_NOP) and 1-bit (VOPC) destinations yield an empty list.
class getOutsDPP <bit HasDst, ValueType DstVT, RegisterOperand DstRCDPP> {
  dag ret = !if(!and(HasDst, !ne(DstVT.Size, 1)),
                (outs DstRCDPP:$vdst),
                (outs));
}
1969
// Output operand list for SDWA instructions: empty without a destination
// (V_NOP), $sdst for a 1-bit destination, and $vdst otherwise.
class getOutsSDWA <bit HasDst, ValueType DstVT, RegisterOperand DstRCSDWA> {
  dag ret = !cond(
    !not(HasDst)       : (outs), // V_NOP
    !eq(DstVT.Size, 1) : (outs DstRCSDWA:$sdst),
    true               : (outs DstRCSDWA:$vdst));
}
1978
// Assembly operand string for a VOP[12C] instruction: the destination (if
// any) followed by a ", $srcN" entry per source. Source counts other than
// 1, 2 or 3 contribute no source text.
class getAsm32 <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> {
  // A 1-bit destination (VOPC) is printed as $sdst.
  string dst = !if(!eq(DstVT.Size, 1), "$sdst", "$vdst");
  string src0 = ", $src0";
  string src1 = ", $src1";
  string src2 = ", $src2";

  string ret = !strconcat(
    !if(HasDst, dst, ""),
    !cond(!eq(NumSrcArgs, 1) : src0,
          !eq(NumSrcArgs, 2) : !strconcat(src0, src1),
          !eq(NumSrcArgs, 3) : !strconcat(src0, src1, src2),
          true               : ""));
}
1991
// Assembly operand string for one half of a VOPD instruction. Every operand
// name is suffixed with XorY; the first source is $src0 and the second is
// $vsrc1. Sources beyond the second are not printed.
class getAsmVOPDPart <int NumSrcArgs, string XorY> {
  string dst = !strconcat("$vdst", XorY);
  string src0 = !strconcat(", $src0", XorY);
  string src1 = !strconcat(", $vsrc1", XorY);
  string ret = !strconcat(
    dst,
    !cond(!ge(NumSrcArgs, 2) : !strconcat(src0, src1),
          !ge(NumSrcArgs, 1) : src0,
          true               : ""));
}
2000
// Assembly operand string for a VOP3P instruction: "$vdst, " followed by the
// sources, then the optional op_sel/op_sel_hi, neg_lo/neg_hi and clamp
// operands (each printed only when the corresponding flag is set).
class getAsmVOP3P <int NumSrcArgs, bit HasModifiers,
                   bit HasClamp, bit HasOpSel> {
  string dst = "$vdst";

  // A source is followed by a comma unless it is the last one printed.
  string src0 = "$src0" # !if(!eq(NumSrcArgs, 1), "", ",");
  string src1 = !cond(!eq(NumSrcArgs, 1) : "",
                      !eq(NumSrcArgs, 2) : " $src1",
                      true               : " $src1,");
  string src2 = !if(!eq(NumSrcArgs, 3), " $src2", "");

  string mods = !if(HasModifiers, "$neg_lo$neg_hi", "");
  string clamp = !if(HasClamp, "$clamp", "");
  string opsel = !if(HasOpSel, "$op_sel$op_sel_hi", "");

  // Each modifier is printed as an array of bits for each operand, so
  // all operands are printed as part of src0_modifiers.
  string ret = !strconcat(dst, ", ", src0, src1, src2, opsel, mods, clamp);
}
2020
// Assembly operand string for a VOP3 instruction with op_sel. Each source is
// printed either as the plain operand or as its "_modifiers" operand,
// depending on the matching SrcNHasMods flag; "$op_sel" is always printed,
// followed by the optional clamp and omod.
class getAsmVOP3OpSel <int NumSrcArgs,
                       bit HasClamp,
                       bit HasOMod,
                       bit Src0HasMods,
                       bit Src1HasMods,
                       bit Src2HasMods> {
  string dst = "$vdst";

  // Plain source spellings.
  string isrc0 = "$src0" # !if(!eq(NumSrcArgs, 1), "", ",");
  string isrc1 = !cond(!eq(NumSrcArgs, 1) : "",
                       !eq(NumSrcArgs, 2) : " $src1",
                       true               : " $src1,");
  string isrc2 = !if(!eq(NumSrcArgs, 3), " $src2", "");

  // Source spellings that go through the modifiers operand.
  string fsrc0 = "$src0_modifiers" # !if(!eq(NumSrcArgs, 1), "", ",");
  string fsrc1 = !cond(!eq(NumSrcArgs, 1) : "",
                       !eq(NumSrcArgs, 2) : " $src1_modifiers",
                       true               : " $src1_modifiers,");
  string fsrc2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", "");

  string src0 = !if(Src0HasMods, fsrc0, isrc0);
  string src1 = !if(Src1HasMods, fsrc1, isrc1);
  string src2 = !if(Src2HasMods, fsrc2, isrc2);

  string clamp = !if(HasClamp, "$clamp", "");
  string omod = !if(HasOMod, "$omod", "");
  string ret = !strconcat(dst, ", ", src0, src1, src2, "$op_sel", clamp, omod);
}
2049
// Assembly operand string for a DPP instruction: destination, sources and the
// DPP control operands. With HasModifiers the sources are printed through
// their "_modifiers" operands; otherwise the getAsm32 source list is reused.
class getAsmDPP <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
  string dst = !cond(!not(HasDst)       : "",
                     !eq(DstVT.Size, 1) : "$sdst", // use $sdst for VOPC
                     true               : "$vdst");
  string src0 = "$src0_modifiers" # !if(!eq(NumSrcArgs, 1), "", ",");
  string src1 = !cond(!eq(NumSrcArgs, 1) : "",
                      !eq(NumSrcArgs, 2) : " $src1_modifiers",
                      true               : " $src1_modifiers,");
  string args = !if(HasModifiers,
                    !strconcat(", ", src0, src1),
                    getAsm32<0, NumSrcArgs, DstVT>.ret);
  string ret = !strconcat(dst, args,
                          " $dpp_ctrl$row_mask$bank_mask$bound_ctrl");
}
2065
// DPP16 assembly string: the getAsmDPP string with "$fi" appended.
class getAsmDPP16 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
  string ret = !strconcat(
    getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret,
    "$fi");
}
2069
// DPP8 assembly string. Inherits dst and args from getAsmDPP and overrides
// ret so that the DPP control operands are replaced by " $dpp8$fi".
class getAsmDPP8 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32>
  : getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT> {
  let ret = !strconcat(dst, args, " $dpp8$fi");
}
2074
// Assembly operand string shared by the VOP3 encodings: the destination,
// then (only if there is at least one source) ", " followed by the sources and
// the optional op_sel, byte_sel, VOP3P-only modifiers, clamp and omod.
class getAsmVOP3Base <int NumSrcArgs, bit HasDst, bit HasClamp,
                       bit HasOpSel, bit HasOMod, bit IsVOP3P,
                       bit HasModifiers, bit Src0HasMods,
                       bit Src1HasMods, bit Src2HasMods, ValueType DstVT = i32,
                       bit HasByteSel = 0> {
  string dst = !cond(!not(HasDst)       : "",
                     !eq(DstVT.Size, 1) : "$sdst", // use $sdst for VOPC
                     true               : "$vdst");

  // Plain source spellings.
  string src0nomods = "$src0" # !if(!eq(NumSrcArgs, 1), "", ",");
  string src1nomods = !cond(!eq(NumSrcArgs, 1) : "",
                            !eq(NumSrcArgs, 2) : " $src1",
                            true               : " $src1,");
  string src2nomods = !if(!eq(NumSrcArgs, 3), " $src2", "");

  // Source spellings that go through the modifiers operand.
  string src0mods = "$src0_modifiers" # !if(!eq(NumSrcArgs, 1), "", ",");
  string src1mods = !cond(!eq(NumSrcArgs, 1) : "",
                          !eq(NumSrcArgs, 2) : " $src1_modifiers",
                          true               : " $src1_modifiers,");
  string src2mods = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", "");

  string src0 = !if(Src0HasMods, src0mods, src0nomods);
  string src1 = !if(Src1HasMods, src1mods, src1nomods);
  string src2 = !if(Src2HasMods, src2mods, src2nomods);

  string opsel = !if(HasOpSel, "$op_sel", "");
  string bytesel = !if(HasByteSel, "$byte_sel", "");
  // op_sel_hi and neg_lo/neg_hi are printed for VOP3P only.
  string 3PMods = !if(IsVOP3P,
                      !strconcat(!if(HasOpSel, "$op_sel_hi", ""),
                                 !if(HasModifiers, "$neg_lo$neg_hi", "")),
                      "");
  string clamp = !if(HasClamp, "$clamp", "");
  string omod = !if(HasOMod, "$omod", "");

  string ret = !strconcat(
    dst,
    !if(!gt(NumSrcArgs, 0),
        !strconcat(", ", src0, src1, src2, opsel, bytesel, 3PMods, clamp, omod),
        ""));
}
2112
// Appends the DPP control operands to an existing VOP3 assembly string.
class getAsmVOP3DPP<string base> {
  string ret = !strconcat(base, " $dpp_ctrl$row_mask$bank_mask$bound_ctrl");
}
2116
// VOP3 DPP16 assembly string: the getAsmVOP3DPP string with "$fi" appended.
class getAsmVOP3DPP16<string base> {
  string ret = !strconcat(getAsmVOP3DPP<base>.ret, "$fi");
}
2120
// Appends the DPP8 operands (" $dpp8$fi") to an existing VOP3 assembly string.
class getAsmVOP3DPP8<string base> {
  string ret = !strconcat(base, " $dpp8$fi");
}
2124
2125
// Assembly operand string for SDWA instructions: destination, sources (always
// printed through their "_modifiers" operands) with clamp, then the SDWA
// select operands. A 1-bit destination (VOPC) is printed as the " vcc" token.
class getAsmSDWA <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> {
  string dst = !cond(!not(HasDst)       : "",
                     // use vcc token as dst for VOPC instructions
                     !eq(DstVT.Size, 1) : " vcc",
                     true               : "$vdst");
  string src0 = "$src0_modifiers";
  string src1 = "$src1_modifiers";

  string args = !cond(
    !eq(NumSrcArgs, 0) : "",
    !eq(NumSrcArgs, 1) : !strconcat(", ", src0, "$clamp"),
    true               : !strconcat(", ", src0, ", ", src1, "$clamp"));

  string sdwa = !cond(
    !eq(NumSrcArgs, 0) : "",
    !eq(NumSrcArgs, 1) : " $dst_sel $dst_unused $src0_sel",
    // No dst_sel and dst_unused for VOPC
    !eq(DstVT.Size, 1) : " $src0_sel $src1_sel",
    true               : " $dst_sel $dst_unused $src0_sel $src1_sel");

  string ret = !strconcat(dst, args, sdwa);
}
2153
// Assembly operand string for SDWA9 instructions. Unlike getAsmSDWA, a 1-bit
// destination (VOPC) is printed as $sdst, and the output modifiers
// (clamp, plus omod when HasOMod is set) are printed with the SDWA select
// operands rather than with the sources.
class getAsmSDWA9 <bit HasDst, bit HasOMod, int NumSrcArgs,
                   ValueType DstVT = i32> {
  string dst = !cond(!not(HasDst)       : "",
                     !eq(DstVT.Size, 1) : "$sdst",  // VOPC
                     true               : "$vdst"); // VOP1/2
  string src0 = "$src0_modifiers";
  string src1 = "$src1_modifiers";
  string out_mods = "$clamp" # !if(HasOMod, "$omod", "");

  string args = !cond(
    !eq(NumSrcArgs, 0) : "",
    !eq(NumSrcArgs, 1) : !strconcat(", ", src0),
    true               : !strconcat(", ", src0, ", ", src1));

  string sdwa = !cond(
    !eq(NumSrcArgs, 0) : "",
    !eq(NumSrcArgs, 1) : !strconcat(out_mods, " $dst_sel $dst_unused $src0_sel"),
    // No dst_sel, dst_unused and output modifiers for VOPC
    !eq(DstVT.Size, 1) : " $src0_sel $src1_sel",
    true               : !strconcat(out_mods,
                                    " $dst_sel $dst_unused $src0_sel $src1_sel"));

  string ret = !strconcat(dst, args, sdwa);
}
2181
// True when the instruction has fewer or more than three sources and at least
// one of dst, src0 or src1 is 64 bits wide. Three-source instructions always
// yield false.
class getHas64BitOps <int NumSrcArgs, ValueType DstVT, ValueType Src0VT,
                      ValueType Src1VT> {
  bit Any64Bit = !or(!eq(DstVT.Size, 64),
                     !eq(Src0VT.Size, 64),
                     !eq(Src1VT.Size, 64));
  bit ret = !and(!ne(NumSrcArgs, 3), Any64Bit);
}
2198
// True when the instruction can have an SDWA form. SDWA is ruled out by any
// of the following:
//   * NumSrcArgs == 3 - no SDWA for VOP3;
//   * a 64-bit dst, src0 or src1 - no SDWA for 64-bit operands.
class getHasSDWA <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
                  ValueType Src1VT = i32> {
  bit Blocked = !or(!eq(NumSrcArgs, 3),   // VOP3
                    !eq(DstVT.Size, 64),  // 64-bit dst
                    !eq(Src0VT.Size, 64), // 64-bit src0
                    !eq(Src1VT.Size, 64)); // 64-bit src1
  bit ret = !not(Blocked);
}
2215
// True unless the instruction has exactly three sources
// (NumSrcArgs == 3 - no DPP for VOP3).
class getHasDPP <int NumSrcArgs> {
  bit ret = !ne(NumSrcArgs, 3);
}
2221
// True when the instruction supports DPP (getHasDPP) and has no 64-bit
// operands as reported by getHas64BitOps.
class getHasExt32BitDPP <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
                 ValueType Src1VT = i32> {
  bit SupportsDPP = getHasDPP<NumSrcArgs>.ret;
  bit Uses64Bit = getHas64BitOps<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
  bit ret = !and(SupportsDPP, !not(Uses64Bit));
}
2227
// True when the instruction supports DPP (getHasDPP) and has at least one
// 64-bit operand as reported by getHas64BitOps.
class getHasExt64BitDPP <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
                 ValueType Src1VT = i32> {
  bit SupportsDPP = getHasDPP<NumSrcArgs>.ret;
  bit Uses64Bit = getHas64BitOps<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
  bit ret = !and(SupportsDPP, Uses64Bit);
}
2233
// Reports whether the instruction supports an extended form, i.e. DPP
// (getHasDPP) or SDWA (getHasSDWA); either one is sufficient.
class getHasExt <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
                 ValueType Src1VT = i32> {
  bit SupportsDPP = getHasDPP<NumSrcArgs>.ret;
  bit SupportsSDWA = getHasSDWA<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
  bit ret = !or(SupportsDPP, SupportsSDWA);
}
2240
// Maps a VGPR register class to the AGPR+VGPR load/store operand of the same
// size. Only the sizes listed below are handled; the !cond deliberately has
// no catch-all arm, so no operand is produced for any other size.
class getLdStRegisterOperand<RegisterClass RC> {
  RegisterOperand ret = !cond(
    !eq(RC.Size, 1024) : AVLdSt_1024,
    !eq(RC.Size, 160)  : AVLdSt_160,
    !eq(RC.Size, 128)  : AVLdSt_128,
    !eq(RC.Size, 96)   : AVLdSt_96,
    !eq(RC.Size, 64)   : AVLdSt_64,
    !eq(RC.Size, 32)   : AVLdSt_32);
}
2251
// True when none of dst, src0, src1 or src2 is 64 bits wide
// (no DPP for 64-bit operands).
class getHasVOP3DPP <ValueType DstVT = i32, ValueType Src0VT = i32,
                 ValueType Src1VT = i32, ValueType Src2VT = i32> {
  bit Any64Bit = !or(!eq(DstVT.Size, 64),   // 64-bit dst
                     !eq(Src0VT.Size, 64),  // 64-bit src0
                     !eq(Src1VT.Size, 64),  // 64-bit src1
                     !eq(Src2VT.Size, 64)); // 64-bit src2
  bit ret = !not(Any64Bit);
}
2268
2269
// Named integer constants for the pattern-generation mode:
// NoPattern is 0 and Pattern is 1.
def PatGenMode {
  int NoPattern = 0;
  int Pattern = 1;
}
2274
2275class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
2276
2277  field list<ValueType> ArgVT = _ArgVT;
2278  field bit EnableClamp = _EnableClamp;
2279  field bit IsTrue16 = 0;
2280  field bit IsRealTrue16 = 0;
2281  field bit IsInvalidSingleUseConsumer = 0;
2282  field bit IsInvalidSingleUseProducer = 0;
2283
2284  field ValueType DstVT = ArgVT[0];
2285  field ValueType Src0VT = ArgVT[1];
2286  field ValueType Src1VT = ArgVT[2];
2287  field ValueType Src2VT = ArgVT[3];
2288  field RegisterOperand DstRC = getVALUDstForVT<DstVT>.ret;
2289  field RegisterOperand DstRCDPP = DstRC;
2290  field RegisterOperand DstRC64 = DstRC;
2291  field RegisterOperand DstRCVOP3DPP = DstRC64;
2292  field RegisterOperand DstRCSDWA = getSDWADstForVT<DstVT>.ret;
2293  field RegisterOperand Src0RC32 = getVOPSrc0ForVT<Src0VT, IsTrue16>.ret;
2294  field RegisterOperand Src1RC32 = getVregSrcForVT<Src1VT>.ret;
2295  field RegisterOperand Src0RC64 = getVOP3SrcForVT<Src0VT>.ret;
2296  field RegisterOperand Src1RC64 = getVOP3SrcForVT<Src1VT>.ret;
2297  field RegisterOperand Src2RC64 = getVOP3SrcForVT<Src2VT>.ret;
2298  field RegisterOperand Src0DPP = getVregSrcForVT<Src0VT>.ret;
2299  field RegisterOperand Src1DPP = getVregSrcForVT<Src1VT>.ret;
2300  field RegisterOperand Src2DPP = getVregSrcForVT<Src2VT>.ret;
2301  field RegisterOperand Src0VOP3DPP = VGPRSrc_32;
2302  field RegisterOperand Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT>.ret;
2303  field RegisterOperand Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT>.ret;
2304  field RegisterOperand Src0SDWA = getSDWASrcForVT<Src0VT>.ret;
2305  field RegisterOperand Src1SDWA = getSDWASrcForVT<Src0VT>.ret;
2306  field Operand Src0Mod = getSrcMod<Src0VT>.ret;
2307  field Operand Src1Mod = getSrcMod<Src1VT>.ret;
2308  field Operand Src2Mod = getSrcMod<Src2VT>.ret;
2309  field Operand Src0ModDPP = getSrcModDPP<Src0VT>.ret;
2310  field Operand Src1ModDPP = getSrcModDPP<Src1VT>.ret;
2311  field Operand Src2ModDPP = getSrcModDPP<Src2VT>.ret;
2312  field Operand Src0ModVOP3DPP = getSrcModDPP<Src0VT>.ret;
2313  field Operand Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT>.ret;
2314  field Operand Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT>.ret;
2315  field Operand Src0ModSDWA = getSrcModSDWA<Src0VT>.ret;
2316  field Operand Src1ModSDWA = getSrcModSDWA<Src1VT>.ret;
2317
2318
2319  field bit IsMAI = 0;
2320  field bit IsVOP3P = 0;
2321  field bit IsDOT = 0;
2322  field bit IsSingle = 0;
2323  field bit IsWMMA = 0;
2324  field bit IsSWMMAC = 0;
2325
2326  field bit IsFP8SrcByteSel = 0;
2327  field bit IsFP8DstByteSel = 0;
2328  field bit IsFP8ByteSel = !or(IsFP8SrcByteSel, IsFP8DstByteSel);
2329
2330  field bit HasDst = !ne(DstVT.Value, untyped.Value);
2331  field bit HasDst32 = HasDst;
2332  field bit EmitDst = HasDst; // force dst encoding, see v_movreld_b32 special case
2333  field bit EmitDstSel = EmitDst;
2334  field int NumSrcArgs = getNumSrcArgs<Src0VT, Src1VT, Src2VT>.ret;
2335  field bit HasSrc0 = !ne(Src0VT.Value, untyped.Value);
2336  field bit HasSrc1 = !ne(Src1VT.Value, untyped.Value);
2337  field bit HasSrc2 = !ne(Src2VT.Value, untyped.Value);
2338
2339  field bit HasSrc0FloatMods = Src0VT.isFP;
2340  field bit HasSrc1FloatMods = Src1VT.isFP;
2341  field bit HasSrc2FloatMods = Src2VT.isFP;
2342
2343  field bit HasSrc0IntMods = isIntType<Src0VT>.ret;
2344  field bit HasSrc1IntMods = isIntType<Src1VT>.ret;
2345  field bit HasSrc2IntMods = isIntType<Src2VT>.ret;
2346
2347  field bit HasClamp = !or(isModifierType<Src0VT>.ret, EnableClamp);
2348  field bit HasSDWAClamp = EmitDst;
2349  field bit HasFPClamp = !and(DstVT.isFP, HasClamp);
2350  field bit HasIntClamp = !if(DstVT.isFP, 0, HasClamp);
2351  field bit HasClampLo = HasClamp;
2352  field bit HasClampHi = !and(DstVT.isVector, HasClamp);
2353  field bit HasHigh = 0;
2354
2355  field bit IsPacked = Src0VT.isVector;
2356  field bit HasOpSel = IsPacked;
2357  field bit HasOMod = !if(IsVOP3P, 0, DstVT.isFP);
2358  field bit HasSDWAOMod = DstVT.isFP;
2359
2360  field bit HasModifiers = !or(isModifierType<Src0VT>.ret,
2361                               isModifierType<Src1VT>.ret,
2362                               isModifierType<Src2VT>.ret,
2363                               HasOMod);
2364
2365  field bit HasSrc0Mods = HasModifiers;
2366  field bit HasSrc1Mods = !if(HasModifiers, !or(HasSrc1FloatMods, HasSrc1IntMods), 0);
2367  field bit HasSrc2Mods = !if(HasModifiers, !or(HasSrc2FloatMods, HasSrc2IntMods), 0);
2368
2369  field bit HasExt = getHasExt<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
2370  field bit HasExtVOP3DPP = getHasVOP3DPP<DstVT, Src0VT, Src1VT, Src2VT>.ret;
2371  field bit HasExtDPP = !or(getHasDPP<NumSrcArgs>.ret, HasExtVOP3DPP);
2372  field bit HasExt32BitDPP = getHasExt32BitDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
2373  field bit HasExt64BitDPP = getHasExt64BitDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
2374  field bit HasExtSDWA = getHasSDWA<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
2375  field bit HasExtSDWA9 = HasExtSDWA;
2376  field int NeedPatGen = PatGenMode.NoPattern;
2377
2378  field Operand Src0PackedMod = !if(HasSrc0FloatMods, PackedF16InputMods, PackedI16InputMods);
2379  field Operand Src1PackedMod = !if(HasSrc1FloatMods, PackedF16InputMods, PackedI16InputMods);
2380  field Operand Src2PackedMod = !if(HasSrc2FloatMods, PackedF16InputMods, PackedI16InputMods);
2381
2382  field dag Outs = !if(HasDst,(outs DstRC:$vdst),(outs));
2383
2384  // VOP3b instructions are a special case with a second explicit
2385  // output. This is manually overridden for them.
2386  field dag Outs32 = Outs;
2387  field dag Outs64 = !if(HasDst,(outs DstRC64:$vdst),(outs));
2388  field dag OutsDPP = getOutsDPP<HasDst, DstVT, DstRCDPP>.ret;
2389  field dag OutsDPP8 = OutsDPP;
2390  field dag OutsVOP3DPP = getOutsDPP<HasDst, DstVT, DstRCVOP3DPP>.ret;
2391  field dag OutsVOP3DPP8 = OutsVOP3DPP;
2392  field dag OutsSDWA = getOutsSDWA<HasDst, DstVT, DstRCSDWA>.ret;
2393
2394  field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret;
2395  field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
2396                             HasClamp, HasModifiers, HasSrc2Mods,
2397                             HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret;
2398  field dag InsVOP3P = getInsVOP3P<Src0RC64, Src1RC64, Src2RC64,
2399                                   NumSrcArgs, HasClamp, HasOpSel,
2400                                   Src0PackedMod, Src1PackedMod, Src2PackedMod>.ret;
2401  field dag InsVOP3OpSel = getInsVOP3OpSel<Src0RC64, Src1RC64, Src2RC64,
2402                                NumSrcArgs, HasClamp, HasOMod,
2403                                getOpSelMod<Src0VT>.ret,
2404                                getOpSelMod<Src1VT>.ret,
2405                                getOpSelMod<Src2VT>.ret>.ret;
2406  field dag InsDPP = !if(HasExtDPP,
2407                         getInsDPP<DstRCDPP, Src0DPP, Src1DPP, Src2DPP, NumSrcArgs,
2408                                   HasModifiers, Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret,
2409                         (ins));
2410  field dag InsDPP16 = getInsDPP16<DstRCDPP, Src0DPP, Src1DPP, Src2DPP, NumSrcArgs,
2411                                   HasModifiers, Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret;
2412  field dag InsDPP8 = getInsDPP8<DstRCDPP, Src0DPP, Src1DPP, Src2DPP,
2413                                 NumSrcArgs, HasModifiers,
2414                                 Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret;
2415  defvar InsVOP3DPPBase = getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP,
2416                  Src2VOP3DPP, NumSrcArgs, HasClamp, HasModifiers, HasSrc2Mods, HasOMod,
2417                  Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP, HasOpSel>.ret;
2418  defvar InsVOP3PDPPBase = getInsVOP3P<Src0VOP3DPP, Src1VOP3DPP,
2419                  Src2VOP3DPP, NumSrcArgs, HasClamp, HasOpSel,
2420                  Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP>.ret;
2421
2422  field dag InsVOP3Base = !if(IsVOP3P, InsVOP3PDPPBase, InsVOP3DPPBase);
2423
2424  field dag InsVOP3DPP = getInsVOP3DPP<InsVOP3Base, DstRCVOP3DPP, NumSrcArgs>.ret;
2425  field dag InsVOP3DPP16 = getInsVOP3DPP16<InsVOP3Base, DstRCVOP3DPP, NumSrcArgs>.ret;
2426  field dag InsVOP3DPP8 = getInsVOP3DPP8<InsVOP3Base, DstRCVOP3DPP, NumSrcArgs>.ret;
2427  field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs,
2428                                 HasSDWAOMod, Src0ModSDWA, Src1ModSDWA,
2429                                 DstVT>.ret;
2430  field dag InsVOPDX = (ins Src0RC32:$src0X, Src1RC32:$vsrc1X);
2431  // It is a slight misnomer to use the deferred f32 operand type for non-float
2432  // operands, but this operand type will only be used if the other dual
2433  // component is FMAAK or FMAMK
2434  field dag InsVOPDXDeferred = (ins !if(!eq(Src0VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0X, VGPR_32:$vsrc1X);
2435  field dag InsVOPDY = (ins Src0RC32:$src0Y, Src1RC32:$vsrc1Y);
2436  field dag InsVOPDYDeferred = (ins !if(!eq(Src1VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0Y, VGPR_32:$vsrc1Y);
2437
2438
2439  field string Asm32 = getAsm32<HasDst, NumSrcArgs, DstVT>.ret;
2440  field string AsmDPP = !if(HasExtDPP,
2441                            getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret, "");
2442  field string AsmDPP16 = getAsmDPP16<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret;
2443  // DPP8 encoding has no fields for modifiers, and it is enforced by setting
2444  // the asm operand name via this HasModifiers flag
2445  field string AsmDPP8 = getAsmDPP8<HasDst, NumSrcArgs, 0 /*HasModifiers*/, DstVT>.ret;
2446  field string AsmVOP3Base = getAsmVOP3Base<NumSrcArgs, HasDst, HasClamp,
2447   HasOpSel, HasOMod, IsVOP3P, HasModifiers, HasModifiers, HasModifiers,
2448   HasModifiers, DstVT, IsFP8ByteSel>.ret;
2449  field string Asm64 = AsmVOP3Base;
2450  field string AsmVOP3P = getAsmVOP3P<NumSrcArgs, HasModifiers, HasClamp, HasOpSel>.ret;
2451  field string AsmVOP3OpSel = getAsmVOP3OpSel<NumSrcArgs,
2452                                              HasClamp,
2453                                              HasOMod,
2454                                              HasSrc0FloatMods,
2455                                              HasSrc1FloatMods,
2456                                              HasSrc2FloatMods>.ret;
2457  field string AsmVOP3DPP = getAsmVOP3DPP<AsmVOP3Base>.ret;
2458  field string AsmVOP3DPP16 = getAsmVOP3DPP16<AsmVOP3Base>.ret;
2459  field string AsmVOP3DPP8 = getAsmVOP3DPP8<AsmVOP3Base>.ret;
2460  field string AsmSDWA = getAsmSDWA<HasDst, NumSrcArgs, DstVT>.ret;
2461  field string AsmSDWA9 = getAsmSDWA9<HasDst, HasSDWAOMod, NumSrcArgs, DstVT>.ret;
2462  field string AsmVOPDX = getAsmVOPDPart<NumSrcArgs, "X">.ret;
2463  field string AsmVOPDY = getAsmVOPDPart<NumSrcArgs, "Y">.ret;
2464  field string TieRegDPP = "$old";
2465}
2466
// Profile rebuilt from p.ArgVT with every extended-encoding flag listed below
// forced to 0 (generic ext, DPP, VOP3-DPP, 32/64-bit DPP, SDWA and SDWA9).
2467  class VOP_NO_EXT <VOPProfile p> : VOPProfile <p.ArgVT> {
2468  let HasExt = 0;
2469  let HasExtDPP = 0;
2470  let HasExtVOP3DPP = 0;
2471  let HasExt32BitDPP = 0;
2472  let HasExt64BitDPP = 0;
2473  let HasExtSDWA = 0;
2474  let HasExtSDWA9 = 0; // base default is `HasExtSDWA`; cleared explicitly here
2475}
2476
// Profile rebuilt from p.ArgVT whose NeedPatGen is set to `mode`
// (PatGenMode.NoPattern when `mode` is omitted). Only p.ArgVT is forwarded to
// VOPProfile; the optional EnableClamp template argument is not passed on.
2477class VOP_PAT_GEN <VOPProfile p, int mode=PatGenMode.NoPattern> : VOPProfile <p.ArgVT> {
2478  let NeedPatGen = mode;
2479}
2480
2481// VOPC_Profile_t16, VOPC_NoSdst_Profile_t16, VOPC_Class_Profile_t16,
2482// VOPC_Class_NoSdst_Profile_t16, and VOP_MAC_F16_t16 do not inherit from this
2483// class, so copy changes to this class in those profiles.
//
// Profile rebuilt from P.ArgVT with IsTrue16 and IsRealTrue16 set. OpSel and
// modifiers are forced on, and the destination / source register operands and
// source-modifier operands are re-derived with IsTrue16 = 1 (and IsFake16 = 0
// where that argument exists). Src1VOP3DPP and Src2VOP3DPP are not overridden.
2484class VOPProfile_True16<VOPProfile P> : VOPProfile<P.ArgVT> {
2485  let IsTrue16 = 1;
2486  let IsRealTrue16 = 1;
2487
2488  let HasOpSel = 1;
2489  let HasModifiers = 1; // All instructions at least have OpSel.
2490
2491  // Most DstVT are 16-bit, but not all.
2492  let DstRC = getVALUDstForVT<DstVT, 1 /*IsTrue16*/, 0 /*IsVOP3Encoding*/>.ret;
2494  let Src0RC32 = getVOPSrc0ForVT<Src0VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret;
2495  let Src1RC32 = getVregSrcForVT<Src1VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret;
2496  let Src0DPP = getVregSrcForVT<Src0VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret;
2497  let Src1DPP = getVregSrcForVT<Src1VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret;
2498  let Src2DPP = getVregSrcForVT<Src2VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret;
2499  let Src0ModDPP = getSrcModDPP_t16<Src0VT, 0 /*IsFake16*/>.ret;
2500  let Src1ModDPP = getSrcModDPP_t16<Src1VT, 0 /*IsFake16*/>.ret;
2501  let Src2ModDPP = getSrcModDPP_t16<Src2VT, 0 /*IsFake16*/>.ret;
2502  let Src0VOP3DPP = VGPRSrc_16;
2503  let Src0ModVOP3DPP = getSrcModVOP3DPP<Src0VT, 0 /*IsFake16*/>.ret;
2504  let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 0 /*IsFake16*/>.ret;
2505  let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 0 /*IsFake16*/>.ret;
2506
  // The single DstRC64 assignment for this class. An earlier
  // `let DstRC64 = getVALUDstForVT<DstVT>.ret;` in this body was overwritten
  // by this one (the later `let` of a field wins) and has been removed.
2507  let DstRC64 = getVALUDstForVT<DstVT, 1 /*IsTrue16*/, 1 /*IsVOP3Encoding*/>.ret;
2508  let Src0RC64 = getVOP3SrcForVT<Src0VT, 1 /*IsTrue16*/>.ret;
2509  let Src1RC64 = getVOP3SrcForVT<Src1VT, 1 /*IsTrue16*/>.ret;
2510  let Src2RC64 = getVOP3SrcForVT<Src2VT, 1 /*IsTrue16*/>.ret;
2511  let Src0Mod = getSrcMod<Src0VT, 1 /*IsTrue16*/>.ret;
2512  let Src1Mod = getSrcMod<Src1VT, 1 /*IsTrue16*/>.ret;
2513  let Src2Mod = getSrcMod<Src2VT, 1 /*IsTrue16*/>.ret;
2514}
2515
// Profile rebuilt from P.ArgVT with IsTrue16 set but, unlike
// VOPProfile_True16, without IsRealTrue16. The 32-bit destination, the Src1
// 32-bit operand and the DPP source operands are derived with
// IsTrue16 = 1 / IsFake16 = 1.
2516class VOPProfile_Fake16<VOPProfile P> : VOPProfile<P.ArgVT> {
2517  let IsTrue16 = 1;
2518  // Most DstVT are 16-bit, but not all
2519  let DstRC = getVALUDstForVT_fake16<DstVT>.ret;
2520  let DstRC64 = getVALUDstForVT<DstVT>.ret;
2521  let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
2522  let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
2523  let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
2524  let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
  // getSrcModDPP_t16 is used with its second argument left at the default
  // (VOPProfile_True16 passes 0 /*IsFake16*/ explicitly).
  // NOTE(review): presumably the default selects the fake16 form - confirm.
2525  let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret;
2526  let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
2527  let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
2528}
2529
// Named VOPProfile instances built from 16-bit scalar types. The list is the
// profile's ArgVT; `untyped` marks an absent operand (VOPProfile derives
// HasDst/HasSrcN by comparing each type against untyped).
// NOTE(review): list order presumably is [Dst, Src0, Src1, Src2], matching the
// VOP_<Dst>_<Src0>_... naming; the mapping lives in VOPProfile's header - confirm.
2530def VOP_F16_F16 : VOPProfile<[f16, f16, untyped, untyped]>;
2531def VOP_F16_I16 : VOPProfile <[f16, i16, untyped, untyped]>;
2532def VOP_I16_F16 : VOPProfile <[i16, f16, untyped, untyped]>;
2533def VOP_I16_I16 : VOPProfile <[i16, i16, untyped, untyped]>;
2534
2535def VOP_F16_F16_F16 : VOPProfile <[f16, f16, f16, untyped]>;
2536def VOP_F16_F16_I16 : VOPProfile <[f16, f16, i16, untyped]>;
2537def VOP_F16_F16_I32 : VOPProfile <[f16, f16, i32, untyped]>;
2538def VOP_I16_I16_I16 : VOPProfile <[i16, i16, i16, untyped]>;
// Same types as VOP_I16_I16_I16, but passes EnableClamp = 1 (feeds HasClamp).
2539def VOP_I16_I16_I16_ARITH : VOPProfile <[i16, i16, i16, untyped], /*EnableClamp=*/1>;
2540
// NOTE(review): the next three lists carry a fifth `untyped` entry, unlike the
// four-element lists used elsewhere - confirm the extra entry is intentional.
2541def VOP_I16_I16_I16_I16 : VOPProfile <[i16, i16, i16, i16, untyped]>;
2542def VOP_F16_F16_F16_F16 : VOPProfile <[f16, f16, f16, f16, untyped]>;
2543
2544def VOP_I32_I16_I16_I32 : VOPProfile <[i32, i16, i16, i32, untyped]>;
2545def VOP_I32_I16 : VOPProfile <[i32, i16, untyped, untyped]>;
2546def VOP_I16_I32 : VOPProfile <[i16, i32, untyped, untyped]>;
2547
// Named VOPProfile instances involving packed 16-bit vector types (v2f16,
// v2i16, v2bf16), plus VOP_B32_F16_F16. `untyped` marks an absent operand.
2548def VOP_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, untyped]>;
2549def VOP_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, untyped]>;
// Note: the "B32" in the name corresponds to an i32 first entry.
2550def VOP_B32_F16_F16 : VOPProfile <[i32, f16, f16, untyped]>;
2551
2552def VOP_V2F16_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, v2f16]>;
2553def VOP_V2I16_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, v2i16]>;
2554def VOP_V2I16_F32_F32 : VOPProfile <[v2i16, f32, f32, untyped]>;
2555def VOP_V2I16_I32_I32 : VOPProfile <[v2i16, i32, i32, untyped]>;
2556
2557def VOP_F16_V2F16_V2F16_F16 : VOPProfile <[f16, v2f16, v2f16, f16]>;
2558def VOP_BF16_V2BF16_V2BF16_BF16: VOPProfile <[bf16, v2bf16, v2bf16, bf16]>;
2559def VOP_F32_V2BF16_V2BF16_F32 : VOPProfile <[f32, v2bf16, v2bf16, f32]>;
2560
2561def VOP_F32_V2F16_V2F16_V2F16 : VOPProfile <[f32, v2f16, v2f16, v2f16]>;
2562
// Profile with every ArgVT entry `untyped`, i.e. no typed destination or source.
2563def VOP_NONE : VOPProfile <[untyped, untyped, untyped, untyped]>;
2564
// Profiles with one typed entry after the first (32/64-bit types, plus the
// f16<->f32 pair).
2565def VOP_F32_F32 : VOPProfile <[f32, f32, untyped, untyped]>;
2566def VOP_F32_F64 : VOPProfile <[f32, f64, untyped, untyped]>;
2567def VOP_F32_I32 : VOPProfile <[f32, i32, untyped, untyped]>;
2568def VOP_F64_F32 : VOPProfile <[f64, f32, untyped, untyped]>;
2569def VOP_F64_F64 : VOPProfile <[f64, f64, untyped, untyped]>;
2570def VOP_F64_I32 : VOPProfile <[f64, i32, untyped, untyped]>;
2571def VOP_I32_F32 : VOPProfile <[i32, f32, untyped, untyped]>;
2572def VOP_I32_F64 : VOPProfile <[i32, f64, untyped, untyped]>;
2573def VOP_I32_I32 : VOPProfile <[i32, i32, untyped, untyped]>;
2574def VOP_F16_F32 : VOPProfile <[f16, f32, untyped, untyped]>;
2575def VOP_F32_F16 : VOPProfile <[f32, f16, untyped, untyped]>;
2576def VOP_I64_I64 : VOPProfile <[i64, i64, untyped, untyped]>;
2577
// Profiles with two typed entries after the first (VOP_F32_F16_F16_F16 has three).
2578def VOP_F32_F32_F16 : VOPProfile <[f32, f32, f16, untyped]>;
2579def VOP_F32_F32_F32 : VOPProfile <[f32, f32, f32, untyped]>;
2580def VOP_F32_F32_I32 : VOPProfile <[f32, f32, i32, untyped]>;
2581def VOP_F64_F64_F64 : VOPProfile <[f64, f64, f64, untyped]>;
2582def VOP_F64_F64_I32 : VOPProfile <[f64, f64, i32, untyped]>;
2583def VOP_I32_F32_F32 : VOPProfile <[i32, f32, f32, untyped]>;
2584def VOP_I32_F32_I32 : VOPProfile <[i32, f32, i32, untyped]>;
2585def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>;
// Same types as VOP_I32_I32_I32, but passes EnableClamp = 1 (feeds HasClamp).
2586def VOP_I32_I32_I32_ARITH : VOPProfile <[i32, i32, i32, untyped], /*EnableClamp=*/1>;
2587def VOP_V2F16_F32_F32 : VOPProfile <[v2f16, f32, f32, untyped]>;
2588def VOP_F32_F16_F16_F16 : VOPProfile <[f32, f16, f16, f16]>;
2589
2590def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>;
2591def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>;
2592def VOP_I64_I64_I64 : VOPProfile <[i64, i64, i64, untyped]>;
2593
// Profiles with all four entries typed.
2594def VOP_F16_F32_F16_F32 : VOPProfile <[f16, f32, f16, f32]>;
2595def VOP_F32_F32_F16_F16 : VOPProfile <[f32, f32, f16, f16]>;
2596def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>;
2597def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>;
2598def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>;
2599def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>;
2600def VOP_I32_F32_I32_I32 : VOPProfile <[i32, f32, i32, i32]>;
2601def VOP_I64_I64_I32_I64 : VOPProfile <[i64, i64, i32, i64]>;
2602def VOP_V4I32_I64_I32_V4I32 : VOPProfile <[v4i32, i64, i32, v4i32]>;
2603
// Four-entry profiles whose middle two entries are packed 16-bit vectors and
// whose first and last entries are the same 32-bit scalar type.
2604def VOP_F32_V2F16_V2F16_F32 : VOPProfile <[f32, v2f16, v2f16, f32]>;
2605def VOP_I32_V2I16_V2I16_I32 : VOPProfile <[i32, v2i16, v2i16, i32]>;
2606
// Profiles whose first and last entries are the same multi-element vector type.
2607def VOP_V4F32_F32_F32_V4F32       : VOPProfile <[v4f32,  f32,   f32,   v4f32]>;
2608def VOP_V16F32_F32_F32_V16F32     : VOPProfile <[v16f32, f32,   f32,   v16f32]>;
2609def VOP_V32F32_F32_F32_V32F32     : VOPProfile <[v32f32, f32,   f32,   v32f32]>;
2610def VOP_V4F32_V4F16_V4F16_V4F32   : VOPProfile <[v4f32,  v4f16, v4f16, v4f32]>;
2611def VOP_V16F32_V4F16_V4F16_V16F32 : VOPProfile <[v16f32, v4f16, v4f16, v16f32]>;
2612def VOP_V32F32_V4F16_V4F16_V32F32 : VOPProfile <[v32f32, v4f16, v4f16, v32f32]>;
2613def VOP_V4F32_V2I16_V2I16_V4F32   : VOPProfile <[v4f32,  v2i16, v2i16, v4f32]>;
2614def VOP_V16F32_V2I16_V2I16_V16F32 : VOPProfile <[v16f32, v2i16, v2i16, v16f32]>;
2615def VOP_V32F32_V2I16_V2I16_V32F32 : VOPProfile <[v32f32, v2i16, v2i16, v32f32]>;
2616def VOP_V4I32_I32_I32_V4I32       : VOPProfile <[v4i32,  i32,   i32,   v4i32]>;
2617def VOP_V16I32_I32_I32_V16I32     : VOPProfile <[v16i32, i32,   i32,   v16i32]>;
2618def VOP_V32I32_I32_I32_V32I32     : VOPProfile <[v32i32, i32,   i32,   v32i32]>;
2619
2620def VOP_V4F64_F64_F64_V4F64 : VOPProfile <[v4f64, f64, f64, v4f64]>;
2621def VOP_V1F64_F64_F64_V1F64 : VOPProfile <[v1f64, f64, f64, v1f64]>;
2622
2623def VOP_V2F32_V2F32_V2F32_V2F32   : VOPProfile <[v2f32,  v2f32, v2f32, v2f32]>;
2624def VOP_V2F32_V2F32_V2F32         : VOPProfile <[v2f32,  v2f32, v2f32, untyped]>;
2625def VOP_V2I32_V2I32_V2I32         : VOPProfile <[v2i32,  v2i32, v2i32, untyped]>;
2626def VOP_V4F32_V4I16_V4I16_V4F32   : VOPProfile <[v4f32,  v4i16, v4i16, v4f32]>;
2627def VOP_V16F32_V4I16_V4I16_V16F32 : VOPProfile <[v16f32, v4i16, v4i16, v16f32]>;
2628def VOP_V32F32_V4I16_V4I16_V32F32 : VOPProfile <[v32f32, v4i16, v4i16, v32f32]>;
2629
2630def VOP_V4I32_I64_I64_V4I32       : VOPProfile <[v4i32,  i64,   i64,   v4i32]>;
2631def VOP_V16I32_I64_I64_V16I32     : VOPProfile <[v16i32, i64,   i64,   v16i32]>;
2632def VOP_V4F32_V2F32_V2F32_V4F32   : VOPProfile <[v4f32,  v2f32, v2f32, v4f32]>;
2633def VOP_V16F32_V2F32_V2F32_V16F32 : VOPProfile <[v16f32, v2f32, v2f32, v16f32]>;
2634def VOP_V4F32_I64_I64_V4F32       : VOPProfile <[v4f32,  i64,   i64,   v4f32]>;
2635def VOP_V16F32_I64_I64_V16F32     : VOPProfile <[v16f32, i64,   i64,   v16f32]>;
2636
// Profiles whose last entry is i32 and whose third entry has twice as many
// elements as the second.
2637def VOP_V4F32_V4F16_V8F16_I32     : VOPProfile <[v4f32,  v4f16, v8f16, i32]>;
2638def VOP_V16F32_V4F16_V8F16_I32    : VOPProfile <[v16f32, v4f16, v8f16, i32]>;
2639def VOP_V4F32_V4I16_V8I16_I32     : VOPProfile <[v4f32,  v4i16, v8i16, i32]>;
2640def VOP_V16F32_V4I16_V8I16_I32    : VOPProfile <[v16f32, v4i16, v8i16, i32]>;
2641def VOP_V4I32_V2I32_V4I32_I32     : VOPProfile <[v4i32,  v2i32, v4i32, i32]>;
2642def VOP_V16I32_V2I32_V4I32_I32    : VOPProfile <[v16i32, v2i32, v4i32, i32]>;
2643def VOP_V4F32_V2I32_V4I32_I32     : VOPProfile <[v4f32,  v2i32, v4i32, i32]>;
2644def VOP_V16F32_V2I32_V4I32_I32    : VOPProfile <[v16f32, v2i32, v4i32, i32]>;
2645
// Tag class carrying the two fields the getCommuteOrig / getCommuteRev
// instruction mappings filter on: RevOp is the row key shared by an opcode
// pair, IsOrig is the column that distinguishes the two members.
2646class Commutable_REV <string revOp, bit isOrig> {
2647  string RevOp = revOp;
2648  bit IsOrig = isOrig;
2649}
2650
2651//===----------------------------------------------------------------------===//
2652// Interpolation opcodes
2653//===----------------------------------------------------------------------===//
2654
// RegisterOperand over register class `rc` that passes "printVINTRPDst" as
// RegisterOperand's second argument (the operand's print method name).
2655class VINTRPDstOperand <RegisterClass rc> : RegisterOperand <rc, "printVINTRPDst">;
2656
// Pseudo form of an interpolation instruction: empty asm string, carries the
// selection pattern, and is tagged SIEncodingFamily.NONE so getMCOpcodeGen
// can use it as the key row entry.
2657class VINTRP_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
2658  VINTRPCommon <outs, ins, "", pattern>,
2659  SIMCInstr<opName, SIEncodingFamily.NONE> {
2660  let isPseudo = 1;
2661  let isCodeGenOnly = 1;
2662}
2663
2664// FIXME-GFX10: WIP.
// Real (encoded) interpolation instruction using the VINTRPe<op> encoding and
// no pattern. The encoding family is a parameter and the body sets no
// predicate or decoder namespace; callers supply those (VINTRP_m instantiates
// this class for both the SI and GFX10 families).
2665class VINTRP_Real_si <bits <2> op, string opName, dag outs, dag ins,
2666                      string asm, int encodingFamily> :
2667  VINTRPCommon <outs, ins, asm, []>,
2668  VINTRPe <op>,
2669  SIMCInstr<opName, encodingFamily> {
2670}
2671
// Real (encoded) interpolation instruction using the VINTRPe_vi<op> encoding,
// fixed to SIEncodingFamily.VI, assembled under isGFX8GFX9 and decoded in the
// "GFX8" namespace.
2672class VINTRP_Real_vi <bits <2> op, string opName, dag outs, dag ins,
2673                      string asm> :
2674  VINTRPCommon <outs, ins, asm, []>,
2675  VINTRPe_vi <op>,
2676  SIMCInstr<opName, SIEncodingFamily.VI> {
2677  let AssemblerPredicate = isGFX8GFX9;
2678  let DecoderNamespace = "GFX8";
2679}
2680
2681// FIXME-GFX10: WIP.
// Defines one interpolation instruction in four forms sharing NAME as the
// SIMCInstr op name: the pseudo (no suffix, carries `pattern`), and the real
// encodings _si (GFX6/GFX7), _vi (VINTRP_Real_vi) and _gfx10 (GFX10 only).
// _si and _gfx10 both use VINTRP_Real_si and differ only in encoding family,
// assembler predicate and decoder namespace.
2682multiclass VINTRP_m <bits <2> op, dag outs, dag ins, string asm,
2683                     list<dag> pattern = []> {
2684  def "" : VINTRP_Pseudo <NAME, outs, ins, pattern>;
2685
2686  let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
2687    def _si : VINTRP_Real_si <op, NAME, outs, ins, asm, SIEncodingFamily.SI>;
2688  } // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"
2689
2690  def _vi : VINTRP_Real_vi <op, NAME, outs, ins, asm>;
2691
2692  let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
2693    def _gfx10 : VINTRP_Real_si<op, NAME, outs, ins, asm, SIEncodingFamily.GFX10>;
2694  } // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10"
2695}
2696
2697//===----------------------------------------------------------------------===//
2698// Vector instruction mappings
2699//===----------------------------------------------------------------------===//
2700
2701// Maps an opcode in e32 form to its e64 equivalent.
//   Rows:   records of class VOP that share the same OpName.
//   Key:    Size == 4, VOP3 == 0.
//   Value:  the row member with Size == 8, VOP3 == 1.
2702def getVOPe64 : InstrMapping {
2703  let FilterClass = "VOP";
2704  let RowFields = ["OpName"];
2705  let ColFields = ["Size", "VOP3"];
2706  let KeyCol = ["4", "0"];
2707  let ValueCols = [["8", "1"]];
2708}
2709
2710// Maps an opcode in e64 form to its e32 equivalent (inverse of getVOPe64).
//   Rows:   records of class VOP that share the same OpName.
//   Key:    Size == 8, VOP3 == 1.
//   Value:  the row member with Size == 4, VOP3 == 0.
2711def getVOPe32 : InstrMapping {
2712  let FilterClass = "VOP";
2713  let RowFields = ["OpName"];
2714  let ColFields = ["Size", "VOP3"];
2715  let KeyCol = ["8", "1"];
2716  let ValueCols = [["4", "0"]];
2717}
2718
2719// Maps ordinary instructions to their SDWA counterparts.
//   Rows are VOP records sharing an OpName; the key has
//   AsmVariantName == "Default" and the value has AsmVariantName == "SDWA".
2720def getSDWAOp : InstrMapping {
2721  let FilterClass = "VOP";
2722  let RowFields = ["OpName"];
2723  let ColFields = ["AsmVariantName"];
2724  let KeyCol = ["Default"];
2725  let ValueCols = [["SDWA"]];
2726}
2727
2728// Maps SDWA instructions to their ordinary counterparts (inverse of getSDWAOp).
//   Rows are VOP records sharing an OpName; the key has
//   AsmVariantName == "SDWA" and the value has AsmVariantName == "Default".
2729def getBasicFromSDWAOp : InstrMapping {
2730  let FilterClass = "VOP";
2731  let RowFields = ["OpName"];
2732  let ColFields = ["AsmVariantName"];
2733  let KeyCol = ["SDWA"];
2734  let ValueCols = [["Default"]];
2735}
2736
2737// Maps ordinary instructions to their DPP counterparts.
//   Rows are VOP records sharing an OpName; the key has
//   AsmVariantName == "Default" and the value has AsmVariantName == "DPP".
2738def getDPPOp32 : InstrMapping {
2739  let FilterClass = "VOP";
2740  let RowFields = ["OpName"];
2741  let ColFields = ["AsmVariantName"];
2742  let KeyCol = ["Default"];
2743  let ValueCols = [["DPP"]];
2744}
2745
// Maps a VOP record whose AsmVariantName is "VOP3" to the record with the same
// OpName whose AsmVariantName is "VOP3_DPP".
2746def getDPPOp64 : InstrMapping {
2747  let FilterClass = "VOP";
2748  let RowFields = ["OpName"];
2749  let ColFields = ["AsmVariantName"];
2750  let KeyCol = ["VOP3"];
2751  let ValueCols = [["VOP3_DPP"]];
2752}
2753
2754// Maps a commuted opcode to its original version.
//   Rows are Commutable_REV records sharing a RevOp; the key has IsOrig == 0
//   and the value has IsOrig == 1.
2755def getCommuteOrig : InstrMapping {
2756  let FilterClass = "Commutable_REV";
2757  let RowFields = ["RevOp"];
2758  let ColFields = ["IsOrig"];
2759  let KeyCol = ["0"];
2760  let ValueCols = [["1"]];
2761}
2762
2763// Maps an original opcode to its commuted version (inverse of getCommuteOrig).
//   Rows are Commutable_REV records sharing a RevOp; the key has IsOrig == 1
//   and the value has IsOrig == 0.
2764def getCommuteRev : InstrMapping {
2765  let FilterClass = "Commutable_REV";
2766  let RowFields = ["RevOp"];
2767  let ColFields = ["IsOrig"];
2768  let KeyCol = ["1"];
2769  let ValueCols = [["0"]];
2770}
2771
// Maps a pseudo instruction to its real opcode for each encoding family.
//   Rows are SIMCInstr records sharing a PseudoInstr; the key is the record
//   whose Subtarget is SIEncodingFamily.NONE, and there is one value column
//   per non-NONE family, listed in ascending SIEncodingFamily value so that
//   column i corresponds to family value i.
2772def getMCOpcodeGen : InstrMapping {
2773  let FilterClass = "SIMCInstr";
2774  let RowFields = ["PseudoInstr"];
2775  let ColFields = ["Subtarget"];
2776  let KeyCol = [!cast<string>(SIEncodingFamily.NONE)];
2777  // These columns must be kept in sync with the SIEncodingFamily enumeration.
2778  let ValueCols = [[!cast<string>(SIEncodingFamily.SI)],
2779                   [!cast<string>(SIEncodingFamily.VI)],
2780                   [!cast<string>(SIEncodingFamily.SDWA)],
2781                   [!cast<string>(SIEncodingFamily.SDWA9)],
2782                   // GFX80 encoding is added to work around a multiple matching
2783                   // issue for buffer instructions with unpacked d16 data. This
2784                   // does not actually change the encoding, and thus may be
2785                   // removed later.
2786                   [!cast<string>(SIEncodingFamily.GFX80)],
2787                   [!cast<string>(SIEncodingFamily.GFX9)],
2788                   [!cast<string>(SIEncodingFamily.GFX10)],
2789                   [!cast<string>(SIEncodingFamily.SDWA10)],
2790                   [!cast<string>(SIEncodingFamily.GFX90A)],
2791                   [!cast<string>(SIEncodingFamily.GFX940)],
2792                   [!cast<string>(SIEncodingFamily.GFX11)],
2793                   [!cast<string>(SIEncodingFamily.GFX12)]];
2794}
2795
2796// Get equivalent SOPK instruction.
//   Rows are SOPKInstTable records sharing a BaseCmpOp; the key has
//   IsSOPK == 0 and the value has IsSOPK == 1.
2797def getSOPKOp : InstrMapping {
2798  let FilterClass = "SOPKInstTable";
2799  let RowFields = ["BaseCmpOp"];
2800  let ColFields = ["IsSOPK"];
2801  let KeyCol = ["0"];
2802  let ValueCols = [["1"]];
2803}
2804
// Maps a MUBUFAddr64Table record with IsAddr64 == 0 to the record with the
// same OpName and IsAddr64 == 1.
2805def getAddr64Inst : InstrMapping {
2806  let FilterClass = "MUBUFAddr64Table";
2807  let RowFields = ["OpName"];
2808  let ColFields = ["IsAddr64"];
2809  let KeyCol = ["0"];
2810  let ValueCols = [["1"]];
2811}
2812
// Key and value columns are both IsAddr64 == 1, so a MUBUFAddr64Table record
// with IsAddr64 == 1 maps to the record with the same OpName and
// IsAddr64 == 1.
// NOTE(review): presumably used as an "is this an addr64 opcode" test rather
// than a real translation - confirm against the C++ callers.
2813def getIfAddr64Inst : InstrMapping {
2814  let FilterClass = "MUBUFAddr64Table";
2815  let RowFields = ["OpName"];
2816  let ColFields = ["IsAddr64"];
2817  let KeyCol = ["1"];
2818  let ValueCols = [["1"]];
2819}
2820
2821// Maps a GLOBAL to its SADDR form.
//   Rows are GlobalSaddrTable records sharing a SaddrOp; the key has
//   IsSaddr == 0 and the value has IsSaddr == 1.
2822def getGlobalSaddrOp : InstrMapping {
2823  let FilterClass = "GlobalSaddrTable";
2824  let RowFields = ["SaddrOp"];
2825  let ColFields = ["IsSaddr"];
2826  let KeyCol = ["0"];
2827  let ValueCols = [["1"]];
2828}
2829
2830// Maps a GLOBAL SADDR to its VADDR form (inverse of getGlobalSaddrOp).
//   Rows are GlobalSaddrTable records sharing a SaddrOp; the key has
//   IsSaddr == 1 and the value has IsSaddr == 0.
2831def getGlobalVaddrOp : InstrMapping {
2832  let FilterClass = "GlobalSaddrTable";
2833  let RowFields = ["SaddrOp"];
2834  let ColFields = ["IsSaddr"];
2835  let KeyCol = ["1"];
2836  let ValueCols = [["0"]];
2837}
2838
2839// Maps a v_cmpx opcode with sdst to opcode without sdst.
//   Rows are VCMPXNoSDstTable records sharing a NoSDstOp; the key has
//   HasSDst == 1 and the value has HasSDst == 0.
2840def getVCMPXNoSDstOp : InstrMapping {
2841  let FilterClass = "VCMPXNoSDstTable";
2842  let RowFields = ["NoSDstOp"];
2843  let ColFields = ["HasSDst"];
2844  let KeyCol = ["1"];
2845  let ValueCols = [["0"]];
2846}
2847
2848// Maps a SOPP to a SOPP with S_NOP
//   Rows are SOPPRelaxTable records sharing a KeyName; the key has
//   IsRelaxed == 0 and the value has IsRelaxed == 1.
2849def getSOPPWithRelaxation : InstrMapping {
2850  let FilterClass = "SOPPRelaxTable";
2851  let RowFields = ["KeyName"];
2852  let ColFields = ["IsRelaxed"];
2853  let KeyCol = ["0"];
2854  let ValueCols = [["1"]];
2855}
2856
2857// Maps flat scratch opcodes by addressing modes
//   All getFlatScratchInst* mappings use FlatScratchInst records sharing an
//   SVOp as rows and the Mode field as the column. This one maps the
//   Mode == "SS" record to the Mode == "ST" record.
2858def getFlatScratchInstSTfromSS : InstrMapping {
2859  let FilterClass = "FlatScratchInst";
2860  let RowFields = ["SVOp"];
2861  let ColFields = ["Mode"];
2862  let KeyCol = ["SS"];
2863  let ValueCols = [["ST"]];
2864}
2865
// Maps the FlatScratchInst record with Mode == "SV" to the record with the
// same SVOp and Mode == "SS".
2866def getFlatScratchInstSSfromSV : InstrMapping {
2867  let FilterClass = "FlatScratchInst";
2868  let RowFields = ["SVOp"];
2869  let ColFields = ["Mode"];
2870  let KeyCol = ["SV"];
2871  let ValueCols = [["SS"]];
2872}
2873
// Maps the FlatScratchInst record with Mode == "SVS" to the record with the
// same SVOp and Mode == "SV".
2874def getFlatScratchInstSVfromSVS : InstrMapping {
2875  let FilterClass = "FlatScratchInst";
2876  let RowFields = ["SVOp"];
2877  let ColFields = ["Mode"];
2878  let KeyCol = ["SVS"];
2879  let ValueCols = [["SV"]];
2880}
2881
// Maps the FlatScratchInst record with Mode == "SS" to the record with the
// same SVOp and Mode == "SV" (inverse of getFlatScratchInstSSfromSV).
2882def getFlatScratchInstSVfromSS : InstrMapping {
2883  let FilterClass = "FlatScratchInst";
2884  let RowFields = ["SVOp"];
2885  let ColFields = ["Mode"];
2886  let KeyCol = ["SS"];
2887  let ValueCols = [["SV"]];
2888}
2889
// Maps the MFMATable record with IsMac == 1 to the record with the same FMAOp
// and IsMac == 0.
// NOTE(review): per the mapping's name the IsMac == 0 record is presumably the
// early-clobber variant - confirm against the MFMATable definitions.
2890def getMFMAEarlyClobberOp : InstrMapping {
2891  let FilterClass = "MFMATable";
2892  let RowFields = ["FMAOp"];
2893  let ColFields = ["IsMac"];
2894  let KeyCol = ["1"];
2895  let ValueCols = [["0"]];
2896}
2897
2898// Maps a v_cmp instruction to its v_cmpx equivalent.
//   Rows are VCMPVCMPXTable records sharing a VCMPOp; the key has
//   IsVCMPX == 0 and the value has IsVCMPX == 1.
2899def getVCMPXOpFromVCMP : InstrMapping {
2900  let FilterClass = "VCMPVCMPXTable";
2901  let RowFields = ["VCMPOp"];
2902  let ColFields = ["IsVCMPX"];
2903  let KeyCol = ["0"];
2904  let ValueCols = [["1"]];
2905}
2906
// Searchable table with one entry per VOPD_Component record, exposing the
// BaseVOP, VOPDOp and CanBeVOPDX fields as C++ type VOPDComponentInfo.
// The primary lookup is by BaseVOP and is named getVOPDComponentHelper.
2907def VOPDComponentTable : GenericTable {
2908  let FilterClass = "VOPD_Component";
2909  let CppTypeName = "VOPDComponentInfo";
2910  let Fields = ["BaseVOP", "VOPDOp", "CanBeVOPDX"];
2911  let PrimaryKey = ["BaseVOP"];
2912  let PrimaryKeyName = "getVOPDComponentHelper";
2913}
2914
// Secondary lookup into VOPDComponentTable keyed by VOPDOp.
2915def getVOPDBaseFromComponent : SearchIndex {
2916  let Table = VOPDComponentTable;
2917  let Key = ["VOPDOp"];
2918}
2919
// Searchable table with one entry per VOPD_Base record, exposing the Opcode,
// OpX, OpY and SubTgt fields as C++ type VOPDInfo. The primary lookup is by
// Opcode and is named getVOPDOpcodeHelper.
2920def VOPDPairs : GenericTable {
2921  let FilterClass = "VOPD_Base";
2922  let CppTypeName = "VOPDInfo";
2923  let Fields = ["Opcode", "OpX", "OpY", "SubTgt"];
2924  let PrimaryKey = ["Opcode"];
2925  let PrimaryKeyName = "getVOPDOpcodeHelper";
2926}
2927
// Secondary lookup into VOPDPairs keyed by the (OpX, OpY, SubTgt) triple.
2928def getVOPDInfoFromComponentOpcodes : SearchIndex {
2929  let Table = VOPDPairs;
2930  let Key = ["OpX", "OpY", "SubTgt"];
2931}
2932
2933include "SIInstructions.td"
2934
2935include "DSInstructions.td"
2936include "MIMGInstructions.td"
2937