xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/SMInstructions.td (revision e64fe029e9d3ce476e77a478318e0c3cd201ff08)
1//===---- SMInstructions.td - Scalar Memory Instruction Definitions -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
// Immediate offset operand for the SMRD encoding. The SI/CI real load
// definitions in this file use it as $offset and encode offset{7-0}.
def smrd_offset_8
    : NamedOperandU32<"SMRDOffset8", NamedMatchClass<"SMRDOffset8">> {
  let OperandType = "OPERAND_IMMEDIATE";
}
13
// Common base for SMEM immediate-offset operands. Encoding and decoding go
// through the dedicated hooks named below.
class SMEMOffset
    : NamedOperandU32<"SMEMOffset", NamedMatchClass<"SMEMOffset">> {
  let OperandType = "OPERAND_IMMEDIATE";
  let DecoderMethod = "decodeSMEMOffset";
  let EncoderMethod = "getSMEMOffsetEncoding";
}
20
// Plain SMEM immediate offset operand.
def smem_offset : SMEMOffset;

// SMEM immediate offset operand with its own print method; used for the
// immediate part of the SGPR + immediate offset mode.
def smem_offset_mod : SMEMOffset {
  let PrintMethod = "printSMEMOffsetMod";
}
26
27//===----------------------------------------------------------------------===//
28// Scalar Memory classes
29//===----------------------------------------------------------------------===//
30
// Base class for all scalar-memory pseudo instructions.
//
//   opName  - mnemonic; stored in Mnemonic so SM_Real can reuse it.
//   outs    - output operand dag.
//   ins     - input operand dag.
//   asmOps  - operand part of the asm string; stored in AsmOperands.
//   pattern - optional selection patterns (none by default).
//
// The pseudo itself carries an empty asm string; SM_Real assembles the final
// one from Mnemonic and AsmOperands.
class SM_Pseudo<string opName, dag outs, dag ins, string asmOps,
                list<dag> pattern = []>
    : InstSI<outs, ins, "", pattern>,
      SIMCInstr<opName, SIEncodingFamily.NONE> {
  let isPseudo = 1;
  let isCodeGenOnly = 1;

  // Defaults: loads, does not store, no side effects. Subclasses override.
  let mayLoad = 1;
  let mayStore = 0;
  let hasSideEffects = 0;

  let SMRD = 1;
  let LGKM_CNT = 1;
  let SchedRW = [WriteSMEM];
  let UseNamedOperandTable = 1;

  string Mnemonic = opName;
  string AsmOperands = asmOps;

  // Flags describing which operands/modifiers the instruction has.
  bits<1> has_sbase = 1;
  bits<1> has_sdst = 1;
  bit has_glc = 0;
  bit has_dlc = 0;
  bit has_offset = 0;
  bit has_soffset = 0;
  bit is_buffer = 0;
}
56
// Base class for the encodable ("real") scalar-memory instructions derived
// from a pseudo.
//
//   ps     - the pseudo supplying operand lists, asm operands and flags.
//   opName - mnemonic for the asm string; defaults to the pseudo's mnemonic.
class SM_Real<SM_Pseudo ps, string opName = ps.Mnemonic>
    : InstSI<ps.OutOperandList, ps.InOperandList, opName # ps.AsmOperands> {

  let isPseudo = 0;
  let isCodeGenOnly = 0;

  Instruction Opcode = !cast<Instruction>(NAME);

  // Properties taken over unchanged from the pseudo.
  let mayLoad              = ps.mayLoad;
  let mayStore             = ps.mayStore;
  let hasSideEffects       = ps.hasSideEffects;
  let SMRD                 = ps.SMRD;
  let LGKM_CNT             = ps.LGKM_CNT;
  let SchedRW              = ps.SchedRW;
  let UseNamedOperandTable = ps.UseNamedOperandTable;
  let SubtargetPredicate   = ps.SubtargetPredicate;
  let AsmMatchConverter    = ps.AsmMatchConverter;
  let IsAtomicRet          = ps.IsAtomicRet;
  let IsAtomicNoRet        = ps.IsAtomicNoRet;

  let TSFlags = ps.TSFlags;

  bit is_buffer = ps.is_buffer;

  // Operand fields available to the encoding classes.
  bits<7>  sbase;
  bits<7>  sdst;
  bits<32> offset;
  bits<8>  soffset;
  bits<5> cpol;
}
89
// Describes one way of supplying the offset to a scalar-memory instruction.
//
//   hasOffset  - the mode has an immediate $offset operand.
//   hasSOffset - the mode has a register $soffset operand.
//   variant    - suffix appended to the pseudo name for this mode.
//   ins        - input operand dag fragment contributed by this mode.
//   asm        - asm string fragment contributed by this mode.
class OffsetMode<bit hasOffset, bit hasSOffset, string variant, dag ins,
                 string asm> {
  string Variant = variant;
  bit HasOffset = hasOffset;
  bit HasSOffset = hasSOffset;
  dag Ins = ins;
  string Asm = asm;
}
98
// Immediate offset only.
def IMM_Offset
    : OffsetMode<1, 0, "_IMM", (ins smem_offset:$offset), "$offset">;

// Register offset only.
def SGPR_Offset
    : OffsetMode<0, 1, "_SGPR", (ins SReg_32:$soffset), "$soffset">;

// Register offset together with an immediate offset.
def SGPR_IMM_Offset
    : OffsetMode<1, 1, "_SGPR_IMM",
                 (ins SReg_32:$soffset, smem_offset_mod:$offset),
                 "$soffset$offset">;
104
// Pseudo for probe instructions: no outputs; inputs are an 8-bit immediate
// $sdata, the base register $sbase, and the offset operands of `offsets`.
class SM_Probe_Pseudo<string opName, RegisterClass baseClass,
                      OffsetMode offsets>
    : SM_Pseudo<opName, (outs),
                !con((ins i8imm:$sdata, baseClass:$sbase), offsets.Ins),
                " $sdata, $sbase, " # offsets.Asm> {
  let PseudoInstr = opName # offsets.Variant;
  let has_offset = offsets.HasOffset;
  let has_soffset = offsets.HasSOffset;
  let has_glc = 0;

  // Marked as neither load nor store, but as having side effects.
  let mayLoad = 0;
  let mayStore = 0;
  let ScalarStore = 0;
  let hasSideEffects = 1;
  let LGKM_CNT = 0;
}
119
// Pseudo for a scalar load: output $sdst in dstClass; inputs are the base
// register $sbase, the offset operands of `offsets`, and a CPol $cpol.
class SM_Load_Pseudo<string opName, RegisterClass baseClass,
                     RegisterClass dstClass, OffsetMode offsets>
    : SM_Pseudo<opName, (outs dstClass:$sdst),
                !con((ins baseClass:$sbase), offsets.Ins, (ins CPol:$cpol)),
                " $sdst, $sbase, " # offsets.Asm # "$cpol", []> {
  // Kept so real instructions can rebuild their own operand lists.
  RegisterClass BaseClass = baseClass;

  let PseudoInstr = opName # offsets.Variant;
  let has_offset = offsets.HasOffset;
  let has_soffset = offsets.HasSOffset;
  let has_glc = 1;
  let has_dlc = 1;

  let mayLoad = 1;
  let mayStore = 0;
}
134
// Pseudo for a scalar store: no outputs; inputs are the data register $sdata
// in srcClass, the base register $sbase, the offset operands of `offsets`,
// and a CPol $cpol.
class SM_Store_Pseudo<string opName, RegisterClass baseClass,
                      RegisterClass srcClass, OffsetMode offsets>
    : SM_Pseudo<opName, (outs),
                !con((ins srcClass:$sdata, baseClass:$sbase), offsets.Ins,
                     (ins CPol:$cpol)),
                " $sdata, $sbase, " # offsets.Asm # "$cpol"> {
  // Kept so real instructions can rebuild their own operand lists.
  RegisterClass BaseClass = baseClass;
  RegisterClass SrcClass = srcClass;

  let PseudoInstr = opName # offsets.Variant;
  let has_offset = offsets.HasOffset;
  let has_soffset = offsets.HasSOffset;
  let has_glc = 1;
  let has_dlc = 1;

  let mayLoad = 0;
  let mayStore = 1;
  let ScalarStore = 1;
}
151
// Pseudo for discard instructions: no outputs and no $sdst; inputs are a
// 64-bit base register $sbase and the offset operands of `offsets`.
class SM_Discard_Pseudo<string opName, OffsetMode offsets>
    : SM_Pseudo<opName, (outs), !con((ins SReg_64:$sbase), offsets.Ins),
                " $sbase, " # offsets.Asm> {
  let PseudoInstr = opName # offsets.Variant;
  let has_offset = offsets.HasOffset;
  let has_soffset = offsets.HasSOffset;
  let has_glc = 0;
  let has_sdst = 0;

  // Marked as neither load nor store, but as having side effects.
  let mayLoad = 0;
  let mayStore = 0;
  let ScalarStore = 0;
  let hasSideEffects = 1;
}
165
// Defines the load pseudo for each of the three offset modes
// (_IMM, _SGPR, _SGPR_IMM).
multiclass SM_Pseudo_Loads<string opName, RegisterClass baseClass,
                           RegisterClass dstClass> {
  def _IMM
      : SM_Load_Pseudo<opName, baseClass, dstClass, IMM_Offset>;
  def _SGPR
      : SM_Load_Pseudo<opName, baseClass, dstClass, SGPR_Offset>;
  def _SGPR_IMM
      : SM_Load_Pseudo<opName, baseClass, dstClass, SGPR_IMM_Offset>;
}
173
// Defines the store pseudo for each of the three offset modes
// (_IMM, _SGPR, _SGPR_IMM).
multiclass SM_Pseudo_Stores<string opName, RegisterClass baseClass,
                            RegisterClass srcClass> {
  def _IMM
      : SM_Store_Pseudo<opName, baseClass, srcClass, IMM_Offset>;
  def _SGPR
      : SM_Store_Pseudo<opName, baseClass, srcClass, SGPR_Offset>;
  def _SGPR_IMM
      : SM_Store_Pseudo<opName, baseClass, srcClass, SGPR_IMM_Offset>;
}
181
// Defines the discard pseudo for each of the three offset modes
// (_IMM, _SGPR, _SGPR_IMM).
multiclass SM_Pseudo_Discards<string opName> {
  def _IMM      : SM_Discard_Pseudo<opName, IMM_Offset>;
  def _SGPR     : SM_Discard_Pseudo<opName, SGPR_Offset>;
  def _SGPR_IMM : SM_Discard_Pseudo<opName, SGPR_IMM_Offset>;
}
187
// Pseudo producing a 64-bit result in $sdst with no inputs and no $sbase.
// `node` (default null_frag) is the pattern operator selected to it.
class SM_Time_Pseudo<string opName, SDPatternOperator node = null_frag>
    : SM_Pseudo<opName, (outs SReg_64_XEXEC:$sdst), (ins), " $sdst",
                [(set i64:$sdst, (node))]> {
  let has_sbase = 0;

  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 1;
}
197
// Pseudo with no operands at all (no $sdst, no $sbase) and side effects.
// `node` (default null_frag) is the pattern operator selected to it.
class SM_Inval_Pseudo<string opName, SDPatternOperator node = null_frag>
    : SM_Pseudo<opName, (outs), (ins), "", [(node)]> {
  let has_sdst = 0;
  let has_sbase = 0;

  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 1;
}
206
// Defines the probe pseudo for each of the three offset modes
// (_IMM, _SGPR, _SGPR_IMM).
multiclass SM_Pseudo_Probe<string opName, RegisterClass baseClass> {
  def _IMM      : SM_Probe_Pseudo<opName, baseClass, IMM_Offset>;
  def _SGPR     : SM_Probe_Pseudo<opName, baseClass, SGPR_Offset>;
  def _SGPR_IMM : SM_Probe_Pseudo<opName, baseClass, SGPR_IMM_Offset>;
}
212
// Pseudo producing a 32-bit result in $sdst with no inputs and no $sbase.
// `node` is the pattern operator selected to it.
class SM_WaveId_Pseudo<string opName, SDPatternOperator node>
    : SM_Pseudo<opName, (outs SReg_32_XM0_XEXEC:$sdst), (ins), " $sdst",
                [(set i32:$sdst, (node))]> {
  let has_sbase = 0;

  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 1;
}
221
222//===----------------------------------------------------------------------===//
223// Scalar Atomic Memory Classes
224//===----------------------------------------------------------------------===//
225
// Base class for scalar atomic pseudos.
//
//   isRet - whether the instruction is the returning variant; it drives the
//           `glc` field and the IsAtomicRet / IsAtomicNoRet flags.
class SM_Atomic_Pseudo<string opName, dag outs, dag ins, string asmOps,
                       bit isRet>
    : SM_Pseudo<opName, outs, ins, asmOps, []> {

  bit glc = isRet;

  let IsAtomicRet = isRet;
  let IsAtomicNoRet = !not(isRet);

  let has_glc = 1;
  let has_dlc = 1;
  let has_soffset = 1;

  let mayLoad = 1;
  let mayStore = 1;

  // Should these be set?
  let ScalarStore = 1;
  let hasSideEffects = 1;
  let maybeAtomic = 1;

  let AsmMatchConverter = "cvtSMEMAtomic";
}
248
// A concrete scalar atomic pseudo for one offset mode.
//
//   isRet            - returning variant: adds the $sdst output, ties it to
//                      $sdata, and uses CPol_GLC1 instead of CPol.
//   opNameWithSuffix - derived name: opName, the offset-mode suffix, and
//                      "_RTN" for the returning variant.
//   CPolTy           - derived operand type for $cpol.
class SM_Pseudo_Atomic<string opName,
                       RegisterClass baseClass,
                       RegisterClass dataClass,
                       OffsetMode offsets,
                       bit isRet,
                       string opNameWithSuffix =
                         opName # offsets.Variant # !if(isRet, "_RTN", ""),
                       Operand CPolTy = !if(isRet, CPol_GLC1, CPol)>
    : SM_Atomic_Pseudo<opName,
                       !if(isRet, (outs dataClass:$sdst), (outs)),
                       !con((ins dataClass:$sdata, baseClass:$sbase),
                            offsets.Ins, (ins CPolTy:$cpol)),
                       !if(isRet, " $sdst", " $sdata") #
                         ", $sbase, " # offsets.Asm # "$cpol",
                       isRet>,
      AtomicNoRet<opNameWithSuffix, isRet> {
  let PseudoInstr = opNameWithSuffix;
  let has_offset = offsets.HasOffset;
  let has_soffset = offsets.HasSOffset;

  // Only the returning variant ties $sdst to $sdata; $sdata is then left out
  // of the encoding.
  let Constraints = !if(isRet, "$sdst = $sdata", "");
  let DisableEncoding = !if(isRet, "$sdata", "");
}
272
// Defines the atomic pseudo for every offset mode, first the non-returning
// variants and then the returning (_RTN) ones.
multiclass SM_Pseudo_Atomics<string opName, RegisterClass baseClass,
                             RegisterClass dataClass> {
  def _IMM
      : SM_Pseudo_Atomic<opName, baseClass, dataClass, IMM_Offset, 0>;
  def _SGPR
      : SM_Pseudo_Atomic<opName, baseClass, dataClass, SGPR_Offset, 0>;
  def _SGPR_IMM
      : SM_Pseudo_Atomic<opName, baseClass, dataClass, SGPR_IMM_Offset, 0>;

  def _IMM_RTN
      : SM_Pseudo_Atomic<opName, baseClass, dataClass, IMM_Offset, 1>;
  def _SGPR_RTN
      : SM_Pseudo_Atomic<opName, baseClass, dataClass, SGPR_Offset, 1>;
  def _SGPR_IMM_RTN
      : SM_Pseudo_Atomic<opName, baseClass, dataClass, SGPR_IMM_Offset, 1>;
}
283
284//===----------------------------------------------------------------------===//
285// Scalar Memory Instructions
286//===----------------------------------------------------------------------===//
287
// 32-bit SMRD instructions use the SReg_32_XM0 register class rather than
// SReg_32 because SReg_32_XM0 does not include M0, and writing to M0 from an
// SMRD instruction will hang the GPU.

// XXX - SMEM instructions do not allow exec for the data operand, but is it
// allowed as sdst for SMRD on SI/CI?
defm S_LOAD_DWORD
    : SM_Pseudo_Loads<"s_load_dword", SReg_64, SReg_32_XM0_XEXEC>;
defm S_LOAD_DWORDX2
    : SM_Pseudo_Loads<"s_load_dwordx2", SReg_64, SReg_64_XEXEC>;
defm S_LOAD_DWORDX4
    : SM_Pseudo_Loads<"s_load_dwordx4", SReg_64, SReg_128>;
defm S_LOAD_DWORDX8
    : SM_Pseudo_Loads<"s_load_dwordx8", SReg_64, SReg_256>;
defm S_LOAD_DWORDX16
    : SM_Pseudo_Loads<"s_load_dwordx16", SReg_64, SReg_512>;
299
// Buffer loads: same shapes as the plain loads, with a 128-bit base register
// class and is_buffer set.
let is_buffer = 1 in {
  defm S_BUFFER_LOAD_DWORD
      : SM_Pseudo_Loads<"s_buffer_load_dword", SReg_128, SReg_32_XM0_XEXEC>;

  // FIXME: exec_lo/exec_hi appear to be allowed for SMRD loads on
  // SI/CI, but disallowed for SMEM on VI.
  defm S_BUFFER_LOAD_DWORDX2
      : SM_Pseudo_Loads<"s_buffer_load_dwordx2", SReg_128, SReg_64_XEXEC>;

  defm S_BUFFER_LOAD_DWORDX4
      : SM_Pseudo_Loads<"s_buffer_load_dwordx4", SReg_128, SReg_128>;

  defm S_BUFFER_LOAD_DWORDX8
      : SM_Pseudo_Loads<"s_buffer_load_dwordx8", SReg_128, SReg_256>;

  defm S_BUFFER_LOAD_DWORDX16
      : SM_Pseudo_Loads<"s_buffer_load_dwordx16", SReg_128, SReg_512>;
} // End is_buffer = 1
323
// Scalar stores, plain and buffer, all gated on HasScalarStores.
let SubtargetPredicate = HasScalarStores in {
  defm S_STORE_DWORD
      : SM_Pseudo_Stores<"s_store_dword", SReg_64, SReg_32_XM0_XEXEC>;
  defm S_STORE_DWORDX2
      : SM_Pseudo_Stores<"s_store_dwordx2", SReg_64, SReg_64_XEXEC>;
  defm S_STORE_DWORDX4
      : SM_Pseudo_Stores<"s_store_dwordx4", SReg_64, SReg_128>;

  let is_buffer = 1 in {
    defm S_BUFFER_STORE_DWORD
        : SM_Pseudo_Stores<"s_buffer_store_dword", SReg_128,
                           SReg_32_XM0_XEXEC>;

    defm S_BUFFER_STORE_DWORDX2
        : SM_Pseudo_Stores<"s_buffer_store_dwordx2", SReg_128, SReg_64_XEXEC>;

    defm S_BUFFER_STORE_DWORDX4
        : SM_Pseudo_Stores<"s_buffer_store_dwordx4", SReg_128, SReg_128>;
  } // End is_buffer = 1
} // End SubtargetPredicate = HasScalarStores
343
// Only S_MEMTIME is gated on HasSMemTimeInst; the braces make that scope
// explicit. S_DCACHE_INV carries no predicate here.
let SubtargetPredicate = HasSMemTimeInst in {
  def S_MEMTIME : SM_Time_Pseudo<"s_memtime", int_amdgcn_s_memtime>;
} // End SubtargetPredicate = HasSMemTimeInst

def S_DCACHE_INV : SM_Inval_Pseudo<"s_dcache_inv", int_amdgcn_s_dcache_inv>;
347
let SubtargetPredicate = isGFX7GFX8GFX9 in {
  def S_DCACHE_INV_VOL
      : SM_Inval_Pseudo<"s_dcache_inv_vol", int_amdgcn_s_dcache_inv_vol>;
} // End SubtargetPredicate = isGFX7GFX8GFX9
351
let SubtargetPredicate = isGFX8Plus in {
  // The write-back instructions additionally require HasScalarStores.
  let OtherPredicates = [HasScalarStores] in {
    def S_DCACHE_WB
        : SM_Inval_Pseudo<"s_dcache_wb", int_amdgcn_s_dcache_wb>;
    def S_DCACHE_WB_VOL
        : SM_Inval_Pseudo<"s_dcache_wb_vol", int_amdgcn_s_dcache_wb_vol>;
  } // End OtherPredicates = [HasScalarStores]

  defm S_ATC_PROBE : SM_Pseudo_Probe<"s_atc_probe", SReg_64>;

  let is_buffer = 1 in {
    defm S_ATC_PROBE_BUFFER : SM_Pseudo_Probe<"s_atc_probe_buffer", SReg_128>;
  } // End is_buffer = 1
} // End SubtargetPredicate = isGFX8Plus
363
let SubtargetPredicate = HasSMemRealTime in {
  def S_MEMREALTIME
      : SM_Time_Pseudo<"s_memrealtime", int_amdgcn_s_memrealtime>;
} // End SubtargetPredicate = HasSMemRealTime
366
// S_GL1_INV is defined without a selection pattern (node defaults to
// null_frag).
let SubtargetPredicate = isGFX10Plus in {
  def S_GL1_INV : SM_Inval_Pseudo<"s_gl1_inv">;
} // End SubtargetPredicate = isGFX10Plus

let SubtargetPredicate = HasGetWaveIdInst in {
  def S_GET_WAVEID_IN_WORKGROUP
      : SM_WaveId_Pseudo<"s_get_waveid_in_workgroup",
                         int_amdgcn_s_get_waveid_in_workgroup>;
} // End SubtargetPredicate = HasGetWaveIdInst
371
372
// Scalar scratch loads and stores; every definition here lists FLAT_SCR in
// Uses.
let SubtargetPredicate = HasScalarFlatScratchInsts, Uses = [FLAT_SCR] in {
  defm S_SCRATCH_LOAD_DWORD
      : SM_Pseudo_Loads<"s_scratch_load_dword", SReg_64, SReg_32_XM0_XEXEC>;
  defm S_SCRATCH_LOAD_DWORDX2
      : SM_Pseudo_Loads<"s_scratch_load_dwordx2", SReg_64, SReg_64_XEXEC>;
  defm S_SCRATCH_LOAD_DWORDX4
      : SM_Pseudo_Loads<"s_scratch_load_dwordx4", SReg_64, SReg_128>;

  defm S_SCRATCH_STORE_DWORD
      : SM_Pseudo_Stores<"s_scratch_store_dword", SReg_64, SReg_32_XM0_XEXEC>;
  defm S_SCRATCH_STORE_DWORDX2
      : SM_Pseudo_Stores<"s_scratch_store_dwordx2", SReg_64, SReg_64_XEXEC>;
  defm S_SCRATCH_STORE_DWORDX4
      : SM_Pseudo_Stores<"s_scratch_store_dwordx4", SReg_64, SReg_128>;
} // End SubtargetPredicate = HasScalarFlatScratchInsts
382
// Scalar atomics, gated on HasScalarAtomics. Each defm expands to the six
// pseudos produced by SM_Pseudo_Atomics.
let SubtargetPredicate = HasScalarAtomics in {

  // Buffer atomics (128-bit base register class).
  let is_buffer = 1 in {
    defm S_BUFFER_ATOMIC_SWAP    : SM_Pseudo_Atomics<"s_buffer_atomic_swap", SReg_128, SReg_32_XM0_XEXEC>;
    defm S_BUFFER_ATOMIC_CMPSWAP : SM_Pseudo_Atomics<"s_buffer_atomic_cmpswap", SReg_128, SReg_64_XEXEC>;
    defm S_BUFFER_ATOMIC_ADD     : SM_Pseudo_Atomics<"s_buffer_atomic_add", SReg_128, SReg_32_XM0_XEXEC>;
    defm S_BUFFER_ATOMIC_SUB     : SM_Pseudo_Atomics<"s_buffer_atomic_sub", SReg_128, SReg_32_XM0_XEXEC>;
    defm S_BUFFER_ATOMIC_SMIN    : SM_Pseudo_Atomics<"s_buffer_atomic_smin", SReg_128, SReg_32_XM0_XEXEC>;
    defm S_BUFFER_ATOMIC_UMIN    : SM_Pseudo_Atomics<"s_buffer_atomic_umin", SReg_128, SReg_32_XM0_XEXEC>;
    defm S_BUFFER_ATOMIC_SMAX    : SM_Pseudo_Atomics<"s_buffer_atomic_smax", SReg_128, SReg_32_XM0_XEXEC>;
    defm S_BUFFER_ATOMIC_UMAX    : SM_Pseudo_Atomics<"s_buffer_atomic_umax", SReg_128, SReg_32_XM0_XEXEC>;
    defm S_BUFFER_ATOMIC_AND     : SM_Pseudo_Atomics<"s_buffer_atomic_and", SReg_128, SReg_32_XM0_XEXEC>;
    defm S_BUFFER_ATOMIC_OR      : SM_Pseudo_Atomics<"s_buffer_atomic_or", SReg_128, SReg_32_XM0_XEXEC>;
    defm S_BUFFER_ATOMIC_XOR     : SM_Pseudo_Atomics<"s_buffer_atomic_xor", SReg_128, SReg_32_XM0_XEXEC>;
    defm S_BUFFER_ATOMIC_INC     : SM_Pseudo_Atomics<"s_buffer_atomic_inc", SReg_128, SReg_32_XM0_XEXEC>;
    defm S_BUFFER_ATOMIC_DEC     : SM_Pseudo_Atomics<"s_buffer_atomic_dec", SReg_128, SReg_32_XM0_XEXEC>;

    defm S_BUFFER_ATOMIC_SWAP_X2    : SM_Pseudo_Atomics<"s_buffer_atomic_swap_x2", SReg_128, SReg_64_XEXEC>;
    defm S_BUFFER_ATOMIC_CMPSWAP_X2 : SM_Pseudo_Atomics<"s_buffer_atomic_cmpswap_x2", SReg_128, SReg_128>;
    defm S_BUFFER_ATOMIC_ADD_X2     : SM_Pseudo_Atomics<"s_buffer_atomic_add_x2", SReg_128, SReg_64_XEXEC>;
    defm S_BUFFER_ATOMIC_SUB_X2     : SM_Pseudo_Atomics<"s_buffer_atomic_sub_x2", SReg_128, SReg_64_XEXEC>;
    defm S_BUFFER_ATOMIC_SMIN_X2    : SM_Pseudo_Atomics<"s_buffer_atomic_smin_x2", SReg_128, SReg_64_XEXEC>;
    defm S_BUFFER_ATOMIC_UMIN_X2    : SM_Pseudo_Atomics<"s_buffer_atomic_umin_x2", SReg_128, SReg_64_XEXEC>;
    defm S_BUFFER_ATOMIC_SMAX_X2    : SM_Pseudo_Atomics<"s_buffer_atomic_smax_x2", SReg_128, SReg_64_XEXEC>;
    defm S_BUFFER_ATOMIC_UMAX_X2    : SM_Pseudo_Atomics<"s_buffer_atomic_umax_x2", SReg_128, SReg_64_XEXEC>;
    defm S_BUFFER_ATOMIC_AND_X2     : SM_Pseudo_Atomics<"s_buffer_atomic_and_x2", SReg_128, SReg_64_XEXEC>;
    defm S_BUFFER_ATOMIC_OR_X2      : SM_Pseudo_Atomics<"s_buffer_atomic_or_x2", SReg_128, SReg_64_XEXEC>;
    defm S_BUFFER_ATOMIC_XOR_X2     : SM_Pseudo_Atomics<"s_buffer_atomic_xor_x2", SReg_128, SReg_64_XEXEC>;
    defm S_BUFFER_ATOMIC_INC_X2     : SM_Pseudo_Atomics<"s_buffer_atomic_inc_x2", SReg_128, SReg_64_XEXEC>;
    defm S_BUFFER_ATOMIC_DEC_X2     : SM_Pseudo_Atomics<"s_buffer_atomic_dec_x2", SReg_128, SReg_64_XEXEC>;
  } // End is_buffer = 1

  // Non-buffer atomics (64-bit base register class).
  defm S_ATOMIC_SWAP    : SM_Pseudo_Atomics<"s_atomic_swap", SReg_64, SReg_32_XM0_XEXEC>;
  defm S_ATOMIC_CMPSWAP : SM_Pseudo_Atomics<"s_atomic_cmpswap", SReg_64, SReg_64_XEXEC>;
  defm S_ATOMIC_ADD     : SM_Pseudo_Atomics<"s_atomic_add", SReg_64, SReg_32_XM0_XEXEC>;
  defm S_ATOMIC_SUB     : SM_Pseudo_Atomics<"s_atomic_sub", SReg_64, SReg_32_XM0_XEXEC>;
  defm S_ATOMIC_SMIN    : SM_Pseudo_Atomics<"s_atomic_smin", SReg_64, SReg_32_XM0_XEXEC>;
  defm S_ATOMIC_UMIN    : SM_Pseudo_Atomics<"s_atomic_umin", SReg_64, SReg_32_XM0_XEXEC>;
  defm S_ATOMIC_SMAX    : SM_Pseudo_Atomics<"s_atomic_smax", SReg_64, SReg_32_XM0_XEXEC>;
  defm S_ATOMIC_UMAX    : SM_Pseudo_Atomics<"s_atomic_umax", SReg_64, SReg_32_XM0_XEXEC>;
  defm S_ATOMIC_AND     : SM_Pseudo_Atomics<"s_atomic_and", SReg_64, SReg_32_XM0_XEXEC>;
  defm S_ATOMIC_OR      : SM_Pseudo_Atomics<"s_atomic_or", SReg_64, SReg_32_XM0_XEXEC>;
  defm S_ATOMIC_XOR     : SM_Pseudo_Atomics<"s_atomic_xor", SReg_64, SReg_32_XM0_XEXEC>;
  defm S_ATOMIC_INC     : SM_Pseudo_Atomics<"s_atomic_inc", SReg_64, SReg_32_XM0_XEXEC>;
  defm S_ATOMIC_DEC     : SM_Pseudo_Atomics<"s_atomic_dec", SReg_64, SReg_32_XM0_XEXEC>;

  defm S_ATOMIC_SWAP_X2    : SM_Pseudo_Atomics<"s_atomic_swap_x2", SReg_64, SReg_64_XEXEC>;
  defm S_ATOMIC_CMPSWAP_X2 : SM_Pseudo_Atomics<"s_atomic_cmpswap_x2", SReg_64, SReg_128>;
  defm S_ATOMIC_ADD_X2     : SM_Pseudo_Atomics<"s_atomic_add_x2", SReg_64, SReg_64_XEXEC>;
  defm S_ATOMIC_SUB_X2     : SM_Pseudo_Atomics<"s_atomic_sub_x2", SReg_64, SReg_64_XEXEC>;
  defm S_ATOMIC_SMIN_X2    : SM_Pseudo_Atomics<"s_atomic_smin_x2", SReg_64, SReg_64_XEXEC>;
  defm S_ATOMIC_UMIN_X2    : SM_Pseudo_Atomics<"s_atomic_umin_x2", SReg_64, SReg_64_XEXEC>;
  defm S_ATOMIC_SMAX_X2    : SM_Pseudo_Atomics<"s_atomic_smax_x2", SReg_64, SReg_64_XEXEC>;
  defm S_ATOMIC_UMAX_X2    : SM_Pseudo_Atomics<"s_atomic_umax_x2", SReg_64, SReg_64_XEXEC>;
  defm S_ATOMIC_AND_X2     : SM_Pseudo_Atomics<"s_atomic_and_x2", SReg_64, SReg_64_XEXEC>;
  defm S_ATOMIC_OR_X2      : SM_Pseudo_Atomics<"s_atomic_or_x2", SReg_64, SReg_64_XEXEC>;
  defm S_ATOMIC_XOR_X2     : SM_Pseudo_Atomics<"s_atomic_xor_x2", SReg_64, SReg_64_XEXEC>;
  defm S_ATOMIC_INC_X2     : SM_Pseudo_Atomics<"s_atomic_inc_x2", SReg_64, SReg_64_XEXEC>;
  defm S_ATOMIC_DEC_X2     : SM_Pseudo_Atomics<"s_atomic_dec_x2", SReg_64, SReg_64_XEXEC>;

} // End SubtargetPredicate = HasScalarAtomics
444
// The discard instructions are gated on the same predicate as the scalar
// atomics.
let SubtargetPredicate = HasScalarAtomics in {
  defm S_DCACHE_DISCARD    : SM_Pseudo_Discards<"s_dcache_discard">;
  defm S_DCACHE_DISCARD_X2 : SM_Pseudo_Discards<"s_dcache_discard_x2">;
} // End SubtargetPredicate = HasScalarAtomics
449
450//===----------------------------------------------------------------------===//
451// Targets
452//===----------------------------------------------------------------------===//
453
454//===----------------------------------------------------------------------===//
455// SI
456//===----------------------------------------------------------------------===//
457
// 32-bit SMRD encoding used for GFX6/GFX7.
//
//   op - 5-bit opcode placed in Inst{26-22}.
//   ps - pseudo whose has_* flags decide which operand fields are encoded;
//        fields for absent operands are left unset (?).
class SMRD_Real_si<bits<5> op, SM_Pseudo ps>
    : SM_Real<ps>,
      SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SI>,
      Enc32 {

  let AssemblerPredicate = isGFX6GFX7;
  let DecoderNamespace = "GFX6GFX7";

  // Fixed fields.
  let Inst{31-27} = 0x18; //encoding
  let Inst{26-22} = op;

  // Operand fields.
  let Inst{21-15} = !if(ps.has_sdst, sdst{6-0}, ?);
  let Inst{14-9}  = !if(ps.has_sbase, sbase{6-1}, ?);
  // Bit 8 mirrors has_offset; the low byte then holds either the immediate
  // offset or the soffset register.
  let Inst{8}     = ps.has_offset;
  let Inst{7-0}   = !if(ps.has_offset, offset{7-0},
                        !if(ps.has_soffset, soffset, ?));
}
473
// Defines the SI/CI real loads for the _IMM and _SGPR pseudos of `ps`.
// The operand lists are rebuilt here so that the immediate form uses
// smrd_offset_8 for $offset.
multiclass SM_Real_Loads_si<bits<5> op, string ps,
                            SM_Load_Pseudo immPs = !cast<SM_Load_Pseudo>(ps#_IMM),
                            SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR)> {

  def _IMM_si : SMRD_Real_si<op, immPs> {
    let InOperandList = (ins immPs.BaseClass:$sbase,
                             smrd_offset_8:$offset,
                             CPol:$cpol);
  }

  def _SGPR_si : SMRD_Real_si<op, sgprPs> {
    let InOperandList = (ins sgprPs.BaseClass:$sbase,
                             SReg_32:$soffset,
                             CPol:$cpol);
  }

}
487
// SI/CI real loads.
defm S_LOAD_DWORD    : SM_Real_Loads_si<0x00, "S_LOAD_DWORD">;
defm S_LOAD_DWORDX2  : SM_Real_Loads_si<0x01, "S_LOAD_DWORDX2">;
defm S_LOAD_DWORDX4  : SM_Real_Loads_si<0x02, "S_LOAD_DWORDX4">;
defm S_LOAD_DWORDX8  : SM_Real_Loads_si<0x03, "S_LOAD_DWORDX8">;
defm S_LOAD_DWORDX16 : SM_Real_Loads_si<0x04, "S_LOAD_DWORDX16">;

// SI/CI real buffer loads.
defm S_BUFFER_LOAD_DWORD    : SM_Real_Loads_si<0x08, "S_BUFFER_LOAD_DWORD">;
defm S_BUFFER_LOAD_DWORDX2  : SM_Real_Loads_si<0x09, "S_BUFFER_LOAD_DWORDX2">;
defm S_BUFFER_LOAD_DWORDX4  : SM_Real_Loads_si<0x0a, "S_BUFFER_LOAD_DWORDX4">;
defm S_BUFFER_LOAD_DWORDX8  : SM_Real_Loads_si<0x0b, "S_BUFFER_LOAD_DWORDX8">;
defm S_BUFFER_LOAD_DWORDX16 : SM_Real_Loads_si<0x0c, "S_BUFFER_LOAD_DWORDX16">;

// Operand-less / result-only instructions.
def S_MEMTIME_si    : SMRD_Real_si<0x1e, S_MEMTIME>;
def S_DCACHE_INV_si : SMRD_Real_si<0x1f, S_DCACHE_INV>;
501
502
503//===----------------------------------------------------------------------===//
504// VI and GFX9.
505//===----------------------------------------------------------------------===//
506
// 64-bit SMEM encoding shared by VI (GFX8) and GFX9.
//
//   op - 8-bit opcode placed in Inst{25-18}.
//   ps - pseudo whose has_* flags decide which operand fields are encoded;
//        fields for absent operands are left unset (?).
//
// IsGFX9SpecificEncoding selects the GFX9-only form: it narrows the assembler
// predicate to isGFX9Only and enables the soffset_en bit and the separate
// soffset field.
class SMEM_Real_vi<bits<8> op, SM_Pseudo ps>
    : SM_Real<ps>,
      SIMCInstr<ps.PseudoInstr, SIEncodingFamily.VI>,
      Enc64 {
  field bit IsGFX9SpecificEncoding = false;
  let AssemblerPredicate = !if(IsGFX9SpecificEncoding, isGFX9Only, isGFX8GFX9);
  let DecoderNamespace = "GFX8";

  // Fixed fields.
  let Inst{25-18} = op;
  let Inst{31-26} = 0x30; //encoding

  let Inst{5-0}   = !if(ps.has_sbase, sbase{6-1}, ?);
  let Inst{12-6}  = !if(ps.has_sdst, sdst{6-0}, ?);

  // Note that for GFX9 instructions with immediate offsets, soffset_en
  // must be defined, whereas in GFX8 it's undefined in all cases,
  // meaning GFX9 is not perfectly backward-compatible with GFX8, despite
  // documentation suggesting otherwise.
  field bit SOffsetEn = !if(IsGFX9SpecificEncoding,
    !if(ps.has_offset, ps.has_soffset, !if(ps.has_soffset, 0, ?)),
    ?);
  let Inst{14} = SOffsetEn;

  let Inst{16} = !if(ps.has_glc, cpol{CPolBit.GLC}, ?);

  // imm
  // TODO: Shall not be defined if the instruction has no offset nor
  // soffset.
  let Inst{17} = ps.has_offset;

  // VI supports 20-bit unsigned offsets while GFX9+ supports 21-bit signed.
  // Offset value is corrected accordingly when offset is encoded/decoded.
  // TODO: Forbid non-M0 register offsets for GFX8 stores and atomics.
  field bits<21> Offset;
  // Low 7 bits hold the immediate offset, or the soffset register when there
  // is no immediate; the upper bits are only set for an immediate offset.
  let Offset{6-0} = !if(ps.has_offset, offset{6-0},
                        !if(ps.has_soffset, soffset{6-0}, ?));
  let Offset{20-7} = !if(ps.has_offset, offset{20-7}, ?);
  let Inst{52-32} = Offset;

  // soffset
  let Inst{63-57} = !if(!and(IsGFX9SpecificEncoding, ps.has_soffset),
                        soffset{6-0}, ?);
}
550
// VI/GFX9 real load for the pseudo named `ps` plus the offset-mode suffix.
// The input operand list is rebuilt from the pseudo's base register class,
// the offset-mode operands, and the CPol operand.
class SMEM_Real_Load_vi<bits<8> op, string ps, OffsetMode offsets>
    : SMEM_Real_vi<op, !cast<SM_Pseudo>(ps # offsets.Variant)> {
  RegisterClass BaseClass =
    !cast<SM_Load_Pseudo>(ps # offsets.Variant).BaseClass;
  let InOperandList = !con((ins BaseClass:$sbase),
                           offsets.Ins,
                           (ins CPol:$cpol));
}
556
// Mix-in for the alternative GFX9 SGPR encoding, which uses the soffset field
// to encode the offset register. It is not available in the assembler and is
// placed in the GFX9 encoding family to avoid conflicts with the primary SGPR
// variant.
class SMEM_Real_SGPR_alt_gfx9 {
  bit IsGFX9SpecificEncoding = true;
  // Force soffset_en on and leave the offset field unset.
  bit SOffsetEn = 1;
  bit Offset = ?;
  int Subtarget = SIEncodingFamily.GFX9;
  string AsmVariantName = "NonParsable";
}
567
// Defines the VI/GFX9 real loads for `ps`: the immediate and SGPR forms, the
// alternative GFX9 SGPR form, and the GFX9-only SGPR + immediate form.
multiclass SM_Real_Loads_vi<bits<8> op, string ps> {
  def _IMM_vi  : SMEM_Real_Load_vi<op, ps, IMM_Offset>;
  def _SGPR_vi : SMEM_Real_Load_vi<op, ps, SGPR_Offset>;

  def _SGPR_alt_gfx9
      : SMEM_Real_Load_vi<op, ps, SGPR_Offset>,
        SMEM_Real_SGPR_alt_gfx9;

  let IsGFX9SpecificEncoding = true in
  def _SGPR_IMM_gfx9
      : SMEM_Real_Load_vi<op, ps, SGPR_IMM_Offset>;
}
576
// VI/GFX9 encoding for instructions that take $sdata instead of $sdst:
// Inst{12-6} is filled from sdata, and sdst is left unset.
class SMEM_Real_Store_Base_vi<bits<8> op, SM_Pseudo ps>
    : SMEM_Real_vi<op, ps> {
  // encoding
  bits<7> sdata;

  let sdst = ?;
  let Inst{12-6} = !if(ps.has_sdst, sdata{6-0}, ?);
}
584
// VI/GFX9 real store for the pseudo named `ps` plus the offset-mode suffix.
// The input operand list is rebuilt from the pseudo's source and base
// register classes, the offset-mode operands, and the CPol operand.
class SMEM_Real_Store_vi<bits<8> op, string ps, OffsetMode offsets>
    : SMEM_Real_Store_Base_vi<op, !cast<SM_Pseudo>(ps # offsets.Variant)> {
  RegisterClass SrcClass =
    !cast<SM_Store_Pseudo>(ps # offsets.Variant).SrcClass;
  RegisterClass BaseClass =
    !cast<SM_Store_Pseudo>(ps # offsets.Variant).BaseClass;
  let InOperandList = !con((ins SrcClass:$sdata, BaseClass:$sbase),
                           offsets.Ins,
                           (ins CPol:$cpol));
}
592
// Defines the VI/GFX9 real stores for `ps`: the immediate and SGPR forms, the
// alternative GFX9 SGPR form, and the GFX9-only SGPR + immediate form.
multiclass SM_Real_Stores_vi<bits<8> op, string ps> {
  def _IMM_vi  : SMEM_Real_Store_vi<op, ps, IMM_Offset>;
  def _SGPR_vi : SMEM_Real_Store_vi<op, ps, SGPR_Offset>;

  def _SGPR_alt_gfx9
      : SMEM_Real_Store_vi<op, ps, SGPR_Offset>,
        SMEM_Real_SGPR_alt_gfx9;

  let IsGFX9SpecificEncoding = true in
  def _SGPR_IMM_gfx9
      : SMEM_Real_Store_vi<op, ps, SGPR_IMM_Offset>;
}
601
// Defines the VI/GFX9 real probes for `ps`: the immediate and SGPR forms, the
// alternative GFX9 SGPR form, and the GFX9-only SGPR + immediate form.
multiclass SM_Real_Probe_vi<bits<8> op, string ps> {
  def _IMM_vi
      : SMEM_Real_Store_Base_vi<op, !cast<SM_Probe_Pseudo>(ps#_IMM)>;
  def _SGPR_vi
      : SMEM_Real_Store_Base_vi<op, !cast<SM_Probe_Pseudo>(ps#_SGPR)>;

  def _SGPR_alt_gfx9
      : SMEM_Real_Store_Base_vi<op, !cast<SM_Probe_Pseudo>(ps#_SGPR)>,
        SMEM_Real_SGPR_alt_gfx9;

  let IsGFX9SpecificEncoding = true in
  def _SGPR_IMM_gfx9
      : SMEM_Real_Store_Base_vi<op, !cast<SM_Probe_Pseudo>(ps#_SGPR_IMM)>;
}
612
// VI/GFX9 real loads.
defm S_LOAD_DWORD    : SM_Real_Loads_vi<0x00, "S_LOAD_DWORD">;
defm S_LOAD_DWORDX2  : SM_Real_Loads_vi<0x01, "S_LOAD_DWORDX2">;
defm S_LOAD_DWORDX4  : SM_Real_Loads_vi<0x02, "S_LOAD_DWORDX4">;
defm S_LOAD_DWORDX8  : SM_Real_Loads_vi<0x03, "S_LOAD_DWORDX8">;
defm S_LOAD_DWORDX16 : SM_Real_Loads_vi<0x04, "S_LOAD_DWORDX16">;

// VI/GFX9 real buffer loads.
defm S_BUFFER_LOAD_DWORD    : SM_Real_Loads_vi<0x08, "S_BUFFER_LOAD_DWORD">;
defm S_BUFFER_LOAD_DWORDX2  : SM_Real_Loads_vi<0x09, "S_BUFFER_LOAD_DWORDX2">;
defm S_BUFFER_LOAD_DWORDX4  : SM_Real_Loads_vi<0x0a, "S_BUFFER_LOAD_DWORDX4">;
defm S_BUFFER_LOAD_DWORDX8  : SM_Real_Loads_vi<0x0b, "S_BUFFER_LOAD_DWORDX8">;
defm S_BUFFER_LOAD_DWORDX16 : SM_Real_Loads_vi<0x0c, "S_BUFFER_LOAD_DWORDX16">;

// VI/GFX9 real stores.
defm S_STORE_DWORD   : SM_Real_Stores_vi<0x10, "S_STORE_DWORD">;
defm S_STORE_DWORDX2 : SM_Real_Stores_vi<0x11, "S_STORE_DWORDX2">;
defm S_STORE_DWORDX4 : SM_Real_Stores_vi<0x12, "S_STORE_DWORDX4">;

// VI/GFX9 real buffer stores.
defm S_BUFFER_STORE_DWORD   : SM_Real_Stores_vi<0x18, "S_BUFFER_STORE_DWORD">;
defm S_BUFFER_STORE_DWORDX2 : SM_Real_Stores_vi<0x19, "S_BUFFER_STORE_DWORDX2">;
defm S_BUFFER_STORE_DWORDX4 : SM_Real_Stores_vi<0x1a, "S_BUFFER_STORE_DWORDX4">;

// These instructions use the same encoding.
def S_DCACHE_INV_vi     : SMEM_Real_vi<0x20, S_DCACHE_INV>;
def S_DCACHE_WB_vi      : SMEM_Real_vi<0x21, S_DCACHE_WB>;
def S_DCACHE_INV_VOL_vi : SMEM_Real_vi<0x22, S_DCACHE_INV_VOL>;
def S_DCACHE_WB_VOL_vi  : SMEM_Real_vi<0x23, S_DCACHE_WB_VOL>;
def S_MEMTIME_vi        : SMEM_Real_vi<0x24, S_MEMTIME>;
def S_MEMREALTIME_vi    : SMEM_Real_vi<0x25, S_MEMREALTIME>;

// Scalar scratch loads and stores.
defm S_SCRATCH_LOAD_DWORD   : SM_Real_Loads_vi<0x05, "S_SCRATCH_LOAD_DWORD">;
defm S_SCRATCH_LOAD_DWORDX2 : SM_Real_Loads_vi<0x06, "S_SCRATCH_LOAD_DWORDX2">;
defm S_SCRATCH_LOAD_DWORDX4 : SM_Real_Loads_vi<0x07, "S_SCRATCH_LOAD_DWORDX4">;

defm S_SCRATCH_STORE_DWORD   : SM_Real_Stores_vi<0x15, "S_SCRATCH_STORE_DWORD">;
defm S_SCRATCH_STORE_DWORDX2 : SM_Real_Stores_vi<0x16, "S_SCRATCH_STORE_DWORDX2">;
defm S_SCRATCH_STORE_DWORDX4 : SM_Real_Stores_vi<0x17, "S_SCRATCH_STORE_DWORDX4">;

// Probes.
defm S_ATC_PROBE        : SM_Real_Probe_vi<0x26, "S_ATC_PROBE">;
defm S_ATC_PROBE_BUFFER : SM_Real_Probe_vi<0x27, "S_ATC_PROBE_BUFFER">;
650
651//===----------------------------------------------------------------------===//
652// GFX9
653//===----------------------------------------------------------------------===//
654
// Real encoding for scalar atomics, built on the VI/GFX9 SMEM encoding.
// The GLC bit of cpol is fixed from the pseudo's `glc` field, and Inst{12-6}
// holds sdst when glc is set and sdata otherwise.
class SMEM_Atomic_Real_vi<bits<8> op, SM_Atomic_Pseudo ps>
    : SMEM_Real_vi<op, ps>,
      AtomicNoRet<!subst("_RTN","",NAME), ps.glc> {

  bits<7> sdata;

  // Operand tying and encoding exclusions come from the pseudo.
  let Constraints = ps.Constraints;
  let DisableEncoding = ps.DisableEncoding;

  let cpol{CPolBit.GLC} = ps.glc;
  let Inst{12-6} = !if(ps.glc, sdst{6-0}, sdata{6-0});
}
667
// Defines the real atomics for `ps`. For both the non-returning and the
// returning (_RTN) pseudos this yields the immediate and SGPR forms, the
// alternative GFX9 SGPR form, and the GFX9-only SGPR + immediate form.
multiclass SM_Real_Atomics_vi<bits<8> op, string ps> {
  // Non-returning variants.
  def _IMM_vi
      : SMEM_Atomic_Real_vi<op, !cast<SM_Atomic_Pseudo>(ps#_IMM)>;
  def _SGPR_vi
      : SMEM_Atomic_Real_vi<op, !cast<SM_Atomic_Pseudo>(ps#_SGPR)>;
  def _SGPR_alt_gfx9
      : SMEM_Atomic_Real_vi<op, !cast<SM_Atomic_Pseudo>(ps#_SGPR)>,
        SMEM_Real_SGPR_alt_gfx9;
  let IsGFX9SpecificEncoding = true in
  def _SGPR_IMM_gfx9
      : SMEM_Atomic_Real_vi<op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_IMM)>;

  // Returning variants.
  def _IMM_RTN_vi
      : SMEM_Atomic_Real_vi<op, !cast<SM_Atomic_Pseudo>(ps#_IMM_RTN)>;
  def _SGPR_RTN_vi
      : SMEM_Atomic_Real_vi<op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_RTN)>;
  def _SGPR_RTN_alt_gfx9
      : SMEM_Atomic_Real_vi<op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_RTN)>,
        SMEM_Real_SGPR_alt_gfx9;
  let IsGFX9SpecificEncoding = true in
  def _SGPR_IMM_RTN_gfx9
      : SMEM_Atomic_Real_vi<op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_IMM_RTN)>;
}
686
// Opcode assignments for the scalar atomics, all expanded through
// SM_Real_Atomics_vi. The operation order (SWAP, CMPSWAP, ADD, SUB, SMIN,
// UMIN, SMAX, UMAX, AND, OR, XOR, INC, DEC) is the same in every group:
//   S_BUFFER_ATOMIC_*     0x40-0x4c
//   S_BUFFER_ATOMIC_*_X2  0x60-0x6c
//   S_ATOMIC_*            0x80-0x8c
//   S_ATOMIC_*_X2         0xa0-0xac
687defm S_BUFFER_ATOMIC_SWAP         : SM_Real_Atomics_vi <0x40, "S_BUFFER_ATOMIC_SWAP">;
688defm S_BUFFER_ATOMIC_CMPSWAP      : SM_Real_Atomics_vi <0x41, "S_BUFFER_ATOMIC_CMPSWAP">;
689defm S_BUFFER_ATOMIC_ADD          : SM_Real_Atomics_vi <0x42, "S_BUFFER_ATOMIC_ADD">;
690defm S_BUFFER_ATOMIC_SUB          : SM_Real_Atomics_vi <0x43, "S_BUFFER_ATOMIC_SUB">;
691defm S_BUFFER_ATOMIC_SMIN         : SM_Real_Atomics_vi <0x44, "S_BUFFER_ATOMIC_SMIN">;
692defm S_BUFFER_ATOMIC_UMIN         : SM_Real_Atomics_vi <0x45, "S_BUFFER_ATOMIC_UMIN">;
693defm S_BUFFER_ATOMIC_SMAX         : SM_Real_Atomics_vi <0x46, "S_BUFFER_ATOMIC_SMAX">;
694defm S_BUFFER_ATOMIC_UMAX         : SM_Real_Atomics_vi <0x47, "S_BUFFER_ATOMIC_UMAX">;
695defm S_BUFFER_ATOMIC_AND          : SM_Real_Atomics_vi <0x48, "S_BUFFER_ATOMIC_AND">;
696defm S_BUFFER_ATOMIC_OR           : SM_Real_Atomics_vi <0x49, "S_BUFFER_ATOMIC_OR">;
697defm S_BUFFER_ATOMIC_XOR          : SM_Real_Atomics_vi <0x4a, "S_BUFFER_ATOMIC_XOR">;
698defm S_BUFFER_ATOMIC_INC          : SM_Real_Atomics_vi <0x4b, "S_BUFFER_ATOMIC_INC">;
699defm S_BUFFER_ATOMIC_DEC          : SM_Real_Atomics_vi <0x4c, "S_BUFFER_ATOMIC_DEC">;
700
701defm S_BUFFER_ATOMIC_SWAP_X2      : SM_Real_Atomics_vi <0x60, "S_BUFFER_ATOMIC_SWAP_X2">;
702defm S_BUFFER_ATOMIC_CMPSWAP_X2   : SM_Real_Atomics_vi <0x61, "S_BUFFER_ATOMIC_CMPSWAP_X2">;
703defm S_BUFFER_ATOMIC_ADD_X2       : SM_Real_Atomics_vi <0x62, "S_BUFFER_ATOMIC_ADD_X2">;
704defm S_BUFFER_ATOMIC_SUB_X2       : SM_Real_Atomics_vi <0x63, "S_BUFFER_ATOMIC_SUB_X2">;
705defm S_BUFFER_ATOMIC_SMIN_X2      : SM_Real_Atomics_vi <0x64, "S_BUFFER_ATOMIC_SMIN_X2">;
706defm S_BUFFER_ATOMIC_UMIN_X2      : SM_Real_Atomics_vi <0x65, "S_BUFFER_ATOMIC_UMIN_X2">;
707defm S_BUFFER_ATOMIC_SMAX_X2      : SM_Real_Atomics_vi <0x66, "S_BUFFER_ATOMIC_SMAX_X2">;
708defm S_BUFFER_ATOMIC_UMAX_X2      : SM_Real_Atomics_vi <0x67, "S_BUFFER_ATOMIC_UMAX_X2">;
709defm S_BUFFER_ATOMIC_AND_X2       : SM_Real_Atomics_vi <0x68, "S_BUFFER_ATOMIC_AND_X2">;
710defm S_BUFFER_ATOMIC_OR_X2        : SM_Real_Atomics_vi <0x69, "S_BUFFER_ATOMIC_OR_X2">;
711defm S_BUFFER_ATOMIC_XOR_X2       : SM_Real_Atomics_vi <0x6a, "S_BUFFER_ATOMIC_XOR_X2">;
712defm S_BUFFER_ATOMIC_INC_X2       : SM_Real_Atomics_vi <0x6b, "S_BUFFER_ATOMIC_INC_X2">;
713defm S_BUFFER_ATOMIC_DEC_X2       : SM_Real_Atomics_vi <0x6c, "S_BUFFER_ATOMIC_DEC_X2">;
714
715defm S_ATOMIC_SWAP                : SM_Real_Atomics_vi <0x80, "S_ATOMIC_SWAP">;
716defm S_ATOMIC_CMPSWAP             : SM_Real_Atomics_vi <0x81, "S_ATOMIC_CMPSWAP">;
717defm S_ATOMIC_ADD                 : SM_Real_Atomics_vi <0x82, "S_ATOMIC_ADD">;
718defm S_ATOMIC_SUB                 : SM_Real_Atomics_vi <0x83, "S_ATOMIC_SUB">;
719defm S_ATOMIC_SMIN                : SM_Real_Atomics_vi <0x84, "S_ATOMIC_SMIN">;
720defm S_ATOMIC_UMIN                : SM_Real_Atomics_vi <0x85, "S_ATOMIC_UMIN">;
721defm S_ATOMIC_SMAX                : SM_Real_Atomics_vi <0x86, "S_ATOMIC_SMAX">;
722defm S_ATOMIC_UMAX                : SM_Real_Atomics_vi <0x87, "S_ATOMIC_UMAX">;
723defm S_ATOMIC_AND                 : SM_Real_Atomics_vi <0x88, "S_ATOMIC_AND">;
724defm S_ATOMIC_OR                  : SM_Real_Atomics_vi <0x89, "S_ATOMIC_OR">;
725defm S_ATOMIC_XOR                 : SM_Real_Atomics_vi <0x8a, "S_ATOMIC_XOR">;
726defm S_ATOMIC_INC                 : SM_Real_Atomics_vi <0x8b, "S_ATOMIC_INC">;
727defm S_ATOMIC_DEC                 : SM_Real_Atomics_vi <0x8c, "S_ATOMIC_DEC">;
728
729defm S_ATOMIC_SWAP_X2             : SM_Real_Atomics_vi <0xa0, "S_ATOMIC_SWAP_X2">;
730defm S_ATOMIC_CMPSWAP_X2          : SM_Real_Atomics_vi <0xa1, "S_ATOMIC_CMPSWAP_X2">;
731defm S_ATOMIC_ADD_X2              : SM_Real_Atomics_vi <0xa2, "S_ATOMIC_ADD_X2">;
732defm S_ATOMIC_SUB_X2              : SM_Real_Atomics_vi <0xa3, "S_ATOMIC_SUB_X2">;
733defm S_ATOMIC_SMIN_X2             : SM_Real_Atomics_vi <0xa4, "S_ATOMIC_SMIN_X2">;
734defm S_ATOMIC_UMIN_X2             : SM_Real_Atomics_vi <0xa5, "S_ATOMIC_UMIN_X2">;
735defm S_ATOMIC_SMAX_X2             : SM_Real_Atomics_vi <0xa6, "S_ATOMIC_SMAX_X2">;
736defm S_ATOMIC_UMAX_X2             : SM_Real_Atomics_vi <0xa7, "S_ATOMIC_UMAX_X2">;
737defm S_ATOMIC_AND_X2              : SM_Real_Atomics_vi <0xa8, "S_ATOMIC_AND_X2">;
738defm S_ATOMIC_OR_X2               : SM_Real_Atomics_vi <0xa9, "S_ATOMIC_OR_X2">;
739defm S_ATOMIC_XOR_X2              : SM_Real_Atomics_vi <0xaa, "S_ATOMIC_XOR_X2">;
740defm S_ATOMIC_INC_X2              : SM_Real_Atomics_vi <0xab, "S_ATOMIC_INC_X2">;
741defm S_ATOMIC_DEC_X2              : SM_Real_Atomics_vi <0xac, "S_ATOMIC_DEC_X2">;
742
// Real encodings for a discard instruction named by `ps` with opcode `op`.
// Emits the _IMM and _SGPR forms, an additional _SGPR form that also derives
// from SMEM_Real_SGPR_alt_gfx9, and an _SGPR_IMM form defined with
// IsGFX9SpecificEncoding = true.
743multiclass SM_Real_Discard_vi<bits<8> op, string ps> {
744  def _IMM_vi  : SMEM_Real_vi <op, !cast<SM_Discard_Pseudo>(ps#_IMM)>;
745  def _SGPR_vi : SMEM_Real_vi <op, !cast<SM_Discard_Pseudo>(ps#_SGPR)>;
746  def _SGPR_alt_gfx9 : SMEM_Real_vi <op, !cast<SM_Discard_Pseudo>(ps#_SGPR)>,
747                       SMEM_Real_SGPR_alt_gfx9;
748  let IsGFX9SpecificEncoding = true in
749  def _SGPR_IMM_gfx9 : SMEM_Real_vi <op, !cast<SM_Discard_Pseudo>(ps#_SGPR_IMM)>;
750}
751
// Scalar data-cache discard instructions: opcodes 0x28 and 0x29.
752defm S_DCACHE_DISCARD    : SM_Real_Discard_vi <0x28, "S_DCACHE_DISCARD">;
753defm S_DCACHE_DISCARD_X2 : SM_Real_Discard_vi <0x29, "S_DCACHE_DISCARD_X2">;
754
755//===----------------------------------------------------------------------===//
756// CI
757//===----------------------------------------------------------------------===//
758
// Immediate operand (OPERAND_IMMEDIATE) matched by the "SMRDLiteralOffset"
// assembler match class. SMRD_Real_Load_IMM_ci uses it for $offset, which
// that class encodes as a full 32-bit field (Inst{63-32}).
759def smrd_literal_offset : NamedOperandU32<"SMRDLiteralOffset",
760                                          NamedMatchClass<"SMRDLiteralOffset">> {
761  let OperandType = "OPERAND_IMMEDIATE";
762}
763
// GFX7-only 64-bit (Enc64) encoding of a scalar load with a 32-bit literal
// offset.
//   op - 5-bit opcode placed in Inst{26-22}.
//   ps - the SM_Load_Pseudo being encoded; its BaseClass supplies the register
//        class of $sbase.
// The input operand list is replaced so that $offset uses smrd_literal_offset.
764class SMRD_Real_Load_IMM_ci <bits<5> op, SM_Load_Pseudo ps> :
765  SM_Real<ps>,
766  Enc64 {
767
768  let AssemblerPredicate = isGFX7Only;
769  let DecoderNamespace = "GFX7";
770  let InOperandList = (ins ps.BaseClass:$sbase, smrd_literal_offset:$offset, CPol:$cpol);
771
  // Bits 7-0 are the constant 0xff and bit 8 is 0 for this form; the offset
  // itself lives in the upper dword (bits 63-32).
772  let Inst{7-0}   = 0xff;
773  let Inst{8}     = 0;
  // sbase is encoded without its low bit (bits 6-1 of the register number).
774  let Inst{14-9}  = sbase{6-1};
775  let Inst{21-15} = sdst{6-0};
776  let Inst{26-22} = op;
777  let Inst{31-27} = 0x18; //encoding
778  let Inst{63-32} = offset{31-0};
779}
780
// GFX7 literal-offset real instructions: S_LOAD_* use opcodes 0x00-0x04 and
// S_BUFFER_LOAD_* use 0x08-0x0c, each built from the matching _IMM pseudo.
781def S_LOAD_DWORD_IMM_ci           : SMRD_Real_Load_IMM_ci <0x00, S_LOAD_DWORD_IMM>;
782def S_LOAD_DWORDX2_IMM_ci         : SMRD_Real_Load_IMM_ci <0x01, S_LOAD_DWORDX2_IMM>;
783def S_LOAD_DWORDX4_IMM_ci         : SMRD_Real_Load_IMM_ci <0x02, S_LOAD_DWORDX4_IMM>;
784def S_LOAD_DWORDX8_IMM_ci         : SMRD_Real_Load_IMM_ci <0x03, S_LOAD_DWORDX8_IMM>;
785def S_LOAD_DWORDX16_IMM_ci        : SMRD_Real_Load_IMM_ci <0x04, S_LOAD_DWORDX16_IMM>;
786def S_BUFFER_LOAD_DWORD_IMM_ci    : SMRD_Real_Load_IMM_ci <0x08, S_BUFFER_LOAD_DWORD_IMM>;
787def S_BUFFER_LOAD_DWORDX2_IMM_ci  : SMRD_Real_Load_IMM_ci <0x09, S_BUFFER_LOAD_DWORDX2_IMM>;
788def S_BUFFER_LOAD_DWORDX4_IMM_ci  : SMRD_Real_Load_IMM_ci <0x0a, S_BUFFER_LOAD_DWORDX4_IMM>;
789def S_BUFFER_LOAD_DWORDX8_IMM_ci  : SMRD_Real_Load_IMM_ci <0x0b, S_BUFFER_LOAD_DWORDX8_IMM>;
790def S_BUFFER_LOAD_DWORDX16_IMM_ci : SMRD_Real_Load_IMM_ci <0x0c, S_BUFFER_LOAD_DWORDX16_IMM>;
791
// GFX7-only 32-bit (Enc32) SMRD encoding for an arbitrary SM_Pseudo,
// registered under SIEncodingFamily.SI.
//   op - 5-bit opcode placed in Inst{26-22}.
//   ps - pseudo whose has_offset / has_soffset / has_sbase / has_sdst flags
//        decide which fields are encoded; unused fields are left as `?`.
792class SMRD_Real_ci <bits<5> op, SM_Pseudo ps>
793  : SM_Real<ps>
794  , SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SI>
795  , Enc32 {
796
797  let AssemblerPredicate = isGFX7Only;
798  let DecoderNamespace = "GFX7";
799
  // Bits 7-0 hold the 8-bit immediate offset when the pseudo has one, else
  // the soffset register when it has one; bit 8 records which case applies.
800  let Inst{7-0}   = !if(ps.has_offset, offset{7-0}, !if(ps.has_soffset, soffset, ?));
801  let Inst{8}     = ps.has_offset;
802  let Inst{14-9}  = !if(ps.has_sbase, sbase{6-1}, ?);
803  let Inst{21-15} = !if(ps.has_sdst, sdst{6-0}, ?);
804  let Inst{26-22} = op;
805  let Inst{31-27} = 0x18; //encoding
806}
807
// GFX7 real instruction for S_DCACHE_INV_VOL, opcode 0x1d.
808def S_DCACHE_INV_VOL_ci : SMRD_Real_ci <0x1d, S_DCACHE_INV_VOL>;
809
810//===----------------------------------------------------------------------===//
811// Scalar Memory Patterns
812//===----------------------------------------------------------------------===//
813
// Pattern fragment matching a `load` that may be selected to a scalar memory
// instruction.
// SelectionDAG accepts the node only when isUniformLoad(N) returns true.
// The GlobalISel predicate rejects the instruction unless it has exactly one
// memory operand, isInstrUniform(MI) holds, and the address information
// gathered by getAddrModeInfo has no VGPR parts (hasVgprParts).
814def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{ return isUniformLoad(N);}]> {
815  let GISelPredicateCode = [{
816    if (!MI.hasOneMemOperand())
817      return false;
818    if (!isInstrUniform(MI))
819      return false;
820
821    // FIXME: We should probably be caching this.
822    SmallVector<GEPInfo, 4> AddrInfo;
823    getAddrModeInfo(MI, MRI, AddrInfo);
824
825    if (hasVgprParts(AddrInfo))
826      return false;
827    return true;
828  }];
829}
830
// Addressing-mode selectors used by the scalar load patterns. The integer
// argument is the number of operands each selector yields, which matches how
// the patterns in this file bind them (e.g. SMRDImm -> $sbase, $offset;
// SMRDSgprImm -> $sbase, $soffset, $offset; SMRDBufferImm -> $offset). The
// string names the selector function implemented in the instruction selector.
831def SMRDImm         : ComplexPattern<iPTR, 2, "SelectSMRDImm">;
832def SMRDImm32       : ComplexPattern<iPTR, 2, "SelectSMRDImm32">;
833def SMRDSgpr        : ComplexPattern<iPTR, 2, "SelectSMRDSgpr">;
834def SMRDSgprImm     : ComplexPattern<iPTR, 3, "SelectSMRDSgprImm">;
835def SMRDBufferImm   : ComplexPattern<iPTR, 1, "SelectSMRDBufferImm">;
836def SMRDBufferImm32 : ComplexPattern<iPTR, 1, "SelectSMRDBufferImm32">;
837def SMRDBufferSgprImm : ComplexPattern<iPTR, 2, "SelectSMRDBufferSgprImm">;
838
// Selection patterns that turn an smrd_load of type `vt` into the scalar load
// whose pseudo name starts with `Instr`. One anonymous pattern is emitted per
// addressing form; the suffix appended to Instr picks the instruction variant.
// NOTE(review): the trailing literal 0 in each output is presumably the cpol
// operand - confirm against the SM_Load_Pseudo operand lists.
839multiclass SMRD_Pattern <string Instr, ValueType vt> {
840
841  // 1. IMM offset
842  def : GCNPat <
843    (smrd_load (SMRDImm i64:$sbase, i32:$offset)),
844    (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, 0))
845  >;
846
847  // 2. 32-bit IMM offset on CI
  // Targets the GFX7-only _IMM_ci real instruction directly.
848  def : GCNPat <
849    (smrd_load (SMRDImm32 i64:$sbase, i32:$offset)),
850    (vt (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, 0))> {
851    let OtherPredicates = [isGFX7Only];
852  }
853
854  // 3. SGPR offset
855  def : GCNPat <
856    (smrd_load (SMRDSgpr i64:$sbase, i32:$soffset)),
857    (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $soffset, 0))
858  >;
859
860  // 4. SGPR+IMM offset
  // Only enabled under the isGFX9Plus predicate.
861  def : GCNPat <
862    (smrd_load (SMRDSgprImm i64:$sbase, i32:$soffset, i32:$offset)),
863    (vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") $sbase, $soffset, $offset, 0))> {
864    let OtherPredicates = [isGFX9Plus];
865  }
866
867  // 5. No offset
  // A bare 64-bit SGPR base uses the _IMM form with an offset of 0.
868  def : GCNPat <
869    (vt (smrd_load (i64 SReg_64:$sbase))),
870    (vt (!cast<SM_Pseudo>(Instr#"_IMM") i64:$sbase, 0, 0))
871  >;
872}
873
// Selection patterns that turn an SIsbuffer_load producing `vt` into the
// scalar buffer load whose pseudo name starts with `Instr`. The cache policy
// immediate is converted with extract_cpol before being passed on.
// The per-pattern AddedComplexity values (2, then 1, then none) rank the
// immediate forms ahead of the plain SGPR-offset form.
874multiclass SMLoad_Pattern <string Instr, ValueType vt> {
875  // 1. Offset as an immediate
876  def : GCNPat <
877    (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm i32:$offset), timm:$cachepolicy),
878    (vt (!cast<SM_Pseudo>(Instr#"_IMM") SReg_128:$sbase, i32imm:$offset, (extract_cpol $cachepolicy)))> {
879    let AddedComplexity = 2;
880  }
881
882  // 2. 32-bit IMM offset on CI
883  def : GCNPat <
884    (vt (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm32 i32:$offset), timm:$cachepolicy)),
885    (!cast<InstSI>(Instr#"_IMM_ci") SReg_128:$sbase, smrd_literal_offset:$offset,
886                                    (extract_cpol $cachepolicy))> {
887    let OtherPredicates = [isGFX7Only];
888    let AddedComplexity = 1;
889  }
890
891  // 3. Offset loaded in a 32-bit SGPR
892  def : GCNPat <
893    (SIsbuffer_load v4i32:$sbase, i32:$soffset, timm:$cachepolicy),
894    (vt (!cast<SM_Pseudo>(Instr#"_SGPR") SReg_128:$sbase, SReg_32:$soffset, (extract_cpol $cachepolicy)))
895  >;
896
897  // 4. Offset as a 32-bit SGPR + immediate
  // Only enabled under the isGFX9Plus predicate.
898  def : GCNPat <
899    (SIsbuffer_load v4i32:$sbase, (SMRDBufferSgprImm i32:$soffset, i32:$offset),
900                    timm:$cachepolicy),
901    (vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") SReg_128:$sbase, SReg_32:$soffset, i32imm:$offset,
902                                             (extract_cpol $cachepolicy)))> {
903    let OtherPredicates = [isGFX9Plus];
904  }
905}
906
907// Global and constant loads can be selected to either MUBUF or SMRD
908// instructions, but SMRD instructions are faster so we want the instruction
909// selector to prefer those.
// Every pattern instantiated inside this scope is given AddedComplexity = 100,
// except where a pattern sets its own AddedComplexity.
910let AddedComplexity = 100 in {
911
// One SMRD_Pattern expansion per value type of each register size, pairing
// the type list with the load of matching width.
912foreach vt = Reg32Types.types in {
913defm : SMRD_Pattern <"S_LOAD_DWORD", vt>;
914}
915
916foreach vt = SReg_64.RegTypes in {
917defm : SMRD_Pattern <"S_LOAD_DWORDX2", vt>;
918}
919
920foreach vt = SReg_128.RegTypes in {
921defm : SMRD_Pattern <"S_LOAD_DWORDX4", vt>;
922}
923
924foreach vt = SReg_256.RegTypes in {
925defm : SMRD_Pattern <"S_LOAD_DWORDX8", vt>;
926}
927
928foreach vt = SReg_512.RegTypes in {
929defm : SMRD_Pattern <"S_LOAD_DWORDX16", vt>;
930}
931
// Buffer loads are instantiated explicitly for the integer vector types ...
932defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORD",     i32>;
933defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX2",   v2i32>;
934defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX4",   v4i32>;
935defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX8",   v8i32>;
936defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX16",  v16i32>;
937
// ... and again for the floating-point types of the same widths.
938defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORD",     f32>;
939defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX2",   v2f32>;
940defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX4",   v4f32>;
941defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX8",   v8f32>;
942defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX16",  v16f32>;
943} // End let AddedComplexity = 100
944
// When HasSMemTimeInst holds, a 64-bit readcyclecounter is selected to
// S_MEMTIME.
945let OtherPredicates = [HasSMemTimeInst] in {
946def : GCNPat <
947  (i64 (readcyclecounter)),
948  (S_MEMTIME)
949>;
950} // let OtherPredicates = [HasSMemTimeInst]
951
// When HasShaderCyclesRegister holds, a 64-bit readcyclecounter is built as a
// register pair: sub0 comes from S_GETREG_B32 on the SHADER_CYCLES hardware
// register and sub1 is the constant 0.
// NOTE(review): the `0, -12` arguments to getHwRegImm are presumably the bit
// offset and an encoded field width - confirm against getHwRegImm's
// definition before changing them.
952let OtherPredicates = [HasShaderCyclesRegister] in {
953def : GCNPat <
954  (i64 (readcyclecounter)),
955  (REG_SEQUENCE SReg_64,
956    (S_GETREG_B32 getHwRegImm<HWREG.SHADER_CYCLES, 0, -12>.ret), sub0,
957    (S_MOV_B32 (i32 0)), sub1)> {
958  // Prefer this to s_memtime because it has lower and more predictable latency.
959  let AddedComplexity = 1;
960}
961} // let OtherPredicates = [HasShaderCyclesRegister]
962
963//===----------------------------------------------------------------------===//
964// GFX10.
965//===----------------------------------------------------------------------===//
966
// Shared 64-bit (Enc64) SMEM encoding used by the GFX10 and GFX11 real
// instruction classes defined in this file.
//   op        - 8-bit opcode placed in Inst{25-18}.
//   ps        - pseudo being encoded; its has_* flags select which fields are
//               emitted, the rest stay `?`.
//   opName    - mnemonic forwarded to SM_Real.
//   subtarget - encoding family passed to SIMCInstr.
//   sgpr_null - register whose HWEncoding fills the soffset field when only
//               an immediate offset is present.
967class SMEM_Real_10Plus_common<bits<8> op, SM_Pseudo ps, string opName,
968                              int subtarget, RegisterWithSubRegs sgpr_null> :
969    SM_Real<ps, opName>, SIMCInstr<ps.PseudoInstr, subtarget>, Enc64 {
970  let Inst{5-0}   = !if(ps.has_sbase, sbase{6-1}, ?);
971  let Inst{12-6}  = !if(ps.has_sdst, sdst{6-0}, ?);
972  let Inst{25-18} = op;
973  let Inst{31-26} = 0x3d;
974  // There are SMEM instructions that do not employ any of the offset
975  // fields, in which case we need them to remain undefined.
  // Immediate field: the 21-bit offset if present, 0 for soffset-only forms.
976  let Inst{52-32} = !if(ps.has_offset, offset{20-0}, !if(ps.has_soffset, 0, ?));
  // Register field: soffset if present, otherwise sgpr_null for
  // immediate-only forms.
977  let Inst{63-57} = !if(ps.has_soffset, soffset{6-0},
978                        !if(ps.has_offset, sgpr_null.HWEncoding{6-0}, ?));
979}
980
// GFX10-only real SMEM instruction. Uses the pseudo's own mnemonic, the GFX10
// encoding family and SGPR_NULL_gfxpre11 as the null soffset register.
// DLC is encoded in bit 14 and GLC in bit 16; each stays `?` when the pseudo
// does not have that bit.
981class SMEM_Real_gfx10<bits<8> op, SM_Pseudo ps>
982    : SMEM_Real_10Plus_common<op, ps, ps.Mnemonic, SIEncodingFamily.GFX10,
983                              SGPR_NULL_gfxpre11> {
984  let AssemblerPredicate = isGFX10Only;
985  let DecoderNamespace = "GFX10";
986  let Inst{14}    = !if(ps.has_dlc, cpol{CPolBit.DLC}, ?);
987  let Inst{16}    = !if(ps.has_glc, cpol{CPolBit.GLC}, ?);
988}
989
// GFX10 real load for one offset mode. The pseudo is looked up by name as
// ps # offsets.Variant, and the input operand list is rebuilt as
// ($sbase, <offset operands of the mode>, $cpol) using that pseudo's BaseClass.
990class SMEM_Real_Load_gfx10<bits<8> op, string ps, OffsetMode offsets>
991    : SMEM_Real_gfx10<op, !cast<SM_Pseudo>(ps # offsets.Variant)> {
992  RegisterClass BaseClass = !cast<SM_Load_Pseudo>(ps # offsets.Variant).BaseClass;
993  let InOperandList = !con((ins BaseClass:$sbase), offsets.Ins, (ins CPol:$cpol));
994}
995
// Emits the three GFX10 real loads (_IMM, _SGPR, _SGPR_IMM) for the load
// named by `ps`, all sharing opcode `op`.
996multiclass SM_Real_Loads_gfx10<bits<8> op, string ps> {
997  def _IMM_gfx10 : SMEM_Real_Load_gfx10<op, ps, IMM_Offset>;
998  def _SGPR_gfx10 : SMEM_Real_Load_gfx10<op, ps, SGPR_Offset>;
999  def _SGPR_IMM_gfx10 : SMEM_Real_Load_gfx10<op, ps, SGPR_IMM_Offset>;
1000}
1001
// GFX10 real instruction whose register field in bits 12-6 carries the source
// operand sdata instead of a destination: sdst is cleared to `?` and the
// field is re-assigned from sdata (still gated on ps.has_sdst).
1002class SMEM_Real_Store_gfx10<bits<8> op, SM_Pseudo ps> : SMEM_Real_gfx10<op, ps> {
1003  bits<7> sdata;
1004
1005  let sdst = ?;
1006  let Inst{12-6} = !if(ps.has_sdst, sdata{6-0}, ?);
1007}
1008
// Emits the three GFX10 real stores (_IMM, _SGPR, _SGPR_IMM) for the store
// named by `ps`, all sharing opcode `op`. immPs and sgprPs default to the
// ps#_IMM and ps#_SGPR pseudos and supply the register classes for the
// rewritten operand lists.
// NOTE(review): the _SGPR_IMM definition takes SrcClass/BaseClass from sgprPs
// rather than from the ps#_SGPR_IMM pseudo it encodes; this is equivalent
// only if both pseudos use the same classes - confirm.
1009multiclass SM_Real_Stores_gfx10<bits<8> op, string ps,
1010                                SM_Store_Pseudo immPs = !cast<SM_Store_Pseudo>(ps#_IMM),
1011                                SM_Store_Pseudo sgprPs = !cast<SM_Store_Pseudo>(ps#_SGPR)> {
1012  def _IMM_gfx10 : SMEM_Real_Store_gfx10 <op, immPs> {
1013    let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smem_offset:$offset, CPol:$cpol);
1014  }
1015
1016  def _SGPR_gfx10 : SMEM_Real_Store_gfx10 <op, sgprPs> {
1017    let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$soffset, CPol:$cpol);
1018  }
1019
1020  def _SGPR_IMM_gfx10 : SMEM_Real_Store_gfx10 <op, !cast<SM_Store_Pseudo>(ps#_SGPR_IMM)> {
1021    let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase,
1022                             SReg_32:$soffset, smem_offset_mod:$offset, CPol:$cpol);
1023  }
1024}
1025
// GFX10 scalar loads: opcodes 0x000-0x004.
1026defm S_LOAD_DWORD            : SM_Real_Loads_gfx10<0x000, "S_LOAD_DWORD">;
1027defm S_LOAD_DWORDX2          : SM_Real_Loads_gfx10<0x001, "S_LOAD_DWORDX2">;
1028defm S_LOAD_DWORDX4          : SM_Real_Loads_gfx10<0x002, "S_LOAD_DWORDX4">;
1029defm S_LOAD_DWORDX8          : SM_Real_Loads_gfx10<0x003, "S_LOAD_DWORDX8">;
1030defm S_LOAD_DWORDX16         : SM_Real_Loads_gfx10<0x004, "S_LOAD_DWORDX16">;
1031
// GFX10 scalar scratch loads (opcodes 0x005-0x007), gated on
// HasScalarFlatScratchInsts.
1032let SubtargetPredicate = HasScalarFlatScratchInsts in {
1033defm S_SCRATCH_LOAD_DWORD    : SM_Real_Loads_gfx10<0x005, "S_SCRATCH_LOAD_DWORD">;
1034defm S_SCRATCH_LOAD_DWORDX2  : SM_Real_Loads_gfx10<0x006, "S_SCRATCH_LOAD_DWORDX2">;
1035defm S_SCRATCH_LOAD_DWORDX4  : SM_Real_Loads_gfx10<0x007, "S_SCRATCH_LOAD_DWORDX4">;
1036} // End SubtargetPredicate = HasScalarFlatScratchInsts
1037
// GFX10 scalar buffer loads: opcodes 0x008-0x00c.
1038defm S_BUFFER_LOAD_DWORD     : SM_Real_Loads_gfx10<0x008, "S_BUFFER_LOAD_DWORD">;
1039defm S_BUFFER_LOAD_DWORDX2   : SM_Real_Loads_gfx10<0x009, "S_BUFFER_LOAD_DWORDX2">;
1040defm S_BUFFER_LOAD_DWORDX4   : SM_Real_Loads_gfx10<0x00a, "S_BUFFER_LOAD_DWORDX4">;
1041defm S_BUFFER_LOAD_DWORDX8   : SM_Real_Loads_gfx10<0x00b, "S_BUFFER_LOAD_DWORDX8">;
1042defm S_BUFFER_LOAD_DWORDX16  : SM_Real_Loads_gfx10<0x00c, "S_BUFFER_LOAD_DWORDX16">;
1043
// GFX10 scalar stores, all gated on HasScalarStores:
//   S_STORE_*          0x010-0x012
//   S_SCRATCH_STORE_*  0x015-0x017 (additionally HasScalarFlatScratchInsts)
//   S_BUFFER_STORE_*   0x018-0x01a
1044let SubtargetPredicate = HasScalarStores in {
1045defm S_STORE_DWORD           : SM_Real_Stores_gfx10<0x010, "S_STORE_DWORD">;
1046defm S_STORE_DWORDX2         : SM_Real_Stores_gfx10<0x011, "S_STORE_DWORDX2">;
1047defm S_STORE_DWORDX4         : SM_Real_Stores_gfx10<0x012, "S_STORE_DWORDX4">;
// The scratch predicate goes in OtherPredicates because SubtargetPredicate is
// already taken by HasScalarStores in the enclosing scope.
1048let OtherPredicates = [HasScalarFlatScratchInsts] in {
1049defm S_SCRATCH_STORE_DWORD   : SM_Real_Stores_gfx10<0x015, "S_SCRATCH_STORE_DWORD">;
1050defm S_SCRATCH_STORE_DWORDX2 : SM_Real_Stores_gfx10<0x016, "S_SCRATCH_STORE_DWORDX2">;
1051defm S_SCRATCH_STORE_DWORDX4 : SM_Real_Stores_gfx10<0x017, "S_SCRATCH_STORE_DWORDX4">;
1052} // End OtherPredicates = [HasScalarFlatScratchInsts]
1053defm S_BUFFER_STORE_DWORD    : SM_Real_Stores_gfx10<0x018, "S_BUFFER_STORE_DWORD">;
1054defm S_BUFFER_STORE_DWORDX2  : SM_Real_Stores_gfx10<0x019, "S_BUFFER_STORE_DWORDX2">;
1055defm S_BUFFER_STORE_DWORDX4  : SM_Real_Stores_gfx10<0x01a, "S_BUFFER_STORE_DWORDX4">;
1056} // End SubtargetPredicate = HasScalarStores
1057
// GFX10 real instructions for the timer, cache-invalidate and wave-id
// pseudos. Note the entries are not listed in opcode order.
1058def S_MEMREALTIME_gfx10              : SMEM_Real_gfx10<0x025, S_MEMREALTIME>;
1059def S_MEMTIME_gfx10                  : SMEM_Real_gfx10<0x024, S_MEMTIME>;
1060def S_GL1_INV_gfx10                  : SMEM_Real_gfx10<0x01f, S_GL1_INV>;
1061def S_GET_WAVEID_IN_WORKGROUP_gfx10  : SMEM_Real_gfx10<0x02a, S_GET_WAVEID_IN_WORKGROUP>;
1062def S_DCACHE_INV_gfx10               : SMEM_Real_gfx10<0x020, S_DCACHE_INV>;
1063
// S_DCACHE_WB (opcode 0x021) is only defined when HasScalarStores holds.
1064let SubtargetPredicate = HasScalarStores in {
1065def S_DCACHE_WB_gfx10                : SMEM_Real_gfx10<0x021, S_DCACHE_WB>;
1066} // End SubtargetPredicate = HasScalarStores
1067
// Emits the _IMM, _SGPR and _SGPR_IMM GFX10 real instructions for the probe
// named by `ps`. They reuse SMEM_Real_Store_gfx10, so bits 12-6 are taken
// from sdata.
1068multiclass SM_Real_Probe_gfx10<bits<8> op, string ps> {
1069  def _IMM_gfx10  : SMEM_Real_Store_gfx10 <op, !cast<SM_Pseudo>(ps#_IMM)>;
1070  def _SGPR_gfx10 : SMEM_Real_Store_gfx10 <op, !cast<SM_Pseudo>(ps#_SGPR)>;
1071  def _SGPR_IMM_gfx10
1072    : SMEM_Real_Store_gfx10 <op, !cast<SM_Pseudo>(ps#_SGPR_IMM)>;
1073}
1074
// GFX10 ATC probe instructions: opcodes 0x26 and 0x27.
1075defm S_ATC_PROBE        : SM_Real_Probe_gfx10 <0x26, "S_ATC_PROBE">;
1076defm S_ATC_PROBE_BUFFER : SM_Real_Probe_gfx10 <0x27, "S_ATC_PROBE_BUFFER">;
1077
// GFX10 real encoding of a scalar memory atomic, layered on SMEM_Real_gfx10.
//   op - 8-bit opcode forwarded to SMEM_Real_gfx10.
//   ps - the SM_Atomic_Pseudo being encoded; its Constraints, DisableEncoding
//        and glc values are copied into this record.
// The AtomicNoRet mixin is keyed on this record's NAME with "_RTN" removed,
// together with ps.glc.
1078class SMEM_Atomic_Real_gfx10 <bits<8> op, SM_Atomic_Pseudo ps>
1079  : SMEM_Real_gfx10 <op, ps>,
1080    AtomicNoRet <!subst("_RTN","",NAME), ps.glc> {
1081
1082  bits<7> sdata;
1083
1084  let Constraints = ps.Constraints;
1085  let DisableEncoding = ps.DisableEncoding;
1086
  // The GLC bit of cpol is forced to the pseudo's glc value.
1087  let cpol{CPolBit.GLC} = ps.glc;
1088
  // Unlike the base class, a missing DLC bit is encoded as 0 rather than `?`.
1089  let Inst{14} = !if(ps.has_dlc, cpol{CPolBit.DLC}, 0);
  // Bits 12-6 encode sdst when ps.glc is set and sdata otherwise.
1090  let Inst{12-6} = !if(ps.glc, sdst{6-0}, sdata{6-0});
1091}
1092
// Emits the GFX10 real encodings of the scalar atomic named by `ps`: the
// _IMM, _SGPR and _SGPR_IMM forms, each from the plain pseudo and from its
// _RTN counterpart, all sharing opcode `op`.
1093multiclass SM_Real_Atomics_gfx10<bits<8> op, string ps> {
1094  def _IMM_gfx10       : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_IMM)>;
1095  def _SGPR_gfx10      : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR)>;
1096  def _SGPR_IMM_gfx10  : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_IMM)>;
1097  def _IMM_RTN_gfx10   : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_IMM_RTN)>;
1098  def _SGPR_RTN_gfx10  : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_RTN)>;
1099  def _SGPR_IMM_RTN_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_IMM_RTN)>;
1100}
1101
// GFX10 scalar atomics, gated on HasScalarAtomics. Opcode groups mirror the
// operation order SWAP, CMPSWAP, ADD, SUB, SMIN, UMIN, SMAX, UMAX, AND, OR,
// XOR, INC, DEC:
//   S_BUFFER_ATOMIC_*     0x40-0x4c
//   S_BUFFER_ATOMIC_*_X2  0x60-0x6c
//   S_ATOMIC_*            0x80-0x8c
//   S_ATOMIC_*_X2         0xa0-0xac
// NOTE(review): the SM_Real_Discard_gfx10 multiclass and the S_DCACHE_DISCARD
// defms below are also inside this HasScalarAtomics scope - confirm that the
// discard instructions are meant to share the atomics predicate.
1102let SubtargetPredicate = HasScalarAtomics in {
1103
1104defm S_BUFFER_ATOMIC_SWAP         : SM_Real_Atomics_gfx10 <0x40, "S_BUFFER_ATOMIC_SWAP">;
1105defm S_BUFFER_ATOMIC_CMPSWAP      : SM_Real_Atomics_gfx10 <0x41, "S_BUFFER_ATOMIC_CMPSWAP">;
1106defm S_BUFFER_ATOMIC_ADD          : SM_Real_Atomics_gfx10 <0x42, "S_BUFFER_ATOMIC_ADD">;
1107defm S_BUFFER_ATOMIC_SUB          : SM_Real_Atomics_gfx10 <0x43, "S_BUFFER_ATOMIC_SUB">;
1108defm S_BUFFER_ATOMIC_SMIN         : SM_Real_Atomics_gfx10 <0x44, "S_BUFFER_ATOMIC_SMIN">;
1109defm S_BUFFER_ATOMIC_UMIN         : SM_Real_Atomics_gfx10 <0x45, "S_BUFFER_ATOMIC_UMIN">;
1110defm S_BUFFER_ATOMIC_SMAX         : SM_Real_Atomics_gfx10 <0x46, "S_BUFFER_ATOMIC_SMAX">;
1111defm S_BUFFER_ATOMIC_UMAX         : SM_Real_Atomics_gfx10 <0x47, "S_BUFFER_ATOMIC_UMAX">;
1112defm S_BUFFER_ATOMIC_AND          : SM_Real_Atomics_gfx10 <0x48, "S_BUFFER_ATOMIC_AND">;
1113defm S_BUFFER_ATOMIC_OR           : SM_Real_Atomics_gfx10 <0x49, "S_BUFFER_ATOMIC_OR">;
1114defm S_BUFFER_ATOMIC_XOR          : SM_Real_Atomics_gfx10 <0x4a, "S_BUFFER_ATOMIC_XOR">;
1115defm S_BUFFER_ATOMIC_INC          : SM_Real_Atomics_gfx10 <0x4b, "S_BUFFER_ATOMIC_INC">;
1116defm S_BUFFER_ATOMIC_DEC          : SM_Real_Atomics_gfx10 <0x4c, "S_BUFFER_ATOMIC_DEC">;
1117
1118defm S_BUFFER_ATOMIC_SWAP_X2      : SM_Real_Atomics_gfx10 <0x60, "S_BUFFER_ATOMIC_SWAP_X2">;
1119defm S_BUFFER_ATOMIC_CMPSWAP_X2   : SM_Real_Atomics_gfx10 <0x61, "S_BUFFER_ATOMIC_CMPSWAP_X2">;
1120defm S_BUFFER_ATOMIC_ADD_X2       : SM_Real_Atomics_gfx10 <0x62, "S_BUFFER_ATOMIC_ADD_X2">;
1121defm S_BUFFER_ATOMIC_SUB_X2       : SM_Real_Atomics_gfx10 <0x63, "S_BUFFER_ATOMIC_SUB_X2">;
1122defm S_BUFFER_ATOMIC_SMIN_X2      : SM_Real_Atomics_gfx10 <0x64, "S_BUFFER_ATOMIC_SMIN_X2">;
1123defm S_BUFFER_ATOMIC_UMIN_X2      : SM_Real_Atomics_gfx10 <0x65, "S_BUFFER_ATOMIC_UMIN_X2">;
1124defm S_BUFFER_ATOMIC_SMAX_X2      : SM_Real_Atomics_gfx10 <0x66, "S_BUFFER_ATOMIC_SMAX_X2">;
1125defm S_BUFFER_ATOMIC_UMAX_X2      : SM_Real_Atomics_gfx10 <0x67, "S_BUFFER_ATOMIC_UMAX_X2">;
1126defm S_BUFFER_ATOMIC_AND_X2       : SM_Real_Atomics_gfx10 <0x68, "S_BUFFER_ATOMIC_AND_X2">;
1127defm S_BUFFER_ATOMIC_OR_X2        : SM_Real_Atomics_gfx10 <0x69, "S_BUFFER_ATOMIC_OR_X2">;
1128defm S_BUFFER_ATOMIC_XOR_X2       : SM_Real_Atomics_gfx10 <0x6a, "S_BUFFER_ATOMIC_XOR_X2">;
1129defm S_BUFFER_ATOMIC_INC_X2       : SM_Real_Atomics_gfx10 <0x6b, "S_BUFFER_ATOMIC_INC_X2">;
1130defm S_BUFFER_ATOMIC_DEC_X2       : SM_Real_Atomics_gfx10 <0x6c, "S_BUFFER_ATOMIC_DEC_X2">;
1131
1132defm S_ATOMIC_SWAP                : SM_Real_Atomics_gfx10 <0x80, "S_ATOMIC_SWAP">;
1133defm S_ATOMIC_CMPSWAP             : SM_Real_Atomics_gfx10 <0x81, "S_ATOMIC_CMPSWAP">;
1134defm S_ATOMIC_ADD                 : SM_Real_Atomics_gfx10 <0x82, "S_ATOMIC_ADD">;
1135defm S_ATOMIC_SUB                 : SM_Real_Atomics_gfx10 <0x83, "S_ATOMIC_SUB">;
1136defm S_ATOMIC_SMIN                : SM_Real_Atomics_gfx10 <0x84, "S_ATOMIC_SMIN">;
1137defm S_ATOMIC_UMIN                : SM_Real_Atomics_gfx10 <0x85, "S_ATOMIC_UMIN">;
1138defm S_ATOMIC_SMAX                : SM_Real_Atomics_gfx10 <0x86, "S_ATOMIC_SMAX">;
1139defm S_ATOMIC_UMAX                : SM_Real_Atomics_gfx10 <0x87, "S_ATOMIC_UMAX">;
1140defm S_ATOMIC_AND                 : SM_Real_Atomics_gfx10 <0x88, "S_ATOMIC_AND">;
1141defm S_ATOMIC_OR                  : SM_Real_Atomics_gfx10 <0x89, "S_ATOMIC_OR">;
1142defm S_ATOMIC_XOR                 : SM_Real_Atomics_gfx10 <0x8a, "S_ATOMIC_XOR">;
1143defm S_ATOMIC_INC                 : SM_Real_Atomics_gfx10 <0x8b, "S_ATOMIC_INC">;
1144defm S_ATOMIC_DEC                 : SM_Real_Atomics_gfx10 <0x8c, "S_ATOMIC_DEC">;
1145
1146defm S_ATOMIC_SWAP_X2             : SM_Real_Atomics_gfx10 <0xa0, "S_ATOMIC_SWAP_X2">;
1147defm S_ATOMIC_CMPSWAP_X2          : SM_Real_Atomics_gfx10 <0xa1, "S_ATOMIC_CMPSWAP_X2">;
1148defm S_ATOMIC_ADD_X2              : SM_Real_Atomics_gfx10 <0xa2, "S_ATOMIC_ADD_X2">;
1149defm S_ATOMIC_SUB_X2              : SM_Real_Atomics_gfx10 <0xa3, "S_ATOMIC_SUB_X2">;
1150defm S_ATOMIC_SMIN_X2             : SM_Real_Atomics_gfx10 <0xa4, "S_ATOMIC_SMIN_X2">;
1151defm S_ATOMIC_UMIN_X2             : SM_Real_Atomics_gfx10 <0xa5, "S_ATOMIC_UMIN_X2">;
1152defm S_ATOMIC_SMAX_X2             : SM_Real_Atomics_gfx10 <0xa6, "S_ATOMIC_SMAX_X2">;
1153defm S_ATOMIC_UMAX_X2             : SM_Real_Atomics_gfx10 <0xa7, "S_ATOMIC_UMAX_X2">;
1154defm S_ATOMIC_AND_X2              : SM_Real_Atomics_gfx10 <0xa8, "S_ATOMIC_AND_X2">;
1155defm S_ATOMIC_OR_X2               : SM_Real_Atomics_gfx10 <0xa9, "S_ATOMIC_OR_X2">;
1156defm S_ATOMIC_XOR_X2              : SM_Real_Atomics_gfx10 <0xaa, "S_ATOMIC_XOR_X2">;
1157defm S_ATOMIC_INC_X2              : SM_Real_Atomics_gfx10 <0xab, "S_ATOMIC_INC_X2">;
1158defm S_ATOMIC_DEC_X2              : SM_Real_Atomics_gfx10 <0xac, "S_ATOMIC_DEC_X2">;
1159
// Emits the _IMM, _SGPR and _SGPR_IMM GFX10 real instructions for the discard
// named by `ps`, all sharing opcode `op`.
1160multiclass SM_Real_Discard_gfx10<bits<8> op, string ps> {
1161  def _IMM_gfx10  : SMEM_Real_gfx10 <op, !cast<SM_Pseudo>(ps#_IMM)>;
1162  def _SGPR_gfx10 : SMEM_Real_gfx10 <op, !cast<SM_Pseudo>(ps#_SGPR)>;
1163  def _SGPR_IMM_gfx10 : SMEM_Real_gfx10 <op, !cast<SM_Pseudo>(ps#_SGPR_IMM)>;
1164}
1165
1166defm S_DCACHE_DISCARD    : SM_Real_Discard_gfx10 <0x28, "S_DCACHE_DISCARD">;
1167defm S_DCACHE_DISCARD_X2 : SM_Real_Discard_gfx10 <0x29, "S_DCACHE_DISCARD_X2">;
1168
1169} // End SubtargetPredicate = HasScalarAtomics
1170
// Searchable table built from every record deriving from SM_Real. Each row
// holds the instruction's Opcode and its is_buffer flag; Opcode is the
// primary key. The generated row type is named SMInfo and the generated
// lookup function is named getSMEMOpcodeHelper.
1171def SMInfoTable : GenericTable {
1172  let FilterClass = "SM_Real";
1173  let CppTypeName = "SMInfo";
1174  let Fields = ["Opcode", "is_buffer"];
1175
1176  let PrimaryKey = ["Opcode"];
1177  let PrimaryKeyName = "getSMEMOpcodeHelper";
1178}
1179
1180//===----------------------------------------------------------------------===//
1181// GFX11.
1182//===----------------------------------------------------------------------===//
1183
// GFX11+ real SMEM instruction. Uses the GFX11 encoding family and
// SGPR_NULL_gfx11plus as the null soffset register; opName defaults to the
// pseudo's mnemonic so that renamed instructions can override it.
// DLC is encoded in bit 13 and GLC in bit 14; each is 0 when the pseudo does
// not have that bit.
1184class SMEM_Real_gfx11<bits<8> op, SM_Pseudo ps, string opName = ps.Mnemonic> :
1185    SMEM_Real_10Plus_common<op, ps, opName, SIEncodingFamily.GFX11,
1186                            SGPR_NULL_gfx11plus> {
1187  let AssemblerPredicate = isGFX11Plus;
1188  let DecoderNamespace = "GFX11";
1189  let Inst{13}    = !if(ps.has_dlc, cpol{CPolBit.DLC}, 0);
1190  let Inst{14}    = !if(ps.has_glc, cpol{CPolBit.GLC}, 0);
1191}
1192
// GFX11 real load for one offset mode, printed with mnemonic `opName`. The
// pseudo is looked up by name as ps # offsets.Variant, and the input operand
// list is rebuilt as ($sbase, <offset operands of the mode>, $cpol) using
// that pseudo's BaseClass.
1193class SMEM_Real_Load_gfx11<bits<8> op, string ps, string opName, OffsetMode offsets> :
1194    SMEM_Real_gfx11<op, !cast<SM_Pseudo>(ps # offsets.Variant), opName> {
1195  RegisterClass BaseClass = !cast<SM_Load_Pseudo>(ps # offsets.Variant).BaseClass;
1196  let InOperandList = !con((ins BaseClass:$sbase), offsets.Ins, (ins CPol:$cpol));
1197}
1198
// Emits the three GFX11 real loads (_IMM, _SGPR, _SGPR_IMM) for the load
// named by `ps` under the new mnemonic `opName`, plus an assembler alias from
// the pseudo's original mnemonic to `opName` that is active on isGFX11Plus.
1199multiclass SM_Real_Loads_gfx11<bits<8> op, string ps, string opName> {
1200  def _IMM_gfx11 : SMEM_Real_Load_gfx11<op, ps, opName, IMM_Offset>;
1201  def _SGPR_gfx11 : SMEM_Real_Load_gfx11<op, ps, opName, SGPR_Offset>;
1202  def _SGPR_IMM_gfx11 : SMEM_Real_Load_gfx11<op, ps, opName, SGPR_IMM_Offset>;
1203  def : MnemonicAlias<!cast<SM_Pseudo>(ps#"_IMM").Mnemonic, opName>,
1204                      Requires<[isGFX11Plus]>;
1205}
1206
// GFX11 scalar loads. Each defm names the pseudo by its DWORD-style name and
// supplies the bit-width style mnemonic used on GFX11:
//   S_LOAD_*         0x000-0x004
//   S_BUFFER_LOAD_*  0x008-0x00c
1207defm S_LOAD_B32  : SM_Real_Loads_gfx11<0x000, "S_LOAD_DWORD", "s_load_b32">;
1208defm S_LOAD_B64  : SM_Real_Loads_gfx11<0x001, "S_LOAD_DWORDX2", "s_load_b64">;
1209defm S_LOAD_B128 : SM_Real_Loads_gfx11<0x002, "S_LOAD_DWORDX4", "s_load_b128">;
1210defm S_LOAD_B256 : SM_Real_Loads_gfx11<0x003, "S_LOAD_DWORDX8", "s_load_b256">;
1211defm S_LOAD_B512 : SM_Real_Loads_gfx11<0x004, "S_LOAD_DWORDX16", "s_load_b512">;
1212
1213defm S_BUFFER_LOAD_B32  : SM_Real_Loads_gfx11<0x008, "S_BUFFER_LOAD_DWORD", "s_buffer_load_b32">;
1214defm S_BUFFER_LOAD_B64  : SM_Real_Loads_gfx11<0x009, "S_BUFFER_LOAD_DWORDX2", "s_buffer_load_b64">;
1215defm S_BUFFER_LOAD_B128 : SM_Real_Loads_gfx11<0x00a, "S_BUFFER_LOAD_DWORDX4", "s_buffer_load_b128">;
1216defm S_BUFFER_LOAD_B256 : SM_Real_Loads_gfx11<0x00b, "S_BUFFER_LOAD_DWORDX8", "s_buffer_load_b256">;
1217defm S_BUFFER_LOAD_B512 : SM_Real_Loads_gfx11<0x00c, "S_BUFFER_LOAD_DWORDX16", "s_buffer_load_b512">;
1218
// GFX11 cache invalidates. The opcodes differ from the GFX10 definitions in
// this file (0x01f / 0x020 there, 0x020 / 0x021 here).
1219def S_GL1_INV_gfx11    : SMEM_Real_gfx11<0x020, S_GL1_INV>;
1220def S_DCACHE_INV_gfx11 : SMEM_Real_gfx11<0x021, S_DCACHE_INV>;
1221
// GFX11 real instruction whose register field in bits 12-6 carries the source
// operand sdata instead of a destination: sdst is cleared to `?` and the
// field is re-assigned from sdata (still gated on ps.has_sdst).
1222class SMEM_Real_Store_gfx11 <bits<8> op, SM_Pseudo ps> : SMEM_Real_gfx11<op, ps> {
1223  // encoding
1224  bits<7> sdata;
1225
1226  let sdst = ?;
1227  let Inst{12-6}  = !if(ps.has_sdst, sdata{6-0}, ?);
1228}
1229
// Emits the _IMM, _SGPR and _SGPR_IMM GFX11 real instructions for the probe
// named by `ps`. They reuse SMEM_Real_Store_gfx11, so bits 12-6 are taken
// from sdata.
1230multiclass SM_Real_Probe_gfx11<bits<8> op, string ps> {
1231  def _IMM_gfx11  : SMEM_Real_Store_gfx11 <op, !cast<SM_Probe_Pseudo>(ps#_IMM)>;
1232  def _SGPR_gfx11 : SMEM_Real_Store_gfx11 <op, !cast<SM_Probe_Pseudo>(ps#_SGPR)>;
1233  def _SGPR_IMM_gfx11
1234    : SMEM_Real_Store_gfx11 <op, !cast<SM_Probe_Pseudo>(ps#_SGPR_IMM)>;
1235}
1236
// GFX11 ATC probe instructions: opcodes 0x22 and 0x23.
1237defm S_ATC_PROBE        : SM_Real_Probe_gfx11 <0x22, "S_ATC_PROBE">;
1238defm S_ATC_PROBE_BUFFER : SM_Real_Probe_gfx11 <0x23, "S_ATC_PROBE_BUFFER">;
1239