xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/SMInstructions.td (revision a90b9d0159070121c221b966469c3e36d912bf82)
1//===---- SMInstructions.td - Scalar Memory Instruction Definitions -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
// Immediate offset operand used by the SI/CI SMRD real instructions, whose
// encoding only stores offset bits 7-0.
def smrd_offset_8 : ImmOperand<i32, "SMRDOffset8", 1>;

// Offset operands for the SMEM encodings. Both variants share the same custom
// MC encoder and decoder hooks.
let DecoderMethod = "decodeSMEMOffset",
    EncoderMethod = "getSMEMOffsetEncoding" in {
  // Plain immediate offset.
  def smem_offset : ImmOperand<i32, "SMEMOffset", 1>;
  // Immediate offset spelled with the "offset" name in assembly.
  def smem_offset_mod : NamedIntOperand<i32, "offset", "SMEMOffsetMod">;
}
16
17//===----------------------------------------------------------------------===//
18// Scalar Memory classes
19//===----------------------------------------------------------------------===//
20
// Base class shared by all scalar memory (SM) pseudo instructions.
//
// The pseudo itself is created with an empty asm string; the mnemonic and the
// operand text are stored in Mnemonic/AsmOperands so that SM_Real can build the
// final assembly string. The has_* and is_buffer bits describe which operands
// a given pseudo carries and are consulted by the target encoding classes.
class SM_Pseudo <string opName, dag outs, dag ins, string asmOps,
                 list<dag> pattern = []>
  : InstSI <outs, ins, "", pattern>,
    SIMCInstr<opName, SIEncodingFamily.NONE> {
  let isCodeGenOnly = 1;
  let isPseudo = 1;

  // Default memory behaviour: a plain, non-atomic load with no side effects.
  // Subclasses override these where necessary.
  let mayLoad = 1;
  let mayStore = 0;
  let hasSideEffects = 0;
  let maybeAtomic = 0;

  let SMRD = 1;
  let LGKM_CNT = 1;
  let SchedRW = [WriteSMEM];
  let UseNamedOperandTable = 1;

  string Mnemonic = opName;
  string AsmOperands = asmOps;

  // Operand-presence flags.
  bits<1> has_sbase = 1;
  bits<1> has_sdst = 1;
  bit has_glc = 0;
  bit has_dlc = 0;
  bit has_offset = 0;
  bit has_soffset = 0;
  bit is_buffer = 0;
}
47
// Base class for the real (encodable) scalar memory instructions.
//
// A real instruction reuses the operand lists of its pseudo `ps` and forms its
// asm string from `opName` (the pseudo's mnemonic by default) followed by the
// pseudo's operand text.
class SM_Real <SM_Pseudo ps, string opName = ps.Mnemonic>
  : InstSI<ps.OutOperandList, ps.InOperandList, opName # ps.AsmOperands> {

  let isCodeGenOnly = 0;
  let isPseudo = 0;

  Instruction Opcode = !cast<Instruction>(NAME);

  // Mirror the relevant properties of the pseudo on the real instruction.
  let mayLoad              = ps.mayLoad;
  let mayStore             = ps.mayStore;
  let hasSideEffects       = ps.hasSideEffects;
  let SMRD                 = ps.SMRD;
  let LGKM_CNT             = ps.LGKM_CNT;
  let SchedRW              = ps.SchedRW;
  let UseNamedOperandTable = ps.UseNamedOperandTable;
  let SubtargetPredicate   = ps.SubtargetPredicate;
  let AsmMatchConverter    = ps.AsmMatchConverter;
  let IsAtomicNoRet        = ps.IsAtomicNoRet;
  let IsAtomicRet          = ps.IsAtomicRet;

  let TSFlags = ps.TSFlags;

  bit is_buffer = ps.is_buffer;

  // Encoding fields; the target-specific subclasses place these into Inst.
  bits<7>  sbase;
  bits<7>  sdst;
  bits<32> offset;
  bits<8>  soffset;
  bits<5>  cpol;
}
80
// Describes one addressing variant of a scalar memory instruction: whether it
// has an immediate offset and/or an SGPR offset, the suffix appended to the
// pseudo name, the extra input operands and their assembly text.
class OffsetMode<bit hasOffset, bit hasSOffset, string variant, dag ins,
                 string asm> {
  bit HasOffset = hasOffset;
  bit HasSOffset = hasSOffset;
  string Variant = variant;
  dag Ins = ins;
  string Asm = asm;
}
89
// The three addressing variants: immediate only, SGPR only, and SGPR plus
// immediate.
def IMM_Offset
  : OffsetMode<1, 0, "_IMM", (ins smem_offset:$offset), "$offset">;
def SGPR_Offset
  : OffsetMode<0, 1, "_SGPR", (ins SReg_32:$soffset), "$soffset">;
def SGPR_IMM_Offset
  : OffsetMode<1, 1, "_SGPR_IMM",
               (ins SReg_32:$soffset, smem_offset_mod:$offset),
               "$soffset$offset">;
95
// Pseudo for the probe instructions: no result, an 8-bit immediate $sdata, a
// base register and the offset operands of the chosen addressing variant.
class SM_Probe_Pseudo <string opName, RegisterClass baseClass,
                       OffsetMode offsets>
  : SM_Pseudo<opName, (outs),
              !con((ins i8imm:$sdata, baseClass:$sbase), offsets.Ins),
              " $sdata, $sbase, " # offsets.Asm> {
  // Modelled as neither load nor store, but with side effects.
  let hasSideEffects = 1;
  let mayLoad = 0;
  let mayStore = 0;
  let ScalarStore = 0;
  let LGKM_CNT = 0;

  let has_glc = 0;
  let has_offset = offsets.HasOffset;
  let has_soffset = offsets.HasSOffset;

  let PseudoInstr = opName # offsets.Variant;
}
110
// Pseudo for scalar loads: one $sdst result, a base register, the offset
// operands of the chosen addressing variant and a cache-policy operand.
class SM_Load_Pseudo <string opName, RegisterClass baseClass,
                      RegisterClass dstClass, OffsetMode offsets>
  : SM_Pseudo<opName, (outs dstClass:$sdst),
              !con((ins baseClass:$sbase), offsets.Ins, (ins CPol:$cpol)),
              " $sdst, $sbase, " # offsets.Asm # "$cpol", []> {
  // Kept so that real definitions can rebuild the operand list with the same
  // base register class.
  RegisterClass BaseClass = baseClass;

  let mayLoad = 1;
  let mayStore = 0;
  let isReMaterializable = 1;

  let has_glc = 1;
  let has_dlc = 1;
  let has_offset = offsets.HasOffset;
  let has_soffset = offsets.HasSOffset;

  let PseudoInstr = opName # offsets.Variant;
}
126
// Pseudo for scalar stores: no result, the data register $sdata, a base
// register, the offset operands of the chosen addressing variant and a
// cache-policy operand.
class SM_Store_Pseudo <string opName, RegisterClass baseClass,
                       RegisterClass srcClass, OffsetMode offsets>
  : SM_Pseudo<opName, (outs),
              !con((ins srcClass:$sdata, baseClass:$sbase), offsets.Ins,
                   (ins CPol:$cpol)),
              " $sdata, $sbase, " # offsets.Asm # "$cpol"> {
  RegisterClass BaseClass = baseClass;

  let mayStore = 1;
  let mayLoad = 0;
  let ScalarStore = 1;

  let has_glc = 1;
  let has_dlc = 1;
  let has_offset = offsets.HasOffset;
  let has_soffset = offsets.HasSOffset;

  let PseudoInstr = opName # offsets.Variant;
}
142
// Pseudo for the discard instructions: no result and no data operand, only a
// 64-bit base register plus the offset operands of the addressing variant.
class SM_Discard_Pseudo <string opName, OffsetMode offsets>
  : SM_Pseudo<opName, (outs),
              !con((ins SReg_64:$sbase), offsets.Ins),
              " $sbase, " # offsets.Asm> {
  // Modelled as neither load nor store, but with side effects.
  let hasSideEffects = 1;
  let mayLoad = 0;
  let mayStore = 0;
  let ScalarStore = 0;

  let has_sdst = 0;
  let has_glc = 0;
  let has_offset = offsets.HasOffset;
  let has_soffset = offsets.HasSOffset;

  let PseudoInstr = opName # offsets.Variant;
}
156
// Instantiates the _IMM, _SGPR and _SGPR_IMM load pseudos for one opcode. The
// mnemonic is the lower-cased defm name.
multiclass SM_Pseudo_Loads<RegisterClass baseClass,
                           RegisterClass dstClass> {
  defvar mnemonic = !tolower(NAME);
  def _IMM
    : SM_Load_Pseudo <mnemonic, baseClass, dstClass, IMM_Offset>;
  def _SGPR
    : SM_Load_Pseudo <mnemonic, baseClass, dstClass, SGPR_Offset>;
  def _SGPR_IMM
    : SM_Load_Pseudo <mnemonic, baseClass, dstClass, SGPR_IMM_Offset>;
}
164
// Instantiates the _IMM, _SGPR and _SGPR_IMM store pseudos for one opcode. The
// mnemonic is the lower-cased defm name.
multiclass SM_Pseudo_Stores<RegisterClass baseClass,
                            RegisterClass srcClass> {
  defvar mnemonic = !tolower(NAME);
  def _IMM
    : SM_Store_Pseudo <mnemonic, baseClass, srcClass, IMM_Offset>;
  def _SGPR
    : SM_Store_Pseudo <mnemonic, baseClass, srcClass, SGPR_Offset>;
  def _SGPR_IMM
    : SM_Store_Pseudo <mnemonic, baseClass, srcClass, SGPR_IMM_Offset>;
}
172
// Instantiates the _IMM, _SGPR and _SGPR_IMM discard pseudos for one opcode.
// The mnemonic is the lower-cased defm name.
multiclass SM_Pseudo_Discards {
  defvar mnemonic = !tolower(NAME);
  def _IMM      : SM_Discard_Pseudo <mnemonic, IMM_Offset>;
  def _SGPR     : SM_Discard_Pseudo <mnemonic, SGPR_Offset>;
  def _SGPR_IMM : SM_Discard_Pseudo <mnemonic, SGPR_IMM_Offset>;
}
179
// Pseudo for instructions that take no inputs and return a 64-bit value in
// $sdst, selected from the parameterless `node`.
class SM_Time_Pseudo<string opName, SDPatternOperator node = null_frag>
  : SM_Pseudo<opName, (outs SReg_64_XEXEC:$sdst), (ins),
              " $sdst", [(set i64:$sdst, (node))]> {
  // Not a memory access, but has side effects.
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 1;

  let has_sbase = 0;
}
189
// Pseudo for operand-less instructions (no inputs, no outputs), selected from
// the parameterless `node`.
class SM_Inval_Pseudo <string opName, SDPatternOperator node = null_frag>
  : SM_Pseudo<opName, (outs), (ins), "", [(node)]> {
  // Not a memory access, but has side effects.
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 1;

  let has_sbase = 0;
  let has_sdst = 0;
}
198
// Instantiates the _IMM, _SGPR and _SGPR_IMM probe pseudos for one opcode. The
// mnemonic is the lower-cased defm name.
multiclass SM_Pseudo_Probe<RegisterClass baseClass> {
  defvar mnemonic = !tolower(NAME);
  def _IMM      : SM_Probe_Pseudo <mnemonic, baseClass, IMM_Offset>;
  def _SGPR     : SM_Probe_Pseudo <mnemonic, baseClass, SGPR_Offset>;
  def _SGPR_IMM : SM_Probe_Pseudo <mnemonic, baseClass, SGPR_IMM_Offset>;
}
205
// Pseudo for instructions that take no inputs and return a 32-bit value in
// $sdst, selected from the parameterless `node`.
class SM_WaveId_Pseudo<string opName, SDPatternOperator node>
  : SM_Pseudo<opName, (outs SReg_32_XM0_XEXEC:$sdst), (ins),
              " $sdst", [(set i32:$sdst, (node))]> {
  // Not a memory access, but has side effects.
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 1;

  let has_sbase = 0;
}
214
// Pseudo for the prefetch instructions. The $sbase operand (and its assembly
// text) is only present when `hasSBase` is set; the immediate offset, the SGPR
// offset and the 8-bit $sdata immediate are always present.
class SM_Prefetch_Pseudo <string opName, RegisterClass baseClass, bit hasSBase>
  : SM_Pseudo<opName, (outs),
              !con(!if(hasSBase, (ins baseClass:$sbase), (ins)),
                   (ins smem_offset:$offset, SReg_32:$soffset, i8imm:$sdata)),
              !if(hasSBase, " $sbase,", "") # " $offset, $soffset, $sdata"> {
  // Prefetches are marked as both load and store so that they are not
  // reordered with loads and stores. This is also required for the pattern to
  // match the prefetch intrinsic.
  let mayLoad = 1;
  let mayStore = 1;
  let ScalarStore = 0;
  let LGKM_CNT = 0;

  let has_sbase = hasSBase;
  let has_glc = 0;
  let has_offset = 1;
  let has_soffset = 1;

  let PseudoInstr = opName;
}
231
232//===----------------------------------------------------------------------===//
233// Scalar Atomic Memory Classes
234//===----------------------------------------------------------------------===//
235
// Common base for scalar atomic pseudos. `isRet` selects the returning form
// and is recorded in `glc` for use by the real instruction classes.
class SM_Atomic_Pseudo <string opName, dag outs, dag ins, string asmOps,
                        bit isRet>
  : SM_Pseudo<opName, outs, ins, asmOps, []> {

  bit glc = isRet;

  let IsAtomicRet = isRet;
  let IsAtomicNoRet = !not(isRet);

  // Atomics both read and write memory.
  let mayLoad = 1;
  let mayStore = 1;

  let has_glc = 1;
  let has_dlc = 1;
  let has_soffset = 1;

  // Should these be set?
  let ScalarStore = 1;
  let hasSideEffects = 1;
  let maybeAtomic = 1;
}
256
// One concrete scalar atomic pseudo for a given addressing variant.
//
// The returning form (isRet = 1) has a $sdst result tied to the $sdata input,
// prints $sdst instead of $sdata, and uses the CPol_GLC operand type; the
// non-returning form has no result and uses CPol_NonGLC.
class SM_Pseudo_Atomic<string opName,
                       RegisterClass baseClass,
                       RegisterClass dataClass,
                       OffsetMode offsets,
                       bit isRet,
                       string opNameWithSuffix =
                         opName # offsets.Variant # !if(isRet, "_RTN", ""),
                       Operand CPolTy = !if(isRet, CPol_GLC, CPol_NonGLC)>
  : SM_Atomic_Pseudo<opName,
                     !if(isRet, (outs dataClass:$sdst), (outs)),
                     !con((ins dataClass:$sdata, baseClass:$sbase),
                          offsets.Ins,
                          (ins CPolTy:$cpol)),
                     !if(isRet, " $sdst", " $sdata") #
                       ", $sbase, " # offsets.Asm # "$cpol",
                     isRet>,
    AtomicNoRet <opNameWithSuffix, isRet> {
  let PseudoInstr = opNameWithSuffix;

  let has_offset = offsets.HasOffset;
  let has_soffset = offsets.HasSOffset;

  // In the returning form $sdata shares its register with $sdst, so it is not
  // encoded separately.
  let Constraints = !if(isRet, "$sdst = $sdata", "");
  let DisableEncoding = !if(isRet, "$sdata", "");
}
280
// Instantiates all six atomic pseudos for one opcode: three addressing
// variants, each in a non-returning and a returning (_RTN) form. The mnemonic
// is the lower-cased defm name.
multiclass SM_Pseudo_Atomics<RegisterClass baseClass,
                             RegisterClass dataClass> {
  defvar mnemonic = !tolower(NAME);

  // Non-returning forms.
  def _IMM
    : SM_Pseudo_Atomic <mnemonic, baseClass, dataClass, IMM_Offset, 0>;
  def _SGPR
    : SM_Pseudo_Atomic <mnemonic, baseClass, dataClass, SGPR_Offset, 0>;
  def _SGPR_IMM
    : SM_Pseudo_Atomic <mnemonic, baseClass, dataClass, SGPR_IMM_Offset, 0>;

  // Returning forms.
  def _IMM_RTN
    : SM_Pseudo_Atomic <mnemonic, baseClass, dataClass, IMM_Offset, 1>;
  def _SGPR_RTN
    : SM_Pseudo_Atomic <mnemonic, baseClass, dataClass, SGPR_Offset, 1>;
  def _SGPR_IMM_RTN
    : SM_Pseudo_Atomic <mnemonic, baseClass, dataClass, SGPR_IMM_Offset, 1>;
}
291
292//===----------------------------------------------------------------------===//
293// Scalar Memory Instructions
294//===----------------------------------------------------------------------===//
295
// The 32-bit SMRD instructions use the SReg_32_XM0 register class rather than
// SReg_32: SReg_32_XM0 does not include M0, and writing to M0 from an SMRD
// instruction will hang the GPU.

// XXX - SMEM instructions do not allow exec as the data operand, but is exec
// allowed as sdst for SMRD on SI/CI?
defm S_LOAD_DWORD : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>;
defm S_LOAD_DWORDX2 : SM_Pseudo_Loads <SReg_64, SReg_64_XEXEC>;
let SubtargetPredicate = HasScalarDwordx3Loads in {
  defm S_LOAD_DWORDX3 : SM_Pseudo_Loads <SReg_64, SReg_96>;
}
defm S_LOAD_DWORDX4 : SM_Pseudo_Loads <SReg_64, SReg_128>;
defm S_LOAD_DWORDX8 : SM_Pseudo_Loads <SReg_64, SReg_256>;
defm S_LOAD_DWORDX16 : SM_Pseudo_Loads <SReg_64, SReg_512>;

// Sub-dword loads; the result is a 32-bit register.
defm S_LOAD_I8 : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>;
defm S_LOAD_U8 : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>;
defm S_LOAD_I16 : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>;
defm S_LOAD_U16 : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>;
313
// Buffer loads: same shapes as the plain loads, but with a 128-bit base
// register and is_buffer set.
let is_buffer = 1 in {
  defm S_BUFFER_LOAD_DWORD    : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>;
  // FIXME: exec_lo/exec_hi appear to be allowed for SMRD loads on
  // SI/CI, but disallowed for SMEM on VI.
  defm S_BUFFER_LOAD_DWORDX2  : SM_Pseudo_Loads <SReg_128, SReg_64_XEXEC>;
  let SubtargetPredicate = HasScalarDwordx3Loads in {
    defm S_BUFFER_LOAD_DWORDX3 : SM_Pseudo_Loads <SReg_128, SReg_96>;
  }
  defm S_BUFFER_LOAD_DWORDX4  : SM_Pseudo_Loads <SReg_128, SReg_128>;
  defm S_BUFFER_LOAD_DWORDX8  : SM_Pseudo_Loads <SReg_128, SReg_256>;
  defm S_BUFFER_LOAD_DWORDX16 : SM_Pseudo_Loads <SReg_128, SReg_512>;
  defm S_BUFFER_LOAD_I8       : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>;
  defm S_BUFFER_LOAD_U8       : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>;
  defm S_BUFFER_LOAD_I16      : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>;
  defm S_BUFFER_LOAD_U16      : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>;
} // End is_buffer = 1
329
// Scalar stores, available only on subtargets with HasScalarStores.
let SubtargetPredicate = HasScalarStores in {
  defm S_STORE_DWORD   : SM_Pseudo_Stores <SReg_64, SReg_32_XM0_XEXEC>;
  defm S_STORE_DWORDX2 : SM_Pseudo_Stores <SReg_64, SReg_64_XEXEC>;
  defm S_STORE_DWORDX4 : SM_Pseudo_Stores <SReg_64, SReg_128>;

  // Buffer variants use a 128-bit base register.
  let is_buffer = 1 in {
    defm S_BUFFER_STORE_DWORD   : SM_Pseudo_Stores <SReg_128, SReg_32_XM0_XEXEC>;
    defm S_BUFFER_STORE_DWORDX2 : SM_Pseudo_Stores <SReg_128, SReg_64_XEXEC>;
    defm S_BUFFER_STORE_DWORDX4 : SM_Pseudo_Stores <SReg_128, SReg_128>;
  } // End is_buffer = 1
} // End SubtargetPredicate = HasScalarStores
341
// s_memtime is only available where the subtarget has the instruction.
let SubtargetPredicate = HasSMemTimeInst in {
  def S_MEMTIME : SM_Time_Pseudo <"s_memtime", int_amdgcn_s_memtime>;
}

// Not wrapped in a subtarget predicate at this point.
def S_DCACHE_INV : SM_Inval_Pseudo <"s_dcache_inv", int_amdgcn_s_dcache_inv>;

let SubtargetPredicate = isGFX7GFX8GFX9 in
def S_DCACHE_INV_VOL
  : SM_Inval_Pseudo <"s_dcache_inv_vol", int_amdgcn_s_dcache_inv_vol>;
349
let SubtargetPredicate = isGFX8Plus in {
  // The write-back instructions additionally require scalar stores.
  let OtherPredicates = [HasScalarStores] in {
    def S_DCACHE_WB
      : SM_Inval_Pseudo <"s_dcache_wb", int_amdgcn_s_dcache_wb>;
    def S_DCACHE_WB_VOL
      : SM_Inval_Pseudo <"s_dcache_wb_vol", int_amdgcn_s_dcache_wb_vol>;
  } // End OtherPredicates = [HasScalarStores]

  defm S_ATC_PROBE : SM_Pseudo_Probe <SReg_64>;
  let is_buffer = 1 in
  defm S_ATC_PROBE_BUFFER : SM_Pseudo_Probe <SReg_128>;
} // End SubtargetPredicate = isGFX8Plus
361
let SubtargetPredicate = HasSMemRealTime in {
  def S_MEMREALTIME
    : SM_Time_Pseudo <"s_memrealtime", int_amdgcn_s_memrealtime>;
}

// No selection pattern is attached to s_gl1_inv (node defaults to null_frag).
let SubtargetPredicate = isGFX10Plus in {
  def S_GL1_INV : SM_Inval_Pseudo<"s_gl1_inv">;
}

let SubtargetPredicate = HasGetWaveIdInst in {
  def S_GET_WAVEID_IN_WORKGROUP
    : SM_WaveId_Pseudo <"s_get_waveid_in_workgroup",
                        int_amdgcn_s_get_waveid_in_workgroup>;
}
369
370
// Scalar scratch loads and stores. All of them are marked as using FLAT_SCR.
let SubtargetPredicate = HasScalarFlatScratchInsts, Uses = [FLAT_SCR] in {
  defm S_SCRATCH_LOAD_DWORD   : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>;
  defm S_SCRATCH_LOAD_DWORDX2 : SM_Pseudo_Loads <SReg_64, SReg_64_XEXEC>;
  defm S_SCRATCH_LOAD_DWORDX4 : SM_Pseudo_Loads <SReg_64, SReg_128>;

  defm S_SCRATCH_STORE_DWORD   : SM_Pseudo_Stores <SReg_64, SReg_32_XM0_XEXEC>;
  defm S_SCRATCH_STORE_DWORDX2 : SM_Pseudo_Stores <SReg_64, SReg_64_XEXEC>;
  defm S_SCRATCH_STORE_DWORDX4 : SM_Pseudo_Stores <SReg_64, SReg_128>;
} // End SubtargetPredicate = HasScalarFlatScratchInsts, Uses = [FLAT_SCR]
380
// Scalar atomic pseudos. Each defm expands to six pseudos (three addressing
// variants, with and without return value).
let SubtargetPredicate = HasScalarAtomics in {

  // Buffer atomics: 128-bit base register.
  let is_buffer = 1 in {
    // 32-bit data; CMPSWAP takes a 64-bit data operand.
    defm S_BUFFER_ATOMIC_SWAP    : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
    defm S_BUFFER_ATOMIC_CMPSWAP : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
    defm S_BUFFER_ATOMIC_ADD     : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
    defm S_BUFFER_ATOMIC_SUB     : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
    defm S_BUFFER_ATOMIC_SMIN    : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
    defm S_BUFFER_ATOMIC_UMIN    : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
    defm S_BUFFER_ATOMIC_SMAX    : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
    defm S_BUFFER_ATOMIC_UMAX    : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
    defm S_BUFFER_ATOMIC_AND     : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
    defm S_BUFFER_ATOMIC_OR      : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
    defm S_BUFFER_ATOMIC_XOR     : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
    defm S_BUFFER_ATOMIC_INC     : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
    defm S_BUFFER_ATOMIC_DEC     : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;

    // 64-bit data; CMPSWAP_X2 takes a 128-bit data operand.
    defm S_BUFFER_ATOMIC_SWAP_X2    : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
    defm S_BUFFER_ATOMIC_CMPSWAP_X2 : SM_Pseudo_Atomics <SReg_128, SReg_128>;
    defm S_BUFFER_ATOMIC_ADD_X2     : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
    defm S_BUFFER_ATOMIC_SUB_X2     : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
    defm S_BUFFER_ATOMIC_SMIN_X2    : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
    defm S_BUFFER_ATOMIC_UMIN_X2    : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
    defm S_BUFFER_ATOMIC_SMAX_X2    : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
    defm S_BUFFER_ATOMIC_UMAX_X2    : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
    defm S_BUFFER_ATOMIC_AND_X2     : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
    defm S_BUFFER_ATOMIC_OR_X2      : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
    defm S_BUFFER_ATOMIC_XOR_X2     : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
    defm S_BUFFER_ATOMIC_INC_X2     : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
    defm S_BUFFER_ATOMIC_DEC_X2     : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
  } // End is_buffer = 1

  // Non-buffer atomics: 64-bit base register.
  // 32-bit data; CMPSWAP takes a 64-bit data operand.
  defm S_ATOMIC_SWAP    : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
  defm S_ATOMIC_CMPSWAP : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
  defm S_ATOMIC_ADD     : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
  defm S_ATOMIC_SUB     : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
  defm S_ATOMIC_SMIN    : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
  defm S_ATOMIC_UMIN    : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
  defm S_ATOMIC_SMAX    : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
  defm S_ATOMIC_UMAX    : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
  defm S_ATOMIC_AND     : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
  defm S_ATOMIC_OR      : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
  defm S_ATOMIC_XOR     : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
  defm S_ATOMIC_INC     : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
  defm S_ATOMIC_DEC     : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;

  // 64-bit data; CMPSWAP_X2 takes a 128-bit data operand.
  defm S_ATOMIC_SWAP_X2    : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
  defm S_ATOMIC_CMPSWAP_X2 : SM_Pseudo_Atomics <SReg_64, SReg_128>;
  defm S_ATOMIC_ADD_X2     : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
  defm S_ATOMIC_SUB_X2     : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
  defm S_ATOMIC_SMIN_X2    : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
  defm S_ATOMIC_UMIN_X2    : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
  defm S_ATOMIC_SMAX_X2    : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
  defm S_ATOMIC_UMAX_X2    : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
  defm S_ATOMIC_AND_X2     : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
  defm S_ATOMIC_OR_X2      : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
  defm S_ATOMIC_XOR_X2     : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
  defm S_ATOMIC_INC_X2     : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
  defm S_ATOMIC_DEC_X2     : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;

} // End SubtargetPredicate = HasScalarAtomics
442
// The dcache discard pseudos are gated on the same predicate as the scalar
// atomics.
let SubtargetPredicate = HasScalarAtomics in {
  defm S_DCACHE_DISCARD    : SM_Pseudo_Discards;
  defm S_DCACHE_DISCARD_X2 : SM_Pseudo_Discards;
} // End SubtargetPredicate = HasScalarAtomics
447
// Prefetch pseudos. The *_PC_REL forms have no $sbase operand.
let SubtargetPredicate = isGFX12Plus in {
  def S_PREFETCH_INST
    : SM_Prefetch_Pseudo <"s_prefetch_inst", SReg_64, 1>;
  def S_PREFETCH_INST_PC_REL
    : SM_Prefetch_Pseudo <"s_prefetch_inst_pc_rel", SReg_64, 0>;
  def S_PREFETCH_DATA
    : SM_Prefetch_Pseudo <"s_prefetch_data", SReg_64, 1>;
  def S_PREFETCH_DATA_PC_REL
    : SM_Prefetch_Pseudo <"s_prefetch_data_pc_rel", SReg_64, 0>;

  // The buffer form takes a 128-bit base register.
  let is_buffer = 1 in
  def S_BUFFER_PREFETCH_DATA
    : SM_Prefetch_Pseudo <"s_buffer_prefetch_data", SReg_128, 1>;
} // End SubtargetPredicate = isGFX12Plus
457
458//===----------------------------------------------------------------------===//
459// Targets
460//===----------------------------------------------------------------------===//
461
462//===----------------------------------------------------------------------===//
463// SI
464//===----------------------------------------------------------------------===//
465
// 32-bit SMRD encoding used by the GFX6/GFX7 real instructions.
class SMRD_Real_si <bits<5> op, SM_Pseudo ps>
  : SM_Real<ps>,
    SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SI>,
    Enc32 {

  let DecoderNamespace = "GFX6GFX7";
  let AssemblerPredicate = isGFX6GFX7;

  // Fixed fields: encoding identifier and opcode.
  let Inst{31-27} = 0x18; // encoding
  let Inst{26-22} = op;

  // Operand fields. Fields the pseudo does not have are left undefined.
  let Inst{21-15} = !if(ps.has_sdst, sdst{6-0}, ?);
  let Inst{14-9}  = !if(ps.has_sbase, sbase{6-1}, ?);
  // Bit 8 is set when bits 7-0 hold an immediate offset; otherwise they hold
  // the SGPR offset, if there is one.
  let Inst{8}     = ps.has_offset;
  let Inst{7-0}   = !if(ps.has_offset, offset{7-0},
                        !if(ps.has_soffset, soffset, ?));
}
481
// Defines the GFX6/GFX7 real instructions for the _IMM and _SGPR variants of
// the load pseudo named after the defm.
multiclass SM_Real_Loads_si<bits<5> op> {
  defvar pseudoName = NAME;

  // The immediate form replaces the pseudo's offset operand with the 8-bit
  // SMRD offset operand.
  defvar immPseudo = !cast<SM_Load_Pseudo>(pseudoName#_IMM);
  def _IMM_si : SMRD_Real_si <op, immPseudo> {
    let InOperandList = (ins immPseudo.BaseClass:$sbase, smrd_offset_8:$offset,
                             CPol:$cpol);
  }

  defvar sgprPseudo = !cast<SM_Load_Pseudo>(pseudoName#_SGPR);
  def _SGPR_si : SMRD_Real_si <op, sgprPseudo>;
}
492
// GFX6/GFX7 opcode assignments.
defm S_LOAD_DWORD    : SM_Real_Loads_si <0x00>;
defm S_LOAD_DWORDX2  : SM_Real_Loads_si <0x01>;
defm S_LOAD_DWORDX4  : SM_Real_Loads_si <0x02>;
defm S_LOAD_DWORDX8  : SM_Real_Loads_si <0x03>;
defm S_LOAD_DWORDX16 : SM_Real_Loads_si <0x04>;

defm S_BUFFER_LOAD_DWORD    : SM_Real_Loads_si <0x08>;
defm S_BUFFER_LOAD_DWORDX2  : SM_Real_Loads_si <0x09>;
defm S_BUFFER_LOAD_DWORDX4  : SM_Real_Loads_si <0x0a>;
defm S_BUFFER_LOAD_DWORDX8  : SM_Real_Loads_si <0x0b>;
defm S_BUFFER_LOAD_DWORDX16 : SM_Real_Loads_si <0x0c>;

def S_MEMTIME_si : SMRD_Real_si <0x1e, S_MEMTIME>;
def S_DCACHE_INV_si : SMRD_Real_si <0x1f, S_DCACHE_INV>;
506
507
508//===----------------------------------------------------------------------===//
509// VI and GFX9.
510//===----------------------------------------------------------------------===//
511
// 64-bit SMEM encoding shared by the GFX8 and GFX9 real instructions.
class SMEM_Real_vi <bits<8> op, SM_Pseudo ps>
  : SM_Real<ps>,
    SIMCInstr<ps.PseudoInstr, SIEncodingFamily.VI>,
    Enc64 {
  // When set, the instruction is only accepted on GFX9 and the soffset_en
  // bit and the soffset field are encoded.
  field bit IsGFX9SpecificEncoding = false;
  let AssemblerPredicate = !if(IsGFX9SpecificEncoding, isGFX9Only, isGFX8GFX9);
  let DecoderNamespace = "GFX8";

  // Operand registers; left undefined when the pseudo lacks the operand.
  let Inst{5-0}   = !if(ps.has_sbase, sbase{6-1}, ?);
  let Inst{12-6}  = !if(ps.has_sdst, sdst{6-0}, ?);

  // soffset_en. For GFX9 instructions with immediate offsets this bit must be
  // defined, whereas in GFX8 it is undefined in all cases. GFX9 is therefore
  // not perfectly backward-compatible with GFX8, despite documentation
  // suggesting otherwise.
  field bit SOffsetEn = !if(IsGFX9SpecificEncoding,
    !if(ps.has_offset, ps.has_soffset, !if(ps.has_soffset, 0, ?)),
    ?);
  let Inst{14} = SOffsetEn;

  let Inst{16} = !if(ps.has_glc, cpol{CPolBit.GLC}, ?);

  // imm
  // TODO: Shall not be defined if the instruction has neither offset nor
  // soffset.
  let Inst{17} = ps.has_offset;

  // Fixed fields: opcode and encoding identifier.
  let Inst{25-18} = op;
  let Inst{31-26} = 0x30; // encoding

  // VI supports 20-bit unsigned offsets while GFX9+ supports 21-bit signed
  // ones. The offset value is corrected accordingly when it is encoded or
  // decoded.
  // TODO: Forbid non-M0 register offsets for GFX8 stores and atomics.
  field bits<21> Offset;
  // Without an immediate offset, the low bits carry the SGPR offset instead.
  let Offset{6-0} = !if(ps.has_offset, offset{6-0},
                        !if(ps.has_soffset, soffset{6-0}, ?));
  let Offset{20-7} = !if(ps.has_offset, offset{20-7}, ?);
  let Inst{52-32} = Offset;

  // soffset: only encoded here for the GFX9-specific encodings.
  let Inst{63-57} = !if(!and(IsGFX9SpecificEncoding, ps.has_soffset),
                        soffset{6-0}, ?);
}
555
// Convenience wrapper: looks the pseudo up by its record name `ps`.
class SMEM_Real_Load_vi<bits<8> op, string ps>
  : SMEM_Real_vi<op, !cast<SM_Pseudo>(ps)>;
558
// Mix-in for the alternative GFX9 SGPR encoding, which puts the offset
// register into soffset. It is not available in the assembler and is placed
// in the GFX9 encoding family so that it does not conflict with the primary
// SGPR variant.
class SMEM_Real_SGPR_alt_gfx9 {
  bit IsGFX9SpecificEncoding = true;
  // soffset is always enabled and the offset field is left undefined.
  bit SOffsetEn = 1;
  bit Offset = ?;
  int Subtarget = SIEncodingFamily.GFX9;
  string AsmVariantName = "NonParsable";
}
569
// Defines the GFX8/GFX9 real instructions for the load pseudos named after
// the defm: _IMM and _SGPR, the alternative GFX9 SGPR encoding, and the
// GFX9-only _SGPR_IMM form.
multiclass SM_Real_Loads_vi<bits<8> op> {
  defvar pseudoName = NAME;
  def _IMM_vi  : SMEM_Real_Load_vi <op, pseudoName#"_IMM">;
  def _SGPR_vi : SMEM_Real_Load_vi <op, pseudoName#"_SGPR">;
  def _SGPR_alt_gfx9
    : SMEM_Real_Load_vi <op, pseudoName#"_SGPR">,
      SMEM_Real_SGPR_alt_gfx9;
  let IsGFX9SpecificEncoding = true in {
    def _SGPR_IMM_gfx9 : SMEM_Real_Load_vi <op, pseudoName#"_SGPR_IMM">;
  }
}
579
// Variant of SMEM_Real_vi for instructions that have an $sdata operand instead
// of $sdst: bits 12-6 carry sdata and sdst is left undefined.
class SMEM_Real_Store_Base_vi <bits<8> op, SM_Pseudo ps>
  : SMEM_Real_vi <op, ps> {
  // encoding
  bits<7> sdata;

  let sdst = ?;
  let Inst{12-6} = !if(ps.has_sdst, sdata{6-0}, ?);
}
587
// Convenience wrapper: looks the pseudo up by its record name `ps`.
class SMEM_Real_Store_vi <bits<8> op, string ps>
  : SMEM_Real_Store_Base_vi <op, !cast<SM_Pseudo>(ps)>;
590
// Defines the GFX8/GFX9 real instructions for the store pseudos named after
// the defm: _IMM and _SGPR, the alternative GFX9 SGPR encoding, and the
// GFX9-only _SGPR_IMM form.
multiclass SM_Real_Stores_vi<bits<8> op> {
  defvar pseudoName = NAME;
  def _IMM_vi  : SMEM_Real_Store_vi <op, pseudoName#"_IMM">;
  def _SGPR_vi : SMEM_Real_Store_vi <op, pseudoName#"_SGPR">;
  def _SGPR_alt_gfx9
    : SMEM_Real_Store_vi <op, pseudoName#"_SGPR">,
      SMEM_Real_SGPR_alt_gfx9;
  let IsGFX9SpecificEncoding = true in {
    def _SGPR_IMM_gfx9 : SMEM_Real_Store_vi <op, pseudoName#"_SGPR_IMM">;
  }
}
600
// Defines the GFX8/GFX9 real instructions for the probe pseudos named after
// the defm. They use the store-style encoding because probes carry $sdata.
multiclass SM_Real_Probe_vi<bits<8> op> {
  defvar pseudoName = NAME;
  def _IMM_vi
    : SMEM_Real_Store_Base_vi <op, !cast<SM_Probe_Pseudo>(pseudoName#_IMM)>;
  def _SGPR_vi
    : SMEM_Real_Store_Base_vi <op, !cast<SM_Probe_Pseudo>(pseudoName#_SGPR)>;
  def _SGPR_alt_gfx9
    : SMEM_Real_Store_Base_vi <op, !cast<SM_Probe_Pseudo>(pseudoName#_SGPR)>,
      SMEM_Real_SGPR_alt_gfx9;
  let IsGFX9SpecificEncoding = true in {
    def _SGPR_IMM_gfx9
      : SMEM_Real_Store_Base_vi <op,
                                 !cast<SM_Probe_Pseudo>(pseudoName#_SGPR_IMM)>;
  }
}
612
// GFX8/GFX9 opcode assignments.

// Loads.
defm S_LOAD_DWORD    : SM_Real_Loads_vi <0x00>;
defm S_LOAD_DWORDX2  : SM_Real_Loads_vi <0x01>;
defm S_LOAD_DWORDX4  : SM_Real_Loads_vi <0x02>;
defm S_LOAD_DWORDX8  : SM_Real_Loads_vi <0x03>;
defm S_LOAD_DWORDX16 : SM_Real_Loads_vi <0x04>;

defm S_BUFFER_LOAD_DWORD    : SM_Real_Loads_vi <0x08>;
defm S_BUFFER_LOAD_DWORDX2  : SM_Real_Loads_vi <0x09>;
defm S_BUFFER_LOAD_DWORDX4  : SM_Real_Loads_vi <0x0a>;
defm S_BUFFER_LOAD_DWORDX8  : SM_Real_Loads_vi <0x0b>;
defm S_BUFFER_LOAD_DWORDX16 : SM_Real_Loads_vi <0x0c>;

// Stores.
defm S_STORE_DWORD   : SM_Real_Stores_vi <0x10>;
defm S_STORE_DWORDX2 : SM_Real_Stores_vi <0x11>;
defm S_STORE_DWORDX4 : SM_Real_Stores_vi <0x12>;

defm S_BUFFER_STORE_DWORD   : SM_Real_Stores_vi <0x18>;
defm S_BUFFER_STORE_DWORDX2 : SM_Real_Stores_vi <0x19>;
defm S_BUFFER_STORE_DWORDX4 : SM_Real_Stores_vi <0x1a>;

// These instructions use the same encoding.
def S_DCACHE_INV_vi     : SMEM_Real_vi <0x20, S_DCACHE_INV>;
def S_DCACHE_WB_vi      : SMEM_Real_vi <0x21, S_DCACHE_WB>;
def S_DCACHE_INV_VOL_vi : SMEM_Real_vi <0x22, S_DCACHE_INV_VOL>;
def S_DCACHE_WB_VOL_vi  : SMEM_Real_vi <0x23, S_DCACHE_WB_VOL>;
def S_MEMTIME_vi        : SMEM_Real_vi <0x24, S_MEMTIME>;
def S_MEMREALTIME_vi    : SMEM_Real_vi <0x25, S_MEMREALTIME>;

// Scratch loads and stores.
defm S_SCRATCH_LOAD_DWORD   : SM_Real_Loads_vi <0x05>;
defm S_SCRATCH_LOAD_DWORDX2 : SM_Real_Loads_vi <0x06>;
defm S_SCRATCH_LOAD_DWORDX4 : SM_Real_Loads_vi <0x07>;

defm S_SCRATCH_STORE_DWORD   : SM_Real_Stores_vi <0x15>;
defm S_SCRATCH_STORE_DWORDX2 : SM_Real_Stores_vi <0x16>;
defm S_SCRATCH_STORE_DWORDX4 : SM_Real_Stores_vi <0x17>;

// Probes.
defm S_ATC_PROBE : SM_Real_Probe_vi <0x26>;
defm S_ATC_PROBE_BUFFER : SM_Real_Probe_vi <0x27>;
650
651//===----------------------------------------------------------------------===//
652// GFX9
653//===----------------------------------------------------------------------===//
654
// Real encoding for the scalar atomics. The GLC cache-policy bit is fixed to
// the pseudo's `glc` value, and bits 12-6 hold sdst for the returning form and
// sdata otherwise.
class SMEM_Atomic_Real_vi <bits<8> op, SM_Atomic_Pseudo ps>
  : SMEM_Real_vi <op, ps>,
    AtomicNoRet <!subst("_RTN", "", NAME), ps.glc> {

  bits<7> sdata;

  // Keep the operand tying of the pseudo.
  let DisableEncoding = ps.DisableEncoding;
  let Constraints = ps.Constraints;

  let cpol{CPolBit.GLC} = ps.glc;
  let Inst{12-6} = !if(ps.glc, sdst{6-0}, sdata{6-0});
}
667
// Defines the real instructions for all atomic pseudos named after the defm:
// for both the non-returning and the returning (_RTN) forms, the _IMM and
// _SGPR variants, the alternative GFX9 SGPR encoding, and the GFX9-only
// _SGPR_IMM form.
multiclass SM_Real_Atomics_vi<bits<8> op> {
  defvar pseudoName = NAME;

  // Non-returning forms.
  def _IMM_vi
    : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(pseudoName#_IMM)>;
  def _SGPR_vi
    : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(pseudoName#_SGPR)>;
  def _SGPR_alt_gfx9
    : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(pseudoName#_SGPR)>,
      SMEM_Real_SGPR_alt_gfx9;
  let IsGFX9SpecificEncoding = true in {
    def _SGPR_IMM_gfx9
      : SMEM_Atomic_Real_vi <op,
                             !cast<SM_Atomic_Pseudo>(pseudoName#_SGPR_IMM)>;
  }

  // Returning forms.
  def _IMM_RTN_vi
    : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(pseudoName#_IMM_RTN)>;
  def _SGPR_RTN_vi
    : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(pseudoName#_SGPR_RTN)>;
  def _SGPR_RTN_alt_gfx9
    : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(pseudoName#_SGPR_RTN)>,
      SMEM_Real_SGPR_alt_gfx9;
  let IsGFX9SpecificEncoding = true in {
    def _SGPR_IMM_RTN_gfx9
      : SMEM_Atomic_Real_vi <op,
                             !cast<SM_Atomic_Pseudo>(pseudoName#_SGPR_IMM_RTN)>;
  }
}
687
// Opcode assignments for the scalar atomics.

// Buffer atomics, 32-bit.
defm S_BUFFER_ATOMIC_SWAP    : SM_Real_Atomics_vi <0x40>;
defm S_BUFFER_ATOMIC_CMPSWAP : SM_Real_Atomics_vi <0x41>;
defm S_BUFFER_ATOMIC_ADD     : SM_Real_Atomics_vi <0x42>;
defm S_BUFFER_ATOMIC_SUB     : SM_Real_Atomics_vi <0x43>;
defm S_BUFFER_ATOMIC_SMIN    : SM_Real_Atomics_vi <0x44>;
defm S_BUFFER_ATOMIC_UMIN    : SM_Real_Atomics_vi <0x45>;
defm S_BUFFER_ATOMIC_SMAX    : SM_Real_Atomics_vi <0x46>;
defm S_BUFFER_ATOMIC_UMAX    : SM_Real_Atomics_vi <0x47>;
defm S_BUFFER_ATOMIC_AND     : SM_Real_Atomics_vi <0x48>;
defm S_BUFFER_ATOMIC_OR      : SM_Real_Atomics_vi <0x49>;
defm S_BUFFER_ATOMIC_XOR     : SM_Real_Atomics_vi <0x4a>;
defm S_BUFFER_ATOMIC_INC     : SM_Real_Atomics_vi <0x4b>;
defm S_BUFFER_ATOMIC_DEC     : SM_Real_Atomics_vi <0x4c>;

// Buffer atomics, 64-bit.
defm S_BUFFER_ATOMIC_SWAP_X2    : SM_Real_Atomics_vi <0x60>;
defm S_BUFFER_ATOMIC_CMPSWAP_X2 : SM_Real_Atomics_vi <0x61>;
defm S_BUFFER_ATOMIC_ADD_X2     : SM_Real_Atomics_vi <0x62>;
defm S_BUFFER_ATOMIC_SUB_X2     : SM_Real_Atomics_vi <0x63>;
defm S_BUFFER_ATOMIC_SMIN_X2    : SM_Real_Atomics_vi <0x64>;
defm S_BUFFER_ATOMIC_UMIN_X2    : SM_Real_Atomics_vi <0x65>;
defm S_BUFFER_ATOMIC_SMAX_X2    : SM_Real_Atomics_vi <0x66>;
defm S_BUFFER_ATOMIC_UMAX_X2    : SM_Real_Atomics_vi <0x67>;
defm S_BUFFER_ATOMIC_AND_X2     : SM_Real_Atomics_vi <0x68>;
defm S_BUFFER_ATOMIC_OR_X2      : SM_Real_Atomics_vi <0x69>;
defm S_BUFFER_ATOMIC_XOR_X2     : SM_Real_Atomics_vi <0x6a>;
defm S_BUFFER_ATOMIC_INC_X2     : SM_Real_Atomics_vi <0x6b>;
defm S_BUFFER_ATOMIC_DEC_X2     : SM_Real_Atomics_vi <0x6c>;

// Non-buffer atomics, 32-bit.
defm S_ATOMIC_SWAP    : SM_Real_Atomics_vi <0x80>;
defm S_ATOMIC_CMPSWAP : SM_Real_Atomics_vi <0x81>;
defm S_ATOMIC_ADD     : SM_Real_Atomics_vi <0x82>;
defm S_ATOMIC_SUB     : SM_Real_Atomics_vi <0x83>;
defm S_ATOMIC_SMIN    : SM_Real_Atomics_vi <0x84>;
defm S_ATOMIC_UMIN    : SM_Real_Atomics_vi <0x85>;
defm S_ATOMIC_SMAX    : SM_Real_Atomics_vi <0x86>;
defm S_ATOMIC_UMAX    : SM_Real_Atomics_vi <0x87>;
defm S_ATOMIC_AND     : SM_Real_Atomics_vi <0x88>;
defm S_ATOMIC_OR      : SM_Real_Atomics_vi <0x89>;
defm S_ATOMIC_XOR     : SM_Real_Atomics_vi <0x8a>;
defm S_ATOMIC_INC     : SM_Real_Atomics_vi <0x8b>;
defm S_ATOMIC_DEC     : SM_Real_Atomics_vi <0x8c>;

// Non-buffer atomics, 64-bit.
defm S_ATOMIC_SWAP_X2    : SM_Real_Atomics_vi <0xa0>;
defm S_ATOMIC_CMPSWAP_X2 : SM_Real_Atomics_vi <0xa1>;
defm S_ATOMIC_ADD_X2     : SM_Real_Atomics_vi <0xa2>;
defm S_ATOMIC_SUB_X2     : SM_Real_Atomics_vi <0xa3>;
defm S_ATOMIC_SMIN_X2    : SM_Real_Atomics_vi <0xa4>;
defm S_ATOMIC_UMIN_X2    : SM_Real_Atomics_vi <0xa5>;
defm S_ATOMIC_SMAX_X2    : SM_Real_Atomics_vi <0xa6>;
defm S_ATOMIC_UMAX_X2    : SM_Real_Atomics_vi <0xa7>;
defm S_ATOMIC_AND_X2     : SM_Real_Atomics_vi <0xa8>;
defm S_ATOMIC_OR_X2      : SM_Real_Atomics_vi <0xa9>;
defm S_ATOMIC_XOR_X2     : SM_Real_Atomics_vi <0xaa>;
defm S_ATOMIC_INC_X2     : SM_Real_Atomics_vi <0xab>;
defm S_ATOMIC_DEC_X2     : SM_Real_Atomics_vi <0xac>;
743
// Real encodings for a dcache-discard pseudo family. NAME (the defm name) is
// the base name of the SM_Discard_Pseudo records; one real instruction is
// emitted per addressing variant:
//   _IMM_vi, _SGPR_vi       - immediate / SGPR offset forms
//   _SGPR_alt_gfx9          - second real for the _SGPR pseudo, mixing in
//                             SMEM_Real_SGPR_alt_gfx9
//   _SGPR_IMM_gfx9          - SGPR+immediate form, flagged GFX9-specific
multiclass SM_Real_Discard_vi<bits<8> op> {
  defvar pseudo = NAME;

  def _IMM_vi
    : SMEM_Real_vi <op, !cast<SM_Discard_Pseudo>(pseudo#"_IMM")>;

  def _SGPR_vi
    : SMEM_Real_vi <op, !cast<SM_Discard_Pseudo>(pseudo#"_SGPR")>;

  def _SGPR_alt_gfx9
    : SMEM_Real_vi <op, !cast<SM_Discard_Pseudo>(pseudo#"_SGPR")>,
      SMEM_Real_SGPR_alt_gfx9;

  let IsGFX9SpecificEncoding = true in
  def _SGPR_IMM_gfx9
    : SMEM_Real_vi <op, !cast<SM_Discard_Pseudo>(pseudo#"_SGPR_IMM")>;
}

defm S_DCACHE_DISCARD    : SM_Real_Discard_vi <0x28>;
defm S_DCACHE_DISCARD_X2 : SM_Real_Discard_vi <0x29>;
756
757//===----------------------------------------------------------------------===//
758// CI
759//===----------------------------------------------------------------------===//
760
// Immediate operand used for the 32-bit offset of the *_IMM_ci loads below.
def smrd_literal_offset : ImmOperand<i32, "SMRDLiteralOffset">;

// GFX7-only 64-bit real encoding of a scalar load whose full 32-bit offset is
// stored in the second dword (Inst{63-32}). The operand list of the pseudo is
// replaced so that $offset uses smrd_literal_offset.
class SMRD_Real_Load_IMM_ci <bits<5> op, SM_Load_Pseudo ps> :
  SM_Real<ps>,
  Enc64 {

  let AssemblerPredicate = isGFX7Only;
  let DecoderNamespace = "GFX7";
  let InOperandList = (ins ps.BaseClass:$sbase, smrd_literal_offset:$offset, CPol:$cpol);

  // NOTE(review): 0xff in the low offset byte with bit 8 cleared presumably
  // marks "offset follows as a literal dword" - confirm against the ISA docs.
  let Inst{7-0}   = 0xff;
  let Inst{8}     = 0;
  // Only bits 6-1 of the base register encoding are stored.
  let Inst{14-9}  = sbase{6-1};
  let Inst{21-15} = sdst{6-0};
  let Inst{26-22} = op;
  let Inst{31-27} = 0x18; //encoding
  let Inst{63-32} = offset{31-0};
}

// Literal-offset reals for the plain loads (ops 0x00-0x04) and the buffer
// loads (ops 0x08-0x0c).
def S_LOAD_DWORD_IMM_ci           : SMRD_Real_Load_IMM_ci <0x00, S_LOAD_DWORD_IMM>;
def S_LOAD_DWORDX2_IMM_ci         : SMRD_Real_Load_IMM_ci <0x01, S_LOAD_DWORDX2_IMM>;
def S_LOAD_DWORDX4_IMM_ci         : SMRD_Real_Load_IMM_ci <0x02, S_LOAD_DWORDX4_IMM>;
def S_LOAD_DWORDX8_IMM_ci         : SMRD_Real_Load_IMM_ci <0x03, S_LOAD_DWORDX8_IMM>;
def S_LOAD_DWORDX16_IMM_ci        : SMRD_Real_Load_IMM_ci <0x04, S_LOAD_DWORDX16_IMM>;
def S_BUFFER_LOAD_DWORD_IMM_ci    : SMRD_Real_Load_IMM_ci <0x08, S_BUFFER_LOAD_DWORD_IMM>;
def S_BUFFER_LOAD_DWORDX2_IMM_ci  : SMRD_Real_Load_IMM_ci <0x09, S_BUFFER_LOAD_DWORDX2_IMM>;
def S_BUFFER_LOAD_DWORDX4_IMM_ci  : SMRD_Real_Load_IMM_ci <0x0a, S_BUFFER_LOAD_DWORDX4_IMM>;
def S_BUFFER_LOAD_DWORDX8_IMM_ci  : SMRD_Real_Load_IMM_ci <0x0b, S_BUFFER_LOAD_DWORDX8_IMM>;
def S_BUFFER_LOAD_DWORDX16_IMM_ci : SMRD_Real_Load_IMM_ci <0x0c, S_BUFFER_LOAD_DWORDX16_IMM>;
790
// Generic GFX7-only 32-bit SMRD real encoding. Fields the pseudo does not use
// are left unset (?):
//   Inst{7-0}   - low 8 bits of the immediate offset, or the soffset register
//   Inst{8}     - set exactly when the pseudo has an immediate offset
//   Inst{14-9}  - sbase{6-1}
//   Inst{21-15} - sdst
//   Inst{26-22} - opcode
class SMRD_Real_ci <bits<5> op, SM_Pseudo ps> :
  SM_Real<ps>,
  SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SI>,
  Enc32 {

  let AssemblerPredicate = isGFX7Only;
  let DecoderNamespace   = "GFX7";

  let Inst{7-0}   = !if(ps.has_offset,  offset{7-0},
                    !if(ps.has_soffset, soffset, ?));
  let Inst{8}     = ps.has_offset;
  let Inst{14-9}  = !if(ps.has_sbase, sbase{6-1}, ?);
  let Inst{21-15} = !if(ps.has_sdst,  sdst{6-0},  ?);
  let Inst{26-22} = op;
  let Inst{31-27} = 0x18; // encoding
}

def S_DCACHE_INV_VOL_ci : SMRD_Real_ci <0x1d, S_DCACHE_INV_VOL>;
808
809//===----------------------------------------------------------------------===//
810// Scalar Memory Patterns
811//===----------------------------------------------------------------------===//
812
// Wraps a load fragment so that it only matches loads that can be selected to
// a scalar memory instruction.
//   SelectionDAG: the node must satisfy isUniformLoad(N).
//   GlobalISel:   the instruction must have exactly one memory operand, be
//                 uniform, and have an address with no VGPR parts.
class SMRDLoadPat<PatFrag Op> : PatFrag <(ops node:$ptr), (Op node:$ptr), [{ return isUniformLoad(N);}]> {
  let GISelPredicateCode = [{
    if (!MI.hasOneMemOperand() || !isInstrUniform(MI))
      return false;

    // FIXME: We should probably be caching this.
    SmallVector<GEPInfo, 4> AddrInfo;
    getAddrModeInfo(MI, MRI, AddrInfo);

    return !hasVgprParts(AddrInfo);
  }];
}

// Uniform variants of the plain and the 8/16-bit extending loads.
def smrd_load        : SMRDLoadPat<load>;
def smrd_extloadi8   : SMRDLoadPat<extloadi8>;
def smrd_zextloadi8  : SMRDLoadPat<zextloadi8>;
def smrd_sextloadi8  : SMRDLoadPat<sextloadi8>;
def smrd_extloadi16  : SMRDLoadPat<extloadi16>;
def smrd_zextloadi16 : SMRDLoadPat<zextloadi16>;
def smrd_sextloadi16 : SMRDLoadPat<sextloadi16>;
837
// Prefetch fragment restricted to non-divergent addresses.
//   SelectionDAG: operand 1 of the node must not be divergent
//                 (NOTE(review): operand 1 is presumably the pointer, with
//                 operand 0 being the chain - confirm).
//   GlobalISel:   the instruction must be uniform.
def smrd_prefetch : PatFrag <(ops node:$ptr, node:$rw, node:$loc, node:$type),
                             (prefetch node:$ptr, node:$rw, node:$loc, node:$type),
                             [{ return !N->getOperand(1)->isDivergent();}]> {
  let GISelPredicateCode = [{
    return isInstrUniform(MI);
  }];
}
845
// Address-matching complex patterns used by the selection patterns below.
// The numeric argument is the number of operands each pattern produces and
// the string names the C++ selector function that implements the match.
def SMRDImm         : ComplexPattern<iPTR, 2, "SelectSMRDImm">;
def SMRDImm32       : ComplexPattern<iPTR, 2, "SelectSMRDImm32">;
def SMRDSgpr        : ComplexPattern<iPTR, 2, "SelectSMRDSgpr">;
def SMRDSgprImm     : ComplexPattern<iPTR, 3, "SelectSMRDSgprImm">;
def SMRDBufferImm   : ComplexPattern<iPTR, 1, "SelectSMRDBufferImm">;
def SMRDBufferImm32 : ComplexPattern<iPTR, 1, "SelectSMRDBufferImm32">;
def SMRDBufferSgprImm : ComplexPattern<iPTR, 2, "SelectSMRDBufferSgprImm">;
853
// Selection patterns mapping a uniform load (smrd_load) of type `vt` onto the
// scalar load pseudo family named `Instr`.
//   Instr - base pseudo name; the variant suffix (_IMM, _SGPR, ...) is appended
//   vt    - result value type
//   immci - when false, the GFX7 32-bit literal offset pattern is not emitted
// The patterns are emitted in the same order as before; only the way the
// predicates are attached differs.
multiclass SMRD_Pattern <string Instr, ValueType vt, bit immci = true> {

  // 1. Immediate offset.
  def : GCNPat <
    (smrd_load (SMRDImm i64:$sbase, i32:$offset)),
    (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, 0))
  >;

  // 2. 32-bit immediate offset, GFX7 only.
  if immci then def : GCNPat <
    (smrd_load (SMRDImm32 i64:$sbase, i32:$offset)),
    (vt (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, 0))> {
    let OtherPredicates = [isGFX7Only];
  }

  // 3a. SGPR offset before GFX9: dedicated _SGPR form.
  let OtherPredicates = [isNotGFX9Plus] in
  def : GCNPat <
    (smrd_load (SMRDSgpr i64:$sbase, i32:$soffset)),
    (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $soffset, 0))
  >;

  let OtherPredicates = [isGFX9Plus] in {
    // 3b. SGPR offset on GFX9+: _SGPR_IMM form with a zero immediate.
    def : GCNPat <
      (smrd_load (SMRDSgpr i64:$sbase, i32:$soffset)),
      (vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") $sbase, $soffset, 0, 0))
    >;

    // 4. SGPR + immediate offset.
    def : GCNPat <
      (smrd_load (SMRDSgprImm i64:$sbase, i32:$soffset, i32:$offset)),
      (vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") $sbase, $soffset, $offset, 0))
    >;
  }

  // 5. No offset at all: _IMM form with a zero immediate.
  def : GCNPat <
    (vt (smrd_load (i64 SReg_64:$sbase))),
    (vt (!cast<SM_Pseudo>(Instr#"_IMM") i64:$sbase, 0, 0))
  >;
}
894
// Selection patterns mapping SIsbuffer_load of type `vt` onto the scalar
// buffer load pseudo family named `Instr`. The cache policy immediate is
// converted with extract_cpol.
//   immci - when false, the GFX7 32-bit literal offset pattern is not emitted
// Pattern order is unchanged from the original definition.
multiclass SMLoad_Pattern <string Instr, ValueType vt, bit immci = true> {
  // 1. Offset as an immediate.
  let AddedComplexity = 2 in
  def : GCNPat <
    (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm i32:$offset), timm:$cachepolicy),
    (vt (!cast<SM_Pseudo>(Instr#"_IMM") SReg_128:$sbase, i32imm:$offset, (extract_cpol $cachepolicy)))
  >;

  // 2. 32-bit immediate offset, GFX7 only.
  if immci then def : GCNPat <
    (vt (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm32 i32:$offset), timm:$cachepolicy)),
    (!cast<InstSI>(Instr#"_IMM_ci") SReg_128:$sbase, smrd_literal_offset:$offset,
                                    (extract_cpol $cachepolicy))> {
    let OtherPredicates = [isGFX7Only];
    let AddedComplexity = 1;
  }

  // 3a. Offset held in a 32-bit SGPR, before GFX9: dedicated _SGPR form.
  let OtherPredicates = [isNotGFX9Plus] in
  def : GCNPat <
    (SIsbuffer_load v4i32:$sbase, i32:$soffset, timm:$cachepolicy),
    (vt (!cast<SM_Pseudo>(Instr#"_SGPR") SReg_128:$sbase, SReg_32:$soffset, (extract_cpol $cachepolicy)))
  >;

  let OtherPredicates = [isGFX9Plus] in {
    // 3b. Offset held in a 32-bit SGPR, GFX9+: _SGPR_IMM with zero immediate.
    def : GCNPat <
      (SIsbuffer_load v4i32:$sbase, i32:$soffset, timm:$cachepolicy),
      (vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") SReg_128:$sbase, SReg_32:$soffset, 0, (extract_cpol $cachepolicy)))
    >;

    // 4. Offset as a 32-bit SGPR plus an immediate.
    def : GCNPat <
      (SIsbuffer_load v4i32:$sbase, (SMRDBufferSgprImm i32:$soffset, i32:$offset),
                      timm:$cachepolicy),
      (vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") SReg_128:$sbase, SReg_32:$soffset, i32imm:$offset,
                                               (extract_cpol $cachepolicy)))
    >;
  }
}
933
// GFX12+ selection patterns for uniform sub-dword (extending) loads.
//   Instr - base pseudo name (variant suffix is appended)
//   node  - the uniform extending-load fragment to match
//   vt    - result value type
// All four patterns share the isGFX12Plus predicate.
multiclass ScalarLoadWithExtensionPat <string Instr, SDPatternOperator node, ValueType vt> {
  let OtherPredicates = [isGFX12Plus] in {
    // 1. Immediate offset.
    def : GCNPat <
      (node (SMRDImm i64:$sbase, i32:$offset)),
      (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, 0))
    >;

    // 2. SGPR offset: _SGPR_IMM form with a zero immediate.
    def : GCNPat <
      (node (SMRDSgpr i64:$sbase, i32:$soffset)),
      (vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") $sbase, $soffset, 0, 0))
    >;

    // 3. SGPR + immediate offset.
    def : GCNPat <
      (node (SMRDSgprImm i64:$sbase, i32:$soffset, i32:$offset)),
      (vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") $sbase, $soffset, $offset, 0))
    >;

    // 4. No offset: _IMM form with a zero immediate.
    def : GCNPat <
      (vt (node (i64 SReg_64:$sbase))),
      (vt (!cast<SM_Pseudo>(Instr#"_IMM") i64:$sbase, 0, 0))
    >;
  }
}
963
// GFX12+ selection patterns for the sub-dword scalar buffer load nodes. The
// result is always selected as i32.
//   name  - the buffer-load node to match
//   Instr - base pseudo name (variant suffix is appended)
// All three patterns share the isGFX12Plus predicate.
multiclass ScalarBufferLoadIntrinsicPat <SDPatternOperator name, string Instr> {
  let OtherPredicates = [isGFX12Plus] in {
    // 1. Offset as an immediate.
    def : GCNPat <
      (name v4i32:$sbase, (SMRDBufferImm i32:$offset), timm:$cachepolicy),
      (i32 (!cast<SM_Pseudo>(Instr#"_IMM") SReg_128:$sbase, i32imm:$offset, (extract_cpol $cachepolicy)))
    >;

    // 2. Offset as a 32-bit SGPR: _SGPR_IMM form with a zero immediate.
    def : GCNPat <
      (name v4i32:$sbase, i32:$soffset, timm:$cachepolicy),
      (i32 (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") SReg_128:$sbase, SReg_32:$soffset, 0, (extract_cpol $cachepolicy)))
    >;

    // 3. Offset as a 32-bit SGPR plus an immediate.
    def : GCNPat <
      (name v4i32:$sbase, (SMRDBufferSgprImm i32:$soffset, i32:$offset),
                      timm:$cachepolicy),
      (i32 (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") SReg_128:$sbase, SReg_32:$soffset, i32imm:$offset,
                                               (extract_cpol $cachepolicy)))
    >;
  }
}
989
// Global and constant loads can be selected to either MUBUF or SMRD
// instructions. SMRD instructions are faster, so raise the complexity of the
// scalar patterns to make the instruction selector prefer them.
let AddedComplexity = 100 in {

// Sub-dword extending loads. Any-extending loads use the unsigned forms.
defm : ScalarLoadWithExtensionPat <"S_LOAD_U8", smrd_extloadi8, i32>;
defm : ScalarLoadWithExtensionPat <"S_LOAD_U8", smrd_zextloadi8, i32>;
defm : ScalarLoadWithExtensionPat <"S_LOAD_I8", smrd_sextloadi8, i32>;
defm : ScalarLoadWithExtensionPat <"S_LOAD_U16", smrd_extloadi16, i32>;
defm : ScalarLoadWithExtensionPat <"S_LOAD_U16", smrd_zextloadi16, i32>;
defm : ScalarLoadWithExtensionPat <"S_LOAD_I16", smrd_sextloadi16, i32>;

// Sub-dword scalar buffer loads.
defm : ScalarBufferLoadIntrinsicPat <SIsbuffer_load_byte, "S_BUFFER_LOAD_I8">;
defm : ScalarBufferLoadIntrinsicPat <SIsbuffer_load_ubyte, "S_BUFFER_LOAD_U8">;
defm : ScalarBufferLoadIntrinsicPat <SIsbuffer_load_short, "S_BUFFER_LOAD_I16">;
defm : ScalarBufferLoadIntrinsicPat <SIsbuffer_load_ushort, "S_BUFFER_LOAD_U16">;

// Dword and wider loads: one pattern set per value type of each register
// class. DWORDX3 passes immci = false, so it gets no GFX7 literal pattern.
foreach vt = Reg32Types.types in
  defm : SMRD_Pattern <"S_LOAD_DWORD", vt>;

foreach vt = SReg_64.RegTypes in
  defm : SMRD_Pattern <"S_LOAD_DWORDX2", vt>;

foreach vt = SReg_96.RegTypes in
  defm : SMRD_Pattern <"S_LOAD_DWORDX3", vt, false>;

foreach vt = SReg_128.RegTypes in
  defm : SMRD_Pattern <"S_LOAD_DWORDX4", vt>;

foreach vt = SReg_256.RegTypes in
  defm : SMRD_Pattern <"S_LOAD_DWORDX8", vt>;

foreach vt = SReg_512.RegTypes in
  defm : SMRD_Pattern <"S_LOAD_DWORDX16", vt>;

} // End let AddedComplexity = 100
1031
// Scalar buffer load patterns for integer result types. The DWORDX3 form
// passes immci = false, so no GFX7 literal-offset pattern is emitted for it.
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORD",     i32>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX2",   v2i32>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX3",   v3i32, false>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX4",   v4i32>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX8",   v8i32>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX16",  v16i32>;

// The same instructions for floating-point result types.
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORD",     f32>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX2",   v2f32>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX3",   v3f32, false>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX4",   v4f32>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX8",   v8f32>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX16",  v16f32>;
1045
// readcyclecounter via S_MEMTIME, on subtargets that have the instruction.
let OtherPredicates = [HasSMemTimeInst] in
def : GCNPat <
  (i64 (readcyclecounter)),
  (S_MEMTIME)
>;

// readcyclecounter via the SHADER_CYCLES hardware register: the value read by
// S_GETREG_B32 becomes the low half of the 64-bit result and the high half is
// zero. Prefer this to s_memtime because it has lower and more predictable
// latency, hence the added complexity.
let OtherPredicates = [HasShaderCyclesRegister],
    AddedComplexity = 1 in
def : GCNPat <
  (i64 (readcyclecounter)),
  (REG_SEQUENCE SReg_64,
    (S_GETREG_B32 getHwRegImm<HWREG.SHADER_CYCLES, 0, -12>.ret), sub0,
    (S_MOV_B32 (i32 0)), sub1)
>;
1063
// Target-immediate leaves matching exactly the i32 constants 0 and 1.
def i32imm_zero : TImmLeaf <i32, [{ return Imm == 0; }]>;
def i32imm_one  : TImmLeaf <i32, [{ return Imm == 1; }]>;
1071
// Selection patterns for uniform prefetches.
//   type       - "INST" or "DATA"; selects the S_PREFETCH_<type> pseudo
//   cache_type - immediate leaf the prefetch's last operand must satisfy
// Every pattern passes SGPR_NULL and an i8 zero as the trailing operands.
multiclass SMPrefetchPat<string type, TImmLeaf cache_type> {
  defvar PrefetchOp = !cast<SM_Prefetch_Pseudo>("S_PREFETCH_"#type);

  // 64-bit base plus immediate offset.
  def : GCNPat <
    (smrd_prefetch (SMRDImm i64:$sbase, i32:$offset), timm, timm, cache_type),
    (PrefetchOp $sbase, $offset, (i32 SGPR_NULL), (i8 0))
  >;

  // 64-bit base, no offset.
  def : GCNPat <
    (smrd_prefetch (i64 SReg_64:$sbase), timm, timm, cache_type),
    (PrefetchOp $sbase, 0, (i32 SGPR_NULL), (i8 0))
  >;

  // 32-bit base: widen to 64 bits with a zero high half.
  def : GCNPat <
    (smrd_prefetch (i32 SReg_32:$sbase), timm, timm, cache_type),
    (PrefetchOp
        (i64 (REG_SEQUENCE SReg_64, $sbase, sub0, (i32 (S_MOV_B32 (i32 0))), sub1)),
        0, (i32 SGPR_NULL), (i8 0))
  >;
}

defm : SMPrefetchPat<"INST", i32imm_zero>;
defm : SMPrefetchPat<"DATA", i32imm_one>;
1093
1094//===----------------------------------------------------------------------===//
1095// GFX10.
1096//===----------------------------------------------------------------------===//
1097
// 64-bit SMEM real encoding fields shared by the GFX10 and GFX11 classes.
//   op        - 8-bit opcode, Inst{25-18}
//   ps        - pseudo instruction being encoded
//   opName    - assembly mnemonic
//   subtarget - encoding family passed to SIMCInstr
//   sgpr_null - register whose HW encoding fills the soffset field when the
//               pseudo has only an immediate offset
// Fields the pseudo does not use are left unset (?).
class SMEM_Real_10Plus_common<bits<8> op, SM_Pseudo ps, string opName,
                              int subtarget, RegisterWithSubRegs sgpr_null> :
    SM_Real<ps, opName>, SIMCInstr<ps.PseudoInstr, subtarget>, Enc64 {
  // Only bits 6-1 of the base register encoding are stored.
  let Inst{5-0}   = !if(ps.has_sbase, sbase{6-1}, ?);
  let Inst{12-6}  = !if(ps.has_sdst, sdst{6-0}, ?);
  let Inst{25-18} = op;
  let Inst{31-26} = 0x3d;
  // There are SMEM instructions that do not employ any of the offset
  // fields, in which case we need them to remain undefined.
  // With only an SGPR offset the immediate field is encoded as zero.
  let Inst{52-32} = !if(ps.has_offset, offset{20-0}, !if(ps.has_soffset, 0, ?));
  let Inst{63-57} = !if(ps.has_soffset, soffset{6-0},
                        !if(ps.has_offset, sgpr_null.HWEncoding{6-0}, ?));
}
1111
// GFX10-only SMEM real encoding. Adds the cache-policy bits to the common
// layout: DLC in Inst{14} and GLC in Inst{16}, each left unset (?) when the
// pseudo does not have the corresponding bit.
class SMEM_Real_gfx10<bits<8> op, SM_Pseudo ps>
    : SMEM_Real_10Plus_common<op, ps, ps.Mnemonic, SIEncodingFamily.GFX10,
                              SGPR_NULL_gfxpre11> {
  let AssemblerPredicate = isGFX10Only;
  let DecoderNamespace = "GFX10";
  let Inst{14}    = !if(ps.has_dlc, cpol{CPolBit.DLC}, ?);
  let Inst{16}    = !if(ps.has_glc, cpol{CPolBit.GLC}, ?);
}
1120
// GFX10 real for a load; the pseudo is looked up by its record name `ps`.
class SMEM_Real_Load_gfx10<bits<8> op, string ps>
    : SMEM_Real_gfx10<op, !cast<SM_Pseudo>(ps)>;

// Emits the _IMM, _SGPR and _SGPR_IMM GFX10 reals for the load pseudo family
// whose base name is the defm name.
multiclass SM_Real_Loads_gfx10<bits<8> op> {
  defvar ps = NAME;
  def _IMM_gfx10 : SMEM_Real_Load_gfx10<op, ps#"_IMM">;
  def _SGPR_gfx10 : SMEM_Real_Load_gfx10<op, ps#"_SGPR">;
  def _SGPR_IMM_gfx10 : SMEM_Real_Load_gfx10<op, ps#"_SGPR_IMM">;
}
1130
// GFX10 real for instructions that take a data operand instead of producing a
// result: sdst is cleared and Inst{12-6} carries sdata instead.
class SMEM_Real_Store_gfx10<bits<8> op, SM_Pseudo ps> : SMEM_Real_gfx10<op, ps> {
  bits<7> sdata;

  let sdst = ?;
  let Inst{12-6} = !if(ps.has_sdst, sdata{6-0}, ?);
}

// Emits the _IMM, _SGPR and _SGPR_IMM GFX10 reals for the store pseudo family
// whose base name is the defm name.
multiclass SM_Real_Stores_gfx10<bits<8> op> {
  defvar ps = NAME;

  def _IMM_gfx10
    : SMEM_Real_Store_gfx10 <op, !cast<SM_Store_Pseudo>(ps#"_IMM")>;

  def _SGPR_gfx10
    : SMEM_Real_Store_gfx10 <op, !cast<SM_Store_Pseudo>(ps#"_SGPR")>;

  def _SGPR_IMM_gfx10
    : SMEM_Real_Store_gfx10 <op, !cast<SM_Store_Pseudo>(ps#"_SGPR_IMM")>;
}
1149
// GFX10 real encodings. The first argument is the 8-bit opcode.

// Plain scalar loads.
defm S_LOAD_DWORD            : SM_Real_Loads_gfx10<0x000>;
defm S_LOAD_DWORDX2          : SM_Real_Loads_gfx10<0x001>;
defm S_LOAD_DWORDX4          : SM_Real_Loads_gfx10<0x002>;
defm S_LOAD_DWORDX8          : SM_Real_Loads_gfx10<0x003>;
defm S_LOAD_DWORDX16         : SM_Real_Loads_gfx10<0x004>;

// Scratch loads, only on subtargets with scalar flat scratch instructions.
let SubtargetPredicate = HasScalarFlatScratchInsts in {
defm S_SCRATCH_LOAD_DWORD    : SM_Real_Loads_gfx10<0x005>;
defm S_SCRATCH_LOAD_DWORDX2  : SM_Real_Loads_gfx10<0x006>;
defm S_SCRATCH_LOAD_DWORDX4  : SM_Real_Loads_gfx10<0x007>;
} // End SubtargetPredicate = HasScalarFlatScratchInsts

// Buffer loads.
defm S_BUFFER_LOAD_DWORD     : SM_Real_Loads_gfx10<0x008>;
defm S_BUFFER_LOAD_DWORDX2   : SM_Real_Loads_gfx10<0x009>;
defm S_BUFFER_LOAD_DWORDX4   : SM_Real_Loads_gfx10<0x00a>;
defm S_BUFFER_LOAD_DWORDX8   : SM_Real_Loads_gfx10<0x00b>;
defm S_BUFFER_LOAD_DWORDX16  : SM_Real_Loads_gfx10<0x00c>;

// Stores, only on subtargets with scalar stores. The scratch stores also
// require scalar flat scratch instructions.
let SubtargetPredicate = HasScalarStores in {
defm S_STORE_DWORD           : SM_Real_Stores_gfx10<0x010>;
defm S_STORE_DWORDX2         : SM_Real_Stores_gfx10<0x011>;
defm S_STORE_DWORDX4         : SM_Real_Stores_gfx10<0x012>;
let OtherPredicates = [HasScalarFlatScratchInsts] in {
defm S_SCRATCH_STORE_DWORD   : SM_Real_Stores_gfx10<0x015>;
defm S_SCRATCH_STORE_DWORDX2 : SM_Real_Stores_gfx10<0x016>;
defm S_SCRATCH_STORE_DWORDX4 : SM_Real_Stores_gfx10<0x017>;
} // End OtherPredicates = [HasScalarFlatScratchInsts]
defm S_BUFFER_STORE_DWORD    : SM_Real_Stores_gfx10<0x018>;
defm S_BUFFER_STORE_DWORDX2  : SM_Real_Stores_gfx10<0x019>;
defm S_BUFFER_STORE_DWORDX4  : SM_Real_Stores_gfx10<0x01a>;
} // End SubtargetPredicate = HasScalarStores

// Timer, wave-id and cache-invalidate instructions.
def S_MEMREALTIME_gfx10              : SMEM_Real_gfx10<0x025, S_MEMREALTIME>;
def S_MEMTIME_gfx10                  : SMEM_Real_gfx10<0x024, S_MEMTIME>;
def S_GL1_INV_gfx10                  : SMEM_Real_gfx10<0x01f, S_GL1_INV>;
def S_GET_WAVEID_IN_WORKGROUP_gfx10  : SMEM_Real_gfx10<0x02a, S_GET_WAVEID_IN_WORKGROUP>;
def S_DCACHE_INV_gfx10               : SMEM_Real_gfx10<0x020, S_DCACHE_INV>;

// The dcache write-back is only defined when scalar stores are available.
let SubtargetPredicate = HasScalarStores in {
def S_DCACHE_WB_gfx10                : SMEM_Real_gfx10<0x021, S_DCACHE_WB>;
} // End SubtargetPredicate = HasScalarStores
1191
// Emits the _IMM, _SGPR and _SGPR_IMM GFX10 reals for the probe pseudo family
// whose base name is the defm name. The store-style real class is used, so
// the data operand is encoded in the sdst field position.
//
// The pseudos are cast to SM_Probe_Pseudo, matching the GFX11 probe multiclass
// which casts the same records to that class, so a wrong pseudo name is
// rejected by the cast instead of silently accepting any SM_Pseudo.
multiclass SM_Real_Probe_gfx10<bits<8> op> {
  defvar ps = NAME;
  def _IMM_gfx10
    : SMEM_Real_Store_gfx10 <op, !cast<SM_Probe_Pseudo>(ps#"_IMM")>;
  def _SGPR_gfx10
    : SMEM_Real_Store_gfx10 <op, !cast<SM_Probe_Pseudo>(ps#"_SGPR")>;
  def _SGPR_IMM_gfx10
    : SMEM_Real_Store_gfx10 <op, !cast<SM_Probe_Pseudo>(ps#"_SGPR_IMM")>;
}

defm S_ATC_PROBE        : SM_Real_Probe_gfx10 <0x26>;
defm S_ATC_PROBE_BUFFER : SM_Real_Probe_gfx10 <0x27>;
1202
// GFX10 real encoding of a scalar atomic. The AtomicNoRet mix-in is given the
// record name with "_RTN" removed, plus the pseudo's glc flag.
class SMEM_Atomic_Real_gfx10 <bits<8> op, SM_Atomic_Pseudo ps>
  : SMEM_Real_gfx10 <op, ps>,
    AtomicNoRet <!subst("_RTN","",NAME), ps.glc> {

  bits<7> sdata;

  let Constraints = ps.Constraints;
  let DisableEncoding = ps.DisableEncoding;

  // The GLC cache-policy bit is fixed by the pseudo rather than left free.
  let cpol{CPolBit.GLC} = ps.glc;

  // Unlike the base class, a missing DLC bit is encoded as 0 instead of
  // being left unset.
  let Inst{14} = !if(ps.has_dlc, cpol{CPolBit.DLC}, 0);
  // The register field holds sdst when glc is set and sdata otherwise.
  let Inst{12-6} = !if(ps.glc, sdst{6-0}, sdata{6-0});
}
1217
// Emits the six GFX10 reals of a scalar atomic: the _IMM, _SGPR and _SGPR_IMM
// addressing variants, each with and without the _RTN suffix. The defm name
// is the base name of the SM_Atomic_Pseudo records.
multiclass SM_Real_Atomics_gfx10<bits<8> op> {
  defvar ps = NAME;

  // Variants without _RTN.
  def _IMM_gfx10
    : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#"_IMM")>;
  def _SGPR_gfx10
    : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#"_SGPR")>;
  def _SGPR_IMM_gfx10
    : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#"_SGPR_IMM")>;

  // _RTN variants.
  def _IMM_RTN_gfx10
    : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#"_IMM_RTN")>;
  def _SGPR_RTN_gfx10
    : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#"_SGPR_RTN")>;
  def _SGPR_IMM_RTN_gfx10
    : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#"_SGPR_IMM_RTN")>;
}
1227
1228let SubtargetPredicate = HasScalarAtomics in {
1229
// GFX10 real encodings of every scalar atomic. The opcodes are the same
// values used by the VI instantiations of these instructions.

// Buffer atomics: opcodes 0x40-0x4c.
defm S_BUFFER_ATOMIC_SWAP         : SM_Real_Atomics_gfx10 <0x40>;
defm S_BUFFER_ATOMIC_CMPSWAP      : SM_Real_Atomics_gfx10 <0x41>;
defm S_BUFFER_ATOMIC_ADD          : SM_Real_Atomics_gfx10 <0x42>;
defm S_BUFFER_ATOMIC_SUB          : SM_Real_Atomics_gfx10 <0x43>;
defm S_BUFFER_ATOMIC_SMIN         : SM_Real_Atomics_gfx10 <0x44>;
defm S_BUFFER_ATOMIC_UMIN         : SM_Real_Atomics_gfx10 <0x45>;
defm S_BUFFER_ATOMIC_SMAX         : SM_Real_Atomics_gfx10 <0x46>;
defm S_BUFFER_ATOMIC_UMAX         : SM_Real_Atomics_gfx10 <0x47>;
defm S_BUFFER_ATOMIC_AND          : SM_Real_Atomics_gfx10 <0x48>;
defm S_BUFFER_ATOMIC_OR           : SM_Real_Atomics_gfx10 <0x49>;
defm S_BUFFER_ATOMIC_XOR          : SM_Real_Atomics_gfx10 <0x4a>;
defm S_BUFFER_ATOMIC_INC          : SM_Real_Atomics_gfx10 <0x4b>;
defm S_BUFFER_ATOMIC_DEC          : SM_Real_Atomics_gfx10 <0x4c>;

// Buffer atomics, _X2 variants: opcodes 0x60-0x6c.
defm S_BUFFER_ATOMIC_SWAP_X2      : SM_Real_Atomics_gfx10 <0x60>;
defm S_BUFFER_ATOMIC_CMPSWAP_X2   : SM_Real_Atomics_gfx10 <0x61>;
defm S_BUFFER_ATOMIC_ADD_X2       : SM_Real_Atomics_gfx10 <0x62>;
defm S_BUFFER_ATOMIC_SUB_X2       : SM_Real_Atomics_gfx10 <0x63>;
defm S_BUFFER_ATOMIC_SMIN_X2      : SM_Real_Atomics_gfx10 <0x64>;
defm S_BUFFER_ATOMIC_UMIN_X2      : SM_Real_Atomics_gfx10 <0x65>;
defm S_BUFFER_ATOMIC_SMAX_X2      : SM_Real_Atomics_gfx10 <0x66>;
defm S_BUFFER_ATOMIC_UMAX_X2      : SM_Real_Atomics_gfx10 <0x67>;
defm S_BUFFER_ATOMIC_AND_X2       : SM_Real_Atomics_gfx10 <0x68>;
defm S_BUFFER_ATOMIC_OR_X2        : SM_Real_Atomics_gfx10 <0x69>;
defm S_BUFFER_ATOMIC_XOR_X2       : SM_Real_Atomics_gfx10 <0x6a>;
defm S_BUFFER_ATOMIC_INC_X2       : SM_Real_Atomics_gfx10 <0x6b>;
defm S_BUFFER_ATOMIC_DEC_X2       : SM_Real_Atomics_gfx10 <0x6c>;

// Non-buffer atomics: opcodes 0x80-0x8c.
defm S_ATOMIC_SWAP                : SM_Real_Atomics_gfx10 <0x80>;
defm S_ATOMIC_CMPSWAP             : SM_Real_Atomics_gfx10 <0x81>;
defm S_ATOMIC_ADD                 : SM_Real_Atomics_gfx10 <0x82>;
defm S_ATOMIC_SUB                 : SM_Real_Atomics_gfx10 <0x83>;
defm S_ATOMIC_SMIN                : SM_Real_Atomics_gfx10 <0x84>;
defm S_ATOMIC_UMIN                : SM_Real_Atomics_gfx10 <0x85>;
defm S_ATOMIC_SMAX                : SM_Real_Atomics_gfx10 <0x86>;
defm S_ATOMIC_UMAX                : SM_Real_Atomics_gfx10 <0x87>;
defm S_ATOMIC_AND                 : SM_Real_Atomics_gfx10 <0x88>;
defm S_ATOMIC_OR                  : SM_Real_Atomics_gfx10 <0x89>;
defm S_ATOMIC_XOR                 : SM_Real_Atomics_gfx10 <0x8a>;
defm S_ATOMIC_INC                 : SM_Real_Atomics_gfx10 <0x8b>;
defm S_ATOMIC_DEC                 : SM_Real_Atomics_gfx10 <0x8c>;

// Non-buffer atomics, _X2 variants: opcodes 0xa0-0xac.
defm S_ATOMIC_SWAP_X2             : SM_Real_Atomics_gfx10 <0xa0>;
defm S_ATOMIC_CMPSWAP_X2          : SM_Real_Atomics_gfx10 <0xa1>;
defm S_ATOMIC_ADD_X2              : SM_Real_Atomics_gfx10 <0xa2>;
defm S_ATOMIC_SUB_X2              : SM_Real_Atomics_gfx10 <0xa3>;
defm S_ATOMIC_SMIN_X2             : SM_Real_Atomics_gfx10 <0xa4>;
defm S_ATOMIC_UMIN_X2             : SM_Real_Atomics_gfx10 <0xa5>;
defm S_ATOMIC_SMAX_X2             : SM_Real_Atomics_gfx10 <0xa6>;
defm S_ATOMIC_UMAX_X2             : SM_Real_Atomics_gfx10 <0xa7>;
defm S_ATOMIC_AND_X2              : SM_Real_Atomics_gfx10 <0xa8>;
defm S_ATOMIC_OR_X2               : SM_Real_Atomics_gfx10 <0xa9>;
defm S_ATOMIC_XOR_X2              : SM_Real_Atomics_gfx10 <0xaa>;
defm S_ATOMIC_INC_X2              : SM_Real_Atomics_gfx10 <0xab>;
defm S_ATOMIC_DEC_X2              : SM_Real_Atomics_gfx10 <0xac>;
1285
// Emits the _IMM, _SGPR and _SGPR_IMM GFX10 reals for the dcache-discard
// pseudo family whose base name is the defm name.
//
// The pseudos are cast to SM_Discard_Pseudo, matching SM_Real_Discard_vi which
// casts the same records to that class, so a wrong pseudo name is rejected by
// the cast instead of silently accepting any SM_Pseudo.
//
// NOTE(review): these definitions sit inside the enclosing
// SubtargetPredicate = HasScalarAtomics region - confirm that is intended.
multiclass SM_Real_Discard_gfx10<bits<8> op> {
  defvar ps = NAME;
  def _IMM_gfx10
    : SMEM_Real_gfx10 <op, !cast<SM_Discard_Pseudo>(ps#"_IMM")>;
  def _SGPR_gfx10
    : SMEM_Real_gfx10 <op, !cast<SM_Discard_Pseudo>(ps#"_SGPR")>;
  def _SGPR_IMM_gfx10
    : SMEM_Real_gfx10 <op, !cast<SM_Discard_Pseudo>(ps#"_SGPR_IMM")>;
}

defm S_DCACHE_DISCARD    : SM_Real_Discard_gfx10 <0x28>;
defm S_DCACHE_DISCARD_X2 : SM_Real_Discard_gfx10 <0x29>;
1295
1296} // End SubtargetPredicate = HasScalarAtomics
1297
// Generated lookup table over every record deriving from SM_Real. Each row is
// emitted as the C++ type SMInfo with the fields Opcode and is_buffer, and the
// generated lookup function getSMEMOpcodeHelper is keyed by Opcode.
def SMInfoTable : GenericTable {
  let FilterClass = "SM_Real";
  let CppTypeName = "SMInfo";
  let Fields = ["Opcode", "is_buffer"];

  let PrimaryKey = ["Opcode"];
  let PrimaryKeyName = "getSMEMOpcodeHelper";
}
1306
1307//===----------------------------------------------------------------------===//
1308// GFX11.
1309//===----------------------------------------------------------------------===//
1310
// GFX11-only SMEM real encoding. Adds the cache-policy bits to the common
// layout: DLC in Inst{13} and GLC in Inst{14}. A bit the pseudo does not have
// is encoded as 0 here, not left unset. The mnemonic defaults to the pseudo's
// but can be overridden through opName.
class SMEM_Real_gfx11<bits<8> op, SM_Pseudo ps, string opName = ps.Mnemonic> :
    SMEM_Real_10Plus_common<op, ps, opName, SIEncodingFamily.GFX11,
                            SGPR_NULL_gfx11plus> {
  let AssemblerPredicate = isGFX11Only;
  let DecoderNamespace = "GFX11";
  let Inst{13}    = !if(ps.has_dlc, cpol{CPolBit.DLC}, 0);
  let Inst{14}    = !if(ps.has_glc, cpol{CPolBit.GLC}, 0);
}
1319
// GFX11 real for a load; the pseudo is looked up by its record name `ps` and
// printed with the mnemonic `opName`.
class SMEM_Real_Load_gfx11<bits<8> op, string ps, string opName> :
    SMEM_Real_gfx11<op, !cast<SM_Pseudo>(ps), opName>;

// Emits the _IMM, _SGPR and _SGPR_IMM GFX11 reals for the load pseudo family
// `ps`. The mnemonic is the lower-cased defm name, and the pseudo's original
// mnemonic is registered as an alias for it on GFX11+.
multiclass SM_Real_Loads_gfx11<bits<8> op, string ps> {
  defvar opName = !tolower(NAME);
  def _IMM_gfx11 : SMEM_Real_Load_gfx11<op, ps#"_IMM", opName>;
  def _SGPR_gfx11 : SMEM_Real_Load_gfx11<op, ps#"_SGPR", opName>;
  def _SGPR_IMM_gfx11 : SMEM_Real_Load_gfx11<op, ps#"_SGPR_IMM", opName>;
  def : MnemonicAlias<!cast<SM_Pseudo>(ps#"_IMM").Mnemonic, opName>,
                      Requires<[isGFX11Plus]>;
}
1331
// GFX11 loads. The defm name gives the new mnemonic; the string argument
// names the pseudo family being encoded.
defm S_LOAD_B32  : SM_Real_Loads_gfx11<0x000, "S_LOAD_DWORD">;
defm S_LOAD_B64  : SM_Real_Loads_gfx11<0x001, "S_LOAD_DWORDX2">;
defm S_LOAD_B128 : SM_Real_Loads_gfx11<0x002, "S_LOAD_DWORDX4">;
defm S_LOAD_B256 : SM_Real_Loads_gfx11<0x003, "S_LOAD_DWORDX8">;
defm S_LOAD_B512 : SM_Real_Loads_gfx11<0x004, "S_LOAD_DWORDX16">;

// GFX11 buffer loads.
defm S_BUFFER_LOAD_B32  : SM_Real_Loads_gfx11<0x008, "S_BUFFER_LOAD_DWORD">;
defm S_BUFFER_LOAD_B64  : SM_Real_Loads_gfx11<0x009, "S_BUFFER_LOAD_DWORDX2">;
defm S_BUFFER_LOAD_B128 : SM_Real_Loads_gfx11<0x00a, "S_BUFFER_LOAD_DWORDX4">;
defm S_BUFFER_LOAD_B256 : SM_Real_Loads_gfx11<0x00b, "S_BUFFER_LOAD_DWORDX8">;
defm S_BUFFER_LOAD_B512 : SM_Real_Loads_gfx11<0x00c, "S_BUFFER_LOAD_DWORDX16">;

// Cache invalidates. These opcodes differ from the GFX10 ones for the same
// pseudos (0x01f and 0x020 there).
def S_GL1_INV_gfx11    : SMEM_Real_gfx11<0x020, S_GL1_INV>;
def S_DCACHE_INV_gfx11 : SMEM_Real_gfx11<0x021, S_DCACHE_INV>;
1346
// GFX11 real for instructions that take a data operand instead of producing a
// result: sdst is cleared and Inst{12-6} carries sdata instead.
class SMEM_Real_Store_gfx11 <bits<8> op, SM_Pseudo ps> : SMEM_Real_gfx11<op, ps> {
  // encoding
  bits<7> sdata;

  let sdst = ?;
  let Inst{12-6}  = !if(ps.has_sdst, sdata{6-0}, ?);
}
1354
// Emits the _IMM, _SGPR and _SGPR_IMM GFX11 reals for the probe pseudo family
// whose base name is the defm name. The store-style real class is used, so
// the data operand is encoded in the sdst field position.
multiclass SM_Real_Probe_gfx11<bits<8> op> {
  defvar ps = NAME;

  def _IMM_gfx11
    : SMEM_Real_Store_gfx11 <op, !cast<SM_Probe_Pseudo>(ps#"_IMM")>;

  def _SGPR_gfx11
    : SMEM_Real_Store_gfx11 <op, !cast<SM_Probe_Pseudo>(ps#"_SGPR")>;

  def _SGPR_IMM_gfx11
    : SMEM_Real_Store_gfx11 <op, !cast<SM_Probe_Pseudo>(ps#"_SGPR_IMM")>;
}

defm S_ATC_PROBE        : SM_Real_Probe_gfx11 <0x22>;
defm S_ATC_PROBE_BUFFER : SM_Real_Probe_gfx11 <0x23>;
1365
1366//===----------------------------------------------------------------------===//
1367// GFX12.
1368//===----------------------------------------------------------------------===//
1369
// 64-bit SMEM real encoding fields common to GFX12+.
//   op        - 6-bit opcode, Inst{18-13}
//   ps        - pseudo instruction being encoded
//   opName    - assembly mnemonic
//   subtarget - encoding family passed to SIMCInstr
//   sgpr_null - register whose HW encoding fills the soffset field when the
//               pseudo has only an immediate offset
// The sbase / sdst fields are not set here; the derived class sets them.
class SMEM_Real_gfx12Plus<bits<6> op, SM_Pseudo ps, string opName,
                          int subtarget, RegisterWithSubRegs sgpr_null> :
    SM_Real<ps, opName>, SIMCInstr<ps.PseudoInstr, subtarget>, Enc64 {

  let Inst{18-13} = op;
  let Inst{31-26} = 0x3d;

  // 24-bit immediate offset; zero when only an SGPR offset is present and
  // unset (?) when the pseudo uses neither offset.
  let Inst{55-32} = !if(ps.has_offset, offset{23-0}, !if(ps.has_soffset, 0, ?));
  let Inst{63-57} = !if(ps.has_soffset, soffset{6-0},
                        !if(ps.has_offset, sgpr_null.HWEncoding{6-0}, ?));
}
1381
// GFX12 SMEM real encoding: adds sbase{6-1} in Inst{5-0} and sdst in
// Inst{12-6}, each left unset (?) when the pseudo lacks the operand. The
// mnemonic defaults to the pseudo's but can be overridden through opName.
class SMEM_Real_gfx12<bits<6> op, SM_Pseudo ps, string opName = ps.Mnemonic> :
    SMEM_Real_gfx12Plus<op, ps, opName, SIEncodingFamily.GFX12,
                        SGPR_NULL_gfx11plus> {
  let AssemblerPredicate = isGFX12Plus;
  let DecoderNamespace = "GFX12";

  let Inst{5-0}   = !if(ps.has_sbase, sbase{6-1}, ?);
  let Inst{12-6}  = !if(ps.has_sdst, sdst{6-0}, ?);
}
1391
// GFX12 real for instructions that take an sdata operand in place of sdst.
// Only the low 5 bits of sdata are encoded (Inst{10-6}); the two bits above
// them are forced to zero.
class SMEM_Real_Prefetch_gfx12<bits<6> op, SM_Pseudo ps> :
    SMEM_Real_gfx12<op, ps> {
  bits<7> sdata; // Only 5 bits of sdata are supported.

  let sdst = ?;
  let Inst{12-11} = 0; // Unused sdata bits.
  let Inst{10-6}  = !if(ps.has_sdst, sdata{4-0}, ?);
}
1400
// GFX12 real for a load. The pseudo is found by appending the offset mode's
// variant suffix to `ps`, and the input operand list is rebuilt from the
// pseudo's base register class, the offset mode's operands and the cache
// policy operand.
class SMEM_Real_Load_gfx12<bits<6> op, string ps, string opName, OffsetMode offsets> :
    SMEM_Real_gfx12<op, !cast<SM_Pseudo>(ps # offsets.Variant), opName> {
  RegisterClass BaseClass = !cast<SM_Load_Pseudo>(ps # offsets.Variant).BaseClass;
  let InOperandList = !con((ins BaseClass:$sbase), offsets.Ins, (ins CPol:$cpol));

  let Inst{22-21} = cpol{4-3}; // scope
  let Inst{24-23} = cpol{1-0}; // th - only lower 2 bits are supported
}

// Emits the _IMM and _SGPR_IMM GFX12 reals for the load pseudo family `ps`,
// which defaults to the defm name. The mnemonic is the lower-cased defm name.
// No separate _SGPR real is emitted here.
multiclass SM_Real_Loads_gfx12<bits<6> op, string ps = NAME> {
  defvar opName = !tolower(NAME);
  def _IMM_gfx12 : SMEM_Real_Load_gfx12<op, ps, opName, IMM_Offset>;
  def _SGPR_IMM_gfx12 : SMEM_Real_Load_gfx12<op, ps, opName, SGPR_IMM_Offset>;
}
1415
// Scalar loads under bit-width names (B32 ... B512). Each is derived from the
// pseudo family named in the second argument (S_LOAD_DWORD*), while the
// mnemonic comes from the lower-cased defm name (e.g. s_load_b32).
// Opcodes are not in width order: B96 is 0x05, after B512 at 0x04.
1416defm S_LOAD_B32  : SM_Real_Loads_gfx12<0x00, "S_LOAD_DWORD">;
1417defm S_LOAD_B64  : SM_Real_Loads_gfx12<0x01, "S_LOAD_DWORDX2">;
1418defm S_LOAD_B96  : SM_Real_Loads_gfx12<0x05, "S_LOAD_DWORDX3">;
1419defm S_LOAD_B128 : SM_Real_Loads_gfx12<0x02, "S_LOAD_DWORDX4">;
1420defm S_LOAD_B256 : SM_Real_Loads_gfx12<0x03, "S_LOAD_DWORDX8">;
1421defm S_LOAD_B512 : SM_Real_Loads_gfx12<0x04, "S_LOAD_DWORDX16">;
1422
// Scalar loads with I8/U8/I16/U16 names. No pseudo prefix is passed, so the
// multiclass default applies and the pseudo family has the same name as the
// defm (e.g. S_LOAD_I8).
1423defm S_LOAD_I8   : SM_Real_Loads_gfx12<0x08>;
1424defm S_LOAD_U8   : SM_Real_Loads_gfx12<0x09>;
1425defm S_LOAD_I16  : SM_Real_Loads_gfx12<0x0a>;
1426defm S_LOAD_U16  : SM_Real_Loads_gfx12<0x0b>;
1427
// Scalar buffer loads under bit-width names, derived from the
// S_BUFFER_LOAD_DWORD* pseudo families. Each opcode equals the corresponding
// S_LOAD_B* opcode plus 0x10 (B96 is again out of width order at 0x15).
1428defm S_BUFFER_LOAD_B32  : SM_Real_Loads_gfx12<0x10, "S_BUFFER_LOAD_DWORD">;
1429defm S_BUFFER_LOAD_B64  : SM_Real_Loads_gfx12<0x11, "S_BUFFER_LOAD_DWORDX2">;
1430defm S_BUFFER_LOAD_B96  : SM_Real_Loads_gfx12<0x15, "S_BUFFER_LOAD_DWORDX3">;
1431defm S_BUFFER_LOAD_B128 : SM_Real_Loads_gfx12<0x12, "S_BUFFER_LOAD_DWORDX4">;
1432defm S_BUFFER_LOAD_B256 : SM_Real_Loads_gfx12<0x13, "S_BUFFER_LOAD_DWORDX8">;
1433defm S_BUFFER_LOAD_B512 : SM_Real_Loads_gfx12<0x14, "S_BUFFER_LOAD_DWORDX16">;
1434
// Scalar buffer loads with I8/U8/I16/U16 names. The pseudo family shares the
// defm name (default ps = NAME); opcodes are the S_LOAD_* ones plus 0x10.
1435defm S_BUFFER_LOAD_I8  : SM_Real_Loads_gfx12<0x18>;
1436defm S_BUFFER_LOAD_U8  : SM_Real_Loads_gfx12<0x19>;
1437defm S_BUFFER_LOAD_I16 : SM_Real_Loads_gfx12<0x1a>;
1438defm S_BUFFER_LOAD_U16 : SM_Real_Loads_gfx12<0x1b>;
1439
// GFX12 encoding of the S_DCACHE_INV pseudo, opcode 0x21 (written 0x021 here).
// Uses the plain SMEM_Real_gfx12 layout with the pseudo's own mnemonic.
1440def S_DCACHE_INV_gfx12 : SMEM_Real_gfx12<0x021, S_DCACHE_INV>;
1441
// GFX12 encodings of the prefetch pseudos, opcodes 0x24-0x28. All use the
// SMEM_Real_Prefetch_gfx12 layout (5-bit sdata in Inst{10-6}, Inst{12-11}
// zero) and keep the pseudo's mnemonic.
1442def S_PREFETCH_INST_gfx12        : SMEM_Real_Prefetch_gfx12<0x24, S_PREFETCH_INST>;
1443def S_PREFETCH_INST_PC_REL_gfx12 : SMEM_Real_Prefetch_gfx12<0x25, S_PREFETCH_INST_PC_REL>;
1444def S_PREFETCH_DATA_gfx12        : SMEM_Real_Prefetch_gfx12<0x26, S_PREFETCH_DATA>;
1445def S_BUFFER_PREFETCH_DATA_gfx12 : SMEM_Real_Prefetch_gfx12<0x27, S_BUFFER_PREFETCH_DATA>;
1446def S_PREFETCH_DATA_PC_REL_gfx12 : SMEM_Real_Prefetch_gfx12<0x28, S_PREFETCH_DATA_PC_REL>;
1447
// Defines the two GFX12 real records for a probe instruction:
// <NAME>_IMM_gfx12 and <NAME>_SGPR_IMM_gfx12, derived from the
// SM_Probe_Pseudo records <NAME>_IMM and <NAME>_SGPR_IMM respectively.
//   op - 6-bit opcode shared by both variants.
// Probes reuse the SMEM_Real_Prefetch_gfx12 layout, so their Inst{12-6} field
// is encoded as a 5-bit sdata value rather than sdst.
1448multiclass SMEM_Real_Probe_gfx12<bits<6> op> {
1449  defvar ps = NAME;
1450  def _IMM_gfx12      : SMEM_Real_Prefetch_gfx12<op, !cast<SM_Probe_Pseudo>(ps#_IMM)>;
1451  def _SGPR_IMM_gfx12 : SMEM_Real_Prefetch_gfx12<op, !cast<SM_Probe_Pseudo>(ps#_SGPR_IMM)>;
1452}
1453
// GFX12 encodings of the two probe instructions (opcodes 0x22 and 0x23); each
// defm expands to an _IMM_gfx12 and an _SGPR_IMM_gfx12 record.
1454defm S_ATC_PROBE        : SMEM_Real_Probe_gfx12<0x22>;
1455defm S_ATC_PROBE_BUFFER : SMEM_Real_Probe_gfx12<0x23>;
1456