xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/SMInstructions.td (revision b1879975794772ee51f0b4865753364c7d7626c3)
1//===---- SMInstructions.td - Scalar Memory Instruction Definitions -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
// i32 immediate operand named "SMRDOffset8". SM_Real_Loads_si uses it as the
// $offset operand of the _IMM_si load encodings.
9def smrd_offset_8 : ImmOperand<i32, "SMRDOffset8", 1>;
10
// The three offset operands defined in this scope all use the same custom
// encoder (getSMEMOffsetEncoding) and decoder (decodeSMEMOffset).
11let EncoderMethod = "getSMEMOffsetEncoding",
12    DecoderMethod = "decodeSMEMOffset" in {
13def SMEMOffset : ImmOperand<i32, "SMEMOffset", 1>;
14def SMEMOffsetMod : NamedIntOperand<i32, "offset", 0>;
// Shares ImmTy, PredicateMethod and PrintMethod with SMEMOffsetMod, but is
// built with NamedIntOperand's default third template argument.
// NOTE(review): presumably that default makes the "offset" modifier optional
// when parsing - confirm against the NamedIntOperand definition.
15def OptSMEMOffsetMod : NamedIntOperand<i32, "offset"> {
16  let ImmTy = SMEMOffsetMod.ImmTy;
17  let PredicateMethod = SMEMOffsetMod.PredicateMethod;
18  let PrintMethod = SMEMOffsetMod.PrintMethod;
19}
20}
21
22//===----------------------------------------------------------------------===//
23// Scalar Memory classes
24//===----------------------------------------------------------------------===//
25
// Common base for all scalar-memory pseudo instructions.
//
//   opName  - mnemonic, stored in Mnemonic.
//   outs    - output operand dag.
//   ins     - input operand dag.
//   asmOps  - operand part of the asm string, stored in AsmOperands.
//   pattern - optional selection pattern (defaults to none).
//
// The InstSI asm string is left empty here; SM_Real rebuilds it from
// Mnemonic and AsmOperands.
26class SM_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag> pattern=[]> :
27  InstSI <outs, ins, "", pattern>,
28  SIMCInstr<NAME, SIEncodingFamily.NONE> {
29  let isPseudo = 1;
30  let isCodeGenOnly = 1;
31
  // Defaults describe a plain load with no side effects; subclasses override
  // these as needed.
32  let LGKM_CNT = 1;
33  let SMRD = 1;
34  let mayStore = 0;
35  let mayLoad = 1;
36  let hasSideEffects = 0;
37  let maybeAtomic = 0;
38  let UseNamedOperandTable = 1;
39  let SchedRW = [WriteSMEM];
40
41  string Mnemonic = opName;
42  string AsmOperands = asmOps;
43
  // Flags read by the real encoding classes to decide which operand fields
  // are placed into the instruction word.
44  bits<1> has_sbase = 1;
45  bits<1> has_sdst = 1;
46  bit has_glc = 0;
47  bit has_dlc = 0;
48  bit has_offset = 0;
49  bit has_soffset = 0;
50  bit is_buffer = 0;
51}
52
// Base class for the real (non-pseudo) form of an SM_Pseudo.
//
//   ps     - the pseudo this real instruction is derived from.
//   opName - mnemonic to print; defaults to the pseudo's Mnemonic.
//
// The operand lists are taken from the pseudo and the asm string is
// opName # ps.AsmOperands.
53class SM_Real <SM_Pseudo ps, string opName = ps.Mnemonic>
54  : InstSI<ps.OutOperandList, ps.InOperandList, opName # ps.AsmOperands> {
55
56  let isPseudo = 0;
57  let isCodeGenOnly = 0;
58
  // This record's own NAME, cast to an Instruction.
59  Instruction Opcode = !cast<Instruction>(NAME);
60
61  // Copy the relevant flags from the pseudo so both forms agree.
62  let LGKM_CNT             = ps.LGKM_CNT;
63  let SMRD                 = ps.SMRD;
64  let mayStore             = ps.mayStore;
65  let mayLoad              = ps.mayLoad;
66  let hasSideEffects       = ps.hasSideEffects;
67  let UseNamedOperandTable = ps.UseNamedOperandTable;
68  let SchedRW              = ps.SchedRW;
69  let SubtargetPredicate   = ps.SubtargetPredicate;
70  let OtherPredicates      = ps.OtherPredicates;
71  let AsmMatchConverter    = ps.AsmMatchConverter;
72  let IsAtomicRet          = ps.IsAtomicRet;
73  let IsAtomicNoRet        = ps.IsAtomicNoRet;
74  let Uses                 = ps.Uses;
75  let Defs                 = ps.Defs;
76
77  let TSFlags = ps.TSFlags;
78
79  bit is_buffer = ps.is_buffer;
80
81  // Operand fields available to the encoding; subclasses map them into Inst.
82  bits<7>  sbase;
83  bits<7>  sdst;
84  bits<32> offset;
85  bits<8>  soffset;
86  bits<5>  cpol;
87}
88
// Describes one addressing mode of a scalar-memory instruction.
//
//   hasOffset  - the mode has an immediate $offset operand.
//   hasSOffset - the mode has a register $soffset operand.
//   variant    - suffix string identifying the mode (e.g. "_IMM").
//   ins        - dag fragment with the mode's offset operand(s).
//   asm        - asm-string fragment that prints those operand(s).
89class OffsetMode<bit hasOffset, bit hasSOffset, string variant,
90                 dag ins, string asm> {
91  bit HasOffset = hasOffset;
92  bit HasSOffset = hasSOffset;
93  string Variant = variant;
94  dag Ins = ins;
95  string Asm = asm;
96}
97
// Immediate offset only.
98def IMM_Offset : OffsetMode<1, 0, "_IMM", (ins SMEMOffset:$offset), "$offset">;
// Register offset only.
99def SGPR_Offset : OffsetMode<0, 1, "_SGPR", (ins SReg_32:$soffset), "$soffset">;
// Register offset plus an "offset" modifier operand (SMEMOffsetMod).
100def SGPR_IMM_Offset : OffsetMode<1, 1, "_SGPR_IMM",
101                                 (ins SReg_32:$soffset, SMEMOffsetMod:$offset),
102                                 "$soffset$offset">;
// Same as SGPR_IMM_Offset (including the "_SGPR_IMM" variant string), except
// that the offset operand is OptSMEMOffsetMod.
103def SGPR_IMM_OptOffset : OffsetMode<1, 1, "_SGPR_IMM",
104                                    (ins SReg_32:$soffset, OptSMEMOffsetMod:$offset),
105                                    "$soffset$offset">;
106
// Pseudo for probe instructions: no outputs; inputs are an 8-bit immediate
// $sdata, the $sbase register (class baseClass) and the operands of the
// chosen offset mode. Marked as neither load nor store, but as having side
// effects.
107class SM_Probe_Pseudo <string opName, RegisterClass baseClass, OffsetMode offsets>
108  : SM_Pseudo<opName, (outs),
109              !con((ins i8imm:$sdata, baseClass:$sbase), offsets.Ins),
110              " $sdata, $sbase, " # offsets.Asm> {
111  let mayLoad = 0;
112  let mayStore = 0;
113  let has_glc = 0;
114  let LGKM_CNT = 0;
115  let ScalarStore = 0;
116  let hasSideEffects = 1;
117  let has_offset = offsets.HasOffset;
118  let has_soffset = offsets.HasSOffset;
119}
120
// Pseudo for scalar loads: output $sdst (class dstClass); inputs are $sbase
// (class baseClass), the operands of the chosen offset mode and a trailing
// $cpol. No selection pattern is attached here.
121class SM_Load_Pseudo <string opName, RegisterClass baseClass,
122                      RegisterClass dstClass, OffsetMode offsets>
123  : SM_Pseudo<opName, (outs dstClass:$sdst),
124              !con((ins baseClass:$sbase), offsets.Ins, (ins CPol:$cpol)),
125              " $sdst, $sbase, " # offsets.Asm # "$cpol", []> {
  // Exposed so real-instruction multiclasses can rebuild the operand list
  // (see SM_Real_Loads_si).
126  RegisterClass BaseClass = baseClass;
127  let mayLoad = 1;
128  let isReMaterializable = 1;
129  let mayStore = 0;
130  let has_glc = 1;
131  let has_dlc = 1;
132  let has_offset = offsets.HasOffset;
133  let has_soffset = offsets.HasSOffset;
134}
135
// Pseudo for scalar stores: no outputs; inputs are the data register $sdata
// (class srcClass), $sbase (class baseClass), the operands of the chosen
// offset mode and a trailing $cpol.
136class SM_Store_Pseudo <string opName, RegisterClass baseClass,
137                       RegisterClass srcClass, OffsetMode offsets>
138  : SM_Pseudo<opName, (outs), !con((ins srcClass:$sdata, baseClass:$sbase),
139                                   offsets.Ins, (ins CPol:$cpol)),
140              " $sdata, $sbase, " # offsets.Asm # "$cpol"> {
141  RegisterClass BaseClass = baseClass;
142  let mayLoad = 0;
143  let mayStore = 1;
144  let has_glc = 1;
145  let has_dlc = 1;
146  let has_offset = offsets.HasOffset;
147  let has_soffset = offsets.HasSOffset;
148  let ScalarStore = 1;
149}
150
// Pseudo for discard instructions: no outputs and no $sdst; inputs are an
// SReg_64 $sbase plus the operands of the chosen offset mode. Marked as
// neither load nor store, but as having side effects.
151class SM_Discard_Pseudo <string opName, OffsetMode offsets>
152  : SM_Pseudo<opName, (outs), !con((ins SReg_64:$sbase), offsets.Ins),
153              " $sbase, " # offsets.Asm> {
154  let mayLoad = 0;
155  let mayStore = 0;
156  let has_glc = 0;
157  let has_sdst = 0;
158  let ScalarStore = 0;
159  let hasSideEffects = 1;
160  let has_offset = offsets.HasOffset;
161  let has_soffset = offsets.HasSOffset;
162}
163
// Defines the load pseudo for one offset mode and, for destinations wider
// than 32 bits, an additional early-clobber "_ec" twin.
//
//   op        - instruction name; lower-cased to form the mnemonic.
//   baseClass - register class of $sbase.
//   dstClass  - register class of $sdst.
//   offsets   - offset mode to instantiate.
164multiclass SM_Load_Pseudos<string op, RegisterClass baseClass,
165                           RegisterClass dstClass, OffsetMode offsets> {
166  defvar opName = !tolower(op);
  // The empty def name makes the record take the name of the enclosing defm.
167  def "" : SM_Load_Pseudo <opName, baseClass, dstClass, offsets>;
168
169  // The constrained multi-dword load equivalents with early clobber flag at
170  // the dst operands. They are needed only for codegen and there is no need
171  // for their real opcodes.
  // PseudoInstr is set to op # offsets.Variant; as instantiated by
  // SM_Pseudo_Loads this equals the name of the unconstrained pseudo above.
172  if !gt(dstClass.RegTypes[0].Size, 32) then
173    let Constraints = "@earlyclobber $sdst",
174        PseudoInstr = op # offsets.Variant in
175      def "" # _ec : SM_Load_Pseudo <opName, baseClass, dstClass, offsets>;
176}
177
// Instantiates the load pseudos for the _IMM, _SGPR and _SGPR_IMM offset
// modes, passing the defm's NAME as the instruction name.
178multiclass SM_Pseudo_Loads<RegisterClass baseClass,
179                           RegisterClass dstClass> {
180  defm _IMM : SM_Load_Pseudos <NAME, baseClass, dstClass, IMM_Offset>;
181  defm _SGPR : SM_Load_Pseudos <NAME, baseClass, dstClass, SGPR_Offset>;
182  defm _SGPR_IMM : SM_Load_Pseudos <NAME, baseClass, dstClass, SGPR_IMM_Offset>;
183}
184
// Instantiates the store pseudos for the _IMM, _SGPR and _SGPR_IMM offset
// modes. The mnemonic is the lower-cased defm NAME.
185multiclass SM_Pseudo_Stores<RegisterClass baseClass,
186                            RegisterClass srcClass> {
187  defvar opName = !tolower(NAME);
188  def _IMM : SM_Store_Pseudo <opName, baseClass, srcClass, IMM_Offset>;
189  def _SGPR : SM_Store_Pseudo <opName, baseClass, srcClass, SGPR_Offset>;
190  def _SGPR_IMM : SM_Store_Pseudo <opName, baseClass, srcClass, SGPR_IMM_Offset>;
191}
192
// Instantiates the discard pseudos for the _IMM, _SGPR and _SGPR_IMM offset
// modes. The mnemonic is the lower-cased defm NAME.
193multiclass SM_Pseudo_Discards {
194  defvar opName = !tolower(NAME);
195  def _IMM : SM_Discard_Pseudo <opName, IMM_Offset>;
196  def _SGPR : SM_Discard_Pseudo <opName, SGPR_Offset>;
197  def _SGPR_IMM : SM_Discard_Pseudo <opName, SGPR_IMM_Offset>;
198}
199
// Pseudo with a single 64-bit output $sdst and no inputs. The selection
// pattern sets $sdst from `node` (null_frag by default). Has side effects,
// is neither a load nor a store, and has no $sbase.
200class SM_Time_Pseudo<string opName, SDPatternOperator node = null_frag> : SM_Pseudo<
201  opName, (outs SReg_64_XEXEC:$sdst), (ins),
202  " $sdst", [(set i64:$sdst, (node))]> {
203  let hasSideEffects = 1;
204
205  let mayStore = 0;
206  let mayLoad = 0;
207  let has_sbase = 0;
208}
209
// Operand-less pseudo (no outputs, no inputs, empty operand string) matched
// by `node` (null_frag by default). Has side effects, is neither a load nor
// a store, and has neither $sdst nor $sbase.
210class SM_Inval_Pseudo <string opName, SDPatternOperator node = null_frag> : SM_Pseudo<
211  opName, (outs), (ins), "", [(node)]> {
212  let hasSideEffects = 1;
213  let mayLoad = 0;
214  let mayStore = 0;
215  let has_sdst = 0;
216  let has_sbase = 0;
217}
218
// Instantiates the probe pseudos for each offset mode. In addition to _IMM,
// _SGPR and _SGPR_IMM, an _SGPR_OPT_IMM variant is defined that uses
// SGPR_IMM_OptOffset. The mnemonic is the lower-cased defm NAME.
219multiclass SM_Pseudo_Probe<RegisterClass baseClass> {
220  defvar opName = !tolower(NAME);
221  def _IMM  : SM_Probe_Pseudo <opName, baseClass, IMM_Offset>;
222  def _SGPR : SM_Probe_Pseudo <opName, baseClass, SGPR_Offset>;
223  def _SGPR_IMM : SM_Probe_Pseudo <opName, baseClass, SGPR_IMM_Offset>;
224  def _SGPR_OPT_IMM : SM_Probe_Pseudo <opName, baseClass, SGPR_IMM_OptOffset>;
225}
226
// Pseudo with a single 32-bit output $sdst and no inputs. The selection
// pattern sets $sdst from `node`. Has side effects, is neither a load nor a
// store, and has no $sbase.
227class SM_WaveId_Pseudo<string opName, SDPatternOperator node> : SM_Pseudo<
228  opName, (outs SReg_32_XM0_XEXEC:$sdst), (ins),
229  " $sdst", [(set i32:$sdst, (node))]> {
230  let hasSideEffects = 1;
231  let mayStore = 0;
232  let mayLoad = 0;
233  let has_sbase = 0;
234}
235
// Pseudo for prefetch instructions: no outputs. Inputs are an optional
// $sbase (present only when hasSBase is set), followed by $offset, $soffset
// and an 8-bit immediate $sdata. The asm string omits "$sbase," when there is
// no base operand.
236class SM_Prefetch_Pseudo <string opName, RegisterClass baseClass, bit hasSBase>
237  : SM_Pseudo<opName, (outs), !con(!if(hasSBase, (ins baseClass:$sbase), (ins)),
238                                   (ins SMEMOffset:$offset, SReg_32:$soffset, i8imm:$sdata)),
239              !if(hasSBase, " $sbase,", "") # " $offset, $soffset, $sdata"> {
240  // Mark prefetches as both load and store to prevent reordering with loads
241  // and stores. This is also needed for pattern to match prefetch intrinsic.
242  let mayLoad = 1;
243  let mayStore = 1;
244  let has_glc = 0;
245  let LGKM_CNT = 0;
246  let has_sbase = hasSBase;
247  let ScalarStore = 0;
  // Both offset forms are always present for prefetches.
248  let has_offset = 1;
249  let has_soffset = 1;
250}
251
252//===----------------------------------------------------------------------===//
253// Scalar Atomic Memory Classes
254//===----------------------------------------------------------------------===//
255
// Common base for scalar atomic pseudos.
//
//   isRet - 1 for the returning form of the atomic, 0 otherwise. It drives
//           the glc field and the IsAtomicRet / IsAtomicNoRet flags.
256class SM_Atomic_Pseudo <string opName,
257                        dag outs, dag ins, string asmOps, bit isRet>
258  : SM_Pseudo<opName, outs, ins, asmOps, []> {
259
  // Read by SMEM_Atomic_Real_vi to fix the GLC bit and pick the data field.
260  bit glc = isRet;
261
262  let mayLoad = 1;
263  let mayStore = 1;
264  let has_glc = 1;
265  let has_dlc = 1;
  // Default only; SM_Pseudo_Atomic overrides this from its offset mode.
266  let has_soffset = 1;
267
268  // Should these be set?
269  let ScalarStore = 1;
270  let hasSideEffects = 1;
271  let maybeAtomic = 1;
272
273  let IsAtomicNoRet = !not(isRet);
274  let IsAtomicRet = isRet;
275}
276
// Concrete scalar atomic pseudo for one offset mode.
//
//   baseClass - register class of $sbase.
//   dataClass - register class of $sdata (and of $sdst when returning).
//   offsets   - offset mode to instantiate.
//   isRet     - 1 for the returning form.
//   CPolTy    - cache-policy operand type; defaults to CPol_GLC for the
//               returning form and CPol_NonGLC otherwise.
//
// The returning form has an $sdst output tied to $sdata and prints $sdst;
// the non-returning form has no outputs and prints $sdata.
277class SM_Pseudo_Atomic<string opName,
278                       RegisterClass baseClass,
279                       RegisterClass dataClass,
280                       OffsetMode offsets,
281                       bit isRet,
282                       Operand CPolTy = !if(isRet, CPol_GLC, CPol_NonGLC)> :
283  SM_Atomic_Pseudo<opName,
284                   !if(isRet, (outs dataClass:$sdst), (outs)),
285                   !con((ins dataClass:$sdata, baseClass:$sbase), offsets.Ins,
286                        (ins CPolTy:$cpol)),
287                   !if(isRet, " $sdst", " $sdata") #
288                     ", $sbase, " # offsets.Asm # "$cpol",
289                   isRet> {
290  let has_offset = offsets.HasOffset;
291  let has_soffset = offsets.HasSOffset;
292
  // For the returning form the tied $sdata is not encoded separately.
293  let Constraints = !if(isRet, "$sdst = $sdata", "");
294  let DisableEncoding = !if(isRet, "$sdata", "");
295}
296
// Instantiates the atomic pseudos for the _IMM, _SGPR and _SGPR_IMM offset
// modes, each in a non-returning form and a returning (_RTN) form. The
// mnemonic is the lower-cased defm NAME.
297multiclass SM_Pseudo_Atomics<RegisterClass baseClass,
298                             RegisterClass dataClass> {
299  defvar opName = !tolower(NAME);
300  def _IMM      : SM_Pseudo_Atomic <opName, baseClass, dataClass, IMM_Offset, 0>;
301  def _SGPR     : SM_Pseudo_Atomic <opName, baseClass, dataClass, SGPR_Offset, 0>;
302  def _SGPR_IMM : SM_Pseudo_Atomic <opName, baseClass, dataClass, SGPR_IMM_Offset, 0>;
303  def _IMM_RTN  : SM_Pseudo_Atomic <opName, baseClass, dataClass, IMM_Offset, 1>;
304  def _SGPR_RTN : SM_Pseudo_Atomic <opName, baseClass, dataClass, SGPR_Offset, 1>;
305  def _SGPR_IMM_RTN : SM_Pseudo_Atomic <opName, baseClass, dataClass, SGPR_IMM_Offset, 1>;
306}
307
308//===----------------------------------------------------------------------===//
309// Scalar Memory Instructions
310//===----------------------------------------------------------------------===//
311
312// We are using the SReg_32_XM0 and not the SReg_32 register class for 32-bit
313// SMRD instructions, because the SReg_32_XM0 register class does not include M0
314// and writing to M0 from an SMRD instruction will hang the GPU.
315
316// XXX - SMEM instructions do not allow exec for the data operand, but
317// is exec allowed as sdst for SMRD on SI/CI?
// Scalar load pseudos with an SReg_64 base; the destination class grows with
// the number of dwords loaded.
318defm S_LOAD_DWORD    : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>;
319defm S_LOAD_DWORDX2  : SM_Pseudo_Loads <SReg_64, SReg_64_XEXEC>;
// The braceless `let` applies to the DWORDX3 defm only.
320let SubtargetPredicate = HasScalarDwordx3Loads in
321  defm S_LOAD_DWORDX3  : SM_Pseudo_Loads <SReg_64, SReg_96>;
322defm S_LOAD_DWORDX4  : SM_Pseudo_Loads <SReg_64, SReg_128>;
323defm S_LOAD_DWORDX8  : SM_Pseudo_Loads <SReg_64, SReg_256>;
324defm S_LOAD_DWORDX16 : SM_Pseudo_Loads <SReg_64, SReg_512>;
// The 8-bit and 16-bit loads use the same 32-bit destination class as
// S_LOAD_DWORD.
325defm S_LOAD_I8       : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>;
326defm S_LOAD_U8       : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>;
327defm S_LOAD_I16      : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>;
328defm S_LOAD_U16      : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>;
329
// Buffer variants of the scalar loads: same destination classes as the
// S_LOAD_* pseudos, but with an SReg_128 base and is_buffer set.
330let is_buffer = 1 in {
331defm S_BUFFER_LOAD_DWORD : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>;
332// FIXME: exec_lo/exec_hi appear to be allowed for SMRD loads on
333// SI/CI, but disallowed for SMEM on VI.
334defm S_BUFFER_LOAD_DWORDX2 : SM_Pseudo_Loads <SReg_128, SReg_64_XEXEC>;
// The braceless `let` applies to the DWORDX3 defm only.
335let SubtargetPredicate = HasScalarDwordx3Loads in
336  defm S_BUFFER_LOAD_DWORDX3 : SM_Pseudo_Loads <SReg_128, SReg_96>;
337defm S_BUFFER_LOAD_DWORDX4 : SM_Pseudo_Loads <SReg_128, SReg_128>;
338defm S_BUFFER_LOAD_DWORDX8 : SM_Pseudo_Loads <SReg_128, SReg_256>;
339defm S_BUFFER_LOAD_DWORDX16 : SM_Pseudo_Loads <SReg_128, SReg_512>;
340defm S_BUFFER_LOAD_I8 : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>;
341defm S_BUFFER_LOAD_U8 : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>;
342defm S_BUFFER_LOAD_I16 : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>;
343defm S_BUFFER_LOAD_U16 : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>;
344}
345
// Scalar store pseudos, all guarded by HasScalarStores. The buffer variants
// use an SReg_128 base and additionally set is_buffer.
346let SubtargetPredicate = HasScalarStores in {
347defm S_STORE_DWORD : SM_Pseudo_Stores <SReg_64, SReg_32_XM0_XEXEC>;
348defm S_STORE_DWORDX2 : SM_Pseudo_Stores <SReg_64, SReg_64_XEXEC>;
349defm S_STORE_DWORDX4 : SM_Pseudo_Stores <SReg_64, SReg_128>;
350
351let is_buffer = 1 in {
352defm S_BUFFER_STORE_DWORD : SM_Pseudo_Stores <SReg_128, SReg_32_XM0_XEXEC>;
353defm S_BUFFER_STORE_DWORDX2 : SM_Pseudo_Stores <SReg_128, SReg_64_XEXEC>;
354defm S_BUFFER_STORE_DWORDX4 : SM_Pseudo_Stores <SReg_128, SReg_128>;
355}
356} // End SubtargetPredicate = HasScalarStores
357
// Timer, cache-maintenance, probe and wave-id pseudos.
//
// Note: a `let ... in` without braces applies only to the single definition
// that follows it, so S_DCACHE_INV below is NOT guarded by HasSMemTimeInst.
358let SubtargetPredicate = HasSMemTimeInst in
359def S_MEMTIME : SM_Time_Pseudo <"s_memtime", int_amdgcn_s_memtime>;
360def S_DCACHE_INV : SM_Inval_Pseudo <"s_dcache_inv", int_amdgcn_s_dcache_inv>;
361
362let SubtargetPredicate = isGFX7GFX8GFX9 in {
363def S_DCACHE_INV_VOL : SM_Inval_Pseudo <"s_dcache_inv_vol", int_amdgcn_s_dcache_inv_vol>;
364} // let SubtargetPredicate = isGFX7GFX8GFX9
365
366let SubtargetPredicate = isGFX8Plus in {
// The write-back instructions additionally require HasScalarStores.
367let OtherPredicates = [HasScalarStores] in {
368def S_DCACHE_WB     : SM_Inval_Pseudo <"s_dcache_wb", int_amdgcn_s_dcache_wb>;
369def S_DCACHE_WB_VOL : SM_Inval_Pseudo <"s_dcache_wb_vol", int_amdgcn_s_dcache_wb_vol>;
370} // End OtherPredicates = [HasScalarStores]
371
372defm S_ATC_PROBE        : SM_Pseudo_Probe <SReg_64>;
373let is_buffer = 1 in {
374defm S_ATC_PROBE_BUFFER : SM_Pseudo_Probe <SReg_128>;
375}
376} // SubtargetPredicate = isGFX8Plus
377
378let SubtargetPredicate = HasSMemRealTime in
379def S_MEMREALTIME   : SM_Time_Pseudo <"s_memrealtime", int_amdgcn_s_memrealtime>;
380
// No pattern operator is passed for S_GL1_INV, so SM_Inval_Pseudo's default
// (null_frag) is used.
381let SubtargetPredicate = isGFX10Plus in
382def S_GL1_INV : SM_Inval_Pseudo<"s_gl1_inv">;
383let SubtargetPredicate = HasGetWaveIdInst in
384def S_GET_WAVEID_IN_WORKGROUP : SM_WaveId_Pseudo <"s_get_waveid_in_workgroup", int_amdgcn_s_get_waveid_in_workgroup>;
385
386
// Scalar scratch load/store pseudos. All of them are guarded by
// HasScalarFlatScratchInsts and declare FLAT_SCR as an implicit use.
387let SubtargetPredicate = HasScalarFlatScratchInsts, Uses = [FLAT_SCR] in {
388defm S_SCRATCH_LOAD_DWORD    : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>;
389defm S_SCRATCH_LOAD_DWORDX2  : SM_Pseudo_Loads <SReg_64, SReg_64_XEXEC>;
390defm S_SCRATCH_LOAD_DWORDX4  : SM_Pseudo_Loads <SReg_64, SReg_128>;
391
392defm S_SCRATCH_STORE_DWORD   : SM_Pseudo_Stores <SReg_64, SReg_32_XM0_XEXEC>;
393defm S_SCRATCH_STORE_DWORDX2 : SM_Pseudo_Stores <SReg_64, SReg_64_XEXEC>;
394defm S_SCRATCH_STORE_DWORDX4 : SM_Pseudo_Stores <SReg_64, SReg_128>;
395} // SubtargetPredicate = HasScalarFlatScratchInsts
396
// Scalar atomic pseudos, all guarded by HasScalarAtomics. Buffer atomics use
// an SReg_128 base and set is_buffer; the others use an SReg_64 base.
// In both groups the CMPSWAP variants take a data class twice as wide as the
// other operations of the same size (SReg_64_XEXEC for CMPSWAP, SReg_128 for
// CMPSWAP_X2).
397let SubtargetPredicate = HasScalarAtomics in {
398
399let is_buffer = 1 in {
400defm S_BUFFER_ATOMIC_SWAP         : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
401defm S_BUFFER_ATOMIC_CMPSWAP      : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
402defm S_BUFFER_ATOMIC_ADD          : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
403defm S_BUFFER_ATOMIC_SUB          : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
404defm S_BUFFER_ATOMIC_SMIN         : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
405defm S_BUFFER_ATOMIC_UMIN         : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
406defm S_BUFFER_ATOMIC_SMAX         : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
407defm S_BUFFER_ATOMIC_UMAX         : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
408defm S_BUFFER_ATOMIC_AND          : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
409defm S_BUFFER_ATOMIC_OR           : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
410defm S_BUFFER_ATOMIC_XOR          : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
411defm S_BUFFER_ATOMIC_INC          : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
412defm S_BUFFER_ATOMIC_DEC          : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
413
414defm S_BUFFER_ATOMIC_SWAP_X2      : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
415defm S_BUFFER_ATOMIC_CMPSWAP_X2   : SM_Pseudo_Atomics <SReg_128, SReg_128>;
416defm S_BUFFER_ATOMIC_ADD_X2       : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
417defm S_BUFFER_ATOMIC_SUB_X2       : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
418defm S_BUFFER_ATOMIC_SMIN_X2      : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
419defm S_BUFFER_ATOMIC_UMIN_X2      : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
420defm S_BUFFER_ATOMIC_SMAX_X2      : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
421defm S_BUFFER_ATOMIC_UMAX_X2      : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
422defm S_BUFFER_ATOMIC_AND_X2       : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
423defm S_BUFFER_ATOMIC_OR_X2        : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
424defm S_BUFFER_ATOMIC_XOR_X2       : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
425defm S_BUFFER_ATOMIC_INC_X2       : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
426defm S_BUFFER_ATOMIC_DEC_X2       : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
427}
428
429defm S_ATOMIC_SWAP                : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
430defm S_ATOMIC_CMPSWAP             : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
431defm S_ATOMIC_ADD                 : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
432defm S_ATOMIC_SUB                 : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
433defm S_ATOMIC_SMIN                : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
434defm S_ATOMIC_UMIN                : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
435defm S_ATOMIC_SMAX                : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
436defm S_ATOMIC_UMAX                : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
437defm S_ATOMIC_AND                 : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
438defm S_ATOMIC_OR                  : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
439defm S_ATOMIC_XOR                 : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
440defm S_ATOMIC_INC                 : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
441defm S_ATOMIC_DEC                 : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
442
443defm S_ATOMIC_SWAP_X2             : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
444defm S_ATOMIC_CMPSWAP_X2          : SM_Pseudo_Atomics <SReg_64, SReg_128>;
445defm S_ATOMIC_ADD_X2              : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
446defm S_ATOMIC_SUB_X2              : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
447defm S_ATOMIC_SMIN_X2             : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
448defm S_ATOMIC_UMIN_X2             : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
449defm S_ATOMIC_SMAX_X2             : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
450defm S_ATOMIC_UMAX_X2             : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
451defm S_ATOMIC_AND_X2              : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
452defm S_ATOMIC_OR_X2               : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
453defm S_ATOMIC_XOR_X2              : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
454defm S_ATOMIC_INC_X2              : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
455defm S_ATOMIC_DEC_X2              : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
456
457} // let SubtargetPredicate = HasScalarAtomics
458
// Data-cache discard pseudos; guarded by the same HasScalarAtomics predicate
// as the scalar atomics.
459let SubtargetPredicate = HasScalarAtomics in {
460defm S_DCACHE_DISCARD    : SM_Pseudo_Discards;
461defm S_DCACHE_DISCARD_X2 : SM_Pseudo_Discards;
462}
463
// Prefetch pseudos (GFX12+). The *_PC_REL forms pass hasSBase = 0 and so have
// no $sbase operand; the buffer form uses an SReg_128 base and sets is_buffer.
464let SubtargetPredicate = isGFX12Plus in {
465def S_PREFETCH_INST        : SM_Prefetch_Pseudo <"s_prefetch_inst", SReg_64, 1>;
466def S_PREFETCH_INST_PC_REL : SM_Prefetch_Pseudo <"s_prefetch_inst_pc_rel", SReg_64, 0>;
467def S_PREFETCH_DATA        : SM_Prefetch_Pseudo <"s_prefetch_data", SReg_64, 1>;
468def S_PREFETCH_DATA_PC_REL : SM_Prefetch_Pseudo <"s_prefetch_data_pc_rel", SReg_64, 0>;
469def S_BUFFER_PREFETCH_DATA : SM_Prefetch_Pseudo <"s_buffer_prefetch_data", SReg_128, 1> {
470  let is_buffer = 1;
471}
472} // end let SubtargetPredicate = isGFX12Plus
473
474//===----------------------------------------------------------------------===//
475// Targets
476//===----------------------------------------------------------------------===//
477
478//===----------------------------------------------------------------------===//
479// SI
480//===----------------------------------------------------------------------===//
481
// Real 32-bit SMRD encoding, assembled only under isGFX6GFX7.
//
//   op - 5-bit opcode placed in Inst{26-22}.
//   ps - the pseudo being encoded.
//
// Fields the pseudo does not have are left unset (?).
482class SMRD_Real_si <bits<5> op, SM_Pseudo ps>
483  : SM_Real<ps>
484  , SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SI>
485  , Enc32 {
486
487  let AssemblerPredicate = isGFX6GFX7;
488  let DecoderNamespace = "GFX6GFX7";
489
  // Low 8 bits hold either the immediate offset or the offset register;
  // Inst{8} tells which (1 = immediate).
490  let Inst{7-0}   = !if(ps.has_offset, offset{7-0}, !if(ps.has_soffset, soffset, ?));
491  let Inst{8}     = ps.has_offset;
  // Only bits 6-1 of sbase are encoded; bit 0 is dropped.
492  let Inst{14-9}  = !if(ps.has_sbase, sbase{6-1}, ?);
493  let Inst{21-15} = !if(ps.has_sdst, sdst{6-0}, ?);
494  let Inst{26-22} = op;
495  let Inst{31-27} = 0x18; //encoding
496}
497
// Defines the SI real load encodings for the _IMM and _SGPR pseudos of the
// instruction named by the enclosing defm. No _SGPR_IMM variant is defined.
498multiclass SM_Real_Loads_si<bits<5> op> {
499  defvar ps = NAME;
500  defvar immPs = !cast<SM_Load_Pseudo>(ps#_IMM);
  // The immediate form replaces the pseudo's operand list so that $offset
  // uses smrd_offset_8 instead of the pseudo's offset operand.
501  def _IMM_si : SMRD_Real_si <op, immPs> {
502    let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_8:$offset, CPol:$cpol);
503  }
504
505  defvar sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR);
506  def _SGPR_si : SMRD_Real_si <op, sgprPs>;
507}
508
// SI real encodings: opcode assignments for the loads, followed by the two
// operand-less / time instructions defined directly from their pseudos.
509defm S_LOAD_DWORD           : SM_Real_Loads_si <0x00>;
510defm S_LOAD_DWORDX2         : SM_Real_Loads_si <0x01>;
511defm S_LOAD_DWORDX4         : SM_Real_Loads_si <0x02>;
512defm S_LOAD_DWORDX8         : SM_Real_Loads_si <0x03>;
513defm S_LOAD_DWORDX16        : SM_Real_Loads_si <0x04>;
514defm S_BUFFER_LOAD_DWORD    : SM_Real_Loads_si <0x08>;
515defm S_BUFFER_LOAD_DWORDX2  : SM_Real_Loads_si <0x09>;
516defm S_BUFFER_LOAD_DWORDX4  : SM_Real_Loads_si <0x0a>;
517defm S_BUFFER_LOAD_DWORDX8  : SM_Real_Loads_si <0x0b>;
518defm S_BUFFER_LOAD_DWORDX16 : SM_Real_Loads_si <0x0c>;
519
520def S_MEMTIME_si    : SMRD_Real_si <0x1e, S_MEMTIME>;
521def S_DCACHE_INV_si : SMRD_Real_si <0x1f, S_DCACHE_INV>;
522
523
524//===----------------------------------------------------------------------===//
525// VI and GFX9.
526//===----------------------------------------------------------------------===//
527
// Real 64-bit SMEM encoding shared by VI and GFX9.
//
//   op - 8-bit opcode placed in Inst{25-18}.
//   ps - the pseudo being encoded.
//
// IsGFX9SpecificEncoding selects between the common GFX8/GFX9 form
// (assembled under isGFX8GFX9) and a GFX9-only form (isGFX9Only) that also
// encodes soffset_en and a separate soffset field.
528class SMEM_Real_vi <bits<8> op, SM_Pseudo ps>
529  : SM_Real<ps>
530  , SIMCInstr<ps.PseudoInstr, SIEncodingFamily.VI>
531  , Enc64 {
532  field bit IsGFX9SpecificEncoding = false;
533  let AssemblerPredicate = !if(IsGFX9SpecificEncoding, isGFX9Only, isGFX8GFX9);
534  let DecoderNamespace = "GFX8";
535
  // Only bits 6-1 of sbase are encoded; bit 0 is dropped.
536  let Inst{5-0}   = !if(ps.has_sbase, sbase{6-1}, ?);
537  let Inst{12-6}  = !if(ps.has_sdst, sdst{6-0}, ?);
538
539  // Note that for GFX9 instructions with immediate offsets, soffset_en
540  // must be defined, whereas in GFX8 it's undefined in all cases,
541  // meaning GFX9 is not perfectly backward-compatible with GFX8, despite
542  // documentation suggesting otherwise.
543  field bit SOffsetEn = !if(IsGFX9SpecificEncoding,
544    !if(ps.has_offset, ps.has_soffset, !if(ps.has_soffset, 0, ?)),
545    ?);
546  let Inst{14} = SOffsetEn;
547
548  let Inst{16} = !if(ps.has_glc, cpol{CPolBit.GLC}, ?);
549
550  // imm
551  // TODO: Shall not be defined if the instruction has no offset nor
552  // soffset.
553  let Inst{17} = ps.has_offset;
554
555  let Inst{25-18} = op;
556  let Inst{31-26} = 0x30; //encoding
557
558  // VI supports 20-bit unsigned offsets while GFX9+ supports 21-bit signed.
559  // Offset value is corrected accordingly when offset is encoded/decoded.
560  // TODO: Forbid non-M0 register offsets for GFX8 stores and atomics.
  // Without an immediate offset, the low 7 bits carry the offset register
  // (if any) and the upper bits are left unset.
561  field bits<21> Offset;
562  let Offset{6-0} = !if(ps.has_offset, offset{6-0},
563                                       !if(ps.has_soffset, soffset{6-0}, ?));
564  let Offset{20-7} = !if(ps.has_offset, offset{20-7}, ?);
565  let Inst{52-32} = Offset;
566
567  // soffset
  // Encoded only in the GFX9-specific form.
568  let Inst{63-57} = !if(!and(IsGFX9SpecificEncoding, ps.has_soffset),
569                        soffset{6-0}, ?);
570}
571
// Convenience wrapper around SMEM_Real_vi that takes the pseudo by record
// name (a string) and casts it to SM_Pseudo.
572class SMEM_Real_Load_vi<bits<8> op, string ps>
573    : SMEM_Real_vi<op, !cast<SM_Pseudo>(ps)>;
574
575// The alternative GFX9 SGPR encoding using soffset to encode the
576// offset register. Not available in assembler and goes to the GFX9
577// encoding family to avoid conflicts with the primary SGPR variant.
// Used as a mix-in after an SMEM_Real_vi-derived class: it selects the
// GFX9-specific encoding, forces SOffsetEn to 1 and leaves Offset unset.
578class SMEM_Real_SGPR_alt_gfx9 {
579  bit IsGFX9SpecificEncoding = true;
580  bit SOffsetEn = 1;
581  bit Offset = ?;
582  int Subtarget = SIEncodingFamily.GFX9;
583  string AsmVariantName = "NonParsable";
584}
585
// Defines the VI/GFX9 real load encodings for the instruction named by the
// enclosing defm: _IMM and _SGPR in the common form, an alternative GFX9
// encoding of the _SGPR pseudo, and the GFX9-only _SGPR_IMM form.
586multiclass SM_Real_Loads_vi<bits<8> op> {
587  defvar ps = NAME;
588  def _IMM_vi : SMEM_Real_Load_vi <op, ps#"_IMM">;
589  def _SGPR_vi : SMEM_Real_Load_vi <op, ps#"_SGPR">;
590  def _SGPR_alt_gfx9 : SMEM_Real_Load_vi <op, ps#"_SGPR">,
591                       SMEM_Real_SGPR_alt_gfx9;
592  let IsGFX9SpecificEncoding = true in
593  def _SGPR_IMM_gfx9 : SMEM_Real_Load_vi <op, ps#"_SGPR_IMM">;
594}
595
// VI/GFX9 real encoding for instructions that carry an $sdata operand instead
// of $sdst: sdst is left unset and Inst{12-6} is filled from sdata. The field
// is still gated on the pseudo's has_sdst flag.
596class SMEM_Real_Store_Base_vi <bits<8> op, SM_Pseudo ps> : SMEM_Real_vi <op, ps> {
597  // encoding
598  bits<7> sdata;
599
600  let sdst = ?;
601  let Inst{12-6}  = !if(ps.has_sdst, sdata{6-0}, ?);
602}
603
// Convenience wrapper around SMEM_Real_Store_Base_vi that takes the pseudo by
// record name (a string) and casts it to SM_Pseudo.
604class SMEM_Real_Store_vi <bits<8> op, string ps>
605    : SMEM_Real_Store_Base_vi <op, !cast<SM_Pseudo>(ps)>;
606
// Defines the VI/GFX9 real store encodings for the instruction named by the
// enclosing defm; the set of variants mirrors SM_Real_Loads_vi.
607multiclass SM_Real_Stores_vi<bits<8> op> {
608  defvar ps = NAME;
609  def _IMM_vi : SMEM_Real_Store_vi <op, ps#"_IMM">;
610  def _SGPR_vi : SMEM_Real_Store_vi <op, ps#"_SGPR">;
611  def _SGPR_alt_gfx9 : SMEM_Real_Store_vi <op, ps#"_SGPR">,
612                       SMEM_Real_SGPR_alt_gfx9;
613  let IsGFX9SpecificEncoding = true in
614  def _SGPR_IMM_gfx9 : SMEM_Real_Store_vi <op, ps#"_SGPR_IMM">;
615}
616
// Defines the VI/GFX9 real probe encodings for the instruction named by the
// enclosing defm. Probes reuse the store-style encoding because they carry
// $sdata. The _SGPR_OPT_IMM pseudo gets no real encoding here.
617multiclass SM_Real_Probe_vi<bits<8> op> {
618  defvar ps = NAME;
619  def _IMM_vi  : SMEM_Real_Store_Base_vi <op, !cast<SM_Probe_Pseudo>(ps#_IMM)>;
620  def _SGPR_vi : SMEM_Real_Store_Base_vi <op, !cast<SM_Probe_Pseudo>(ps#_SGPR)>;
621  def _SGPR_alt_gfx9
622    : SMEM_Real_Store_Base_vi <op, !cast<SM_Probe_Pseudo>(ps#_SGPR)>,
623      SMEM_Real_SGPR_alt_gfx9;
624  let IsGFX9SpecificEncoding = true in
625  def _SGPR_IMM_gfx9
626    : SMEM_Real_Store_Base_vi <op, !cast<SM_Probe_Pseudo>(ps#_SGPR_IMM)>;
627}
628
// VI/GFX9 real encodings: opcode assignments for loads, stores, cache
// maintenance, time, scratch and probe instructions.
629defm S_LOAD_DWORD           : SM_Real_Loads_vi <0x00>;
630defm S_LOAD_DWORDX2         : SM_Real_Loads_vi <0x01>;
631defm S_LOAD_DWORDX4         : SM_Real_Loads_vi <0x02>;
632defm S_LOAD_DWORDX8         : SM_Real_Loads_vi <0x03>;
633defm S_LOAD_DWORDX16        : SM_Real_Loads_vi <0x04>;
634defm S_BUFFER_LOAD_DWORD    : SM_Real_Loads_vi <0x08>;
635defm S_BUFFER_LOAD_DWORDX2  : SM_Real_Loads_vi <0x09>;
636defm S_BUFFER_LOAD_DWORDX4  : SM_Real_Loads_vi <0x0a>;
637defm S_BUFFER_LOAD_DWORDX8  : SM_Real_Loads_vi <0x0b>;
638defm S_BUFFER_LOAD_DWORDX16 : SM_Real_Loads_vi <0x0c>;
639
640defm S_STORE_DWORD : SM_Real_Stores_vi <0x10>;
641defm S_STORE_DWORDX2 : SM_Real_Stores_vi <0x11>;
642defm S_STORE_DWORDX4 : SM_Real_Stores_vi <0x12>;
643
644defm S_BUFFER_STORE_DWORD    : SM_Real_Stores_vi <0x18>;
645defm S_BUFFER_STORE_DWORDX2  : SM_Real_Stores_vi <0x19>;
646defm S_BUFFER_STORE_DWORDX4  : SM_Real_Stores_vi <0x1a>;
647
648// These instructions use the same encoding (plain SMEM_Real_vi, built
// directly from the pseudo).
649def S_DCACHE_INV_vi         : SMEM_Real_vi <0x20, S_DCACHE_INV>;
650def S_DCACHE_WB_vi          : SMEM_Real_vi <0x21, S_DCACHE_WB>;
651def S_DCACHE_INV_VOL_vi     : SMEM_Real_vi <0x22, S_DCACHE_INV_VOL>;
652def S_DCACHE_WB_VOL_vi      : SMEM_Real_vi <0x23, S_DCACHE_WB_VOL>;
653def S_MEMTIME_vi            : SMEM_Real_vi <0x24, S_MEMTIME>;
654def S_MEMREALTIME_vi        : SMEM_Real_vi <0x25, S_MEMREALTIME>;
655
656defm S_SCRATCH_LOAD_DWORD    : SM_Real_Loads_vi <0x05>;
657defm S_SCRATCH_LOAD_DWORDX2  : SM_Real_Loads_vi <0x06>;
658defm S_SCRATCH_LOAD_DWORDX4  : SM_Real_Loads_vi <0x07>;
659
660defm S_SCRATCH_STORE_DWORD   : SM_Real_Stores_vi <0x15>;
661defm S_SCRATCH_STORE_DWORDX2 : SM_Real_Stores_vi <0x16>;
662defm S_SCRATCH_STORE_DWORDX4 : SM_Real_Stores_vi <0x17>;
663
664defm S_ATC_PROBE        : SM_Real_Probe_vi <0x26>;
665defm S_ATC_PROBE_BUFFER : SM_Real_Probe_vi <0x27>;
666
667//===----------------------------------------------------------------------===//
668// GFX9
669//===----------------------------------------------------------------------===//
670
// VI/GFX9 real encoding for scalar atomics. Copies the tie constraint and
// DisableEncoding from the pseudo, fixes the GLC bit of cpol to the pseudo's
// glc value, and fills Inst{12-6} from sdst for the returning form (glc set)
// or from sdata otherwise.
671class SMEM_Atomic_Real_vi <bits<8> op, SM_Atomic_Pseudo ps>
672  : SMEM_Real_vi <op, ps> {
673
674  bits<7> sdata;
675
676  let Constraints = ps.Constraints;
677  let DisableEncoding = ps.DisableEncoding;
678
679  let cpol{CPolBit.GLC} = ps.glc;
680  let Inst{12-6} = !if(ps.glc, sdst{6-0}, sdata{6-0});
681}
682
// Defines the real atomic encodings for the instruction named by the
// enclosing defm. For both the non-returning and the returning (_RTN)
// pseudos it emits: _IMM and _SGPR in the common form, an alternative GFX9
// encoding of the _SGPR pseudo, and the GFX9-only _SGPR_IMM form.
683multiclass SM_Real_Atomics_vi<bits<8> op> {
684  defvar ps = NAME;
685  def _IMM_vi       : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_IMM)>;
686  def _SGPR_vi      : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR)>;
687  def _SGPR_alt_gfx9
688    : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR)>,
689      SMEM_Real_SGPR_alt_gfx9;
690  let IsGFX9SpecificEncoding = true in
691  def _SGPR_IMM_gfx9
692    : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_IMM)>;
693  def _IMM_RTN_vi   : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_IMM_RTN)>;
694  def _SGPR_RTN_vi  : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_RTN)>;
695  def _SGPR_RTN_alt_gfx9
696    : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_RTN)>,
697      SMEM_Real_SGPR_alt_gfx9;
698  let IsGFX9SpecificEncoding = true in
699  def _SGPR_IMM_RTN_gfx9
700    : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_IMM_RTN)>;
701}
702
// Opcode table for the SM_Real_Atomics_vi instantiations. The four groups use
// parallel operation order and differ only in the opcode base:
//   S_BUFFER_ATOMIC_*     0x40-0x4c
//   S_BUFFER_ATOMIC_*_X2  0x60-0x6c
//   S_ATOMIC_*            0x80-0x8c
//   S_ATOMIC_*_X2         0xa0-0xac
703defm S_BUFFER_ATOMIC_SWAP         : SM_Real_Atomics_vi <0x40>;
704defm S_BUFFER_ATOMIC_CMPSWAP      : SM_Real_Atomics_vi <0x41>;
705defm S_BUFFER_ATOMIC_ADD          : SM_Real_Atomics_vi <0x42>;
706defm S_BUFFER_ATOMIC_SUB          : SM_Real_Atomics_vi <0x43>;
707defm S_BUFFER_ATOMIC_SMIN         : SM_Real_Atomics_vi <0x44>;
708defm S_BUFFER_ATOMIC_UMIN         : SM_Real_Atomics_vi <0x45>;
709defm S_BUFFER_ATOMIC_SMAX         : SM_Real_Atomics_vi <0x46>;
710defm S_BUFFER_ATOMIC_UMAX         : SM_Real_Atomics_vi <0x47>;
711defm S_BUFFER_ATOMIC_AND          : SM_Real_Atomics_vi <0x48>;
712defm S_BUFFER_ATOMIC_OR           : SM_Real_Atomics_vi <0x49>;
713defm S_BUFFER_ATOMIC_XOR          : SM_Real_Atomics_vi <0x4a>;
714defm S_BUFFER_ATOMIC_INC          : SM_Real_Atomics_vi <0x4b>;
715defm S_BUFFER_ATOMIC_DEC          : SM_Real_Atomics_vi <0x4c>;
716
717defm S_BUFFER_ATOMIC_SWAP_X2      : SM_Real_Atomics_vi <0x60>;
718defm S_BUFFER_ATOMIC_CMPSWAP_X2   : SM_Real_Atomics_vi <0x61>;
719defm S_BUFFER_ATOMIC_ADD_X2       : SM_Real_Atomics_vi <0x62>;
720defm S_BUFFER_ATOMIC_SUB_X2       : SM_Real_Atomics_vi <0x63>;
721defm S_BUFFER_ATOMIC_SMIN_X2      : SM_Real_Atomics_vi <0x64>;
722defm S_BUFFER_ATOMIC_UMIN_X2      : SM_Real_Atomics_vi <0x65>;
723defm S_BUFFER_ATOMIC_SMAX_X2      : SM_Real_Atomics_vi <0x66>;
724defm S_BUFFER_ATOMIC_UMAX_X2      : SM_Real_Atomics_vi <0x67>;
725defm S_BUFFER_ATOMIC_AND_X2       : SM_Real_Atomics_vi <0x68>;
726defm S_BUFFER_ATOMIC_OR_X2        : SM_Real_Atomics_vi <0x69>;
727defm S_BUFFER_ATOMIC_XOR_X2       : SM_Real_Atomics_vi <0x6a>;
728defm S_BUFFER_ATOMIC_INC_X2       : SM_Real_Atomics_vi <0x6b>;
729defm S_BUFFER_ATOMIC_DEC_X2       : SM_Real_Atomics_vi <0x6c>;
730
731defm S_ATOMIC_SWAP                : SM_Real_Atomics_vi <0x80>;
732defm S_ATOMIC_CMPSWAP             : SM_Real_Atomics_vi <0x81>;
733defm S_ATOMIC_ADD                 : SM_Real_Atomics_vi <0x82>;
734defm S_ATOMIC_SUB                 : SM_Real_Atomics_vi <0x83>;
735defm S_ATOMIC_SMIN                : SM_Real_Atomics_vi <0x84>;
736defm S_ATOMIC_UMIN                : SM_Real_Atomics_vi <0x85>;
737defm S_ATOMIC_SMAX                : SM_Real_Atomics_vi <0x86>;
738defm S_ATOMIC_UMAX                : SM_Real_Atomics_vi <0x87>;
739defm S_ATOMIC_AND                 : SM_Real_Atomics_vi <0x88>;
740defm S_ATOMIC_OR                  : SM_Real_Atomics_vi <0x89>;
741defm S_ATOMIC_XOR                 : SM_Real_Atomics_vi <0x8a>;
742defm S_ATOMIC_INC                 : SM_Real_Atomics_vi <0x8b>;
743defm S_ATOMIC_DEC                 : SM_Real_Atomics_vi <0x8c>;
744
745defm S_ATOMIC_SWAP_X2             : SM_Real_Atomics_vi <0xa0>;
746defm S_ATOMIC_CMPSWAP_X2          : SM_Real_Atomics_vi <0xa1>;
747defm S_ATOMIC_ADD_X2              : SM_Real_Atomics_vi <0xa2>;
748defm S_ATOMIC_SUB_X2              : SM_Real_Atomics_vi <0xa3>;
749defm S_ATOMIC_SMIN_X2             : SM_Real_Atomics_vi <0xa4>;
750defm S_ATOMIC_UMIN_X2             : SM_Real_Atomics_vi <0xa5>;
751defm S_ATOMIC_SMAX_X2             : SM_Real_Atomics_vi <0xa6>;
752defm S_ATOMIC_UMAX_X2             : SM_Real_Atomics_vi <0xa7>;
753defm S_ATOMIC_AND_X2              : SM_Real_Atomics_vi <0xa8>;
754defm S_ATOMIC_OR_X2               : SM_Real_Atomics_vi <0xa9>;
755defm S_ATOMIC_XOR_X2              : SM_Real_Atomics_vi <0xaa>;
756defm S_ATOMIC_INC_X2              : SM_Real_Atomics_vi <0xab>;
757defm S_ATOMIC_DEC_X2              : SM_Real_Atomics_vi <0xac>;
758
// Emits the real records for a discard pseudo family named NAME with opcode
// `op`: _IMM_vi, _SGPR_vi, _SGPR_alt_gfx9 (the _SGPR pseudo combined with
// SMEM_Real_SGPR_alt_gfx9) and _SGPR_IMM_gfx9 (flagged
// IsGFX9SpecificEncoding). Unlike the atomics, these use SMEM_Real_vi directly.
759multiclass SM_Real_Discard_vi<bits<8> op> {
760  defvar ps = NAME;
761  def _IMM_vi  : SMEM_Real_vi <op, !cast<SM_Discard_Pseudo>(ps#_IMM)>;
762  def _SGPR_vi : SMEM_Real_vi <op, !cast<SM_Discard_Pseudo>(ps#_SGPR)>;
763  def _SGPR_alt_gfx9 : SMEM_Real_vi <op, !cast<SM_Discard_Pseudo>(ps#_SGPR)>,
764                       SMEM_Real_SGPR_alt_gfx9;
765  let IsGFX9SpecificEncoding = true in
766  def _SGPR_IMM_gfx9 : SMEM_Real_vi <op, !cast<SM_Discard_Pseudo>(ps#_SGPR_IMM)>;
767}
768
// Real records for the two discard families (opcodes 0x28 and 0x29).
769defm S_DCACHE_DISCARD    : SM_Real_Discard_vi <0x28>;
770defm S_DCACHE_DISCARD_X2 : SM_Real_Discard_vi <0x29>;
771
772//===----------------------------------------------------------------------===//
773// CI
774//===----------------------------------------------------------------------===//
775
// i32 immediate operand; used as the type of $offset in SMRD_Real_Load_IMM_ci
// and in the "_IMM_ci" selection pattern of SMLoad_Pattern.
776def smrd_literal_offset : ImmOperand<i32, "SMRDLiteralOffset">;
777
// 64-bit (Enc64) real encoding of a scalar load whose offset is a full 32-bit
// immediate. Only assembled when isGFX7Only holds and decoded in the "GFX7"
// namespace. The input operand list is rebuilt so that $offset has the
// smrd_literal_offset operand type.
778class SMRD_Real_Load_IMM_ci <bits<5> op, SM_Load_Pseudo ps> :
779  SM_Real<ps>,
780  Enc64 {
781
782  let AssemblerPredicate = isGFX7Only;
783  let DecoderNamespace = "GFX7";
784  let InOperandList = (ins ps.BaseClass:$sbase, smrd_literal_offset:$offset, CPol:$cpol);
785
  // The low byte is the constant 0xff with Inst{8} cleared; the actual offset
  // lives in the second dword (Inst{63-32}).
  // NOTE(review): 0xff presumably marks "literal follows" - confirm in the ISA doc.
786  let Inst{7-0}   = 0xff;
787  let Inst{8}     = 0;
788  let Inst{14-9}  = sbase{6-1};
789  let Inst{21-15} = sdst{6-0};
790  let Inst{26-22} = op;
791  let Inst{31-27} = 0x18; //encoding
792  let Inst{63-32} = offset{31-0};
793}
794
// "_IMM_ci" real records: S_LOAD_* use opcodes 0x00-0x04 and S_BUFFER_LOAD_*
// use 0x08-0x0c, each tied to the corresponding *_IMM pseudo.
795def S_LOAD_DWORD_IMM_ci           : SMRD_Real_Load_IMM_ci <0x00, S_LOAD_DWORD_IMM>;
796def S_LOAD_DWORDX2_IMM_ci         : SMRD_Real_Load_IMM_ci <0x01, S_LOAD_DWORDX2_IMM>;
797def S_LOAD_DWORDX4_IMM_ci         : SMRD_Real_Load_IMM_ci <0x02, S_LOAD_DWORDX4_IMM>;
798def S_LOAD_DWORDX8_IMM_ci         : SMRD_Real_Load_IMM_ci <0x03, S_LOAD_DWORDX8_IMM>;
799def S_LOAD_DWORDX16_IMM_ci        : SMRD_Real_Load_IMM_ci <0x04, S_LOAD_DWORDX16_IMM>;
800def S_BUFFER_LOAD_DWORD_IMM_ci    : SMRD_Real_Load_IMM_ci <0x08, S_BUFFER_LOAD_DWORD_IMM>;
801def S_BUFFER_LOAD_DWORDX2_IMM_ci  : SMRD_Real_Load_IMM_ci <0x09, S_BUFFER_LOAD_DWORDX2_IMM>;
802def S_BUFFER_LOAD_DWORDX4_IMM_ci  : SMRD_Real_Load_IMM_ci <0x0a, S_BUFFER_LOAD_DWORDX4_IMM>;
803def S_BUFFER_LOAD_DWORDX8_IMM_ci  : SMRD_Real_Load_IMM_ci <0x0b, S_BUFFER_LOAD_DWORDX8_IMM>;
804def S_BUFFER_LOAD_DWORDX16_IMM_ci : SMRD_Real_Load_IMM_ci <0x0c, S_BUFFER_LOAD_DWORDX16_IMM>;
805
// 32-bit (Enc32) real encoding restricted to isGFX7Only and decoded in the
// "GFX7" namespace; registered with SIMCInstr under SIEncodingFamily.SI.
// Fields the pseudo does not have (per its has_* flags) are left undefined.
806class SMRD_Real_ci <bits<5> op, SM_Pseudo ps>
807  : SM_Real<ps>
808  , SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SI>
809  , Enc32 {
810
811  let AssemblerPredicate = isGFX7Only;
812  let DecoderNamespace = "GFX7";
813
  // The low byte holds offset{7-0} when the pseudo has an immediate offset,
  // otherwise soffset when it has one; Inst{8} records which case applies.
814  let Inst{7-0}   = !if(ps.has_offset, offset{7-0}, !if(ps.has_soffset, soffset, ?));
815  let Inst{8}     = ps.has_offset;
816  let Inst{14-9}  = !if(ps.has_sbase, sbase{6-1}, ?);
817  let Inst{21-15} = !if(ps.has_sdst, sdst{6-0}, ?);
818  let Inst{26-22} = op;
819  let Inst{31-27} = 0x18; //encoding
820}
821
// GFX7-only real record for the S_DCACHE_INV_VOL pseudo (opcode 0x1d).
822def S_DCACHE_INV_VOL_ci : SMRD_Real_ci <0x1d, S_DCACHE_INV_VOL>;
823
824//===----------------------------------------------------------------------===//
825// Scalar Memory Patterns
826//===----------------------------------------------------------------------===//
827
// Wraps the load fragment `Op` so it only matches loads eligible for scalar
// memory selection. The SelectionDAG predicate is isUniformLoad(N). The
// GlobalISel predicate requires exactly one memory operand, a uniform
// instruction, and an address (as described by getAddrModeInfo) with no
// VGPR parts.
828class SMRDLoadPat<PatFrag Op> : PatFrag <(ops node:$ptr), (Op node:$ptr), [{ return isUniformLoad(N);}]> {
829  let GISelPredicateCode = [{
830    if (!MI.hasOneMemOperand())
831      return false;
832    if (!isInstrUniform(MI))
833      return false;
834
835    // FIXME: We should probably be caching this.
836    SmallVector<GEPInfo, 4> AddrInfo;
837    getAddrModeInfo(MI, MRI, AddrInfo);
838
839    if (hasVgprParts(AddrInfo))
840      return false;
841    return true;
842  }];
843}
844
// SMRDLoadPat wrappers: the plain load plus the any-/zero-/sign-extending
// 8-bit and 16-bit load fragments.
845def smrd_load : SMRDLoadPat<load>;
846def smrd_extloadi8 : SMRDLoadPat<extloadi8>;
847def smrd_zextloadi8 : SMRDLoadPat<zextloadi8>;
848def smrd_sextloadi8 : SMRDLoadPat<sextloadi8>;
849def smrd_extloadi16 : SMRDLoadPat<extloadi16>;
850def smrd_zextloadi16 : SMRDLoadPat<zextloadi16>;
851def smrd_sextloadi16 : SMRDLoadPat<sextloadi16>;
852
// Prefetch fragment forwarding all four operands ($ptr, $rw, $loc, $type) to
// `prefetch`. SelectionDAG accepts it only when operand 1 of the node is not
// divergent; GlobalISel accepts it only when the instruction is uniform.
853def smrd_prefetch : PatFrag <(ops node:$ptr, node:$rw, node:$loc, node:$type),
854                             (prefetch node:$ptr, node:$rw, node:$loc, node:$type),
855                             [{ return !N->getOperand(1)->isDivergent();}]> {
856  let GISelPredicateCode = [{
857    return isInstrUniform(MI);
858  }];
859}
860
// Addressing-mode complex patterns on iPTR. The integer is the number of
// operands each one yields, matching how the patterns below bind them:
//   SMRDImm / SMRDImm32 / SMRDSgpr : base + one offset operand (2)
//   SMRDSgprImm                    : base + SGPR offset + immediate (3)
//   SMRDBufferImm / ...Imm32       : immediate offset only (1)
//   SMRDBufferSgprImm              : SGPR offset + immediate (2)
// The string names the selector function that performs the match.
861def SMRDImm         : ComplexPattern<iPTR, 2, "SelectSMRDImm">;
862def SMRDImm32       : ComplexPattern<iPTR, 2, "SelectSMRDImm32">;
863def SMRDSgpr        : ComplexPattern<iPTR, 2, "SelectSMRDSgpr">;
864def SMRDSgprImm     : ComplexPattern<iPTR, 3, "SelectSMRDSgprImm">;
865def SMRDBufferImm   : ComplexPattern<iPTR, 1, "SelectSMRDBufferImm">;
866def SMRDBufferImm32 : ComplexPattern<iPTR, 1, "SelectSMRDBufferImm32">;
867def SMRDBufferSgprImm : ComplexPattern<iPTR, 2, "SelectSMRDBufferSgprImm">;
868
// Narrows the load fragment `Op` to loads that are at most 4 bytes in size or
// whose alignment is at least their size. The SelectionDAG predicate reads the
// size from the memory VT's store size; the GlobalISel predicate reads it from
// the memory operand. Both apply the same size/alignment comparison.
869class SMRDAlignedLoadPat<PatFrag Op> : PatFrag <(ops node:$ptr), (Op node:$ptr), [{
870  // Returns true if it is a single dword load or naturally aligned multi-dword load.
871  LoadSDNode *Ld = cast<LoadSDNode>(N);
872  unsigned Size = Ld->getMemoryVT().getStoreSize();
873  return Size <= 4 || Ld->getAlign().value() >= Size;
874}]> {
875  let GISelPredicateCode = [{
876  auto &Ld = cast<GLoad>(MI);
877  TypeSize Size = Ld.getMMO().getSize().getValue();
878  return Size <= 4 || Ld.getMMO().getAlign().value() >= Size;
879  }];
880}
881
// smrd_load further restricted by the SMRDAlignedLoadPat size/alignment check.
882def aligned_smrd_load : SMRDAlignedLoadPat<smrd_load>;
883
// Selection patterns mapping the load fragment `frag` with result type `vt`
// onto the scalar load pseudos named Instr#"_IMM", Instr#"_SGPR" and
// Instr#"_SGPR_IMM" (each with `suffix` appended).
//   Instr  - base pseudo name, e.g. "S_LOAD_DWORD"
//   vt     - value type produced by the load
//   frag   - load fragment to match
//   immci  - when true, also emit the isGFX7Only pattern that targets
//            Instr#"_IMM_ci"#suffix
//   suffix - appended to every pseudo name looked up via !cast
// Every pattern passes 0 as the final (cache policy) operand.
884multiclass SMRD_Patterns <string Instr, ValueType vt, PatFrag frag,
885                          bit immci = true, string suffix = ""> {
886  // 1. IMM offset
887  def : GCNPat <
888    (frag (SMRDImm i64:$sbase, i32:$offset)),
889    (vt (!cast<SM_Pseudo>(Instr#"_IMM"#suffix) $sbase, $offset, 0))>;
890
891  // 2. 32-bit IMM offset on CI
892  if immci then def : GCNPat <
893    (frag (SMRDImm32 i64:$sbase, i32:$offset)),
894    (vt (!cast<InstSI>(Instr#"_IMM_ci"#suffix) $sbase, $offset, 0))> {
895    let SubtargetPredicate = isGFX7Only;
896  }
897
898  // 3. SGPR offset
  // Before GFX9 this selects the _SGPR pseudo; on GFX9+ it selects _SGPR_IMM
  // with a zero immediate instead.
899  def : GCNPat <
900    (frag (SMRDSgpr i64:$sbase, i32:$soffset)),
901    (vt (!cast<SM_Pseudo>(Instr#"_SGPR"#suffix) $sbase, $soffset, 0))> {
902    let SubtargetPredicate = isNotGFX9Plus;
903  }
904  def : GCNPat <
905    (frag (SMRDSgpr i64:$sbase, i32:$soffset)),
906    (vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM"#suffix) $sbase, $soffset, 0, 0))> {
907    let SubtargetPredicate = isGFX9Plus;
908  }
909
910  // 4. SGPR+IMM offset
911  def : GCNPat <
912    (frag (SMRDSgprImm i64:$sbase, i32:$soffset, i32:$offset)),
913    (vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM"#suffix) $sbase, $soffset, $offset, 0))> {
914    let SubtargetPredicate = isGFX9Plus;
915  }
916
917  // 5. No offset
918  def : GCNPat <
919    (vt (frag (i64 SReg_64:$sbase))),
920    (vt (!cast<SM_Pseudo>(Instr#"_IMM"#suffix) i64:$sbase, 0, 0))>;
921}
922
// Instantiates SMRD_Patterns up to three times for one (Instr, vt) pair, in
// decreasing AddedComplexity:
//   102 - HasXNACKEnabled, aligned_smrd_load, normal pseudos
//   101 - HasXNACKEnabled, smrd_load, "_ec" pseudos, no CI pattern; only
//         emitted when vt is wider than 32 bits
//   100 - no XNACK predicate, smrd_load, normal pseudos
// `immci` is forwarded to the first and last instantiation.
923multiclass SMRD_Pattern <string Instr, ValueType vt, bit immci = true> {
924  // High priority when XNACK is enabled and the load was naturally aligned.
925  let OtherPredicates = [HasXNACKEnabled], AddedComplexity = 102 in
926    defm: SMRD_Patterns <Instr, vt, aligned_smrd_load, immci>;
927
928  // XNACK is enabled and the load wasn't naturally aligned. The constrained sload variant.
929  if !gt(vt.Size, 32) then {
930    let OtherPredicates = [HasXNACKEnabled], AddedComplexity = 101 in
931      defm: SMRD_Patterns <Instr, vt, smrd_load, /*immci=*/false, /*suffix=*/"_ec">;
932  }
933
934  // XNACK is disabled.
935  let AddedComplexity = 100 in
936    defm: SMRD_Patterns <Instr, vt, smrd_load, immci>;
937}
938
// Selection patterns for SIsbuffer_load with result type `vt`, targeting the
// buffer-load pseudos Instr#"_IMM", Instr#"_SGPR" and Instr#"_SGPR_IMM".
// The cache policy operand is always derived with extract_cpol. When `immci`
// is true, an extra isGFX7Only pattern targets Instr#"_IMM_ci".
// The immediate forms carry AddedComplexity 2 and 1; the SGPR forms set none.
939multiclass SMLoad_Pattern <string Instr, ValueType vt, bit immci = true> {
940  // 1. Offset as an immediate
941  def : GCNPat <
942    (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm i32:$offset), timm:$cachepolicy),
943    (vt (!cast<SM_Pseudo>(Instr#"_IMM") SReg_128:$sbase, i32imm:$offset, (extract_cpol $cachepolicy)))> {
944    let AddedComplexity = 2;
945  }
946
947  // 2. 32-bit IMM offset on CI
948  if immci then def : GCNPat <
949    (vt (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm32 i32:$offset), timm:$cachepolicy)),
950    (!cast<InstSI>(Instr#"_IMM_ci") SReg_128:$sbase, smrd_literal_offset:$offset,
951                                    (extract_cpol $cachepolicy))> {
952    let OtherPredicates = [isGFX7Only];
953    let AddedComplexity = 1;
954  }
955
956  // 3. Offset held in a 32-bit SGPR
  // Before GFX9 this selects _SGPR; on GFX9+ it selects _SGPR_IMM with a zero
  // immediate.
957  def : GCNPat <
958    (SIsbuffer_load v4i32:$sbase, i32:$soffset, timm:$cachepolicy),
959    (vt (!cast<SM_Pseudo>(Instr#"_SGPR") SReg_128:$sbase, SReg_32:$soffset, (extract_cpol $cachepolicy)))> {
960    let OtherPredicates = [isNotGFX9Plus];
961  }
962  def : GCNPat <
963    (SIsbuffer_load v4i32:$sbase, i32:$soffset, timm:$cachepolicy),
964    (vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") SReg_128:$sbase, SReg_32:$soffset, 0, (extract_cpol $cachepolicy)))> {
965    let OtherPredicates = [isGFX9Plus];
966  }
967
968  // 4. Offset as a 32-bit SGPR + immediate
969  def : GCNPat <
970    (SIsbuffer_load v4i32:$sbase, (SMRDBufferSgprImm i32:$soffset, i32:$offset),
971                    timm:$cachepolicy),
972    (vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") SReg_128:$sbase, SReg_32:$soffset, i32imm:$offset,
973                                             (extract_cpol $cachepolicy)))> {
974    let OtherPredicates = [isGFX9Plus];
975  }
976}
977
// Selection patterns mapping the load operator `node` (result type `vt`) onto
// the pseudos Instr#"_IMM" and Instr#"_SGPR_IMM". All four patterns require
// isGFX12Plus and pass 0 as the final (cache policy) operand. There is no
// separate _SGPR form: an SGPR-only offset uses _SGPR_IMM with a zero
// immediate.
978multiclass ScalarLoadWithExtensionPat <string Instr, SDPatternOperator node, ValueType vt> {
979   // 1. IMM offset
980   def : GCNPat <
981     (node (SMRDImm i64:$sbase, i32:$offset)),
982     (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, 0))>{
983       let OtherPredicates = [isGFX12Plus];
984   }
985
986   // 2. SGPR offset
987   def : GCNPat <
988     (node (SMRDSgpr i64:$sbase, i32:$soffset)),
989     (vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") $sbase, $soffset, 0, 0))>{
990       let OtherPredicates = [isGFX12Plus];
991   }
992
993   // 3. SGPR+IMM offset
994   def : GCNPat <
995     (node (SMRDSgprImm i64:$sbase, i32:$soffset, i32:$offset)),
996     (vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") $sbase, $soffset, $offset, 0))>{
997       let OtherPredicates = [isGFX12Plus];
998   }
999
1000   // 4. No offset
1001   def : GCNPat <
1002     (vt (node (i64 SReg_64:$sbase))),
1003     (vt (!cast<SM_Pseudo>(Instr#"_IMM") i64:$sbase, 0, 0))>{
1004       let OtherPredicates = [isGFX12Plus];
1005  }
1006}
1007
// Selection patterns mapping the buffer-load operator `name` onto the pseudos
// Instr#"_IMM" and Instr#"_SGPR_IMM", always producing an i32 result. All
// three patterns require isGFX12Plus and derive the cache policy operand with
// extract_cpol.
1008multiclass ScalarBufferLoadIntrinsicPat <SDPatternOperator name, string Instr> {
1009
1010  // 1. Offset as an immediate
1011  def : GCNPat <
1012    (name v4i32:$sbase, (SMRDBufferImm i32:$offset), timm:$cachepolicy),
1013    (i32 (!cast<SM_Pseudo>(Instr#"_IMM") SReg_128:$sbase, i32imm:$offset, (extract_cpol $cachepolicy)))> {
1014    let OtherPredicates = [isGFX12Plus];
1015  }
1016
1017  // 2. Offset as a 32-bit SGPR
1018  def : GCNPat <
1019    (name v4i32:$sbase, i32:$soffset, timm:$cachepolicy),
1020    (i32 (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") SReg_128:$sbase, SReg_32:$soffset, 0, (extract_cpol $cachepolicy)))> {
1021    let OtherPredicates = [isGFX12Plus];
1022  }
1023
1024  // 3. Offset as a 32-bit SGPR + immediate
1025  def : GCNPat <
1026    (name v4i32:$sbase, (SMRDBufferSgprImm i32:$soffset, i32:$offset),
1027                    timm:$cachepolicy),
1028    (i32 (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") SReg_128:$sbase, SReg_32:$soffset, i32imm:$offset,
1029                                             (extract_cpol $cachepolicy)))> {
1030    let OtherPredicates = [isGFX12Plus];
1031  }
1032}
1033
1034// Global and constant loads can be selected to either MUBUF or SMRD
1035// instructions, but SMRD instructions are faster so we want the instruction
1036// selector to prefer those.
// The block below covers the 8-bit and 16-bit loads: any-extending and
// zero-extending loads both select the U8/U16 pseudos, sign-extending loads
// select I8/I16, and the four buffer byte/short operators select the matching
// S_BUFFER_LOAD_* pseudos.
1037let AddedComplexity = 100 in {
1038
1039defm : ScalarLoadWithExtensionPat <"S_LOAD_U8", smrd_extloadi8, i32>;
1040defm : ScalarLoadWithExtensionPat <"S_LOAD_U8", smrd_zextloadi8, i32>;
1041defm : ScalarLoadWithExtensionPat <"S_LOAD_I8", smrd_sextloadi8, i32>;
1042defm : ScalarLoadWithExtensionPat <"S_LOAD_U16", smrd_extloadi16, i32>;
1043defm : ScalarLoadWithExtensionPat <"S_LOAD_U16", smrd_zextloadi16, i32>;
1044defm : ScalarLoadWithExtensionPat <"S_LOAD_I16", smrd_sextloadi16, i32>;
1045defm : ScalarBufferLoadIntrinsicPat <SIsbuffer_load_byte, "S_BUFFER_LOAD_I8">;
1046defm : ScalarBufferLoadIntrinsicPat <SIsbuffer_load_ubyte, "S_BUFFER_LOAD_U8">;
1047defm : ScalarBufferLoadIntrinsicPat <SIsbuffer_load_short, "S_BUFFER_LOAD_I16">;
1048defm : ScalarBufferLoadIntrinsicPat <SIsbuffer_load_ushort, "S_BUFFER_LOAD_U16">;
1049
1050} // End let AddedComplexity = 100
1051
// Instantiate SMRD_Pattern once per value type of each register class, pairing
// every class with the S_LOAD pseudo of the same width. DWORDX3 passes
// immci = false, so no "_IMM_ci" pattern is requested for it.
1052foreach vt = Reg32Types.types in {
1053defm : SMRD_Pattern <"S_LOAD_DWORD", vt>;
1054}
1055
1056foreach vt = SReg_64.RegTypes in {
1057defm : SMRD_Pattern <"S_LOAD_DWORDX2", vt>;
1058}
1059
1060foreach vt = SReg_96.RegTypes in {
1061defm : SMRD_Pattern <"S_LOAD_DWORDX3", vt, false>;
1062}
1063
1064foreach vt = SReg_128.RegTypes in {
1065defm : SMRD_Pattern <"S_LOAD_DWORDX4", vt>;
1066}
1067
1068foreach vt = SReg_256.RegTypes in {
1069defm : SMRD_Pattern <"S_LOAD_DWORDX8", vt>;
1070}
1071
1072foreach vt = SReg_512.RegTypes in {
1073defm : SMRD_Pattern <"S_LOAD_DWORDX16", vt>;
1074}
1075
1076
// Buffer-load patterns, once for the integer vector types and once for the
// float vector types of each width. DWORDX3 passes immci = false in both
// groups, so no "_IMM_ci" pattern is requested for it.
1077defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORD",     i32>;
1078defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX2",   v2i32>;
1079defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX3",   v3i32, false>;
1080defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX4",   v4i32>;
1081defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX8",   v8i32>;
1082defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX16",  v16i32>;
1083
1084defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORD",     f32>;
1085defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX2",   v2f32>;
1086defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX3",   v3f32, false>;
1087defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX4",   v4f32>;
1088defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX8",   v8f32>;
1089defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX16",  v16f32>;
1090
// With HasSMemTimeInst, a 64-bit readcyclecounter selects S_MEMTIME.
1091let OtherPredicates = [HasSMemTimeInst] in {
1092def : GCNPat <
1093  (i64 (readcyclecounter)),
1094  (S_MEMTIME)
1095>;
1096} // let OtherPredicates = [HasSMemTimeInst]
1097
// With HasShaderCyclesRegister, a 64-bit readcyclecounter is built as a
// register pair: sub0 comes from S_GETREG_B32 on the SHADER_CYCLES hardware
// register and sub1 is the constant 0, so the upper 32 bits are always zero.
// NOTE(review): the `0, -12` arguments of getHwRegImm are presumably the bit
// offset and width selector - confirm against getHwRegImm's definition.
1098let OtherPredicates = [HasShaderCyclesRegister] in {
1099def : GCNPat <
1100  (i64 (readcyclecounter)),
1101  (REG_SEQUENCE SReg_64,
1102    (S_GETREG_B32 getHwRegImm<HWREG.SHADER_CYCLES, 0, -12>.ret), sub0,
1103    (S_MOV_B32 (i32 0)), sub1)> {
1104}
1105} // let OtherPredicates = [HasShaderCyclesRegister]
1106
// With HasSMemRealTime, a 64-bit readsteadycounter selects S_MEMREALTIME.
1107let OtherPredicates = [HasSMemRealTime] in {
1108def : GCNPat <
1109  (i64 (readsteadycounter)),
1110  (S_MEMREALTIME)
1111>;
1112} // let OtherPredicates = [HasSMemRealTime]
1113
// On isGFX11Plus, a 64-bit readsteadycounter selects S_SENDMSG_RTN_B64 with
// message id 0x83 (MSG_RTN_GET_REALTIME).
1114let SubtargetPredicate = isGFX11Plus in {
1115def : GCNPat <
1116  (i64 (readsteadycounter)),
1117  (S_SENDMSG_RTN_B64 (i32 /*MSG_RTN_GET_REALTIME=*/0x83))
1118>;
1119} // let SubtargetPredicate = isGFX11Plus
1120
// Target-immediate leaves matching exactly the i32 values 0 and 1. They are
// used as the `cache_type` argument of SMPrefetchPat.
1121def i32imm_zero : TImmLeaf <i32, [{
1122  return Imm == 0;
1123}]>;
1124
1125def i32imm_one : TImmLeaf <i32, [{
1126  return Imm == 1;
1127}]>;
1128
// Selection patterns mapping smrd_prefetch onto the pseudo "S_PREFETCH_"#type.
// `cache_type` is the leaf the fourth prefetch operand must match; the second
// and third operands accept any target immediate. Every result passes
// SGPR_NULL and (i8 0) as the trailing operands. Three address shapes:
//   1. 64-bit base plus immediate offset (via SMRDImm)
//   2. bare 64-bit base, offset 0
//   3. 32-bit base, widened to 64 bits with a zero high half, offset 0
1129multiclass SMPrefetchPat<string type, TImmLeaf cache_type> {
1130  def : GCNPat <
1131    (smrd_prefetch (SMRDImm i64:$sbase, i32:$offset), timm, timm, cache_type),
1132    (!cast<SM_Prefetch_Pseudo>("S_PREFETCH_"#type) $sbase, $offset, (i32 SGPR_NULL), (i8 0))
1133  >;
1134
1135  def : GCNPat <
1136    (smrd_prefetch (i64 SReg_64:$sbase), timm, timm, cache_type),
1137    (!cast<SM_Prefetch_Pseudo>("S_PREFETCH_"#type) $sbase, 0, (i32 SGPR_NULL), (i8 0))
1138  >;
1139
1140  def : GCNPat <
1141    (smrd_prefetch (i32 SReg_32:$sbase), timm, timm, cache_type),
1142    (!cast<SM_Prefetch_Pseudo>("S_PREFETCH_"#type)
1143        (i64 (REG_SEQUENCE SReg_64, $sbase, sub0, (i32 (S_MOV_B32 (i32 0))), sub1)),
1144        0, (i32 SGPR_NULL), (i8 0))
1145  >;
1146}
1147
// A fourth prefetch operand of 0 selects S_PREFETCH_INST; 1 selects
// S_PREFETCH_DATA.
1148defm : SMPrefetchPat<"INST", i32imm_zero>;
1149defm : SMPrefetchPat<"DATA", i32imm_one>;
1150
1151//===----------------------------------------------------------------------===//
1152// GFX10.
1153//===----------------------------------------------------------------------===//
1154
// 64-bit (Enc64) encoding fields shared by the GFX10 and GFX11 real classes.
//   op        - 8-bit opcode, placed in Inst{25-18}
//   ps        - pseudo being encoded; its has_* flags gate each field
//   opName    - mnemonic passed on to SM_Real
//   subtarget - encoding family passed on to SIMCInstr
//   sgpr_null - register whose HWEncoding fills the soffset field when only an
//               immediate offset is present
1155class SMEM_Real_10Plus_common<bits<8> op, SM_Pseudo ps, string opName,
1156                              int subtarget, RegisterWithSubRegs sgpr_null> :
1157    SM_Real<ps, opName>, SIMCInstr<ps.PseudoInstr, subtarget>, Enc64 {
1158  let Inst{5-0}   = !if(ps.has_sbase, sbase{6-1}, ?);
1159  let Inst{12-6}  = !if(ps.has_sdst, sdst{6-0}, ?);
1160  let Inst{25-18} = op;
1161  let Inst{31-26} = 0x3d;
1162  // There are SMEM instructions that do not employ any of the offset
1163  // fields, in which case we need them to remain undefined.
  // Otherwise whichever of the two offset fields is unused gets a fixed
  // value: 0 for the immediate, the sgpr_null encoding for soffset.
1164  let Inst{52-32} = !if(ps.has_offset, offset{20-0}, !if(ps.has_soffset, 0, ?));
1165  let Inst{63-57} = !if(ps.has_soffset, soffset{6-0},
1166                        !if(ps.has_offset, sgpr_null.HWEncoding{6-0}, ?));
1167}
1168
// GFX10 real encoding: the common GFX10+ layout with the pseudo's own
// mnemonic, the GFX10 encoding family and SGPR_NULL_gfxpre11. Assembled only
// when isGFX10Only holds and decoded in the "GFX10" namespace. The DLC and GLC
// cache-policy bits go to Inst{14} and Inst{16}, and are left undefined when
// the pseudo lacks them.
1169class SMEM_Real_gfx10<bits<8> op, SM_Pseudo ps>
1170    : SMEM_Real_10Plus_common<op, ps, ps.Mnemonic, SIEncodingFamily.GFX10,
1171                              SGPR_NULL_gfxpre11> {
1172  let AssemblerPredicate = isGFX10Only;
1173  let DecoderNamespace = "GFX10";
1174  let Inst{14}    = !if(ps.has_dlc, cpol{CPolBit.DLC}, ?);
1175  let Inst{16}    = !if(ps.has_glc, cpol{CPolBit.GLC}, ?);
1176}
1177
// Convenience wrapper around SMEM_Real_gfx10 that takes the pseudo by name
// (string) and resolves it with !cast.
1178class SMEM_Real_Load_gfx10<bits<8> op, string ps>
1179    : SMEM_Real_gfx10<op, !cast<SM_Pseudo>(ps)>;
1180
// Emits the _IMM, _SGPR and _SGPR_IMM GFX10 real records for the load pseudo
// family named NAME, all with opcode `op`.
1181multiclass SM_Real_Loads_gfx10<bits<8> op> {
1182  defvar ps = NAME;
1183  def _IMM_gfx10 : SMEM_Real_Load_gfx10<op, ps#"_IMM">;
1184  def _SGPR_gfx10 : SMEM_Real_Load_gfx10<op, ps#"_SGPR">;
1185  def _SGPR_IMM_gfx10 : SMEM_Real_Load_gfx10<op, ps#"_SGPR_IMM">;
1186}
1187
// GFX10 real encoding for pseudos whose register operand is `sdata` rather
// than `sdst`: sdst is unset and Inst{12-6} is re-pointed at sdata. The field
// is still gated on the pseudo's has_sdst flag.
1188class SMEM_Real_Store_gfx10<bits<8> op, SM_Pseudo ps> : SMEM_Real_gfx10<op, ps> {
1189  bits<7> sdata;
1190
1191  let sdst = ?;
1192  let Inst{12-6} = !if(ps.has_sdst, sdata{6-0}, ?);
1193}
1194
// Emits the _IMM, _SGPR and _SGPR_IMM GFX10 real records for the store pseudo
// family named NAME, all with opcode `op`. Each pseudo is looked up as an
// SM_Store_Pseudo before being passed to SMEM_Real_Store_gfx10.
1195multiclass SM_Real_Stores_gfx10<bits<8> op> {
1196  defvar ps = NAME;
1197  defvar immPs = !cast<SM_Store_Pseudo>(ps#_IMM);
1198  def _IMM_gfx10 : SMEM_Real_Store_gfx10 <op, immPs>;
1199
1200  defvar sgprPs = !cast<SM_Store_Pseudo>(ps#_SGPR);
1201  def _SGPR_gfx10 : SMEM_Real_Store_gfx10 <op, sgprPs>;
1202
1203  defvar sgprImmPs = !cast<SM_Store_Pseudo>(ps#_SGPR_IMM);
1204  def _SGPR_IMM_gfx10 : SMEM_Real_Store_gfx10 <op, sgprImmPs>;
1205}
1206
// GFX10 opcode table for loads and stores:
//   S_LOAD_*           0x00-0x04    S_STORE_*           0x10-0x12
//   S_SCRATCH_LOAD_*   0x05-0x07    S_SCRATCH_STORE_*   0x15-0x17
//   S_BUFFER_LOAD_*    0x08-0x0c    S_BUFFER_STORE_*    0x18-0x1a
1207defm S_LOAD_DWORD            : SM_Real_Loads_gfx10<0x000>;
1208defm S_LOAD_DWORDX2          : SM_Real_Loads_gfx10<0x001>;
1209defm S_LOAD_DWORDX4          : SM_Real_Loads_gfx10<0x002>;
1210defm S_LOAD_DWORDX8          : SM_Real_Loads_gfx10<0x003>;
1211defm S_LOAD_DWORDX16         : SM_Real_Loads_gfx10<0x004>;
1212
1213defm S_SCRATCH_LOAD_DWORD    : SM_Real_Loads_gfx10<0x005>;
1214defm S_SCRATCH_LOAD_DWORDX2  : SM_Real_Loads_gfx10<0x006>;
1215defm S_SCRATCH_LOAD_DWORDX4  : SM_Real_Loads_gfx10<0x007>;
1216
1217defm S_BUFFER_LOAD_DWORD     : SM_Real_Loads_gfx10<0x008>;
1218defm S_BUFFER_LOAD_DWORDX2   : SM_Real_Loads_gfx10<0x009>;
1219defm S_BUFFER_LOAD_DWORDX4   : SM_Real_Loads_gfx10<0x00a>;
1220defm S_BUFFER_LOAD_DWORDX8   : SM_Real_Loads_gfx10<0x00b>;
1221defm S_BUFFER_LOAD_DWORDX16  : SM_Real_Loads_gfx10<0x00c>;
1222
1223defm S_STORE_DWORD           : SM_Real_Stores_gfx10<0x010>;
1224defm S_STORE_DWORDX2         : SM_Real_Stores_gfx10<0x011>;
1225defm S_STORE_DWORDX4         : SM_Real_Stores_gfx10<0x012>;
1226defm S_SCRATCH_STORE_DWORD   : SM_Real_Stores_gfx10<0x015>;
1227defm S_SCRATCH_STORE_DWORDX2 : SM_Real_Stores_gfx10<0x016>;
1228defm S_SCRATCH_STORE_DWORDX4 : SM_Real_Stores_gfx10<0x017>;
1229defm S_BUFFER_STORE_DWORD    : SM_Real_Stores_gfx10<0x018>;
1230defm S_BUFFER_STORE_DWORDX2  : SM_Real_Stores_gfx10<0x019>;
1231defm S_BUFFER_STORE_DWORDX4  : SM_Real_Stores_gfx10<0x01a>;
1232
// GFX10 real records for the timer, wave-id and cache-maintenance pseudos.
// Note the list is not in opcode order (0x25, 0x24, 0x1f, 0x2a, 0x20, 0x21).
1233def S_MEMREALTIME_gfx10              : SMEM_Real_gfx10<0x025, S_MEMREALTIME>;
1234def S_MEMTIME_gfx10                  : SMEM_Real_gfx10<0x024, S_MEMTIME>;
1235def S_GL1_INV_gfx10                  : SMEM_Real_gfx10<0x01f, S_GL1_INV>;
1236def S_GET_WAVEID_IN_WORKGROUP_gfx10  : SMEM_Real_gfx10<0x02a, S_GET_WAVEID_IN_WORKGROUP>;
1237def S_DCACHE_INV_gfx10               : SMEM_Real_gfx10<0x020, S_DCACHE_INV>;
1238
1239def S_DCACHE_WB_gfx10                : SMEM_Real_gfx10<0x021, S_DCACHE_WB>;
1240
// Emits the _IMM, _SGPR and _SGPR_IMM GFX10 real records for the probe pseudo
// family named NAME. Probes reuse SMEM_Real_Store_gfx10, so their register
// field is encoded from `sdata`.
1241multiclass SM_Real_Probe_gfx10<bits<8> op> {
1242  defvar ps = NAME;
1243  def _IMM_gfx10  : SMEM_Real_Store_gfx10 <op, !cast<SM_Pseudo>(ps#_IMM)>;
1244  def _SGPR_gfx10 : SMEM_Real_Store_gfx10 <op, !cast<SM_Pseudo>(ps#_SGPR)>;
1245  def _SGPR_IMM_gfx10
1246    : SMEM_Real_Store_gfx10 <op, !cast<SM_Pseudo>(ps#_SGPR_IMM)>;
1247}
1248
// GFX10 probe records (opcodes 0x26 and 0x27).
1249defm S_ATC_PROBE        : SM_Real_Probe_gfx10 <0x26>;
1250defm S_ATC_PROBE_BUFFER : SM_Real_Probe_gfx10 <0x27>;
1251
// GFX10 real encoding class for scalar-memory atomics, layered on
// SMEM_Real_gfx10. It declares the 7-bit `sdata` field, copies the operand
// constraints of the pseudo, pins the GLC cache-policy bit to the pseudo's
// `glc` flag, and chooses which register field is written into Inst{12-6}.
1252class SMEM_Atomic_Real_gfx10 <bits<8> op, SM_Atomic_Pseudo ps>
1253  : SMEM_Real_gfx10 <op, ps> {
1254
1255  bits<7> sdata;
1256
1257  let Constraints = ps.Constraints;
1258  let DisableEncoding = ps.DisableEncoding;
1259
1260  let cpol{CPolBit.GLC} = ps.glc;
1261
  // Overrides the parent's Inst{14}: without DLC the bit is forced to 0
  // rather than left undefined.
1262  let Inst{14} = !if(ps.has_dlc, cpol{CPolBit.DLC}, 0);
  // When ps.glc is set the register field carries sdst, otherwise sdata.
1263  let Inst{12-6} = !if(ps.glc, sdst{6-0}, sdata{6-0});
1264}
1265
// Emits the six GFX10 real atomic records for the pseudo family named NAME:
// _IMM, _SGPR and _SGPR_IMM, each in a plain and an _RTN form, all sharing
// opcode `op`.
1266multiclass SM_Real_Atomics_gfx10<bits<8> op> {
1267  defvar ps = NAME;
1268  def _IMM_gfx10       : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_IMM)>;
1269  def _SGPR_gfx10      : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR)>;
1270  def _SGPR_IMM_gfx10  : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_IMM)>;
1271  def _IMM_RTN_gfx10   : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_IMM_RTN)>;
1272  def _SGPR_RTN_gfx10  : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_RTN)>;
1273  def _SGPR_IMM_RTN_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_IMM_RTN)>;
1274}
1275
// GFX10 opcode table for the atomic families. The opcode values are the same
// as in the SM_Real_Atomics_vi table:
//   S_BUFFER_ATOMIC_*     0x40-0x4c
//   S_BUFFER_ATOMIC_*_X2  0x60-0x6c
//   S_ATOMIC_*            0x80-0x8c
//   S_ATOMIC_*_X2         0xa0-0xac
1276defm S_BUFFER_ATOMIC_SWAP         : SM_Real_Atomics_gfx10 <0x40>;
1277defm S_BUFFER_ATOMIC_CMPSWAP      : SM_Real_Atomics_gfx10 <0x41>;
1278defm S_BUFFER_ATOMIC_ADD          : SM_Real_Atomics_gfx10 <0x42>;
1279defm S_BUFFER_ATOMIC_SUB          : SM_Real_Atomics_gfx10 <0x43>;
1280defm S_BUFFER_ATOMIC_SMIN         : SM_Real_Atomics_gfx10 <0x44>;
1281defm S_BUFFER_ATOMIC_UMIN         : SM_Real_Atomics_gfx10 <0x45>;
1282defm S_BUFFER_ATOMIC_SMAX         : SM_Real_Atomics_gfx10 <0x46>;
1283defm S_BUFFER_ATOMIC_UMAX         : SM_Real_Atomics_gfx10 <0x47>;
1284defm S_BUFFER_ATOMIC_AND          : SM_Real_Atomics_gfx10 <0x48>;
1285defm S_BUFFER_ATOMIC_OR           : SM_Real_Atomics_gfx10 <0x49>;
1286defm S_BUFFER_ATOMIC_XOR          : SM_Real_Atomics_gfx10 <0x4a>;
1287defm S_BUFFER_ATOMIC_INC          : SM_Real_Atomics_gfx10 <0x4b>;
1288defm S_BUFFER_ATOMIC_DEC          : SM_Real_Atomics_gfx10 <0x4c>;
1289
1290defm S_BUFFER_ATOMIC_SWAP_X2      : SM_Real_Atomics_gfx10 <0x60>;
1291defm S_BUFFER_ATOMIC_CMPSWAP_X2   : SM_Real_Atomics_gfx10 <0x61>;
1292defm S_BUFFER_ATOMIC_ADD_X2       : SM_Real_Atomics_gfx10 <0x62>;
1293defm S_BUFFER_ATOMIC_SUB_X2       : SM_Real_Atomics_gfx10 <0x63>;
1294defm S_BUFFER_ATOMIC_SMIN_X2      : SM_Real_Atomics_gfx10 <0x64>;
1295defm S_BUFFER_ATOMIC_UMIN_X2      : SM_Real_Atomics_gfx10 <0x65>;
1296defm S_BUFFER_ATOMIC_SMAX_X2      : SM_Real_Atomics_gfx10 <0x66>;
1297defm S_BUFFER_ATOMIC_UMAX_X2      : SM_Real_Atomics_gfx10 <0x67>;
1298defm S_BUFFER_ATOMIC_AND_X2       : SM_Real_Atomics_gfx10 <0x68>;
1299defm S_BUFFER_ATOMIC_OR_X2        : SM_Real_Atomics_gfx10 <0x69>;
1300defm S_BUFFER_ATOMIC_XOR_X2       : SM_Real_Atomics_gfx10 <0x6a>;
1301defm S_BUFFER_ATOMIC_INC_X2       : SM_Real_Atomics_gfx10 <0x6b>;
1302defm S_BUFFER_ATOMIC_DEC_X2       : SM_Real_Atomics_gfx10 <0x6c>;
1303
1304defm S_ATOMIC_SWAP                : SM_Real_Atomics_gfx10 <0x80>;
1305defm S_ATOMIC_CMPSWAP             : SM_Real_Atomics_gfx10 <0x81>;
1306defm S_ATOMIC_ADD                 : SM_Real_Atomics_gfx10 <0x82>;
1307defm S_ATOMIC_SUB                 : SM_Real_Atomics_gfx10 <0x83>;
1308defm S_ATOMIC_SMIN                : SM_Real_Atomics_gfx10 <0x84>;
1309defm S_ATOMIC_UMIN                : SM_Real_Atomics_gfx10 <0x85>;
1310defm S_ATOMIC_SMAX                : SM_Real_Atomics_gfx10 <0x86>;
1311defm S_ATOMIC_UMAX                : SM_Real_Atomics_gfx10 <0x87>;
1312defm S_ATOMIC_AND                 : SM_Real_Atomics_gfx10 <0x88>;
1313defm S_ATOMIC_OR                  : SM_Real_Atomics_gfx10 <0x89>;
1314defm S_ATOMIC_XOR                 : SM_Real_Atomics_gfx10 <0x8a>;
1315defm S_ATOMIC_INC                 : SM_Real_Atomics_gfx10 <0x8b>;
1316defm S_ATOMIC_DEC                 : SM_Real_Atomics_gfx10 <0x8c>;
1317
1318defm S_ATOMIC_SWAP_X2             : SM_Real_Atomics_gfx10 <0xa0>;
1319defm S_ATOMIC_CMPSWAP_X2          : SM_Real_Atomics_gfx10 <0xa1>;
1320defm S_ATOMIC_ADD_X2              : SM_Real_Atomics_gfx10 <0xa2>;
1321defm S_ATOMIC_SUB_X2              : SM_Real_Atomics_gfx10 <0xa3>;
1322defm S_ATOMIC_SMIN_X2             : SM_Real_Atomics_gfx10 <0xa4>;
1323defm S_ATOMIC_UMIN_X2             : SM_Real_Atomics_gfx10 <0xa5>;
1324defm S_ATOMIC_SMAX_X2             : SM_Real_Atomics_gfx10 <0xa6>;
1325defm S_ATOMIC_UMAX_X2             : SM_Real_Atomics_gfx10 <0xa7>;
1326defm S_ATOMIC_AND_X2              : SM_Real_Atomics_gfx10 <0xa8>;
1327defm S_ATOMIC_OR_X2               : SM_Real_Atomics_gfx10 <0xa9>;
1328defm S_ATOMIC_XOR_X2              : SM_Real_Atomics_gfx10 <0xaa>;
1329defm S_ATOMIC_INC_X2              : SM_Real_Atomics_gfx10 <0xab>;
1330defm S_ATOMIC_DEC_X2              : SM_Real_Atomics_gfx10 <0xac>;
1331
// Emits the _IMM, _SGPR and _SGPR_IMM GFX10 real records for the discard
// pseudo family named NAME, all with opcode `op`.
1332multiclass SM_Real_Discard_gfx10<bits<8> op> {
1333  defvar ps = NAME;
1334  def _IMM_gfx10  : SMEM_Real_gfx10 <op, !cast<SM_Pseudo>(ps#_IMM)>;
1335  def _SGPR_gfx10 : SMEM_Real_gfx10 <op, !cast<SM_Pseudo>(ps#_SGPR)>;
1336  def _SGPR_IMM_gfx10 : SMEM_Real_gfx10 <op, !cast<SM_Pseudo>(ps#_SGPR_IMM)>;
1337}
1338
// GFX10 discard records (opcodes 0x28 and 0x29).
1339defm S_DCACHE_DISCARD    : SM_Real_Discard_gfx10 <0x28>;
1340defm S_DCACHE_DISCARD_X2 : SM_Real_Discard_gfx10 <0x29>;
1341
// Searchable table with one row per record derived from SM_Real. Each row
// holds the instruction's Opcode and its is_buffer flag, exposed in C++ as the
// struct type SMInfo. Rows are keyed by Opcode, and the generated lookup
// function is named getSMEMOpcodeHelper.
1342def SMInfoTable : GenericTable {
1343  let FilterClass = "SM_Real";
1344  let CppTypeName = "SMInfo";
1345  let Fields = ["Opcode", "is_buffer"];
1346
1347  let PrimaryKey = ["Opcode"];
1348  let PrimaryKeyName = "getSMEMOpcodeHelper";
1349}
1350
1351//===----------------------------------------------------------------------===//
1352// GFX11.
1353//===----------------------------------------------------------------------===//
1354
// GFX11 real encoding: the common GFX10+ layout with the GFX11 encoding family
// and SGPR_NULL_gfx11plus. `opName` defaults to the pseudo's mnemonic.
// Assembled only when isGFX11Only holds and decoded in the "GFX11" namespace.
// DLC and GLC sit at Inst{13} and Inst{14} here, and are encoded as 0 (not
// left undefined) when the pseudo lacks them.
1355class SMEM_Real_gfx11<bits<8> op, SM_Pseudo ps, string opName = ps.Mnemonic> :
1356    SMEM_Real_10Plus_common<op, ps, opName, SIEncodingFamily.GFX11,
1357                            SGPR_NULL_gfx11plus> {
1358  let AssemblerPredicate = isGFX11Only;
1359  let DecoderNamespace = "GFX11";
1360  let Inst{13}    = !if(ps.has_dlc, cpol{CPolBit.DLC}, 0);
1361  let Inst{14}    = !if(ps.has_glc, cpol{CPolBit.GLC}, 0);
1362}
1363
// Convenience wrapper around SMEM_Real_gfx11 that takes the pseudo by name
// (string), resolves it with !cast, and forwards an explicit mnemonic.
1364class SMEM_Real_Load_gfx11<bits<8> op, string ps, string opName> :
1365    SMEM_Real_gfx11<op, !cast<SM_Pseudo>(ps), opName>;
1366
// Emits the _IMM, _SGPR and _SGPR_IMM GFX11 real records for the load pseudo
// family named by the string `ps`. The printed mnemonic is the lower-cased
// NAME of the defm, not the pseudo's mnemonic. A mnemonic alias from the
// pseudo's mnemonic to that new name is also registered under isGFX11Plus.
// NOTE(review): presumably this lets the assembler keep accepting the old
// spelling - confirm against AMDGPUMnemonicAlias.
1367multiclass SM_Real_Loads_gfx11<bits<8> op, string ps> {
1368  defvar opName = !tolower(NAME);
1369  def _IMM_gfx11 : SMEM_Real_Load_gfx11<op, ps#"_IMM", opName>;
1370  def _SGPR_gfx11 : SMEM_Real_Load_gfx11<op, ps#"_SGPR", opName>;
1371  def _SGPR_IMM_gfx11 : SMEM_Real_Load_gfx11<op, ps#"_SGPR_IMM", opName>;
1372  def : AMDGPUMnemonicAlias<!cast<SM_Pseudo>(ps#"_IMM").Mnemonic, opName> {
1373    let AssemblerPredicate = isGFX11Plus;
1374  }
1375}
1376
// GFX11 load records under the *_B32 ... *_B512 names, each backed by the
// DWORD-named pseudo of the same width. S_LOAD_* use opcodes 0x00-0x04 and
// S_BUFFER_LOAD_* use 0x08-0x0c.
1377defm S_LOAD_B32  : SM_Real_Loads_gfx11<0x000, "S_LOAD_DWORD">;
1378defm S_LOAD_B64  : SM_Real_Loads_gfx11<0x001, "S_LOAD_DWORDX2">;
1379defm S_LOAD_B128 : SM_Real_Loads_gfx11<0x002, "S_LOAD_DWORDX4">;
1380defm S_LOAD_B256 : SM_Real_Loads_gfx11<0x003, "S_LOAD_DWORDX8">;
1381defm S_LOAD_B512 : SM_Real_Loads_gfx11<0x004, "S_LOAD_DWORDX16">;
1382
1383defm S_BUFFER_LOAD_B32  : SM_Real_Loads_gfx11<0x008, "S_BUFFER_LOAD_DWORD">;
1384defm S_BUFFER_LOAD_B64  : SM_Real_Loads_gfx11<0x009, "S_BUFFER_LOAD_DWORDX2">;
1385defm S_BUFFER_LOAD_B128 : SM_Real_Loads_gfx11<0x00a, "S_BUFFER_LOAD_DWORDX4">;
1386defm S_BUFFER_LOAD_B256 : SM_Real_Loads_gfx11<0x00b, "S_BUFFER_LOAD_DWORDX8">;
1387defm S_BUFFER_LOAD_B512 : SM_Real_Loads_gfx11<0x00c, "S_BUFFER_LOAD_DWORDX16">;
1388
// GFX11 real records for the two cache-invalidate pseudos (opcodes 0x20, 0x21).
1389def S_GL1_INV_gfx11    : SMEM_Real_gfx11<0x020, S_GL1_INV>;
1390def S_DCACHE_INV_gfx11 : SMEM_Real_gfx11<0x021, S_DCACHE_INV>;
1391
// GFX11 real encoding for pseudos whose register operand is `sdata` rather
// than `sdst`: sdst is unset and Inst{12-6} is re-pointed at sdata. The field
// is still gated on the pseudo's has_sdst flag.
1392class SMEM_Real_Store_gfx11 <bits<8> op, SM_Pseudo ps> : SMEM_Real_gfx11<op, ps> {
1393  // encoding
1394  bits<7> sdata;
1395
1396  let sdst = ?;
1397  let Inst{12-6}  = !if(ps.has_sdst, sdata{6-0}, ?);
1398}
1399
// Emits the _IMM, _SGPR and _SGPR_IMM GFX11 real records for the probe pseudo
// family named NAME. Each pseudo is looked up as an SM_Probe_Pseudo and
// encoded through SMEM_Real_Store_gfx11, so the register field comes from
// `sdata`.
1400multiclass SM_Real_Probe_gfx11<bits<8> op> {
1401  defvar ps = NAME;
1402  def _IMM_gfx11  : SMEM_Real_Store_gfx11 <op, !cast<SM_Probe_Pseudo>(ps#_IMM)>;
1403  def _SGPR_gfx11 : SMEM_Real_Store_gfx11 <op, !cast<SM_Probe_Pseudo>(ps#_SGPR)>;
1404  def _SGPR_IMM_gfx11
1405    : SMEM_Real_Store_gfx11 <op, !cast<SM_Probe_Pseudo>(ps#_SGPR_IMM)>;
1406}
1407
// GFX11 probe records (opcodes 0x22 and 0x23).
1408defm S_ATC_PROBE        : SM_Real_Probe_gfx11 <0x22>;
1409defm S_ATC_PROBE_BUFFER : SM_Real_Probe_gfx11 <0x23>;
1410
1411//===----------------------------------------------------------------------===//
1412// GFX12.
1413//===----------------------------------------------------------------------===//
1414
// 64-bit (Enc64) encoding fields shared by the GFX12+ real classes. Compared
// with SMEM_Real_10Plus_common the opcode is 6 bits wide at Inst{18-13} and
// the immediate offset is 24 bits wide at Inst{55-32}. The sbase/sdst fields
// are not set here; derived classes assign them.
//   sgpr_null - register whose HWEncoding fills the soffset field when only an
//               immediate offset is present
1415class SMEM_Real_gfx12Plus<bits<6> op, SM_Pseudo ps, string opName,
1416                          int subtarget, RegisterWithSubRegs sgpr_null> :
1417    SM_Real<ps, opName>, SIMCInstr<ps.PseudoInstr, subtarget>, Enc64 {
1418
1419  let Inst{18-13} = op;
1420  let Inst{31-26} = 0x3d;
1421
  // Whichever offset field is unused gets a fixed value (0 for the immediate,
  // the sgpr_null encoding for soffset). With neither offset, both stay
  // undefined.
1422  let Inst{55-32} = !if(ps.has_offset, offset{23-0}, !if(ps.has_soffset, 0, ?));
1423  let Inst{63-57} = !if(ps.has_soffset, soffset{6-0},
1424                        !if(ps.has_offset, sgpr_null.HWEncoding{6-0}, ?));
1425}
1426
// GFX12 specialization of SMEM_Real_gfx12Plus. It binds the encoding family to
// SIEncodingFamily.GFX12 and the null register to SGPR_NULL_gfx11plus, sets the
// assembler predicate (isGFX12Plus) and decoder namespace ("GFX12"), and
// encodes the sbase and sdst fields.
//   op     - 6-bit opcode.
//   ps     - pseudo instruction being encoded.
//   opName - mnemonic; defaults to the pseudo's Mnemonic.
1427class SMEM_Real_gfx12<bits<6> op, SM_Pseudo ps, string opName = ps.Mnemonic> :
1428    SMEM_Real_gfx12Plus<op, ps, opName, SIEncodingFamily.GFX12,
1429                        SGPR_NULL_gfx11plus> {
1430  let AssemblerPredicate = isGFX12Plus;
1431  let DecoderNamespace = "GFX12";
1432
  // Only sbase{6-1} is encoded; bit 0 of sbase is dropped.
  // NOTE(review): presumably because sbase is an even-aligned register pair --
  // confirm.
1433  let Inst{5-0}   = !if(ps.has_sbase, sbase{6-1}, ?);
  // All 7 bits of sdst, when the pseudo has a destination.
1434  let Inst{12-6}  = !if(ps.has_sdst, sdst{6-0}, ?);
1435}
1436
// GFX12 real class for pseudos that carry an sdata field in place of sdst.
// It overrides the Inst{12-6} assignment inherited from SMEM_Real_gfx12: only
// sdata{4-0} is encoded (in Inst{10-6}) and Inst{12-11} are forced to 0.
//   op - 6-bit opcode.
//   ps - pseudo instruction; its has_sdst flag gates the sdata encoding.
1437class SMEM_Real_Prefetch_gfx12<bits<6> op, SM_Pseudo ps> :
1438    SMEM_Real_gfx12<op, ps> {
1439  bits<7> sdata; // Only 5 bits of sdata are supported.
1440
  // sdst is not used by this class.
1441  let sdst = ?;
1442  let Inst{12-11} = 0; // Unused sdata bits.
1443  let Inst{10-6}  = !if(ps.has_sdst, sdata{4-0}, ?);
1444}
1445
// GFX12 real class for a single offset variant of a scalar load.
//   op      - 6-bit opcode.
//   ps      - name prefix of the pseudo; offsets.Variant is appended before the
//             name is !cast to a record.
//   opName  - mnemonic.
//   offsets - OffsetMode providing the Variant suffix and the offset operand
//             list (Ins).
// The input operand list is replaced with: sbase (register class taken from the
// load pseudo's BaseClass), then the offset operands, then cpol.
1446class SMEM_Real_Load_gfx12<bits<6> op, string ps, string opName, OffsetMode offsets> :
1447    SMEM_Real_gfx12<op, !cast<SM_Pseudo>(ps # offsets.Variant), opName> {
1448  RegisterClass BaseClass = !cast<SM_Load_Pseudo>(ps # offsets.Variant).BaseClass;
1449  let InOperandList = !con((ins BaseClass:$sbase), offsets.Ins, (ins CPol:$cpol));
1450
  // Cache-policy bits taken from the cpol operand.
1451  let Inst{22-21} = cpol{4-3}; // scope
1452  let Inst{24-23} = cpol{1-0}; // th - only lower 2 bits are supported
1453}
1454
// Instantiates the GFX12 real variants of a scalar load: _IMM (IMM_Offset) and
// _SGPR_IMM (SGPR_IMM_OptOffset).
//   op - 6-bit opcode shared by both variants.
//   ps - name prefix of the pseudo to encode; defaults to the defm NAME and is
//        passed explicitly when the pseudo is named differently from the real
//        instruction (e.g. "S_LOAD_DWORD" for S_LOAD_B32).
// The mnemonic of both variants is the lower-cased defm NAME.
1455multiclass SM_Real_Loads_gfx12<bits<6> op, string ps = NAME> {
1456  defvar opName = !tolower(NAME);
1457  def _IMM_gfx12 : SMEM_Real_Load_gfx12<op, ps, opName, IMM_Offset>;
1458  def _SGPR_IMM_gfx12 : SMEM_Real_Load_gfx12<op, ps, opName, SGPR_IMM_OptOffset>;
1459}
1460
// GFX12 scalar loads. The B<n> records are built from the S_LOAD_DWORD*
// pseudos named in the second argument. Opcodes are not in size order: B96
// uses 0x05, after B512 at 0x04.
1461defm S_LOAD_B32  : SM_Real_Loads_gfx12<0x00, "S_LOAD_DWORD">;
1462defm S_LOAD_B64  : SM_Real_Loads_gfx12<0x01, "S_LOAD_DWORDX2">;
1463defm S_LOAD_B96  : SM_Real_Loads_gfx12<0x05, "S_LOAD_DWORDX3">;
1464defm S_LOAD_B128 : SM_Real_Loads_gfx12<0x02, "S_LOAD_DWORDX4">;
1465defm S_LOAD_B256 : SM_Real_Loads_gfx12<0x03, "S_LOAD_DWORDX8">;
1466defm S_LOAD_B512 : SM_Real_Loads_gfx12<0x04, "S_LOAD_DWORDX16">;
1467
// The I8/U8/I16/U16 loads pass no pseudo name, so ps defaults to the defm NAME
// (the pseudo and the real instruction share a name). Opcodes 0x08..0x0b.
1468defm S_LOAD_I8   : SM_Real_Loads_gfx12<0x08>;
1469defm S_LOAD_U8   : SM_Real_Loads_gfx12<0x09>;
1470defm S_LOAD_I16  : SM_Real_Loads_gfx12<0x0a>;
1471defm S_LOAD_U16  : SM_Real_Loads_gfx12<0x0b>;
1472
// GFX12 buffer loads. The B<n> records are built from the
// S_BUFFER_LOAD_DWORD* pseudos named in the second argument. Opcodes are not
// in size order: B96 uses 0x15, after B512 at 0x14.
1473defm S_BUFFER_LOAD_B32  : SM_Real_Loads_gfx12<0x10, "S_BUFFER_LOAD_DWORD">;
1474defm S_BUFFER_LOAD_B64  : SM_Real_Loads_gfx12<0x11, "S_BUFFER_LOAD_DWORDX2">;
1475defm S_BUFFER_LOAD_B96  : SM_Real_Loads_gfx12<0x15, "S_BUFFER_LOAD_DWORDX3">;
1476defm S_BUFFER_LOAD_B128 : SM_Real_Loads_gfx12<0x12, "S_BUFFER_LOAD_DWORDX4">;
1477defm S_BUFFER_LOAD_B256 : SM_Real_Loads_gfx12<0x13, "S_BUFFER_LOAD_DWORDX8">;
1478defm S_BUFFER_LOAD_B512 : SM_Real_Loads_gfx12<0x14, "S_BUFFER_LOAD_DWORDX16">;
1479
// The I8/U8/I16/U16 buffer loads pass no pseudo name, so ps defaults to the
// defm NAME. Opcodes 0x18..0x1b.
1480defm S_BUFFER_LOAD_I8  : SM_Real_Loads_gfx12<0x18>;
1481defm S_BUFFER_LOAD_U8  : SM_Real_Loads_gfx12<0x19>;
1482defm S_BUFFER_LOAD_I16 : SM_Real_Loads_gfx12<0x1a>;
1483defm S_BUFFER_LOAD_U16 : SM_Real_Loads_gfx12<0x1b>;
1484
// GFX12 real record for the S_DCACHE_INV pseudo at opcode 0x21; opName is not
// given, so the mnemonic defaults to the pseudo's Mnemonic.
1485def S_DCACHE_INV_gfx12 : SMEM_Real_gfx12<0x021, S_DCACHE_INV>;
1486
// GFX12 real records for the prefetch pseudos, opcodes 0x24..0x28. All are
// encoded through SMEM_Real_Prefetch_gfx12, i.e. with a 5-bit sdata field.
1487def S_PREFETCH_INST_gfx12        : SMEM_Real_Prefetch_gfx12<0x24, S_PREFETCH_INST>;
1488def S_PREFETCH_INST_PC_REL_gfx12 : SMEM_Real_Prefetch_gfx12<0x25, S_PREFETCH_INST_PC_REL>;
1489def S_PREFETCH_DATA_gfx12        : SMEM_Real_Prefetch_gfx12<0x26, S_PREFETCH_DATA>;
1490def S_BUFFER_PREFETCH_DATA_gfx12 : SMEM_Real_Prefetch_gfx12<0x27, S_BUFFER_PREFETCH_DATA>;
1491def S_PREFETCH_DATA_PC_REL_gfx12 : SMEM_Real_Prefetch_gfx12<0x28, S_PREFETCH_DATA_PC_REL>;
1492
// Defines the two GFX12 real variants of a probe instruction: _IMM and
// _SGPR_IMM. Both are encoded through SMEM_Real_Prefetch_gfx12, so only
// sdata{4-0} is encoded.
//   op - 6-bit opcode shared by both variants.
// NAME (the defm name) is the prefix of the SM_Probe_Pseudo records looked up
// with !cast. Note that the _SGPR_IMM real record is built from the
// <NAME>_SGPR_OPT_IMM pseudo, not from <NAME>_SGPR_IMM.
1493multiclass SMEM_Real_Probe_gfx12<bits<6> op> {
1494  defvar ps = NAME;
1495  def _IMM_gfx12      : SMEM_Real_Prefetch_gfx12<op, !cast<SM_Probe_Pseudo>(ps#_IMM)>;
1496  def _SGPR_IMM_gfx12 : SMEM_Real_Prefetch_gfx12<op, !cast<SM_Probe_Pseudo>(ps#_SGPR_OPT_IMM)>;
1497}
1498
// GFX12 probe instructions: S_ATC_PROBE at opcode 0x22 and S_ATC_PROBE_BUFFER
// at opcode 0x23, each expanded into _IMM/_SGPR_IMM real records.
1499defm S_ATC_PROBE        : SMEM_Real_Probe_gfx12<0x22>;
1500defm S_ATC_PROBE_BUFFER : SMEM_Real_Probe_gfx12<0x23>;
1501