xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/DSInstructions.td (revision 3ceba58a7509418b47b8fca2d2b6bbf088714e26)
1//===-- DSInstructions.td - DS Instruction Definitions --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
// Common base class of every DS pseudo instruction in this file.
//
//   opName  - mnemonic; kept in Mnemonic and forwarded to SIMCInstr.
//   outs    - output operand dag.
//   ins     - input operand dag.
//   asmOps  - assembly operand string; kept in AsmOperands.
//   pattern - optional selection patterns (none by default).
//
// The pseudo is given an empty asm string; DS_Real assembles the final string
// from its opName and the pseudo's AsmOperands.
class DS_Pseudo <string opName, dag outs, dag ins, string asmOps,
                 list<dag> pattern = []> :
    InstSI <outs, ins, "", pattern>,
    SIMCInstr <opName, SIEncodingFamily.NONE> {

  // Pseudo record used for code generation only.
  let isPseudo = 1;
  let isCodeGenOnly = 1;

  let DS = 1;
  let GWS = 0;
  let LGKM_CNT = 1;
  let Size = 8;
  let UseNamedOperandTable = 1;
  let SchedRW = [WriteLDS];

  // Most instructions load and store data, so set this as the default.
  let mayLoad = 1;
  let mayStore = 1;
  let hasSideEffects = 0;

  string Mnemonic = opName;
  string AsmOperands = asmOps;

  // These bits are a kind of hack: it would be more natural to test the
  // "outs" and "ins" dags for the presence of particular operands.
  bits<1> has_vdst = 1;
  bits<1> has_addr = 1;
  bits<1> has_data0 = 1;
  bits<1> has_data1 = 1;

  bits<1> has_gws_data0 = 0; // data0 is encoded as addr

  bits<1> has_offset  = 1; // has "offset" that should be split to offset0,1
  bits<1> has_offset0 = 1;
  bits<1> has_offset1 = 1;

  bits<1> has_gds = 1;
  bits<1> gdsValue = 0; // if has_gds == 0 set gds to this value

  bits<1> has_m0_read = 1;

  // M0 is listed as a use only when has_m0_read is set; EXEC is always used.
  let Uses = !if(has_m0_read, [M0, EXEC], [EXEC]);
}
52
// Real (non-pseudo) counterpart of a DS_Pseudo.
//
//   ps     - the pseudo whose operand lists, flags and predicates are copied.
//   opName - mnemonic used in the asm string; defaults to ps.Mnemonic.
//
// The asm string is opName followed by the pseudo's AsmOperands.
class DS_Real <DS_Pseudo ps, string opName = ps.Mnemonic> :
  InstSI <ps.OutOperandList, ps.InOperandList, opName # ps.AsmOperands>,
  Enc64 {

  let isPseudo = 0;
  let isCodeGenOnly = 0;
  let LGKM_CNT = 1;
  let DS = 1;
  let UseNamedOperandTable = 1;

  // copy relevant pseudo op flags
  let GWS                = ps.GWS;
  let SubtargetPredicate = ps.SubtargetPredicate;
  let WaveSizePredicate  = ps.WaveSizePredicate;
  let OtherPredicates    = ps.OtherPredicates;
  let TSFlags            = ps.TSFlags;
  let SchedRW            = ps.SchedRW;
  let mayLoad            = ps.mayLoad;
  let mayStore           = ps.mayStore;
  let IsAtomicRet        = ps.IsAtomicRet;
  let IsAtomicNoRet      = ps.IsAtomicNoRet;
  let Uses               = ps.Uses;
  let Defs               = ps.Defs;

  let Constraints = ps.Constraints;
  let DisableEncoding = ps.DisableEncoding;

  // encoding fields
  bits<10> vdst;
  bits<1> gds;
  bits<8> addr;
  bits<10> data0;
  bits<10> data1;
  bits<8> offset0;
  bits<8> offset1;

  // When the pseudo has a single 16-bit "offset" operand, split it into the
  // two 8-bit fields; otherwise leave them unset here.
  // The "..." range form is used because the hyphenated form ("7-0") is
  // deprecated in TableGen.
  bits<16> offset;
  let offset0 = !if(ps.has_offset, offset{7...0}, ?);
  let offset1 = !if(ps.has_offset, offset{15...8}, ?);

  // acc is bit 9 of vdst when a vdst exists, else bit 9 of data0 when data0
  // (or the GWS data0) exists, else 0.
  bits<1> acc = !if(ps.has_vdst, vdst{9},
                    !if(!or(ps.has_data0, ps.has_gws_data0), data0{9}, 0));
}
96
97// DS Pseudo instructions
98
// DS pseudo with no address operand, one data operand and no result.
//   rc - register class used to pick the data0 operand type.
class DS_0A1D_NORET<string opName, RegisterClass rc = VGPR_32> :
  DS_Pseudo<opName,
            (outs),
            (ins getLdStRegisterOperand<rc>.ret:$data0, Offset:$offset, gds:$gds),
            " $data0$offset$gds"> {
  // Only data0, offset and gds are present.
  let has_vdst  = 0;
  let has_addr  = 0;
  let has_data1 = 0;
}
109
// DS pseudo with one address operand, one data operand and no result.
// Marked IsAtomicNoRet.
//   rc - register class used to pick the data0 operand type.
class DS_1A1D_NORET<string opName, RegisterClass rc = VGPR_32> :
  DS_Pseudo<opName,
            (outs),
            (ins VGPR_32:$addr, getLdStRegisterOperand<rc>.ret:$data0,
                 Offset:$offset, gds:$gds),
            " $addr, $data0$offset$gds"> {
  let IsAtomicNoRet = 1;
  let has_vdst  = 0;
  let has_data1 = 0;
}

// Defines the default record plus a "_gfx9" record with has_m0_read = 0.
multiclass DS_1A1D_NORET_mc<string opName, RegisterClass rc = VGPR_32> {
  def "" : DS_1A1D_NORET<opName, rc>;

  let has_m0_read = 0 in
  def _gfx9 : DS_1A1D_NORET<opName, rc>;
}

// Defines only an unsuffixed record, with has_m0_read = 0.
multiclass DS_1A1D_NORET_mc_gfx9<string opName, RegisterClass rc = VGPR_32> {
  let has_m0_read = 0 in
  def "" : DS_1A1D_NORET<opName, rc>;
}
134
// DS pseudo with one address operand, two data operands and no result.
// Marked IsAtomicNoRet.
//   rc      - register class used to derive the default data operand type.
//   data_op - operand type shared by data0 and data1.
class DS_1A2D_NORET<string opName, RegisterClass rc = VGPR_32,
                    RegisterOperand data_op = getLdStRegisterOperand<rc>.ret> :
  DS_Pseudo<opName,
            (outs),
            (ins VGPR_32:$addr, data_op:$data0, data_op:$data1,
                 Offset:$offset, gds:$gds),
            " $addr, $data0, $data1$offset$gds"> {
  let IsAtomicNoRet = 1;
  let has_vdst = 0;
}

// Defines the default record plus a "_gfx9" record with has_m0_read = 0.
multiclass DS_1A2D_NORET_mc<string opName, RegisterClass rc = VGPR_32> {
  def "" : DS_1A2D_NORET<opName, rc>;

  let has_m0_read = 0 in
  def _gfx9 : DS_1A2D_NORET<opName, rc>;
}
153
// DS pseudo with one address operand, two data operands, two separate
// offset operands (offset0/offset1) and no result.
//   rc      - register class used to derive the default data operand type.
//   data_op - operand type shared by data0 and data1.
class DS_1A2D_Off8_NORET <string opName, RegisterClass rc = VGPR_32,
                          RegisterOperand data_op = getLdStRegisterOperand<rc>.ret> :
  DS_Pseudo<opName,
            (outs),
            (ins VGPR_32:$addr, data_op:$data0, data_op:$data1,
                 Offset0:$offset0, Offset1:$offset1, gds:$gds),
            " $addr, $data0, $data1$offset0$offset1$gds"> {
  // offset0/offset1 are explicit operands, so there is no combined "offset".
  let has_offset = 0;
  let has_vdst = 0;
}

// Defines the default record plus a "_gfx9" record with has_m0_read = 0.
multiclass DS_1A2D_Off8_NORET_mc <string opName, RegisterClass rc = VGPR_32> {
  def "" : DS_1A2D_Off8_NORET<opName, rc>;

  let has_m0_read = 0 in
  def _gfx9 : DS_1A2D_Off8_NORET<opName, rc>;
}
173
// DS pseudo with no address operand, one data operand and a result. There is
// no gds operand: has_gds is 0, gdsValue is 1, and "gds" is printed as a
// literal in the asm string.
//   rc / dst_op  - register class / operand type of the result.
//   src / src_op - register class / operand type of data0.
class DS_0A1D_RET_GDS<string opName, RegisterClass rc = VGPR_32,
                      RegisterClass src = rc,
                      RegisterOperand dst_op = getLdStRegisterOperand<rc>.ret,
                      RegisterOperand src_op = getLdStRegisterOperand<src>.ret> :
  DS_Pseudo<opName,
            (outs dst_op:$vdst),
            (ins src_op:$data0, Offset:$offset),
            " $vdst, $data0$offset gds"> {
  let hasSideEffects = 1;

  let has_addr  = 0;
  let has_data1 = 0;

  let has_gds  = 0;
  let gdsValue = 1;
}
188
// DS pseudo with one address operand, one data operand and a result.
// Marked IsAtomicRet and requests the post-ISel hook.
//   rc      - register class used to derive the default data operand type.
//   data_op - operand type shared by vdst and data0.
class DS_1A1D_RET <string opName, RegisterClass rc = VGPR_32,
                   RegisterOperand data_op = getLdStRegisterOperand<rc>.ret> :
  DS_Pseudo<opName,
            (outs data_op:$vdst),
            (ins VGPR_32:$addr, data_op:$data0, Offset:$offset, gds:$gds),
            " $vdst, $addr, $data0$offset$gds"> {
  let IsAtomicRet = 1;
  let hasPostISelHook = 1;
  let has_data1 = 0;
}

// Defines the default record plus a "_gfx9" record with has_m0_read = 0.
multiclass DS_1A1D_RET_mc <string opName, RegisterClass rc = VGPR_32> {
  def "" : DS_1A1D_RET<opName, rc>;

  let has_m0_read = 0 in
  def _gfx9 : DS_1A1D_RET<opName, rc>;
}

// Defines only an unsuffixed record, with has_m0_read = 0.
multiclass DS_1A1D_RET_mc_gfx9 <string opName, RegisterClass rc = VGPR_32> {
  let has_m0_read = 0 in
  def "" : DS_1A1D_RET<opName, rc>;
}
214
// DS pseudo with one address operand, two data operands and a result.
// Marked IsAtomicRet and requests the post-ISel hook.
//   rc / dst_op  - register class / operand type of the result.
//   src / src_op - register class / operand type of data0 and data1.
class DS_1A2D_RET<string opName,
                  RegisterClass rc = VGPR_32,
                  RegisterClass src = rc,
                  RegisterOperand dst_op = getLdStRegisterOperand<rc>.ret,
                  RegisterOperand src_op = getLdStRegisterOperand<src>.ret> :
  DS_Pseudo<opName,
            (outs dst_op:$vdst),
            (ins VGPR_32:$addr, src_op:$data0, src_op:$data1,
                 Offset:$offset, gds:$gds),
            " $vdst, $addr, $data0, $data1$offset$gds"> {
  let IsAtomicRet = 1;
  let hasPostISelHook = 1;
}

// Defines the default record plus a "_gfx9" record with has_m0_read = 0.
multiclass DS_1A2D_RET_mc<string opName,
                          RegisterClass rc = VGPR_32,
                          RegisterClass src = rc> {
  def "" : DS_1A2D_RET<opName, rc, src>;

  let has_m0_read = 0 in
  def _gfx9 : DS_1A2D_RET<opName, rc, src>;
}
238
// DS pseudo with one address operand, two data operands, two separate
// offset operands (offset0/offset1) and a result. Requests the post-ISel hook.
//   rc / dst_op  - register class / operand type of the result.
//   src / src_op - register class / operand type of data0 and data1.
class DS_1A2D_Off8_RET<string opName,
                       RegisterClass rc = VGPR_32,
                       RegisterClass src = rc,
                       RegisterOperand dst_op = getLdStRegisterOperand<rc>.ret,
                       RegisterOperand src_op = getLdStRegisterOperand<src>.ret> :
  DS_Pseudo<opName,
            (outs dst_op:$vdst),
            (ins VGPR_32:$addr, src_op:$data0, src_op:$data1,
                 Offset0:$offset0, Offset1:$offset1, gds:$gds),
            " $vdst, $addr, $data0, $data1$offset0$offset1$gds"> {
  let hasPostISelHook = 1;
  // offset0/offset1 are explicit operands, so there is no combined "offset".
  let has_offset = 0;
}

// Defines the default record plus a "_gfx9" record with has_m0_read = 0.
multiclass DS_1A2D_Off8_RET_mc<string opName,
                               RegisterClass rc = VGPR_32,
                               RegisterClass src = rc> {
  def "" : DS_1A2D_Off8_RET<opName, rc, src>;

  let has_m0_read = 0 in
  def _gfx9 : DS_1A2D_Off8_RET<opName, rc, src>;
}
262
// DS pseudo producing two outputs ($vdst and $addr). The $addr output is tied
// to the $addr_in input, and $addr_in is excluded from encoding. There is no
// gds operand (has_gds = 0, gdsValue = 0).
class DS_BVH_STACK<string opName> :
  DS_Pseudo<opName,
            (outs getLdStRegisterOperand<VGPR_32>.ret:$vdst, VGPR_32:$addr),
            (ins VGPR_32:$addr_in, getLdStRegisterOperand<VGPR_32>.ret:$data0,
                 VReg_128:$data1, Offset:$offset),
            " $vdst, $addr, $data0, $data1$offset"> {
  let Constraints = "$addr = $addr_in";
  let DisableEncoding = "$addr_in";

  let has_gds = 0;
  let gdsValue = 0;

  // TODO: Use MMOs in the LDS address space instead of hasSideEffects = 1.
  let hasSideEffects = 1;

  // One scheduling write entry for each of the two outputs.
  let SchedRW = [WriteLDS, WriteLDS];
}
276
// DS pseudo with one address operand, no data operands and a result.
//   rc            - register class used to derive the default data operand.
//   HasTiedOutput - when set, an extra $vdst_in input is appended, tied to
//                   $vdst and excluded from encoding.
//   ofs           - operand type used for $offset.
//   data_op       - operand type of vdst (and of vdst_in when present).
class DS_1A_RET<string opName, RegisterClass rc = VGPR_32, bit HasTiedOutput = 0, Operand ofs = Offset,
                RegisterOperand data_op = getLdStRegisterOperand<rc>.ret> :
  DS_Pseudo<opName,
            (outs data_op:$vdst),
            // Common inputs, followed by the tied input only when requested.
            !con((ins VGPR_32:$addr, ofs:$offset, gds:$gds),
                 !if(HasTiedOutput, (ins data_op:$vdst_in), (ins))),
            " $vdst, $addr$offset$gds"> {
  let Constraints     = !if(HasTiedOutput, "$vdst = $vdst_in", "");
  let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");

  let has_data0 = 0;
  let has_data1 = 0;
}

// Defines the default record plus a "_gfx9" record with has_m0_read = 0.
multiclass DS_1A_RET_mc<string opName, RegisterClass rc = VGPR_32, bit HasTiedOutput = 0, Operand ofs = Offset> {
  def "" : DS_1A_RET<opName, rc, HasTiedOutput, ofs>;

  let has_m0_read = 0 in
  def _gfx9 : DS_1A_RET<opName, rc, HasTiedOutput, ofs>;
}

// DS_1A_RET with HasTiedOutput = 1.
class DS_1A_RET_Tied<string opName, RegisterClass rc = VGPR_32> :
  DS_1A_RET<opName, rc, 1>;
301
// DS pseudo with one address operand, two separate offset operands
// (offset0/offset1), no data operands and a result.
//   rc - register class used to pick the vdst operand type.
class DS_1A_Off8_RET <string opName, RegisterClass rc = VGPR_32> :
  DS_Pseudo<opName,
            (outs getLdStRegisterOperand<rc>.ret:$vdst),
            (ins VGPR_32:$addr, Offset0:$offset0, Offset1:$offset1, gds:$gds),
            " $vdst, $addr$offset0$offset1$gds"> {
  let has_data0 = 0;
  let has_data1 = 0;
  // offset0/offset1 are explicit operands, so there is no combined "offset".
  let has_offset = 0;
}

// Defines the default record plus a "_gfx9" record with has_m0_read = 0.
multiclass DS_1A_Off8_RET_mc <string opName, RegisterClass rc = VGPR_32> {
  def "" : DS_1A_Off8_RET<opName, rc>;

  let has_m0_read = 0 in
  def _gfx9 : DS_1A_Off8_RET<opName, rc>;
}
320
// DS pseudo with one address operand, no data operands and a result. There
// is no gds operand: has_gds is 0, gdsValue is 1, and "gds" is printed as a
// literal in the asm string.
class DS_1A_RET_GDS <string opName> :
  DS_Pseudo<opName,
            (outs getLdStRegisterOperand<VGPR_32>.ret:$vdst),
            (ins VGPR_32:$addr, Offset:$offset),
            " $vdst, $addr$offset gds"> {
  let has_data0 = 0;
  let has_data1 = 0;

  let has_gds  = 0;
  let gdsValue = 1;
}
331
// DS pseudo with no address operand, no data operands and a result.
class DS_0A_RET <string opName> :
  DS_Pseudo<opName,
            (outs getLdStRegisterOperand<VGPR_32>.ret:$vdst),
            (ins Offset:$offset, gds:$gds),
            " $vdst$offset$gds"> {
  // Explicitly stated; these match the DS_Pseudo defaults.
  let mayLoad  = 1;
  let mayStore = 1;

  let has_addr  = 0;
  let has_data0 = 0;
  let has_data1 = 0;
}
344
// DS pseudo with one address operand, no data operands and no result.
class DS_1A <string opName> :
  DS_Pseudo<opName,
            (outs),
            (ins VGPR_32:$addr, Offset:$offset, gds:$gds),
            " $addr$offset$gds"> {
  // Explicitly stated; these match the DS_Pseudo defaults.
  let mayLoad  = 1;
  let mayStore = 1;

  let has_vdst  = 0;
  let has_data0 = 0;
  let has_data1 = 0;
}

// Defines the default record plus a "_gfx9" record with has_m0_read = 0.
multiclass DS_1A_mc <string opName> {
  def "" : DS_1A<opName>;

  let has_m0_read = 0 in
  def _gfx9 : DS_1A<opName>;
}
365
366
// Base for GWS pseudos: sets GWS = 1, has no outputs and clears the
// vdst/addr/data0/data1 flags. There is no gds operand (has_gds = 0) and
// gdsValue is 1.
//   ins    - input operand dag supplied by the subclass.
//   asmOps - assembly operand string supplied by the subclass.
class DS_GWS <string opName, dag ins, string asmOps> :
  DS_Pseudo<opName, (outs), ins, asmOps> {
  let GWS = 1;

  let has_vdst  = 0;
  let has_addr  = 0;
  let has_data0 = 0;
  let has_data1 = 0;

  let has_gds   = 0;
  let gdsValue  = 1;
}

// GWS pseudo taking only an offset operand.
class DS_GWS_0D <string opName> :
  DS_GWS<opName,
         (ins Offset:$offset),
         "$offset gds"> {
  let hasSideEffects = 1;
}

// GWS pseudo taking a data0 operand and an offset operand.
class DS_GWS_1D <string opName> :
  DS_GWS<opName,
         (ins getLdStRegisterOperand<VGPR_32>.ret:$data0, Offset:$offset),
         " $data0$offset gds"> {
  let hasSideEffects = 1;
  // has_data0 stays 0 (from DS_GWS); the data0 operand is flagged through
  // has_gws_data0 instead.
  let has_gws_data0 = 1;
}
394
// DS pseudo with no operands at all: empty outs, empty ins and an empty
// operand string. It neither loads nor stores but has side effects.
class DS_VOID <string opName> :
  DS_Pseudo<opName, (outs), (ins), ""> {
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 1;
  let UseNamedOperandTable = 0;

  // Every optional operand/field flag is cleared.
  let has_vdst    = 0;
  let has_addr    = 0;
  let has_data0   = 0;
  let has_data1   = 0;
  let has_offset  = 0;
  let has_offset0 = 0;
  let has_offset1 = 0;
  let has_gds     = 0;
}
411
// DS pseudo with one address operand, one data operand and a result, carrying
// its own selection pattern. It is convergent, neither loads nor stores, and
// has no gds operand.
//   node    - pattern operator matched by the built-in pattern
//             (null_frag by default).
//   data_op - operand type shared by vdst and data0.
class DS_1A1D_PERMUTE <string opName, SDPatternOperator node = null_frag,
                       RegisterOperand data_op = getLdStRegisterOperand<VGPR_32>.ret> :
  DS_Pseudo<opName,
            (outs data_op:$vdst),
            (ins VGPR_32:$addr, data_op:$data0, Offset:$offset),
            " $vdst, $addr, $data0$offset",
            [(set i32:$vdst,
              (node (DS1Addr1Offset i32:$addr, i32:$offset), i32:$data0))] > {
  let isConvergent = 1;
  let mayLoad = 0;
  let mayStore = 0;

  let has_data1 = 0;
  let has_gds = 0;
}
428
// Pattern mapping `frag` applied to a DS1Addr1Offset address and a value of
// type vt onto `inst`.
//   complexity - value assigned to AddedComplexity.
//   gds        - constant passed as the instruction's i1 gds operand.
class DSAtomicRetPat<DS_Pseudo inst, ValueType vt, PatFrag frag, int complexity = 0,
                     bit gds=0> :
  GCNPat <
    (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value),
    (inst $ptr, getVregSrcForVT<vt>.ret:$value, Offset:$offset, (i1 gds))
  > {
  let AddedComplexity = complexity;
}
434
// 32-bit operations defined through DS_1A1D_NORET_mc (no result).
defm DS_ADD_U32  : DS_1A1D_NORET_mc<"ds_add_u32">;
defm DS_SUB_U32  : DS_1A1D_NORET_mc<"ds_sub_u32">;
defm DS_RSUB_U32 : DS_1A1D_NORET_mc<"ds_rsub_u32">;
defm DS_INC_U32  : DS_1A1D_NORET_mc<"ds_inc_u32">;
defm DS_DEC_U32  : DS_1A1D_NORET_mc<"ds_dec_u32">;
defm DS_MIN_I32  : DS_1A1D_NORET_mc<"ds_min_i32">;
defm DS_MAX_I32  : DS_1A1D_NORET_mc<"ds_max_i32">;
defm DS_MIN_U32  : DS_1A1D_NORET_mc<"ds_min_u32">;
defm DS_MAX_U32  : DS_1A1D_NORET_mc<"ds_max_u32">;
defm DS_AND_B32  : DS_1A1D_NORET_mc<"ds_and_b32">;
defm DS_OR_B32   : DS_1A1D_NORET_mc<"ds_or_b32">;
defm DS_XOR_B32  : DS_1A1D_NORET_mc<"ds_xor_b32">;

// Guarded by the HasLDSFPAtomicAddF32 subtarget predicate.
let SubtargetPredicate = HasLDSFPAtomicAddF32 in
defm DS_ADD_F32  : DS_1A1D_NORET_mc<"ds_add_f32">;

defm DS_MIN_F32  : DS_1A1D_NORET_mc<"ds_min_f32">;
defm DS_MAX_F32  : DS_1A1D_NORET_mc<"ds_max_f32">;
454
// Write instructions: everything in this scope has mayLoad cleared.
let mayLoad = 0 in {

defm DS_WRITE_B8       : DS_1A1D_NORET_mc<"ds_write_b8">;
defm DS_WRITE_B16      : DS_1A1D_NORET_mc<"ds_write_b16">;
defm DS_WRITE_B32      : DS_1A1D_NORET_mc<"ds_write_b32">;
defm DS_WRITE2_B32     : DS_1A2D_Off8_NORET_mc<"ds_write2_b32">;
defm DS_WRITE2ST64_B32 : DS_1A2D_Off8_NORET_mc<"ds_write2st64_b32">;

// D16-hi writes: has_m0_read = 0, guarded by HasD16LoadStore.
let has_m0_read = 0, SubtargetPredicate = HasD16LoadStore in {
def DS_WRITE_B8_D16_HI  : DS_1A1D_NORET<"ds_write_b8_d16_hi">;
def DS_WRITE_B16_D16_HI : DS_1A1D_NORET<"ds_write_b16_d16_hi">;
} // End has_m0_read = 0, SubtargetPredicate = HasD16LoadStore

let SubtargetPredicate = HasDSAddTid in
def DS_WRITE_ADDTID_B32 : DS_0A1D_NORET<"ds_write_addtid_b32">;

} // End mayLoad = 0
477
// F64 add, with and without a result; only unsuffixed records are defined
// (the *_mc_gfx9 multiclasses set has_m0_read = 0).
let SubtargetPredicate = HasLdsAtomicAddF64 in {
defm DS_ADD_F64     : DS_1A1D_NORET_mc_gfx9<"ds_add_f64", VReg_64>;
defm DS_ADD_RTN_F64 : DS_1A1D_RET_mc_gfx9<"ds_add_rtn_f64", VReg_64>;
} // End SubtargetPredicate = HasLdsAtomicAddF64

// Packed 16-bit adds, with and without a result.
let SubtargetPredicate = HasAtomicDsPkAdd16Insts in {
defm DS_PK_ADD_F16      : DS_1A1D_NORET_mc<"ds_pk_add_f16">;
defm DS_PK_ADD_RTN_F16  : DS_1A1D_RET_mc<"ds_pk_add_rtn_f16", VGPR_32>;
defm DS_PK_ADD_BF16     : DS_1A1D_NORET_mc<"ds_pk_add_bf16">;
defm DS_PK_ADD_RTN_BF16 : DS_1A1D_RET_mc<"ds_pk_add_rtn_bf16", VGPR_32>;
} // End SubtargetPredicate = HasAtomicDsPkAdd16Insts
489
// Two-data-operand "cmpstore" operations, without and with a result.
defm DS_CMPSTORE_B32     : DS_1A2D_NORET_mc<"ds_cmpstore_b32">;
defm DS_CMPSTORE_F32     : DS_1A2D_NORET_mc<"ds_cmpstore_f32">;
defm DS_CMPSTORE_B64     : DS_1A2D_NORET_mc<"ds_cmpstore_b64", VReg_64>;
defm DS_CMPSTORE_F64     : DS_1A2D_NORET_mc<"ds_cmpstore_f64", VReg_64>;
defm DS_CMPSTORE_RTN_B32 : DS_1A2D_RET_mc<"ds_cmpstore_rtn_b32", VGPR_32>;
defm DS_CMPSTORE_RTN_F32 : DS_1A2D_RET_mc<"ds_cmpstore_rtn_f32", VGPR_32>;
defm DS_CMPSTORE_RTN_B64 : DS_1A2D_RET_mc<"ds_cmpstore_rtn_b64", VReg_64>;
defm DS_CMPSTORE_RTN_F64 : DS_1A2D_RET_mc<"ds_cmpstore_rtn_f64", VReg_64>;

// 32-bit two-data-operand operations without a result.
defm DS_MSKOR_B32 : DS_1A2D_NORET_mc<"ds_mskor_b32">;
defm DS_CMPST_B32 : DS_1A2D_NORET_mc<"ds_cmpst_b32">;
defm DS_CMPST_F32 : DS_1A2D_NORET_mc<"ds_cmpst_f32">;
502
// 64-bit operations without a result (data operands use VReg_64).
defm DS_ADD_U64   : DS_1A1D_NORET_mc<"ds_add_u64", VReg_64>;
defm DS_SUB_U64   : DS_1A1D_NORET_mc<"ds_sub_u64", VReg_64>;
defm DS_RSUB_U64  : DS_1A1D_NORET_mc<"ds_rsub_u64", VReg_64>;
defm DS_INC_U64   : DS_1A1D_NORET_mc<"ds_inc_u64", VReg_64>;
defm DS_DEC_U64   : DS_1A1D_NORET_mc<"ds_dec_u64", VReg_64>;
defm DS_MIN_I64   : DS_1A1D_NORET_mc<"ds_min_i64", VReg_64>;
defm DS_MAX_I64   : DS_1A1D_NORET_mc<"ds_max_i64", VReg_64>;
defm DS_MIN_U64   : DS_1A1D_NORET_mc<"ds_min_u64", VReg_64>;
defm DS_MAX_U64   : DS_1A1D_NORET_mc<"ds_max_u64", VReg_64>;
defm DS_AND_B64   : DS_1A1D_NORET_mc<"ds_and_b64", VReg_64>;
defm DS_OR_B64    : DS_1A1D_NORET_mc<"ds_or_b64", VReg_64>;
defm DS_XOR_B64   : DS_1A1D_NORET_mc<"ds_xor_b64", VReg_64>;
defm DS_MSKOR_B64 : DS_1A2D_NORET_mc<"ds_mskor_b64", VReg_64>;

// 64-bit writes: mayLoad is cleared.
let mayLoad = 0 in {
defm DS_WRITE_B64      : DS_1A1D_NORET_mc<"ds_write_b64", VReg_64>;
defm DS_WRITE2_B64     : DS_1A2D_Off8_NORET_mc<"ds_write2_b64", VReg_64>;
defm DS_WRITE2ST64_B64 : DS_1A2D_Off8_NORET_mc<"ds_write2st64_b64", VReg_64>;
} // End mayLoad = 0

defm DS_CMPST_B64 : DS_1A2D_NORET_mc<"ds_cmpst_b64", VReg_64>;
defm DS_CMPST_F64 : DS_1A2D_NORET_mc<"ds_cmpst_f64", VReg_64>;
defm DS_MIN_F64   : DS_1A1D_NORET_mc<"ds_min_f64", VReg_64>;
defm DS_MAX_F64   : DS_1A1D_NORET_mc<"ds_max_f64", VReg_64>;
525
// 32-bit operations that return a result.
defm DS_ADD_RTN_U32   : DS_1A1D_RET_mc<"ds_add_rtn_u32", VGPR_32>;

// Guarded by the HasLDSFPAtomicAddF32 subtarget predicate.
let SubtargetPredicate = HasLDSFPAtomicAddF32 in
defm DS_ADD_RTN_F32   : DS_1A1D_RET_mc<"ds_add_rtn_f32", VGPR_32>;

defm DS_SUB_RTN_U32   : DS_1A1D_RET_mc<"ds_sub_rtn_u32", VGPR_32>;
defm DS_RSUB_RTN_U32  : DS_1A1D_RET_mc<"ds_rsub_rtn_u32", VGPR_32>;
defm DS_INC_RTN_U32   : DS_1A1D_RET_mc<"ds_inc_rtn_u32", VGPR_32>;
defm DS_DEC_RTN_U32   : DS_1A1D_RET_mc<"ds_dec_rtn_u32", VGPR_32>;
defm DS_MIN_RTN_I32   : DS_1A1D_RET_mc<"ds_min_rtn_i32", VGPR_32>;
defm DS_MAX_RTN_I32   : DS_1A1D_RET_mc<"ds_max_rtn_i32", VGPR_32>;
defm DS_MIN_RTN_U32   : DS_1A1D_RET_mc<"ds_min_rtn_u32", VGPR_32>;
defm DS_MAX_RTN_U32   : DS_1A1D_RET_mc<"ds_max_rtn_u32", VGPR_32>;
defm DS_AND_RTN_B32   : DS_1A1D_RET_mc<"ds_and_rtn_b32", VGPR_32>;
defm DS_OR_RTN_B32    : DS_1A1D_RET_mc<"ds_or_rtn_b32", VGPR_32>;
defm DS_XOR_RTN_B32   : DS_1A1D_RET_mc<"ds_xor_rtn_b32", VGPR_32>;
defm DS_MSKOR_RTN_B32 : DS_1A2D_RET_mc<"ds_mskor_rtn_b32", VGPR_32>;
defm DS_CMPST_RTN_B32 : DS_1A2D_RET_mc<"ds_cmpst_rtn_b32", VGPR_32>;
defm DS_CMPST_RTN_F32 : DS_1A2D_RET_mc<"ds_cmpst_rtn_f32", VGPR_32>;
defm DS_MIN_RTN_F32   : DS_1A1D_RET_mc<"ds_min_rtn_f32", VGPR_32>;
defm DS_MAX_RTN_F32   : DS_1A1D_RET_mc<"ds_max_rtn_f32", VGPR_32>;

// The two-offset forms return VReg_64 and take VGPR_32 data operands.
defm DS_WRXCHG_RTN_B32      : DS_1A1D_RET_mc<"ds_wrxchg_rtn_b32">;
defm DS_WRXCHG2_RTN_B32     : DS_1A2D_Off8_RET_mc<"ds_wrxchg2_rtn_b32", VReg_64, VGPR_32>;
defm DS_WRXCHG2ST64_RTN_B32 : DS_1A2D_Off8_RET_mc<"ds_wrxchg2st64_rtn_b32", VReg_64, VGPR_32>;
551
// 64-bit operations that return a result (operands use VReg_64).
defm DS_ADD_RTN_U64   : DS_1A1D_RET_mc<"ds_add_rtn_u64", VReg_64>;
defm DS_SUB_RTN_U64   : DS_1A1D_RET_mc<"ds_sub_rtn_u64", VReg_64>;
defm DS_RSUB_RTN_U64  : DS_1A1D_RET_mc<"ds_rsub_rtn_u64", VReg_64>;
defm DS_INC_RTN_U64   : DS_1A1D_RET_mc<"ds_inc_rtn_u64", VReg_64>;
defm DS_DEC_RTN_U64   : DS_1A1D_RET_mc<"ds_dec_rtn_u64", VReg_64>;
defm DS_MIN_RTN_I64   : DS_1A1D_RET_mc<"ds_min_rtn_i64", VReg_64>;
defm DS_MAX_RTN_I64   : DS_1A1D_RET_mc<"ds_max_rtn_i64", VReg_64>;
defm DS_MIN_RTN_U64   : DS_1A1D_RET_mc<"ds_min_rtn_u64", VReg_64>;
defm DS_MAX_RTN_U64   : DS_1A1D_RET_mc<"ds_max_rtn_u64", VReg_64>;
defm DS_AND_RTN_B64   : DS_1A1D_RET_mc<"ds_and_rtn_b64", VReg_64>;
defm DS_OR_RTN_B64    : DS_1A1D_RET_mc<"ds_or_rtn_b64", VReg_64>;
defm DS_XOR_RTN_B64   : DS_1A1D_RET_mc<"ds_xor_rtn_b64", VReg_64>;
defm DS_MSKOR_RTN_B64 : DS_1A2D_RET_mc<"ds_mskor_rtn_b64", VReg_64>;
defm DS_CMPST_RTN_B64 : DS_1A2D_RET_mc<"ds_cmpst_rtn_b64", VReg_64>;
defm DS_CMPST_RTN_F64 : DS_1A2D_RET_mc<"ds_cmpst_rtn_f64", VReg_64>;
defm DS_MIN_RTN_F64   : DS_1A1D_RET_mc<"ds_min_rtn_f64", VReg_64>;
defm DS_MAX_RTN_F64   : DS_1A1D_RET_mc<"ds_max_rtn_f64", VReg_64>;

// The two-offset forms return VReg_128 and take VReg_64 data operands.
defm DS_WRXCHG_RTN_B64      : DS_1A1D_RET_mc<"ds_wrxchg_rtn_b64", VReg_64>;
defm DS_WRXCHG2_RTN_B64     : DS_1A2D_Off8_RET_mc<"ds_wrxchg2_rtn_b64", VReg_128, VReg_64>;
defm DS_WRXCHG2ST64_RTN_B64 : DS_1A2D_Off8_RET_mc<"ds_wrxchg2st64_rtn_b64", VReg_128, VReg_64>;
573
// GWS instructions: all are convergent and use a custom inserter.
let isConvergent = 1, usesCustomInserter = 1 in {

// Unlike the other GWS definitions here, DS_GWS_INIT clears mayLoad.
def DS_GWS_INIT    : DS_GWS_1D<"ds_gws_init"> {
  let mayLoad = 0;
}
def DS_GWS_SEMA_V  : DS_GWS_0D<"ds_gws_sema_v">;
def DS_GWS_SEMA_BR : DS_GWS_1D<"ds_gws_sema_br">;
def DS_GWS_SEMA_P  : DS_GWS_0D<"ds_gws_sema_p">;
def DS_GWS_BARRIER : DS_GWS_1D<"ds_gws_barrier">;

} // End isConvergent = 1, usesCustomInserter = 1
583
// "src2" instructions, all built from DS_1A (address, offset and gds only)
// and guarded by HasDsSrc2Insts.
let SubtargetPredicate = HasDsSrc2Insts in {

// 32-bit forms.
def DS_ADD_SRC2_U32  : DS_1A<"ds_add_src2_u32">;
def DS_SUB_SRC2_U32  : DS_1A<"ds_sub_src2_u32">;
def DS_RSUB_SRC2_U32 : DS_1A<"ds_rsub_src2_u32">;
def DS_INC_SRC2_U32  : DS_1A<"ds_inc_src2_u32">;
def DS_DEC_SRC2_U32  : DS_1A<"ds_dec_src2_u32">;
def DS_MIN_SRC2_I32  : DS_1A<"ds_min_src2_i32">;
def DS_MAX_SRC2_I32  : DS_1A<"ds_max_src2_i32">;
def DS_MIN_SRC2_U32  : DS_1A<"ds_min_src2_u32">;
def DS_MAX_SRC2_U32  : DS_1A<"ds_max_src2_u32">;
def DS_AND_SRC2_B32  : DS_1A<"ds_and_src2_b32">;
def DS_OR_SRC2_B32   : DS_1A<"ds_or_src2_b32">;
def DS_XOR_SRC2_B32  : DS_1A<"ds_xor_src2_b32">;
def DS_MIN_SRC2_F32  : DS_1A<"ds_min_src2_f32">;
def DS_MAX_SRC2_F32  : DS_1A<"ds_max_src2_f32">;

// 64-bit forms.
def DS_ADD_SRC2_U64  : DS_1A<"ds_add_src2_u64">;
def DS_SUB_SRC2_U64  : DS_1A<"ds_sub_src2_u64">;
def DS_RSUB_SRC2_U64 : DS_1A<"ds_rsub_src2_u64">;
def DS_INC_SRC2_U64  : DS_1A<"ds_inc_src2_u64">;
def DS_DEC_SRC2_U64  : DS_1A<"ds_dec_src2_u64">;
def DS_MIN_SRC2_I64  : DS_1A<"ds_min_src2_i64">;
def DS_MAX_SRC2_I64  : DS_1A<"ds_max_src2_i64">;
def DS_MIN_SRC2_U64  : DS_1A<"ds_min_src2_u64">;
def DS_MAX_SRC2_U64  : DS_1A<"ds_max_src2_u64">;
def DS_AND_SRC2_B64  : DS_1A<"ds_and_src2_b64">;
def DS_OR_SRC2_B64   : DS_1A<"ds_or_src2_b64">;
def DS_XOR_SRC2_B64  : DS_1A<"ds_xor_src2_b64">;
def DS_MIN_SRC2_F64  : DS_1A<"ds_min_src2_f64">;
def DS_MAX_SRC2_F64  : DS_1A<"ds_max_src2_f64">;

// Write forms.
def DS_WRITE_SRC2_B32 : DS_1A<"ds_write_src2_b32">;
def DS_WRITE_SRC2_B64 : DS_1A<"ds_write_src2_b64">;

} // End SubtargetPredicate = HasDsSrc2Insts
618
// Swizzle: convergent, neither loads nor stores, uses only EXEC, and takes a
// Swizzle operand in place of the plain Offset operand.
let Uses = [EXEC],
    mayLoad = 0,
    mayStore = 0,
    isConvergent = 1 in
def DS_SWIZZLE_B32 : DS_1A_RET <"ds_swizzle_b32", VGPR_32, 0, Swizzle>;
622
// Read instructions: everything in this scope has mayStore cleared.
let mayStore = 0 in {

defm DS_READ_I8  : DS_1A_RET_mc<"ds_read_i8">;
defm DS_READ_U8  : DS_1A_RET_mc<"ds_read_u8">;
defm DS_READ_I16 : DS_1A_RET_mc<"ds_read_i16">;
defm DS_READ_U16 : DS_1A_RET_mc<"ds_read_u16">;
defm DS_READ_B32 : DS_1A_RET_mc<"ds_read_b32">;
defm DS_READ_B64 : DS_1A_RET_mc<"ds_read_b64", VReg_64>;

defm DS_READ2_B32     : DS_1A_Off8_RET_mc<"ds_read2_b32", VReg_64>;
defm DS_READ2ST64_B32 : DS_1A_Off8_RET_mc<"ds_read2st64_b32", VReg_64>;

defm DS_READ2_B64     : DS_1A_Off8_RET_mc<"ds_read2_b64", VReg_128>;
defm DS_READ2ST64_B64 : DS_1A_Off8_RET_mc<"ds_read2st64_b64", VReg_128>;

// D16 reads with a tied output: has_m0_read = 0, guarded by HasD16LoadStore,
// with TiedSourceNotRead set.
let has_m0_read = 0,
    SubtargetPredicate = HasD16LoadStore,
    TiedSourceNotRead = 1 in {
def DS_READ_U8_D16     : DS_1A_RET_Tied<"ds_read_u8_d16">;
def DS_READ_U8_D16_HI  : DS_1A_RET_Tied<"ds_read_u8_d16_hi">;
def DS_READ_I8_D16     : DS_1A_RET_Tied<"ds_read_i8_d16">;
def DS_READ_I8_D16_HI  : DS_1A_RET_Tied<"ds_read_i8_d16_hi">;
def DS_READ_U16_D16    : DS_1A_RET_Tied<"ds_read_u16_d16">;
def DS_READ_U16_D16_HI : DS_1A_RET_Tied<"ds_read_u16_d16_hi">;
} // End has_m0_read = 0, SubtargetPredicate = HasD16LoadStore

let SubtargetPredicate = HasDSAddTid in
def DS_READ_ADDTID_B32 : DS_0A_RET<"ds_read_addtid_b32">;

} // End mayStore = 0
653
// Address-less instructions that return a result.
def DS_CONSUME : DS_0A_RET<"ds_consume">;
def DS_APPEND  : DS_0A_RET<"ds_append">;

// Guarded by the isNotGFX90APlus subtarget predicate.
let SubtargetPredicate = isNotGFX90APlus in {
def DS_ORDERED_COUNT : DS_1A_RET_GDS<"ds_ordered_count">;
}
659
660//===----------------------------------------------------------------------===//
661// Instruction definitions for CI and newer.
662//===----------------------------------------------------------------------===//
663
// Everything in this scope is guarded by the isGFX7Plus subtarget predicate.
let SubtargetPredicate = isGFX7Plus in {

defm DS_WRAP_RTN_B32       : DS_1A2D_RET_mc<"ds_wrap_rtn_b32", VGPR_32>;
defm DS_CONDXCHG32_RTN_B64 : DS_1A1D_RET_mc<"ds_condxchg32_rtn_b64", VReg_64>;

let isConvergent = 1, usesCustomInserter = 1 in
def DS_GWS_SEMA_RELEASE_ALL : DS_GWS_0D<"ds_gws_sema_release_all">;

// 96-bit and 128-bit reads (mayStore cleared).
let mayStore = 0 in {
defm DS_READ_B96  : DS_1A_RET_mc<"ds_read_b96", VReg_96>;
defm DS_READ_B128 : DS_1A_RET_mc<"ds_read_b128", VReg_128>;
} // End mayStore = 0

// 96-bit and 128-bit writes (mayLoad cleared).
let mayLoad = 0 in {
defm DS_WRITE_B96  : DS_1A1D_NORET_mc<"ds_write_b96", VReg_96>;
defm DS_WRITE_B128 : DS_1A1D_NORET_mc<"ds_write_b128", VReg_128>;
} // End mayLoad = 0

def DS_NOP : DS_VOID<"ds_nop">;

} // let SubtargetPredicate = isGFX7Plus
686
687//===----------------------------------------------------------------------===//
688// Instruction definitions for VI and newer.
689//===----------------------------------------------------------------------===//
690
// Permute instructions: guarded by isGFX8Plus and using only EXEC. Each one
// is selected from the intrinsic passed as the pattern node.
let SubtargetPredicate = isGFX8Plus, Uses = [EXEC] in {

def DS_PERMUTE_B32  : DS_1A1D_PERMUTE <"ds_permute_b32", int_amdgcn_ds_permute>;
def DS_BPERMUTE_B32 : DS_1A1D_PERMUTE <"ds_bpermute_b32", int_amdgcn_ds_bpermute>;

} // let SubtargetPredicate = isGFX8Plus, Uses = [EXEC]
701
// Requires HasLDSFPAtomicAddF32 as the subtarget predicate and HasDsSrc2Insts
// as an additional predicate.
let SubtargetPredicate = HasLDSFPAtomicAddF32,
    OtherPredicates = [HasDsSrc2Insts] in
def DS_ADD_SRC2_F32 : DS_1A<"ds_add_src2_f32">;
705
706
707//===----------------------------------------------------------------------===//
708// Instruction definitions for GFX11.
709//===----------------------------------------------------------------------===//
710
// Guarded by isGFX11Only. Both return VReg_64 and take a VGPR_32 data0.
let SubtargetPredicate = isGFX11Only in {

def DS_ADD_GS_REG_RTN : DS_0A1D_RET_GDS<"ds_add_gs_reg_rtn", VReg_64, VGPR_32>;
def DS_SUB_GS_REG_RTN : DS_0A1D_RET_GDS<"ds_sub_gs_reg_rtn", VReg_64, VGPR_32>;

} // let SubtargetPredicate = isGFX11Only
717
// Guarded by isGFX11Plus, with HasImageInsts as an additional predicate.
let SubtargetPredicate = isGFX11Plus in {

let OtherPredicates = [HasImageInsts] in {
def DS_BVH_STACK_RTN_B32 : DS_BVH_STACK<"ds_bvh_stack_rtn_b32">;
}

} // let SubtargetPredicate = isGFX11Plus
724
725//===----------------------------------------------------------------------===//
726// Instruction definitions for GFX12 and newer.
727//===----------------------------------------------------------------------===//
728
// Everything in this scope is guarded by the isGFX12Plus subtarget predicate.
let SubtargetPredicate = isGFX12Plus in {

defm DS_COND_SUB_U32      : DS_1A1D_NORET_mc<"ds_cond_sub_u32">;
defm DS_COND_SUB_RTN_U32  : DS_1A1D_RET_mc<"ds_cond_sub_rtn_u32", VGPR_32>;
defm DS_SUB_CLAMP_U32     : DS_1A1D_NORET_mc<"ds_sub_clamp_u32">;
defm DS_SUB_CLAMP_RTN_U32 : DS_1A1D_RET_mc<"ds_sub_clamp_rtn_u32", VGPR_32>;

// Emits two patterns for an intrinsic whose PatFrag names are derived from
// `frag`:
//   - <frag>_local_addrspace       -> inst
//   - <frag>_noret_local_addrspace -> noRetInst, with AddedComplexity 1 and
//     HasAtomicCSubNoRtnInsts as an additional predicate.
multiclass DSAtomicRetNoRetPatIntrinsic_mc<DS_Pseudo inst, DS_Pseudo noRetInst,
                                           ValueType vt, string frag> {
  def : DSAtomicRetPat<inst, vt, !cast<PatFrag>(frag#"_local_addrspace")>;

  let OtherPredicates = [HasAtomicCSubNoRtnInsts] in {
    def : DSAtomicRetPat<noRetInst, vt,
                         !cast<PatFrag>(frag#"_noret_local_addrspace"),
                         /* complexity */ 1>;
  }
}

defm : DSAtomicRetNoRetPatIntrinsic_mc<DS_COND_SUB_RTN_U32, DS_COND_SUB_U32,
                                       i32, "int_amdgcn_atomic_cond_sub_u32">;

} // let SubtargetPredicate = isGFX12Plus
748
749//===----------------------------------------------------------------------===//
750// DS Patterns
751//===----------------------------------------------------------------------===//
752
// Select the ds_swizzle intrinsic to DS_SWIZZLE_B32: the immediate is passed
// through as_i16timm and the gds operand is the constant 0.
def : GCNPat <(int_amdgcn_ds_swizzle i32:$src, timm:$offset16),
              (DS_SWIZZLE_B32 VGPR_32:$src, (as_i16timm $offset16), (i1 0))>;
757
// Pattern mapping `frag` applied to a DS1Addr1Offset address, producing a
// value of type vt, onto `inst`.
//   gds - constant passed as the instruction's i1 gds operand.
class DSReadPat <DS_Pseudo inst, ValueType vt, PatFrag frag, int gds=0> :
  GCNPat <(vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset))),
          (inst $ptr, Offset:$offset, (i1 gds))>;

// Emits two read patterns for `inst`:
//   - under LDSRequiresM0Init, the "<frag>_m0" fragment selects `inst`;
//   - under NotLDSRequiresM0Init, `frag` selects the "<inst>_gfx9" record.
multiclass DSReadPat_mc<DS_Pseudo inst, ValueType vt, string frag> {
  let OtherPredicates = [LDSRequiresM0Init] in
  def : DSReadPat<inst, vt, !cast<PatFrag>(frag#"_m0")>;

  let OtherPredicates = [NotLDSRequiresM0Init] in
  def : DSReadPat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt,
                  !cast<PatFrag>(frag)>;
}
773
// Read pattern for instructions with a tied input: `frag` takes the address
// and an incoming value $in, which is passed as the instruction's last
// operand. The gds operand is the constant 0.
class DSReadPat_D16 <DS_Pseudo inst, PatFrag frag, ValueType vt> :
  GCNPat <(frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$in),
          (inst $ptr, Offset:$offset, (i1 0), $in)>;
778
// Read patterns for extending 8/16-bit local loads and the plain i16 load.
// Each (instruction, type, fragment) combination is instantiated exactly
// once; a redundant second copy of the DS_READ_I16 / i32 /
// "sextloadi16_local" pattern was removed.
defm : DSReadPat_mc <DS_READ_I8,  i32, "sextloadi8_local">;
defm : DSReadPat_mc <DS_READ_I8,  i16, "sextloadi8_local">;
defm : DSReadPat_mc <DS_READ_U8,  i32, "extloadi8_local">;
defm : DSReadPat_mc <DS_READ_U8,  i32, "zextloadi8_local">;
defm : DSReadPat_mc <DS_READ_U8,  i16, "extloadi8_local">;
defm : DSReadPat_mc <DS_READ_U8,  i16, "zextloadi8_local">;
defm : DSReadPat_mc <DS_READ_I16, i32, "sextloadi16_local">;
defm : DSReadPat_mc <DS_READ_U16, i32, "extloadi16_local">;
defm : DSReadPat_mc <DS_READ_U16, i32, "zextloadi16_local">;
defm : DSReadPat_mc <DS_READ_U16, i16, "load_local">;
790
// One DS_READ_B32 "load_local" pattern per type in Reg32Types.
foreach vt = Reg32Types.types in
defm : DSReadPat_mc <DS_READ_B32, vt, "load_local">;

// Atomic local loads.
defm : DSReadPat_mc <DS_READ_U8,  i16, "atomic_load_8_local">;
defm : DSReadPat_mc <DS_READ_U8,  i32, "atomic_load_8_local">;
defm : DSReadPat_mc <DS_READ_U16, i16, "atomic_load_16_local">;
defm : DSReadPat_mc <DS_READ_U16, i32, "atomic_load_16_local">;
defm : DSReadPat_mc <DS_READ_B32, i32, "atomic_load_32_local">;
defm : DSReadPat_mc <DS_READ_B64, i64, "atomic_load_64_local">;
801
// D16 read patterns, guarded by D16PreservesUnusedBits. Each fragment is
// instantiated for both v2i16 and v2f16.
let OtherPredicates = [D16PreservesUnusedBits] in {

// "hi" fragments.
def : DSReadPat_D16<DS_READ_U16_D16_HI, load_d16_hi_local, v2i16>;
def : DSReadPat_D16<DS_READ_U16_D16_HI, load_d16_hi_local, v2f16>;
def : DSReadPat_D16<DS_READ_U8_D16_HI, az_extloadi8_d16_hi_local, v2i16>;
def : DSReadPat_D16<DS_READ_U8_D16_HI, az_extloadi8_d16_hi_local, v2f16>;
def : DSReadPat_D16<DS_READ_I8_D16_HI, sextloadi8_d16_hi_local, v2i16>;
def : DSReadPat_D16<DS_READ_I8_D16_HI, sextloadi8_d16_hi_local, v2f16>;

// "lo" fragments.
def : DSReadPat_D16<DS_READ_U16_D16, load_d16_lo_local, v2i16>;
def : DSReadPat_D16<DS_READ_U16_D16, load_d16_lo_local, v2f16>;
def : DSReadPat_D16<DS_READ_U8_D16, az_extloadi8_d16_lo_local, v2i16>;
def : DSReadPat_D16<DS_READ_U8_D16, az_extloadi8_d16_lo_local, v2f16>;
def : DSReadPat_D16<DS_READ_I8_D16, sextloadi8_d16_lo_local, v2i16>;
def : DSReadPat_D16<DS_READ_I8_D16, sextloadi8_d16_lo_local, v2f16>;

} // End OtherPredicates = [D16PreservesUnusedBits]
817
// Pattern mapping `frag` applied to a value of type vt and a DS1Addr1Offset
// address onto `inst`.
//   gds - constant passed as the instruction's i1 gds operand.
class DSWritePat <DS_Pseudo inst, ValueType vt, PatFrag frag, int gds=0> :
  GCNPat <(frag vt:$value, (DS1Addr1Offset i32:$ptr, i32:$offset)),
          (inst $ptr, getVregSrcForVT<vt>.ret:$value, Offset:$offset, (i1 gds))>;

// Emits two write patterns for `inst`:
//   - under LDSRequiresM0Init, the "<frag>_m0" fragment selects `inst`;
//   - under NotLDSRequiresM0Init, `frag` selects the "<inst>_gfx9" record.
multiclass DSWritePat_mc <DS_Pseudo inst, ValueType vt, string frag> {
  let OtherPredicates = [LDSRequiresM0Init] in
  def : DSWritePat<inst, vt, !cast<PatFrag>(frag#"_m0")>;

  let OtherPredicates = [NotLDSRequiresM0Init] in
  def : DSWritePat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt,
                   !cast<PatFrag>(frag)>;
}
832
// Truncating and plain stores of i32 / i16 values to DS_WRITE_B8 / DS_WRITE_B16.
833defm : DSWritePat_mc <DS_WRITE_B8, i32, "truncstorei8_local">;
834defm : DSWritePat_mc <DS_WRITE_B16, i32, "truncstorei16_local">;
835defm : DSWritePat_mc <DS_WRITE_B8, i16, "truncstorei8_local">;
836defm : DSWritePat_mc <DS_WRITE_B16, i16, "store_local">;
837
// "store_local" of every type in Reg32Types.types selects DS_WRITE_B32.
838foreach vt = Reg32Types.types in {
839defm : DSWritePat_mc <DS_WRITE_B32, vt, "store_local">;
840}
841
// Atomic store fragments are selected to the same DS_WRITE_* pseudos; the 8-
// and 16-bit fragments are instantiated for both i16 and i32 value types.
842defm : DSWritePat_mc <DS_WRITE_B8, i16, "atomic_store_8_local">;
843defm : DSWritePat_mc <DS_WRITE_B8, i32, "atomic_store_8_local">;
844defm : DSWritePat_mc <DS_WRITE_B16, i16, "atomic_store_16_local">;
845defm : DSWritePat_mc <DS_WRITE_B16, i32, "atomic_store_16_local">;
846defm : DSWritePat_mc <DS_WRITE_B32, i32, "atomic_store_32_local">;
847defm : DSWritePat_mc <DS_WRITE_B64, i64, "atomic_store_64_local">;
848
// D16_HI stores are only available with HasD16LoadStore. These use DSWritePat
// directly (no _m0 / _gfx9 split as in DSWritePat_mc).
849let OtherPredicates = [HasD16LoadStore] in {
850def : DSWritePat <DS_WRITE_B16_D16_HI, i32, store_hi16_local>;
851def : DSWritePat <DS_WRITE_B8_D16_HI, i32, truncstorei8_hi16_local>;
852}
853
// Load pattern whose address is matched by DS64Bit4ByteAligned into a base
// pointer plus two 8-bit offsets. The selected `inst` receives
// (ptr, offset0, offset1) followed by a constant 0 for its final i1 operand.
854class DS64Bit4ByteAlignedReadPat<DS_Pseudo inst, ValueType vt, PatFrag frag> : GCNPat <
855  (vt:$value (frag (DS64Bit4ByteAligned i32:$ptr, i8:$offset0, i8:$offset1))),
856  (inst $ptr, $offset0, $offset1, (i1 0))
857>;
858
// Store counterpart of DS64Bit4ByteAlignedReadPat. The stored value is taken
// as a VReg_64 and split with EXTRACT_SUBREG into its sub0 and sub1 i32 parts,
// which become the two data operands of `inst`, followed by offset0, offset1
// and a constant 0 for the final i1 operand.
859class DS64Bit4ByteAlignedWritePat<DS_Pseudo inst, ValueType vt, PatFrag frag> : GCNPat<
860  (frag vt:$value, (DS64Bit4ByteAligned i32:$ptr, i8:$offset0, i8:$offset1)),
861  (inst $ptr, (i32 (EXTRACT_SUBREG VReg_64:$value, sub0)),
862              (i32 (EXTRACT_SUBREG VReg_64:$value, sub1)), $offset0, $offset1,
863              (i1 0))
864>;
865
// Load pattern whose address is matched by DS128Bit8ByteAligned into a base
// pointer plus two 8-bit offsets. The selected `inst` receives
// (ptr, offset0, offset1) followed by a constant 0 for its final i1 operand.
866class DS128Bit8ByteAlignedReadPat<DS_Pseudo inst, ValueType vt, PatFrag frag> : GCNPat <
867  (vt:$value (frag (DS128Bit8ByteAligned i32:$ptr, i8:$offset0, i8:$offset1))),
868  (inst $ptr, $offset0, $offset1, (i1 0))
869>;
870
// Store counterpart of DS128Bit8ByteAlignedReadPat. The stored value is taken
// as a VReg_128 and split with EXTRACT_SUBREG into its sub0_sub1 and sub2_sub3
// i64 parts, which become the two data operands of `inst`, followed by
// offset0, offset1 and a constant 0 for the final i1 operand.
871class DS128Bit8ByteAlignedWritePat<DS_Pseudo inst, ValueType vt, PatFrag frag> : GCNPat<
872  (frag vt:$value, (DS128Bit8ByteAligned i32:$ptr, i8:$offset0, i8:$offset1)),
873  (inst $ptr, (i64 (EXTRACT_SUBREG VReg_128:$value, sub0_sub1)),
874              (i64 (EXTRACT_SUBREG VReg_128:$value, sub2_sub3)), $offset0, $offset1,
875              (i1 0))
876>;
877
// For value type `vt`, instantiates the DS64Bit4ByteAligned read and write
// patterns on DS_READ2_B32 / DS_WRITE2_B32:
//  * with LDSRequiresM0Init and isGFX7Plus: the base pseudos and the *_m0
//    load/store fragments;
//  * with NotLDSRequiresM0Init: the *_gfx9 pseudos and the plain fragments.
878multiclass DS64Bit4ByteAlignedPat_mc<ValueType vt> {
879  let OtherPredicates = [LDSRequiresM0Init, isGFX7Plus] in {
880    def : DS64Bit4ByteAlignedReadPat<DS_READ2_B32, vt, load_local_m0>;
881    def : DS64Bit4ByteAlignedWritePat<DS_WRITE2_B32, vt, store_local_m0>;
882  }
883
884  let OtherPredicates = [NotLDSRequiresM0Init] in {
885    def : DS64Bit4ByteAlignedReadPat<DS_READ2_B32_gfx9, vt, load_local>;
886    def : DS64Bit4ByteAlignedWritePat<DS_WRITE2_B32_gfx9, vt, store_local>;
887  }
888}
889
// For value type `vt`, instantiates the DS128Bit8ByteAligned read and write
// patterns on DS_READ2_B64 / DS_WRITE2_B64:
//  * with LDSRequiresM0Init and isGFX7Plus: the base pseudos and the *_m0
//    load/store fragments;
//  * with NotLDSRequiresM0Init: the *_gfx9 pseudos and the plain fragments.
890multiclass DS128Bit8ByteAlignedPat_mc<ValueType vt> {
891  let OtherPredicates = [LDSRequiresM0Init, isGFX7Plus] in {
892    def : DS128Bit8ByteAlignedReadPat<DS_READ2_B64, vt, load_local_m0>;
893    def : DS128Bit8ByteAlignedWritePat<DS_WRITE2_B64, vt, store_local_m0>;
894  }
895
896  let OtherPredicates = [NotLDSRequiresM0Init] in {
897    def : DS128Bit8ByteAlignedReadPat<DS_READ2_B64_gfx9, vt, load_local>;
898    def : DS128Bit8ByteAlignedWritePat<DS_WRITE2_B64_gfx9, vt, store_local>;
899  }
900}
901
902// v2i32 loads are split into i32 loads on SI during lowering, due to a bug
903// related to bounds checking.
// Instantiate the read2/write2_b32 patterns for every type of VReg_64 ...
904foreach vt = VReg_64.RegTypes in {
905defm : DS64Bit4ByteAlignedPat_mc<vt>;
906}
907
// ... and the read2/write2_b64 patterns for every type of VReg_128.
908foreach vt = VReg_128.RegTypes in {
909defm : DS128Bit8ByteAlignedPat_mc<vt>;
910}
911
912// Prefer ds_read over ds_read2 and ds_write over ds_write2, all other things
913// being equal, because it has a larger immediate offset range.
// Every pattern inside this block gets AddedComplexity = 100.
914let AddedComplexity = 100 in {
915
// 64-bit accesses through the "*_align8_local" fragments use a single
// DS_READ_B64 / DS_WRITE_B64.
916foreach vt = VReg_64.RegTypes in {
917defm : DSReadPat_mc <DS_READ_B64, vt, "load_align8_local">;
918defm : DSWritePat_mc <DS_WRITE_B64, vt, "store_align8_local">;
919}
920
921let SubtargetPredicate = isGFX7Plus in {
922
// 96-bit accesses through the "*_align16_local" fragments.
923foreach vt = VReg_96.RegTypes in {
924defm : DSReadPat_mc <DS_READ_B96, vt, "load_align16_local">;
925defm : DSWritePat_mc <DS_WRITE_B96, vt, "store_align16_local">;
926}
927
// 128-bit accesses through the "*_align16_local" fragments.
928foreach vt = VReg_128.RegTypes in {
929defm : DSReadPat_mc <DS_READ_B128, vt, "load_align16_local">;
930defm : DSWritePat_mc <DS_WRITE_B128, vt, "store_align16_local">;
931}
932
// This inner SubtargetPredicate assignment replaces isGFX7Plus for the
// patterns below; it is not combined with it.
933let SubtargetPredicate = HasUnalignedAccessMode in {
934
935// Select 64 bit loads and stores aligned less than 4 as a single ds_read_b64/
936// ds_write_b64 instruction as this is faster than ds_read2_b32/ds_write2_b32
937// which would be used otherwise. In this case a b32 access would still be
938// misaligned, but we will have 2 of them.
939foreach vt = VReg_64.RegTypes in {
940defm : DSReadPat_mc <DS_READ_B64, vt, "load_align_less_than_4_local">;
941defm : DSWritePat_mc <DS_WRITE_B64, vt, "store_align_less_than_4_local">;
942}
943
944// Selection will split most of the unaligned 3 dword accesses due to performance
945// reasons when beneficial. Keep these two patterns for the rest of the cases.
946foreach vt = VReg_96.RegTypes in {
947defm : DSReadPat_mc <DS_READ_B96, vt, "load_local">;
948defm : DSWritePat_mc <DS_WRITE_B96, vt, "store_local">;
949}
950
951// Select 128 bit loads and stores aligned less than 4 as a single ds_read_b128/
952// ds_write_b128 instruction as this is faster than ds_read2_b64/ds_write2_b64
953// which would be used otherwise. In this case a b64 access would still be
954// misaligned, but we will have 2 of them.
955foreach vt = VReg_128.RegTypes in {
956defm : DSReadPat_mc <DS_READ_B128, vt, "load_align_less_than_4_local">;
957defm : DSWritePat_mc <DS_WRITE_B128, vt, "store_align_less_than_4_local">;
958}
959
960} // End SubtargetPredicate = HasUnalignedAccessMode
961
962} // End SubtargetPredicate = isGFX7Plus
963
964} // End AddedComplexity = 100
965
// Instantiates DSAtomicRetPat for a returning atomic. The fragment is looked
// up by name, built from `frag`, an address-space/M0 infix and the value type:
//  * LDSRequiresM0Init:    `inst`,       frag#"_local_m0_"#vt
//  * NotLDSRequiresM0Init: <inst>_gfx9,  frag#"_local_"#vt
//  * HasGDS:               `inst`,       frag#"_region_m0_"#vt, with gds = 1
966multiclass DSAtomicRetPat_mc<DS_Pseudo inst, ValueType vt, string frag> {
967  let OtherPredicates = [LDSRequiresM0Init] in {
968    def : DSAtomicRetPat<inst, vt, !cast<PatFrag>(frag#"_local_m0_"#vt)>;
969  }
970
971  let OtherPredicates = [NotLDSRequiresM0Init] in {
972    def : DSAtomicRetPat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt,
973                         !cast<PatFrag>(frag#"_local_"#vt)>;
974  }
975
976  let OtherPredicates = [HasGDS] in {
977    def : DSAtomicRetPat<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt),
978                         /* complexity */ 0, /* gds */ 1>;
979  }
980}
981
// Like DSAtomicRetPat_mc, but for every predicate group it emits two patterns:
// one mapping the returning fragment to `inst`, and one mapping the "_noret_"
// fragment to `noRetInst`. The no-return pattern is given complexity 1 (the
// returning one keeps the default / 0), so it is preferred when both match.
// Fragment names are assembled from `frag`, the address-space infix
// ("_local_m0_", "_local_", "_region_m0_"), an optional "noret_" and `vt`.
982multiclass DSAtomicRetNoRetPat_mc<DS_Pseudo inst, DS_Pseudo noRetInst,
983                                  ValueType vt, string frag> {
984  let OtherPredicates = [LDSRequiresM0Init] in {
985    def : DSAtomicRetPat<inst, vt,
986                         !cast<PatFrag>(frag#"_local_m0_"#vt)>;
987    def : DSAtomicRetPat<noRetInst, vt,
988                         !cast<PatFrag>(frag#"_local_m0_noret_"#vt), /* complexity */ 1>;
989  }
990
  // Without M0 initialization the "_gfx9" variants of both pseudos are used.
991  let OtherPredicates = [NotLDSRequiresM0Init] in {
992    def : DSAtomicRetPat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt,
993                         !cast<PatFrag>(frag#"_local_"#vt)>;
994    def : DSAtomicRetPat<!cast<DS_Pseudo>(!cast<string>(noRetInst)#"_gfx9"), vt,
995                         !cast<PatFrag>(frag#"_local_noret_"#vt), /* complexity */ 1>;
996  }
997
  // "_region_" fragments: same pseudos as the M0 case, with gds = 1.
998  let OtherPredicates = [HasGDS] in {
999    def : DSAtomicRetPat<inst, vt,
1000                         !cast<PatFrag>(frag#"_region_m0_"#vt),
1001                         /* complexity */ 0, /* gds */ 1>;
1002    def : DSAtomicRetPat<noRetInst, vt,
1003                         !cast<PatFrag>(frag#"_region_m0_noret_"#vt),
1004                         /* complexity */ 1, /* gds */ 1>;
1005  }
1006}
1007
1008
1009
1010let SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10 in {
1011// Caution, the order of src and cmp is the *opposite* of the BUFFER_ATOMIC_CMPSWAP opcode.
// Compare-and-swap pattern: the fragment takes (address, cmp, swap) and the
// selected `inst` receives (ptr, cmp, swap, offset, gds), i.e. $cmp is passed
// before $swap. `complexity` becomes the pattern's AddedComplexity.
1012class DSAtomicCmpXChgSwapped<DS_Pseudo inst, ValueType vt, PatFrag frag,
1013  int complexity = 0, bit gds=0> : GCNPat<
1014  (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$cmp, vt:$swap),
1015  (inst $ptr, getVregSrcForVT<vt>.ret:$cmp, getVregSrcForVT<vt>.ret:$swap, Offset:$offset, (i1 gds))> {
1016  let AddedComplexity = complexity;
1017}
1018
// Instantiates DSAtomicCmpXChgSwapped for a returning pseudo `inst` and its
// no-return counterpart `noRetInst`, for three predicate groups
// (LDSRequiresM0Init, NotLDSRequiresM0Init with the "_gfx9" pseudos, and
// HasGDS with gds = 1). The "_noret_" patterns get complexity 1.
1019multiclass DSAtomicCmpXChgSwapped_mc<DS_Pseudo inst, DS_Pseudo noRetInst, ValueType vt,
1020                                     string frag> {
1021  let OtherPredicates = [LDSRequiresM0Init] in {
1022    def : DSAtomicCmpXChgSwapped<inst, vt, !cast<PatFrag>(frag#"_local_m0_"#vt)>;
1023    def : DSAtomicCmpXChgSwapped<noRetInst, vt, !cast<PatFrag>(frag#"_local_m0_noret_"#vt),
1024                                 /* complexity */ 1>;
1025  }
1026
1027  let OtherPredicates = [NotLDSRequiresM0Init] in {
1028    def : DSAtomicCmpXChgSwapped<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt,
1029                                 !cast<PatFrag>(frag#"_local_"#vt)>;
1030    def : DSAtomicCmpXChgSwapped<!cast<DS_Pseudo>(!cast<string>(noRetInst)#"_gfx9"), vt,
1031                                 !cast<PatFrag>(frag#"_local_noret_"#vt),
1032                                 /* complexity */ 1>;
1033  }
1034
1035  let OtherPredicates = [HasGDS] in {
1036    def : DSAtomicCmpXChgSwapped<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt),
1037                                 /* complexity */ 0, /* gds */ 1>;
1038    def : DSAtomicCmpXChgSwapped<noRetInst, vt, !cast<PatFrag>(frag#"_region_m0_noret_"#vt),
1039                                 /* complexity */ 1, /* gds */ 1>;
1040  }
1041}
1042} // End SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10
1043
1044let SubtargetPredicate = isGFX11Plus in {
1045// The order of src and cmp agrees with the BUFFER_ATOMIC_CMPSWAP opcode.
// Compare-and-swap pattern: the fragment takes (address, cmp, swap) and the
// selected `inst` receives (ptr, swap, cmp, offset, gds), i.e. $swap is passed
// before $cmp. `complexity` becomes the pattern's AddedComplexity.
1046class DSAtomicCmpXChg<DS_Pseudo inst, ValueType vt, PatFrag frag,
1047  int complexity = 0, bit gds=0> : GCNPat<
1048  (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$cmp, vt:$swap),
1049  (inst $ptr, getVregSrcForVT<vt>.ret:$swap, getVregSrcForVT<vt>.ret:$cmp, Offset:$offset, (i1 gds))> {
1050  let AddedComplexity = complexity;
1051}
1052
// Instantiates DSAtomicCmpXChg for a returning pseudo `inst` and its no-return
// counterpart `noRetInst`. Unlike DSAtomicCmpXChgSwapped_mc there is no
// LDSRequiresM0Init group: the "_local_" fragments always use the "_gfx9"
// pseudos, and the "_region_m0_" fragments (HasGDS) use the base pseudos with
// gds = 1. The "_noret_" patterns get complexity 1.
1053multiclass DSAtomicCmpXChg_mc<DS_Pseudo inst, DS_Pseudo noRetInst, ValueType vt, string frag> {
1054
1055  def : DSAtomicCmpXChg<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt,
1056                        !cast<PatFrag>(frag#"_local_"#vt)>;
1057  def : DSAtomicCmpXChg<!cast<DS_Pseudo>(!cast<string>(noRetInst)#"_gfx9"), vt,
1058                        !cast<PatFrag>(frag#"_local_noret_"#vt), /* complexity */ 1>;
1059
1060  let OtherPredicates = [HasGDS] in {
1061    def : DSAtomicCmpXChg<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt),
1062                          /* complexity */ 0, /* gds */ 1>;
1063    def : DSAtomicCmpXChg<noRetInst, vt, !cast<PatFrag>(frag#"_region_m0_noret_"#vt),
1064                          /* complexity */ 1, /* gds */ 1>;
1065  }
1066}
1067} // End SubtargetPredicate = isGFX11Plus
1068
1069// 32-bit atomics.
// atomic_swap only has a returning form here (DSAtomicRetPat_mc); every other
// operation pairs a *_RTN_* pseudo with its no-return counterpart.
1070defm : DSAtomicRetPat_mc<DS_WRXCHG_RTN_B32, i32, "atomic_swap">;
1071defm : DSAtomicRetNoRetPat_mc<DS_ADD_RTN_U32, DS_ADD_U32, i32, "atomic_load_add">;
1072defm : DSAtomicRetNoRetPat_mc<DS_SUB_RTN_U32, DS_SUB_U32, i32, "atomic_load_sub">;
1073defm : DSAtomicRetNoRetPat_mc<DS_INC_RTN_U32, DS_INC_U32, i32, "atomic_load_uinc_wrap">;
1074defm : DSAtomicRetNoRetPat_mc<DS_DEC_RTN_U32, DS_DEC_U32, i32, "atomic_load_udec_wrap">;
1075defm : DSAtomicRetNoRetPat_mc<DS_AND_RTN_B32, DS_AND_B32, i32, "atomic_load_and">;
1076defm : DSAtomicRetNoRetPat_mc<DS_OR_RTN_B32, DS_OR_B32, i32, "atomic_load_or">;
1077defm : DSAtomicRetNoRetPat_mc<DS_XOR_RTN_B32, DS_XOR_B32, i32, "atomic_load_xor">;
1078defm : DSAtomicRetNoRetPat_mc<DS_MIN_RTN_I32, DS_MIN_I32, i32, "atomic_load_min">;
1079defm : DSAtomicRetNoRetPat_mc<DS_MAX_RTN_I32, DS_MAX_I32, i32, "atomic_load_max">;
1080defm : DSAtomicRetNoRetPat_mc<DS_MIN_RTN_U32, DS_MIN_U32, i32, "atomic_load_umin">;
1081defm : DSAtomicRetNoRetPat_mc<DS_MAX_RTN_U32, DS_MAX_U32, i32, "atomic_load_umax">;
1082defm : DSAtomicRetNoRetPat_mc<DS_MIN_RTN_F32, DS_MIN_F32, f32, "atomic_load_fmin">;
1083defm : DSAtomicRetNoRetPat_mc<DS_MAX_RTN_F32, DS_MAX_F32, f32, "atomic_load_fmax">;
1084
1085
// Packed 16-bit floating-point add (v2f16 and v2bf16).
1086let SubtargetPredicate = HasAtomicDsPkAdd16Insts in {
1087defm : DSAtomicRetNoRetPat_mc<DS_PK_ADD_RTN_F16, DS_PK_ADD_F16, v2f16, "atomic_load_fadd">;
1088defm : DSAtomicRetNoRetPat_mc<DS_PK_ADD_RTN_BF16, DS_PK_ADD_BF16, v2bf16, "atomic_load_fadd">;
1089}
1090
// Compare-and-swap: DS_CMPST_* with the swapped operand order up to GFX10,
// DS_CMPSTORE_* from GFX11 on.
1091let SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10 in {
1092defm : DSAtomicCmpXChgSwapped_mc<DS_CMPST_RTN_B32, DS_CMPST_B32, i32, "atomic_cmp_swap">;
1093}
1094
1095let SubtargetPredicate = isGFX11Plus in {
1096defm : DSAtomicCmpXChg_mc<DS_CMPSTORE_RTN_B32, DS_CMPSTORE_B32, i32, "atomic_cmp_swap">;
1097}
1098
1099let SubtargetPredicate = HasLDSFPAtomicAddF32 in {
1100defm : DSAtomicRetNoRetPat_mc<DS_ADD_RTN_F32, DS_ADD_F32, f32, "atomic_load_fadd">;
1101}
1102
1103// 64-bit atomics.
// Same layout as the 32-bit list: atomic_swap only has a returning form,
// every other operation pairs a *_RTN_* pseudo with its no-return counterpart.
1104defm : DSAtomicRetPat_mc<DS_WRXCHG_RTN_B64, i64, "atomic_swap">;
1105defm : DSAtomicRetNoRetPat_mc<DS_ADD_RTN_U64, DS_ADD_U64, i64, "atomic_load_add">;
1106defm : DSAtomicRetNoRetPat_mc<DS_SUB_RTN_U64, DS_SUB_U64, i64, "atomic_load_sub">;
1107defm : DSAtomicRetNoRetPat_mc<DS_INC_RTN_U64, DS_INC_U64, i64, "atomic_load_uinc_wrap">;
1108defm : DSAtomicRetNoRetPat_mc<DS_DEC_RTN_U64, DS_DEC_U64, i64, "atomic_load_udec_wrap">;
1109defm : DSAtomicRetNoRetPat_mc<DS_AND_RTN_B64, DS_AND_B64, i64, "atomic_load_and">;
1110defm : DSAtomicRetNoRetPat_mc<DS_OR_RTN_B64, DS_OR_B64, i64, "atomic_load_or">;
1111defm : DSAtomicRetNoRetPat_mc<DS_XOR_RTN_B64, DS_XOR_B64, i64, "atomic_load_xor">;
1112defm : DSAtomicRetNoRetPat_mc<DS_MIN_RTN_I64, DS_MIN_I64, i64, "atomic_load_min">;
1113defm : DSAtomicRetNoRetPat_mc<DS_MAX_RTN_I64, DS_MAX_I64, i64, "atomic_load_max">;
1114defm : DSAtomicRetNoRetPat_mc<DS_MIN_RTN_U64, DS_MIN_U64, i64, "atomic_load_umin">;
1115defm : DSAtomicRetNoRetPat_mc<DS_MAX_RTN_U64, DS_MAX_U64, i64, "atomic_load_umax">;
1116defm : DSAtomicRetNoRetPat_mc<DS_MIN_RTN_F64, DS_MIN_F64, f64, "atomic_load_fmin">;
1117defm : DSAtomicRetNoRetPat_mc<DS_MAX_RTN_F64, DS_MAX_F64, f64, "atomic_load_fmax">;
1118
// Compare-and-swap: DS_CMPST_* with the swapped operand order up to GFX10,
// DS_CMPSTORE_* from GFX11 on.
1119let SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10 in {
1120defm : DSAtomicCmpXChgSwapped_mc<DS_CMPST_RTN_B64, DS_CMPST_B64, i64, "atomic_cmp_swap">;
1121} // End SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10
1122
1123let SubtargetPredicate = isGFX11Plus in {
1124defm : DSAtomicCmpXChg_mc<DS_CMPSTORE_RTN_B64, DS_CMPSTORE_B64, i64, "atomic_cmp_swap">;
1125} // End SubtargetPredicate = isGFX11Plus
1126
// f64 LDS atomic add, available with HasLdsAtomicAddF64. These patterns use
// DSAtomicRetPat directly on the base pseudos (no _m0 / _gfx9 / region split).
// The no-return forms get AddedComplexity = 1.
1127let SubtargetPredicate = HasLdsAtomicAddF64 in {
1128def : DSAtomicRetPat<DS_ADD_RTN_F64, f64, atomic_load_fadd_local_f64>;
1129let AddedComplexity = 1 in
1130def : DSAtomicRetPat<DS_ADD_F64, f64, atomic_load_fadd_local_noret_f64>;
1131
// Pattern for an atomic given as `frag` taking (address, value) and producing
// a `vt` result; `inst` receives (ptr, value, offset, gds).
1132class DSAtomicRetPatIntrinsic<DS_Pseudo inst, ValueType vt, PatFrag frag,
1133  bit gds=0> : GCNPat <
1134  (vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value)),
1135  (inst $ptr, getVregSrcForVT<vt>.ret:$value, Offset:$offset, (i1 gds))> {
1136}
1137
// The same two pseudos are also selected for the
// int_amdgcn_flat_atomic_fadd*_local_addrspace fragments.
1138def : DSAtomicRetPatIntrinsic<DS_ADD_RTN_F64, f64, int_amdgcn_flat_atomic_fadd_local_addrspace>;
1139let AddedComplexity = 1 in
1140def : DSAtomicRetPatIntrinsic<DS_ADD_F64, f64, int_amdgcn_flat_atomic_fadd_noret_local_addrspace>;
1141}
1142
// The v2f16 DS_PK_ADD_RTN_F16 / DS_PK_ADD_F16 "atomic_load_fadd" patterns are
// instantiated once, next to the v2bf16 ones in the 32-bit atomics section
// above, under the same HasAtomicDsPkAdd16Insts predicate. A second, identical
// instantiation here would only emit duplicate selection patterns.
1146
// SIds_ordered_count selects DS_ORDERED_COUNT (only with HasGDS); the offset
// is converted to an i16 immediate.
1147let OtherPredicates = [HasGDS] in
1148def : GCNPat <
1149  (SIds_ordered_count i32:$value, i16:$offset),
1150  (DS_ORDERED_COUNT $value, (as_i16imm $offset))
1151>;
1152
// int_amdgcn_ds_add_gs_reg_rtn with an i64 result maps directly to
// DS_ADD_GS_REG_RTN.
1153def : GCNPat <
1154  (i64 (int_amdgcn_ds_add_gs_reg_rtn i32:$src, timm:$offset32)),
1155  (DS_ADD_GS_REG_RTN VGPR_32:$src, (as_i32timm $offset32))
1156>;
1157
// With an i32 result the same instruction is emitted, its result is copied to
// VReg_64 and only the sub0 part is extracted.
1158def : GCNPat <
1159  (i32 (int_amdgcn_ds_add_gs_reg_rtn i32:$src, timm:$offset32)),
1160  (EXTRACT_SUBREG
1161    (i64 (COPY_TO_REGCLASS
1162      (DS_ADD_GS_REG_RTN VGPR_32:$src, (as_i32timm $offset32)),
1163      VReg_64)),
1164    sub0)
1165>;
1166
// Same pair of patterns for int_amdgcn_ds_sub_gs_reg_rtn / DS_SUB_GS_REG_RTN.
1167def : GCNPat <
1168  (i64 (int_amdgcn_ds_sub_gs_reg_rtn i32:$src, timm:$offset32)),
1169  (DS_SUB_GS_REG_RTN VGPR_32:$src, (as_i32timm $offset32))
1170>;
1171
1172def : GCNPat <
1173  (i32 (int_amdgcn_ds_sub_gs_reg_rtn i32:$src, timm:$offset32)),
1174  (EXTRACT_SUBREG
1175    (i64 (COPY_TO_REGCLASS
1176      (DS_SUB_GS_REG_RTN VGPR_32:$src, (as_i32timm $offset32)),
1177      VReg_64)),
1178    sub0)
1179>;
1180
1181//===----------------------------------------------------------------------===//
1182// Target-specific instruction encodings.
1183//===----------------------------------------------------------------------===//
1184
1185//===----------------------------------------------------------------------===//
1186// Base ENC_DS for GFX6, GFX7, GFX10, GFX11, GFX12.
1187//===----------------------------------------------------------------------===//
1188
// Real (encoded) DS instruction for the pseudo `ps`.
//   op      - 8-bit opcode, placed in Inst{25-18}.
//   ef      - encoding family passed on to SIMCInstr.
//   opName  - assembly mnemonic (defaults to the pseudo's Mnemonic).
//   hasGDS  - when false, the gds field is fixed to 0.
// Each operand field is encoded only if the pseudo's matching has_* bit is
// set; otherwise the field is filled with 0 (or ps.gdsValue for gds).
1189class Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<bits<8> op, DS_Pseudo ps, int ef,
1190                                               string opName = ps.Mnemonic,
1191                                               bit hasGDS = true>
1192    : DS_Real<ps, opName>, SIMCInstr <ps.PseudoInstr, ef> {
1193
1194  let Inst{7-0}   = !if(ps.has_offset0, offset0, 0);
1195  let Inst{15-8}  = !if(ps.has_offset1, offset1, 0);
1196  let Inst{17}    = !if(ps.has_gds, gds, ps.gdsValue);
1197  let Inst{25-18} = op;
  // Constant field value 0x36 in the top six bits of the first dword.
1198  let Inst{31-26} = 0x36;
  // Without an addr operand, pseudos with has_gws_data0 put data0 in the
  // addr field instead.
1199  let Inst{39-32} = !if(ps.has_addr, addr, !if(ps.has_gws_data0, data0{7-0}, 0));
1200  let Inst{47-40} = !if(ps.has_data0, data0{7-0}, 0);
1201  let Inst{55-48} = !if(ps.has_data1, data1{7-0}, 0);
1202  let Inst{63-56} = !if(ps.has_vdst, vdst{7-0}, 0);
1203
  // Leave gds unset (?) when the encoding supports GDS, otherwise fix it to 0.
1204  let gds = !if(hasGDS, ?, 0);
1205}
1206
1207//===----------------------------------------------------------------------===//
1208// GFX12.
1209//===----------------------------------------------------------------------===//
1210
// Defines <NAME>_gfx12, the GFX12 real instruction for the pseudo named NAME.
//   op        - 8-bit opcode.
//   name      - assembly mnemonic; defaults to NAME in lower case.
//   needAlias - when true and the pseudo's mnemonic differs from `name`, an
//               AMDGPUMnemonicAlias<ps.Mnemonic, name> is also defined.
// The real instruction is created with hasGDS = false and is only available to
// the assembler under isGFX12Plus, in the "GFX12" decoder namespace.
1211multiclass DS_Real_gfx12<bits<8> op, string name = !tolower(NAME), bit needAlias = true> {
1212  defvar ps = !cast<DS_Pseudo>(NAME);
1213  let AssemblerPredicate = isGFX12Plus in {
1214    let DecoderNamespace = "GFX12" in
1215      def _gfx12 :
1216        Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<op, ps, SIEncodingFamily.GFX12,
1217                                               name, /*hasGDS=*/false>;
1218    if !and(needAlias, !ne(ps.Mnemonic, name)) then
1219      def : AMDGPUMnemonicAlias<ps.Mnemonic, name>;
1220  } // End AssemblerPredicate
1221}
1222
// GFX12-only encodings. The floating-point min/max pseudos get new
// "ds_min_num_*" / "ds_max_num_*" mnemonics here; the remaining entries use
// the default lower-cased pseudo name.
1223defm DS_MIN_F32           : DS_Real_gfx12<0x012, "ds_min_num_f32">;
1224defm DS_MAX_F32           : DS_Real_gfx12<0x013, "ds_max_num_f32">;
1225defm DS_MIN_RTN_F32       : DS_Real_gfx12<0x032, "ds_min_num_rtn_f32">;
1226defm DS_MAX_RTN_F32       : DS_Real_gfx12<0x033, "ds_max_num_rtn_f32">;
1227defm DS_MIN_F64           : DS_Real_gfx12<0x052, "ds_min_num_f64">;
1228defm DS_MAX_F64           : DS_Real_gfx12<0x053, "ds_max_num_f64">;
1229defm DS_MIN_RTN_F64       : DS_Real_gfx12<0x072, "ds_min_num_rtn_f64">;
1230defm DS_MAX_RTN_F64       : DS_Real_gfx12<0x073, "ds_max_num_rtn_f64">;
1231defm DS_COND_SUB_U32      : DS_Real_gfx12<0x098>;
1232defm DS_SUB_CLAMP_U32     : DS_Real_gfx12<0x099>;
1233defm DS_COND_SUB_RTN_U32  : DS_Real_gfx12<0x0a8>;
1234defm DS_SUB_CLAMP_RTN_U32 : DS_Real_gfx12<0x0a9>;
1235defm DS_PK_ADD_F16        : DS_Real_gfx12<0x09a>;
1236defm DS_PK_ADD_RTN_F16    : DS_Real_gfx12<0x0aa>;
1237defm DS_PK_ADD_BF16       : DS_Real_gfx12<0x09b>;
1238defm DS_PK_ADD_RTN_BF16   : DS_Real_gfx12<0x0ab>;
1239
1240// New aliases added in GFX12 without renaming the instructions.
1241let AssemblerPredicate = isGFX12Plus in {
1242  def : AMDGPUMnemonicAlias<"ds_subrev_u32", "ds_rsub_u32">;
1243  def : AMDGPUMnemonicAlias<"ds_subrev_rtn_u32", "ds_rsub_rtn_u32">;
1244  def : AMDGPUMnemonicAlias<"ds_subrev_u64", "ds_rsub_u64">;
1245  def : AMDGPUMnemonicAlias<"ds_subrev_rtn_u64", "ds_rsub_rtn_u64">;
1246}
1247
1248//===----------------------------------------------------------------------===//
1249// GFX11.
1250//===----------------------------------------------------------------------===//
1251
// Defines <NAME>_gfx11, the GFX11 real instruction for the pseudo named NAME.
//   op   - 8-bit opcode.
//   name - assembly mnemonic; defaults to NAME in lower case.
// If the pseudo's mnemonic differs from `name`, an
// AMDGPUMnemonicAlias<ps.Mnemonic, name> is defined as well. Everything is
// restricted to isGFX11Only; the real uses the "GFX11" decoder namespace.
1252multiclass DS_Real_gfx11<bits<8> op, string name = !tolower(NAME)> {
1253  defvar ps = !cast<DS_Pseudo>(NAME);
1254  let AssemblerPredicate = isGFX11Only in {
1255    let DecoderNamespace = "GFX11" in
1256      def _gfx11 :
1257        Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<op, ps, SIEncodingFamily.GFX11,
1258                                               name>;
1259    if !ne(ps.Mnemonic, name) then
1260      def : AMDGPUMnemonicAlias<ps.Mnemonic, name>;
1261  } // End AssemblerPredicate
1262}
1263
// Defines both the GFX11 and the GFX12 real instruction with the same opcode
// and mnemonic.
1264multiclass DS_Real_gfx11_gfx12<bits<8> op, string name = !tolower(NAME)>
1265  : DS_Real_gfx11<op, name>, DS_Real_gfx12<op, name>;
1266
// GFX11/GFX12 encodings of pseudos whose mnemonic changes on these targets:
// ds_write* becomes ds_store*, ds_wrxchg* becomes ds_storexchg*, ds_read*
// becomes ds_load*, and the "2" / "2st64" forms become "_2addr" /
// "_2addr_stride64".
1267defm DS_WRITE_B32           : DS_Real_gfx11_gfx12<0x00d, "ds_store_b32">;
1268defm DS_WRITE2_B32          : DS_Real_gfx11_gfx12<0x00e, "ds_store_2addr_b32">;
1269defm DS_WRITE2ST64_B32      : DS_Real_gfx11_gfx12<0x00f, "ds_store_2addr_stride64_b32">;
1270defm DS_WRITE_B8            : DS_Real_gfx11_gfx12<0x01e, "ds_store_b8">;
1271defm DS_WRITE_B16           : DS_Real_gfx11_gfx12<0x01f, "ds_store_b16">;
1272defm DS_WRXCHG_RTN_B32      : DS_Real_gfx11_gfx12<0x02d, "ds_storexchg_rtn_b32">;
1273defm DS_WRXCHG2_RTN_B32     : DS_Real_gfx11_gfx12<0x02e, "ds_storexchg_2addr_rtn_b32">;
1274defm DS_WRXCHG2ST64_RTN_B32 : DS_Real_gfx11_gfx12<0x02f, "ds_storexchg_2addr_stride64_rtn_b32">;
1275defm DS_READ_B32            : DS_Real_gfx11_gfx12<0x036, "ds_load_b32">;
1276defm DS_READ2_B32           : DS_Real_gfx11_gfx12<0x037, "ds_load_2addr_b32">;
1277defm DS_READ2ST64_B32       : DS_Real_gfx11_gfx12<0x038, "ds_load_2addr_stride64_b32">;
1278defm DS_READ_I8             : DS_Real_gfx11_gfx12<0x039, "ds_load_i8">;
1279defm DS_READ_U8             : DS_Real_gfx11_gfx12<0x03a, "ds_load_u8">;
1280defm DS_READ_I16            : DS_Real_gfx11_gfx12<0x03b, "ds_load_i16">;
1281defm DS_READ_U16            : DS_Real_gfx11_gfx12<0x03c, "ds_load_u16">;
1282defm DS_WRITE_B64           : DS_Real_gfx11_gfx12<0x04d, "ds_store_b64">;
1283defm DS_WRITE2_B64          : DS_Real_gfx11_gfx12<0x04e, "ds_store_2addr_b64">;
1284defm DS_WRITE2ST64_B64      : DS_Real_gfx11_gfx12<0x04f, "ds_store_2addr_stride64_b64">;
1285defm DS_WRXCHG_RTN_B64      : DS_Real_gfx11_gfx12<0x06d, "ds_storexchg_rtn_b64">;
1286defm DS_WRXCHG2_RTN_B64     : DS_Real_gfx11_gfx12<0x06e, "ds_storexchg_2addr_rtn_b64">;
1287defm DS_WRXCHG2ST64_RTN_B64 : DS_Real_gfx11_gfx12<0x06f, "ds_storexchg_2addr_stride64_rtn_b64">;
1288defm DS_READ_B64            : DS_Real_gfx11_gfx12<0x076, "ds_load_b64">;
1289defm DS_READ2_B64           : DS_Real_gfx11_gfx12<0x077, "ds_load_2addr_b64">;
1290defm DS_READ2ST64_B64       : DS_Real_gfx11_gfx12<0x078, "ds_load_2addr_stride64_b64">;
1291defm DS_WRITE_B8_D16_HI     : DS_Real_gfx11_gfx12<0x0a0, "ds_store_b8_d16_hi">;
1292defm DS_WRITE_B16_D16_HI    : DS_Real_gfx11_gfx12<0x0a1, "ds_store_b16_d16_hi">;
1293defm DS_READ_U8_D16         : DS_Real_gfx11_gfx12<0x0a2, "ds_load_u8_d16">;
1294defm DS_READ_U8_D16_HI      : DS_Real_gfx11_gfx12<0x0a3, "ds_load_u8_d16_hi">;
1295defm DS_READ_I8_D16         : DS_Real_gfx11_gfx12<0x0a4, "ds_load_i8_d16">;
1296defm DS_READ_I8_D16_HI      : DS_Real_gfx11_gfx12<0x0a5, "ds_load_i8_d16_hi">;
1297defm DS_READ_U16_D16        : DS_Real_gfx11_gfx12<0x0a6, "ds_load_u16_d16">;
1298defm DS_READ_U16_D16_HI     : DS_Real_gfx11_gfx12<0x0a7, "ds_load_u16_d16_hi">;
1299defm DS_WRITE_ADDTID_B32    : DS_Real_gfx11_gfx12<0x0b0, "ds_store_addtid_b32">;
1300defm DS_READ_ADDTID_B32     : DS_Real_gfx11_gfx12<0x0b1, "ds_load_addtid_b32">;
1301defm DS_WRITE_B96           : DS_Real_gfx11_gfx12<0x0de, "ds_store_b96">;
1302defm DS_WRITE_B128          : DS_Real_gfx11_gfx12<0x0df, "ds_store_b128">;
1303defm DS_READ_B96            : DS_Real_gfx11_gfx12<0x0fe, "ds_load_b96">;
1304defm DS_READ_B128           : DS_Real_gfx11_gfx12<0x0ff, "ds_load_b128">;
1305
1306// DS_CMPST_* are renamed to DS_CMPSTORE_* in GFX11, and the data operands (src and cmp) are also swapped
1307// compared to pre-GFX11.
1308// Note: no mnemonic alias is generated, to avoid a potential ambiguity due to this change in semantics.
1309
// The B32/B64 compare-store forms are defined for GFX11 and GFX12; the
// F32/F64 forms are defined for GFX11 only.
1310defm DS_CMPSTORE_B32                     : DS_Real_gfx11_gfx12<0x010>;
1311defm DS_CMPSTORE_F32                     : DS_Real_gfx11<0x011>;
1312defm DS_CMPSTORE_RTN_B32                 : DS_Real_gfx11_gfx12<0x030>;
1313defm DS_CMPSTORE_RTN_F32                 : DS_Real_gfx11<0x031>;
1314defm DS_CMPSTORE_B64                     : DS_Real_gfx11_gfx12<0x050>;
1315defm DS_CMPSTORE_F64                     : DS_Real_gfx11<0x051>;
1316defm DS_CMPSTORE_RTN_B64                 : DS_Real_gfx11_gfx12<0x070>;
1317defm DS_CMPSTORE_RTN_F64                 : DS_Real_gfx11<0x071>;
1318
// DS_ADD_RTN_F32 uses opcode 0x079 on GFX11/GFX12. The GS-register and BVH
// stack instructions are defined for GFX11 only.
1319defm DS_ADD_RTN_F32                      : DS_Real_gfx11_gfx12<0x079>;
1320defm DS_ADD_GS_REG_RTN                   : DS_Real_gfx11<0x07a>;
1321defm DS_SUB_GS_REG_RTN                   : DS_Real_gfx11<0x07b>;
1322defm DS_BVH_STACK_RTN_B32                : DS_Real_gfx11<0x0ad>;
1323
1324//===----------------------------------------------------------------------===//
1325// GFX10.
1326//===----------------------------------------------------------------------===//
1327
// Defines <NAME>_gfx10, the GFX10 real instruction for the pseudo named NAME,
// with opcode `op` and the pseudo's own mnemonic. Restricted to isGFX10Only
// and placed in the "GFX10" decoder namespace.
1328let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
1329  multiclass DS_Real_gfx10<bits<8> op>  {
1330    def _gfx10 : Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<op,
1331      !cast<DS_Pseudo>(NAME), SIEncodingFamily.GFX10>;
1332  }
1333} // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10"
1334
// Encodings defined through DS_Real_gfx10 alone. Note that DS_ADD_RTN_F32 uses
// opcode 0x055 here, whereas its GFX11/GFX12 definition uses 0x079.
1335defm DS_ADD_RTN_F32      : DS_Real_gfx10<0x055>;
1336defm DS_WRITE_B8_D16_HI  : DS_Real_gfx10<0x0a0>;
1337defm DS_WRITE_B16_D16_HI : DS_Real_gfx10<0x0a1>;
1338defm DS_READ_U8_D16      : DS_Real_gfx10<0x0a2>;
1339defm DS_READ_U8_D16_HI   : DS_Real_gfx10<0x0a3>;
1340defm DS_READ_I8_D16      : DS_Real_gfx10<0x0a4>;
1341defm DS_READ_I8_D16_HI   : DS_Real_gfx10<0x0a5>;
1342defm DS_READ_U16_D16     : DS_Real_gfx10<0x0a6>;
1343defm DS_READ_U16_D16_HI  : DS_Real_gfx10<0x0a7>;
1344defm DS_WRITE_ADDTID_B32 : DS_Real_gfx10<0x0b0>;
1345defm DS_READ_ADDTID_B32  : DS_Real_gfx10<0x0b1>;
1346
1347//===----------------------------------------------------------------------===//
1348// GFX10, GFX11, GFX12.
1349//===----------------------------------------------------------------------===//
1350
// Convenience multiclasses that define the same opcode for several
// generations at once, using the default mnemonic of each per-generation
// multiclass.
1351multiclass DS_Real_gfx10_gfx11_gfx12<bits<8> op> :
1352  DS_Real_gfx10<op>, DS_Real_gfx11<op>, DS_Real_gfx12<op>;
1353
1354multiclass DS_Real_gfx10_gfx11<bits<8> op> :
1355  DS_Real_gfx10<op>, DS_Real_gfx11<op>;
1356
// Instructions sharing one opcode across GFX10, GFX11 and GFX12;
// DS_ADD_SRC2_F32 is defined for GFX10 only.
1357defm DS_ADD_F32          : DS_Real_gfx10_gfx11_gfx12<0x015>;
1358defm DS_ADD_SRC2_F32     : DS_Real_gfx10<0x095>;
1359defm DS_PERMUTE_B32      : DS_Real_gfx10_gfx11_gfx12<0x0b2>;
1360defm DS_BPERMUTE_B32     : DS_Real_gfx10_gfx11_gfx12<0x0b3>;
1361
1362//===----------------------------------------------------------------------===//
1363// GFX7, GFX10, GFX11, GFX12.
1364//===----------------------------------------------------------------------===//
1365
// Defines <NAME>_gfx7, the GFX7-only real instruction for the pseudo named
// NAME. It uses the SIEncodingFamily.SI encoding family and the "GFX7"
// decoder namespace.
1366let AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7" in {
1367  multiclass DS_Real_gfx7<bits<8> op> {
1368    def _gfx7 : Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<op,
1369      !cast<DS_Pseudo>(NAME), SIEncodingFamily.SI>;
1370  }
1371} // End AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7"
1372
// Combinations of the GFX7 real with the GFX10 / GFX11 / GFX12 reals, all
// sharing the same opcode.
1373multiclass DS_Real_gfx7_gfx10_gfx11_gfx12<bits<8> op> :
1374  DS_Real_gfx7<op>, DS_Real_gfx10_gfx11_gfx12<op>;
1375
1376multiclass DS_Real_gfx7_gfx10_gfx11<bits<8> op> :
1377  DS_Real_gfx7<op>, DS_Real_gfx10_gfx11<op>;
1378
1379multiclass DS_Real_gfx7_gfx10<bits<8> op> :
1380  DS_Real_gfx7<op>, DS_Real_gfx10<op>;
1381
1382// FIXME-GFX7: Add tests when upstreaming this part.
// The 96/128-bit reads and writes are only given GFX7 and GFX10 reals here;
// their GFX11/GFX12 reals are defined separately under new mnemonics.
1383defm DS_GWS_SEMA_RELEASE_ALL : DS_Real_gfx7_gfx10_gfx11<0x018>;
1384defm DS_WRAP_RTN_B32         : DS_Real_gfx7_gfx10_gfx11<0x034>;
1385defm DS_CONDXCHG32_RTN_B64   : DS_Real_gfx7_gfx10_gfx11_gfx12<0x07e>;
1386defm DS_WRITE_B96            : DS_Real_gfx7_gfx10<0x0de>;
1387defm DS_WRITE_B128           : DS_Real_gfx7_gfx10<0x0df>;
1388defm DS_READ_B96             : DS_Real_gfx7_gfx10<0x0fe>;
1389defm DS_READ_B128            : DS_Real_gfx7_gfx10<0x0ff>;
1390
1391//===----------------------------------------------------------------------===//
1392// GFX6, GFX7, GFX10, GFX11, GFX12.
1393//===----------------------------------------------------------------------===//
1394
// Defines <NAME>_gfx6_gfx7, a single real instruction shared by GFX6 and GFX7
// (predicate isGFX6GFX7, decoder namespace "GFX6GFX7"), using the
// SIEncodingFamily.SI encoding family.
1395let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
1396  multiclass DS_Real_gfx6_gfx7<bits<8> op> {
1397    def _gfx6_gfx7 : Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<op,
1398      !cast<DS_Pseudo>(NAME), SIEncodingFamily.SI>;
1399  }
1400} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"
1401
// Combinations of the shared GFX6/GFX7 real with the GFX10 / GFX11 / GFX12
// reals, all sharing the same opcode.
1402multiclass DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<bits<8> op> :
1403  DS_Real_gfx6_gfx7<op>, DS_Real_gfx10_gfx11_gfx12<op>;
1404
1405multiclass DS_Real_gfx6_gfx7_gfx10_gfx11<bits<8> op> :
1406  DS_Real_gfx6_gfx7<op>, DS_Real_gfx10_gfx11<op>;
1407
1408multiclass DS_Real_gfx6_gfx7_gfx10<bits<8> op> :
1409  DS_Real_gfx6_gfx7<op>, DS_Real_gfx10<op>;
1410
1411defm DS_ADD_U32             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x000>;
1412defm DS_SUB_U32             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x001>;
1413defm DS_RSUB_U32            : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x002>;
1414defm DS_INC_U32             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x003>;
1415defm DS_DEC_U32             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x004>;
1416defm DS_MIN_I32             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x005>;
1417defm DS_MAX_I32             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x006>;
1418defm DS_MIN_U32             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x007>;
1419defm DS_MAX_U32             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x008>;
1420defm DS_AND_B32             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x009>;
1421defm DS_OR_B32              : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x00a>;
1422defm DS_XOR_B32             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x00b>;
1423defm DS_MSKOR_B32           : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x00c>;
1424
1425defm DS_WRITE_B32           : DS_Real_gfx6_gfx7_gfx10<0x00d>;
1426defm DS_WRITE2_B32          : DS_Real_gfx6_gfx7_gfx10<0x00e>;
1427defm DS_WRITE2ST64_B32      : DS_Real_gfx6_gfx7_gfx10<0x00f>;
1428defm DS_CMPST_B32           : DS_Real_gfx6_gfx7_gfx10<0x010>;
1429defm DS_CMPST_F32           : DS_Real_gfx6_gfx7_gfx10<0x011>;
1430
1431defm DS_MIN_F32             : DS_Real_gfx6_gfx7_gfx10_gfx11<0x012>;
1432defm DS_MAX_F32             : DS_Real_gfx6_gfx7_gfx10_gfx11<0x013>;
1433defm DS_NOP                 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x014>;
1434defm DS_GWS_INIT            : DS_Real_gfx6_gfx7_gfx10_gfx11<0x019>;
1435defm DS_GWS_SEMA_V          : DS_Real_gfx6_gfx7_gfx10_gfx11<0x01a>;
1436defm DS_GWS_SEMA_BR         : DS_Real_gfx6_gfx7_gfx10_gfx11<0x01b>;
1437defm DS_GWS_SEMA_P          : DS_Real_gfx6_gfx7_gfx10_gfx11<0x01c>;
1438defm DS_GWS_BARRIER         : DS_Real_gfx6_gfx7_gfx10_gfx11<0x01d>;
1439
1440defm DS_WRITE_B8            : DS_Real_gfx6_gfx7_gfx10<0x01e>;
1441defm DS_WRITE_B16           : DS_Real_gfx6_gfx7_gfx10<0x01f>;
1442
// Real-instruction definitions for DS opcodes 0x020-0x03f.
// NOTE(review): the DS_Real_gfx* multiclasses are defined outside this chunk;
// judging by their names, the suffix lists the GPU generations that receive an
// encoding with the given opcode value - confirm against their definitions.

// Opcodes 0x020-0x02c.
defm DS_ADD_RTN_U32         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x020>;
defm DS_SUB_RTN_U32         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x021>;
defm DS_RSUB_RTN_U32        : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x022>;
defm DS_INC_RTN_U32         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x023>;
defm DS_DEC_RTN_U32         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x024>;
defm DS_MIN_RTN_I32         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x025>;
defm DS_MAX_RTN_I32         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x026>;
defm DS_MIN_RTN_U32         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x027>;
defm DS_MAX_RTN_U32         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x028>;
defm DS_AND_RTN_B32         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x029>;
defm DS_OR_RTN_B32          : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x02a>;
defm DS_XOR_RTN_B32         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x02b>;
defm DS_MSKOR_RTN_B32       : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x02c>;

// Opcodes 0x02d-0x031.
defm DS_WRXCHG_RTN_B32      : DS_Real_gfx6_gfx7_gfx10<0x02d>;
defm DS_WRXCHG2_RTN_B32     : DS_Real_gfx6_gfx7_gfx10<0x02e>;
defm DS_WRXCHG2ST64_RTN_B32 : DS_Real_gfx6_gfx7_gfx10<0x02f>;
defm DS_CMPST_RTN_B32       : DS_Real_gfx6_gfx7_gfx10<0x030>;
defm DS_CMPST_RTN_F32       : DS_Real_gfx6_gfx7_gfx10<0x031>;

// Opcodes 0x032-0x033 and 0x035.
defm DS_MIN_RTN_F32         : DS_Real_gfx6_gfx7_gfx10_gfx11<0x032>;
defm DS_MAX_RTN_F32         : DS_Real_gfx6_gfx7_gfx10_gfx11<0x033>;
defm DS_SWIZZLE_B32         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x035>;

// Opcodes 0x036-0x03c.
defm DS_READ_B32            : DS_Real_gfx6_gfx7_gfx10<0x036>;
defm DS_READ2_B32           : DS_Real_gfx6_gfx7_gfx10<0x037>;
defm DS_READ2ST64_B32       : DS_Real_gfx6_gfx7_gfx10<0x038>;
defm DS_READ_I8             : DS_Real_gfx6_gfx7_gfx10<0x039>;
defm DS_READ_U8             : DS_Real_gfx6_gfx7_gfx10<0x03a>;
defm DS_READ_I16            : DS_Real_gfx6_gfx7_gfx10<0x03b>;
defm DS_READ_U16            : DS_Real_gfx6_gfx7_gfx10<0x03c>;

// Opcodes 0x03d-0x03f.
defm DS_CONSUME             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x03d>;
defm DS_APPEND              : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x03e>;
defm DS_ORDERED_COUNT       : DS_Real_gfx6_gfx7_gfx10_gfx11<0x03f>;
// Real-instruction definitions for DS opcodes 0x040-0x053.
// NOTE(review): the DS_Real_gfx* multiclasses are defined outside this chunk;
// judging by their names, the suffix lists the GPU generations that receive an
// encoding with the given opcode value - confirm against their definitions.

// Opcodes 0x040-0x04c.
defm DS_ADD_U64        : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x040>;
defm DS_SUB_U64        : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x041>;
defm DS_RSUB_U64       : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x042>;
defm DS_INC_U64        : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x043>;
defm DS_DEC_U64        : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x044>;
defm DS_MIN_I64        : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x045>;
defm DS_MAX_I64        : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x046>;
defm DS_MIN_U64        : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x047>;
defm DS_MAX_U64        : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x048>;
defm DS_AND_B64        : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x049>;
defm DS_OR_B64         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x04a>;
defm DS_XOR_B64        : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x04b>;
defm DS_MSKOR_B64      : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x04c>;

// Opcodes 0x04d-0x051.
defm DS_WRITE_B64      : DS_Real_gfx6_gfx7_gfx10<0x04d>;
defm DS_WRITE2_B64     : DS_Real_gfx6_gfx7_gfx10<0x04e>;
defm DS_WRITE2ST64_B64 : DS_Real_gfx6_gfx7_gfx10<0x04f>;
defm DS_CMPST_B64      : DS_Real_gfx6_gfx7_gfx10<0x050>;
defm DS_CMPST_F64      : DS_Real_gfx6_gfx7_gfx10<0x051>;

// Opcodes 0x052-0x053.
defm DS_MIN_F64        : DS_Real_gfx6_gfx7_gfx10_gfx11<0x052>;
defm DS_MAX_F64        : DS_Real_gfx6_gfx7_gfx10_gfx11<0x053>;
// Real-instruction definitions for DS opcodes 0x060-0x078.
// NOTE(review): the DS_Real_gfx* multiclasses are defined outside this chunk;
// judging by their names, the suffix lists the GPU generations that receive an
// encoding with the given opcode value - confirm against their definitions.

// Opcodes 0x060-0x06c.
defm DS_ADD_RTN_U64         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x060>;
defm DS_SUB_RTN_U64         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x061>;
defm DS_RSUB_RTN_U64        : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x062>;
defm DS_INC_RTN_U64         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x063>;
defm DS_DEC_RTN_U64         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x064>;
defm DS_MIN_RTN_I64         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x065>;
defm DS_MAX_RTN_I64         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x066>;
defm DS_MIN_RTN_U64         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x067>;
defm DS_MAX_RTN_U64         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x068>;
defm DS_AND_RTN_B64         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x069>;
defm DS_OR_RTN_B64          : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x06a>;
defm DS_XOR_RTN_B64         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x06b>;
defm DS_MSKOR_RTN_B64       : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x06c>;

// Opcodes 0x06d-0x071.
defm DS_WRXCHG_RTN_B64      : DS_Real_gfx6_gfx7_gfx10<0x06d>;
defm DS_WRXCHG2_RTN_B64     : DS_Real_gfx6_gfx7_gfx10<0x06e>;
defm DS_WRXCHG2ST64_RTN_B64 : DS_Real_gfx6_gfx7_gfx10<0x06f>;
defm DS_CMPST_RTN_B64       : DS_Real_gfx6_gfx7_gfx10<0x070>;
defm DS_CMPST_RTN_F64       : DS_Real_gfx6_gfx7_gfx10<0x071>;

// Opcodes 0x072-0x073.
defm DS_MIN_RTN_F64         : DS_Real_gfx6_gfx7_gfx10_gfx11<0x072>;
defm DS_MAX_RTN_F64         : DS_Real_gfx6_gfx7_gfx10_gfx11<0x073>;

// Opcodes 0x076-0x078.
defm DS_READ_B64            : DS_Real_gfx6_gfx7_gfx10<0x076>;
defm DS_READ2_B64           : DS_Real_gfx6_gfx7_gfx10<0x077>;
defm DS_READ2ST64_B64       : DS_Real_gfx6_gfx7_gfx10<0x078>;
// Real-instruction definitions for the *_SRC2_* DS opcodes (0x080-0x0d3).
// Every entry in this run uses the DS_Real_gfx6_gfx7_gfx10 multiclass, which is
// defined outside this chunk.

// Opcodes 0x080-0x093.
defm DS_ADD_SRC2_U32   : DS_Real_gfx6_gfx7_gfx10<0x080>;
defm DS_SUB_SRC2_U32   : DS_Real_gfx6_gfx7_gfx10<0x081>;
defm DS_RSUB_SRC2_U32  : DS_Real_gfx6_gfx7_gfx10<0x082>;
defm DS_INC_SRC2_U32   : DS_Real_gfx6_gfx7_gfx10<0x083>;
defm DS_DEC_SRC2_U32   : DS_Real_gfx6_gfx7_gfx10<0x084>;
defm DS_MIN_SRC2_I32   : DS_Real_gfx6_gfx7_gfx10<0x085>;
defm DS_MAX_SRC2_I32   : DS_Real_gfx6_gfx7_gfx10<0x086>;
defm DS_MIN_SRC2_U32   : DS_Real_gfx6_gfx7_gfx10<0x087>;
defm DS_MAX_SRC2_U32   : DS_Real_gfx6_gfx7_gfx10<0x088>;
defm DS_AND_SRC2_B32   : DS_Real_gfx6_gfx7_gfx10<0x089>;
defm DS_OR_SRC2_B32    : DS_Real_gfx6_gfx7_gfx10<0x08a>;
defm DS_XOR_SRC2_B32   : DS_Real_gfx6_gfx7_gfx10<0x08b>;
defm DS_WRITE_SRC2_B32 : DS_Real_gfx6_gfx7_gfx10<0x08d>;
defm DS_MIN_SRC2_F32   : DS_Real_gfx6_gfx7_gfx10<0x092>;
defm DS_MAX_SRC2_F32   : DS_Real_gfx6_gfx7_gfx10<0x093>;

// Opcodes 0x0c0-0x0d3.
defm DS_ADD_SRC2_U64   : DS_Real_gfx6_gfx7_gfx10<0x0c0>;
defm DS_SUB_SRC2_U64   : DS_Real_gfx6_gfx7_gfx10<0x0c1>;
defm DS_RSUB_SRC2_U64  : DS_Real_gfx6_gfx7_gfx10<0x0c2>;
defm DS_INC_SRC2_U64   : DS_Real_gfx6_gfx7_gfx10<0x0c3>;
defm DS_DEC_SRC2_U64   : DS_Real_gfx6_gfx7_gfx10<0x0c4>;
defm DS_MIN_SRC2_I64   : DS_Real_gfx6_gfx7_gfx10<0x0c5>;
defm DS_MAX_SRC2_I64   : DS_Real_gfx6_gfx7_gfx10<0x0c6>;
defm DS_MIN_SRC2_U64   : DS_Real_gfx6_gfx7_gfx10<0x0c7>;
defm DS_MAX_SRC2_U64   : DS_Real_gfx6_gfx7_gfx10<0x0c8>;
defm DS_AND_SRC2_B64   : DS_Real_gfx6_gfx7_gfx10<0x0c9>;
defm DS_OR_SRC2_B64    : DS_Real_gfx6_gfx7_gfx10<0x0ca>;
defm DS_XOR_SRC2_B64   : DS_Real_gfx6_gfx7_gfx10<0x0cb>;
defm DS_WRITE_SRC2_B64 : DS_Real_gfx6_gfx7_gfx10<0x0cd>;
defm DS_MIN_SRC2_F64   : DS_Real_gfx6_gfx7_gfx10<0x0d2>;
defm DS_MAX_SRC2_F64   : DS_Real_gfx6_gfx7_gfx10<0x0d3>;
1556
//===----------------------------------------------------------------------===//
// GFX8, GFX9 (VI).
//===----------------------------------------------------------------------===//
1560
// Real (non-pseudo) DS instruction registered under SIEncodingFamily.VI.
// It is assembled only when isGFX8GFX9 holds and is decoded through the
// "GFX8" decoder namespace.
//
//   op - 8-bit opcode, stored in Inst{24-17}.
//   ps - DS_Pseudo being realised. Its has_* bits choose, field by field,
//        whether the operand is encoded or the field is left as a constant.
//
// NOTE(review): offset0, offset1, gds, acc, addr, data0, data1 and vdst are not
// declared in this class; presumably they come from DS_Real - confirm.
class DS_Real_vi<bits<8> op, DS_Pseudo ps>
    : DS_Real<ps>,
      SIMCInstr<ps.PseudoInstr, SIEncodingFamily.VI> {
  let AssemblerPredicate = isGFX8GFX9;
  let DecoderNamespace = "GFX8";

  // Bits 31-0: offsets, gds bit, opcode, acc bit and the DS prefix.
  let Inst{7-0}   = !if(ps.has_offset0, offset0, 0);
  let Inst{15-8}  = !if(ps.has_offset1, offset1, 0);
  // Without a gds operand the bit is fixed to the pseudo's gdsValue.
  let Inst{16}    = !if(ps.has_gds, gds, ps.gdsValue);
  let Inst{24-17} = op;
  let Inst{25}    = acc;
  let Inst{31-26} = 0x36; // ds prefix

  // Bits 63-32: address, data and destination fields. An absent operand is
  // encoded as 0, except that the address field carries the low 8 bits of
  // data0 when the pseudo has no addr but sets has_gws_data0.
  let Inst{39-32} = !if(ps.has_addr,
                        addr,
                        !if(ps.has_gws_data0, data0{7-0}, 0));
  let Inst{47-40} = !if(ps.has_data0, data0{7-0}, 0);
  let Inst{55-48} = !if(ps.has_data1, data1{7-0}, 0);
  let Inst{63-56} = !if(ps.has_vdst, vdst{7-0}, 0);
}
1579
// DS_Real_vi instantiations: each record pairs an 8-bit opcode with the
// DS_Pseudo it realises. Opcodes are written as two lowercase hex digits.

// Opcodes 0x00-0x15.
def DS_ADD_U32_vi          : DS_Real_vi<0x00, DS_ADD_U32>;
def DS_SUB_U32_vi          : DS_Real_vi<0x01, DS_SUB_U32>;
def DS_RSUB_U32_vi         : DS_Real_vi<0x02, DS_RSUB_U32>;
def DS_INC_U32_vi          : DS_Real_vi<0x03, DS_INC_U32>;
def DS_DEC_U32_vi          : DS_Real_vi<0x04, DS_DEC_U32>;
def DS_MIN_I32_vi          : DS_Real_vi<0x05, DS_MIN_I32>;
def DS_MAX_I32_vi          : DS_Real_vi<0x06, DS_MAX_I32>;
def DS_MIN_U32_vi          : DS_Real_vi<0x07, DS_MIN_U32>;
def DS_MAX_U32_vi          : DS_Real_vi<0x08, DS_MAX_U32>;
def DS_AND_B32_vi          : DS_Real_vi<0x09, DS_AND_B32>;
def DS_OR_B32_vi           : DS_Real_vi<0x0a, DS_OR_B32>;
def DS_XOR_B32_vi          : DS_Real_vi<0x0b, DS_XOR_B32>;
def DS_MSKOR_B32_vi        : DS_Real_vi<0x0c, DS_MSKOR_B32>;
def DS_WRITE_B32_vi        : DS_Real_vi<0x0d, DS_WRITE_B32>;
def DS_WRITE2_B32_vi       : DS_Real_vi<0x0e, DS_WRITE2_B32>;
def DS_WRITE2ST64_B32_vi   : DS_Real_vi<0x0f, DS_WRITE2ST64_B32>;
def DS_CMPST_B32_vi        : DS_Real_vi<0x10, DS_CMPST_B32>;
def DS_CMPST_F32_vi        : DS_Real_vi<0x11, DS_CMPST_F32>;
def DS_MIN_F32_vi          : DS_Real_vi<0x12, DS_MIN_F32>;
def DS_MAX_F32_vi          : DS_Real_vi<0x13, DS_MAX_F32>;
def DS_NOP_vi              : DS_Real_vi<0x14, DS_NOP>;
def DS_ADD_F32_vi          : DS_Real_vi<0x15, DS_ADD_F32>;

// Opcodes 0x99-0x9d.
def DS_GWS_INIT_vi         : DS_Real_vi<0x99, DS_GWS_INIT>;
def DS_GWS_SEMA_V_vi       : DS_Real_vi<0x9a, DS_GWS_SEMA_V>;
def DS_GWS_SEMA_BR_vi      : DS_Real_vi<0x9b, DS_GWS_SEMA_BR>;
def DS_GWS_SEMA_P_vi       : DS_Real_vi<0x9c, DS_GWS_SEMA_P>;
def DS_GWS_BARRIER_vi      : DS_Real_vi<0x9d, DS_GWS_BARRIER>;

// Opcodes 0x1d-0x1f.
def DS_WRITE_ADDTID_B32_vi : DS_Real_vi<0x1d, DS_WRITE_ADDTID_B32>;
def DS_WRITE_B8_vi         : DS_Real_vi<0x1e, DS_WRITE_B8>;
def DS_WRITE_B16_vi        : DS_Real_vi<0x1f, DS_WRITE_B16>;
// DS_Real_vi instantiations: each record pairs an 8-bit opcode with the
// DS_Pseudo it realises.

// Opcodes 0x20-0x35.
def DS_ADD_RTN_U32_vi         : DS_Real_vi<0x20, DS_ADD_RTN_U32>;
def DS_SUB_RTN_U32_vi         : DS_Real_vi<0x21, DS_SUB_RTN_U32>;
def DS_RSUB_RTN_U32_vi        : DS_Real_vi<0x22, DS_RSUB_RTN_U32>;
def DS_INC_RTN_U32_vi         : DS_Real_vi<0x23, DS_INC_RTN_U32>;
def DS_DEC_RTN_U32_vi         : DS_Real_vi<0x24, DS_DEC_RTN_U32>;
def DS_MIN_RTN_I32_vi         : DS_Real_vi<0x25, DS_MIN_RTN_I32>;
def DS_MAX_RTN_I32_vi         : DS_Real_vi<0x26, DS_MAX_RTN_I32>;
def DS_MIN_RTN_U32_vi         : DS_Real_vi<0x27, DS_MIN_RTN_U32>;
def DS_MAX_RTN_U32_vi         : DS_Real_vi<0x28, DS_MAX_RTN_U32>;
def DS_AND_RTN_B32_vi         : DS_Real_vi<0x29, DS_AND_RTN_B32>;
def DS_OR_RTN_B32_vi          : DS_Real_vi<0x2a, DS_OR_RTN_B32>;
def DS_XOR_RTN_B32_vi         : DS_Real_vi<0x2b, DS_XOR_RTN_B32>;
def DS_MSKOR_RTN_B32_vi       : DS_Real_vi<0x2c, DS_MSKOR_RTN_B32>;
def DS_WRXCHG_RTN_B32_vi      : DS_Real_vi<0x2d, DS_WRXCHG_RTN_B32>;
def DS_WRXCHG2_RTN_B32_vi     : DS_Real_vi<0x2e, DS_WRXCHG2_RTN_B32>;
def DS_WRXCHG2ST64_RTN_B32_vi : DS_Real_vi<0x2f, DS_WRXCHG2ST64_RTN_B32>;
def DS_CMPST_RTN_B32_vi       : DS_Real_vi<0x30, DS_CMPST_RTN_B32>;
def DS_CMPST_RTN_F32_vi       : DS_Real_vi<0x31, DS_CMPST_RTN_F32>;
def DS_MIN_RTN_F32_vi         : DS_Real_vi<0x32, DS_MIN_RTN_F32>;
def DS_MAX_RTN_F32_vi         : DS_Real_vi<0x33, DS_MAX_RTN_F32>;
def DS_WRAP_RTN_B32_vi        : DS_Real_vi<0x34, DS_WRAP_RTN_B32>;
def DS_ADD_RTN_F32_vi         : DS_Real_vi<0x35, DS_ADD_RTN_F32>;

// Opcodes 0x36-0x3c.
def DS_READ_B32_vi            : DS_Real_vi<0x36, DS_READ_B32>;
def DS_READ2_B32_vi           : DS_Real_vi<0x37, DS_READ2_B32>;
def DS_READ2ST64_B32_vi       : DS_Real_vi<0x38, DS_READ2ST64_B32>;
def DS_READ_I8_vi             : DS_Real_vi<0x39, DS_READ_I8>;
def DS_READ_U8_vi             : DS_Real_vi<0x3a, DS_READ_U8>;
def DS_READ_I16_vi            : DS_Real_vi<0x3b, DS_READ_I16>;
def DS_READ_U16_vi            : DS_Real_vi<0x3c, DS_READ_U16>;

// Opcodes 0xb6 and 0xbd-0xbf.
def DS_READ_ADDTID_B32_vi     : DS_Real_vi<0xb6, DS_READ_ADDTID_B32>;
def DS_CONSUME_vi             : DS_Real_vi<0xbd, DS_CONSUME>;
def DS_APPEND_vi              : DS_Real_vi<0xbe, DS_APPEND>;
def DS_ORDERED_COUNT_vi       : DS_Real_vi<0xbf, DS_ORDERED_COUNT>;

// Opcodes 0x3d-0x3f.
def DS_SWIZZLE_B32_vi         : DS_Real_vi<0x3d, DS_SWIZZLE_B32>;
def DS_PERMUTE_B32_vi         : DS_Real_vi<0x3e, DS_PERMUTE_B32>;
def DS_BPERMUTE_B32_vi        : DS_Real_vi<0x3f, DS_BPERMUTE_B32>;
1646
// DS_Real_vi instantiations: each record pairs an 8-bit opcode with the
// DS_Pseudo it realises. Hex digits are lowercase throughout.

// Opcodes 0x40-0x53.
def DS_ADD_U64_vi          : DS_Real_vi<0x40, DS_ADD_U64>;
def DS_SUB_U64_vi          : DS_Real_vi<0x41, DS_SUB_U64>;
def DS_RSUB_U64_vi         : DS_Real_vi<0x42, DS_RSUB_U64>;
def DS_INC_U64_vi          : DS_Real_vi<0x43, DS_INC_U64>;
def DS_DEC_U64_vi          : DS_Real_vi<0x44, DS_DEC_U64>;
def DS_MIN_I64_vi          : DS_Real_vi<0x45, DS_MIN_I64>;
def DS_MAX_I64_vi          : DS_Real_vi<0x46, DS_MAX_I64>;
def DS_MIN_U64_vi          : DS_Real_vi<0x47, DS_MIN_U64>;
def DS_MAX_U64_vi          : DS_Real_vi<0x48, DS_MAX_U64>;
def DS_AND_B64_vi          : DS_Real_vi<0x49, DS_AND_B64>;
def DS_OR_B64_vi           : DS_Real_vi<0x4a, DS_OR_B64>;
def DS_XOR_B64_vi          : DS_Real_vi<0x4b, DS_XOR_B64>;
def DS_MSKOR_B64_vi        : DS_Real_vi<0x4c, DS_MSKOR_B64>;
def DS_WRITE_B64_vi        : DS_Real_vi<0x4d, DS_WRITE_B64>;
def DS_WRITE2_B64_vi       : DS_Real_vi<0x4e, DS_WRITE2_B64>;
def DS_WRITE2ST64_B64_vi   : DS_Real_vi<0x4f, DS_WRITE2ST64_B64>;
def DS_CMPST_B64_vi        : DS_Real_vi<0x50, DS_CMPST_B64>;
def DS_CMPST_F64_vi        : DS_Real_vi<0x51, DS_CMPST_F64>;
def DS_MIN_F64_vi          : DS_Real_vi<0x52, DS_MIN_F64>;
def DS_MAX_F64_vi          : DS_Real_vi<0x53, DS_MAX_F64>;

// Opcodes 0x54-0x55.
def DS_WRITE_B8_D16_HI_vi  : DS_Real_vi<0x54, DS_WRITE_B8_D16_HI>;
def DS_WRITE_B16_D16_HI_vi : DS_Real_vi<0x55, DS_WRITE_B16_D16_HI>;

// Opcodes 0x56-0x5b.
def DS_READ_U8_D16_vi      : DS_Real_vi<0x56, DS_READ_U8_D16>;
def DS_READ_U8_D16_HI_vi   : DS_Real_vi<0x57, DS_READ_U8_D16_HI>;
def DS_READ_I8_D16_vi      : DS_Real_vi<0x58, DS_READ_I8_D16>;
def DS_READ_I8_D16_HI_vi   : DS_Real_vi<0x59, DS_READ_I8_D16_HI>;
def DS_READ_U16_D16_vi     : DS_Real_vi<0x5a, DS_READ_U16_D16>;
def DS_READ_U16_D16_HI_vi  : DS_Real_vi<0x5b, DS_READ_U16_D16_HI>;
1677
// DS_Real_vi instantiations: each record pairs an 8-bit opcode with the
// DS_Pseudo it realises.

// Opcodes 0x60-0x6f.
def DS_ADD_RTN_U64_vi          : DS_Real_vi<0x60, DS_ADD_RTN_U64>;
def DS_SUB_RTN_U64_vi          : DS_Real_vi<0x61, DS_SUB_RTN_U64>;
def DS_RSUB_RTN_U64_vi         : DS_Real_vi<0x62, DS_RSUB_RTN_U64>;
def DS_INC_RTN_U64_vi          : DS_Real_vi<0x63, DS_INC_RTN_U64>;
def DS_DEC_RTN_U64_vi          : DS_Real_vi<0x64, DS_DEC_RTN_U64>;
def DS_MIN_RTN_I64_vi          : DS_Real_vi<0x65, DS_MIN_RTN_I64>;
def DS_MAX_RTN_I64_vi          : DS_Real_vi<0x66, DS_MAX_RTN_I64>;
def DS_MIN_RTN_U64_vi          : DS_Real_vi<0x67, DS_MIN_RTN_U64>;
def DS_MAX_RTN_U64_vi          : DS_Real_vi<0x68, DS_MAX_RTN_U64>;
def DS_AND_RTN_B64_vi          : DS_Real_vi<0x69, DS_AND_RTN_B64>;
def DS_OR_RTN_B64_vi           : DS_Real_vi<0x6a, DS_OR_RTN_B64>;
def DS_XOR_RTN_B64_vi          : DS_Real_vi<0x6b, DS_XOR_RTN_B64>;
def DS_MSKOR_RTN_B64_vi        : DS_Real_vi<0x6c, DS_MSKOR_RTN_B64>;
def DS_WRXCHG_RTN_B64_vi       : DS_Real_vi<0x6d, DS_WRXCHG_RTN_B64>;
def DS_WRXCHG2_RTN_B64_vi      : DS_Real_vi<0x6e, DS_WRXCHG2_RTN_B64>;
def DS_WRXCHG2ST64_RTN_B64_vi  : DS_Real_vi<0x6f, DS_WRXCHG2ST64_RTN_B64>;

// Opcodes 0x7e and 0x98.
def DS_CONDXCHG32_RTN_B64_vi   : DS_Real_vi<0x7e, DS_CONDXCHG32_RTN_B64>;
def DS_GWS_SEMA_RELEASE_ALL_vi : DS_Real_vi<0x98, DS_GWS_SEMA_RELEASE_ALL>;

// Opcodes 0x70-0x73.
def DS_CMPST_RTN_B64_vi        : DS_Real_vi<0x70, DS_CMPST_RTN_B64>;
def DS_CMPST_RTN_F64_vi        : DS_Real_vi<0x71, DS_CMPST_RTN_F64>;
def DS_MIN_RTN_F64_vi          : DS_Real_vi<0x72, DS_MIN_RTN_F64>;
def DS_MAX_RTN_F64_vi          : DS_Real_vi<0x73, DS_MAX_RTN_F64>;

// Opcodes 0x76-0x78.
def DS_READ_B64_vi             : DS_Real_vi<0x76, DS_READ_B64>;
def DS_READ2_B64_vi            : DS_Real_vi<0x77, DS_READ2_B64>;
def DS_READ2ST64_B64_vi        : DS_Real_vi<0x78, DS_READ2ST64_B64>;
1704
// DS_Real_vi instantiations: each record pairs an 8-bit opcode with the
// DS_Pseudo it realises.

// Opcodes 0x80-0x95.
def DS_ADD_SRC2_U32_vi   : DS_Real_vi<0x80, DS_ADD_SRC2_U32>;
def DS_SUB_SRC2_U32_vi   : DS_Real_vi<0x81, DS_SUB_SRC2_U32>;
def DS_RSUB_SRC2_U32_vi  : DS_Real_vi<0x82, DS_RSUB_SRC2_U32>;
def DS_INC_SRC2_U32_vi   : DS_Real_vi<0x83, DS_INC_SRC2_U32>;
def DS_DEC_SRC2_U32_vi   : DS_Real_vi<0x84, DS_DEC_SRC2_U32>;
def DS_MIN_SRC2_I32_vi   : DS_Real_vi<0x85, DS_MIN_SRC2_I32>;
def DS_MAX_SRC2_I32_vi   : DS_Real_vi<0x86, DS_MAX_SRC2_I32>;
def DS_MIN_SRC2_U32_vi   : DS_Real_vi<0x87, DS_MIN_SRC2_U32>;
def DS_MAX_SRC2_U32_vi   : DS_Real_vi<0x88, DS_MAX_SRC2_U32>;
def DS_AND_SRC2_B32_vi   : DS_Real_vi<0x89, DS_AND_SRC2_B32>;
def DS_OR_SRC2_B32_vi    : DS_Real_vi<0x8a, DS_OR_SRC2_B32>;
def DS_XOR_SRC2_B32_vi   : DS_Real_vi<0x8b, DS_XOR_SRC2_B32>;
def DS_WRITE_SRC2_B32_vi : DS_Real_vi<0x8d, DS_WRITE_SRC2_B32>;
def DS_MIN_SRC2_F32_vi   : DS_Real_vi<0x92, DS_MIN_SRC2_F32>;
def DS_MAX_SRC2_F32_vi   : DS_Real_vi<0x93, DS_MAX_SRC2_F32>;
def DS_ADD_SRC2_F32_vi   : DS_Real_vi<0x95, DS_ADD_SRC2_F32>;

// Opcodes 0xc0-0xd3.
def DS_ADD_SRC2_U64_vi   : DS_Real_vi<0xc0, DS_ADD_SRC2_U64>;
def DS_SUB_SRC2_U64_vi   : DS_Real_vi<0xc1, DS_SUB_SRC2_U64>;
def DS_RSUB_SRC2_U64_vi  : DS_Real_vi<0xc2, DS_RSUB_SRC2_U64>;
def DS_INC_SRC2_U64_vi   : DS_Real_vi<0xc3, DS_INC_SRC2_U64>;
def DS_DEC_SRC2_U64_vi   : DS_Real_vi<0xc4, DS_DEC_SRC2_U64>;
def DS_MIN_SRC2_I64_vi   : DS_Real_vi<0xc5, DS_MIN_SRC2_I64>;
def DS_MAX_SRC2_I64_vi   : DS_Real_vi<0xc6, DS_MAX_SRC2_I64>;
def DS_MIN_SRC2_U64_vi   : DS_Real_vi<0xc7, DS_MIN_SRC2_U64>;
def DS_MAX_SRC2_U64_vi   : DS_Real_vi<0xc8, DS_MAX_SRC2_U64>;
def DS_AND_SRC2_B64_vi   : DS_Real_vi<0xc9, DS_AND_SRC2_B64>;
def DS_OR_SRC2_B64_vi    : DS_Real_vi<0xca, DS_OR_SRC2_B64>;
def DS_XOR_SRC2_B64_vi   : DS_Real_vi<0xcb, DS_XOR_SRC2_B64>;
def DS_WRITE_SRC2_B64_vi : DS_Real_vi<0xcd, DS_WRITE_SRC2_B64>;
def DS_MIN_SRC2_F64_vi   : DS_Real_vi<0xd2, DS_MIN_SRC2_F64>;
def DS_MAX_SRC2_F64_vi   : DS_Real_vi<0xd3, DS_MAX_SRC2_F64>;

// Opcodes 0xde-0xdf and 0xfe-0xff.
def DS_WRITE_B96_vi      : DS_Real_vi<0xde, DS_WRITE_B96>;
def DS_WRITE_B128_vi     : DS_Real_vi<0xdf, DS_WRITE_B128>;
def DS_READ_B96_vi       : DS_Real_vi<0xfe, DS_READ_B96>;
def DS_READ_B128_vi      : DS_Real_vi<0xff, DS_READ_B128>;
1740
// GFX90A+.
// (Opcodes 0x5c and 0x7c; same DS_Real_vi encoding class as the records above.)
def DS_ADD_F64_vi         : DS_Real_vi<0x5c, DS_ADD_F64>;
def DS_ADD_RTN_F64_vi     : DS_Real_vi<0x7c, DS_ADD_RTN_F64>;

// GFX940+.
// (Opcodes 0x17/0x18 and 0xb7/0xb8.)
def DS_PK_ADD_F16_vi      : DS_Real_vi<0x17, DS_PK_ADD_F16>;
def DS_PK_ADD_RTN_F16_vi  : DS_Real_vi<0xb7, DS_PK_ADD_RTN_F16>;
def DS_PK_ADD_BF16_vi     : DS_Real_vi<0x18, DS_PK_ADD_BF16>;
def DS_PK_ADD_RTN_BF16_vi : DS_Real_vi<0xb8, DS_PK_ADD_RTN_BF16>;
1750