xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/DSInstructions.td (revision e1e636193db45630c7881246d25902e57c43d24e)
1//===-- DSInstructions.td - DS Instruction Definitions --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
// Common base of every DS pseudo instruction.  It records the mnemonic and the
// assembly operand string separately and exposes a set of has_* bits that
// describe which operands a given instruction carries.
class DS_Pseudo <string opName, dag outs, dag ins, string asmOps,
                 list<dag> pattern = []>
  : InstSI <outs, ins, "", pattern>,
    SIMCInstr <opName, SIEncodingFamily.NONE> {

  // Pseudo instructions are never encoded directly.
  let isPseudo = 1;
  let isCodeGenOnly = 1;

  let LGKM_CNT = 1;
  let DS = 1;
  let GWS = 0;
  let Size = 8;
  let UseNamedOperandTable = 1;

  // Most instructions both load and store data, so set this as the default.
  let mayLoad = 1;
  let mayStore = 1;

  let hasSideEffects = 0;
  let SchedRW = [WriteLDS];

  string Mnemonic = opName;
  string AsmOperands = asmOps;

  // These bits are something of a hack: it would be more natural to test the
  // "outs" and "ins" dags for the presence of particular operands.
  bits<1> has_vdst = 1;
  bits<1> has_addr = 1;
  bits<1> has_data0 = 1;
  bits<1> has_data1 = 1;

  bits<1> has_gws_data0 = 0; // data0 is encoded as addr

  bits<1> has_offset  = 1; // has "offset" that should be split to offset0,1
  bits<1> has_offset0 = 1;
  bits<1> has_offset1 = 1;

  bits<1> has_gds = 1;
  bits<1> gdsValue = 0; // if has_gds == 0 set gds to this value

  bits<1> has_m0_read = 1;

  // M0 is listed as an implicit use only when has_m0_read is set.
  let Uses = !if(has_m0_read, [M0, EXEC], [EXEC]);
}
52
// Common base of the encoded ("real") form of a DS pseudo.  Operand lists and
// the assembly string are taken from the pseudo `ps`; the flags that matter
// for code generation are copied over from it as well.
class DS_Real <DS_Pseudo ps, string opName = ps.Mnemonic> :
  InstSI <ps.OutOperandList, ps.InOperandList, opName # ps.AsmOperands>,
  Enc64 {

  let isPseudo = 0;
  let isCodeGenOnly = 0;
  let LGKM_CNT = 1;
  let DS = 1;
  let UseNamedOperandTable = 1;

  // copy relevant pseudo op flags
  let GWS                = ps.GWS;
  let SubtargetPredicate = ps.SubtargetPredicate;
  let OtherPredicates    = ps.OtherPredicates;
  let SchedRW            = ps.SchedRW;
  let mayLoad            = ps.mayLoad;
  let mayStore           = ps.mayStore;
  let IsAtomicRet        = ps.IsAtomicRet;
  let IsAtomicNoRet      = ps.IsAtomicNoRet;

  let Constraints = ps.Constraints;
  let DisableEncoding = ps.DisableEncoding;

  // encoding fields
  bits<10> vdst;
  bits<1> gds;
  bits<8> addr;
  bits<10> data0;
  bits<10> data1;
  bits<8> offset0;
  bits<8> offset1;

  // When the pseudo has a combined 16-bit offset, its low byte becomes offset0
  // and its high byte becomes offset1; otherwise both are left unset here.
  // The "..." range punctuation is used because the hyphen form is deprecated.
  bits<16> offset;
  let offset0 = !if(ps.has_offset, offset{7...0}, ?);
  let offset1 = !if(ps.has_offset, offset{15...8}, ?);

  // acc is bit 9 of vdst when the pseudo has a vdst; failing that, bit 9 of
  // data0 when it has data0 (or GWS data0); otherwise 0.
  bits<1> acc = !if(ps.has_vdst, vdst{9},
                    !if(!or(ps.has_data0, ps.has_gws_data0), data0{9}, 0));
}
92
93// DS Pseudo instructions
94
// Pseudo with one data operand, no address and no result
// (has_vdst, has_addr and has_data1 are cleared).
class DS_0A1D_NORET<string opName, RegisterClass rc = VGPR_32>
  : DS_Pseudo<opName,
              (outs),
              (ins getLdStRegisterOperand<rc>.ret:$data0, offset:$offset, gds:$gds),
              " $data0$offset$gds"> {
  let has_vdst = 0;
  let has_addr = 0;
  let has_data1 = 0;
}
105
// Pseudo with one address and one data operand and no result.  Marked as
// IsAtomicNoRet.
class DS_1A1D_NORET<string opName, RegisterClass rc = VGPR_32>
  : DS_Pseudo<opName,
              (outs),
              (ins VGPR_32:$addr, getLdStRegisterOperand<rc>.ret:$data0, offset:$offset, gds:$gds),
              " $addr, $data0$offset$gds"> {
  let has_vdst = 0;
  let has_data1 = 0;
  let IsAtomicNoRet = 1;
}
116
// Emits the DS_1A1D_NORET pseudo plus a "_gfx9" twin that is defined with
// has_m0_read = 0 (so M0 is not listed in Uses).
multiclass DS_1A1D_NORET_mc<string opName, RegisterClass rc = VGPR_32> {
  def "" : DS_1A1D_NORET<opName, rc>, AtomicNoRet<opName, 0>;

  let has_m0_read = 0 in
  def _gfx9 : DS_1A1D_NORET<opName, rc>, AtomicNoRet<opName#"_gfx9", 0>;
}
126
// Emits only the has_m0_read = 0 form of a DS_1A1D_NORET pseudo, under the
// unsuffixed name.
multiclass DS_1A1D_NORET_mc_gfx9<string opName, RegisterClass rc = VGPR_32> {
  let has_m0_read = 0 in
  def "" : DS_1A1D_NORET<opName, rc>, AtomicNoRet<opName, 0>;
}
133
// Pseudo with one address and two data operands and no result.  Marked as
// IsAtomicNoRet.
class DS_1A2D_NORET<string opName, RegisterClass rc = VGPR_32,
                    RegisterOperand data_op = getLdStRegisterOperand<rc>.ret>
  : DS_Pseudo<opName,
              (outs),
              (ins VGPR_32:$addr, data_op:$data0, data_op:$data1, offset:$offset, gds:$gds),
              " $addr, $data0, $data1$offset$gds"> {
  let IsAtomicNoRet = 1;
  let has_vdst = 0;
}
144
// Emits the DS_1A2D_NORET pseudo plus a "_gfx9" twin defined with
// has_m0_read = 0.
multiclass DS_1A2D_NORET_mc<string opName, RegisterClass rc = VGPR_32> {
  def "" : DS_1A2D_NORET<opName, rc>, AtomicNoRet<opName, 0>;

  let has_m0_read = 0 in
  def _gfx9 : DS_1A2D_NORET<opName, rc>, AtomicNoRet<opName#"_gfx9", 0>;
}
154
// Pseudo with one address, two data operands and two separate 8-bit offset
// operands (offset0/offset1) instead of a combined offset; no result.
class DS_1A2D_Off8_NORET <string opName, RegisterClass rc = VGPR_32,
                          RegisterOperand data_op = getLdStRegisterOperand<rc>.ret>
  : DS_Pseudo<opName,
              (outs),
              (ins VGPR_32:$addr, data_op:$data0, data_op:$data1,
                   offset0:$offset0, offset1:$offset1, gds:$gds),
              " $addr, $data0, $data1$offset0$offset1$gds"> {
  let has_offset = 0;
  let has_vdst = 0;
}
166
// Emits the DS_1A2D_Off8_NORET pseudo plus a "_gfx9" twin defined with
// has_m0_read = 0.
multiclass DS_1A2D_Off8_NORET_mc <string opName, RegisterClass rc = VGPR_32> {
  def "" : DS_1A2D_Off8_NORET<opName, rc>;

  let has_m0_read = 0 in
  def _gfx9 : DS_1A2D_Off8_NORET<opName, rc>;
}
174
// Pseudo with one data operand and a result but no address.  There is no gds
// operand: "gds" is a fixed part of the assembly string and gdsValue is 1.
class DS_0A1D_RET_GDS<string opName, RegisterClass rc = VGPR_32, RegisterClass src = rc,
                  RegisterOperand dst_op = getLdStRegisterOperand<rc>.ret,
                  RegisterOperand src_op = getLdStRegisterOperand<src>.ret>
  : DS_Pseudo<opName,
              (outs dst_op:$vdst),
              (ins src_op:$data0, offset:$offset),
              " $vdst, $data0$offset gds"> {
  let hasSideEffects = 1;

  let has_addr = 0;
  let has_data1 = 0;
  let has_gds = 0;
  let gdsValue = 1;
}
189
// Pseudo with one address, one data operand and a result of the same register
// operand type.  Marked as IsAtomicRet and uses the post-ISel hook.
class DS_1A1D_RET <string opName, RegisterClass rc = VGPR_32,
                  RegisterOperand data_op = getLdStRegisterOperand<rc>.ret>
  : DS_Pseudo<opName,
              (outs data_op:$vdst),
              (ins VGPR_32:$addr, data_op:$data0, offset:$offset, gds:$gds),
              " $vdst, $addr, $data0$offset$gds"> {
  let IsAtomicRet = 1;
  let hasPostISelHook = 1;
  let has_data1 = 0;
}
201
// Emits the DS_1A1D_RET pseudo plus a "_gfx9" twin defined with
// has_m0_read = 0.  NoRetOp names the matching no-return instruction; when it
// is empty both defs get an empty name and an IsRet flag of 0.
multiclass DS_1A1D_RET_mc <string opName, RegisterClass rc = VGPR_32,
                           string NoRetOp = ""> {
  def "" : DS_1A1D_RET<opName, rc>,
           AtomicNoRet<NoRetOp, !ne(NoRetOp, "")>;

  let has_m0_read = 0 in
  def _gfx9 : DS_1A1D_RET<opName, rc>,
              AtomicNoRet<!if(!eq(NoRetOp, ""), "", NoRetOp#"_gfx9"),
                          !ne(NoRetOp, "")>;
}
213
// Emits only the has_m0_read = 0 form of a DS_1A1D_RET pseudo, under the
// unsuffixed name.  NoRetOp is passed through unchanged (an empty string
// stays empty), and IsRet is set only when NoRetOp is non-empty.
multiclass DS_1A1D_RET_mc_gfx9 <string opName, RegisterClass rc = VGPR_32,
                                string NoRetOp = ""> {
  let has_m0_read = 0 in
  def "" : DS_1A1D_RET<opName, rc>,
           AtomicNoRet<NoRetOp, !ne(NoRetOp, "")>;
}
222
// Pseudo with one address, two data operands and a result.  The result and
// source register classes may differ.  Marked as IsAtomicRet and uses the
// post-ISel hook.
class DS_1A2D_RET<string opName,
                  RegisterClass rc = VGPR_32,
                  RegisterClass src = rc,
                  RegisterOperand dst_op = getLdStRegisterOperand<rc>.ret,
                  RegisterOperand src_op = getLdStRegisterOperand<src>.ret>
  : DS_Pseudo<opName,
              (outs dst_op:$vdst),
              (ins VGPR_32:$addr, src_op:$data0, src_op:$data1, offset:$offset, gds:$gds),
              " $vdst, $addr, $data0, $data1$offset$gds"> {
  let IsAtomicRet = 1;
  let hasPostISelHook = 1;
}
236
// Emits the DS_1A2D_RET pseudo plus a "_gfx9" twin defined with
// has_m0_read = 0.  NoRetOp names the matching no-return instruction.
multiclass DS_1A2D_RET_mc<string opName,
                          RegisterClass rc = VGPR_32,
                          string NoRetOp = "",
                          RegisterClass src = rc> {
  def "" : DS_1A2D_RET<opName, rc, src>,
    AtomicNoRet<NoRetOp, !ne(NoRetOp, "")>;

  let has_m0_read = 0 in {
    // Only append "_gfx9" when there is a no-return counterpart.  Without the
    // guard an empty NoRetOp would turn into the bogus name "_gfx9"; this
    // matches what DS_1A1D_RET_mc does.
    def _gfx9 : DS_1A2D_RET<opName, rc, src>,
      AtomicNoRet<!if(!eq(NoRetOp, ""), "", NoRetOp#"_gfx9"),
                  !ne(NoRetOp, "")>;
  }
}
249
// Pseudo with one address, two data operands, two separate 8-bit offset
// operands (offset0/offset1) and a result.  Uses the post-ISel hook.
class DS_1A2D_Off8_RET<string opName,
                       RegisterClass rc = VGPR_32,
                       RegisterClass src = rc,
                       RegisterOperand dst_op = getLdStRegisterOperand<rc>.ret,
                       RegisterOperand src_op = getLdStRegisterOperand<src>.ret>
  : DS_Pseudo<opName,
              (outs dst_op:$vdst),
              (ins VGPR_32:$addr, src_op:$data0, src_op:$data1,
                   offset0:$offset0, offset1:$offset1, gds:$gds),
              " $vdst, $addr, $data0, $data1$offset0$offset1$gds"> {
  let hasPostISelHook = 1;
  let has_offset = 0;
}
263
// Emits the DS_1A2D_Off8_RET pseudo plus a "_gfx9" twin defined with
// has_m0_read = 0.
multiclass DS_1A2D_Off8_RET_mc<string opName,
                               RegisterClass rc = VGPR_32,
                               RegisterClass src = rc> {
  def "" : DS_1A2D_Off8_RET<opName, rc, src>;

  let has_m0_read = 0 in
  def _gfx9 : DS_1A2D_Off8_RET<opName, rc, src>;
}
273
// Pseudo that produces two outputs: $vdst and an updated $addr.  The $addr
// output is tied to the $addr_in input, which is excluded from the encoding.
// There is no gds operand (gdsValue is 0).
class DS_BVH_STACK<string opName>
  : DS_Pseudo<opName,
              (outs getLdStRegisterOperand<VGPR_32>.ret:$vdst, VGPR_32:$addr),
              (ins VGPR_32:$addr_in, getLdStRegisterOperand<VGPR_32>.ret:$data0,
                   VReg_128:$data1, offset:$offset),
              " $vdst, $addr, $data0, $data1$offset"> {
  let Constraints = "$addr = $addr_in";
  let DisableEncoding = "$addr_in";

  let has_gds = 0;
  let gdsValue = 0;

  // TODO: Use MMOs in the LDS address space instead of hasSideEffects = 1.
  let hasSideEffects = 1;

  // One scheduling entry per output.
  let SchedRW = [WriteLDS, WriteLDS];
}
287
// Pseudo with one address and a result, and no data operands.  When
// HasTiedOutput is set an extra $vdst_in input is appended, tied to $vdst and
// excluded from the encoding.  `ofs` selects the operand type of $offset.
class DS_1A_RET<string opName, RegisterClass rc = VGPR_32, bit HasTiedOutput = 0, Operand ofs = offset,
                RegisterOperand data_op = getLdStRegisterOperand<rc>.ret>
  : DS_Pseudo<opName,
              (outs data_op:$vdst),
              !if(HasTiedOutput,
                  (ins VGPR_32:$addr, ofs:$offset, gds:$gds, data_op:$vdst_in),
                  (ins VGPR_32:$addr, ofs:$offset, gds:$gds)),
              " $vdst, $addr$offset$gds"> {
  let has_data0 = 0;
  let has_data1 = 0;

  let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", "");
  let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
}
301
// Emits the DS_1A_RET pseudo plus a "_gfx9" twin defined with has_m0_read = 0.
multiclass DS_1A_RET_mc<string opName, RegisterClass rc = VGPR_32, bit HasTiedOutput = 0, Operand ofs = offset> {
  def "" : DS_1A_RET<opName, rc, HasTiedOutput, ofs>;

  let has_m0_read = 0 in
  def _gfx9 : DS_1A_RET<opName, rc, HasTiedOutput, ofs>;
}
309
// Shorthand for DS_1A_RET with HasTiedOutput = 1.
class DS_1A_RET_Tied<string opName, RegisterClass rc = VGPR_32>
  : DS_1A_RET<opName, rc, 1>;
312
// Pseudo with one address, two separate 8-bit offset operands
// (offset0/offset1) and a result; no data operands.
class DS_1A_Off8_RET <string opName, RegisterClass rc = VGPR_32>
  : DS_Pseudo<opName,
              (outs getLdStRegisterOperand<rc>.ret:$vdst),
              (ins VGPR_32:$addr, offset0:$offset0, offset1:$offset1, gds:$gds),
              " $vdst, $addr$offset0$offset1$gds"> {
  let has_data0 = 0;
  let has_data1 = 0;
  let has_offset = 0;
}
323
// Emits the DS_1A_Off8_RET pseudo plus a "_gfx9" twin defined with
// has_m0_read = 0.
multiclass DS_1A_Off8_RET_mc <string opName, RegisterClass rc = VGPR_32> {
  def "" : DS_1A_Off8_RET<opName, rc>;

  let has_m0_read = 0 in
  def _gfx9 : DS_1A_Off8_RET<opName, rc>;
}
331
// Pseudo with one address and a result, no data operands.  There is no gds
// operand: "gds" is a fixed part of the assembly string and gdsValue is 1.
class DS_1A_RET_GDS <string opName>
  : DS_Pseudo<opName,
              (outs getLdStRegisterOperand<VGPR_32>.ret:$vdst),
              (ins VGPR_32:$addr, offset:$offset),
              " $vdst, $addr$offset gds"> {
  let has_gds = 0;
  let gdsValue = 1;

  let has_data0 = 0;
  let has_data1 = 0;
}
342
// Pseudo with a result but neither an address nor data operands.
class DS_0A_RET <string opName>
  : DS_Pseudo<opName,
              (outs getLdStRegisterOperand<VGPR_32>.ret:$vdst),
              (ins offset:$offset, gds:$gds),
              " $vdst$offset$gds"> {
  // Stated explicitly, although these are also the DS_Pseudo defaults.
  let mayLoad = 1;
  let mayStore = 1;

  let has_addr = 0;
  let has_data0 = 0;
  let has_data1 = 0;
}
355
// Pseudo with an address only: no result and no data operands.
class DS_1A <string opName>
  : DS_Pseudo<opName,
              (outs),
              (ins VGPR_32:$addr, offset:$offset, gds:$gds),
              " $addr$offset$gds"> {
  // Stated explicitly, although these are also the DS_Pseudo defaults.
  let mayLoad = 1;
  let mayStore = 1;

  let has_vdst = 0;
  let has_data0 = 0;
  let has_data1 = 0;
}
368
// Emits the DS_1A pseudo plus a "_gfx9" twin defined with has_m0_read = 0.
multiclass DS_1A_mc <string opName> {
  def "" : DS_1A<opName>;

  let has_m0_read = 0 in
  def _gfx9 : DS_1A<opName>;
}
376
377
// Base class of the GWS pseudos.  It sets the GWS flag, clears all
// vdst/addr/data operand bits and has no gds operand (gdsValue is 1).
class DS_GWS <string opName, dag ins, string asmOps>
  : DS_Pseudo<opName, (outs), ins, asmOps> {
  let GWS = 1;

  let has_gds   = 0;
  let gdsValue  = 1;

  let has_vdst  = 0;
  let has_addr  = 0;
  let has_data0 = 0;
  let has_data1 = 0;
}
390
// GWS pseudo whose only operand is the offset.
class DS_GWS_0D <string opName>
  : DS_GWS<opName, (ins offset:$offset), "$offset gds"> {
  let hasSideEffects = 1;
}
396
// GWS pseudo with one data operand in addition to the offset.  It sets
// has_gws_data0, which DS_Pseudo describes as "data0 is encoded as addr".
class DS_GWS_1D <string opName>
  : DS_GWS<opName,
           (ins getLdStRegisterOperand<VGPR_32>.ret:$data0, offset:$offset),
           " $data0$offset gds"> {
  let hasSideEffects = 1;
  let has_gws_data0 = 1;
}
405
// Pseudo with no operands at all: every has_* operand bit is cleared, it
// neither loads nor stores, and it is not put in the named operand table.
class DS_VOID <string opName>
  : DS_Pseudo<opName, (outs), (ins), ""> {
  let hasSideEffects = 1;
  let mayLoad = 0;
  let mayStore = 0;
  let UseNamedOperandTable = 0;

  let has_vdst = 0;
  let has_addr = 0;
  let has_data0 = 0;
  let has_data1 = 0;
  let has_offset = 0;
  let has_offset0 = 0;
  let has_offset1 = 0;
  let has_gds = 0;
}
422
// Pseudo with one address, one data operand and a result, selected directly
// from `node` through the embedded pattern.  It is convergent, neither loads
// nor stores, and has no gds operand.
class DS_1A1D_PERMUTE <string opName, SDPatternOperator node = null_frag,
                       RegisterOperand data_op = getLdStRegisterOperand<VGPR_32>.ret>
  : DS_Pseudo<opName,
              (outs data_op:$vdst),
              (ins VGPR_32:$addr, data_op:$data0, offset:$offset),
              " $vdst, $addr, $data0$offset",
              [(set i32:$vdst,
                (node (DS1Addr1Offset i32:$addr, i32:$offset), i32:$data0))] > {
  let isConvergent = 1;
  let mayLoad = 0;
  let mayStore = 0;

  let has_data1 = 0;
  let has_gds = 0;
}
439
// Selection pattern that maps `frag` applied to (pointer + offset, value) onto
// `inst`, with the gds operand fixed to `gds` and AddedComplexity set to
// `complexity`.
class DSAtomicRetPat<DS_Pseudo inst, ValueType vt, PatFrag frag, int complexity = 0,
                     bit gds=0>
  : GCNPat <(frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value),
            (inst $ptr, getVregSrcForVT<vt>.ret:$value, offset:$offset, (i1 gds))> {
  let AddedComplexity = complexity;
}
445
// 32-bit no-return operations built from DS_1A1D_NORET_mc.
defm DS_ADD_U32  : DS_1A1D_NORET_mc<"ds_add_u32">;
defm DS_SUB_U32  : DS_1A1D_NORET_mc<"ds_sub_u32">;
defm DS_RSUB_U32 : DS_1A1D_NORET_mc<"ds_rsub_u32">;
defm DS_INC_U32  : DS_1A1D_NORET_mc<"ds_inc_u32">;
defm DS_DEC_U32  : DS_1A1D_NORET_mc<"ds_dec_u32">;
defm DS_MIN_I32  : DS_1A1D_NORET_mc<"ds_min_i32">;
defm DS_MAX_I32  : DS_1A1D_NORET_mc<"ds_max_i32">;
defm DS_MIN_U32  : DS_1A1D_NORET_mc<"ds_min_u32">;
defm DS_MAX_U32  : DS_1A1D_NORET_mc<"ds_max_u32">;
defm DS_AND_B32  : DS_1A1D_NORET_mc<"ds_and_b32">;
defm DS_OR_B32   : DS_1A1D_NORET_mc<"ds_or_b32">;
defm DS_XOR_B32  : DS_1A1D_NORET_mc<"ds_xor_b32">;

let SubtargetPredicate = HasLDSFPAtomicAdd in
defm DS_ADD_F32  : DS_1A1D_NORET_mc<"ds_add_f32">;

defm DS_MIN_F32  : DS_1A1D_NORET_mc<"ds_min_f32">;
defm DS_MAX_F32  : DS_1A1D_NORET_mc<"ds_max_f32">;
465
// Write instructions: everything in this group is defined with mayLoad = 0.
let mayLoad = 0 in {
  defm DS_WRITE_B8       : DS_1A1D_NORET_mc<"ds_write_b8">;
  defm DS_WRITE_B16      : DS_1A1D_NORET_mc<"ds_write_b16">;
  defm DS_WRITE_B32      : DS_1A1D_NORET_mc<"ds_write_b32">;
  defm DS_WRITE2_B32     : DS_1A2D_Off8_NORET_mc<"ds_write2_b32">;
  defm DS_WRITE2ST64_B32 : DS_1A2D_Off8_NORET_mc<"ds_write2st64_b32">;

  // The D16 "hi" writes only exist in the has_m0_read = 0 form.
  let has_m0_read = 0, SubtargetPredicate = HasD16LoadStore in {
    def DS_WRITE_B8_D16_HI  : DS_1A1D_NORET<"ds_write_b8_d16_hi">;
    def DS_WRITE_B16_D16_HI : DS_1A1D_NORET<"ds_write_b16_d16_hi">;
  } // End has_m0_read = 0, SubtargetPredicate = HasD16LoadStore

  let SubtargetPredicate = HasDSAddTid in
  def DS_WRITE_ADDTID_B32 : DS_0A1D_NORET<"ds_write_addtid_b32">;
} // End mayLoad = 0
488
// 64-bit floating-point add, defined only in the has_m0_read = 0 form.
let SubtargetPredicate = isGFX90APlus in {
defm DS_ADD_F64     : DS_1A1D_NORET_mc_gfx9<"ds_add_f64", VReg_64>;
defm DS_ADD_RTN_F64 : DS_1A1D_RET_mc_gfx9<"ds_add_rtn_f64", VReg_64, "ds_add_f64">;
} // End SubtargetPredicate = isGFX90APlus

// Packed 16-bit floating-point adds, in no-return and returning forms.
let SubtargetPredicate = HasAtomicDsPkAdd16Insts in {
defm DS_PK_ADD_F16      : DS_1A1D_NORET_mc<"ds_pk_add_f16">;
defm DS_PK_ADD_RTN_F16  : DS_1A1D_RET_mc<"ds_pk_add_rtn_f16", VGPR_32, "ds_pk_add_f16">;
defm DS_PK_ADD_BF16     : DS_1A1D_NORET_mc<"ds_pk_add_bf16">;
defm DS_PK_ADD_RTN_BF16 : DS_1A1D_RET_mc<"ds_pk_add_rtn_bf16", VGPR_32, "ds_pk_add_bf16">;
} // End SubtargetPredicate = HasAtomicDsPkAdd16Insts
500
// cmpstore family, in no-return and returning forms.
defm DS_CMPSTORE_B32     : DS_1A2D_NORET_mc<"ds_cmpstore_b32">;
defm DS_CMPSTORE_F32     : DS_1A2D_NORET_mc<"ds_cmpstore_f32">;
defm DS_CMPSTORE_B64     : DS_1A2D_NORET_mc<"ds_cmpstore_b64", VReg_64>;
defm DS_CMPSTORE_F64     : DS_1A2D_NORET_mc<"ds_cmpstore_f64", VReg_64>;
defm DS_CMPSTORE_RTN_B32 : DS_1A2D_RET_mc<"ds_cmpstore_rtn_b32", VGPR_32, "ds_cmpstore_b32">;
defm DS_CMPSTORE_RTN_F32 : DS_1A2D_RET_mc<"ds_cmpstore_rtn_f32", VGPR_32, "ds_cmpstore_f32">;
defm DS_CMPSTORE_RTN_B64 : DS_1A2D_RET_mc<"ds_cmpstore_rtn_b64", VReg_64, "ds_cmpstore_b64">;
defm DS_CMPSTORE_RTN_F64 : DS_1A2D_RET_mc<"ds_cmpstore_rtn_f64", VReg_64, "ds_cmpstore_f64">;

// Remaining 32-bit two-data no-return operations.
defm DS_MSKOR_B32 : DS_1A2D_NORET_mc<"ds_mskor_b32">;
defm DS_CMPST_B32 : DS_1A2D_NORET_mc<"ds_cmpst_b32">;
defm DS_CMPST_F32 : DS_1A2D_NORET_mc<"ds_cmpst_f32">;

// 64-bit no-return operations.
defm DS_ADD_U64   : DS_1A1D_NORET_mc<"ds_add_u64", VReg_64>;
defm DS_SUB_U64   : DS_1A1D_NORET_mc<"ds_sub_u64", VReg_64>;
defm DS_RSUB_U64  : DS_1A1D_NORET_mc<"ds_rsub_u64", VReg_64>;
defm DS_INC_U64   : DS_1A1D_NORET_mc<"ds_inc_u64", VReg_64>;
defm DS_DEC_U64   : DS_1A1D_NORET_mc<"ds_dec_u64", VReg_64>;
defm DS_MIN_I64   : DS_1A1D_NORET_mc<"ds_min_i64", VReg_64>;
defm DS_MAX_I64   : DS_1A1D_NORET_mc<"ds_max_i64", VReg_64>;
defm DS_MIN_U64   : DS_1A1D_NORET_mc<"ds_min_u64", VReg_64>;
defm DS_MAX_U64   : DS_1A1D_NORET_mc<"ds_max_u64", VReg_64>;
defm DS_AND_B64   : DS_1A1D_NORET_mc<"ds_and_b64", VReg_64>;
defm DS_OR_B64    : DS_1A1D_NORET_mc<"ds_or_b64", VReg_64>;
defm DS_XOR_B64   : DS_1A1D_NORET_mc<"ds_xor_b64", VReg_64>;
defm DS_MSKOR_B64 : DS_1A2D_NORET_mc<"ds_mskor_b64", VReg_64>;

// 64-bit writes are defined with mayLoad = 0.
let mayLoad = 0 in {
  defm DS_WRITE_B64      : DS_1A1D_NORET_mc<"ds_write_b64", VReg_64>;
  defm DS_WRITE2_B64     : DS_1A2D_Off8_NORET_mc<"ds_write2_b64", VReg_64>;
  defm DS_WRITE2ST64_B64 : DS_1A2D_Off8_NORET_mc<"ds_write2st64_b64", VReg_64>;
} // End mayLoad = 0

defm DS_CMPST_B64 : DS_1A2D_NORET_mc<"ds_cmpst_b64", VReg_64>;
defm DS_CMPST_F64 : DS_1A2D_NORET_mc<"ds_cmpst_f64", VReg_64>;
defm DS_MIN_F64   : DS_1A1D_NORET_mc<"ds_min_f64", VReg_64>;
defm DS_MAX_F64   : DS_1A1D_NORET_mc<"ds_max_f64", VReg_64>;
536
// 32-bit returning operations.  The last argument names the matching
// no-return instruction.
defm DS_ADD_RTN_U32   : DS_1A1D_RET_mc<"ds_add_rtn_u32", VGPR_32, "ds_add_u32">;

let SubtargetPredicate = HasLDSFPAtomicAdd in
defm DS_ADD_RTN_F32   : DS_1A1D_RET_mc<"ds_add_rtn_f32", VGPR_32, "ds_add_f32">;

defm DS_SUB_RTN_U32   : DS_1A1D_RET_mc<"ds_sub_rtn_u32", VGPR_32, "ds_sub_u32">;
defm DS_RSUB_RTN_U32  : DS_1A1D_RET_mc<"ds_rsub_rtn_u32", VGPR_32, "ds_rsub_u32">;
defm DS_INC_RTN_U32   : DS_1A1D_RET_mc<"ds_inc_rtn_u32", VGPR_32, "ds_inc_u32">;
defm DS_DEC_RTN_U32   : DS_1A1D_RET_mc<"ds_dec_rtn_u32", VGPR_32, "ds_dec_u32">;
defm DS_MIN_RTN_I32   : DS_1A1D_RET_mc<"ds_min_rtn_i32", VGPR_32, "ds_min_i32">;
defm DS_MAX_RTN_I32   : DS_1A1D_RET_mc<"ds_max_rtn_i32", VGPR_32, "ds_max_i32">;
defm DS_MIN_RTN_U32   : DS_1A1D_RET_mc<"ds_min_rtn_u32", VGPR_32, "ds_min_u32">;
defm DS_MAX_RTN_U32   : DS_1A1D_RET_mc<"ds_max_rtn_u32", VGPR_32, "ds_max_u32">;
defm DS_AND_RTN_B32   : DS_1A1D_RET_mc<"ds_and_rtn_b32", VGPR_32, "ds_and_b32">;
defm DS_OR_RTN_B32    : DS_1A1D_RET_mc<"ds_or_rtn_b32", VGPR_32, "ds_or_b32">;
defm DS_XOR_RTN_B32   : DS_1A1D_RET_mc<"ds_xor_rtn_b32", VGPR_32, "ds_xor_b32">;
defm DS_MSKOR_RTN_B32 : DS_1A2D_RET_mc<"ds_mskor_rtn_b32", VGPR_32, "ds_mskor_b32">;
defm DS_CMPST_RTN_B32 : DS_1A2D_RET_mc<"ds_cmpst_rtn_b32", VGPR_32, "ds_cmpst_b32">;
defm DS_CMPST_RTN_F32 : DS_1A2D_RET_mc<"ds_cmpst_rtn_f32", VGPR_32, "ds_cmpst_f32">;
defm DS_MIN_RTN_F32   : DS_1A1D_RET_mc<"ds_min_rtn_f32", VGPR_32, "ds_min_f32">;
defm DS_MAX_RTN_F32   : DS_1A1D_RET_mc<"ds_max_rtn_f32", VGPR_32, "ds_max_f32">;

// 32-bit exchanges; these name no no-return counterpart.
defm DS_WRXCHG_RTN_B32      : DS_1A1D_RET_mc<"ds_wrxchg_rtn_b32">;
defm DS_WRXCHG2_RTN_B32     : DS_1A2D_Off8_RET_mc<"ds_wrxchg2_rtn_b32", VReg_64, VGPR_32>;
defm DS_WRXCHG2ST64_RTN_B32 : DS_1A2D_Off8_RET_mc<"ds_wrxchg2st64_rtn_b32", VReg_64, VGPR_32>;

// 64-bit returning operations.
defm DS_ADD_RTN_U64   : DS_1A1D_RET_mc<"ds_add_rtn_u64", VReg_64, "ds_add_u64">;
defm DS_SUB_RTN_U64   : DS_1A1D_RET_mc<"ds_sub_rtn_u64", VReg_64, "ds_sub_u64">;
defm DS_RSUB_RTN_U64  : DS_1A1D_RET_mc<"ds_rsub_rtn_u64", VReg_64, "ds_rsub_u64">;
defm DS_INC_RTN_U64   : DS_1A1D_RET_mc<"ds_inc_rtn_u64", VReg_64, "ds_inc_u64">;
defm DS_DEC_RTN_U64   : DS_1A1D_RET_mc<"ds_dec_rtn_u64", VReg_64, "ds_dec_u64">;
defm DS_MIN_RTN_I64   : DS_1A1D_RET_mc<"ds_min_rtn_i64", VReg_64, "ds_min_i64">;
defm DS_MAX_RTN_I64   : DS_1A1D_RET_mc<"ds_max_rtn_i64", VReg_64, "ds_max_i64">;
defm DS_MIN_RTN_U64   : DS_1A1D_RET_mc<"ds_min_rtn_u64", VReg_64, "ds_min_u64">;
defm DS_MAX_RTN_U64   : DS_1A1D_RET_mc<"ds_max_rtn_u64", VReg_64, "ds_max_u64">;
defm DS_AND_RTN_B64   : DS_1A1D_RET_mc<"ds_and_rtn_b64", VReg_64, "ds_and_b64">;
defm DS_OR_RTN_B64    : DS_1A1D_RET_mc<"ds_or_rtn_b64", VReg_64, "ds_or_b64">;
defm DS_XOR_RTN_B64   : DS_1A1D_RET_mc<"ds_xor_rtn_b64", VReg_64, "ds_xor_b64">;
defm DS_MSKOR_RTN_B64 : DS_1A2D_RET_mc<"ds_mskor_rtn_b64", VReg_64, "ds_mskor_b64">;
defm DS_CMPST_RTN_B64 : DS_1A2D_RET_mc<"ds_cmpst_rtn_b64", VReg_64, "ds_cmpst_b64">;
defm DS_CMPST_RTN_F64 : DS_1A2D_RET_mc<"ds_cmpst_rtn_f64", VReg_64, "ds_cmpst_f64">;
defm DS_MIN_RTN_F64   : DS_1A1D_RET_mc<"ds_min_rtn_f64", VReg_64, "ds_min_f64">;
defm DS_MAX_RTN_F64   : DS_1A1D_RET_mc<"ds_max_rtn_f64", VReg_64, "ds_max_f64">;

// 64-bit exchanges; these name no no-return counterpart.
defm DS_WRXCHG_RTN_B64      : DS_1A1D_RET_mc<"ds_wrxchg_rtn_b64", VReg_64>;
defm DS_WRXCHG2_RTN_B64     : DS_1A2D_Off8_RET_mc<"ds_wrxchg2_rtn_b64", VReg_128, VReg_64>;
defm DS_WRXCHG2ST64_RTN_B64 : DS_1A2D_Off8_RET_mc<"ds_wrxchg2st64_rtn_b64", VReg_128, VReg_64>;
584
// GWS instructions: all are convergent and use a custom inserter.
let isConvergent = 1, usesCustomInserter = 1 in {
  // ds_gws_init is the only one of these defined with mayLoad = 0.
  def DS_GWS_INIT    : DS_GWS_1D<"ds_gws_init"> {
    let mayLoad = 0;
  }
  def DS_GWS_SEMA_V  : DS_GWS_0D<"ds_gws_sema_v">;
  def DS_GWS_SEMA_BR : DS_GWS_1D<"ds_gws_sema_br">;
  def DS_GWS_SEMA_P  : DS_GWS_0D<"ds_gws_sema_p">;
  def DS_GWS_BARRIER : DS_GWS_1D<"ds_gws_barrier">;
} // End isConvergent = 1, usesCustomInserter = 1
594
// "src2" instructions: address-only pseudos, available only with
// HasDsSrc2Insts.
let SubtargetPredicate = HasDsSrc2Insts in {
  // 32-bit forms.
  def DS_ADD_SRC2_U32  : DS_1A<"ds_add_src2_u32">;
  def DS_SUB_SRC2_U32  : DS_1A<"ds_sub_src2_u32">;
  def DS_RSUB_SRC2_U32 : DS_1A<"ds_rsub_src2_u32">;
  def DS_INC_SRC2_U32  : DS_1A<"ds_inc_src2_u32">;
  def DS_DEC_SRC2_U32  : DS_1A<"ds_dec_src2_u32">;
  def DS_MIN_SRC2_I32  : DS_1A<"ds_min_src2_i32">;
  def DS_MAX_SRC2_I32  : DS_1A<"ds_max_src2_i32">;
  def DS_MIN_SRC2_U32  : DS_1A<"ds_min_src2_u32">;
  def DS_MAX_SRC2_U32  : DS_1A<"ds_max_src2_u32">;
  def DS_AND_SRC2_B32  : DS_1A<"ds_and_src2_b32">;
  def DS_OR_SRC2_B32   : DS_1A<"ds_or_src2_b32">;
  def DS_XOR_SRC2_B32  : DS_1A<"ds_xor_src2_b32">;
  def DS_MIN_SRC2_F32  : DS_1A<"ds_min_src2_f32">;
  def DS_MAX_SRC2_F32  : DS_1A<"ds_max_src2_f32">;

  // 64-bit forms.
  def DS_ADD_SRC2_U64  : DS_1A<"ds_add_src2_u64">;
  def DS_SUB_SRC2_U64  : DS_1A<"ds_sub_src2_u64">;
  def DS_RSUB_SRC2_U64 : DS_1A<"ds_rsub_src2_u64">;
  def DS_INC_SRC2_U64  : DS_1A<"ds_inc_src2_u64">;
  def DS_DEC_SRC2_U64  : DS_1A<"ds_dec_src2_u64">;
  def DS_MIN_SRC2_I64  : DS_1A<"ds_min_src2_i64">;
  def DS_MAX_SRC2_I64  : DS_1A<"ds_max_src2_i64">;
  def DS_MIN_SRC2_U64  : DS_1A<"ds_min_src2_u64">;
  def DS_MAX_SRC2_U64  : DS_1A<"ds_max_src2_u64">;
  def DS_AND_SRC2_B64  : DS_1A<"ds_and_src2_b64">;
  def DS_OR_SRC2_B64   : DS_1A<"ds_or_src2_b64">;
  def DS_XOR_SRC2_B64  : DS_1A<"ds_xor_src2_b64">;
  def DS_MIN_SRC2_F64  : DS_1A<"ds_min_src2_f64">;
  def DS_MAX_SRC2_F64  : DS_1A<"ds_max_src2_f64">;

  // Writes.
  def DS_WRITE_SRC2_B32 : DS_1A<"ds_write_src2_b32">;
  def DS_WRITE_SRC2_B64 : DS_1A<"ds_write_src2_b64">;
} // End SubtargetPredicate = HasDsSrc2Insts
629
// ds_swizzle_b32 is convergent, neither loads nor stores, lists only EXEC in
// Uses, and takes a Swizzle operand in the offset position.
let Uses = [EXEC], mayLoad = 0, mayStore = 0, isConvergent = 1 in
def DS_SWIZZLE_B32 : DS_1A_RET <"ds_swizzle_b32", VGPR_32, 0, Swizzle>;
633
// Read instructions: everything in this group is defined with mayStore = 0.
let mayStore = 0 in {
  defm DS_READ_I8  : DS_1A_RET_mc<"ds_read_i8">;
  defm DS_READ_U8  : DS_1A_RET_mc<"ds_read_u8">;
  defm DS_READ_I16 : DS_1A_RET_mc<"ds_read_i16">;
  defm DS_READ_U16 : DS_1A_RET_mc<"ds_read_u16">;
  defm DS_READ_B32 : DS_1A_RET_mc<"ds_read_b32">;
  defm DS_READ_B64 : DS_1A_RET_mc<"ds_read_b64", VReg_64>;

  defm DS_READ2_B32     : DS_1A_Off8_RET_mc<"ds_read2_b32", VReg_64>;
  defm DS_READ2ST64_B32 : DS_1A_Off8_RET_mc<"ds_read2st64_b32", VReg_64>;

  defm DS_READ2_B64     : DS_1A_Off8_RET_mc<"ds_read2_b64", VReg_128>;
  defm DS_READ2ST64_B64 : DS_1A_Off8_RET_mc<"ds_read2st64_b64", VReg_128>;

  // D16 reads use the tied-output form and only exist with has_m0_read = 0.
  let has_m0_read = 0, SubtargetPredicate = HasD16LoadStore,
      TiedSourceNotRead = 1 in {
    def DS_READ_U8_D16     : DS_1A_RET_Tied<"ds_read_u8_d16">;
    def DS_READ_U8_D16_HI  : DS_1A_RET_Tied<"ds_read_u8_d16_hi">;
    def DS_READ_I8_D16     : DS_1A_RET_Tied<"ds_read_i8_d16">;
    def DS_READ_I8_D16_HI  : DS_1A_RET_Tied<"ds_read_i8_d16_hi">;
    def DS_READ_U16_D16    : DS_1A_RET_Tied<"ds_read_u16_d16">;
    def DS_READ_U16_D16_HI : DS_1A_RET_Tied<"ds_read_u16_d16_hi">;
  } // End has_m0_read = 0, SubtargetPredicate = HasD16LoadStore

  let SubtargetPredicate = HasDSAddTid in
  def DS_READ_ADDTID_B32 : DS_0A_RET<"ds_read_addtid_b32">;
} // End mayStore = 0
664
// Result-only pseudos (no address, no data operands).
def DS_CONSUME : DS_0A_RET<"ds_consume">;
def DS_APPEND  : DS_0A_RET<"ds_append">;

// ds_ordered_count is restricted to isNotGFX90APlus.
let SubtargetPredicate = isNotGFX90APlus in {
  def DS_ORDERED_COUNT : DS_1A_RET_GDS<"ds_ordered_count">;
}
670
671//===----------------------------------------------------------------------===//
672// Instruction definitions for CI and newer.
673//===----------------------------------------------------------------------===//
674
let SubtargetPredicate = isGFX7Plus in {

  // Neither of these names a no-return counterpart.
  defm DS_WRAP_RTN_B32       : DS_1A2D_RET_mc<"ds_wrap_rtn_b32", VGPR_32>;
  defm DS_CONDXCHG32_RTN_B64 : DS_1A1D_RET_mc<"ds_condxchg32_rtn_b64", VReg_64>;

  let isConvergent = 1, usesCustomInserter = 1 in
  def DS_GWS_SEMA_RELEASE_ALL : DS_GWS_0D<"ds_gws_sema_release_all">;

  // 96-bit and 128-bit reads.
  let mayStore = 0 in {
    defm DS_READ_B96  : DS_1A_RET_mc<"ds_read_b96", VReg_96>;
    defm DS_READ_B128 : DS_1A_RET_mc<"ds_read_b128", VReg_128>;
  } // End mayStore = 0

  // 96-bit and 128-bit writes.
  let mayLoad = 0 in {
    defm DS_WRITE_B96  : DS_1A1D_NORET_mc<"ds_write_b96", VReg_96>;
    defm DS_WRITE_B128 : DS_1A1D_NORET_mc<"ds_write_b128", VReg_128>;
  } // End mayLoad = 0

  def DS_NOP : DS_VOID<"ds_nop">;

} // let SubtargetPredicate = isGFX7Plus
697
698//===----------------------------------------------------------------------===//
699// Instruction definitions for VI and newer.
700//===----------------------------------------------------------------------===//
701
// The permute instructions list only EXEC in Uses and are selected directly
// from their intrinsics.
let SubtargetPredicate = isGFX8Plus, Uses = [EXEC] in {
  def DS_PERMUTE_B32  : DS_1A1D_PERMUTE <"ds_permute_b32", int_amdgcn_ds_permute>;
  def DS_BPERMUTE_B32 : DS_1A1D_PERMUTE <"ds_bpermute_b32", int_amdgcn_ds_bpermute>;
} // let SubtargetPredicate = isGFX8Plus
712
// ds_add_src2_f32 requires both HasLDSFPAtomicAdd and HasDsSrc2Insts.
let SubtargetPredicate = HasLDSFPAtomicAdd,
    OtherPredicates = [HasDsSrc2Insts] in
def DS_ADD_SRC2_F32 : DS_1A<"ds_add_src2_f32">;
716
717
718//===----------------------------------------------------------------------===//
719// Instruction definitions for GFX11.
720//===----------------------------------------------------------------------===//
721
// GS register add/sub: 64-bit result register class, 32-bit source register
// class, no address operand.
let SubtargetPredicate = isGFX11Only in {
  def DS_ADD_GS_REG_RTN : DS_0A1D_RET_GDS<"ds_add_gs_reg_rtn", VReg_64, VGPR_32>;
  def DS_SUB_GS_REG_RTN : DS_0A1D_RET_GDS<"ds_sub_gs_reg_rtn", VReg_64, VGPR_32>;
} // let SubtargetPredicate = isGFX11Only
728
let SubtargetPredicate = isGFX11Plus in {
  def DS_BVH_STACK_RTN_B32 : DS_BVH_STACK<"ds_bvh_stack_rtn_b32">;
} // let SubtargetPredicate = isGFX11Plus
734
735//===----------------------------------------------------------------------===//
736// Instruction definitions for GFX12 and newer.
737//===----------------------------------------------------------------------===//
738
let SubtargetPredicate = isGFX12Plus in {

  defm DS_COND_SUB_U32      : DS_1A1D_NORET_mc<"ds_cond_sub_u32">;
  defm DS_COND_SUB_RTN_U32  : DS_1A1D_RET_mc<"ds_cond_sub_rtn_u32", VGPR_32, "ds_cond_sub_u32">;
  defm DS_SUB_CLAMP_U32     : DS_1A1D_NORET_mc<"ds_sub_clamp_u32">;
  defm DS_SUB_CLAMP_RTN_U32 : DS_1A1D_RET_mc<"ds_sub_clamp_rtn_u32", VGPR_32, "ds_sub_clamp_u32">;

  // Emits two patterns for an intrinsic named by `frag`: one that selects the
  // returning instruction from the "_local_addrspace" fragment, and one (with
  // complexity 1, under HasAtomicCSubNoRtnInsts) that selects the no-return
  // instruction from the "_noret_local_addrspace" fragment.
  multiclass DSAtomicRetNoRetPatIntrinsic_mc<DS_Pseudo inst, DS_Pseudo noRetInst,
                                    ValueType vt, string frag> {
    def : DSAtomicRetPat<inst, vt, !cast<PatFrag>(frag#"_local_addrspace")>;

    let OtherPredicates = [HasAtomicCSubNoRtnInsts] in {
      def : DSAtomicRetPat<noRetInst, vt,
                           !cast<PatFrag>(frag#"_noret_local_addrspace"),
                           /* complexity */ 1>;
    }
  }

  defm : DSAtomicRetNoRetPatIntrinsic_mc<DS_COND_SUB_RTN_U32, DS_COND_SUB_U32, i32, "int_amdgcn_atomic_cond_sub_u32">;
} // let SubtargetPredicate = isGFX12Plus
758
759//===----------------------------------------------------------------------===//
760// DS Patterns
761//===----------------------------------------------------------------------===//
762
// Select the ds_swizzle intrinsic to DS_SWIZZLE_B32, passing the immediate as
// an i16 target immediate and 0 for gds.
def : GCNPat <(int_amdgcn_ds_swizzle i32:$src, timm:$offset16),
              (DS_SWIZZLE_B32 VGPR_32:$src, (as_i16timm $offset16), (i1 0))>;
767
// Selection pattern that maps `frag` applied to (pointer + offset) and
// producing `vt` onto `inst`, with the gds operand fixed to `gds`.
class DSReadPat <DS_Pseudo inst, ValueType vt, PatFrag frag, int gds=0>
  : GCNPat <(vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset))),
            (inst $ptr, offset:$offset, (i1 gds))>;
772
// Emits two read patterns: under LDSRequiresM0Init, `inst` is selected from
// the "_m0" fragment; under NotLDSRequiresM0Init, the "_gfx9" variant of
// `inst` is selected from the plain fragment.
multiclass DSReadPat_mc<DS_Pseudo inst, ValueType vt, string frag> {
  let OtherPredicates = [LDSRequiresM0Init] in
  def : DSReadPat<inst, vt, !cast<PatFrag>(frag#"_m0")>;

  let OtherPredicates = [NotLDSRequiresM0Init] in
  def : DSReadPat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt,
                  !cast<PatFrag>(frag)>;
}
783
// Read pattern for the tied-output D16 instructions: the extra $in value of
// the fragment is passed as the instruction's last operand, and gds is 0.
class DSReadPat_D16 <DS_Pseudo inst, PatFrag frag, ValueType vt>
  : GCNPat <(frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$in),
            (inst $ptr, offset:$offset, (i1 0), $in)>;
788
// Sub-dword read patterns.  The DS_READ_I16/i32 "sextloadi16_local" entry is
// listed once; the identical second copy was redundant and has been dropped.
defm : DSReadPat_mc <DS_READ_I8,  i32, "sextloadi8_local">;
defm : DSReadPat_mc <DS_READ_I8,  i16, "sextloadi8_local">;
defm : DSReadPat_mc <DS_READ_U8,  i32, "extloadi8_local">;
defm : DSReadPat_mc <DS_READ_U8,  i32, "zextloadi8_local">;
defm : DSReadPat_mc <DS_READ_U8,  i16, "extloadi8_local">;
defm : DSReadPat_mc <DS_READ_U8,  i16, "zextloadi8_local">;
defm : DSReadPat_mc <DS_READ_I16, i32, "sextloadi16_local">;
defm : DSReadPat_mc <DS_READ_U16, i32, "extloadi16_local">;
defm : DSReadPat_mc <DS_READ_U16, i32, "zextloadi16_local">;
defm : DSReadPat_mc <DS_READ_U16, i16, "load_local">;
800
// One DS_READ_B32 pattern per type listed in Reg32Types.
foreach vt = Reg32Types.types in
defm : DSReadPat_mc <DS_READ_B32, vt, "load_local">;
804
// Atomic loads are selected to the ordinary read instructions.
defm : DSReadPat_mc <DS_READ_U8,  i16, "atomic_load_8_local">;
defm : DSReadPat_mc <DS_READ_U8,  i32, "atomic_load_8_local">;
defm : DSReadPat_mc <DS_READ_U16, i16, "atomic_load_16_local">;
defm : DSReadPat_mc <DS_READ_U16, i32, "atomic_load_16_local">;
defm : DSReadPat_mc <DS_READ_B32, i32, "atomic_load_32_local">;
defm : DSReadPat_mc <DS_READ_B64, i64, "atomic_load_64_local">;
811
// D16 read patterns, each given for both v2i16 and v2f16.  They are only
// enabled under D16PreservesUnusedBits.
let OtherPredicates = [D16PreservesUnusedBits] in {
  // Patterns using the "_d16_hi" fragments.
  def : DSReadPat_D16<DS_READ_U16_D16_HI, load_d16_hi_local, v2i16>;
  def : DSReadPat_D16<DS_READ_U16_D16_HI, load_d16_hi_local, v2f16>;
  def : DSReadPat_D16<DS_READ_U8_D16_HI, az_extloadi8_d16_hi_local, v2i16>;
  def : DSReadPat_D16<DS_READ_U8_D16_HI, az_extloadi8_d16_hi_local, v2f16>;
  def : DSReadPat_D16<DS_READ_I8_D16_HI, sextloadi8_d16_hi_local, v2i16>;
  def : DSReadPat_D16<DS_READ_I8_D16_HI, sextloadi8_d16_hi_local, v2f16>;

  // Patterns using the "_d16_lo" fragments.
  def : DSReadPat_D16<DS_READ_U16_D16, load_d16_lo_local, v2i16>;
  def : DSReadPat_D16<DS_READ_U16_D16, load_d16_lo_local, v2f16>;
  def : DSReadPat_D16<DS_READ_U8_D16, az_extloadi8_d16_lo_local, v2i16>;
  def : DSReadPat_D16<DS_READ_U8_D16, az_extloadi8_d16_lo_local, v2f16>;
  def : DSReadPat_D16<DS_READ_I8_D16, sextloadi8_d16_lo_local, v2i16>;
  def : DSReadPat_D16<DS_READ_I8_D16, sextloadi8_d16_lo_local, v2f16>;
} // End OtherPredicates = [D16PreservesUnusedBits]
827
// Write pattern for a single-address DS access: matches `frag` applied to a
// value of type `vt` and a DS1Addr1Offset (ptr, offset) address, and emits
// `inst` with the pointer, the value (in the register class chosen by
// getVregSrcForVT<vt>), the offset, and `gds` as an i1 immediate.
class DSWritePat <DS_Pseudo inst, ValueType vt, PatFrag frag, int gds=0>
  : GCNPat <(frag vt:$value, (DS1Addr1Offset i32:$ptr, i32:$offset)),
            (inst $ptr, getVregSrcForVT<vt>.ret:$value, offset:$offset,
                  (i1 gds))>;
832
// Instantiates DSWritePat twice for the fragment named `frag`:
//  - under LDSRequiresM0Init: `inst` with the "<frag>_m0" fragment;
//  - under NotLDSRequiresM0Init: the "<inst>_gfx9" pseudo with the plain
//    fragment.
multiclass DSWritePat_mc <DS_Pseudo inst, ValueType vt, string frag> {
  defvar m0Frag = !cast<PatFrag>(frag#"_m0");
  defvar plainFrag = !cast<PatFrag>(frag);
  defvar gfx9Inst = !cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9");

  let OtherPredicates = [LDSRequiresM0Init] in
  def : DSWritePat<inst, vt, m0Frag>;

  let OtherPredicates = [NotLDSRequiresM0Init] in
  def : DSWritePat<gfx9Inst, vt, plainFrag>;
}
842
// Sub-dword local stores.
defm : DSWritePat_mc <DS_WRITE_B8,  i32, "truncstorei8_local">;
defm : DSWritePat_mc <DS_WRITE_B16, i32, "truncstorei16_local">;
defm : DSWritePat_mc <DS_WRITE_B8,  i16, "truncstorei8_local">;
defm : DSWritePat_mc <DS_WRITE_B16, i16, "store_local">;

// Plain 32-bit local stores, one pattern pair per type in Reg32Types.types.
foreach vt = Reg32Types.types in
defm : DSWritePat_mc <DS_WRITE_B32, vt, "store_local">;

// Atomic local stores are selected to the ordinary write instructions.
defm : DSWritePat_mc <DS_WRITE_B8,  i16, "atomic_store_8_local">;
defm : DSWritePat_mc <DS_WRITE_B8,  i32, "atomic_store_8_local">;
defm : DSWritePat_mc <DS_WRITE_B16, i16, "atomic_store_16_local">;
defm : DSWritePat_mc <DS_WRITE_B16, i32, "atomic_store_16_local">;
defm : DSWritePat_mc <DS_WRITE_B32, i32, "atomic_store_32_local">;
defm : DSWritePat_mc <DS_WRITE_B64, i64, "atomic_store_64_local">;

// Stores of the high 16 bits of a 32-bit value.
let OtherPredicates = [HasD16LoadStore] in {
  def : DSWritePat <DS_WRITE_B16_D16_HI, i32, store_hi16_local>;
  def : DSWritePat <DS_WRITE_B8_D16_HI, i32, truncstorei8_hi16_local>;
} // End OtherPredicates = [HasD16LoadStore]
863
// Two-offset read pattern: matches `frag` on a DS64Bit4ByteAligned address
// (ptr, offset0, offset1) and emits `inst` with both offsets and gds = 0.
class DS64Bit4ByteAlignedReadPat<DS_Pseudo inst, ValueType vt, PatFrag frag>
  : GCNPat <
      (vt:$value (frag (DS64Bit4ByteAligned i32:$ptr, i8:$offset0, i8:$offset1))),
      (inst $ptr, $offset0, $offset1, (i1 0))>;
868
// Two-offset write pattern: matches `frag` on a DS64Bit4ByteAligned address
// and emits `inst` with the sub0 and sub1 halves of the VReg_64 value as the
// two data operands, followed by both offsets and gds = 0.
class DS64Bit4ByteAlignedWritePat<DS_Pseudo inst, ValueType vt, PatFrag frag>
  : GCNPat<
      (frag vt:$value, (DS64Bit4ByteAligned i32:$ptr, i8:$offset0, i8:$offset1)),
      (inst $ptr,
            (i32 (EXTRACT_SUBREG VReg_64:$value, sub0)),
            (i32 (EXTRACT_SUBREG VReg_64:$value, sub1)),
            $offset0, $offset1, (i1 0))>;
875
// Two-offset read pattern: matches `frag` on a DS128Bit8ByteAligned address
// (ptr, offset0, offset1) and emits `inst` with both offsets and gds = 0.
class DS128Bit8ByteAlignedReadPat<DS_Pseudo inst, ValueType vt, PatFrag frag>
  : GCNPat <
      (vt:$value (frag (DS128Bit8ByteAligned i32:$ptr, i8:$offset0, i8:$offset1))),
      (inst $ptr, $offset0, $offset1, (i1 0))>;
880
// Two-offset write pattern: matches `frag` on a DS128Bit8ByteAligned address
// and emits `inst` with the sub0_sub1 and sub2_sub3 halves of the VReg_128
// value as the two data operands, followed by both offsets and gds = 0.
class DS128Bit8ByteAlignedWritePat<DS_Pseudo inst, ValueType vt, PatFrag frag>
  : GCNPat<
      (frag vt:$value, (DS128Bit8ByteAligned i32:$ptr, i8:$offset0, i8:$offset1)),
      (inst $ptr,
            (i64 (EXTRACT_SUBREG VReg_128:$value, sub0_sub1)),
            (i64 (EXTRACT_SUBREG VReg_128:$value, sub2_sub3)),
            $offset0, $offset1, (i1 0))>;
887
// read2_b32/write2_b32 patterns for `vt`: the M0-initialising forms (with the
// *_m0 fragments) require isGFX7Plus in addition to LDSRequiresM0Init; the
// _gfx9 forms are used under NotLDSRequiresM0Init.
multiclass DS64Bit4ByteAlignedPat_mc<ValueType vt> {
  let OtherPredicates = [LDSRequiresM0Init, isGFX7Plus] in {
    def : DS64Bit4ByteAlignedReadPat <DS_READ2_B32,  vt, load_local_m0>;
    def : DS64Bit4ByteAlignedWritePat<DS_WRITE2_B32, vt, store_local_m0>;
  } // End OtherPredicates = [LDSRequiresM0Init, isGFX7Plus]

  let OtherPredicates = [NotLDSRequiresM0Init] in {
    def : DS64Bit4ByteAlignedReadPat <DS_READ2_B32_gfx9,  vt, load_local>;
    def : DS64Bit4ByteAlignedWritePat<DS_WRITE2_B32_gfx9, vt, store_local>;
  } // End OtherPredicates = [NotLDSRequiresM0Init]
}
899
// read2_b64/write2_b64 patterns for `vt`: the M0-initialising forms (with the
// *_m0 fragments) require isGFX7Plus in addition to LDSRequiresM0Init; the
// _gfx9 forms are used under NotLDSRequiresM0Init.
multiclass DS128Bit8ByteAlignedPat_mc<ValueType vt> {
  let OtherPredicates = [LDSRequiresM0Init, isGFX7Plus] in {
    def : DS128Bit8ByteAlignedReadPat <DS_READ2_B64,  vt, load_local_m0>;
    def : DS128Bit8ByteAlignedWritePat<DS_WRITE2_B64, vt, store_local_m0>;
  } // End OtherPredicates = [LDSRequiresM0Init, isGFX7Plus]

  let OtherPredicates = [NotLDSRequiresM0Init] in {
    def : DS128Bit8ByteAlignedReadPat <DS_READ2_B64_gfx9,  vt, load_local>;
    def : DS128Bit8ByteAlignedWritePat<DS_WRITE2_B64_gfx9, vt, store_local>;
  } // End OtherPredicates = [NotLDSRequiresM0Init]
}
911
// v2i32 loads are split into i32 loads on SI during lowering, due to a bug
// related to bounds checking.
foreach vt = VReg_64.RegTypes in
defm : DS64Bit4ByteAlignedPat_mc<vt>;

foreach vt = VReg_128.RegTypes in
defm : DS128Bit8ByteAlignedPat_mc<vt>;
921
// Prefer ds_read over ds_read2 and ds_write over ds_write2, all other things
// being equal, because it has a larger immediate offset range.
let AddedComplexity = 100 in {

  foreach vt = VReg_64.RegTypes in {
    defm : DSReadPat_mc <DS_READ_B64, vt, "load_align8_local">;
    defm : DSWritePat_mc <DS_WRITE_B64, vt, "store_align8_local">;
  }

  let SubtargetPredicate = isGFX7Plus in {

    foreach vt = VReg_96.RegTypes in {
      defm : DSReadPat_mc <DS_READ_B96, vt, "load_align16_local">;
      defm : DSWritePat_mc <DS_WRITE_B96, vt, "store_align16_local">;
    }

    foreach vt = VReg_128.RegTypes in {
      defm : DSReadPat_mc <DS_READ_B128, vt, "load_align16_local">;
      defm : DSWritePat_mc <DS_WRITE_B128, vt, "store_align16_local">;
    }

    let SubtargetPredicate = HasUnalignedAccessMode in {

      // Select 64 bit loads and stores aligned less than 4 as a single
      // ds_read_b64/ds_write_b64 instruction as this is faster than
      // ds_read2_b32/ds_write2_b32 which would be used otherwise. In this
      // case a b32 access would still be misaligned, but we will have 2 of
      // them.
      foreach vt = VReg_64.RegTypes in {
        defm : DSReadPat_mc <DS_READ_B64, vt, "load_align_less_than_4_local">;
        defm : DSWritePat_mc <DS_WRITE_B64, vt, "store_align_less_than_4_local">;
      }

      // Selection will split most of the unaligned 3 dword accesses due to
      // performance reasons when beneficial. Keep these two patterns for the
      // rest of the cases.
      foreach vt = VReg_96.RegTypes in {
        defm : DSReadPat_mc <DS_READ_B96, vt, "load_local">;
        defm : DSWritePat_mc <DS_WRITE_B96, vt, "store_local">;
      }

      // Select 128 bit loads and stores aligned less than 4 as a single
      // ds_read_b128/ds_write_b128 instruction as this is faster than
      // ds_read2_b64/ds_write2_b64 which would be used otherwise. In this
      // case a b64 access would still be misaligned, but we will have 2 of
      // them.
      foreach vt = VReg_128.RegTypes in {
        defm : DSReadPat_mc <DS_READ_B128, vt, "load_align_less_than_4_local">;
        defm : DSWritePat_mc <DS_WRITE_B128, vt, "store_align_less_than_4_local">;
      }

    } // End SubtargetPredicate = HasUnalignedAccessMode

  } // End SubtargetPredicate = isGFX7Plus

} // End AddedComplexity = 100
975
// Instantiates DSAtomicRetPat for the atomic fragment family named `frag`.
// Fragment names are built from `frag`, the address-space/M0 suffix and
// vt.Size:
//  - LDSRequiresM0Init:    `inst` with "<frag>_local_m0_<size>";
//  - NotLDSRequiresM0Init: "<inst>_gfx9" with "<frag>_local_<size>";
//  - HasGDS:               `inst` with "<frag>_region_m0_<size>", gds = 1.
multiclass DSAtomicRetPat_mc<DS_Pseudo inst, ValueType vt, string frag> {
  defvar gfx9Inst = !cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9");

  let OtherPredicates = [LDSRequiresM0Init] in
  def : DSAtomicRetPat<inst, vt, !cast<PatFrag>(frag#"_local_m0_"#vt.Size)>;

  let OtherPredicates = [NotLDSRequiresM0Init] in
  def : DSAtomicRetPat<gfx9Inst, vt, !cast<PatFrag>(frag#"_local_"#vt.Size)>;

  let OtherPredicates = [HasGDS] in
  def : DSAtomicRetPat<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt.Size),
                       /* complexity */ 0, /* gds */ 1>;
}
991
// Like DSAtomicRetPat_mc, but each predicate group also gets a pattern for
// the "_noret" fragment that selects `noRetInst`. The no-return patterns are
// given complexity 1, the returning ones keep the default.
multiclass DSAtomicRetNoRetPat_mc<DS_Pseudo inst, DS_Pseudo noRetInst,
                                  ValueType vt, string frag> {
  defvar retGfx9 = !cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9");
  defvar noRetGfx9 = !cast<DS_Pseudo>(!cast<string>(noRetInst)#"_gfx9");

  let OtherPredicates = [LDSRequiresM0Init] in {
    def : DSAtomicRetPat<inst, vt, !cast<PatFrag>(frag#"_local_m0_"#vt.Size)>;
    def : DSAtomicRetPat<noRetInst, vt,
                         !cast<PatFrag>(frag#"_local_m0_noret_"#vt.Size),
                         /* complexity */ 1>;
  } // End OtherPredicates = [LDSRequiresM0Init]

  let OtherPredicates = [NotLDSRequiresM0Init] in {
    def : DSAtomicRetPat<retGfx9, vt, !cast<PatFrag>(frag#"_local_"#vt.Size)>;
    def : DSAtomicRetPat<noRetGfx9, vt,
                         !cast<PatFrag>(frag#"_local_noret_"#vt.Size),
                         /* complexity */ 1>;
  } // End OtherPredicates = [NotLDSRequiresM0Init]

  let OtherPredicates = [HasGDS] in {
    def : DSAtomicRetPat<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt.Size),
                         /* complexity */ 0, /* gds */ 1>;
    def : DSAtomicRetPat<noRetInst, vt,
                         !cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size),
                         /* complexity */ 1, /* gds */ 1>;
  } // End OtherPredicates = [HasGDS]
}
1017
1018
1019
let SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10 in {

// Compare-and-swap pattern whose instruction operands are emitted as
// ($cmp, $swap).
// Caution, the order of src and cmp is the *opposite* of the
// BUFFER_ATOMIC_CMPSWAP opcode.
class DSAtomicCmpXChgSwapped<DS_Pseudo inst, ValueType vt, PatFrag frag,
                             int complexity = 0, bit gds=0>
  : GCNPat<(frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$cmp, vt:$swap),
           (inst $ptr, getVregSrcForVT<vt>.ret:$cmp,
                 getVregSrcForVT<vt>.ret:$swap, offset:$offset, (i1 gds))> {
  let AddedComplexity = complexity;
}

// Returning and no-return cmpxchg patterns for the LDS-with-M0, LDS-without-M0
// (_gfx9 pseudos) and GDS cases. No-return patterns get complexity 1.
multiclass DSAtomicCmpXChgSwapped_mc<DS_Pseudo inst, DS_Pseudo noRetInst,
                                     ValueType vt, string frag> {
  defvar retGfx9 = !cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9");
  defvar noRetGfx9 = !cast<DS_Pseudo>(!cast<string>(noRetInst)#"_gfx9");

  let OtherPredicates = [LDSRequiresM0Init] in {
    def : DSAtomicCmpXChgSwapped<inst, vt,
                                 !cast<PatFrag>(frag#"_local_m0_"#vt.Size)>;
    def : DSAtomicCmpXChgSwapped<noRetInst, vt,
                                 !cast<PatFrag>(frag#"_local_m0_noret_"#vt.Size),
                                 /* complexity */ 1>;
  } // End OtherPredicates = [LDSRequiresM0Init]

  let OtherPredicates = [NotLDSRequiresM0Init] in {
    def : DSAtomicCmpXChgSwapped<retGfx9, vt,
                                 !cast<PatFrag>(frag#"_local_"#vt.Size)>;
    def : DSAtomicCmpXChgSwapped<noRetGfx9, vt,
                                 !cast<PatFrag>(frag#"_local_noret_"#vt.Size),
                                 /* complexity */ 1>;
  } // End OtherPredicates = [NotLDSRequiresM0Init]

  let OtherPredicates = [HasGDS] in {
    def : DSAtomicCmpXChgSwapped<inst, vt,
                                 !cast<PatFrag>(frag#"_region_m0_"#vt.Size),
                                 /* complexity */ 0, /* gds */ 1>;
    def : DSAtomicCmpXChgSwapped<noRetInst, vt,
                                 !cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size),
                                 /* complexity */ 1, /* gds */ 1>;
  } // End OtherPredicates = [HasGDS]
}

} // End SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10
1053
let SubtargetPredicate = isGFX11Plus in {

// Compare-and-swap pattern whose instruction operands are emitted as
// ($swap, $cmp).
// The order of src and cmp agrees with the BUFFER_ATOMIC_CMPSWAP opcode.
class DSAtomicCmpXChg<DS_Pseudo inst, ValueType vt, PatFrag frag,
                      int complexity = 0, bit gds=0>
  : GCNPat<(frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$cmp, vt:$swap),
           (inst $ptr, getVregSrcForVT<vt>.ret:$swap,
                 getVregSrcForVT<vt>.ret:$cmp, offset:$offset, (i1 gds))> {
  let AddedComplexity = complexity;
}

// Returning and no-return cmpxchg patterns. The LDS patterns always use the
// _gfx9 pseudos; the GDS patterns use `inst`/`noRetInst` directly with
// gds = 1. No-return patterns get complexity 1.
multiclass DSAtomicCmpXChg_mc<DS_Pseudo inst, DS_Pseudo noRetInst,
                              ValueType vt, string frag> {
  defvar retGfx9 = !cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9");
  defvar noRetGfx9 = !cast<DS_Pseudo>(!cast<string>(noRetInst)#"_gfx9");

  def : DSAtomicCmpXChg<retGfx9, vt, !cast<PatFrag>(frag#"_local_"#vt.Size)>;
  def : DSAtomicCmpXChg<noRetGfx9, vt,
                        !cast<PatFrag>(frag#"_local_noret_"#vt.Size),
                        /* complexity */ 1>;

  let OtherPredicates = [HasGDS] in {
    def : DSAtomicCmpXChg<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt.Size),
                          /* complexity */ 0, /* gds */ 1>;
    def : DSAtomicCmpXChg<noRetInst, vt,
                          !cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size),
                          /* complexity */ 1, /* gds */ 1>;
  } // End OtherPredicates = [HasGDS]
}

} // End SubtargetPredicate = isGFX11Plus
1078
// 32-bit atomics.
defm : DSAtomicRetPat_mc<DS_WRXCHG_RTN_B32, i32, "atomic_swap">;
defm : DSAtomicRetNoRetPat_mc<DS_ADD_RTN_U32, DS_ADD_U32, i32, "atomic_load_add">;
defm : DSAtomicRetNoRetPat_mc<DS_SUB_RTN_U32, DS_SUB_U32, i32, "atomic_load_sub">;
defm : DSAtomicRetNoRetPat_mc<DS_INC_RTN_U32, DS_INC_U32, i32, "atomic_load_uinc_wrap">;
defm : DSAtomicRetNoRetPat_mc<DS_DEC_RTN_U32, DS_DEC_U32, i32, "atomic_load_udec_wrap">;
defm : DSAtomicRetNoRetPat_mc<DS_AND_RTN_B32, DS_AND_B32, i32, "atomic_load_and">;
defm : DSAtomicRetNoRetPat_mc<DS_OR_RTN_B32,  DS_OR_B32,  i32, "atomic_load_or">;
defm : DSAtomicRetNoRetPat_mc<DS_XOR_RTN_B32, DS_XOR_B32, i32, "atomic_load_xor">;
defm : DSAtomicRetNoRetPat_mc<DS_MIN_RTN_I32, DS_MIN_I32, i32, "atomic_load_min">;
defm : DSAtomicRetNoRetPat_mc<DS_MAX_RTN_I32, DS_MAX_I32, i32, "atomic_load_max">;
defm : DSAtomicRetNoRetPat_mc<DS_MIN_RTN_U32, DS_MIN_U32, i32, "atomic_load_umin">;
defm : DSAtomicRetNoRetPat_mc<DS_MAX_RTN_U32, DS_MAX_U32, i32, "atomic_load_umax">;
defm : DSAtomicRetNoRetPat_mc<DS_MIN_RTN_F32, DS_MIN_F32, f32, "atomic_load_fmin">;
defm : DSAtomicRetNoRetPat_mc<DS_MAX_RTN_F32, DS_MAX_F32, f32, "atomic_load_fmax">;

// 32-bit compare-and-swap: DS_CMPST before GFX11, DS_CMPSTORE from GFX11 on.
let SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10 in
defm : DSAtomicCmpXChgSwapped_mc<DS_CMPST_RTN_B32, DS_CMPST_B32, i32, "atomic_cmp_swap">;

let SubtargetPredicate = isGFX11Plus in
defm : DSAtomicCmpXChg_mc<DS_CMPSTORE_RTN_B32, DS_CMPSTORE_B32, i32, "atomic_cmp_swap">;

// 32-bit floating-point add.
let SubtargetPredicate = HasLDSFPAtomicAdd in
defm : DSAtomicRetNoRetPat_mc<DS_ADD_RTN_F32, DS_ADD_F32, f32, "atomic_load_fadd">;
1106
// 64-bit atomics.
defm : DSAtomicRetPat_mc<DS_WRXCHG_RTN_B64, i64, "atomic_swap">;
defm : DSAtomicRetNoRetPat_mc<DS_ADD_RTN_U64, DS_ADD_U64, i64, "atomic_load_add">;
defm : DSAtomicRetNoRetPat_mc<DS_SUB_RTN_U64, DS_SUB_U64, i64, "atomic_load_sub">;
defm : DSAtomicRetNoRetPat_mc<DS_INC_RTN_U64, DS_INC_U64, i64, "atomic_load_uinc_wrap">;
defm : DSAtomicRetNoRetPat_mc<DS_DEC_RTN_U64, DS_DEC_U64, i64, "atomic_load_udec_wrap">;
defm : DSAtomicRetNoRetPat_mc<DS_AND_RTN_B64, DS_AND_B64, i64, "atomic_load_and">;
defm : DSAtomicRetNoRetPat_mc<DS_OR_RTN_B64,  DS_OR_B64,  i64, "atomic_load_or">;
defm : DSAtomicRetNoRetPat_mc<DS_XOR_RTN_B64, DS_XOR_B64, i64, "atomic_load_xor">;
defm : DSAtomicRetNoRetPat_mc<DS_MIN_RTN_I64, DS_MIN_I64, i64, "atomic_load_min">;
defm : DSAtomicRetNoRetPat_mc<DS_MAX_RTN_I64, DS_MAX_I64, i64, "atomic_load_max">;
defm : DSAtomicRetNoRetPat_mc<DS_MIN_RTN_U64, DS_MIN_U64, i64, "atomic_load_umin">;
defm : DSAtomicRetNoRetPat_mc<DS_MAX_RTN_U64, DS_MAX_U64, i64, "atomic_load_umax">;
defm : DSAtomicRetNoRetPat_mc<DS_MIN_RTN_F64, DS_MIN_F64, f64, "atomic_load_fmin">;
defm : DSAtomicRetNoRetPat_mc<DS_MAX_RTN_F64, DS_MAX_F64, f64, "atomic_load_fmax">;

// 64-bit compare-and-swap: DS_CMPST before GFX11, DS_CMPSTORE from GFX11 on.
let SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10 in
defm : DSAtomicCmpXChgSwapped_mc<DS_CMPST_RTN_B64, DS_CMPST_B64, i64, "atomic_cmp_swap">;

let SubtargetPredicate = isGFX11Plus in
defm : DSAtomicCmpXChg_mc<DS_CMPSTORE_RTN_B64, DS_CMPSTORE_B64, i64, "atomic_cmp_swap">;
1130
let SubtargetPredicate = isGFX90APlus in {

// f64 atomic fadd from the generic atomic fragments. The no-return form is
// given a higher complexity than the returning one.
def : DSAtomicRetPat<DS_ADD_RTN_F64, f64, atomic_load_fadd_local_64>;
let AddedComplexity = 1 in {
  def : DSAtomicRetPat<DS_ADD_F64, f64, atomic_load_fadd_local_noret_64>;
}

// Pattern for an intrinsic-style fragment taking (address, value): emits
// `inst` with the pointer, the value in the register class chosen by
// getVregSrcForVT<vt>, the offset, and `gds` as an i1 immediate.
class DSAtomicRetPatIntrinsic<DS_Pseudo inst, ValueType vt, PatFrag frag,
                              bit gds=0>
  : GCNPat <(vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value)),
            (inst $ptr, getVregSrcForVT<vt>.ret:$value, offset:$offset,
                  (i1 gds))>;

// f64 atomic fadd from the flat-atomic-fadd intrinsic fragments restricted to
// the local address space.
def : DSAtomicRetPatIntrinsic<DS_ADD_RTN_F64, f64,
                              int_amdgcn_flat_atomic_fadd_local_addrspace>;
let AddedComplexity = 1 in {
  def : DSAtomicRetPatIntrinsic<DS_ADD_F64, f64,
                                int_amdgcn_flat_atomic_fadd_noret_local_addrspace>;
}

} // End SubtargetPredicate = isGFX90APlus
1146
let SubtargetPredicate = HasAtomicDsPkAdd16Insts in {

// Packed f16 add via the generic atomic fadd fragments.
def : DSAtomicRetPat<DS_PK_ADD_RTN_F16, v2f16, atomic_load_fadd_v2f16_local_32>;
let AddedComplexity = 1 in {
  def : DSAtomicRetPat<DS_PK_ADD_F16, v2f16, atomic_load_fadd_v2f16_local_noret_32>;
}

// Packed bf16 add via the ds_fadd_v2bf16 intrinsics. The two trailing
// immediate operands of the instruction are both 0.
def : GCNPat <(v2i16 (int_amdgcn_ds_fadd_v2bf16 i32:$ptr, v2i16:$src)),
              (DS_PK_ADD_RTN_BF16 VGPR_32:$ptr, VGPR_32:$src, 0, 0)>;
let AddedComplexity = 1 in {
  def : GCNPat <(v2i16 (int_amdgcn_ds_fadd_v2bf16_noret i32:$ptr, v2i16:$src)),
                (DS_PK_ADD_BF16 VGPR_32:$ptr, VGPR_32:$src, 0, 0)>;
}

} // End SubtargetPredicate = HasAtomicDsPkAdd16Insts
1161
// SIds_ordered_count selects to DS_ORDERED_COUNT, passing the offset through
// as_i16imm. Only available when the subtarget has GDS.
let OtherPredicates = [HasGDS] in {
  def : GCNPat <(SIds_ordered_count i32:$value, i16:$offset),
                (DS_ORDERED_COUNT $value, (as_i16imm $offset))>;
}
1167
// ds_add_gs_reg_rtn: the i64 form uses the instruction result directly.
def : GCNPat <(i64 (int_amdgcn_ds_add_gs_reg_rtn i32:$src, timm:$offset32)),
              (DS_ADD_GS_REG_RTN VGPR_32:$src, (as_i32timm $offset32))>;

// ds_add_gs_reg_rtn: the i32 form takes sub0 of the 64-bit result.
def : GCNPat <(i32 (int_amdgcn_ds_add_gs_reg_rtn i32:$src, timm:$offset32)),
              (EXTRACT_SUBREG
                (i64 (COPY_TO_REGCLASS
                  (DS_ADD_GS_REG_RTN VGPR_32:$src, (as_i32timm $offset32)),
                  VReg_64)),
                sub0)>;

// ds_sub_gs_reg_rtn: the i64 form uses the instruction result directly.
def : GCNPat <(i64 (int_amdgcn_ds_sub_gs_reg_rtn i32:$src, timm:$offset32)),
              (DS_SUB_GS_REG_RTN VGPR_32:$src, (as_i32timm $offset32))>;

// ds_sub_gs_reg_rtn: the i32 form takes sub0 of the 64-bit result.
def : GCNPat <(i32 (int_amdgcn_ds_sub_gs_reg_rtn i32:$src, timm:$offset32)),
              (EXTRACT_SUBREG
                (i64 (COPY_TO_REGCLASS
                  (DS_SUB_GS_REG_RTN VGPR_32:$src, (as_i32timm $offset32)),
                  VReg_64)),
                sub0)>;
1195
1196//===----------------------------------------------------------------------===//
1197// Target-specific instruction encodings.
1198//===----------------------------------------------------------------------===//
1199
1200//===----------------------------------------------------------------------===//
1201// Base ENC_DS for GFX6, GFX7, GFX10, GFX11, GFX12.
1202//===----------------------------------------------------------------------===//
1203
// Shared 64-bit DS encoding. Each operand field is taken from the real
// instruction when the backing pseudo `ps` declares the operand, and is
// otherwise encoded as 0 (or as ps.gdsValue for the gds bit).
//   op          - 8-bit opcode placed in bits 25-18.
//   ps          - backing pseudo supplying the has_* flags and mnemonic.
//   ef          - encoding family passed to SIMCInstr.
//   opName      - assembly mnemonic, defaults to the pseudo's mnemonic.
//   hasGFX12Enc - when set, the gds operand is fixed to 0.
class Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<bits<8> op, DS_Pseudo ps, int ef,
                                               string opName = ps.Mnemonic,
                                               bit hasGFX12Enc = 0>
    : DS_Real<ps, opName>, SIMCInstr <ps.Mnemonic, ef> {

  // First dword: offsets, gds flag, opcode and the fixed 0x36 marker.
  let Inst{7-0}   = !if(ps.has_offset0, offset0, 0);
  let Inst{15-8}  = !if(ps.has_offset1, offset1, 0);
  let Inst{17}    = !if(ps.has_gds, gds, ps.gdsValue);
  let Inst{25-18} = op;
  let Inst{31-26} = 0x36;

  // Second dword: address, data and destination registers. When the pseudo
  // has no address but has_gws_data0 is set, data0 occupies the address field.
  let Inst{39-32} = !if(ps.has_addr, addr,
                        !if(ps.has_gws_data0, data0{7-0}, 0));
  let Inst{47-40} = !if(ps.has_data0, data0{7-0}, 0);
  let Inst{55-48} = !if(ps.has_data1, data1{7-0}, 0);
  let Inst{63-56} = !if(ps.has_vdst, vdst{7-0}, 0);

  // With the GFX12 encoding gds is forced to 0; otherwise it is left unset.
  let gds = !if(hasGFX12Enc, 0, ?);
}
1221
1222//===----------------------------------------------------------------------===//
1223// GFX12.
1224//===----------------------------------------------------------------------===//
1225
let AssemblerPredicate = isGFX12Plus, DecoderNamespace = "GFX12" in {
  // GFX12 real instruction backed by the pseudo that shares the defm's name.
  multiclass DS_Real_gfx12<bits<8> op> {
    defvar ps = !cast<DS_Pseudo>(NAME);
    def _gfx12
      : Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<
          op, ps, SIEncodingFamily.GFX12, ps.Mnemonic, /*hasGFX12Enc=*/1>;
  }

  // GFX12 real instruction printed as `real_name`, with a mnemonic alias from
  // the backing pseudo's mnemonic.
  multiclass DS_Real_Renamed_gfx12<bits<8> op, DS_Pseudo backing_pseudo,
                                   string real_name> {
    def _gfx12
      : Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<
          op, backing_pseudo, SIEncodingFamily.GFX12, real_name,
          /*hasGFX12Enc=*/1>,
        MnemonicAlias<backing_pseudo.Mnemonic, real_name>,
        Requires<[isGFX12Plus]>;
  }
} // End AssemblerPredicate = isGFX12Plus, DecoderNamespace = "GFX12"
1244
// Floating-point min/max, renamed to *_num_* on GFX12.
defm DS_MIN_NUM_F32       : DS_Real_Renamed_gfx12<0x012, DS_MIN_F32, "ds_min_num_f32">;
defm DS_MAX_NUM_F32       : DS_Real_Renamed_gfx12<0x013, DS_MAX_F32, "ds_max_num_f32">;
defm DS_MIN_NUM_RTN_F32   : DS_Real_Renamed_gfx12<0x032, DS_MIN_RTN_F32, "ds_min_num_rtn_f32">;
defm DS_MAX_NUM_RTN_F32   : DS_Real_Renamed_gfx12<0x033, DS_MAX_RTN_F32, "ds_max_num_rtn_f32">;
defm DS_MIN_NUM_F64       : DS_Real_Renamed_gfx12<0x052, DS_MIN_F64, "ds_min_num_f64">;
defm DS_MAX_NUM_F64       : DS_Real_Renamed_gfx12<0x053, DS_MAX_F64, "ds_max_num_f64">;
defm DS_MIN_NUM_RTN_F64   : DS_Real_Renamed_gfx12<0x072, DS_MIN_RTN_F64, "ds_min_num_rtn_f64">;
defm DS_MAX_NUM_RTN_F64   : DS_Real_Renamed_gfx12<0x073, DS_MAX_RTN_F64, "ds_max_num_rtn_f64">;

// Instructions with only a GFX12 real encoding in this group.
defm DS_COND_SUB_U32      : DS_Real_gfx12<0x098>;
defm DS_SUB_CLAMP_U32     : DS_Real_gfx12<0x099>;
defm DS_COND_SUB_RTN_U32  : DS_Real_gfx12<0x0a8>;
defm DS_SUB_CLAMP_RTN_U32 : DS_Real_gfx12<0x0a9>;
defm DS_PK_ADD_F16        : DS_Real_gfx12<0x09a>;
defm DS_PK_ADD_RTN_F16    : DS_Real_gfx12<0x0aa>;
defm DS_PK_ADD_BF16       : DS_Real_gfx12<0x09b>;
defm DS_PK_ADD_RTN_BF16   : DS_Real_gfx12<0x0ab>;
1261
1262//===----------------------------------------------------------------------===//
1263// GFX11.
1264//===----------------------------------------------------------------------===//
1265
let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in {
  // GFX11 real instruction backed by the pseudo that shares the defm's name.
  multiclass DS_Real_gfx11<bits<8> op> {
    defvar ps = !cast<DS_Pseudo>(NAME);
    def _gfx11
      : Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<op, ps,
                                                 SIEncodingFamily.GFX11>;
  }

  // GFX11 real instruction printed as `real_name`, with a mnemonic alias from
  // the backing pseudo's mnemonic.
  multiclass DS_Real_Renamed_gfx11<bits<8> op, DS_Pseudo backing_pseudo,
                                   string real_name> {
    def _gfx11
      : Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<
          op, backing_pseudo, SIEncodingFamily.GFX11, real_name>,
        MnemonicAlias<backing_pseudo.Mnemonic, real_name>,
        Requires<[isGFX11Only]>;
  }
} // End AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11"
1278
// Same opcode on GFX11 and GFX12.
multiclass DS_Real_gfx11_gfx12<bits<8> op> :
  DS_Real_gfx11<op>,
  DS_Real_gfx12<op>;

// Same opcode and new mnemonic on GFX11 and GFX12.
multiclass DS_Real_Renamed_gfx11_gfx12<bits<8> op, DS_Pseudo backing_pseudo,
                                       string real_name> :
  DS_Real_Renamed_gfx11<op, backing_pseudo, real_name>,
  DS_Real_Renamed_gfx12<op, backing_pseudo, real_name>;
1286
// DS_WRITE*/DS_READ*/DS_WRXCHG* pseudos are exposed on GFX11 and GFX12 under
// ds_store*/ds_load*/ds_storexchg* mnemonics.

// 32-bit and sub-dword accesses.
defm DS_STORE_B32                        : DS_Real_Renamed_gfx11_gfx12<0x00d, DS_WRITE_B32, "ds_store_b32">;
defm DS_STORE_2ADDR_B32                  : DS_Real_Renamed_gfx11_gfx12<0x00e, DS_WRITE2_B32, "ds_store_2addr_b32">;
defm DS_STORE_2ADDR_STRIDE64_B32         : DS_Real_Renamed_gfx11_gfx12<0x00f, DS_WRITE2ST64_B32, "ds_store_2addr_stride64_b32">;
defm DS_STORE_B8                         : DS_Real_Renamed_gfx11_gfx12<0x01e, DS_WRITE_B8, "ds_store_b8">;
defm DS_STORE_B16                        : DS_Real_Renamed_gfx11_gfx12<0x01f, DS_WRITE_B16, "ds_store_b16">;
defm DS_STOREXCHG_RTN_B32                : DS_Real_Renamed_gfx11_gfx12<0x02d, DS_WRXCHG_RTN_B32, "ds_storexchg_rtn_b32">;
defm DS_STOREXCHG_2ADDR_RTN_B32          : DS_Real_Renamed_gfx11_gfx12<0x02e, DS_WRXCHG2_RTN_B32, "ds_storexchg_2addr_rtn_b32">;
defm DS_STOREXCHG_2ADDR_STRIDE64_RTN_B32 : DS_Real_Renamed_gfx11_gfx12<0x02f, DS_WRXCHG2ST64_RTN_B32, "ds_storexchg_2addr_stride64_rtn_b32">;
defm DS_LOAD_B32                         : DS_Real_Renamed_gfx11_gfx12<0x036, DS_READ_B32, "ds_load_b32">;
defm DS_LOAD_2ADDR_B32                   : DS_Real_Renamed_gfx11_gfx12<0x037, DS_READ2_B32, "ds_load_2addr_b32">;
defm DS_LOAD_2ADDR_STRIDE64_B32          : DS_Real_Renamed_gfx11_gfx12<0x038, DS_READ2ST64_B32, "ds_load_2addr_stride64_b32">;
defm DS_LOAD_I8                          : DS_Real_Renamed_gfx11_gfx12<0x039, DS_READ_I8, "ds_load_i8">;
defm DS_LOAD_U8                          : DS_Real_Renamed_gfx11_gfx12<0x03a, DS_READ_U8, "ds_load_u8">;
defm DS_LOAD_I16                         : DS_Real_Renamed_gfx11_gfx12<0x03b, DS_READ_I16, "ds_load_i16">;
defm DS_LOAD_U16                         : DS_Real_Renamed_gfx11_gfx12<0x03c, DS_READ_U16, "ds_load_u16">;

// 64-bit accesses.
defm DS_STORE_B64                        : DS_Real_Renamed_gfx11_gfx12<0x04d, DS_WRITE_B64, "ds_store_b64">;
defm DS_STORE_2ADDR_B64                  : DS_Real_Renamed_gfx11_gfx12<0x04e, DS_WRITE2_B64, "ds_store_2addr_b64">;
defm DS_STORE_2ADDR_STRIDE64_B64         : DS_Real_Renamed_gfx11_gfx12<0x04f, DS_WRITE2ST64_B64, "ds_store_2addr_stride64_b64">;
defm DS_STOREXCHG_RTN_B64                : DS_Real_Renamed_gfx11_gfx12<0x06d, DS_WRXCHG_RTN_B64, "ds_storexchg_rtn_b64">;
defm DS_STOREXCHG_2ADDR_RTN_B64          : DS_Real_Renamed_gfx11_gfx12<0x06e, DS_WRXCHG2_RTN_B64, "ds_storexchg_2addr_rtn_b64">;
defm DS_STOREXCHG_2ADDR_STRIDE64_RTN_B64 : DS_Real_Renamed_gfx11_gfx12<0x06f, DS_WRXCHG2ST64_RTN_B64, "ds_storexchg_2addr_stride64_rtn_b64">;
defm DS_LOAD_B64                         : DS_Real_Renamed_gfx11_gfx12<0x076, DS_READ_B64, "ds_load_b64">;
defm DS_LOAD_2ADDR_B64                   : DS_Real_Renamed_gfx11_gfx12<0x077, DS_READ2_B64, "ds_load_2addr_b64">;
defm DS_LOAD_2ADDR_STRIDE64_B64          : DS_Real_Renamed_gfx11_gfx12<0x078, DS_READ2ST64_B64, "ds_load_2addr_stride64_b64">;

// D16 accesses.
defm DS_STORE_B8_D16_HI                  : DS_Real_Renamed_gfx11_gfx12<0x0a0, DS_WRITE_B8_D16_HI, "ds_store_b8_d16_hi">;
defm DS_STORE_B16_D16_HI                 : DS_Real_Renamed_gfx11_gfx12<0x0a1, DS_WRITE_B16_D16_HI, "ds_store_b16_d16_hi">;
defm DS_LOAD_U8_D16                      : DS_Real_Renamed_gfx11_gfx12<0x0a2, DS_READ_U8_D16, "ds_load_u8_d16">;
defm DS_LOAD_U8_D16_HI                   : DS_Real_Renamed_gfx11_gfx12<0x0a3, DS_READ_U8_D16_HI, "ds_load_u8_d16_hi">;
defm DS_LOAD_I8_D16                      : DS_Real_Renamed_gfx11_gfx12<0x0a4, DS_READ_I8_D16, "ds_load_i8_d16">;
defm DS_LOAD_I8_D16_HI                   : DS_Real_Renamed_gfx11_gfx12<0x0a5, DS_READ_I8_D16_HI, "ds_load_i8_d16_hi">;
defm DS_LOAD_U16_D16                     : DS_Real_Renamed_gfx11_gfx12<0x0a6, DS_READ_U16_D16, "ds_load_u16_d16">;
defm DS_LOAD_U16_D16_HI                  : DS_Real_Renamed_gfx11_gfx12<0x0a7, DS_READ_U16_D16_HI, "ds_load_u16_d16_hi">;

// ADDTID accesses.
defm DS_STORE_ADDTID_B32                 : DS_Real_Renamed_gfx11_gfx12<0x0b0, DS_WRITE_ADDTID_B32, "ds_store_addtid_b32">;
defm DS_LOAD_ADDTID_B32                  : DS_Real_Renamed_gfx11_gfx12<0x0b1, DS_READ_ADDTID_B32, "ds_load_addtid_b32">;

// 96-bit and 128-bit accesses.
defm DS_STORE_B96                        : DS_Real_Renamed_gfx11_gfx12<0x0de, DS_WRITE_B96, "ds_store_b96">;
defm DS_STORE_B128                       : DS_Real_Renamed_gfx11_gfx12<0x0df, DS_WRITE_B128, "ds_store_b128">;
defm DS_LOAD_B96                         : DS_Real_Renamed_gfx11_gfx12<0x0fe, DS_READ_B96, "ds_load_b96">;
defm DS_LOAD_B128                        : DS_Real_Renamed_gfx11_gfx12<0x0ff, DS_READ_B128, "ds_load_b128">;
1325
// DS_CMPST_* are renamed to DS_CMPSTORE_* in GFX11, and the data operands
// (src and cmp) are also swapped compared to pre-GFX11.
1328// Note: the mnemonic alias is not generated to avoid a potential ambiguity due to the semantics change.
1329
// Compare-and-store: the B32/B64 forms are encoded on GFX11 and GFX12, the
// F32/F64 forms on GFX11 only.
defm DS_CMPSTORE_B32      : DS_Real_gfx11_gfx12<0x010>;
defm DS_CMPSTORE_F32      : DS_Real_gfx11<0x011>;
defm DS_CMPSTORE_RTN_B32  : DS_Real_gfx11_gfx12<0x030>;
defm DS_CMPSTORE_RTN_F32  : DS_Real_gfx11<0x031>;
defm DS_CMPSTORE_B64      : DS_Real_gfx11_gfx12<0x050>;
defm DS_CMPSTORE_F64      : DS_Real_gfx11<0x051>;
defm DS_CMPSTORE_RTN_B64  : DS_Real_gfx11_gfx12<0x070>;
defm DS_CMPSTORE_RTN_F64  : DS_Real_gfx11<0x071>;

defm DS_ADD_RTN_F32       : DS_Real_gfx11_gfx12<0x079>;
defm DS_ADD_GS_REG_RTN    : DS_Real_gfx11<0x07a>;
defm DS_SUB_GS_REG_RTN    : DS_Real_gfx11<0x07b>;
defm DS_BVH_STACK_RTN_B32 : DS_Real_gfx11<0x0ad>;
1343
1344//===----------------------------------------------------------------------===//
1345// GFX10.
1346//===----------------------------------------------------------------------===//
1347
let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
  // GFX10 real instruction backed by the pseudo that shares the defm's name.
  multiclass DS_Real_gfx10<bits<8> op> {
    defvar ps = !cast<DS_Pseudo>(NAME);
    def _gfx10
      : Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<op, ps,
                                                 SIEncodingFamily.GFX10>;
  }
} // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10"
1354
// Real encodings defined for GFX10 only in this group.
defm DS_ADD_RTN_F32      : DS_Real_gfx10<0x055>;

// D16 accesses.
defm DS_WRITE_B8_D16_HI  : DS_Real_gfx10<0x0a0>;
defm DS_WRITE_B16_D16_HI : DS_Real_gfx10<0x0a1>;
defm DS_READ_U8_D16      : DS_Real_gfx10<0x0a2>;
defm DS_READ_U8_D16_HI   : DS_Real_gfx10<0x0a3>;
defm DS_READ_I8_D16      : DS_Real_gfx10<0x0a4>;
defm DS_READ_I8_D16_HI   : DS_Real_gfx10<0x0a5>;
defm DS_READ_U16_D16     : DS_Real_gfx10<0x0a6>;
defm DS_READ_U16_D16_HI  : DS_Real_gfx10<0x0a7>;

// ADDTID accesses.
defm DS_WRITE_ADDTID_B32 : DS_Real_gfx10<0x0b0>;
defm DS_READ_ADDTID_B32  : DS_Real_gfx10<0x0b1>;
1366
1367//===----------------------------------------------------------------------===//
1368// GFX10, GFX11, GFX12.
1369//===----------------------------------------------------------------------===//
1370
// Same opcode on GFX10, GFX11 and GFX12.
multiclass DS_Real_gfx10_gfx11_gfx12<bits<8> op>
  : DS_Real_gfx10<op>,
    DS_Real_gfx11<op>,
    DS_Real_gfx12<op>;

// Same opcode on GFX10 and GFX11.
multiclass DS_Real_gfx10_gfx11<bits<8> op>
  : DS_Real_gfx10<op>,
    DS_Real_gfx11<op>;
1376
// DS_ADD_SRC2_F32 is encoded for GFX10 only; the others also on GFX11/GFX12.
defm DS_ADD_F32      : DS_Real_gfx10_gfx11_gfx12<0x015>;
defm DS_ADD_SRC2_F32 : DS_Real_gfx10<0x095>;
defm DS_PERMUTE_B32  : DS_Real_gfx10_gfx11_gfx12<0x0b2>;
defm DS_BPERMUTE_B32 : DS_Real_gfx10_gfx11_gfx12<0x0b3>;
1381
1382//===----------------------------------------------------------------------===//
1383// GFX7, GFX10, GFX11, GFX12.
1384//===----------------------------------------------------------------------===//
1385
let AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7" in {
  // GFX7 real instruction backed by the pseudo that shares the defm's name.
  // It uses the SI encoding family.
  multiclass DS_Real_gfx7<bits<8> op> {
    defvar ps = !cast<DS_Pseudo>(NAME);
    def _gfx7
      : Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<op, ps, SIEncodingFamily.SI>;
  }
} // End AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7"
1392
// Same opcode on GFX7, GFX10, GFX11 and GFX12.
multiclass DS_Real_gfx7_gfx10_gfx11_gfx12<bits<8> op>
  : DS_Real_gfx7<op>,
    DS_Real_gfx10_gfx11_gfx12<op>;

// Same opcode on GFX7, GFX10 and GFX11.
multiclass DS_Real_gfx7_gfx10_gfx11<bits<8> op>
  : DS_Real_gfx7<op>,
    DS_Real_gfx10_gfx11<op>;

// Same opcode on GFX7 and GFX10.
multiclass DS_Real_gfx7_gfx10<bits<8> op>
  : DS_Real_gfx7<op>,
    DS_Real_gfx10<op>;
1401
// FIXME-GFX7: Add tests when upstreaming this part.
defm DS_GWS_SEMA_RELEASE_ALL : DS_Real_gfx7_gfx10_gfx11<0x018>;
defm DS_WRAP_RTN_B32         : DS_Real_gfx7_gfx10_gfx11<0x034>;
defm DS_CONDXCHG32_RTN_B64   : DS_Real_gfx7_gfx10_gfx11_gfx12<0x07e>;

// 96-bit and 128-bit accesses under their pre-GFX11 names.
defm DS_WRITE_B96            : DS_Real_gfx7_gfx10<0x0de>;
defm DS_WRITE_B128           : DS_Real_gfx7_gfx10<0x0df>;
defm DS_READ_B96             : DS_Real_gfx7_gfx10<0x0fe>;
defm DS_READ_B128            : DS_Real_gfx7_gfx10<0x0ff>;
1410
1411//===----------------------------------------------------------------------===//
// GFX6, GFX7, GFX10, GFX11, GFX12.
1413//===----------------------------------------------------------------------===//
1414
// Defines <NAME>_gfx6_gfx7 from the pseudo record called NAME. The definition
// is made under the isGFX6GFX7 assembler predicate and the "GFX6GFX7" decoder
// namespace, and uses the SI encoding family.
let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
  multiclass DS_Real_gfx6_gfx7<bits<8> op> {
    def _gfx6_gfx7 :
      Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<
        op, !cast<DS_Pseudo>(NAME), SIEncodingFamily.SI>;
  }
} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"
1421
// DS_Real_gfx6_gfx7 plus DS_Real_gfx10_gfx11_gfx12, all with the same opcode.
multiclass DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<bits<8> op>
  : DS_Real_gfx6_gfx7<op>,
    DS_Real_gfx10_gfx11_gfx12<op>;
1424
// DS_Real_gfx6_gfx7 plus DS_Real_gfx10_gfx11, all with the same opcode.
multiclass DS_Real_gfx6_gfx7_gfx10_gfx11<bits<8> op>
  : DS_Real_gfx6_gfx7<op>,
    DS_Real_gfx10_gfx11<op>;
1427
// DS_Real_gfx6_gfx7 plus DS_Real_gfx10, both with the same opcode.
multiclass DS_Real_gfx6_gfx7_gfx10<bits<8> op>
  : DS_Real_gfx6_gfx7<op>,
    DS_Real_gfx10<op>;
1430
// Opcode table, listed in ascending opcode order. The multiclass named on each
// line selects which real encodings are instantiated for that opcode.

// 0x000 - 0x00c
defm DS_ADD_U32             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x000>;
defm DS_SUB_U32             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x001>;
defm DS_RSUB_U32            : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x002>;
defm DS_INC_U32             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x003>;
defm DS_DEC_U32             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x004>;
defm DS_MIN_I32             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x005>;
defm DS_MAX_I32             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x006>;
defm DS_MIN_U32             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x007>;
defm DS_MAX_U32             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x008>;
defm DS_AND_B32             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x009>;
defm DS_OR_B32              : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x00a>;
defm DS_XOR_B32             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x00b>;
defm DS_MSKOR_B32           : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x00c>;

// 0x00d - 0x011
defm DS_WRITE_B32           : DS_Real_gfx6_gfx7_gfx10<0x00d>;
defm DS_WRITE2_B32          : DS_Real_gfx6_gfx7_gfx10<0x00e>;
defm DS_WRITE2ST64_B32      : DS_Real_gfx6_gfx7_gfx10<0x00f>;
defm DS_CMPST_B32           : DS_Real_gfx6_gfx7_gfx10<0x010>;
defm DS_CMPST_F32           : DS_Real_gfx6_gfx7_gfx10<0x011>;

// 0x012 - 0x01d (0x015 - 0x018 are not assigned in this table)
defm DS_MIN_F32             : DS_Real_gfx6_gfx7_gfx10_gfx11<0x012>;
defm DS_MAX_F32             : DS_Real_gfx6_gfx7_gfx10_gfx11<0x013>;
defm DS_NOP                 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x014>;
defm DS_GWS_INIT            : DS_Real_gfx6_gfx7_gfx10_gfx11<0x019>;
defm DS_GWS_SEMA_V          : DS_Real_gfx6_gfx7_gfx10_gfx11<0x01a>;
defm DS_GWS_SEMA_BR         : DS_Real_gfx6_gfx7_gfx10_gfx11<0x01b>;
defm DS_GWS_SEMA_P          : DS_Real_gfx6_gfx7_gfx10_gfx11<0x01c>;
defm DS_GWS_BARRIER         : DS_Real_gfx6_gfx7_gfx10_gfx11<0x01d>;

// 0x01e - 0x01f
defm DS_WRITE_B8            : DS_Real_gfx6_gfx7_gfx10<0x01e>;
defm DS_WRITE_B16           : DS_Real_gfx6_gfx7_gfx10<0x01f>;

// 0x020 - 0x02c: _RTN forms of 0x000 - 0x00c
defm DS_ADD_RTN_U32         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x020>;
defm DS_SUB_RTN_U32         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x021>;
defm DS_RSUB_RTN_U32        : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x022>;
defm DS_INC_RTN_U32         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x023>;
defm DS_DEC_RTN_U32         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x024>;
defm DS_MIN_RTN_I32         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x025>;
defm DS_MAX_RTN_I32         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x026>;
defm DS_MIN_RTN_U32         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x027>;
defm DS_MAX_RTN_U32         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x028>;
defm DS_AND_RTN_B32         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x029>;
defm DS_OR_RTN_B32          : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x02a>;
defm DS_XOR_RTN_B32         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x02b>;
defm DS_MSKOR_RTN_B32       : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x02c>;

// 0x02d - 0x031
defm DS_WRXCHG_RTN_B32      : DS_Real_gfx6_gfx7_gfx10<0x02d>;
defm DS_WRXCHG2_RTN_B32     : DS_Real_gfx6_gfx7_gfx10<0x02e>;
defm DS_WRXCHG2ST64_RTN_B32 : DS_Real_gfx6_gfx7_gfx10<0x02f>;
defm DS_CMPST_RTN_B32       : DS_Real_gfx6_gfx7_gfx10<0x030>;
defm DS_CMPST_RTN_F32       : DS_Real_gfx6_gfx7_gfx10<0x031>;

// 0x032 - 0x035 (0x034 is not assigned in this table)
defm DS_MIN_RTN_F32         : DS_Real_gfx6_gfx7_gfx10_gfx11<0x032>;
defm DS_MAX_RTN_F32         : DS_Real_gfx6_gfx7_gfx10_gfx11<0x033>;
defm DS_SWIZZLE_B32         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x035>;

// 0x036 - 0x03c
defm DS_READ_B32            : DS_Real_gfx6_gfx7_gfx10<0x036>;
defm DS_READ2_B32           : DS_Real_gfx6_gfx7_gfx10<0x037>;
defm DS_READ2ST64_B32       : DS_Real_gfx6_gfx7_gfx10<0x038>;
defm DS_READ_I8             : DS_Real_gfx6_gfx7_gfx10<0x039>;
defm DS_READ_U8             : DS_Real_gfx6_gfx7_gfx10<0x03a>;
defm DS_READ_I16            : DS_Real_gfx6_gfx7_gfx10<0x03b>;
defm DS_READ_U16            : DS_Real_gfx6_gfx7_gfx10<0x03c>;

// 0x03d - 0x04c
defm DS_CONSUME             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x03d>;
defm DS_APPEND              : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x03e>;
defm DS_ORDERED_COUNT       : DS_Real_gfx6_gfx7_gfx10_gfx11<0x03f>;
defm DS_ADD_U64             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x040>;
defm DS_SUB_U64             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x041>;
defm DS_RSUB_U64            : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x042>;
defm DS_INC_U64             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x043>;
defm DS_DEC_U64             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x044>;
defm DS_MIN_I64             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x045>;
defm DS_MAX_I64             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x046>;
defm DS_MIN_U64             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x047>;
defm DS_MAX_U64             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x048>;
defm DS_AND_B64             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x049>;
defm DS_OR_B64              : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x04a>;
defm DS_XOR_B64             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x04b>;
defm DS_MSKOR_B64           : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x04c>;

// 0x04d - 0x051
defm DS_WRITE_B64           : DS_Real_gfx6_gfx7_gfx10<0x04d>;
defm DS_WRITE2_B64          : DS_Real_gfx6_gfx7_gfx10<0x04e>;
defm DS_WRITE2ST64_B64      : DS_Real_gfx6_gfx7_gfx10<0x04f>;
defm DS_CMPST_B64           : DS_Real_gfx6_gfx7_gfx10<0x050>;
defm DS_CMPST_F64           : DS_Real_gfx6_gfx7_gfx10<0x051>;

// 0x052 - 0x06c (0x054 - 0x05f are not assigned in this table)
defm DS_MIN_F64             : DS_Real_gfx6_gfx7_gfx10_gfx11<0x052>;
defm DS_MAX_F64             : DS_Real_gfx6_gfx7_gfx10_gfx11<0x053>;
defm DS_ADD_RTN_U64         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x060>;
defm DS_SUB_RTN_U64         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x061>;
defm DS_RSUB_RTN_U64        : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x062>;
defm DS_INC_RTN_U64         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x063>;
defm DS_DEC_RTN_U64         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x064>;
defm DS_MIN_RTN_I64         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x065>;
defm DS_MAX_RTN_I64         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x066>;
defm DS_MIN_RTN_U64         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x067>;
defm DS_MAX_RTN_U64         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x068>;
defm DS_AND_RTN_B64         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x069>;
defm DS_OR_RTN_B64          : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x06a>;
defm DS_XOR_RTN_B64         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x06b>;
defm DS_MSKOR_RTN_B64       : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x06c>;

// 0x06d - 0x071
defm DS_WRXCHG_RTN_B64      : DS_Real_gfx6_gfx7_gfx10<0x06d>;
defm DS_WRXCHG2_RTN_B64     : DS_Real_gfx6_gfx7_gfx10<0x06e>;
defm DS_WRXCHG2ST64_RTN_B64 : DS_Real_gfx6_gfx7_gfx10<0x06f>;
defm DS_CMPST_RTN_B64       : DS_Real_gfx6_gfx7_gfx10<0x070>;
defm DS_CMPST_RTN_F64       : DS_Real_gfx6_gfx7_gfx10<0x071>;

// 0x072 - 0x073
defm DS_MIN_RTN_F64         : DS_Real_gfx6_gfx7_gfx10_gfx11<0x072>;
defm DS_MAX_RTN_F64         : DS_Real_gfx6_gfx7_gfx10_gfx11<0x073>;

// 0x076 - 0x078
defm DS_READ_B64            : DS_Real_gfx6_gfx7_gfx10<0x076>;
defm DS_READ2_B64           : DS_Real_gfx6_gfx7_gfx10<0x077>;
defm DS_READ2ST64_B64       : DS_Real_gfx6_gfx7_gfx10<0x078>;

// 0x080 - 0x093: 32-bit _SRC2 forms
defm DS_ADD_SRC2_U32        : DS_Real_gfx6_gfx7_gfx10<0x080>;
defm DS_SUB_SRC2_U32        : DS_Real_gfx6_gfx7_gfx10<0x081>;
defm DS_RSUB_SRC2_U32       : DS_Real_gfx6_gfx7_gfx10<0x082>;
defm DS_INC_SRC2_U32        : DS_Real_gfx6_gfx7_gfx10<0x083>;
defm DS_DEC_SRC2_U32        : DS_Real_gfx6_gfx7_gfx10<0x084>;
defm DS_MIN_SRC2_I32        : DS_Real_gfx6_gfx7_gfx10<0x085>;
defm DS_MAX_SRC2_I32        : DS_Real_gfx6_gfx7_gfx10<0x086>;
defm DS_MIN_SRC2_U32        : DS_Real_gfx6_gfx7_gfx10<0x087>;
defm DS_MAX_SRC2_U32        : DS_Real_gfx6_gfx7_gfx10<0x088>;
defm DS_AND_SRC2_B32        : DS_Real_gfx6_gfx7_gfx10<0x089>;
defm DS_OR_SRC2_B32         : DS_Real_gfx6_gfx7_gfx10<0x08a>;
defm DS_XOR_SRC2_B32        : DS_Real_gfx6_gfx7_gfx10<0x08b>;
defm DS_WRITE_SRC2_B32      : DS_Real_gfx6_gfx7_gfx10<0x08d>;
defm DS_MIN_SRC2_F32        : DS_Real_gfx6_gfx7_gfx10<0x092>;
defm DS_MAX_SRC2_F32        : DS_Real_gfx6_gfx7_gfx10<0x093>;

// 0x0c0 - 0x0d3: 64-bit _SRC2 forms
defm DS_ADD_SRC2_U64        : DS_Real_gfx6_gfx7_gfx10<0x0c0>;
defm DS_SUB_SRC2_U64        : DS_Real_gfx6_gfx7_gfx10<0x0c1>;
defm DS_RSUB_SRC2_U64       : DS_Real_gfx6_gfx7_gfx10<0x0c2>;
defm DS_INC_SRC2_U64        : DS_Real_gfx6_gfx7_gfx10<0x0c3>;
defm DS_DEC_SRC2_U64        : DS_Real_gfx6_gfx7_gfx10<0x0c4>;
defm DS_MIN_SRC2_I64        : DS_Real_gfx6_gfx7_gfx10<0x0c5>;
defm DS_MAX_SRC2_I64        : DS_Real_gfx6_gfx7_gfx10<0x0c6>;
defm DS_MIN_SRC2_U64        : DS_Real_gfx6_gfx7_gfx10<0x0c7>;
defm DS_MAX_SRC2_U64        : DS_Real_gfx6_gfx7_gfx10<0x0c8>;
defm DS_AND_SRC2_B64        : DS_Real_gfx6_gfx7_gfx10<0x0c9>;
defm DS_OR_SRC2_B64         : DS_Real_gfx6_gfx7_gfx10<0x0ca>;
defm DS_XOR_SRC2_B64        : DS_Real_gfx6_gfx7_gfx10<0x0cb>;
defm DS_WRITE_SRC2_B64      : DS_Real_gfx6_gfx7_gfx10<0x0cd>;
defm DS_MIN_SRC2_F64        : DS_Real_gfx6_gfx7_gfx10<0x0d2>;
defm DS_MAX_SRC2_F64        : DS_Real_gfx6_gfx7_gfx10<0x0d3>;
1576
1577//===----------------------------------------------------------------------===//
1578// GFX8, GFX9 (VI).
1579//===----------------------------------------------------------------------===//
1580
// Real (encodable) form of a DS pseudo, guarded by the isGFX8GFX9 assembler
// predicate and placed in the "GFX8" decoder namespace.
//   op - 8-bit opcode, stored in Inst{24-17}.
//   ps - the pseudo being encoded; its has_* flags choose, field by field,
//        between the operand value and a constant.
class DS_Real_vi <bits<8> op, DS_Pseudo ps>
  : DS_Real <ps>,
    SIMCInstr <ps.Mnemonic, SIEncodingFamily.VI> {
  let AssemblerPredicate = isGFX8GFX9;
  let DecoderNamespace = "GFX8";

  // encoding, bits 31-0
  let Inst{7-0}   = !if(ps.has_offset0, offset0, 0);
  let Inst{15-8}  = !if(ps.has_offset1, offset1, 0);
  // Without a gds operand the bit is fixed to the pseudo's gdsValue.
  let Inst{16}    = !if(ps.has_gds, gds, ps.gdsValue);
  let Inst{24-17} = op;
  let Inst{25}    = acc;
  let Inst{31-26} = 0x36; // ds prefix

  // encoding, bits 63-32
  // The addr field carries addr when present; otherwise, for has_gws_data0
  // pseudos, it carries the low 8 bits of data0; otherwise 0.
  let Inst{39-32} = !if(ps.has_addr, addr,
                        !if(ps.has_gws_data0, data0{7-0}, 0));
  // Only the low 8 bits of data0, data1 and vdst are placed in these fields.
  let Inst{47-40} = !if(ps.has_data0, data0{7-0}, 0);
  let Inst{55-48} = !if(ps.has_data1, data1{7-0}, 0);
  let Inst{63-56} = !if(ps.has_vdst, vdst{7-0}, 0);
}
1599
// DS_Real_vi opcode table. Each record is <PSEUDO>_vi and pairs an 8-bit
// opcode with the pseudo it encodes. Opcodes are written as two lowercase hex
// digits throughout.

// 32-bit operations without a returned value.
def DS_ADD_U32_vi              : DS_Real_vi<0x00, DS_ADD_U32>;
def DS_SUB_U32_vi              : DS_Real_vi<0x01, DS_SUB_U32>;
def DS_RSUB_U32_vi             : DS_Real_vi<0x02, DS_RSUB_U32>;
def DS_INC_U32_vi              : DS_Real_vi<0x03, DS_INC_U32>;
def DS_DEC_U32_vi              : DS_Real_vi<0x04, DS_DEC_U32>;
def DS_MIN_I32_vi              : DS_Real_vi<0x05, DS_MIN_I32>;
def DS_MAX_I32_vi              : DS_Real_vi<0x06, DS_MAX_I32>;
def DS_MIN_U32_vi              : DS_Real_vi<0x07, DS_MIN_U32>;
def DS_MAX_U32_vi              : DS_Real_vi<0x08, DS_MAX_U32>;
def DS_AND_B32_vi              : DS_Real_vi<0x09, DS_AND_B32>;
def DS_OR_B32_vi               : DS_Real_vi<0x0a, DS_OR_B32>;
def DS_XOR_B32_vi              : DS_Real_vi<0x0b, DS_XOR_B32>;
def DS_MSKOR_B32_vi            : DS_Real_vi<0x0c, DS_MSKOR_B32>;
def DS_WRITE_B32_vi            : DS_Real_vi<0x0d, DS_WRITE_B32>;
def DS_WRITE2_B32_vi           : DS_Real_vi<0x0e, DS_WRITE2_B32>;
def DS_WRITE2ST64_B32_vi       : DS_Real_vi<0x0f, DS_WRITE2ST64_B32>;
def DS_CMPST_B32_vi            : DS_Real_vi<0x10, DS_CMPST_B32>;
def DS_CMPST_F32_vi            : DS_Real_vi<0x11, DS_CMPST_F32>;
def DS_MIN_F32_vi              : DS_Real_vi<0x12, DS_MIN_F32>;
def DS_MAX_F32_vi              : DS_Real_vi<0x13, DS_MAX_F32>;
def DS_NOP_vi                  : DS_Real_vi<0x14, DS_NOP>;
def DS_ADD_F32_vi              : DS_Real_vi<0x15, DS_ADD_F32>;
// The GWS opcodes here are 0x99 - 0x9d.
def DS_GWS_INIT_vi             : DS_Real_vi<0x99, DS_GWS_INIT>;
def DS_GWS_SEMA_V_vi           : DS_Real_vi<0x9a, DS_GWS_SEMA_V>;
def DS_GWS_SEMA_BR_vi          : DS_Real_vi<0x9b, DS_GWS_SEMA_BR>;
def DS_GWS_SEMA_P_vi           : DS_Real_vi<0x9c, DS_GWS_SEMA_P>;
def DS_GWS_BARRIER_vi          : DS_Real_vi<0x9d, DS_GWS_BARRIER>;
def DS_WRITE_ADDTID_B32_vi     : DS_Real_vi<0x1d, DS_WRITE_ADDTID_B32>;
def DS_WRITE_B8_vi             : DS_Real_vi<0x1e, DS_WRITE_B8>;
def DS_WRITE_B16_vi            : DS_Real_vi<0x1f, DS_WRITE_B16>;

// 32-bit _RTN operations and reads.
def DS_ADD_RTN_U32_vi          : DS_Real_vi<0x20, DS_ADD_RTN_U32>;
def DS_SUB_RTN_U32_vi          : DS_Real_vi<0x21, DS_SUB_RTN_U32>;
def DS_RSUB_RTN_U32_vi         : DS_Real_vi<0x22, DS_RSUB_RTN_U32>;
def DS_INC_RTN_U32_vi          : DS_Real_vi<0x23, DS_INC_RTN_U32>;
def DS_DEC_RTN_U32_vi          : DS_Real_vi<0x24, DS_DEC_RTN_U32>;
def DS_MIN_RTN_I32_vi          : DS_Real_vi<0x25, DS_MIN_RTN_I32>;
def DS_MAX_RTN_I32_vi          : DS_Real_vi<0x26, DS_MAX_RTN_I32>;
def DS_MIN_RTN_U32_vi          : DS_Real_vi<0x27, DS_MIN_RTN_U32>;
def DS_MAX_RTN_U32_vi          : DS_Real_vi<0x28, DS_MAX_RTN_U32>;
def DS_AND_RTN_B32_vi          : DS_Real_vi<0x29, DS_AND_RTN_B32>;
def DS_OR_RTN_B32_vi           : DS_Real_vi<0x2a, DS_OR_RTN_B32>;
def DS_XOR_RTN_B32_vi          : DS_Real_vi<0x2b, DS_XOR_RTN_B32>;
def DS_MSKOR_RTN_B32_vi        : DS_Real_vi<0x2c, DS_MSKOR_RTN_B32>;
def DS_WRXCHG_RTN_B32_vi       : DS_Real_vi<0x2d, DS_WRXCHG_RTN_B32>;
def DS_WRXCHG2_RTN_B32_vi      : DS_Real_vi<0x2e, DS_WRXCHG2_RTN_B32>;
def DS_WRXCHG2ST64_RTN_B32_vi  : DS_Real_vi<0x2f, DS_WRXCHG2ST64_RTN_B32>;
def DS_CMPST_RTN_B32_vi        : DS_Real_vi<0x30, DS_CMPST_RTN_B32>;
def DS_CMPST_RTN_F32_vi        : DS_Real_vi<0x31, DS_CMPST_RTN_F32>;
def DS_MIN_RTN_F32_vi          : DS_Real_vi<0x32, DS_MIN_RTN_F32>;
def DS_MAX_RTN_F32_vi          : DS_Real_vi<0x33, DS_MAX_RTN_F32>;
def DS_WRAP_RTN_B32_vi         : DS_Real_vi<0x34, DS_WRAP_RTN_B32>;
def DS_ADD_RTN_F32_vi          : DS_Real_vi<0x35, DS_ADD_RTN_F32>;
def DS_READ_B32_vi             : DS_Real_vi<0x36, DS_READ_B32>;
def DS_READ2_B32_vi            : DS_Real_vi<0x37, DS_READ2_B32>;
def DS_READ2ST64_B32_vi        : DS_Real_vi<0x38, DS_READ2ST64_B32>;
def DS_READ_I8_vi              : DS_Real_vi<0x39, DS_READ_I8>;
def DS_READ_U8_vi              : DS_Real_vi<0x3a, DS_READ_U8>;
def DS_READ_I16_vi             : DS_Real_vi<0x3b, DS_READ_I16>;
def DS_READ_U16_vi             : DS_Real_vi<0x3c, DS_READ_U16>;
def DS_READ_ADDTID_B32_vi      : DS_Real_vi<0xb6, DS_READ_ADDTID_B32>;
def DS_CONSUME_vi              : DS_Real_vi<0xbd, DS_CONSUME>;
def DS_APPEND_vi               : DS_Real_vi<0xbe, DS_APPEND>;
def DS_ORDERED_COUNT_vi        : DS_Real_vi<0xbf, DS_ORDERED_COUNT>;
def DS_SWIZZLE_B32_vi          : DS_Real_vi<0x3d, DS_SWIZZLE_B32>;
def DS_PERMUTE_B32_vi          : DS_Real_vi<0x3e, DS_PERMUTE_B32>;
def DS_BPERMUTE_B32_vi         : DS_Real_vi<0x3f, DS_BPERMUTE_B32>;

// 64-bit operations without a returned value.
def DS_ADD_U64_vi              : DS_Real_vi<0x40, DS_ADD_U64>;
def DS_SUB_U64_vi              : DS_Real_vi<0x41, DS_SUB_U64>;
def DS_RSUB_U64_vi             : DS_Real_vi<0x42, DS_RSUB_U64>;
def DS_INC_U64_vi              : DS_Real_vi<0x43, DS_INC_U64>;
def DS_DEC_U64_vi              : DS_Real_vi<0x44, DS_DEC_U64>;
def DS_MIN_I64_vi              : DS_Real_vi<0x45, DS_MIN_I64>;
def DS_MAX_I64_vi              : DS_Real_vi<0x46, DS_MAX_I64>;
def DS_MIN_U64_vi              : DS_Real_vi<0x47, DS_MIN_U64>;
def DS_MAX_U64_vi              : DS_Real_vi<0x48, DS_MAX_U64>;
def DS_AND_B64_vi              : DS_Real_vi<0x49, DS_AND_B64>;
def DS_OR_B64_vi               : DS_Real_vi<0x4a, DS_OR_B64>;
def DS_XOR_B64_vi              : DS_Real_vi<0x4b, DS_XOR_B64>;
def DS_MSKOR_B64_vi            : DS_Real_vi<0x4c, DS_MSKOR_B64>;
def DS_WRITE_B64_vi            : DS_Real_vi<0x4d, DS_WRITE_B64>;
def DS_WRITE2_B64_vi           : DS_Real_vi<0x4e, DS_WRITE2_B64>;
def DS_WRITE2ST64_B64_vi       : DS_Real_vi<0x4f, DS_WRITE2ST64_B64>;
def DS_CMPST_B64_vi            : DS_Real_vi<0x50, DS_CMPST_B64>;
def DS_CMPST_F64_vi            : DS_Real_vi<0x51, DS_CMPST_F64>;
def DS_MIN_F64_vi              : DS_Real_vi<0x52, DS_MIN_F64>;
def DS_MAX_F64_vi              : DS_Real_vi<0x53, DS_MAX_F64>;

// _D16 / _D16_HI writes.
def DS_WRITE_B8_D16_HI_vi      : DS_Real_vi<0x54, DS_WRITE_B8_D16_HI>;
def DS_WRITE_B16_D16_HI_vi     : DS_Real_vi<0x55, DS_WRITE_B16_D16_HI>;

// _D16 / _D16_HI reads.
def DS_READ_U8_D16_vi          : DS_Real_vi<0x56, DS_READ_U8_D16>;
def DS_READ_U8_D16_HI_vi       : DS_Real_vi<0x57, DS_READ_U8_D16_HI>;
def DS_READ_I8_D16_vi          : DS_Real_vi<0x58, DS_READ_I8_D16>;
def DS_READ_I8_D16_HI_vi       : DS_Real_vi<0x59, DS_READ_I8_D16_HI>;
def DS_READ_U16_D16_vi         : DS_Real_vi<0x5a, DS_READ_U16_D16>;
def DS_READ_U16_D16_HI_vi      : DS_Real_vi<0x5b, DS_READ_U16_D16_HI>;

// 64-bit _RTN operations.
def DS_ADD_RTN_U64_vi          : DS_Real_vi<0x60, DS_ADD_RTN_U64>;
def DS_SUB_RTN_U64_vi          : DS_Real_vi<0x61, DS_SUB_RTN_U64>;
def DS_RSUB_RTN_U64_vi         : DS_Real_vi<0x62, DS_RSUB_RTN_U64>;
def DS_INC_RTN_U64_vi          : DS_Real_vi<0x63, DS_INC_RTN_U64>;
def DS_DEC_RTN_U64_vi          : DS_Real_vi<0x64, DS_DEC_RTN_U64>;
def DS_MIN_RTN_I64_vi          : DS_Real_vi<0x65, DS_MIN_RTN_I64>;
def DS_MAX_RTN_I64_vi          : DS_Real_vi<0x66, DS_MAX_RTN_I64>;
def DS_MIN_RTN_U64_vi          : DS_Real_vi<0x67, DS_MIN_RTN_U64>;
def DS_MAX_RTN_U64_vi          : DS_Real_vi<0x68, DS_MAX_RTN_U64>;
def DS_AND_RTN_B64_vi          : DS_Real_vi<0x69, DS_AND_RTN_B64>;
def DS_OR_RTN_B64_vi           : DS_Real_vi<0x6a, DS_OR_RTN_B64>;
def DS_XOR_RTN_B64_vi          : DS_Real_vi<0x6b, DS_XOR_RTN_B64>;
def DS_MSKOR_RTN_B64_vi        : DS_Real_vi<0x6c, DS_MSKOR_RTN_B64>;
def DS_WRXCHG_RTN_B64_vi       : DS_Real_vi<0x6d, DS_WRXCHG_RTN_B64>;
def DS_WRXCHG2_RTN_B64_vi      : DS_Real_vi<0x6e, DS_WRXCHG2_RTN_B64>;
def DS_WRXCHG2ST64_RTN_B64_vi  : DS_Real_vi<0x6f, DS_WRXCHG2ST64_RTN_B64>;
def DS_CONDXCHG32_RTN_B64_vi   : DS_Real_vi<0x7e, DS_CONDXCHG32_RTN_B64>;
def DS_GWS_SEMA_RELEASE_ALL_vi : DS_Real_vi<0x98, DS_GWS_SEMA_RELEASE_ALL>;
def DS_CMPST_RTN_B64_vi        : DS_Real_vi<0x70, DS_CMPST_RTN_B64>;
def DS_CMPST_RTN_F64_vi        : DS_Real_vi<0x71, DS_CMPST_RTN_F64>;
def DS_MIN_RTN_F64_vi          : DS_Real_vi<0x72, DS_MIN_RTN_F64>;
def DS_MAX_RTN_F64_vi          : DS_Real_vi<0x73, DS_MAX_RTN_F64>;

// 64-bit reads.
def DS_READ_B64_vi             : DS_Real_vi<0x76, DS_READ_B64>;
def DS_READ2_B64_vi            : DS_Real_vi<0x77, DS_READ2_B64>;
def DS_READ2ST64_B64_vi        : DS_Real_vi<0x78, DS_READ2ST64_B64>;

// _SRC2 forms, then the 96-bit and 128-bit reads/writes.
def DS_ADD_SRC2_U32_vi         : DS_Real_vi<0x80, DS_ADD_SRC2_U32>;
def DS_SUB_SRC2_U32_vi         : DS_Real_vi<0x81, DS_SUB_SRC2_U32>;
def DS_RSUB_SRC2_U32_vi        : DS_Real_vi<0x82, DS_RSUB_SRC2_U32>;
def DS_INC_SRC2_U32_vi         : DS_Real_vi<0x83, DS_INC_SRC2_U32>;
def DS_DEC_SRC2_U32_vi         : DS_Real_vi<0x84, DS_DEC_SRC2_U32>;
def DS_MIN_SRC2_I32_vi         : DS_Real_vi<0x85, DS_MIN_SRC2_I32>;
def DS_MAX_SRC2_I32_vi         : DS_Real_vi<0x86, DS_MAX_SRC2_I32>;
def DS_MIN_SRC2_U32_vi         : DS_Real_vi<0x87, DS_MIN_SRC2_U32>;
def DS_MAX_SRC2_U32_vi         : DS_Real_vi<0x88, DS_MAX_SRC2_U32>;
def DS_AND_SRC2_B32_vi         : DS_Real_vi<0x89, DS_AND_SRC2_B32>;
def DS_OR_SRC2_B32_vi          : DS_Real_vi<0x8a, DS_OR_SRC2_B32>;
def DS_XOR_SRC2_B32_vi         : DS_Real_vi<0x8b, DS_XOR_SRC2_B32>;
def DS_WRITE_SRC2_B32_vi       : DS_Real_vi<0x8d, DS_WRITE_SRC2_B32>;
def DS_MIN_SRC2_F32_vi         : DS_Real_vi<0x92, DS_MIN_SRC2_F32>;
def DS_MAX_SRC2_F32_vi         : DS_Real_vi<0x93, DS_MAX_SRC2_F32>;
def DS_ADD_SRC2_F32_vi         : DS_Real_vi<0x95, DS_ADD_SRC2_F32>;
def DS_ADD_SRC2_U64_vi         : DS_Real_vi<0xc0, DS_ADD_SRC2_U64>;
def DS_SUB_SRC2_U64_vi         : DS_Real_vi<0xc1, DS_SUB_SRC2_U64>;
def DS_RSUB_SRC2_U64_vi        : DS_Real_vi<0xc2, DS_RSUB_SRC2_U64>;
def DS_INC_SRC2_U64_vi         : DS_Real_vi<0xc3, DS_INC_SRC2_U64>;
def DS_DEC_SRC2_U64_vi         : DS_Real_vi<0xc4, DS_DEC_SRC2_U64>;
def DS_MIN_SRC2_I64_vi         : DS_Real_vi<0xc5, DS_MIN_SRC2_I64>;
def DS_MAX_SRC2_I64_vi         : DS_Real_vi<0xc6, DS_MAX_SRC2_I64>;
def DS_MIN_SRC2_U64_vi         : DS_Real_vi<0xc7, DS_MIN_SRC2_U64>;
def DS_MAX_SRC2_U64_vi         : DS_Real_vi<0xc8, DS_MAX_SRC2_U64>;
def DS_AND_SRC2_B64_vi         : DS_Real_vi<0xc9, DS_AND_SRC2_B64>;
def DS_OR_SRC2_B64_vi          : DS_Real_vi<0xca, DS_OR_SRC2_B64>;
def DS_XOR_SRC2_B64_vi         : DS_Real_vi<0xcb, DS_XOR_SRC2_B64>;
def DS_WRITE_SRC2_B64_vi       : DS_Real_vi<0xcd, DS_WRITE_SRC2_B64>;
def DS_MIN_SRC2_F64_vi         : DS_Real_vi<0xd2, DS_MIN_SRC2_F64>;
def DS_MAX_SRC2_F64_vi         : DS_Real_vi<0xd3, DS_MAX_SRC2_F64>;
def DS_WRITE_B96_vi            : DS_Real_vi<0xde, DS_WRITE_B96>;
def DS_WRITE_B128_vi           : DS_Real_vi<0xdf, DS_WRITE_B128>;
def DS_READ_B96_vi             : DS_Real_vi<0xfe, DS_READ_B96>;
def DS_READ_B128_vi            : DS_Real_vi<0xff, DS_READ_B128>;
1760
// DS_Real_vi records additionally guarded by SubtargetPredicate isGFX90APlus.
let SubtargetPredicate = isGFX90APlus in {
  def DS_ADD_F64_vi :
    DS_Real_vi<0x5c, DS_ADD_F64>;
  def DS_ADD_RTN_F64_vi :
    DS_Real_vi<0x7c, DS_ADD_RTN_F64>;
} // End SubtargetPredicate = isGFX90APlus
1765
// DS_Real_vi records additionally guarded by SubtargetPredicate isGFX940Plus.
let SubtargetPredicate = isGFX940Plus in {
  def DS_PK_ADD_F16_vi      : DS_Real_vi<0x17, DS_PK_ADD_F16>;
  def DS_PK_ADD_RTN_F16_vi  : DS_Real_vi<0xb7, DS_PK_ADD_RTN_F16>;
  def DS_PK_ADD_BF16_vi     : DS_Real_vi<0x18, DS_PK_ADD_BF16>;
  def DS_PK_ADD_RTN_BF16_vi : DS_Real_vi<0xb8, DS_PK_ADD_RTN_BF16>;
} // End SubtargetPredicate = isGFX940Plus

1772