xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/DSInstructions.td (revision 1db9f3b21e39176dd5b67cf8ac378633b172463e)
1//===-- DSInstructions.td - DS Instruction Definitions --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
// Common base for all DS pseudo instructions. The InstSI asm string is left
// empty here; the mnemonic and operand string are stored in Mnemonic and
// AsmOperands so that DS_Real can assemble the final string from them.
class DS_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag> pattern=[]> :
  InstSI <outs, ins, "", pattern>,
  SIMCInstr <opName, SIEncodingFamily.NONE> {

  let isPseudo = 1;
  let isCodeGenOnly = 1;

  let DS = 1;
  let GWS = 0;
  let LGKM_CNT = 1;
  let Size = 8;
  let UseNamedOperandTable = 1;
  let SchedRW = [WriteLDS];

  // Most instructions both load and store data, so make that the default.
  let mayLoad = 1;
  let mayStore = 1;
  let hasSideEffects = 0;

  string Mnemonic = opName;
  string AsmOperands = asmOps;

  // These bits are something of a hack: it would be more natural to test the
  // "outs" and "ins" dags for the presence of particular operands.
  bits<1> has_vdst = 1;
  bits<1> has_addr = 1;
  bits<1> has_data0 = 1;
  bits<1> has_data1 = 1;

  bits<1> has_gws_data0 = 0; // data0 is encoded as addr

  bits<1> has_offset  = 1; // has "offset" that should be split to offset0,1
  bits<1> has_offset0 = 1;
  bits<1> has_offset1 = 1;

  bits<1> has_gds = 1;
  bits<1> gdsValue = 0; // if has_gds == 0 set gds to this value

  // Variants that clear this bit drop M0 from the implicit uses below.
  bits<1> has_m0_read = 1;

  let Uses = !if(has_m0_read, [M0, EXEC], [EXEC]);
}
52
// Encodable (Enc64) counterpart of a DS_Pseudo. It reuses the pseudo's operand
// lists and forms the asm string as opName followed by the pseudo's operand
// string; opName defaults to the pseudo's mnemonic.
class DS_Real <DS_Pseudo ps, string opName = ps.Mnemonic> :
  InstSI <ps.OutOperandList, ps.InOperandList, opName # ps.AsmOperands>,
  Enc64 {

  let isCodeGenOnly = 0;
  let isPseudo = 0;
  let DS = 1;
  let LGKM_CNT = 1;
  let UseNamedOperandTable = 1;

  // Flags and predicates taken over unchanged from the pseudo.
  let SubtargetPredicate = ps.SubtargetPredicate;
  let OtherPredicates    = ps.OtherPredicates;
  let GWS                = ps.GWS;
  let SchedRW            = ps.SchedRW;
  let mayLoad            = ps.mayLoad;
  let mayStore           = ps.mayStore;
  let IsAtomicNoRet      = ps.IsAtomicNoRet;
  let IsAtomicRet        = ps.IsAtomicRet;

  let DisableEncoding = ps.DisableEncoding;
  let Constraints = ps.Constraints;

  // Encoding fields.
  bits<10> vdst;
  bits<1> gds;
  bits<8> addr;
  bits<10> data0;
  bits<10> data1;
  bits<8> offset0;
  bits<8> offset1;

  // When the pseudo has a single 16-bit "offset" operand, it is split into the
  // two 8-bit fields; otherwise both fields are left unset here.
  bits<16> offset;
  let offset0 = !if(ps.has_offset, offset{7-0}, ?);
  let offset1 = !if(ps.has_offset, offset{15-8}, ?);

  // Bit 9 of vdst when there is a vdst, else bit 9 of data0 when there is a
  // (regular or GWS) data0, else 0.
  bits<1> acc = !if(ps.has_vdst, vdst{9},
                    !if(!or(ps.has_data0, ps.has_gws_data0), data0{9}, 0));
}
92
93// DS Pseudo instructions
94
// No address operand, one data operand, no result register.
class DS_0A1D_NORET<string opName, RegisterClass rc = VGPR_32>
  : DS_Pseudo<opName,
              (outs),
              (ins getLdStRegisterOperand<rc>.ret:$data0, offset:$offset, gds:$gds),
              " $data0$offset$gds"> {
  let has_vdst = 0;
  let has_addr = 0;
  let has_data1 = 0;
}
105
// One address operand, one data operand, no result register. Flagged as a
// non-returning atomic.
class DS_1A1D_NORET<string opName, RegisterClass rc = VGPR_32>
  : DS_Pseudo<opName,
              (outs),
              (ins VGPR_32:$addr, getLdStRegisterOperand<rc>.ret:$data0,
                   offset:$offset, gds:$gds),
              " $addr, $data0$offset$gds"> {
  let IsAtomicNoRet = 1;
  let has_vdst = 0;
  let has_data1 = 0;
}
116
// Emits the base DS_1A1D_NORET pseudo plus a "_gfx9" twin that is defined
// with has_m0_read = 0. Both are registered as the no-return row of their
// own AtomicNoRet entry.
multiclass DS_1A1D_NORET_mc<string opName, RegisterClass rc = VGPR_32> {
  def "" : DS_1A1D_NORET<opName, rc>, AtomicNoRet<opName, 0>;

  let has_m0_read = 0 in
  def _gfx9 : DS_1A1D_NORET<opName, rc>, AtomicNoRet<opName#"_gfx9", 0>;
}
126
// Single-definition variant of DS_1A1D_NORET_mc: only one pseudo is emitted,
// without a name suffix, and it is defined with has_m0_read = 0.
multiclass DS_1A1D_NORET_mc_gfx9<string opName, RegisterClass rc = VGPR_32> {
  let has_m0_read = 0 in
  def "" : DS_1A1D_NORET<opName, rc>, AtomicNoRet<opName, 0>;
}
133
// One address operand, two data operands of the same operand class, no result
// register. Flagged as a non-returning atomic.
class DS_1A2D_NORET<string opName, RegisterClass rc = VGPR_32,
                    RegisterOperand data_op = getLdStRegisterOperand<rc>.ret>
  : DS_Pseudo<opName,
              (outs),
              (ins VGPR_32:$addr, data_op:$data0, data_op:$data1,
                   offset:$offset, gds:$gds),
              " $addr, $data0, $data1$offset$gds"> {
  let IsAtomicNoRet = 1;
  let has_vdst = 0;
}
144
// Emits the base DS_1A2D_NORET pseudo plus a "_gfx9" twin that is defined
// with has_m0_read = 0.
multiclass DS_1A2D_NORET_mc<string opName, RegisterClass rc = VGPR_32> {
  def "" : DS_1A2D_NORET<opName, rc>, AtomicNoRet<opName, 0>;

  let has_m0_read = 0 in
  def _gfx9 : DS_1A2D_NORET<opName, rc>, AtomicNoRet<opName#"_gfx9", 0>;
}
154
// One address operand, two data operands and two separate 8-bit offset
// operands (offset0/offset1) in place of the single "offset"; no result.
class DS_1A2D_Off8_NORET <string opName, RegisterClass rc = VGPR_32,
                          RegisterOperand data_op = getLdStRegisterOperand<rc>.ret>
  : DS_Pseudo<opName,
              (outs),
              (ins VGPR_32:$addr, data_op:$data0, data_op:$data1,
                   offset0:$offset0, offset1:$offset1, gds:$gds),
              " $addr, $data0, $data1$offset0$offset1$gds"> {
  let has_offset = 0;
  let has_vdst = 0;
}
166
// Emits the base DS_1A2D_Off8_NORET pseudo plus a "_gfx9" twin that is
// defined with has_m0_read = 0.
multiclass DS_1A2D_Off8_NORET_mc <string opName, RegisterClass rc = VGPR_32> {
  def "" : DS_1A2D_Off8_NORET<opName, rc>;

  let has_m0_read = 0 in
  def _gfx9 : DS_1A2D_Off8_NORET<opName, rc>;
}
174
// No address operand, one data operand, one result. There is no gds operand:
// "gds" is part of the asm string and the gds bit is fixed to 1 via gdsValue.
class DS_0A1D_RET_GDS<string opName, RegisterClass rc = VGPR_32, RegisterClass src = rc,
                  RegisterOperand dst_op = getLdStRegisterOperand<rc>.ret,
                  RegisterOperand src_op = getLdStRegisterOperand<src>.ret>
  : DS_Pseudo<opName,
              (outs dst_op:$vdst),
              (ins src_op:$data0, offset:$offset),
              " $vdst, $data0$offset gds"> {
  let hasSideEffects = 1;

  let has_addr = 0;
  let has_data1 = 0;

  let has_gds = 0;
  let gdsValue = 1;
}
189
// One address operand, one data operand, one result of the same operand
// class as the data. Flagged as a returning atomic with a post-ISel hook.
class DS_1A1D_RET <string opName, RegisterClass rc = VGPR_32,
                  RegisterOperand data_op = getLdStRegisterOperand<rc>.ret>
  : DS_Pseudo<opName,
              (outs data_op:$vdst),
              (ins VGPR_32:$addr, data_op:$data0, offset:$offset, gds:$gds),
              " $vdst, $addr, $data0$offset$gds"> {
  let IsAtomicRet = 1;
  let hasPostISelHook = 1;
  let has_data1 = 0;
}
201
// Emits the base DS_1A1D_RET pseudo plus a "_gfx9" twin defined with
// has_m0_read = 0. NoRetOp names the matching no-return opcode; when it is
// empty both definitions get an empty AtomicNoRet key and IsRet = 0.
multiclass DS_1A1D_RET_mc <string opName, RegisterClass rc = VGPR_32,
                           string NoRetOp = ""> {
  def "" : DS_1A1D_RET<opName, rc>,
           AtomicNoRet<NoRetOp, !ne(NoRetOp, "")>;

  let has_m0_read = 0 in
  def _gfx9 : DS_1A1D_RET<opName, rc>,
              AtomicNoRet<!if(!eq(NoRetOp, ""), "", NoRetOp#"_gfx9"),
                          !ne(NoRetOp, "")>;
}
213
// Single-definition variant of DS_1A1D_RET_mc: only one pseudo is emitted,
// without a name suffix, and it is defined with has_m0_read = 0.
multiclass DS_1A1D_RET_mc_gfx9 <string opName, RegisterClass rc = VGPR_32,
                                string NoRetOp = ""> {
  // NoRetOp is forwarded as-is: an empty string stays empty, anything else is
  // used unchanged as the AtomicNoRet key.
  let has_m0_read = 0 in
  def "" : DS_1A1D_RET<opName, rc>,
           AtomicNoRet<NoRetOp, !ne(NoRetOp, "")>;
}
222
// One address operand, two data operands, one result. The result and source
// register classes can differ (src defaults to rc). Flagged as a returning
// atomic with a post-ISel hook.
class DS_1A2D_RET<string opName,
                  RegisterClass rc = VGPR_32,
                  RegisterClass src = rc,
                  RegisterOperand dst_op = getLdStRegisterOperand<rc>.ret,
                  RegisterOperand src_op = getLdStRegisterOperand<src>.ret>
  : DS_Pseudo<opName,
              (outs dst_op:$vdst),
              (ins VGPR_32:$addr, src_op:$data0, src_op:$data1,
                   offset:$offset, gds:$gds),
              " $vdst, $addr, $data0, $data1$offset$gds"> {
  let IsAtomicRet = 1;
  let hasPostISelHook = 1;
}
236
// Emits the base DS_1A2D_RET pseudo plus a "_gfx9" twin defined with
// has_m0_read = 0.
//
// NoRetOp names the matching no-return opcode. When it is empty there is no
// such opcode, so both definitions must carry an empty AtomicNoRet key with
// IsRet = 0. The "_gfx9" key is therefore only built for a non-empty NoRetOp;
// appending the suffix unconditionally would give the variant the meaningless
// key "_gfx9". This mirrors DS_1A1D_RET_mc.
multiclass DS_1A2D_RET_mc<string opName,
                          RegisterClass rc = VGPR_32,
                          string NoRetOp = "",
                          RegisterClass src = rc> {
  def "" : DS_1A2D_RET<opName, rc, src>,
    AtomicNoRet<NoRetOp, !ne(NoRetOp, "")>;

  let has_m0_read = 0 in {
    def _gfx9 : DS_1A2D_RET<opName, rc, src>,
      AtomicNoRet<!if(!eq(NoRetOp, ""), "", NoRetOp#"_gfx9"),
                  !ne(NoRetOp, "")>;
  }
}
249
// One address operand, two data operands, one result, with two separate
// 8-bit offset operands (offset0/offset1) in place of the single "offset".
class DS_1A2D_Off8_RET<string opName,
                       RegisterClass rc = VGPR_32,
                       RegisterClass src = rc,
                       RegisterOperand dst_op = getLdStRegisterOperand<rc>.ret,
                       RegisterOperand src_op = getLdStRegisterOperand<src>.ret>
  : DS_Pseudo<opName,
              (outs dst_op:$vdst),
              (ins VGPR_32:$addr, src_op:$data0, src_op:$data1,
                   offset0:$offset0, offset1:$offset1, gds:$gds),
              " $vdst, $addr, $data0, $data1$offset0$offset1$gds"> {
  let hasPostISelHook = 1;
  let has_offset = 0;
}
263
// Emits the base DS_1A2D_Off8_RET pseudo plus a "_gfx9" twin that is defined
// with has_m0_read = 0.
multiclass DS_1A2D_Off8_RET_mc<string opName,
                               RegisterClass rc = VGPR_32,
                               RegisterClass src = rc> {
  def "" : DS_1A2D_Off8_RET<opName, rc, src>;

  let has_m0_read = 0 in
  def _gfx9 : DS_1A2D_Off8_RET<opName, rc, src>;
}
273
// Two results: $vdst and an updated $addr. The $addr output is tied to the
// $addr_in input, and $addr_in is excluded from the encoding. No gds operand.
class DS_BVH_STACK<string opName>
  : DS_Pseudo<opName,
              (outs getLdStRegisterOperand<VGPR_32>.ret:$vdst, VGPR_32:$addr),
              (ins VGPR_32:$addr_in, getLdStRegisterOperand<VGPR_32>.ret:$data0,
                   VReg_128:$data1, offset:$offset),
              " $vdst, $addr, $data0, $data1$offset"> {
  let Constraints = "$addr = $addr_in";
  let DisableEncoding = "$addr_in";

  let has_gds = 0;
  let gdsValue = 0;

  let SchedRW = [WriteLDS, WriteLDS];
  // TODO: Use MMOs in the LDS address space instead of hasSideEffects = 1.
  let hasSideEffects = 1;
}
287
// One address operand, one result, no data operands. With HasTiedOutput set,
// an extra $vdst_in input is appended, tied to $vdst and excluded from the
// encoding. The offset operand type is configurable through "ofs".
class DS_1A_RET<string opName, RegisterClass rc = VGPR_32, bit HasTiedOutput = 0, Operand ofs = offset,
                RegisterOperand data_op = getLdStRegisterOperand<rc>.ret>
  : DS_Pseudo<opName,
              (outs data_op:$vdst),
              !if(HasTiedOutput,
                  (ins VGPR_32:$addr, ofs:$offset, gds:$gds, data_op:$vdst_in),
                  (ins VGPR_32:$addr, ofs:$offset, gds:$gds)),
              " $vdst, $addr$offset$gds"> {
  let has_data0 = 0;
  let has_data1 = 0;

  let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", "");
  let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
}
301
// Emits the base DS_1A_RET pseudo plus a "_gfx9" twin that is defined with
// has_m0_read = 0.
multiclass DS_1A_RET_mc<string opName, RegisterClass rc = VGPR_32, bit HasTiedOutput = 0, Operand ofs = offset> {
  def "" : DS_1A_RET<opName, rc, HasTiedOutput, ofs>;

  let has_m0_read = 0 in
  def _gfx9 : DS_1A_RET<opName, rc, HasTiedOutput, ofs>;
}
309
// DS_1A_RET with HasTiedOutput forced to 1.
class DS_1A_RET_Tied<string opName, RegisterClass rc = VGPR_32>
  : DS_1A_RET<opName, rc, 1>;
312
// One address operand, one result, no data operands, with two separate
// 8-bit offset operands (offset0/offset1) in place of the single "offset".
class DS_1A_Off8_RET <string opName, RegisterClass rc = VGPR_32>
  : DS_Pseudo<opName,
              (outs getLdStRegisterOperand<rc>.ret:$vdst),
              (ins VGPR_32:$addr, offset0:$offset0, offset1:$offset1, gds:$gds),
              " $vdst, $addr$offset0$offset1$gds"> {
  let has_data0 = 0;
  let has_data1 = 0;
  let has_offset = 0;
}
323
// Emits the base DS_1A_Off8_RET pseudo plus a "_gfx9" twin that is defined
// with has_m0_read = 0.
multiclass DS_1A_Off8_RET_mc <string opName, RegisterClass rc = VGPR_32> {
  def "" : DS_1A_Off8_RET<opName, rc>;

  let has_m0_read = 0 in
  def _gfx9 : DS_1A_Off8_RET<opName, rc>;
}
331
// One address operand, one result, no data operands. There is no gds
// operand: "gds" is part of the asm string and the gds bit is fixed to 1.
class DS_1A_RET_GDS <string opName>
  : DS_Pseudo<opName,
              (outs getLdStRegisterOperand<VGPR_32>.ret:$vdst),
              (ins VGPR_32:$addr, offset:$offset),
              " $vdst, $addr$offset gds"> {
  let has_gds = 0;
  let gdsValue = 1;

  let has_data0 = 0;
  let has_data1 = 0;
}
342
// No address and no data operands; one result.
class DS_0A_RET <string opName>
  : DS_Pseudo<opName,
              (outs getLdStRegisterOperand<VGPR_32>.ret:$vdst),
              (ins offset:$offset, gds:$gds),
              " $vdst$offset$gds"> {
  // Same values as the DS_Pseudo defaults, restated explicitly.
  let mayStore = 1;
  let mayLoad = 1;

  let has_data0 = 0;
  let has_data1 = 0;
  let has_addr = 0;
}
355
// One address operand only: no data operands and no result.
class DS_1A <string opName>
  : DS_Pseudo<opName,
              (outs),
              (ins VGPR_32:$addr, offset:$offset, gds:$gds),
              " $addr$offset$gds"> {
  // Same values as the DS_Pseudo defaults, restated explicitly.
  let mayStore = 1;
  let mayLoad = 1;

  let has_data0 = 0;
  let has_data1 = 0;
  let has_vdst = 0;
}
368
// Emits the base DS_1A pseudo plus a "_gfx9" twin that is defined with
// has_m0_read = 0.
multiclass DS_1A_mc <string opName> {
  def "" : DS_1A<opName>;

  let has_m0_read = 0 in
  def _gfx9 : DS_1A<opName>;
}
376
377
// Base for GWS instructions: no outputs, caller-supplied inputs and operand
// string. All regular operand-presence bits are cleared and the gds bit is
// fixed to 1 through gdsValue.
class DS_GWS <string opName, dag ins, string asmOps>
  : DS_Pseudo<opName, (outs), ins, asmOps> {
  let GWS = 1;

  let has_gds   = 0;
  let gdsValue  = 1;

  let has_vdst  = 0;
  let has_addr  = 0;
  let has_data0 = 0;
  let has_data1 = 0;
}
390
// GWS instruction whose only operand is the offset.
class DS_GWS_0D <string opName>
  : DS_GWS<opName, (ins offset:$offset), "$offset gds"> {
  let hasSideEffects = 1;
}
396
// GWS instruction with one data operand plus the offset. The data operand is
// signalled through has_gws_data0 rather than has_data0.
class DS_GWS_1D <string opName>
  : DS_GWS<opName,
           (ins getLdStRegisterOperand<VGPR_32>.ret:$data0, offset:$offset),
           " $data0$offset gds"> {
  let hasSideEffects = 1;
  let has_gws_data0 = 1;
}
405
// Operand-less DS instruction: empty outs/ins and an empty operand string,
// with every operand-presence bit cleared. It neither loads nor stores but
// is marked as having side effects.
class DS_VOID <string opName>
  : DS_Pseudo<opName, (outs), (ins), ""> {
  let hasSideEffects = 1;
  let mayLoad = 0;
  let mayStore = 0;
  let UseNamedOperandTable = 0;

  let has_vdst = 0;
  let has_addr = 0;
  let has_data0 = 0;
  let has_data1 = 0;
  let has_gds = 0;
  let has_offset = 0;
  let has_offset0 = 0;
  let has_offset1 = 0;
}
422
// One address operand, one data operand, one result, no gds operand. Carries
// its own selection pattern, which maps "node" applied to a DS1Addr1Offset
// address and an i32 value onto this instruction. Marked convergent, and
// neither a load nor a store.
class DS_1A1D_PERMUTE <string opName, SDPatternOperator node = null_frag,
                       RegisterOperand data_op = getLdStRegisterOperand<VGPR_32>.ret>
  : DS_Pseudo<opName,
              (outs data_op:$vdst),
              (ins VGPR_32:$addr, data_op:$data0, offset:$offset),
              " $vdst, $addr, $data0$offset",
              [(set i32:$vdst,
                (node (DS1Addr1Offset i32:$addr, i32:$offset), i32:$data0))] > {
  let isConvergent = 1;
  let mayLoad = 0;
  let mayStore = 0;

  let has_gds = 0;
  let has_data1 = 0;
}
439
// 32-bit non-returning read-modify-write operations.
defm DS_ADD_U32  : DS_1A1D_NORET_mc<"ds_add_u32">;
defm DS_SUB_U32  : DS_1A1D_NORET_mc<"ds_sub_u32">;
defm DS_RSUB_U32 : DS_1A1D_NORET_mc<"ds_rsub_u32">;
defm DS_INC_U32  : DS_1A1D_NORET_mc<"ds_inc_u32">;
defm DS_DEC_U32  : DS_1A1D_NORET_mc<"ds_dec_u32">;
defm DS_MIN_I32  : DS_1A1D_NORET_mc<"ds_min_i32">;
defm DS_MAX_I32  : DS_1A1D_NORET_mc<"ds_max_i32">;
defm DS_MIN_U32  : DS_1A1D_NORET_mc<"ds_min_u32">;
defm DS_MAX_U32  : DS_1A1D_NORET_mc<"ds_max_u32">;
defm DS_AND_B32  : DS_1A1D_NORET_mc<"ds_and_b32">;
defm DS_OR_B32   : DS_1A1D_NORET_mc<"ds_or_b32">;
defm DS_XOR_B32  : DS_1A1D_NORET_mc<"ds_xor_b32">;

// Only this one is gated on HasLDSFPAtomicAdd.
let SubtargetPredicate = HasLDSFPAtomicAdd in
defm DS_ADD_F32  : DS_1A1D_NORET_mc<"ds_add_f32">;

defm DS_MIN_F32  : DS_1A1D_NORET_mc<"ds_min_f32">;
defm DS_MAX_F32  : DS_1A1D_NORET_mc<"ds_max_f32">;
459
// Stores of up to 32 bits per element; none of them load.
let mayLoad = 0 in {
  defm DS_WRITE_B8       : DS_1A1D_NORET_mc<"ds_write_b8">;
  defm DS_WRITE_B16      : DS_1A1D_NORET_mc<"ds_write_b16">;
  defm DS_WRITE_B32      : DS_1A1D_NORET_mc<"ds_write_b32">;
  defm DS_WRITE2_B32     : DS_1A2D_Off8_NORET_mc<"ds_write2_b32">;
  defm DS_WRITE2ST64_B32 : DS_1A2D_Off8_NORET_mc<"ds_write2st64_b32">;

  // D16 high-half stores: a single definition each, with has_m0_read = 0.
  let has_m0_read = 0, SubtargetPredicate = HasD16LoadStore in {
    def DS_WRITE_B8_D16_HI  : DS_1A1D_NORET<"ds_write_b8_d16_hi">;
    def DS_WRITE_B16_D16_HI : DS_1A1D_NORET<"ds_write_b16_d16_hi">;
  } // End has_m0_read = 0, SubtargetPredicate = HasD16LoadStore

  let SubtargetPredicate = HasDSAddTid in
  def DS_WRITE_ADDTID_B32 : DS_0A1D_NORET<"ds_write_addtid_b32">;
} // End mayLoad = 0
482
// 64-bit floating-point add, non-returning and returning forms.
let SubtargetPredicate = isGFX90APlus in {
  defm DS_ADD_F64     : DS_1A1D_NORET_mc_gfx9<"ds_add_f64", VReg_64>;
  defm DS_ADD_RTN_F64 : DS_1A1D_RET_mc_gfx9<"ds_add_rtn_f64", VReg_64,
                                            "ds_add_f64">;
} // End SubtargetPredicate = isGFX90APlus

// Packed 16-bit floating-point adds, non-returning and returning forms.
let SubtargetPredicate = HasAtomicDsPkAdd16Insts in {
  defm DS_PK_ADD_F16      : DS_1A1D_NORET_mc_gfx9<"ds_pk_add_f16">;
  defm DS_PK_ADD_RTN_F16  : DS_1A1D_RET_mc_gfx9<"ds_pk_add_rtn_f16", VGPR_32,
                                                "ds_pk_add_f16">;
  defm DS_PK_ADD_BF16     : DS_1A1D_NORET_mc_gfx9<"ds_pk_add_bf16">;
  defm DS_PK_ADD_RTN_BF16 : DS_1A1D_RET_mc_gfx9<"ds_pk_add_rtn_bf16", VGPR_32,
                                                "ds_pk_add_bf16">;
} // End SubtargetPredicate = HasAtomicDsPkAdd16Insts
494
// cmpstore: non-returning forms, then returning forms paired with them.
defm DS_CMPSTORE_B32     : DS_1A2D_NORET_mc<"ds_cmpstore_b32">;
defm DS_CMPSTORE_F32     : DS_1A2D_NORET_mc<"ds_cmpstore_f32">;
defm DS_CMPSTORE_B64     : DS_1A2D_NORET_mc<"ds_cmpstore_b64", VReg_64>;
defm DS_CMPSTORE_F64     : DS_1A2D_NORET_mc<"ds_cmpstore_f64", VReg_64>;
defm DS_CMPSTORE_RTN_B32 : DS_1A2D_RET_mc<"ds_cmpstore_rtn_b32", VGPR_32, "ds_cmpstore_b32">;
defm DS_CMPSTORE_RTN_F32 : DS_1A2D_RET_mc<"ds_cmpstore_rtn_f32", VGPR_32, "ds_cmpstore_f32">;
defm DS_CMPSTORE_RTN_B64 : DS_1A2D_RET_mc<"ds_cmpstore_rtn_b64", VReg_64, "ds_cmpstore_b64">;
defm DS_CMPSTORE_RTN_F64 : DS_1A2D_RET_mc<"ds_cmpstore_rtn_f64", VReg_64, "ds_cmpstore_f64">;

// 32-bit two-data non-returning operations.
defm DS_MSKOR_B32 : DS_1A2D_NORET_mc<"ds_mskor_b32">;
defm DS_CMPST_B32 : DS_1A2D_NORET_mc<"ds_cmpst_b32">;
defm DS_CMPST_F32 : DS_1A2D_NORET_mc<"ds_cmpst_f32">;
507
// 64-bit non-returning read-modify-write operations.
defm DS_ADD_U64   : DS_1A1D_NORET_mc<"ds_add_u64", VReg_64>;
defm DS_SUB_U64   : DS_1A1D_NORET_mc<"ds_sub_u64", VReg_64>;
defm DS_RSUB_U64  : DS_1A1D_NORET_mc<"ds_rsub_u64", VReg_64>;
defm DS_INC_U64   : DS_1A1D_NORET_mc<"ds_inc_u64", VReg_64>;
defm DS_DEC_U64   : DS_1A1D_NORET_mc<"ds_dec_u64", VReg_64>;
defm DS_MIN_I64   : DS_1A1D_NORET_mc<"ds_min_i64", VReg_64>;
defm DS_MAX_I64   : DS_1A1D_NORET_mc<"ds_max_i64", VReg_64>;
defm DS_MIN_U64   : DS_1A1D_NORET_mc<"ds_min_u64", VReg_64>;
defm DS_MAX_U64   : DS_1A1D_NORET_mc<"ds_max_u64", VReg_64>;
defm DS_AND_B64   : DS_1A1D_NORET_mc<"ds_and_b64", VReg_64>;
defm DS_OR_B64    : DS_1A1D_NORET_mc<"ds_or_b64", VReg_64>;
defm DS_XOR_B64   : DS_1A1D_NORET_mc<"ds_xor_b64", VReg_64>;
defm DS_MSKOR_B64 : DS_1A2D_NORET_mc<"ds_mskor_b64", VReg_64>;

// 64-bit stores; none of them load.
let mayLoad = 0 in {
  defm DS_WRITE_B64      : DS_1A1D_NORET_mc<"ds_write_b64", VReg_64>;
  defm DS_WRITE2_B64     : DS_1A2D_Off8_NORET_mc<"ds_write2_b64", VReg_64>;
  defm DS_WRITE2ST64_B64 : DS_1A2D_Off8_NORET_mc<"ds_write2st64_b64", VReg_64>;
} // End mayLoad = 0

defm DS_CMPST_B64 : DS_1A2D_NORET_mc<"ds_cmpst_b64", VReg_64>;
defm DS_CMPST_F64 : DS_1A2D_NORET_mc<"ds_cmpst_f64", VReg_64>;
defm DS_MIN_F64   : DS_1A1D_NORET_mc<"ds_min_f64", VReg_64>;
defm DS_MAX_F64   : DS_1A1D_NORET_mc<"ds_max_f64", VReg_64>;
530
// 32-bit returning operations. The third template argument names the
// matching non-returning opcode.
defm DS_ADD_RTN_U32   : DS_1A1D_RET_mc<"ds_add_rtn_u32", VGPR_32, "ds_add_u32">;

let SubtargetPredicate = HasLDSFPAtomicAdd in
defm DS_ADD_RTN_F32   : DS_1A1D_RET_mc<"ds_add_rtn_f32", VGPR_32, "ds_add_f32">;

defm DS_SUB_RTN_U32   : DS_1A1D_RET_mc<"ds_sub_rtn_u32", VGPR_32, "ds_sub_u32">;
defm DS_RSUB_RTN_U32  : DS_1A1D_RET_mc<"ds_rsub_rtn_u32", VGPR_32, "ds_rsub_u32">;
defm DS_INC_RTN_U32   : DS_1A1D_RET_mc<"ds_inc_rtn_u32", VGPR_32, "ds_inc_u32">;
defm DS_DEC_RTN_U32   : DS_1A1D_RET_mc<"ds_dec_rtn_u32", VGPR_32, "ds_dec_u32">;
defm DS_MIN_RTN_I32   : DS_1A1D_RET_mc<"ds_min_rtn_i32", VGPR_32, "ds_min_i32">;
defm DS_MAX_RTN_I32   : DS_1A1D_RET_mc<"ds_max_rtn_i32", VGPR_32, "ds_max_i32">;
defm DS_MIN_RTN_U32   : DS_1A1D_RET_mc<"ds_min_rtn_u32", VGPR_32, "ds_min_u32">;
defm DS_MAX_RTN_U32   : DS_1A1D_RET_mc<"ds_max_rtn_u32", VGPR_32, "ds_max_u32">;
defm DS_AND_RTN_B32   : DS_1A1D_RET_mc<"ds_and_rtn_b32", VGPR_32, "ds_and_b32">;
defm DS_OR_RTN_B32    : DS_1A1D_RET_mc<"ds_or_rtn_b32", VGPR_32, "ds_or_b32">;
defm DS_XOR_RTN_B32   : DS_1A1D_RET_mc<"ds_xor_rtn_b32", VGPR_32, "ds_xor_b32">;
defm DS_MSKOR_RTN_B32 : DS_1A2D_RET_mc<"ds_mskor_rtn_b32", VGPR_32, "ds_mskor_b32">;
defm DS_CMPST_RTN_B32 : DS_1A2D_RET_mc<"ds_cmpst_rtn_b32", VGPR_32, "ds_cmpst_b32">;
defm DS_CMPST_RTN_F32 : DS_1A2D_RET_mc<"ds_cmpst_rtn_f32", VGPR_32, "ds_cmpst_f32">;
defm DS_MIN_RTN_F32   : DS_1A1D_RET_mc<"ds_min_rtn_f32", VGPR_32, "ds_min_f32">;
defm DS_MAX_RTN_F32   : DS_1A1D_RET_mc<"ds_max_rtn_f32", VGPR_32, "ds_max_f32">;

// Exchange operations: defined without a non-returning counterpart.
defm DS_WRXCHG_RTN_B32      : DS_1A1D_RET_mc<"ds_wrxchg_rtn_b32">;
defm DS_WRXCHG2_RTN_B32     : DS_1A2D_Off8_RET_mc<"ds_wrxchg2_rtn_b32", VReg_64, VGPR_32>;
defm DS_WRXCHG2ST64_RTN_B32 : DS_1A2D_Off8_RET_mc<"ds_wrxchg2st64_rtn_b32", VReg_64, VGPR_32>;
556
// 64-bit returning operations. The third template argument names the
// matching non-returning opcode.
defm DS_ADD_RTN_U64   : DS_1A1D_RET_mc<"ds_add_rtn_u64", VReg_64, "ds_add_u64">;
defm DS_SUB_RTN_U64   : DS_1A1D_RET_mc<"ds_sub_rtn_u64", VReg_64, "ds_sub_u64">;
defm DS_RSUB_RTN_U64  : DS_1A1D_RET_mc<"ds_rsub_rtn_u64", VReg_64, "ds_rsub_u64">;
defm DS_INC_RTN_U64   : DS_1A1D_RET_mc<"ds_inc_rtn_u64", VReg_64, "ds_inc_u64">;
defm DS_DEC_RTN_U64   : DS_1A1D_RET_mc<"ds_dec_rtn_u64", VReg_64, "ds_dec_u64">;
defm DS_MIN_RTN_I64   : DS_1A1D_RET_mc<"ds_min_rtn_i64", VReg_64, "ds_min_i64">;
defm DS_MAX_RTN_I64   : DS_1A1D_RET_mc<"ds_max_rtn_i64", VReg_64, "ds_max_i64">;
defm DS_MIN_RTN_U64   : DS_1A1D_RET_mc<"ds_min_rtn_u64", VReg_64, "ds_min_u64">;
defm DS_MAX_RTN_U64   : DS_1A1D_RET_mc<"ds_max_rtn_u64", VReg_64, "ds_max_u64">;
defm DS_AND_RTN_B64   : DS_1A1D_RET_mc<"ds_and_rtn_b64", VReg_64, "ds_and_b64">;
defm DS_OR_RTN_B64    : DS_1A1D_RET_mc<"ds_or_rtn_b64", VReg_64, "ds_or_b64">;
defm DS_XOR_RTN_B64   : DS_1A1D_RET_mc<"ds_xor_rtn_b64", VReg_64, "ds_xor_b64">;
defm DS_MSKOR_RTN_B64 : DS_1A2D_RET_mc<"ds_mskor_rtn_b64", VReg_64, "ds_mskor_b64">;
defm DS_CMPST_RTN_B64 : DS_1A2D_RET_mc<"ds_cmpst_rtn_b64", VReg_64, "ds_cmpst_b64">;
defm DS_CMPST_RTN_F64 : DS_1A2D_RET_mc<"ds_cmpst_rtn_f64", VReg_64, "ds_cmpst_f64">;
defm DS_MIN_RTN_F64   : DS_1A1D_RET_mc<"ds_min_rtn_f64", VReg_64, "ds_min_f64">;
defm DS_MAX_RTN_F64   : DS_1A1D_RET_mc<"ds_max_rtn_f64", VReg_64, "ds_max_f64">;

// Exchange operations: defined without a non-returning counterpart.
defm DS_WRXCHG_RTN_B64      : DS_1A1D_RET_mc<"ds_wrxchg_rtn_b64", VReg_64>;
defm DS_WRXCHG2_RTN_B64     : DS_1A2D_Off8_RET_mc<"ds_wrxchg2_rtn_b64", VReg_128, VReg_64>;
defm DS_WRXCHG2ST64_RTN_B64 : DS_1A2D_Off8_RET_mc<"ds_wrxchg2st64_rtn_b64", VReg_128, VReg_64>;
578
// GWS operations: all convergent and expanded through a custom inserter.
let isConvergent = 1, usesCustomInserter = 1 in {
  // The only GWS operation here that is marked as not loading.
  def DS_GWS_INIT    : DS_GWS_1D<"ds_gws_init"> {
    let mayLoad = 0;
  }
  def DS_GWS_SEMA_V  : DS_GWS_0D<"ds_gws_sema_v">;
  def DS_GWS_SEMA_BR : DS_GWS_1D<"ds_gws_sema_br">;
  def DS_GWS_SEMA_P  : DS_GWS_0D<"ds_gws_sema_p">;
  def DS_GWS_BARRIER : DS_GWS_1D<"ds_gws_barrier">;
} // End isConvergent = 1, usesCustomInserter = 1
588
// "src2" forms: address-only instructions, gated on HasDsSrc2Insts.
let SubtargetPredicate = HasDsSrc2Insts in {
  // 32-bit.
  def DS_ADD_SRC2_U32  : DS_1A<"ds_add_src2_u32">;
  def DS_SUB_SRC2_U32  : DS_1A<"ds_sub_src2_u32">;
  def DS_RSUB_SRC2_U32 : DS_1A<"ds_rsub_src2_u32">;
  def DS_INC_SRC2_U32  : DS_1A<"ds_inc_src2_u32">;
  def DS_DEC_SRC2_U32  : DS_1A<"ds_dec_src2_u32">;
  def DS_MIN_SRC2_I32  : DS_1A<"ds_min_src2_i32">;
  def DS_MAX_SRC2_I32  : DS_1A<"ds_max_src2_i32">;
  def DS_MIN_SRC2_U32  : DS_1A<"ds_min_src2_u32">;
  def DS_MAX_SRC2_U32  : DS_1A<"ds_max_src2_u32">;
  def DS_AND_SRC2_B32  : DS_1A<"ds_and_src2_b32">;
  def DS_OR_SRC2_B32   : DS_1A<"ds_or_src2_b32">;
  def DS_XOR_SRC2_B32  : DS_1A<"ds_xor_src2_b32">;
  def DS_MIN_SRC2_F32  : DS_1A<"ds_min_src2_f32">;
  def DS_MAX_SRC2_F32  : DS_1A<"ds_max_src2_f32">;

  // 64-bit.
  def DS_ADD_SRC2_U64  : DS_1A<"ds_add_src2_u64">;
  def DS_SUB_SRC2_U64  : DS_1A<"ds_sub_src2_u64">;
  def DS_RSUB_SRC2_U64 : DS_1A<"ds_rsub_src2_u64">;
  def DS_INC_SRC2_U64  : DS_1A<"ds_inc_src2_u64">;
  def DS_DEC_SRC2_U64  : DS_1A<"ds_dec_src2_u64">;
  def DS_MIN_SRC2_I64  : DS_1A<"ds_min_src2_i64">;
  def DS_MAX_SRC2_I64  : DS_1A<"ds_max_src2_i64">;
  def DS_MIN_SRC2_U64  : DS_1A<"ds_min_src2_u64">;
  def DS_MAX_SRC2_U64  : DS_1A<"ds_max_src2_u64">;
  def DS_AND_SRC2_B64  : DS_1A<"ds_and_src2_b64">;
  def DS_OR_SRC2_B64   : DS_1A<"ds_or_src2_b64">;
  def DS_XOR_SRC2_B64  : DS_1A<"ds_xor_src2_b64">;
  def DS_MIN_SRC2_F64  : DS_1A<"ds_min_src2_f64">;
  def DS_MAX_SRC2_F64  : DS_1A<"ds_max_src2_f64">;

  // Writes.
  def DS_WRITE_SRC2_B32 : DS_1A<"ds_write_src2_b32">;
  def DS_WRITE_SRC2_B64 : DS_1A<"ds_write_src2_b64">;
} // End SubtargetPredicate = HasDsSrc2Insts
623
// Swizzle: convergent, neither loads nor stores, and uses only EXEC. Its
// offset operand is of type Swizzle instead of the default offset.
let Uses = [EXEC], mayLoad = 0, mayStore = 0, isConvergent = 1 in
def DS_SWIZZLE_B32 : DS_1A_RET <"ds_swizzle_b32", VGPR_32, 0, Swizzle>;
627
// Loads; none of them store.
let mayStore = 0 in {
  defm DS_READ_I8  : DS_1A_RET_mc<"ds_read_i8">;
  defm DS_READ_U8  : DS_1A_RET_mc<"ds_read_u8">;
  defm DS_READ_I16 : DS_1A_RET_mc<"ds_read_i16">;
  defm DS_READ_U16 : DS_1A_RET_mc<"ds_read_u16">;
  defm DS_READ_B32 : DS_1A_RET_mc<"ds_read_b32">;
  defm DS_READ_B64 : DS_1A_RET_mc<"ds_read_b64", VReg_64>;

  defm DS_READ2_B32     : DS_1A_Off8_RET_mc<"ds_read2_b32", VReg_64>;
  defm DS_READ2ST64_B32 : DS_1A_Off8_RET_mc<"ds_read2st64_b32", VReg_64>;

  defm DS_READ2_B64     : DS_1A_Off8_RET_mc<"ds_read2_b64", VReg_128>;
  defm DS_READ2ST64_B64 : DS_1A_Off8_RET_mc<"ds_read2st64_b64", VReg_128>;

  // D16 loads: tied-output form, a single definition each with
  // has_m0_read = 0.
  let has_m0_read = 0, SubtargetPredicate = HasD16LoadStore,
      TiedSourceNotRead = 1 in {
    def DS_READ_U8_D16     : DS_1A_RET_Tied<"ds_read_u8_d16">;
    def DS_READ_U8_D16_HI  : DS_1A_RET_Tied<"ds_read_u8_d16_hi">;
    def DS_READ_I8_D16     : DS_1A_RET_Tied<"ds_read_i8_d16">;
    def DS_READ_I8_D16_HI  : DS_1A_RET_Tied<"ds_read_i8_d16_hi">;
    def DS_READ_U16_D16    : DS_1A_RET_Tied<"ds_read_u16_d16">;
    def DS_READ_U16_D16_HI : DS_1A_RET_Tied<"ds_read_u16_d16_hi">;
  } // End has_m0_read = 0, SubtargetPredicate = HasD16LoadStore

  let SubtargetPredicate = HasDSAddTid in
  def DS_READ_ADDTID_B32 : DS_0A_RET<"ds_read_addtid_b32">;
} // End mayStore = 0
658
// Address-less returning operations.
def DS_CONSUME : DS_0A_RET<"ds_consume">;
def DS_APPEND  : DS_0A_RET<"ds_append">;

let SubtargetPredicate = isNotGFX90APlus in {
  def DS_ORDERED_COUNT : DS_1A_RET_GDS<"ds_ordered_count">;
} // End SubtargetPredicate = isNotGFX90APlus
664
665//===----------------------------------------------------------------------===//
666// Instruction definitions for CI and newer.
667//===----------------------------------------------------------------------===//
668
let SubtargetPredicate = isGFX7Plus in {

  // Returning operations defined without a non-returning counterpart.
  defm DS_WRAP_RTN_B32       : DS_1A2D_RET_mc<"ds_wrap_rtn_b32", VGPR_32>;
  defm DS_CONDXCHG32_RTN_B64 : DS_1A1D_RET_mc<"ds_condxchg32_rtn_b64", VReg_64>;

  let isConvergent = 1, usesCustomInserter = 1 in
  def DS_GWS_SEMA_RELEASE_ALL : DS_GWS_0D<"ds_gws_sema_release_all">;

  // 96- and 128-bit loads.
  let mayStore = 0 in {
    defm DS_READ_B96  : DS_1A_RET_mc<"ds_read_b96", VReg_96>;
    defm DS_READ_B128 : DS_1A_RET_mc<"ds_read_b128", VReg_128>;
  } // End mayStore = 0

  // 96- and 128-bit stores.
  let mayLoad = 0 in {
    defm DS_WRITE_B96  : DS_1A1D_NORET_mc<"ds_write_b96", VReg_96>;
    defm DS_WRITE_B128 : DS_1A1D_NORET_mc<"ds_write_b128", VReg_128>;
  } // End mayLoad = 0

  def DS_NOP : DS_VOID<"ds_nop">;

} // let SubtargetPredicate = isGFX7Plus
691
692//===----------------------------------------------------------------------===//
693// Instruction definitions for VI and newer.
694//===----------------------------------------------------------------------===//
695
// Permute operations: selected from the matching intrinsics and using only
// EXEC.
let SubtargetPredicate = isGFX8Plus, Uses = [EXEC] in {
  def DS_PERMUTE_B32  : DS_1A1D_PERMUTE <"ds_permute_b32", int_amdgcn_ds_permute>;
  def DS_BPERMUTE_B32 : DS_1A1D_PERMUTE <"ds_bpermute_b32", int_amdgcn_ds_bpermute>;
} // let SubtargetPredicate = isGFX8Plus, Uses = [EXEC]
706
// Needs both LDS floating-point atomic add support and the src2 instructions.
let SubtargetPredicate = HasLDSFPAtomicAdd,
    OtherPredicates = [HasDsSrc2Insts] in
def DS_ADD_SRC2_F32 : DS_1A<"ds_add_src2_f32">;
710
711
712//===----------------------------------------------------------------------===//
713// Instruction definitions for GFX11.
714//===----------------------------------------------------------------------===//
715
// GS register add/sub: 64-bit result, 32-bit source.
let SubtargetPredicate = isGFX11Only in {
  def DS_ADD_GS_REG_RTN : DS_0A1D_RET_GDS<"ds_add_gs_reg_rtn", VReg_64, VGPR_32>;
  def DS_SUB_GS_REG_RTN : DS_0A1D_RET_GDS<"ds_sub_gs_reg_rtn", VReg_64, VGPR_32>;
} // let SubtargetPredicate = isGFX11Only

let SubtargetPredicate = isGFX11Plus in
def DS_BVH_STACK_RTN_B32 : DS_BVH_STACK<"ds_bvh_stack_rtn_b32">;
728
729//===----------------------------------------------------------------------===//
730// Instruction definitions for GFX12 and newer.
731//===----------------------------------------------------------------------===//
732
// Clamped subtract, non-returning and returning forms.
let SubtargetPredicate = isGFX12Plus in {
  defm DS_SUB_CLAMP_U32     : DS_1A1D_NORET_mc<"ds_sub_clamp_u32">;
  defm DS_SUB_CLAMP_RTN_U32 : DS_1A1D_RET_mc<"ds_sub_clamp_rtn_u32", VGPR_32,
                                             "ds_sub_clamp_u32">;
} // let SubtargetPredicate = isGFX12Plus
739
740//===----------------------------------------------------------------------===//
741// DS Patterns
742//===----------------------------------------------------------------------===//
743
// Select the swizzle intrinsic to DS_SWIZZLE_B32; the immediate becomes the
// 16-bit offset operand and gds is 0.
def : GCNPat <(int_amdgcn_ds_swizzle i32:$src, timm:$offset16),
              (DS_SWIZZLE_B32 VGPR_32:$src, (as_i16timm $offset16), (i1 0))>;
748
// Selects a load fragment of type vt from a DS1Addr1Offset address to inst,
// passing the matched pointer and offset and the given gds bit.
class DSReadPat <DS_Pseudo inst, ValueType vt, PatFrag frag, int gds=0>
  : GCNPat <(vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset))),
            (inst $ptr, offset:$offset, (i1 gds))>;
753
// Emits two read patterns for one fragment name: under LDSRequiresM0Init the
// "<frag>_m0" fragment selects inst, and under NotLDSRequiresM0Init the plain
// fragment selects the "<inst>_gfx9" pseudo.
multiclass DSReadPat_mc<DS_Pseudo inst, ValueType vt, string frag> {
  let OtherPredicates = [LDSRequiresM0Init] in
  def : DSReadPat<inst, vt, !cast<PatFrag>(frag#"_m0")>;

  let OtherPredicates = [NotLDSRequiresM0Init] in
  def : DSReadPat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt,
                  !cast<PatFrag>(frag)>;
}
764
// Read pattern for the tied-output D16 loads: the fragment takes an extra
// vt-typed $in value, which is forwarded as the instruction's last operand.
class DSReadPat_D16 <DS_Pseudo inst, PatFrag frag, ValueType vt>
  : GCNPat <(frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$in),
            (inst $ptr, offset:$offset, (i1 0), $in)>;
769
// Extending 8- and 16-bit loads, plus the plain 16-bit load.
// Each (instruction, type, fragment) triple is listed exactly once; a repeated
// entry would only add an identical pattern that can never be selected.
defm : DSReadPat_mc <DS_READ_I8,  i32, "sextloadi8_local">;
defm : DSReadPat_mc <DS_READ_I8,  i16, "sextloadi8_local">;
defm : DSReadPat_mc <DS_READ_U8,  i32, "extloadi8_local">;
defm : DSReadPat_mc <DS_READ_U8,  i32, "zextloadi8_local">;
defm : DSReadPat_mc <DS_READ_U8,  i16, "extloadi8_local">;
defm : DSReadPat_mc <DS_READ_U8,  i16, "zextloadi8_local">;
defm : DSReadPat_mc <DS_READ_I16, i32, "sextloadi16_local">;
defm : DSReadPat_mc <DS_READ_U16, i32, "extloadi16_local">;
defm : DSReadPat_mc <DS_READ_U16, i32, "zextloadi16_local">;
defm : DSReadPat_mc <DS_READ_U16, i16, "load_local">;
781
// Plain 32-bit loads, one pattern pair per type in Reg32Types.
foreach vt = Reg32Types.types in
defm : DSReadPat_mc <DS_READ_B32, vt, "load_local">;

// Atomic loads.
defm : DSReadPat_mc <DS_READ_U8,  i16, "atomic_load_8_local">;
defm : DSReadPat_mc <DS_READ_U8,  i32, "atomic_load_8_local">;
defm : DSReadPat_mc <DS_READ_U16, i16, "atomic_load_16_local">;
defm : DSReadPat_mc <DS_READ_U16, i32, "atomic_load_16_local">;
defm : DSReadPat_mc <DS_READ_B32, i32, "atomic_load_32_local">;
defm : DSReadPat_mc <DS_READ_B64, i64, "atomic_load_64_local">;
792
// D16 load patterns, each instantiated for v2i16 and v2f16.
let OtherPredicates = [D16PreservesUnusedBits] in {
  // High half.
  def : DSReadPat_D16<DS_READ_U16_D16_HI, load_d16_hi_local, v2i16>;
  def : DSReadPat_D16<DS_READ_U16_D16_HI, load_d16_hi_local, v2f16>;
  def : DSReadPat_D16<DS_READ_U8_D16_HI, az_extloadi8_d16_hi_local, v2i16>;
  def : DSReadPat_D16<DS_READ_U8_D16_HI, az_extloadi8_d16_hi_local, v2f16>;
  def : DSReadPat_D16<DS_READ_I8_D16_HI, sextloadi8_d16_hi_local, v2i16>;
  def : DSReadPat_D16<DS_READ_I8_D16_HI, sextloadi8_d16_hi_local, v2f16>;

  // Low half.
  def : DSReadPat_D16<DS_READ_U16_D16, load_d16_lo_local, v2i16>;
  def : DSReadPat_D16<DS_READ_U16_D16, load_d16_lo_local, v2f16>;
  def : DSReadPat_D16<DS_READ_U8_D16, az_extloadi8_d16_lo_local, v2i16>;
  def : DSReadPat_D16<DS_READ_U8_D16, az_extloadi8_d16_lo_local, v2f16>;
  def : DSReadPat_D16<DS_READ_I8_D16, sextloadi8_d16_lo_local, v2i16>;
  def : DSReadPat_D16<DS_READ_I8_D16, sextloadi8_d16_lo_local, v2f16>;
} // End OtherPredicates = [D16PreservesUnusedBits]
808
// Selects a store fragment of a vt-typed value to a DS1Addr1Offset address to
// inst. The value operand uses the register class from getVregSrcForVT<vt>.
class DSWritePat <DS_Pseudo inst, ValueType vt, PatFrag frag, int gds=0>
  : GCNPat <(frag vt:$value, (DS1Addr1Offset i32:$ptr, i32:$offset)),
            (inst $ptr, getVregSrcForVT<vt>.ret:$value, offset:$offset,
                  (i1 gds))>;
813
// Emits two DSWritePat records for one store:
//  * under LDSRequiresM0Init: `inst` matched against the fragment named
//    frag#"_m0";
//  * under NotLDSRequiresM0Init: the pseudo named <inst>_gfx9 matched against
//    the fragment named frag.
multiclass DSWritePat_mc <DS_Pseudo inst, ValueType vt, string frag> {
  defvar gfx9Inst = !cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9");

  let OtherPredicates = [LDSRequiresM0Init] in
  def : DSWritePat<inst, vt, !cast<PatFrag>(frag#"_m0")>;

  let OtherPredicates = [NotLDSRequiresM0Init] in
  def : DSWritePat<gfx9Inst, vt, !cast<PatFrag>(frag)>;
}
823
// Truncating and plain sub-dword LDS stores.
defm : DSWritePat_mc <DS_WRITE_B8,  i32, "truncstorei8_local">;
defm : DSWritePat_mc <DS_WRITE_B16, i32, "truncstorei16_local">;
defm : DSWritePat_mc <DS_WRITE_B8,  i16, "truncstorei8_local">;
defm : DSWritePat_mc <DS_WRITE_B16, i16, "store_local">;

// One DS_WRITE_B32 "store_local" pattern per value type listed in Reg32Types.
foreach vt = Reg32Types.types in
defm : DSWritePat_mc <DS_WRITE_B32, vt, "store_local">;

// Atomic LDS stores. The 8- and 16-bit forms accept both i16 and i32 values.
foreach vt = [i16, i32] in
defm : DSWritePat_mc <DS_WRITE_B8, vt, "atomic_store_8_local">;
foreach vt = [i16, i32] in
defm : DSWritePat_mc <DS_WRITE_B16, vt, "atomic_store_16_local">;
defm : DSWritePat_mc <DS_WRITE_B32, i32, "atomic_store_32_local">;
defm : DSWritePat_mc <DS_WRITE_B64, i64, "atomic_store_64_local">;

// D16 "hi" stores, guarded by HasD16LoadStore.
let OtherPredicates = [HasD16LoadStore] in {
  def : DSWritePat <DS_WRITE_B16_D16_HI, i32, store_hi16_local>;
  def : DSWritePat <DS_WRITE_B8_D16_HI,  i32, truncstorei8_hi16_local>;
} // End OtherPredicates = [HasD16LoadStore]
844
// Two-offset load pattern: the address is matched through DS64Bit4ByteAligned
// as ($ptr, $offset0, $offset1) and `inst` is emitted with a final i1 operand
// of 0.
class DS64Bit4ByteAlignedReadPat<DS_Pseudo inst, ValueType vt, PatFrag frag>
  : GCNPat <
      (vt:$value (frag (DS64Bit4ByteAligned i32:$ptr, i8:$offset0, i8:$offset1))),
      (inst $ptr, $offset0, $offset1, (i1 0))
    >;
849
// Two-offset store pattern: the VReg_64 value is split into its sub0 and sub1
// halves, which become the two i32 data operands of `inst`. The address is
// matched through DS64Bit4ByteAligned; the final i1 operand is 0.
class DS64Bit4ByteAlignedWritePat<DS_Pseudo inst, ValueType vt, PatFrag frag>
  : GCNPat <
      (frag vt:$value, (DS64Bit4ByteAligned i32:$ptr, i8:$offset0, i8:$offset1)),
      (inst $ptr,
            (i32 (EXTRACT_SUBREG VReg_64:$value, sub0)),
            (i32 (EXTRACT_SUBREG VReg_64:$value, sub1)),
            $offset0, $offset1, (i1 0))
    >;
856
// Two-offset load pattern: the address is matched through DS128Bit8ByteAligned
// as ($ptr, $offset0, $offset1) and `inst` is emitted with a final i1 operand
// of 0.
class DS128Bit8ByteAlignedReadPat<DS_Pseudo inst, ValueType vt, PatFrag frag>
  : GCNPat <
      (vt:$value (frag (DS128Bit8ByteAligned i32:$ptr, i8:$offset0, i8:$offset1))),
      (inst $ptr, $offset0, $offset1, (i1 0))
    >;
861
// Two-offset store pattern: the VReg_128 value is split into its sub0_sub1 and
// sub2_sub3 halves, which become the two i64 data operands of `inst`. The
// address is matched through DS128Bit8ByteAligned; the final i1 operand is 0.
class DS128Bit8ByteAlignedWritePat<DS_Pseudo inst, ValueType vt, PatFrag frag>
  : GCNPat <
      (frag vt:$value, (DS128Bit8ByteAligned i32:$ptr, i8:$offset0, i8:$offset1)),
      (inst $ptr,
            (i64 (EXTRACT_SUBREG VReg_128:$value, sub0_sub1)),
            (i64 (EXTRACT_SUBREG VReg_128:$value, sub2_sub3)),
            $offset0, $offset1, (i1 0))
    >;
868
// Read and write patterns built on DS_READ2_B32 / DS_WRITE2_B32 for `vt`:
// the *_m0 fragments with the base pseudos under [LDSRequiresM0Init,
// isGFX7Plus], and the plain fragments with the _gfx9 pseudos under
// [NotLDSRequiresM0Init].
multiclass DS64Bit4ByteAlignedPat_mc<ValueType vt> {
  let OtherPredicates = [LDSRequiresM0Init, isGFX7Plus] in {
    def : DS64Bit4ByteAlignedReadPat <DS_READ2_B32,  vt, load_local_m0>;
    def : DS64Bit4ByteAlignedWritePat<DS_WRITE2_B32, vt, store_local_m0>;
  } // End OtherPredicates = [LDSRequiresM0Init, isGFX7Plus]

  let OtherPredicates = [NotLDSRequiresM0Init] in {
    def : DS64Bit4ByteAlignedReadPat <DS_READ2_B32_gfx9,  vt, load_local>;
    def : DS64Bit4ByteAlignedWritePat<DS_WRITE2_B32_gfx9, vt, store_local>;
  } // End OtherPredicates = [NotLDSRequiresM0Init]
}
880
// Read and write patterns built on DS_READ2_B64 / DS_WRITE2_B64 for `vt`:
// the *_m0 fragments with the base pseudos under [LDSRequiresM0Init,
// isGFX7Plus], and the plain fragments with the _gfx9 pseudos under
// [NotLDSRequiresM0Init].
multiclass DS128Bit8ByteAlignedPat_mc<ValueType vt> {
  let OtherPredicates = [LDSRequiresM0Init, isGFX7Plus] in {
    def : DS128Bit8ByteAlignedReadPat <DS_READ2_B64,  vt, load_local_m0>;
    def : DS128Bit8ByteAlignedWritePat<DS_WRITE2_B64, vt, store_local_m0>;
  } // End OtherPredicates = [LDSRequiresM0Init, isGFX7Plus]

  let OtherPredicates = [NotLDSRequiresM0Init] in {
    def : DS128Bit8ByteAlignedReadPat <DS_READ2_B64_gfx9,  vt, load_local>;
    def : DS128Bit8ByteAlignedWritePat<DS_WRITE2_B64_gfx9, vt, store_local>;
  } // End OtherPredicates = [NotLDSRequiresM0Init]
}
892
// On SI, v2i32 loads are split into i32 loads during lowering because of a bug
// related to bounds checking.
foreach vt = VReg_64.RegTypes in
defm : DS64Bit4ByteAlignedPat_mc<vt>;

foreach vt = VReg_128.RegTypes in
defm : DS128Bit8ByteAlignedPat_mc<vt>;
902
// When both would match, ds_read / ds_write are preferred to ds_read2 /
// ds_write2 because the single-address forms have a larger immediate offset
// range. The extra complexity below expresses that preference.
let AddedComplexity = 100 in {

  foreach vt = VReg_64.RegTypes in {
    defm : DSReadPat_mc <DS_READ_B64, vt, "load_align8_local">;
    defm : DSWritePat_mc <DS_WRITE_B64, vt, "store_align8_local">;
  }

  let SubtargetPredicate = isGFX7Plus in {

    foreach vt = VReg_96.RegTypes in {
      defm : DSReadPat_mc <DS_READ_B96, vt, "load_align16_local">;
      defm : DSWritePat_mc <DS_WRITE_B96, vt, "store_align16_local">;
    }

    foreach vt = VReg_128.RegTypes in {
      defm : DSReadPat_mc <DS_READ_B128, vt, "load_align16_local">;
      defm : DSWritePat_mc <DS_WRITE_B128, vt, "store_align16_local">;
    }

    // NOTE(review): this nested let rebinds SubtargetPredicate for the records
    // below instead of adding to the enclosing isGFX7Plus binding -- confirm
    // that this is intended.
    let SubtargetPredicate = HasUnalignedAccessMode in {

      // 64-bit loads and stores aligned to less than 4 bytes are selected as
      // one ds_read_b64 / ds_write_b64, which is faster than the
      // ds_read2_b32 / ds_write2_b32 that would be used otherwise: the b32
      // accesses would still be misaligned, and there would be two of them.
      foreach vt = VReg_64.RegTypes in {
        defm : DSReadPat_mc <DS_READ_B64, vt, "load_align_less_than_4_local">;
        defm : DSWritePat_mc <DS_WRITE_B64, vt, "store_align_less_than_4_local">;
      }

      // Selection splits most unaligned 3-dword accesses for performance
      // reasons when that is beneficial. These two patterns cover the
      // remaining cases.
      foreach vt = VReg_96.RegTypes in {
        defm : DSReadPat_mc <DS_READ_B96, vt, "load_local">;
        defm : DSWritePat_mc <DS_WRITE_B96, vt, "store_local">;
      }

      // 128-bit loads and stores aligned to less than 4 bytes are selected as
      // one ds_read_b128 / ds_write_b128, which is faster than the
      // ds_read2_b64 / ds_write2_b64 that would be used otherwise: the b64
      // accesses would still be misaligned, and there would be two of them.
      foreach vt = VReg_128.RegTypes in {
        defm : DSReadPat_mc <DS_READ_B128, vt, "load_align_less_than_4_local">;
        defm : DSWritePat_mc <DS_WRITE_B128, vt, "store_align_less_than_4_local">;
      }

    } // End SubtargetPredicate = HasUnalignedAccessMode

  } // End SubtargetPredicate = isGFX7Plus

} // End AddedComplexity = 100
956
// Single-address atomic pattern.
//   inst       - DS pseudo to emit.
//   vt         - type of the data operand.
//   frag       - atomic fragment to match.
//   complexity - value assigned to AddedComplexity.
//   gds        - value forwarded as the instruction's trailing i1 operand.
// The address is matched through DS1Addr1Offset as ($ptr, $offset).
class DSAtomicRetPat<DS_Pseudo inst, ValueType vt, PatFrag frag, int complexity = 0,
                     bit gds=0>
  : GCNPat <
      (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value),
      (inst $ptr, getVregSrcForVT<vt>.ret:$value, offset:$offset, (i1 gds))
    > {
  let AddedComplexity = complexity;
}
962
// Emits three DSAtomicRetPat records for one atomic; the fragment names are
// built from `frag`, an address-space infix and vt.Size:
//  * LDSRequiresM0Init:    inst        with frag#"_local_m0_"#Size
//  * NotLDSRequiresM0Init: <inst>_gfx9 with frag#"_local_"#Size
//  * HasGDS:               inst        with frag#"_region_m0_"#Size, gds = 1
multiclass DSAtomicRetPat_mc<DS_Pseudo inst, ValueType vt, string frag> {
  defvar gfx9Inst = !cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9");

  let OtherPredicates = [LDSRequiresM0Init] in
  def : DSAtomicRetPat<inst, vt, !cast<PatFrag>(frag#"_local_m0_"#vt.Size)>;

  let OtherPredicates = [NotLDSRequiresM0Init] in
  def : DSAtomicRetPat<gfx9Inst, vt, !cast<PatFrag>(frag#"_local_"#vt.Size)>;

  let OtherPredicates = [HasGDS] in
  def : DSAtomicRetPat<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt.Size),
                       /* complexity */ 0, /* gds */ 1>;
}
978
// Like DSAtomicRetPat_mc, but every predicate group additionally gets a
// pattern that matches the "_noret_" variant of the fragment with `noRetInst`.
// The noret patterns are given AddedComplexity 1, the returning ones 0.
multiclass DSAtomicRetNoRetPat_mc<DS_Pseudo inst, DS_Pseudo noRetInst,
                                  ValueType vt, string frag> {
  defvar gfx9Inst      = !cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9");
  defvar gfx9NoRetInst = !cast<DS_Pseudo>(!cast<string>(noRetInst)#"_gfx9");

  let OtherPredicates = [LDSRequiresM0Init] in {
    def : DSAtomicRetPat<inst, vt, !cast<PatFrag>(frag#"_local_m0_"#vt.Size)>;
    def : DSAtomicRetPat<noRetInst, vt,
                         !cast<PatFrag>(frag#"_local_m0_noret_"#vt.Size),
                         /* complexity */ 1>;
  }

  let OtherPredicates = [NotLDSRequiresM0Init] in {
    def : DSAtomicRetPat<gfx9Inst, vt, !cast<PatFrag>(frag#"_local_"#vt.Size)>;
    def : DSAtomicRetPat<gfx9NoRetInst, vt,
                         !cast<PatFrag>(frag#"_local_noret_"#vt.Size),
                         /* complexity */ 1>;
  }

  let OtherPredicates = [HasGDS] in {
    def : DSAtomicRetPat<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt.Size),
                         /* complexity */ 0, /* gds */ 1>;
    def : DSAtomicRetPat<noRetInst, vt,
                         !cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size),
                         /* complexity */ 1, /* gds */ 1>;
  }
}
1004
1005
1006
let SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10 in {

// Compare-and-swap pattern that passes $cmp before $swap to the instruction.
// Caution: this operand order is the *opposite* of the BUFFER_ATOMIC_CMPSWAP
// opcode.
class DSAtomicCmpXChgSwapped<DS_Pseudo inst, ValueType vt, PatFrag frag,
                             int complexity = 0, bit gds=0>
  : GCNPat <
      (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$cmp, vt:$swap),
      (inst $ptr, getVregSrcForVT<vt>.ret:$cmp, getVregSrcForVT<vt>.ret:$swap,
            offset:$offset, (i1 gds))
    > {
  let AddedComplexity = complexity;
}

// Returning and non-returning compare-and-swap patterns for the
// LDSRequiresM0Init, NotLDSRequiresM0Init (using the _gfx9 pseudos) and HasGDS
// (gds = 1) cases. Noret patterns are given AddedComplexity 1.
multiclass DSAtomicCmpXChgSwapped_mc<DS_Pseudo inst, DS_Pseudo noRetInst, ValueType vt,
                                     string frag> {
  defvar gfx9Inst      = !cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9");
  defvar gfx9NoRetInst = !cast<DS_Pseudo>(!cast<string>(noRetInst)#"_gfx9");

  let OtherPredicates = [LDSRequiresM0Init] in {
    def : DSAtomicCmpXChgSwapped<inst, vt,
                                 !cast<PatFrag>(frag#"_local_m0_"#vt.Size)>;
    def : DSAtomicCmpXChgSwapped<noRetInst, vt,
                                 !cast<PatFrag>(frag#"_local_m0_noret_"#vt.Size),
                                 /* complexity */ 1>;
  }

  let OtherPredicates = [NotLDSRequiresM0Init] in {
    def : DSAtomicCmpXChgSwapped<gfx9Inst, vt,
                                 !cast<PatFrag>(frag#"_local_"#vt.Size)>;
    def : DSAtomicCmpXChgSwapped<gfx9NoRetInst, vt,
                                 !cast<PatFrag>(frag#"_local_noret_"#vt.Size),
                                 /* complexity */ 1>;
  }

  let OtherPredicates = [HasGDS] in {
    def : DSAtomicCmpXChgSwapped<inst, vt,
                                 !cast<PatFrag>(frag#"_region_m0_"#vt.Size),
                                 /* complexity */ 0, /* gds */ 1>;
    def : DSAtomicCmpXChgSwapped<noRetInst, vt,
                                 !cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size),
                                 /* complexity */ 1, /* gds */ 1>;
  }
}

} // End SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10
1040
let SubtargetPredicate = isGFX11Plus in {

// Compare-and-swap pattern that passes $swap before $cmp to the instruction.
// This operand order agrees with the BUFFER_ATOMIC_CMPSWAP opcode.
class DSAtomicCmpXChg<DS_Pseudo inst, ValueType vt, PatFrag frag,
                      int complexity = 0, bit gds=0>
  : GCNPat <
      (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$cmp, vt:$swap),
      (inst $ptr, getVregSrcForVT<vt>.ret:$swap, getVregSrcForVT<vt>.ret:$cmp,
            offset:$offset, (i1 gds))
    > {
  let AddedComplexity = complexity;
}

// Returning and non-returning compare-and-swap patterns: the "_local_"
// fragments always use the _gfx9 pseudos; the "_region_m0_" fragments use the
// base pseudos with gds = 1 under HasGDS. Noret patterns are given
// AddedComplexity 1.
multiclass DSAtomicCmpXChg_mc<DS_Pseudo inst, DS_Pseudo noRetInst, ValueType vt, string frag> {
  defvar gfx9Inst      = !cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9");
  defvar gfx9NoRetInst = !cast<DS_Pseudo>(!cast<string>(noRetInst)#"_gfx9");

  def : DSAtomicCmpXChg<gfx9Inst, vt, !cast<PatFrag>(frag#"_local_"#vt.Size)>;
  def : DSAtomicCmpXChg<gfx9NoRetInst, vt,
                        !cast<PatFrag>(frag#"_local_noret_"#vt.Size),
                        /* complexity */ 1>;

  let OtherPredicates = [HasGDS] in {
    def : DSAtomicCmpXChg<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt.Size),
                          /* complexity */ 0, /* gds */ 1>;
    def : DSAtomicCmpXChg<noRetInst, vt,
                          !cast<PatFrag>(frag#"_region_m0_noret_"#vt.Size),
                          /* complexity */ 1, /* gds */ 1>;
  }
}

} // End SubtargetPredicate = isGFX11Plus
1065
// 32-bit atomics.

// Exchange: only a returning pattern is instantiated.
defm : DSAtomicRetPat_mc<DS_WRXCHG_RTN_B32, i32, "atomic_swap">;

// Read-modify-write operations, returning and non-returning.
defm : DSAtomicRetNoRetPat_mc<DS_ADD_RTN_U32, DS_ADD_U32, i32, "atomic_load_add">;
defm : DSAtomicRetNoRetPat_mc<DS_SUB_RTN_U32, DS_SUB_U32, i32, "atomic_load_sub">;
defm : DSAtomicRetNoRetPat_mc<DS_INC_RTN_U32, DS_INC_U32, i32, "atomic_load_uinc_wrap">;
defm : DSAtomicRetNoRetPat_mc<DS_DEC_RTN_U32, DS_DEC_U32, i32, "atomic_load_udec_wrap">;
defm : DSAtomicRetNoRetPat_mc<DS_AND_RTN_B32, DS_AND_B32, i32, "atomic_load_and">;
defm : DSAtomicRetNoRetPat_mc<DS_OR_RTN_B32,  DS_OR_B32,  i32, "atomic_load_or">;
defm : DSAtomicRetNoRetPat_mc<DS_XOR_RTN_B32, DS_XOR_B32, i32, "atomic_load_xor">;
defm : DSAtomicRetNoRetPat_mc<DS_MIN_RTN_I32, DS_MIN_I32, i32, "atomic_load_min">;
defm : DSAtomicRetNoRetPat_mc<DS_MAX_RTN_I32, DS_MAX_I32, i32, "atomic_load_max">;
defm : DSAtomicRetNoRetPat_mc<DS_MIN_RTN_U32, DS_MIN_U32, i32, "atomic_load_umin">;
defm : DSAtomicRetNoRetPat_mc<DS_MAX_RTN_U32, DS_MAX_U32, i32, "atomic_load_umax">;
defm : DSAtomicRetNoRetPat_mc<DS_MIN_RTN_F32, DS_MIN_F32, f32, "atomic_load_fmin">;
defm : DSAtomicRetNoRetPat_mc<DS_MAX_RTN_F32, DS_MAX_F32, f32, "atomic_load_fmax">;

// Compare-and-swap: DS_CMPST_* before GFX11, DS_CMPSTORE_* from GFX11 on.
let SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10 in
defm : DSAtomicCmpXChgSwapped_mc<DS_CMPST_RTN_B32, DS_CMPST_B32, i32, "atomic_cmp_swap">;

let SubtargetPredicate = isGFX11Plus in
defm : DSAtomicCmpXChg_mc<DS_CMPSTORE_RTN_B32, DS_CMPSTORE_B32, i32, "atomic_cmp_swap">;

let SubtargetPredicate = HasLDSFPAtomicAdd in
defm : DSAtomicRetNoRetPat_mc<DS_ADD_RTN_F32, DS_ADD_F32, f32, "atomic_load_fadd">;
1093
// 64-bit atomics.

// Exchange: only a returning pattern is instantiated.
defm : DSAtomicRetPat_mc<DS_WRXCHG_RTN_B64, i64, "atomic_swap">;

// Read-modify-write operations, returning and non-returning.
defm : DSAtomicRetNoRetPat_mc<DS_ADD_RTN_U64, DS_ADD_U64, i64, "atomic_load_add">;
defm : DSAtomicRetNoRetPat_mc<DS_SUB_RTN_U64, DS_SUB_U64, i64, "atomic_load_sub">;
defm : DSAtomicRetNoRetPat_mc<DS_INC_RTN_U64, DS_INC_U64, i64, "atomic_load_uinc_wrap">;
defm : DSAtomicRetNoRetPat_mc<DS_DEC_RTN_U64, DS_DEC_U64, i64, "atomic_load_udec_wrap">;
defm : DSAtomicRetNoRetPat_mc<DS_AND_RTN_B64, DS_AND_B64, i64, "atomic_load_and">;
defm : DSAtomicRetNoRetPat_mc<DS_OR_RTN_B64,  DS_OR_B64,  i64, "atomic_load_or">;
defm : DSAtomicRetNoRetPat_mc<DS_XOR_RTN_B64, DS_XOR_B64, i64, "atomic_load_xor">;
defm : DSAtomicRetNoRetPat_mc<DS_MIN_RTN_I64, DS_MIN_I64, i64, "atomic_load_min">;
defm : DSAtomicRetNoRetPat_mc<DS_MAX_RTN_I64, DS_MAX_I64, i64, "atomic_load_max">;
defm : DSAtomicRetNoRetPat_mc<DS_MIN_RTN_U64, DS_MIN_U64, i64, "atomic_load_umin">;
defm : DSAtomicRetNoRetPat_mc<DS_MAX_RTN_U64, DS_MAX_U64, i64, "atomic_load_umax">;
defm : DSAtomicRetNoRetPat_mc<DS_MIN_RTN_F64, DS_MIN_F64, f64, "atomic_load_fmin">;
defm : DSAtomicRetNoRetPat_mc<DS_MAX_RTN_F64, DS_MAX_F64, f64, "atomic_load_fmax">;

// Compare-and-swap: DS_CMPST_* before GFX11, DS_CMPSTORE_* from GFX11 on.
let SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10 in
defm : DSAtomicCmpXChgSwapped_mc<DS_CMPST_RTN_B64, DS_CMPST_B64, i64, "atomic_cmp_swap">;

let SubtargetPredicate = isGFX11Plus in
defm : DSAtomicCmpXChg_mc<DS_CMPSTORE_RTN_B64, DS_CMPSTORE_B64, i64, "atomic_cmp_swap">;
1117
let SubtargetPredicate = isGFX90APlus in {

// f64 atomic add on LDS, matched from the atomic_load_fadd fragments.
def : DSAtomicRetPat<DS_ADD_RTN_F64, f64, atomic_load_fadd_local_64>;
let AddedComplexity = 1 in
def : DSAtomicRetPat<DS_ADD_F64, f64, atomic_load_fadd_local_noret_64>;

// Same shape as DSAtomicRetPat, except that the matched node carries an
// explicit result type `vt` and no AddedComplexity is set by the class.
class DSAtomicRetPatIntrinsic<DS_Pseudo inst, ValueType vt, PatFrag frag,
                              bit gds=0>
  : GCNPat <
      (vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value)),
      (inst $ptr, getVregSrcForVT<vt>.ret:$value, offset:$offset, (i1 gds))
    >;

// f64 atomic add on LDS, matched from the flat_atomic_fadd intrinsic fragments.
def : DSAtomicRetPatIntrinsic<DS_ADD_RTN_F64, f64,
                              int_amdgcn_flat_atomic_fadd_local_addrspace>;
let AddedComplexity = 1 in
def : DSAtomicRetPatIntrinsic<DS_ADD_F64, f64,
                              int_amdgcn_flat_atomic_fadd_noret_local_addrspace>;

} // End SubtargetPredicate = isGFX90APlus
1133
let SubtargetPredicate = HasAtomicDsPkAdd16Insts in {

// Packed f16 add, matched from the atomic_load_fadd_v2f16 fragments.
def : DSAtomicRetPat<DS_PK_ADD_RTN_F16, v2f16, atomic_load_fadd_v2f16_local_32>;
let AddedComplexity = 1 in
def : DSAtomicRetPat<DS_PK_ADD_F16, v2f16, atomic_load_fadd_v2f16_local_noret_32>;

// Packed bf16 add, matched from the ds_fadd_v2bf16 intrinsics. The two
// trailing instruction operands are both 0.
def : GCNPat <
  (v2i16 (int_amdgcn_ds_fadd_v2bf16 i32:$ptr, v2i16:$src)),
  (DS_PK_ADD_RTN_BF16 VGPR_32:$ptr, VGPR_32:$src, 0, 0)
>;

let AddedComplexity = 1 in
def : GCNPat <
  (v2i16 (int_amdgcn_ds_fadd_v2bf16_noret i32:$ptr, v2i16:$src)),
  (DS_PK_ADD_BF16 VGPR_32:$ptr, VGPR_32:$src, 0, 0)
>;

} // End SubtargetPredicate = HasAtomicDsPkAdd16Insts
1148
// SIds_ordered_count selects DS_ORDERED_COUNT when HasGDS holds; the offset is
// passed through as_i16imm.
let OtherPredicates = [HasGDS] in
def : GCNPat <
  (SIds_ordered_count i32:$value, i16:$offset),
  (DS_ORDERED_COUNT $value, (as_i16imm $offset))
>;
1154
// ds_add_gs_reg_rtn with an i64 result maps directly onto DS_ADD_GS_REG_RTN.
def : GCNPat <
  (i64 (int_amdgcn_ds_add_gs_reg_rtn i32:$src, timm:$offset32)),
  (DS_ADD_GS_REG_RTN VGPR_32:$src, (as_i32timm $offset32))
>;

// With an i32 result, the instruction's result is copied to VReg_64 and its
// sub0 half is extracted.
def : GCNPat <
  (i32 (int_amdgcn_ds_add_gs_reg_rtn i32:$src, timm:$offset32)),
  (EXTRACT_SUBREG
    (i64 (COPY_TO_REGCLASS
           (DS_ADD_GS_REG_RTN VGPR_32:$src, (as_i32timm $offset32)),
           VReg_64)),
    sub0)
>;

// ds_sub_gs_reg_rtn with an i64 result maps directly onto DS_SUB_GS_REG_RTN.
def : GCNPat <
  (i64 (int_amdgcn_ds_sub_gs_reg_rtn i32:$src, timm:$offset32)),
  (DS_SUB_GS_REG_RTN VGPR_32:$src, (as_i32timm $offset32))
>;

// With an i32 result, the instruction's result is copied to VReg_64 and its
// sub0 half is extracted.
def : GCNPat <
  (i32 (int_amdgcn_ds_sub_gs_reg_rtn i32:$src, timm:$offset32)),
  (EXTRACT_SUBREG
    (i64 (COPY_TO_REGCLASS
           (DS_SUB_GS_REG_RTN VGPR_32:$src, (as_i32timm $offset32)),
           VReg_64)),
    sub0)
>;
1182
1183//===----------------------------------------------------------------------===//
1184// Target-specific instruction encodings.
1185//===----------------------------------------------------------------------===//
1186
1187//===----------------------------------------------------------------------===//
1188// Base ENC_DS for GFX6, GFX7, GFX10, GFX11, GFX12.
1189//===----------------------------------------------------------------------===//
1190
// Common 64-bit DS encoding shared by the real instructions below.
//   op          - 8-bit opcode placed in Inst{25-18}.
//   ps          - pseudo that supplies operands and the has_* flags.
//   ef          - encoding family forwarded to SIMCInstr.
//   opName      - assembly mnemonic; defaults to the pseudo's mnemonic.
//   hasGFX12Enc - when set, the gds field is fixed to 0; otherwise it is left
//                 unset here.
// A field whose has_* flag is clear on the pseudo is encoded as 0, except that
// the address field takes data0{7-0} when has_gws_data0 is set and the gds bit
// takes ps.gdsValue when has_gds is clear.
class Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<bits<8> op, DS_Pseudo ps, int ef,
                                               string opName = ps.Mnemonic,
                                               bit hasGFX12Enc = 0>
    : DS_Real<ps, opName>, SIMCInstr <ps.Mnemonic, ef> {

  // First dword: offsets, gds bit, opcode and fixed bits.
  let Inst{7-0}   = !if(ps.has_offset0, offset0, 0);
  let Inst{15-8}  = !if(ps.has_offset1, offset1, 0);
  let Inst{17}    = !if(ps.has_gds, gds, ps.gdsValue);
  let Inst{25-18} = op;
  let Inst{31-26} = 0x36;

  // Second dword: address, the two data operands and the destination.
  let Inst{39-32} = !if(ps.has_addr, addr,
                        !if(ps.has_gws_data0, data0{7-0}, 0));
  let Inst{47-40} = !if(ps.has_data0, data0{7-0}, 0);
  let Inst{55-48} = !if(ps.has_data1, data1{7-0}, 0);
  let Inst{63-56} = !if(ps.has_vdst, vdst{7-0}, 0);

  let gds = !if(hasGFX12Enc, 0, ?);
}
1208
1209//===----------------------------------------------------------------------===//
1210// GFX12.
1211//===----------------------------------------------------------------------===//
1212
let AssemblerPredicate = isGFX12Plus, DecoderNamespace = "GFX12" in {
  // Real GFX12 instruction for the pseudo named NAME, keeping its mnemonic.
  multiclass DS_Real_gfx12<bits<8> op> {
    defvar pseudo = !cast<DS_Pseudo>(NAME);
    def _gfx12
      : Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<
          op, pseudo, SIEncodingFamily.GFX12, pseudo.Mnemonic,
          /* hasGFX12Enc */ 1>;
  }

  // Real GFX12 instruction for `backing_pseudo` under the new mnemonic
  // `real_name`, plus a mnemonic alias from the pseudo's mnemonic.
  multiclass DS_Real_Renamed_gfx12<bits<8> op, DS_Pseudo backing_pseudo,
                                   string real_name> {
    def _gfx12
      : Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<
          op, backing_pseudo, SIEncodingFamily.GFX12, real_name,
          /* hasGFX12Enc */ 1>,
        MnemonicAlias<backing_pseudo.Mnemonic, real_name>,
        Requires<[isGFX12Plus]>;
  }
} // End AssemblerPredicate = isGFX12Plus, DecoderNamespace = "GFX12"
1231
// DS_MIN/MAX_* pseudos are exposed on GFX12 under *_num_* mnemonics.
defm DS_MIN_NUM_F32     : DS_Real_Renamed_gfx12<0x012, DS_MIN_F32,     "ds_min_num_f32">;
defm DS_MAX_NUM_F32     : DS_Real_Renamed_gfx12<0x013, DS_MAX_F32,     "ds_max_num_f32">;
defm DS_MIN_NUM_RTN_F32 : DS_Real_Renamed_gfx12<0x032, DS_MIN_RTN_F32, "ds_min_num_rtn_f32">;
defm DS_MAX_NUM_RTN_F32 : DS_Real_Renamed_gfx12<0x033, DS_MAX_RTN_F32, "ds_max_num_rtn_f32">;
defm DS_MIN_NUM_F64     : DS_Real_Renamed_gfx12<0x052, DS_MIN_F64,     "ds_min_num_f64">;
defm DS_MAX_NUM_F64     : DS_Real_Renamed_gfx12<0x053, DS_MAX_F64,     "ds_max_num_f64">;
defm DS_MIN_NUM_RTN_F64 : DS_Real_Renamed_gfx12<0x072, DS_MIN_RTN_F64, "ds_min_num_rtn_f64">;
defm DS_MAX_NUM_RTN_F64 : DS_Real_Renamed_gfx12<0x073, DS_MAX_RTN_F64, "ds_max_num_rtn_f64">;

// Instructions with only a GFX12 real encoding here.
defm DS_SUB_CLAMP_U32     : DS_Real_gfx12<0x099>;
defm DS_SUB_CLAMP_RTN_U32 : DS_Real_gfx12<0x0a9>;
1242
1243//===----------------------------------------------------------------------===//
1244// GFX11.
1245//===----------------------------------------------------------------------===//
1246
let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in {
  // Real GFX11 instruction for the pseudo named NAME, keeping its mnemonic.
  multiclass DS_Real_gfx11<bits<8> op> {
    defvar pseudo = !cast<DS_Pseudo>(NAME);
    def _gfx11
      : Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<op, pseudo,
                                                 SIEncodingFamily.GFX11>;
  }

  // Real GFX11 instruction for `backing_pseudo` under the new mnemonic
  // `real_name`, plus a mnemonic alias from the pseudo's mnemonic.
  multiclass DS_Real_Renamed_gfx11<bits<8> op, DS_Pseudo backing_pseudo,
                                   string real_name> {
    def _gfx11
      : Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<op, backing_pseudo,
                                                 SIEncodingFamily.GFX11,
                                                 real_name>,
        MnemonicAlias<backing_pseudo.Mnemonic, real_name>,
        Requires<[isGFX11Only]>;
  }
} // End AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11"
1259
// Same opcode on GFX11 and GFX12, mnemonic unchanged.
multiclass DS_Real_gfx11_gfx12<bits<8> op> :
  DS_Real_gfx11<op>, DS_Real_gfx12<op>;

// Same opcode on GFX11 and GFX12, with the pseudo renamed to `real_name`.
multiclass DS_Real_Renamed_gfx11_gfx12<bits<8> op, DS_Pseudo backing_pseudo,
                                       string real_name> :
  DS_Real_Renamed_gfx11<op, backing_pseudo, real_name>,
  DS_Real_Renamed_gfx12<op, backing_pseudo, real_name>;
1267
// DS_WRITE*/DS_READ*/DS_WRXCHG* pseudos are exposed on GFX11 and GFX12 under
// ds_store*/ds_load*/ds_storexchg* mnemonics.

// 32-bit and sub-dword stores, exchanges and loads.
defm DS_STORE_B32                        : DS_Real_Renamed_gfx11_gfx12<0x00d, DS_WRITE_B32, "ds_store_b32">;
defm DS_STORE_2ADDR_B32                  : DS_Real_Renamed_gfx11_gfx12<0x00e, DS_WRITE2_B32, "ds_store_2addr_b32">;
defm DS_STORE_2ADDR_STRIDE64_B32         : DS_Real_Renamed_gfx11_gfx12<0x00f, DS_WRITE2ST64_B32, "ds_store_2addr_stride64_b32">;
defm DS_STORE_B8                         : DS_Real_Renamed_gfx11_gfx12<0x01e, DS_WRITE_B8, "ds_store_b8">;
defm DS_STORE_B16                        : DS_Real_Renamed_gfx11_gfx12<0x01f, DS_WRITE_B16, "ds_store_b16">;
defm DS_STOREXCHG_RTN_B32                : DS_Real_Renamed_gfx11_gfx12<0x02d, DS_WRXCHG_RTN_B32, "ds_storexchg_rtn_b32">;
defm DS_STOREXCHG_2ADDR_RTN_B32          : DS_Real_Renamed_gfx11_gfx12<0x02e, DS_WRXCHG2_RTN_B32, "ds_storexchg_2addr_rtn_b32">;
defm DS_STOREXCHG_2ADDR_STRIDE64_RTN_B32 : DS_Real_Renamed_gfx11_gfx12<0x02f, DS_WRXCHG2ST64_RTN_B32, "ds_storexchg_2addr_stride64_rtn_b32">;
defm DS_LOAD_B32                         : DS_Real_Renamed_gfx11_gfx12<0x036, DS_READ_B32, "ds_load_b32">;
defm DS_LOAD_2ADDR_B32                   : DS_Real_Renamed_gfx11_gfx12<0x037, DS_READ2_B32, "ds_load_2addr_b32">;
defm DS_LOAD_2ADDR_STRIDE64_B32          : DS_Real_Renamed_gfx11_gfx12<0x038, DS_READ2ST64_B32, "ds_load_2addr_stride64_b32">;
defm DS_LOAD_I8                          : DS_Real_Renamed_gfx11_gfx12<0x039, DS_READ_I8, "ds_load_i8">;
defm DS_LOAD_U8                          : DS_Real_Renamed_gfx11_gfx12<0x03a, DS_READ_U8, "ds_load_u8">;
defm DS_LOAD_I16                         : DS_Real_Renamed_gfx11_gfx12<0x03b, DS_READ_I16, "ds_load_i16">;
defm DS_LOAD_U16                         : DS_Real_Renamed_gfx11_gfx12<0x03c, DS_READ_U16, "ds_load_u16">;

// 64-bit stores, exchanges and loads.
defm DS_STORE_B64                        : DS_Real_Renamed_gfx11_gfx12<0x04d, DS_WRITE_B64, "ds_store_b64">;
defm DS_STORE_2ADDR_B64                  : DS_Real_Renamed_gfx11_gfx12<0x04e, DS_WRITE2_B64, "ds_store_2addr_b64">;
defm DS_STORE_2ADDR_STRIDE64_B64         : DS_Real_Renamed_gfx11_gfx12<0x04f, DS_WRITE2ST64_B64, "ds_store_2addr_stride64_b64">;
defm DS_STOREXCHG_RTN_B64                : DS_Real_Renamed_gfx11_gfx12<0x06d, DS_WRXCHG_RTN_B64, "ds_storexchg_rtn_b64">;
defm DS_STOREXCHG_2ADDR_RTN_B64          : DS_Real_Renamed_gfx11_gfx12<0x06e, DS_WRXCHG2_RTN_B64, "ds_storexchg_2addr_rtn_b64">;
defm DS_STOREXCHG_2ADDR_STRIDE64_RTN_B64 : DS_Real_Renamed_gfx11_gfx12<0x06f, DS_WRXCHG2ST64_RTN_B64, "ds_storexchg_2addr_stride64_rtn_b64">;
defm DS_LOAD_B64                         : DS_Real_Renamed_gfx11_gfx12<0x076, DS_READ_B64, "ds_load_b64">;
defm DS_LOAD_2ADDR_B64                   : DS_Real_Renamed_gfx11_gfx12<0x077, DS_READ2_B64, "ds_load_2addr_b64">;
defm DS_LOAD_2ADDR_STRIDE64_B64          : DS_Real_Renamed_gfx11_gfx12<0x078, DS_READ2ST64_B64, "ds_load_2addr_stride64_b64">;

// D16 stores and loads.
defm DS_STORE_B8_D16_HI                  : DS_Real_Renamed_gfx11_gfx12<0x0a0, DS_WRITE_B8_D16_HI, "ds_store_b8_d16_hi">;
defm DS_STORE_B16_D16_HI                 : DS_Real_Renamed_gfx11_gfx12<0x0a1, DS_WRITE_B16_D16_HI, "ds_store_b16_d16_hi">;
defm DS_LOAD_U8_D16                      : DS_Real_Renamed_gfx11_gfx12<0x0a2, DS_READ_U8_D16, "ds_load_u8_d16">;
defm DS_LOAD_U8_D16_HI                   : DS_Real_Renamed_gfx11_gfx12<0x0a3, DS_READ_U8_D16_HI, "ds_load_u8_d16_hi">;
defm DS_LOAD_I8_D16                      : DS_Real_Renamed_gfx11_gfx12<0x0a4, DS_READ_I8_D16, "ds_load_i8_d16">;
defm DS_LOAD_I8_D16_HI                   : DS_Real_Renamed_gfx11_gfx12<0x0a5, DS_READ_I8_D16_HI, "ds_load_i8_d16_hi">;
defm DS_LOAD_U16_D16                     : DS_Real_Renamed_gfx11_gfx12<0x0a6, DS_READ_U16_D16, "ds_load_u16_d16">;
defm DS_LOAD_U16_D16_HI                  : DS_Real_Renamed_gfx11_gfx12<0x0a7, DS_READ_U16_D16_HI, "ds_load_u16_d16_hi">;

// ADDTID forms.
defm DS_STORE_ADDTID_B32                 : DS_Real_Renamed_gfx11_gfx12<0x0b0, DS_WRITE_ADDTID_B32, "ds_store_addtid_b32">;
defm DS_LOAD_ADDTID_B32                  : DS_Real_Renamed_gfx11_gfx12<0x0b1, DS_READ_ADDTID_B32, "ds_load_addtid_b32">;

// 96- and 128-bit stores and loads.
defm DS_STORE_B96                        : DS_Real_Renamed_gfx11_gfx12<0x0de, DS_WRITE_B96, "ds_store_b96">;
defm DS_STORE_B128                       : DS_Real_Renamed_gfx11_gfx12<0x0df, DS_WRITE_B128, "ds_store_b128">;
defm DS_LOAD_B96                         : DS_Real_Renamed_gfx11_gfx12<0x0fe, DS_READ_B96, "ds_load_b96">;
defm DS_LOAD_B128                        : DS_Real_Renamed_gfx11_gfx12<0x0ff, DS_READ_B128, "ds_load_b128">;
1306
// DS_CMPST_* are renamed to DS_CMPSTORE_* in GFX11, and the data operands (src and cmp) are also swapped
// compared to pre-GFX11.
// Note: no mnemonic alias is generated, to avoid a potential ambiguity due to the change in semantics.
1310
// Compare-and-store: the B32/B64 forms are encoded for GFX11 and GFX12, the
// F32/F64 forms for GFX11 only.
defm DS_CMPSTORE_B32      : DS_Real_gfx11_gfx12<0x010>;
defm DS_CMPSTORE_F32      : DS_Real_gfx11<0x011>;
defm DS_CMPSTORE_RTN_B32  : DS_Real_gfx11_gfx12<0x030>;
defm DS_CMPSTORE_RTN_F32  : DS_Real_gfx11<0x031>;
defm DS_CMPSTORE_B64      : DS_Real_gfx11_gfx12<0x050>;
defm DS_CMPSTORE_F64      : DS_Real_gfx11<0x051>;
defm DS_CMPSTORE_RTN_B64  : DS_Real_gfx11_gfx12<0x070>;
defm DS_CMPSTORE_RTN_F64  : DS_Real_gfx11<0x071>;

defm DS_ADD_RTN_F32       : DS_Real_gfx11_gfx12<0x079>;
defm DS_ADD_GS_REG_RTN    : DS_Real_gfx11<0x07a>;
defm DS_SUB_GS_REG_RTN    : DS_Real_gfx11<0x07b>;
defm DS_BVH_STACK_RTN_B32 : DS_Real_gfx11<0x0ad>;
1324
1325//===----------------------------------------------------------------------===//
1326// GFX10.
1327//===----------------------------------------------------------------------===//
1328
let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
  // Real GFX10 instruction for the pseudo named NAME, keeping its mnemonic.
  multiclass DS_Real_gfx10<bits<8> op> {
    defvar pseudo = !cast<DS_Pseudo>(NAME);
    def _gfx10
      : Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<op, pseudo,
                                                 SIEncodingFamily.GFX10>;
  }
} // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10"
1335
// Instructions that get only a GFX10 real encoding from this list.
defm DS_ADD_RTN_F32 : DS_Real_gfx10<0x055>;

// D16 stores and loads.
defm DS_WRITE_B8_D16_HI  : DS_Real_gfx10<0x0a0>;
defm DS_WRITE_B16_D16_HI : DS_Real_gfx10<0x0a1>;
defm DS_READ_U8_D16      : DS_Real_gfx10<0x0a2>;
defm DS_READ_U8_D16_HI   : DS_Real_gfx10<0x0a3>;
defm DS_READ_I8_D16      : DS_Real_gfx10<0x0a4>;
defm DS_READ_I8_D16_HI   : DS_Real_gfx10<0x0a5>;
defm DS_READ_U16_D16     : DS_Real_gfx10<0x0a6>;
defm DS_READ_U16_D16_HI  : DS_Real_gfx10<0x0a7>;

// ADDTID forms.
defm DS_WRITE_ADDTID_B32 : DS_Real_gfx10<0x0b0>;
defm DS_READ_ADDTID_B32  : DS_Real_gfx10<0x0b1>;
1347
1348//===----------------------------------------------------------------------===//
1349// GFX10, GFX11, GFX12.
1350//===----------------------------------------------------------------------===//
1351
// Same opcode on GFX10, GFX11 and GFX12.
multiclass DS_Real_gfx10_gfx11_gfx12<bits<8> op>
  : DS_Real_gfx10<op>, DS_Real_gfx11<op>, DS_Real_gfx12<op>;

// Same opcode on GFX10 and GFX11.
multiclass DS_Real_gfx10_gfx11<bits<8> op>
  : DS_Real_gfx10<op>, DS_Real_gfx11<op>;
1357
// Opcodes shared from GFX10 onwards; DS_ADD_SRC2_F32 is encoded for GFX10
// only.
defm DS_ADD_F32      : DS_Real_gfx10_gfx11_gfx12<0x015>;
defm DS_ADD_SRC2_F32 : DS_Real_gfx10<0x095>;
defm DS_PERMUTE_B32  : DS_Real_gfx10_gfx11_gfx12<0x0b2>;
defm DS_BPERMUTE_B32 : DS_Real_gfx10_gfx11_gfx12<0x0b3>;
1362
1363//===----------------------------------------------------------------------===//
1364// GFX7, GFX10, GFX11, GFX12.
1365//===----------------------------------------------------------------------===//
1366
let AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7" in {
  // Real GFX7 instruction for the pseudo named NAME; it is registered under
  // the SI encoding family.
  multiclass DS_Real_gfx7<bits<8> op> {
    defvar pseudo = !cast<DS_Pseudo>(NAME);
    def _gfx7
      : Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<op, pseudo,
                                                 SIEncodingFamily.SI>;
  }
} // End AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7"
1373
// Same opcode on GFX7, GFX10, GFX11 and GFX12.
multiclass DS_Real_gfx7_gfx10_gfx11_gfx12<bits<8> op>
  : DS_Real_gfx7<op>, DS_Real_gfx10_gfx11_gfx12<op>;

// Same opcode on GFX7, GFX10 and GFX11.
multiclass DS_Real_gfx7_gfx10_gfx11<bits<8> op>
  : DS_Real_gfx7<op>, DS_Real_gfx10_gfx11<op>;

// Same opcode on GFX7 and GFX10.
multiclass DS_Real_gfx7_gfx10<bits<8> op>
  : DS_Real_gfx7<op>, DS_Real_gfx10<op>;
1382
// FIXME-GFX7: Add tests when upstreaming this part.
defm DS_GWS_SEMA_RELEASE_ALL : DS_Real_gfx7_gfx10_gfx11<0x018>;
defm DS_WRAP_RTN_B32         : DS_Real_gfx7_gfx10_gfx11<0x034>;
defm DS_CONDXCHG32_RTN_B64   : DS_Real_gfx7_gfx10_gfx11_gfx12<0x07e>;

// 96- and 128-bit accesses under their pre-GFX11 names.
defm DS_WRITE_B96  : DS_Real_gfx7_gfx10<0x0de>;
defm DS_WRITE_B128 : DS_Real_gfx7_gfx10<0x0df>;
defm DS_READ_B96   : DS_Real_gfx7_gfx10<0x0fe>;
defm DS_READ_B128  : DS_Real_gfx7_gfx10<0x0ff>;
1391
1392//===----------------------------------------------------------------------===//
// GFX6, GFX7, GFX10, GFX11, GFX12.
1394//===----------------------------------------------------------------------===//
1395
let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
  // One real instruction shared by GFX6 and GFX7 for the pseudo named NAME.
  multiclass DS_Real_gfx6_gfx7<bits<8> op> {
    defvar pseudo = !cast<DS_Pseudo>(NAME);
    def _gfx6_gfx7
      : Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<op, pseudo,
                                                 SIEncodingFamily.SI>;
  }
} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"
1402
// Same opcode on GFX6/GFX7, GFX10, GFX11 and GFX12.
multiclass DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<bits<8> op>
  : DS_Real_gfx6_gfx7<op>, DS_Real_gfx10_gfx11_gfx12<op>;

// Same opcode on GFX6/GFX7, GFX10 and GFX11.
multiclass DS_Real_gfx6_gfx7_gfx10_gfx11<bits<8> op>
  : DS_Real_gfx6_gfx7<op>, DS_Real_gfx10_gfx11<op>;

// Same opcode on GFX6/GFX7 and GFX10.
multiclass DS_Real_gfx6_gfx7_gfx10<bits<8> op>
  : DS_Real_gfx6_gfx7<op>, DS_Real_gfx10<op>;
1411
// Non-returning 32-bit read-modify-write operations.
defm DS_ADD_U32   : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x000>;
defm DS_SUB_U32   : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x001>;
defm DS_RSUB_U32  : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x002>;
defm DS_INC_U32   : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x003>;
defm DS_DEC_U32   : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x004>;
defm DS_MIN_I32   : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x005>;
defm DS_MAX_I32   : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x006>;
defm DS_MIN_U32   : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x007>;
defm DS_MAX_U32   : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x008>;
defm DS_AND_B32   : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x009>;
defm DS_OR_B32    : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x00a>;
defm DS_XOR_B32   : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x00b>;
defm DS_MSKOR_B32 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x00c>;

// Stores and compare-and-store under their pre-GFX11 names.
defm DS_WRITE_B32      : DS_Real_gfx6_gfx7_gfx10<0x00d>;
defm DS_WRITE2_B32     : DS_Real_gfx6_gfx7_gfx10<0x00e>;
defm DS_WRITE2ST64_B32 : DS_Real_gfx6_gfx7_gfx10<0x00f>;
defm DS_CMPST_B32      : DS_Real_gfx6_gfx7_gfx10<0x010>;
defm DS_CMPST_F32      : DS_Real_gfx6_gfx7_gfx10<0x011>;

// Floating-point min/max, DS_NOP and the GWS operations.
defm DS_MIN_F32        : DS_Real_gfx6_gfx7_gfx10_gfx11<0x012>;
defm DS_MAX_F32        : DS_Real_gfx6_gfx7_gfx10_gfx11<0x013>;
defm DS_NOP            : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x014>;
defm DS_GWS_INIT       : DS_Real_gfx6_gfx7_gfx10_gfx11<0x019>;
defm DS_GWS_SEMA_V     : DS_Real_gfx6_gfx7_gfx10_gfx11<0x01a>;
defm DS_GWS_SEMA_BR    : DS_Real_gfx6_gfx7_gfx10_gfx11<0x01b>;
defm DS_GWS_SEMA_P     : DS_Real_gfx6_gfx7_gfx10_gfx11<0x01c>;
defm DS_GWS_BARRIER    : DS_Real_gfx6_gfx7_gfx10_gfx11<0x01d>;

// Sub-dword stores under their pre-GFX11 names.
defm DS_WRITE_B8  : DS_Real_gfx6_gfx7_gfx10<0x01e>;
defm DS_WRITE_B16 : DS_Real_gfx6_gfx7_gfx10<0x01f>;
1443
1444defm DS_ADD_RTN_U32         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x020>;
1445defm DS_SUB_RTN_U32         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x021>;
1446defm DS_RSUB_RTN_U32        : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x022>;
1447defm DS_INC_RTN_U32         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x023>;
1448defm DS_DEC_RTN_U32         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x024>;
1449defm DS_MIN_RTN_I32         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x025>;
1450defm DS_MAX_RTN_I32         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x026>;
1451defm DS_MIN_RTN_U32         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x027>;
1452defm DS_MAX_RTN_U32         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x028>;
1453defm DS_AND_RTN_B32         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x029>;
1454defm DS_OR_RTN_B32          : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x02a>;
1455defm DS_XOR_RTN_B32         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x02b>;
1456defm DS_MSKOR_RTN_B32       : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x02c>;
1457
1458defm DS_WRXCHG_RTN_B32      : DS_Real_gfx6_gfx7_gfx10<0x02d>;
1459defm DS_WRXCHG2_RTN_B32     : DS_Real_gfx6_gfx7_gfx10<0x02e>;
1460defm DS_WRXCHG2ST64_RTN_B32 : DS_Real_gfx6_gfx7_gfx10<0x02f>;
1461defm DS_CMPST_RTN_B32       : DS_Real_gfx6_gfx7_gfx10<0x030>;
1462defm DS_CMPST_RTN_F32       : DS_Real_gfx6_gfx7_gfx10<0x031>;
1463
1464defm DS_MIN_RTN_F32         : DS_Real_gfx6_gfx7_gfx10_gfx11<0x032>;
1465defm DS_MAX_RTN_F32         : DS_Real_gfx6_gfx7_gfx10_gfx11<0x033>;
1466defm DS_SWIZZLE_B32         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x035>;
1467
1468defm DS_READ_B32            : DS_Real_gfx6_gfx7_gfx10<0x036>;
1469defm DS_READ2_B32           : DS_Real_gfx6_gfx7_gfx10<0x037>;
1470defm DS_READ2ST64_B32       : DS_Real_gfx6_gfx7_gfx10<0x038>;
1471defm DS_READ_I8             : DS_Real_gfx6_gfx7_gfx10<0x039>;
1472defm DS_READ_U8             : DS_Real_gfx6_gfx7_gfx10<0x03a>;
1473defm DS_READ_I16            : DS_Real_gfx6_gfx7_gfx10<0x03b>;
1474defm DS_READ_U16            : DS_Real_gfx6_gfx7_gfx10<0x03c>;
1475
1476defm DS_CONSUME             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x03d>;
1477defm DS_APPEND              : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x03e>;
1478defm DS_ORDERED_COUNT       : DS_Real_gfx6_gfx7_gfx10_gfx11<0x03f>;
1479defm DS_ADD_U64             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x040>;
1480defm DS_SUB_U64             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x041>;
1481defm DS_RSUB_U64            : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x042>;
1482defm DS_INC_U64             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x043>;
1483defm DS_DEC_U64             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x044>;
1484defm DS_MIN_I64             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x045>;
1485defm DS_MAX_I64             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x046>;
1486defm DS_MIN_U64             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x047>;
1487defm DS_MAX_U64             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x048>;
1488defm DS_AND_B64             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x049>;
1489defm DS_OR_B64              : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x04a>;
1490defm DS_XOR_B64             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x04b>;
1491defm DS_MSKOR_B64           : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x04c>;
1492
1493defm DS_WRITE_B64           : DS_Real_gfx6_gfx7_gfx10<0x04d>;
1494defm DS_WRITE2_B64          : DS_Real_gfx6_gfx7_gfx10<0x04e>;
1495defm DS_WRITE2ST64_B64      : DS_Real_gfx6_gfx7_gfx10<0x04f>;
1496defm DS_CMPST_B64           : DS_Real_gfx6_gfx7_gfx10<0x050>;
1497defm DS_CMPST_F64           : DS_Real_gfx6_gfx7_gfx10<0x051>;
1498
1499defm DS_MIN_F64             : DS_Real_gfx6_gfx7_gfx10_gfx11<0x052>;
1500defm DS_MAX_F64             : DS_Real_gfx6_gfx7_gfx10_gfx11<0x053>;
1501defm DS_ADD_RTN_U64         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x060>;
1502defm DS_SUB_RTN_U64         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x061>;
1503defm DS_RSUB_RTN_U64        : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x062>;
1504defm DS_INC_RTN_U64         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x063>;
1505defm DS_DEC_RTN_U64         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x064>;
1506defm DS_MIN_RTN_I64         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x065>;
1507defm DS_MAX_RTN_I64         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x066>;
1508defm DS_MIN_RTN_U64         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x067>;
1509defm DS_MAX_RTN_U64         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x068>;
1510defm DS_AND_RTN_B64         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x069>;
1511defm DS_OR_RTN_B64          : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x06a>;
1512defm DS_XOR_RTN_B64         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x06b>;
1513defm DS_MSKOR_RTN_B64       : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x06c>;
1514
1515defm DS_WRXCHG_RTN_B64      : DS_Real_gfx6_gfx7_gfx10<0x06d>;
1516defm DS_WRXCHG2_RTN_B64     : DS_Real_gfx6_gfx7_gfx10<0x06e>;
1517defm DS_WRXCHG2ST64_RTN_B64 : DS_Real_gfx6_gfx7_gfx10<0x06f>;
1518defm DS_CMPST_RTN_B64       : DS_Real_gfx6_gfx7_gfx10<0x070>;
1519defm DS_CMPST_RTN_F64       : DS_Real_gfx6_gfx7_gfx10<0x071>;
1520
1521defm DS_MIN_RTN_F64         : DS_Real_gfx6_gfx7_gfx10_gfx11<0x072>;
1522defm DS_MAX_RTN_F64         : DS_Real_gfx6_gfx7_gfx10_gfx11<0x073>;
1523
1524defm DS_READ_B64            : DS_Real_gfx6_gfx7_gfx10<0x076>;
1525defm DS_READ2_B64           : DS_Real_gfx6_gfx7_gfx10<0x077>;
1526defm DS_READ2ST64_B64       : DS_Real_gfx6_gfx7_gfx10<0x078>;
1527defm DS_ADD_SRC2_U32        : DS_Real_gfx6_gfx7_gfx10<0x080>;
1528defm DS_SUB_SRC2_U32        : DS_Real_gfx6_gfx7_gfx10<0x081>;
1529defm DS_RSUB_SRC2_U32       : DS_Real_gfx6_gfx7_gfx10<0x082>;
1530defm DS_INC_SRC2_U32        : DS_Real_gfx6_gfx7_gfx10<0x083>;
1531defm DS_DEC_SRC2_U32        : DS_Real_gfx6_gfx7_gfx10<0x084>;
1532defm DS_MIN_SRC2_I32        : DS_Real_gfx6_gfx7_gfx10<0x085>;
1533defm DS_MAX_SRC2_I32        : DS_Real_gfx6_gfx7_gfx10<0x086>;
1534defm DS_MIN_SRC2_U32        : DS_Real_gfx6_gfx7_gfx10<0x087>;
1535defm DS_MAX_SRC2_U32        : DS_Real_gfx6_gfx7_gfx10<0x088>;
1536defm DS_AND_SRC2_B32        : DS_Real_gfx6_gfx7_gfx10<0x089>;
1537defm DS_OR_SRC2_B32         : DS_Real_gfx6_gfx7_gfx10<0x08a>;
1538defm DS_XOR_SRC2_B32        : DS_Real_gfx6_gfx7_gfx10<0x08b>;
1539defm DS_WRITE_SRC2_B32      : DS_Real_gfx6_gfx7_gfx10<0x08d>;
1540defm DS_MIN_SRC2_F32        : DS_Real_gfx6_gfx7_gfx10<0x092>;
1541defm DS_MAX_SRC2_F32        : DS_Real_gfx6_gfx7_gfx10<0x093>;
1542defm DS_ADD_SRC2_U64        : DS_Real_gfx6_gfx7_gfx10<0x0c0>;
1543defm DS_SUB_SRC2_U64        : DS_Real_gfx6_gfx7_gfx10<0x0c1>;
1544defm DS_RSUB_SRC2_U64       : DS_Real_gfx6_gfx7_gfx10<0x0c2>;
1545defm DS_INC_SRC2_U64        : DS_Real_gfx6_gfx7_gfx10<0x0c3>;
1546defm DS_DEC_SRC2_U64        : DS_Real_gfx6_gfx7_gfx10<0x0c4>;
1547defm DS_MIN_SRC2_I64        : DS_Real_gfx6_gfx7_gfx10<0x0c5>;
1548defm DS_MAX_SRC2_I64        : DS_Real_gfx6_gfx7_gfx10<0x0c6>;
1549defm DS_MIN_SRC2_U64        : DS_Real_gfx6_gfx7_gfx10<0x0c7>;
1550defm DS_MAX_SRC2_U64        : DS_Real_gfx6_gfx7_gfx10<0x0c8>;
1551defm DS_AND_SRC2_B64        : DS_Real_gfx6_gfx7_gfx10<0x0c9>;
1552defm DS_OR_SRC2_B64         : DS_Real_gfx6_gfx7_gfx10<0x0ca>;
1553defm DS_XOR_SRC2_B64        : DS_Real_gfx6_gfx7_gfx10<0x0cb>;
1554defm DS_WRITE_SRC2_B64      : DS_Real_gfx6_gfx7_gfx10<0x0cd>;
1555defm DS_MIN_SRC2_F64        : DS_Real_gfx6_gfx7_gfx10<0x0d2>;
1556defm DS_MAX_SRC2_F64        : DS_Real_gfx6_gfx7_gfx10<0x0d3>;
1557
1558//===----------------------------------------------------------------------===//
1559// GFX8, GFX9 (VI).
1560//===----------------------------------------------------------------------===//
1561
// Real (encodable) form of the DS pseudo instruction `ps`, accepted by the
// assembler when isGFX8GFX9 holds and decoded from the "GFX8" namespace.
//
//   op - 8-bit opcode placed in Inst{24-17}.
//   ps - pseudo instruction whose operands, mnemonic and has_* flags drive
//        the encoding; any field whose has_* flag is 0 is encoded as 0
//        unless noted below.
class DS_Real_vi <bits<8> op, DS_Pseudo ps> :
  DS_Real <ps>,
  SIMCInstr <ps.Mnemonic, SIEncodingFamily.VI> {
  let AssemblerPredicate = isGFX8GFX9;
  let DecoderNamespace = "GFX8";

  // encoding
  let Inst{7-0}   = !if(ps.has_offset0, offset0, 0);
  let Inst{15-8}  = !if(ps.has_offset1, offset1, 0);
  // Without a gds operand the bit takes the pseudo's fixed gdsValue.
  let Inst{16}    = !if(ps.has_gds, gds, ps.gdsValue);
  let Inst{24-17} = op;
  let Inst{25}    = acc; // `acc` is declared outside this class
  let Inst{31-26} = 0x36; // ds prefix
  // When has_gws_data0 is set, the low 8 bits of data0 occupy the addr field.
  let Inst{39-32} = !if(ps.has_addr, addr, !if(ps.has_gws_data0, data0{7-0}, 0));
  let Inst{47-40} = !if(ps.has_data0, data0{7-0}, 0);
  let Inst{55-48} = !if(ps.has_data1, data1{7-0}, 0);
  let Inst{63-56} = !if(ps.has_vdst, vdst{7-0}, 0);
}
1580
// Opcode table for the DS_Real_vi encodings. Each def pairs an 8-bit opcode
// with the DS_Pseudo record it encodes. Entries are not strictly sorted by
// opcode.
def DS_ADD_U32_vi         : DS_Real_vi<0x0,  DS_ADD_U32>;
def DS_SUB_U32_vi         : DS_Real_vi<0x1,  DS_SUB_U32>;
def DS_RSUB_U32_vi        : DS_Real_vi<0x2,  DS_RSUB_U32>;
def DS_INC_U32_vi         : DS_Real_vi<0x3,  DS_INC_U32>;
def DS_DEC_U32_vi         : DS_Real_vi<0x4,  DS_DEC_U32>;
def DS_MIN_I32_vi         : DS_Real_vi<0x5,  DS_MIN_I32>;
def DS_MAX_I32_vi         : DS_Real_vi<0x6,  DS_MAX_I32>;
def DS_MIN_U32_vi         : DS_Real_vi<0x7,  DS_MIN_U32>;
def DS_MAX_U32_vi         : DS_Real_vi<0x8,  DS_MAX_U32>;
def DS_AND_B32_vi         : DS_Real_vi<0x9,  DS_AND_B32>;
def DS_OR_B32_vi          : DS_Real_vi<0xa,  DS_OR_B32>;
def DS_XOR_B32_vi         : DS_Real_vi<0xb,  DS_XOR_B32>;
def DS_MSKOR_B32_vi       : DS_Real_vi<0xc,  DS_MSKOR_B32>;
def DS_WRITE_B32_vi       : DS_Real_vi<0xd,  DS_WRITE_B32>;
def DS_WRITE2_B32_vi      : DS_Real_vi<0xe,  DS_WRITE2_B32>;
def DS_WRITE2ST64_B32_vi  : DS_Real_vi<0xf,  DS_WRITE2ST64_B32>;
def DS_CMPST_B32_vi       : DS_Real_vi<0x10, DS_CMPST_B32>;
def DS_CMPST_F32_vi       : DS_Real_vi<0x11, DS_CMPST_F32>;
def DS_MIN_F32_vi         : DS_Real_vi<0x12, DS_MIN_F32>;
def DS_MAX_F32_vi         : DS_Real_vi<0x13, DS_MAX_F32>;
def DS_NOP_vi             : DS_Real_vi<0x14, DS_NOP>;
def DS_ADD_F32_vi         : DS_Real_vi<0x15, DS_ADD_F32>;
def DS_GWS_INIT_vi        : DS_Real_vi<0x99, DS_GWS_INIT>;
def DS_GWS_SEMA_V_vi      : DS_Real_vi<0x9a, DS_GWS_SEMA_V>;
def DS_GWS_SEMA_BR_vi     : DS_Real_vi<0x9b, DS_GWS_SEMA_BR>;
def DS_GWS_SEMA_P_vi      : DS_Real_vi<0x9c, DS_GWS_SEMA_P>;
def DS_GWS_BARRIER_vi     : DS_Real_vi<0x9d, DS_GWS_BARRIER>;
def DS_WRITE_ADDTID_B32_vi : DS_Real_vi<0x1d, DS_WRITE_ADDTID_B32>;
def DS_WRITE_B8_vi        : DS_Real_vi<0x1e, DS_WRITE_B8>;
def DS_WRITE_B16_vi       : DS_Real_vi<0x1f, DS_WRITE_B16>;
def DS_ADD_RTN_U32_vi     : DS_Real_vi<0x20, DS_ADD_RTN_U32>;
def DS_SUB_RTN_U32_vi     : DS_Real_vi<0x21, DS_SUB_RTN_U32>;
def DS_RSUB_RTN_U32_vi    : DS_Real_vi<0x22, DS_RSUB_RTN_U32>;
def DS_INC_RTN_U32_vi     : DS_Real_vi<0x23, DS_INC_RTN_U32>;
def DS_DEC_RTN_U32_vi     : DS_Real_vi<0x24, DS_DEC_RTN_U32>;
def DS_MIN_RTN_I32_vi     : DS_Real_vi<0x25, DS_MIN_RTN_I32>;
def DS_MAX_RTN_I32_vi     : DS_Real_vi<0x26, DS_MAX_RTN_I32>;
def DS_MIN_RTN_U32_vi     : DS_Real_vi<0x27, DS_MIN_RTN_U32>;
def DS_MAX_RTN_U32_vi     : DS_Real_vi<0x28, DS_MAX_RTN_U32>;
def DS_AND_RTN_B32_vi     : DS_Real_vi<0x29, DS_AND_RTN_B32>;
def DS_OR_RTN_B32_vi      : DS_Real_vi<0x2a, DS_OR_RTN_B32>;
def DS_XOR_RTN_B32_vi     : DS_Real_vi<0x2b, DS_XOR_RTN_B32>;
def DS_MSKOR_RTN_B32_vi   : DS_Real_vi<0x2c, DS_MSKOR_RTN_B32>;
def DS_WRXCHG_RTN_B32_vi  : DS_Real_vi<0x2d, DS_WRXCHG_RTN_B32>;
def DS_WRXCHG2_RTN_B32_vi : DS_Real_vi<0x2e, DS_WRXCHG2_RTN_B32>;
def DS_WRXCHG2ST64_RTN_B32_vi : DS_Real_vi<0x2f, DS_WRXCHG2ST64_RTN_B32>;
def DS_CMPST_RTN_B32_vi   : DS_Real_vi<0x30, DS_CMPST_RTN_B32>;
def DS_CMPST_RTN_F32_vi   : DS_Real_vi<0x31, DS_CMPST_RTN_F32>;
def DS_MIN_RTN_F32_vi     : DS_Real_vi<0x32, DS_MIN_RTN_F32>;
def DS_MAX_RTN_F32_vi     : DS_Real_vi<0x33, DS_MAX_RTN_F32>;
def DS_WRAP_RTN_B32_vi    : DS_Real_vi<0x34, DS_WRAP_RTN_B32>;
def DS_ADD_RTN_F32_vi     : DS_Real_vi<0x35, DS_ADD_RTN_F32>;
def DS_READ_B32_vi        : DS_Real_vi<0x36, DS_READ_B32>;
def DS_READ2_B32_vi       : DS_Real_vi<0x37, DS_READ2_B32>;
def DS_READ2ST64_B32_vi   : DS_Real_vi<0x38, DS_READ2ST64_B32>;
def DS_READ_I8_vi         : DS_Real_vi<0x39, DS_READ_I8>;
def DS_READ_U8_vi         : DS_Real_vi<0x3a, DS_READ_U8>;
def DS_READ_I16_vi        : DS_Real_vi<0x3b, DS_READ_I16>;
def DS_READ_U16_vi        : DS_Real_vi<0x3c, DS_READ_U16>;
def DS_READ_ADDTID_B32_vi : DS_Real_vi<0xb6, DS_READ_ADDTID_B32>;
def DS_CONSUME_vi         : DS_Real_vi<0xbd, DS_CONSUME>;
def DS_APPEND_vi          : DS_Real_vi<0xbe, DS_APPEND>;
def DS_ORDERED_COUNT_vi   : DS_Real_vi<0xbf, DS_ORDERED_COUNT>;
def DS_SWIZZLE_B32_vi     : DS_Real_vi<0x3d, DS_SWIZZLE_B32>;
def DS_PERMUTE_B32_vi     : DS_Real_vi<0x3e, DS_PERMUTE_B32>;
def DS_BPERMUTE_B32_vi    : DS_Real_vi<0x3f, DS_BPERMUTE_B32>;

def DS_ADD_U64_vi         : DS_Real_vi<0x40, DS_ADD_U64>;
def DS_SUB_U64_vi         : DS_Real_vi<0x41, DS_SUB_U64>;
def DS_RSUB_U64_vi        : DS_Real_vi<0x42, DS_RSUB_U64>;
def DS_INC_U64_vi         : DS_Real_vi<0x43, DS_INC_U64>;
def DS_DEC_U64_vi         : DS_Real_vi<0x44, DS_DEC_U64>;
def DS_MIN_I64_vi         : DS_Real_vi<0x45, DS_MIN_I64>;
def DS_MAX_I64_vi         : DS_Real_vi<0x46, DS_MAX_I64>;
def DS_MIN_U64_vi         : DS_Real_vi<0x47, DS_MIN_U64>;
def DS_MAX_U64_vi         : DS_Real_vi<0x48, DS_MAX_U64>;
def DS_AND_B64_vi         : DS_Real_vi<0x49, DS_AND_B64>;
def DS_OR_B64_vi          : DS_Real_vi<0x4a, DS_OR_B64>;
def DS_XOR_B64_vi         : DS_Real_vi<0x4b, DS_XOR_B64>;
def DS_MSKOR_B64_vi       : DS_Real_vi<0x4c, DS_MSKOR_B64>;
def DS_WRITE_B64_vi       : DS_Real_vi<0x4d, DS_WRITE_B64>;
def DS_WRITE2_B64_vi      : DS_Real_vi<0x4e, DS_WRITE2_B64>;
def DS_WRITE2ST64_B64_vi  : DS_Real_vi<0x4f, DS_WRITE2ST64_B64>;
def DS_CMPST_B64_vi       : DS_Real_vi<0x50, DS_CMPST_B64>;
def DS_CMPST_F64_vi       : DS_Real_vi<0x51, DS_CMPST_F64>;
def DS_MIN_F64_vi         : DS_Real_vi<0x52, DS_MIN_F64>;
def DS_MAX_F64_vi         : DS_Real_vi<0x53, DS_MAX_F64>;

def DS_WRITE_B8_D16_HI_vi  : DS_Real_vi<0x54, DS_WRITE_B8_D16_HI>;
def DS_WRITE_B16_D16_HI_vi : DS_Real_vi<0x55, DS_WRITE_B16_D16_HI>;

def DS_READ_U8_D16_vi     : DS_Real_vi<0x56, DS_READ_U8_D16>;
def DS_READ_U8_D16_HI_vi  : DS_Real_vi<0x57, DS_READ_U8_D16_HI>;
def DS_READ_I8_D16_vi     : DS_Real_vi<0x58, DS_READ_I8_D16>;
def DS_READ_I8_D16_HI_vi  : DS_Real_vi<0x59, DS_READ_I8_D16_HI>;
def DS_READ_U16_D16_vi    : DS_Real_vi<0x5a, DS_READ_U16_D16>;
def DS_READ_U16_D16_HI_vi : DS_Real_vi<0x5b, DS_READ_U16_D16_HI>;

def DS_ADD_RTN_U64_vi     : DS_Real_vi<0x60, DS_ADD_RTN_U64>;
def DS_SUB_RTN_U64_vi     : DS_Real_vi<0x61, DS_SUB_RTN_U64>;
def DS_RSUB_RTN_U64_vi    : DS_Real_vi<0x62, DS_RSUB_RTN_U64>;
def DS_INC_RTN_U64_vi     : DS_Real_vi<0x63, DS_INC_RTN_U64>;
def DS_DEC_RTN_U64_vi     : DS_Real_vi<0x64, DS_DEC_RTN_U64>;
def DS_MIN_RTN_I64_vi     : DS_Real_vi<0x65, DS_MIN_RTN_I64>;
def DS_MAX_RTN_I64_vi     : DS_Real_vi<0x66, DS_MAX_RTN_I64>;
def DS_MIN_RTN_U64_vi     : DS_Real_vi<0x67, DS_MIN_RTN_U64>;
def DS_MAX_RTN_U64_vi     : DS_Real_vi<0x68, DS_MAX_RTN_U64>;
def DS_AND_RTN_B64_vi     : DS_Real_vi<0x69, DS_AND_RTN_B64>;
def DS_OR_RTN_B64_vi      : DS_Real_vi<0x6a, DS_OR_RTN_B64>;
def DS_XOR_RTN_B64_vi     : DS_Real_vi<0x6b, DS_XOR_RTN_B64>;
def DS_MSKOR_RTN_B64_vi   : DS_Real_vi<0x6c, DS_MSKOR_RTN_B64>;
def DS_WRXCHG_RTN_B64_vi  : DS_Real_vi<0x6d, DS_WRXCHG_RTN_B64>;
def DS_WRXCHG2_RTN_B64_vi : DS_Real_vi<0x6e, DS_WRXCHG2_RTN_B64>;
def DS_WRXCHG2ST64_RTN_B64_vi : DS_Real_vi<0x6f, DS_WRXCHG2ST64_RTN_B64>;
def DS_CONDXCHG32_RTN_B64_vi   : DS_Real_vi<0x7e, DS_CONDXCHG32_RTN_B64>;
def DS_GWS_SEMA_RELEASE_ALL_vi : DS_Real_vi<0x98, DS_GWS_SEMA_RELEASE_ALL>;
def DS_CMPST_RTN_B64_vi   : DS_Real_vi<0x70, DS_CMPST_RTN_B64>;
def DS_CMPST_RTN_F64_vi   : DS_Real_vi<0x71, DS_CMPST_RTN_F64>;
def DS_MIN_RTN_F64_vi     : DS_Real_vi<0x72, DS_MIN_RTN_F64>;
def DS_MAX_RTN_F64_vi     : DS_Real_vi<0x73, DS_MAX_RTN_F64>;

def DS_READ_B64_vi        : DS_Real_vi<0x76, DS_READ_B64>;
def DS_READ2_B64_vi       : DS_Real_vi<0x77, DS_READ2_B64>;
def DS_READ2ST64_B64_vi   : DS_Real_vi<0x78, DS_READ2ST64_B64>;

def DS_ADD_SRC2_U32_vi    : DS_Real_vi<0x80, DS_ADD_SRC2_U32>;
def DS_SUB_SRC2_U32_vi    : DS_Real_vi<0x81, DS_SUB_SRC2_U32>;
def DS_RSUB_SRC2_U32_vi   : DS_Real_vi<0x82, DS_RSUB_SRC2_U32>;
def DS_INC_SRC2_U32_vi    : DS_Real_vi<0x83, DS_INC_SRC2_U32>;
def DS_DEC_SRC2_U32_vi    : DS_Real_vi<0x84, DS_DEC_SRC2_U32>;
def DS_MIN_SRC2_I32_vi    : DS_Real_vi<0x85, DS_MIN_SRC2_I32>;
def DS_MAX_SRC2_I32_vi    : DS_Real_vi<0x86, DS_MAX_SRC2_I32>;
def DS_MIN_SRC2_U32_vi    : DS_Real_vi<0x87, DS_MIN_SRC2_U32>;
def DS_MAX_SRC2_U32_vi    : DS_Real_vi<0x88, DS_MAX_SRC2_U32>;
def DS_AND_SRC2_B32_vi    : DS_Real_vi<0x89, DS_AND_SRC2_B32>;
def DS_OR_SRC2_B32_vi     : DS_Real_vi<0x8a, DS_OR_SRC2_B32>;
def DS_XOR_SRC2_B32_vi    : DS_Real_vi<0x8b, DS_XOR_SRC2_B32>;
def DS_WRITE_SRC2_B32_vi  : DS_Real_vi<0x8d, DS_WRITE_SRC2_B32>;
def DS_MIN_SRC2_F32_vi    : DS_Real_vi<0x92, DS_MIN_SRC2_F32>;
def DS_MAX_SRC2_F32_vi    : DS_Real_vi<0x93, DS_MAX_SRC2_F32>;
def DS_ADD_SRC2_F32_vi    : DS_Real_vi<0x95, DS_ADD_SRC2_F32>;
def DS_ADD_SRC2_U64_vi    : DS_Real_vi<0xc0, DS_ADD_SRC2_U64>;
def DS_SUB_SRC2_U64_vi    : DS_Real_vi<0xc1, DS_SUB_SRC2_U64>;
def DS_RSUB_SRC2_U64_vi   : DS_Real_vi<0xc2, DS_RSUB_SRC2_U64>;
def DS_INC_SRC2_U64_vi    : DS_Real_vi<0xc3, DS_INC_SRC2_U64>;
def DS_DEC_SRC2_U64_vi    : DS_Real_vi<0xc4, DS_DEC_SRC2_U64>;
def DS_MIN_SRC2_I64_vi    : DS_Real_vi<0xc5, DS_MIN_SRC2_I64>;
def DS_MAX_SRC2_I64_vi    : DS_Real_vi<0xc6, DS_MAX_SRC2_I64>;
def DS_MIN_SRC2_U64_vi    : DS_Real_vi<0xc7, DS_MIN_SRC2_U64>;
def DS_MAX_SRC2_U64_vi    : DS_Real_vi<0xc8, DS_MAX_SRC2_U64>;
def DS_AND_SRC2_B64_vi    : DS_Real_vi<0xc9, DS_AND_SRC2_B64>;
def DS_OR_SRC2_B64_vi     : DS_Real_vi<0xca, DS_OR_SRC2_B64>;
def DS_XOR_SRC2_B64_vi    : DS_Real_vi<0xcb, DS_XOR_SRC2_B64>;
def DS_WRITE_SRC2_B64_vi  : DS_Real_vi<0xcd, DS_WRITE_SRC2_B64>;
def DS_MIN_SRC2_F64_vi    : DS_Real_vi<0xd2, DS_MIN_SRC2_F64>;
def DS_MAX_SRC2_F64_vi    : DS_Real_vi<0xd3, DS_MAX_SRC2_F64>;
def DS_WRITE_B96_vi       : DS_Real_vi<0xde, DS_WRITE_B96>;
def DS_WRITE_B128_vi      : DS_Real_vi<0xdf, DS_WRITE_B128>;
def DS_READ_B96_vi        : DS_Real_vi<0xfe, DS_READ_B96>;
def DS_READ_B128_vi       : DS_Real_vi<0xff, DS_READ_B128>;
1741
// DS_Real_vi encodings gated on the isGFX90APlus subtarget predicate.
let SubtargetPredicate = isGFX90APlus in {
  def DS_ADD_F64_vi     : DS_Real_vi<0x5c, DS_ADD_F64>;
  def DS_ADD_RTN_F64_vi : DS_Real_vi<0x7c, DS_ADD_RTN_F64>;
} // End SubtargetPredicate = isGFX90APlus
1746
// DS_Real_vi encodings gated on the isGFX940Plus subtarget predicate.
let SubtargetPredicate = isGFX940Plus in {
  def DS_PK_ADD_F16_vi     : DS_Real_vi<0x17, DS_PK_ADD_F16>;
  def DS_PK_ADD_RTN_F16_vi : DS_Real_vi<0xb7, DS_PK_ADD_RTN_F16>;
  def DS_PK_ADD_BF16_vi     : DS_Real_vi<0x18, DS_PK_ADD_BF16>;
  def DS_PK_ADD_RTN_BF16_vi : DS_Real_vi<0xb8, DS_PK_ADD_RTN_BF16>;
} // End SubtargetPredicate = isGFX940Plus
1753