xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/FLATInstructions.td (revision 8311bc5f17dec348749f763b82dfe2737bc53cd7)
1//===-- FLATInstructions.td - FLAT Instruction Definitions ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
// Addressing-mode ComplexPatterns. The string argument names the selector
// routine that performs the match; the integer before it is the operand
// count given to ComplexPattern.

// Address + immediate offset forms.
def FlatOffset    : ComplexPattern<iPTR, 2, "SelectFlatOffset",    [], [SDNPWantRoot], -10>;
def GlobalOffset  : ComplexPattern<iPTR, 2, "SelectGlobalOffset",  [], [SDNPWantRoot], -10>;
def ScratchOffset : ComplexPattern<iPTR, 2, "SelectScratchOffset", [], [SDNPWantRoot], -10>;

// Forms that involve a scalar address (saddr).
def GlobalSAddr   : ComplexPattern<iPTR, 3, "SelectGlobalSAddr",   [], [SDNPWantRoot], -10>;
def ScratchSAddr  : ComplexPattern<iPTR, 2, "SelectScratchSAddr",  [], [SDNPWantRoot], -10>;
def ScratchSVAddr : ComplexPattern<iPTR, 3, "SelectScratchSVAddr", [], [SDNPWantRoot], -10>;
16
17//===----------------------------------------------------------------------===//
18// FLAT classes
19//===----------------------------------------------------------------------===//
20
// Common base class of all FLAT pseudo instructions (flat, global and scratch
// segment). It records the mnemonic and asm operand string and exposes a set
// of has_*/…Value knobs that the real (encoded) instruction classes read when
// filling in encoding bits.
class FLAT_Pseudo<string opName,
                  dag outs,
                  dag ins,
                  string asmOps,
                  list<dag> pattern=[]>
  : InstSI<outs, ins, "", pattern>,
    SIMCInstr<opName, SIEncodingFamily.NONE> {

  let isCodeGenOnly = 1;
  let isPseudo = 1;

  let FLAT = 1;

  let hasSideEffects = 0;
  let UseNamedOperandTable = 1;
  let SchedRW = [WriteVMEM];

  // Kept separately so that a real instruction can rebuild its asm string.
  string Mnemonic = opName;
  string AsmOperands = asmOps;

  // Segment selectors; both zero means the plain flat segment.
  bits<1> is_flat_global = 0;
  bits<1> is_flat_scratch = 0;

  bits<1> has_vdst = 1;

  // "Having" saddr and "enabling" saddr are tracked separately: saddr is
  // only valid for scratch and global instructions. Before gfx9 these bits
  // were reserved, so for the original flat segment instructions we do not
  // necessarily want to write the "disabled" value into them.
  bits<1> has_saddr = 0;
  bits<1> enabled_saddr = 0;
  bits<7> saddr_value = 0;
  bits<1> has_vaddr = 1;

  // For each cache-policy bit: has_X says whether the bit comes from the
  // operand, XValue is the value available when it does not.
  bits<1> has_data = 1;
  bits<1> has_glc  = 1;
  bits<1> glcValue = 0;
  bits<1> has_dlc  = 1;
  bits<1> dlcValue = 0;
  bits<1> has_sccb  = 1;
  bits<1> sccbValue = 0;
  bits<1> has_sve  = 0; // Scratch VGPR Enable
  bits<1> lds = 0;
  bits<1> sve = 0;

  // Global wins over scratch; anything else needs the flat address space.
  let SubtargetPredicate =
    !if(is_flat_global, HasFlatGlobalInsts,
        !if(is_flat_scratch, HasFlatScratchInsts, HasFlatAddressSpace));

  // TODO: M0 if it could possibly access LDS (before gfx9? only)?
  let Uses = !if(is_flat_global, [EXEC], [EXEC, FLAT_SCR]);

  // Internally, FLAT instructions are executed as both an LDS and a Buffer
  // instruction, so they increment both VM_CNT and LGKM_CNT and are not
  // considered done until both have been decremented. LGKM_CNT is cleared
  // for the global and scratch segments.
  let VM_CNT = 1;
  let LGKM_CNT = !not(!or(is_flat_global, is_flat_scratch));

  let FlatGlobal = is_flat_global;

  let FlatScratch = is_flat_scratch;
}
80
// Base class of encoded (real) FLAT instructions. Operand lists and the asm
// operand string are taken from the pseudo `ps`; `op` is the 7-bit opcode
// placed in the 64-bit encoding laid out below.
class FLAT_Real <bits<7> op, FLAT_Pseudo ps, string opName = ps.Mnemonic>
  : InstSI <ps.OutOperandList, ps.InOperandList, opName # ps.AsmOperands, []>,
    Enc64 {

  let isCodeGenOnly = 0;
  let isPseudo = 0;

  let FLAT = 1;

  // Properties mirrored from the pseudo.
  let SubtargetPredicate   = ps.SubtargetPredicate;
  let OtherPredicates      = ps.OtherPredicates;
  let AsmMatchConverter    = ps.AsmMatchConverter;
  let UseNamedOperandTable = ps.UseNamedOperandTable;
  let TSFlags              = ps.TSFlags;
  let SchedRW              = ps.SchedRW;
  let mayLoad              = ps.mayLoad;
  let mayStore             = ps.mayStore;
  let IsAtomicRet          = ps.IsAtomicRet;
  let IsAtomicNoRet        = ps.IsAtomicNoRet;
  let VM_CNT               = ps.VM_CNT;
  let LGKM_CNT             = ps.LGKM_CNT;
  let VALU                 = ps.VALU;

  // Operand fields referenced by the encoding.
  bits<8> vaddr;
  bits<10> vdata;
  bits<7> saddr;
  bits<10> vdst;

  bits<5> cpol;

  // Only valid on gfx9
  bits<1> lds = ps.lds; // LDS DMA for global and scratch

  // Segment, 00=flat, 01=scratch, 10=global, 11=reserved
  bits<2> seg = !if(ps.is_flat_global, 0b10,
                    !if(ps.is_flat_scratch, 0b01, 0));

  // Signed offset. For flat-segment accesses the highest bit is ignored and
  // the field is treated as a 12-bit unsigned value.
  bits<13> offset;

  // GFX90A+ only: instruction uses AccVGPR for data. Taken from bit 9 of
  // vdst when there is a destination, else from bit 9 of vdata, else 0.
  bits<1> acc = !if(ps.has_vdst, vdst{9}, !if(ps.has_data, vdata{9}, 0));

  // We don't use tfe right now, and it was removed in gfx9.
  bits<1> tfe = 0;

  // ---- Encoding layout ----

  // Only valid on GFX9+
  let Inst{12-0} = offset;
  // Bit 13 carries sve when the pseudo has it, otherwise the lds flag.
  let Inst{13} = !if(ps.has_sve, ps.sve, lds);
  let Inst{15-14} = seg;

  let Inst{16}    = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glcValue);
  let Inst{17}    = cpol{CPolBit.SLC};
  let Inst{24-18} = op;
  let Inst{31-26} = 0x37; // Encoding.
  let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
  let Inst{47-40} = !if(ps.has_data, vdata{7-0}, ?);

  // Bits 54-48: saddr when enabled, 0x7f when the pseudo has an saddr field
  // but it is disabled, and 0 when the pseudo has no saddr field at all.
  let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7f), 0);

  let Inst{55}    = acc; // nv on GFX9+, TFE before. AccVGPR for data on GFX90A.
  let Inst{63-56} = !if(ps.has_vdst, vdst{7-0}, ?);
}
146
// Mix-in that tags a record with a shared name (SaddrOp) and a flag saying
// whether it is the saddr form (IsSaddr).
// NOTE(review): presumably used to build a lookup between the saddr and
// non-saddr variants of one operation; the consumer is not visible here.
class GlobalSaddrTable <bit is_saddr, string Name = ""> {
  string SaddrOp = Name;
  bit IsSaddr = is_saddr;
}
151
// TODO: Is exec allowed for saddr? The disabled value 0x7f is the same
// encoding value as exec_hi, so it isn't possible to use that if saddr is
// 32-bit (which isn't handled here yet).
//
// Load pseudo for the flat and global segments.
//   HasTiedOutput - adds a $vdst_in input tied to $vdst.
//   HasSaddr      - the asm string has an saddr slot ("off" when disabled).
//   EnableSaddr   - address is SReg_64 $saddr plus VGPR_32 $vaddr instead of
//                   a single VReg_64 $vaddr.
class FLAT_Load_Pseudo <string opName, RegisterClass regClass,
  bit HasTiedOutput = 0,
  bit HasSaddr = 0, bit EnableSaddr = 0,
  RegisterOperand vdata_op = getLdStRegisterOperand<regClass>.ret>
  : FLAT_Pseudo<
      opName,
      (outs vdata_op:$vdst),
      !con(
        // Address operands.
        !if(EnableSaddr,
            (ins SReg_64:$saddr, VGPR_32:$vaddr),
            (ins VReg_64:$vaddr)),
        (ins flat_offset:$offset),
        // FIXME: Operands with default values do not work with following non-optional operands.
        !if(HasTiedOutput,
            (ins CPol:$cpol, vdata_op:$vdst_in),
            (ins CPol_0:$cpol))),
      " $vdst, $vaddr"
        # !if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")
        # "$offset$cpol"> {
  let mayLoad = 1;
  let maybeAtomic = 1;
  let has_data = 0;
  let has_saddr = HasSaddr;
  let enabled_saddr = EnableSaddr;
  let PseudoInstr = opName#!if(!and(HasSaddr, EnableSaddr), "_SADDR", "");

  // The tied input is not a separate field in the encoding.
  let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", "");
  let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
}
181
// Store pseudo for the flat and global segments. No outputs; $vdata is the
// value stored. With EnableSaddr the address is VGPR_32 $vaddr plus SReg_64
// $saddr, otherwise a single VReg_64 $vaddr.
class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass,
  bit HasSaddr = 0, bit EnableSaddr = 0>
  : FLAT_Pseudo<
      opName,
      (outs),
      !con(
        !if(EnableSaddr,
            (ins VGPR_32:$vaddr,
                 getLdStRegisterOperand<vdataClass>.ret:$vdata,
                 SReg_64:$saddr),
            (ins VReg_64:$vaddr,
                 getLdStRegisterOperand<vdataClass>.ret:$vdata)),
        (ins flat_offset:$offset, CPol_0:$cpol)),
      " $vaddr, $vdata"
        # !if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")
        # "$offset$cpol"> {
  let mayStore = 1;
  let mayLoad  = 0;
  let maybeAtomic = 1;
  let has_vdst = 0;
  let has_saddr = HasSaddr;
  let enabled_saddr = EnableSaddr;
  let PseudoInstr = opName#!if(!and(HasSaddr, EnableSaddr), "_SADDR", "");
}
200
// Emits the two global-segment load pseudos for one mnemonic: the plain form
// (saddr slot present but disabled) and the _SADDR form. HasTiedInput is
// forwarded as FLAT_Load_Pseudo's HasTiedOutput argument.
multiclass FLAT_Global_Load_Pseudo<string opName,
                                   RegisterClass regClass,
                                   bit HasTiedInput = 0> {
  let SubtargetPredicate = HasFlatGlobalInsts, is_flat_global = 1 in {
    def ""     : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1>,
                 GlobalSaddrTable<0, opName>;
    def _SADDR : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1, 1>,
                 GlobalSaddrTable<1, opName>;
  }
}
209
// Global "addtid" load pseudo: there is no $vaddr operand at all (has_vaddr
// is 0); the only address operand is the optional SReg_64 $saddr.
class FLAT_Global_Load_AddTid_Pseudo <string opName, RegisterClass regClass,
  bit HasTiedOutput = 0, bit EnableSaddr = 0>
  : FLAT_Pseudo<
      opName,
      (outs regClass:$vdst),
      !con(
        !if(EnableSaddr, (ins SReg_64:$saddr), (ins)),
        (ins flat_offset:$offset, CPol_0:$cpol),
        !if(HasTiedOutput, (ins regClass:$vdst_in), (ins))),
      " $vdst, " # !if(EnableSaddr, "$saddr", "off") # "$offset$cpol"> {
  let is_flat_global = 1;
  let mayLoad = 1;
  let maybeAtomic = 1;
  let has_data = 0;
  let has_vaddr = 0;
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;
  let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", "");

  // The tied input is not a separate field in the encoding.
  let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", "");
  let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
}
230
// Emits the plain and _SADDR variants of an addtid global load.
multiclass FLAT_Global_Load_AddTid_Pseudo<string opName,
                                          RegisterClass regClass,
                                          bit HasTiedOutput = 0> {
  def ""     : FLAT_Global_Load_AddTid_Pseudo<opName, regClass, HasTiedOutput>,
               GlobalSaddrTable<0, opName>;
  def _SADDR : FLAT_Global_Load_AddTid_Pseudo<opName, regClass, HasTiedOutput, 1>,
               GlobalSaddrTable<1, opName>;
}
238
// Emits the two global-segment store pseudos for one mnemonic: the plain form
// (saddr slot present but disabled) and the _SADDR form.
multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> {
  let SubtargetPredicate = HasFlatGlobalInsts, is_flat_global = 1 in {
    def ""     : FLAT_Store_Pseudo<opName, regClass, 1>,
                 GlobalSaddrTable<0, opName>;
    def _SADDR : FLAT_Store_Pseudo<opName, regClass, 1, 1>,
                 GlobalSaddrTable<1, opName>;
  }
}
247
// Global-segment load with the lds flag set. It has neither a $vdst nor a
// $vdata operand, is marked as both loading and storing, and additionally
// uses M0.
class FLAT_Global_Load_LDS_Pseudo <string opName, bit EnableSaddr = 0>
  : FLAT_Pseudo<
      opName,
      (outs ),
      !con(
        !if(EnableSaddr,
            (ins SReg_64:$saddr, VGPR_32:$vaddr),
            (ins VReg_64:$vaddr)),
        (ins flat_offset:$offset, CPol_0:$cpol)),
      " $vaddr" # !if(EnableSaddr, ", $saddr", ", off") # "$offset$cpol"> {
  let is_flat_global = 1;
  let lds = 1;

  // Overrides the base-class defaults for counters, register uses and
  // scheduling.
  let LGKM_CNT = 1;
  let VALU = 1;
  let Uses = [M0, EXEC];
  let SchedRW = [WriteVMEM, WriteLDS];

  let mayLoad = 1;
  let mayStore = 1;

  let has_data = 0;
  let has_vdst = 0;
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;
  let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", "");
}
269
// Emits the plain and _SADDR variants of a global load-to-LDS pseudo.
multiclass FLAT_Global_Load_LDS_Pseudo<string opName> {
  def "" : FLAT_Global_Load_LDS_Pseudo<opName>,
           GlobalSaddrTable<0, opName>;

  def _SADDR : FLAT_Global_Load_LDS_Pseudo<opName, 1>,
               GlobalSaddrTable<1, opName>;
}
276
// Global "addtid" store pseudo: no $vaddr operand (has_vaddr is 0); the data
// operand is followed by the optional SReg_64 $saddr.
class FLAT_Global_Store_AddTid_Pseudo <string opName, RegisterClass vdataClass,
  bit EnableSaddr = 0>
  : FLAT_Pseudo<
      opName,
      (outs),
      !con(
        !if(EnableSaddr,
            (ins vdataClass:$vdata, SReg_64:$saddr),
            (ins vdataClass:$vdata)),
        (ins flat_offset:$offset, CPol:$cpol)),
      " $vdata, " # !if(EnableSaddr, "$saddr", "off") # "$offset$cpol"> {
  let is_flat_global = 1;
  let mayStore = 1;
  let mayLoad  = 0;
  let maybeAtomic = 1;
  let has_vdst = 0;
  let has_vaddr = 0;
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;
  let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", "");
}
294
// Emits the plain and _SADDR variants of an addtid global store.
multiclass FLAT_Global_Store_AddTid_Pseudo<string opName,
                                           RegisterClass regClass> {
  def ""     : FLAT_Global_Store_AddTid_Pseudo<opName, regClass>,
               GlobalSaddrTable<0, opName>;
  def _SADDR : FLAT_Global_Store_AddTid_Pseudo<opName, regClass, 1>,
               GlobalSaddrTable<1, opName>;
}
301
// Mix-in for scratch pseudos: SVOp is a name shared by all addressing-mode
// variants of one operation, Mode identifies the variant. The multiclasses in
// this file pass "SV", "SS", "SVS" or "ST" as the mode.
class FlatScratchInst <string sv_op, string mode> {
  string Mode = mode;
  string SVOp = sv_op;
}
306
// Scratch-segment load pseudo. The address operands depend on the mode:
//   EnableSVE          - $vaddr and $saddr
//   EnableSaddr only   - $saddr
//   EnableVaddr only   - $vaddr
//   none of the above  - offset only
// EnableVaddr defaults to (EnableSVE || !EnableSaddr).
class FLAT_Scratch_Load_Pseudo <string opName, RegisterClass regClass,
  bit HasTiedOutput = 0,
  bit EnableSaddr = 0,
  bit EnableSVE = 0,
  bit EnableVaddr = !or(EnableSVE, !not(EnableSaddr))>
  : FLAT_Pseudo<
      opName,
      (outs getLdStRegisterOperand<regClass>.ret:$vdst),
      !con(
        !if(EnableSVE,
            (ins VGPR_32:$vaddr, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset),
            !if(EnableSaddr,
                (ins SReg_32_XEXEC_HI:$saddr, flat_offset:$offset),
                !if(EnableVaddr,
                    (ins VGPR_32:$vaddr, flat_offset:$offset),
                    (ins flat_offset:$offset)))),
        !if(HasTiedOutput,
            (ins CPol:$cpol, getLdStRegisterOperand<regClass>.ret:$vdst_in),
            (ins CPol_0:$cpol))),
      " $vdst, "
        # !if(EnableVaddr, "$vaddr, ", "off, ")
        # !if(EnableSaddr, "$saddr", "off")
        # "$offset$cpol"> {
  let mayLoad = 1;
  let maybeAtomic = 1;
  let has_data = 0;
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;
  let has_vaddr = EnableVaddr;
  let has_sve = EnableSVE;
  let sve = EnableVaddr;
  // Suffix priority: _SVS, then _SADDR, then none (vaddr form), then _ST.
  let PseudoInstr = opName#!if(EnableSVE, "_SVS", !if(EnableSaddr, "_SADDR", !if(EnableVaddr, "", "_ST")));

  // The tied input is not a separate field in the encoding.
  let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", "");
  let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
}
339
// Scratch-segment store pseudo. $vdata always comes first, followed by the
// mode-dependent address operands, then the offset and cache policy:
//   EnableSVE          - $vaddr and $saddr
//   EnableSaddr only   - $saddr
//   EnableVaddr only   - $vaddr
//   none of the above  - no address register
// EnableVaddr defaults to (EnableSVE || !EnableSaddr).
class FLAT_Scratch_Store_Pseudo <string opName, RegisterClass vdataClass, bit EnableSaddr = 0,
  bit EnableSVE = 0,
  bit EnableVaddr = !or(EnableSVE, !not(EnableSaddr)),
  RegisterOperand vdata_op = getLdStRegisterOperand<vdataClass>.ret>
  : FLAT_Pseudo<
      opName,
      (outs),
      !con(
        (ins vdata_op:$vdata),
        !if(EnableSVE,
            (ins VGPR_32:$vaddr, SReg_32_XEXEC_HI:$saddr),
            !if(EnableSaddr,
                (ins SReg_32_XEXEC_HI:$saddr),
                !if(EnableVaddr,
                    (ins VGPR_32:$vaddr),
                    (ins)))),
        (ins flat_offset:$offset, CPol_0:$cpol)),
      " "
        # !if(EnableVaddr, "$vaddr", "off")
        # ", $vdata, "
        # !if(EnableSaddr, "$saddr", "off")
        # "$offset$cpol"> {
  let mayStore = 1;
  let mayLoad  = 0;
  let maybeAtomic = 1;
  let has_vdst = 0;
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;
  let has_vaddr = EnableVaddr;
  let has_sve = EnableSVE;
  let sve = EnableVaddr;
  // Suffix priority: _SVS, then _SADDR, then none (vaddr form), then _ST.
  let PseudoInstr = opName#!if(EnableSVE, "_SVS", !if(EnableSaddr, "_SADDR", !if(EnableVaddr, "", "_ST")));
}
365
// Emits the four addressing-mode variants of a scratch load:
//   ""      (mode "SV")  - vaddr form
//   _SADDR  (mode "SS")  - saddr form
//   _SVS    (mode "SVS") - vaddr + saddr, gated on HasFlatScratchSVSMode
//   _ST     (mode "ST")  - offset only,   gated on HasFlatScratchSTMode
multiclass FLAT_Scratch_Load_Pseudo<string opName,
                                    RegisterClass regClass,
                                    bit HasTiedOutput = 0> {
  let is_flat_scratch = 1 in {
    def ""     : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput>,
                 FlatScratchInst<opName, "SV">;

    def _SADDR : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput, 1>,
                 FlatScratchInst<opName, "SS">;

    let SubtargetPredicate = HasFlatScratchSVSMode in
    def _SVS   : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput, 1, 1>,
                 FlatScratchInst<opName, "SVS">;

    let SubtargetPredicate = HasFlatScratchSTMode in
    def _ST    : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput, 0, 0, 0>,
                 FlatScratchInst<opName, "ST">;
  }
}
382
// Emits the four addressing-mode variants of a scratch store:
//   ""      (mode "SV")  - vaddr form
//   _SADDR  (mode "SS")  - saddr form
//   _SVS    (mode "SVS") - vaddr + saddr, gated on HasFlatScratchSVSMode
//   _ST     (mode "ST")  - offset only,   gated on HasFlatScratchSTMode
multiclass FLAT_Scratch_Store_Pseudo<string opName, RegisterClass regClass> {
  let is_flat_scratch = 1 in {
    def ""     : FLAT_Scratch_Store_Pseudo<opName, regClass>,
                 FlatScratchInst<opName, "SV">;

    def _SADDR : FLAT_Scratch_Store_Pseudo<opName, regClass, 1>,
                 FlatScratchInst<opName, "SS">;

    let SubtargetPredicate = HasFlatScratchSVSMode in
    def _SVS   : FLAT_Scratch_Store_Pseudo<opName, regClass, 1, 1>,
                 FlatScratchInst<opName, "SVS">;

    let SubtargetPredicate = HasFlatScratchSTMode in
    def _ST    : FLAT_Scratch_Store_Pseudo<opName, regClass, 0, 0, 0>,
                 FlatScratchInst<opName, "ST">;
  }
}
399
// Scratch-segment load with the lds flag set. It has neither a $vdst nor a
// $vdata operand, is marked as both loading and storing, and additionally
// uses M0. The address operands follow the same four modes as the other
// scratch pseudos; EnableVaddr defaults to (EnableSVE || !EnableSaddr).
class FLAT_Scratch_Load_LDS_Pseudo <string opName, bit EnableSaddr = 0,
  bit EnableSVE = 0,
  bit EnableVaddr = !or(EnableSVE, !not(EnableSaddr))>
  : FLAT_Pseudo<
      opName,
      (outs ),
      !con(
        !if(EnableSVE,
            (ins VGPR_32:$vaddr, SReg_32_XEXEC_HI:$saddr),
            !if(EnableSaddr,
                (ins SReg_32_XEXEC_HI:$saddr),
                !if(EnableVaddr,
                    (ins VGPR_32:$vaddr),
                    (ins)))),
        (ins flat_offset:$offset, CPol:$cpol)),
      " "
        # !if(EnableVaddr, "$vaddr, ", "off, ")
        # !if(EnableSaddr, "$saddr", "off")
        # "$offset$cpol"> {

  let is_flat_scratch = 1;
  let lds = 1;

  // Overrides the base-class defaults for counters, register uses and
  // scheduling.
  let LGKM_CNT = 1;
  let VALU = 1;
  let Uses = [M0, EXEC];
  let SchedRW = [WriteVMEM, WriteLDS];

  let mayLoad = 1;
  let mayStore = 1;

  let has_data = 0;
  let has_vdst = 0;
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;
  let has_vaddr = EnableVaddr;
  let has_sve = EnableSVE;
  let sve = EnableVaddr;
  // Suffix priority: _SVS, then _SADDR, then none (vaddr form), then _ST.
  let PseudoInstr = opName#!if(EnableSVE, "_SVS", !if(EnableSaddr, "_SADDR", !if(EnableVaddr, "", "_ST")));
}
431
// Emits the four addressing-mode variants ("SV", "SS", "SVS", "ST") of a
// scratch load-to-LDS pseudo. No per-variant subtarget predicate is applied
// here.
multiclass FLAT_Scratch_Load_LDS_Pseudo<string opName> {
  def "" : FLAT_Scratch_Load_LDS_Pseudo<opName>,
           FlatScratchInst<opName, "SV">;

  def _SADDR : FLAT_Scratch_Load_LDS_Pseudo<opName, 1>,
               FlatScratchInst<opName, "SS">;

  def _SVS : FLAT_Scratch_Load_LDS_Pseudo<opName, 1, 1>,
             FlatScratchInst<opName, "SVS">;

  def _ST : FLAT_Scratch_Load_LDS_Pseudo<opName, 0, 0, 0>,
            FlatScratchInst<opName, "ST">;
}
442
// Base class for atomics that do not return the old value: no $vdst, and the
// glc bit is not taken from the cache-policy operand (has_glc = 0) but fixed
// to glcValue.
class FLAT_AtomicNoRet_Pseudo<string opName, dag outs, dag ins,
                              string asm, list<dag> pattern = []>
  : FLAT_Pseudo<opName, outs, ins, asm, pattern> {
  let mayLoad = 1;
  let mayStore = 1;
  let maybeAtomic = 1;
  let IsAtomicNoRet = 1;

  let has_vdst = 0;

  let has_glc  = 0;
  let glcValue = 0;
  let has_sccb  = 1;
  let sccbValue = 0;
}
456
// Base class for atomics that return a value. Starts from the no-return
// settings and flips what differs: a $vdst is present, glcValue is 1, and the
// atomic-kind flags are swapped.
class FLAT_AtomicRet_Pseudo<string opName, dag outs, dag ins,
                            string asm, list<dag> pattern = []>
  : FLAT_AtomicNoRet_Pseudo<opName, outs, ins, asm, pattern> {
  let IsAtomicRet = 1;
  let IsAtomicNoRet = 0;

  let has_vdst = 1;
  let glcValue = 1;
  let sccbValue = 0;

  let hasPostISelHook = 1;
  let PseudoInstr = NAME # "_RTN";
}
468
// Flat-segment atomic without a return value. data_vt/data_rc default to the
// result type/class; isFP and data_op are derived from them unless overridden.
multiclass FLAT_Atomic_Pseudo_NO_RTN<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc,
  bit isFP = isFloatType<data_vt>.ret,
  RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret> {

  def "" : FLAT_AtomicNoRet_Pseudo<
             opName,
             (outs),
             (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_0:$cpol),
             " $vaddr, $vdata$offset$cpol">,
           GlobalSaddrTable<0, opName>,
           AtomicNoRet<opName, 0> {
    let PseudoInstr = NAME;
    let FPAtomic = isFP;
    let AddedComplexity = -1; // Prefer global atomics if available
  }
}
488
// Flat-segment atomic that returns a value in $vdst (the _RTN record). Uses
// the CPol_GLC1 cache-policy operand.
multiclass FLAT_Atomic_Pseudo_RTN<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc,
  bit isFP = isFloatType<data_vt>.ret,
  RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret> {

  def _RTN : FLAT_AtomicRet_Pseudo<
               opName,
               (outs getLdStRegisterOperand<vdst_rc>.ret:$vdst),
               (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_GLC1:$cpol),
               " $vdst, $vaddr, $vdata$offset$cpol">,
             GlobalSaddrTable<0, opName#"_rtn">,
             AtomicNoRet<opName, 1> {
    let FPAtomic = isFP;
    let AddedComplexity = -1; // Prefer global atomics if available
  }
}
507
// Convenience wrapper: instantiates both the no-return and the returning
// (_RTN) flat atomic pseudo with identical arguments.
multiclass FLAT_Atomic_Pseudo<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc,
  bit isFP = isFloatType<data_vt>.ret,
  RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret> {

  defm "" : FLAT_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt, data_vt,
                                      data_rc, isFP, data_op>;

  defm "" : FLAT_Atomic_Pseudo_RTN<opName, vdst_rc, vt, data_vt,
                                   data_rc, isFP, data_op>;
}
519
// Global-segment atomics without a return value: the plain form (64-bit
// $vaddr, "off" printed in the saddr slot) and the _SADDR form (32-bit $vaddr
// plus SReg_64 $saddr).
multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc,
  bit isFP = isFloatType<data_vt>.ret,
  RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret> {

  def "" : FLAT_AtomicNoRet_Pseudo<
             opName,
             (outs),
             (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_0:$cpol),
             " $vaddr, $vdata, off$offset$cpol">,
           GlobalSaddrTable<0, opName>,
           AtomicNoRet<opName, 0> {
    let has_saddr = 1;
    let PseudoInstr = NAME;
    let FPAtomic = isFP;
  }

  def _SADDR : FLAT_AtomicNoRet_Pseudo<
                 opName,
                 (outs),
                 (ins VGPR_32:$vaddr, data_op:$vdata, SReg_64:$saddr, flat_offset:$offset, CPol_0:$cpol),
                 " $vaddr, $vdata, $saddr$offset$cpol">,
               GlobalSaddrTable<1, opName>,
               AtomicNoRet<opName#"_saddr", 0> {
    let has_saddr = 1;
    let enabled_saddr = 1;
    let PseudoInstr = NAME#"_SADDR";
    let FPAtomic = isFP;
  }
}
552
// Global-segment atomics that return a value: the _RTN form (64-bit $vaddr,
// "off" printed in the saddr slot) and the _SADDR_RTN form (32-bit $vaddr
// plus SReg_64 $saddr). Both use the CPol_GLC1 cache-policy operand.
multiclass FLAT_Global_Atomic_Pseudo_RTN<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc,
  bit isFP = isFloatType<data_vt>.ret,
  RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret,
  RegisterOperand vdst_op = getLdStRegisterOperand<vdst_rc>.ret> {

  def _RTN : FLAT_AtomicRet_Pseudo<
               opName,
               (outs vdst_op:$vdst),
               (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_GLC1:$cpol),
               " $vdst, $vaddr, $vdata, off$offset$cpol">,
             GlobalSaddrTable<0, opName#"_rtn">,
             AtomicNoRet<opName, 1> {
    let has_saddr = 1;
    let FPAtomic = isFP;
  }

  def _SADDR_RTN : FLAT_AtomicRet_Pseudo<
                     opName,
                     (outs vdst_op:$vdst),
                     (ins VGPR_32:$vaddr, data_op:$vdata, SReg_64:$saddr, flat_offset:$offset, CPol_GLC1:$cpol),
                     " $vdst, $vaddr, $vdata, $saddr$offset$cpol">,
                   GlobalSaddrTable<1, opName#"_rtn">,
                   AtomicNoRet<opName#"_saddr", 1> {
    let has_saddr = 1;
    let enabled_saddr = 1;
    let PseudoInstr = NAME#"_SADDR_RTN";
    let FPAtomic = isFP;
  }
}
585
// Convenience wrapper: instantiates the no-return and returning global atomic
// pseudos, all marked as global-segment and gated on HasFlatGlobalInsts.
multiclass FLAT_Global_Atomic_Pseudo<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc> {

  let SubtargetPredicate = HasFlatGlobalInsts, is_flat_global = 1 in {
    defm "" : FLAT_Global_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt,
                                               data_vt, data_rc>;
    defm "" : FLAT_Global_Atomic_Pseudo_RTN<opName, vdst_rc, vt,
                                            data_vt, data_rc>;
  }
}
597
598//===----------------------------------------------------------------------===//
599// Flat Instructions
600//===----------------------------------------------------------------------===//
601
// Flat-segment loads.
def FLAT_LOAD_UBYTE   : FLAT_Load_Pseudo<"flat_load_ubyte",   VGPR_32>;
def FLAT_LOAD_SBYTE   : FLAT_Load_Pseudo<"flat_load_sbyte",   VGPR_32>;
def FLAT_LOAD_USHORT  : FLAT_Load_Pseudo<"flat_load_ushort",  VGPR_32>;
def FLAT_LOAD_SSHORT  : FLAT_Load_Pseudo<"flat_load_sshort",  VGPR_32>;
def FLAT_LOAD_DWORD   : FLAT_Load_Pseudo<"flat_load_dword",   VGPR_32>;
def FLAT_LOAD_DWORDX2 : FLAT_Load_Pseudo<"flat_load_dwordx2", VReg_64>;
def FLAT_LOAD_DWORDX4 : FLAT_Load_Pseudo<"flat_load_dwordx4", VReg_128>;
def FLAT_LOAD_DWORDX3 : FLAT_Load_Pseudo<"flat_load_dwordx3", VReg_96>;

// Flat-segment stores.
def FLAT_STORE_BYTE    : FLAT_Store_Pseudo<"flat_store_byte",    VGPR_32>;
def FLAT_STORE_SHORT   : FLAT_Store_Pseudo<"flat_store_short",   VGPR_32>;
def FLAT_STORE_DWORD   : FLAT_Store_Pseudo<"flat_store_dword",   VGPR_32>;
def FLAT_STORE_DWORDX2 : FLAT_Store_Pseudo<"flat_store_dwordx2", VReg_64>;
def FLAT_STORE_DWORDX4 : FLAT_Store_Pseudo<"flat_store_dwordx4", VReg_128>;
def FLAT_STORE_DWORDX3 : FLAT_Store_Pseudo<"flat_store_dwordx3", VReg_96>;

// D16 variants, gated on HasD16LoadStore.
let SubtargetPredicate = HasD16LoadStore in {
  // The D16 loads take a tied $vdst_in operand (third argument = 1).
  let TiedSourceNotRead = 1 in {
    def FLAT_LOAD_UBYTE_D16    : FLAT_Load_Pseudo<"flat_load_ubyte_d16",    VGPR_32, 1>;
    def FLAT_LOAD_UBYTE_D16_HI : FLAT_Load_Pseudo<"flat_load_ubyte_d16_hi", VGPR_32, 1>;
    def FLAT_LOAD_SBYTE_D16    : FLAT_Load_Pseudo<"flat_load_sbyte_d16",    VGPR_32, 1>;
    def FLAT_LOAD_SBYTE_D16_HI : FLAT_Load_Pseudo<"flat_load_sbyte_d16_hi", VGPR_32, 1>;
    def FLAT_LOAD_SHORT_D16    : FLAT_Load_Pseudo<"flat_load_short_d16",    VGPR_32, 1>;
    def FLAT_LOAD_SHORT_D16_HI : FLAT_Load_Pseudo<"flat_load_short_d16_hi", VGPR_32, 1>;
  }

  def FLAT_STORE_BYTE_D16_HI  : FLAT_Store_Pseudo<"flat_store_byte_d16_hi",  VGPR_32>;
  def FLAT_STORE_SHORT_D16_HI : FLAT_Store_Pseudo<"flat_store_short_d16_hi", VGPR_32>;
} // End SubtargetPredicate = HasD16LoadStore
631
// Flat-segment integer atomics. Each defm produces the no-return record and
// its _RTN counterpart.

// Compare-and-swap: the data operand is a two-element vector held in a
// register class twice as wide as the result.
defm FLAT_ATOMIC_CMPSWAP    : FLAT_Atomic_Pseudo<"flat_atomic_cmpswap",    VGPR_32, i32, v2i32, VReg_64>;
defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Atomic_Pseudo<"flat_atomic_cmpswap_x2", VReg_64, i64, v2i64, VReg_128>;

defm FLAT_ATOMIC_SWAP       : FLAT_Atomic_Pseudo<"flat_atomic_swap",       VGPR_32, i32>;
defm FLAT_ATOMIC_SWAP_X2    : FLAT_Atomic_Pseudo<"flat_atomic_swap_x2",    VReg_64, i64>;

// 32-bit operations.
defm FLAT_ATOMIC_ADD        : FLAT_Atomic_Pseudo<"flat_atomic_add",        VGPR_32, i32>;
defm FLAT_ATOMIC_SUB        : FLAT_Atomic_Pseudo<"flat_atomic_sub",        VGPR_32, i32>;
defm FLAT_ATOMIC_SMIN       : FLAT_Atomic_Pseudo<"flat_atomic_smin",       VGPR_32, i32>;
defm FLAT_ATOMIC_UMIN       : FLAT_Atomic_Pseudo<"flat_atomic_umin",       VGPR_32, i32>;
defm FLAT_ATOMIC_SMAX       : FLAT_Atomic_Pseudo<"flat_atomic_smax",       VGPR_32, i32>;
defm FLAT_ATOMIC_UMAX       : FLAT_Atomic_Pseudo<"flat_atomic_umax",       VGPR_32, i32>;
defm FLAT_ATOMIC_AND        : FLAT_Atomic_Pseudo<"flat_atomic_and",        VGPR_32, i32>;
defm FLAT_ATOMIC_OR         : FLAT_Atomic_Pseudo<"flat_atomic_or",         VGPR_32, i32>;
defm FLAT_ATOMIC_XOR        : FLAT_Atomic_Pseudo<"flat_atomic_xor",        VGPR_32, i32>;
defm FLAT_ATOMIC_INC        : FLAT_Atomic_Pseudo<"flat_atomic_inc",        VGPR_32, i32>;
defm FLAT_ATOMIC_DEC        : FLAT_Atomic_Pseudo<"flat_atomic_dec",        VGPR_32, i32>;

// 64-bit (_X2) operations.
defm FLAT_ATOMIC_ADD_X2     : FLAT_Atomic_Pseudo<"flat_atomic_add_x2",     VReg_64, i64>;
defm FLAT_ATOMIC_SUB_X2     : FLAT_Atomic_Pseudo<"flat_atomic_sub_x2",     VReg_64, i64>;
defm FLAT_ATOMIC_SMIN_X2    : FLAT_Atomic_Pseudo<"flat_atomic_smin_x2",    VReg_64, i64>;
defm FLAT_ATOMIC_UMIN_X2    : FLAT_Atomic_Pseudo<"flat_atomic_umin_x2",    VReg_64, i64>;
defm FLAT_ATOMIC_SMAX_X2    : FLAT_Atomic_Pseudo<"flat_atomic_smax_x2",    VReg_64, i64>;
defm FLAT_ATOMIC_UMAX_X2    : FLAT_Atomic_Pseudo<"flat_atomic_umax_x2",    VReg_64, i64>;
defm FLAT_ATOMIC_AND_X2     : FLAT_Atomic_Pseudo<"flat_atomic_and_x2",     VReg_64, i64>;
defm FLAT_ATOMIC_OR_X2      : FLAT_Atomic_Pseudo<"flat_atomic_or_x2",      VReg_64, i64>;
defm FLAT_ATOMIC_XOR_X2     : FLAT_Atomic_Pseudo<"flat_atomic_xor_x2",     VReg_64, i64>;
defm FLAT_ATOMIC_INC_X2     : FLAT_Atomic_Pseudo<"flat_atomic_inc_x2",     VReg_64, i64>;
defm FLAT_ATOMIC_DEC_X2     : FLAT_Atomic_Pseudo<"flat_atomic_dec_x2",     VReg_64, i64>;
709
// GFX7-, GFX10-only flat instructions: 64-bit floating-point atomics.
let SubtargetPredicate = isGFX7GFX10 in {
  defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Atomic_Pseudo<"flat_atomic_fcmpswap_x2", VReg_64, f64, v2f64, VReg_128>;
  defm FLAT_ATOMIC_FMIN_X2     : FLAT_Atomic_Pseudo<"flat_atomic_fmin_x2",     VReg_64, f64>;
  defm FLAT_ATOMIC_FMAX_X2     : FLAT_Atomic_Pseudo<"flat_atomic_fmax_x2",     VReg_64, f64>;
} // End SubtargetPredicate = isGFX7GFX10

// f64 add/min/max atomics for both the flat and the global segment, gated on
// isGFX90APlus.
let SubtargetPredicate = isGFX90APlus in {
  defm FLAT_ATOMIC_ADD_F64   : FLAT_Atomic_Pseudo<"flat_atomic_add_f64", VReg_64, f64>;
  defm FLAT_ATOMIC_MIN_F64   : FLAT_Atomic_Pseudo<"flat_atomic_min_f64", VReg_64, f64>;
  defm FLAT_ATOMIC_MAX_F64   : FLAT_Atomic_Pseudo<"flat_atomic_max_f64", VReg_64, f64>;

  defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_add_f64", VReg_64, f64>;
  defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_min_f64", VReg_64, f64>;
  defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_max_f64", VReg_64, f64>;
} // End SubtargetPredicate = isGFX90APlus

// Packed 16-bit add atomics. The bf16 forms are declared with the integer
// type v2i16, so FPAtomic is applied to them through an enclosing `let`.
let SubtargetPredicate = HasAtomicFlatPkAdd16Insts in {
  defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Atomic_Pseudo<"flat_atomic_pk_add_f16", VGPR_32, v2f16>;

  let FPAtomic = 1 in
  defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Atomic_Pseudo<"flat_atomic_pk_add_bf16", VGPR_32, v2i16>;
} // End SubtargetPredicate = HasAtomicFlatPkAdd16Insts

let SubtargetPredicate = HasAtomicGlobalPkAddBF16Inst, FPAtomic = 1 in
defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Atomic_Pseudo<"global_atomic_pk_add_bf16", VGPR_32, v2i16>;

// GFX7-, GFX10-, GFX11-only flat instructions: 32-bit floating-point atomics.
let SubtargetPredicate = isGFX7GFX10GFX11 in {
  defm FLAT_ATOMIC_FCMPSWAP : FLAT_Atomic_Pseudo<"flat_atomic_fcmpswap", VGPR_32, f32, v2f32, VReg_64>;
  defm FLAT_ATOMIC_FMIN     : FLAT_Atomic_Pseudo<"flat_atomic_fmin",     VGPR_32, f32>;
  defm FLAT_ATOMIC_FMAX     : FLAT_Atomic_Pseudo<"flat_atomic_fmax",     VGPR_32, f32>;
} // End SubtargetPredicate = isGFX7GFX10GFX11

// GFX940-, GFX11-only flat instructions.
let SubtargetPredicate = HasFlatAtomicFaddF32Inst in {
  defm FLAT_ATOMIC_ADD_F32 : FLAT_Atomic_Pseudo<"flat_atomic_add_f32", VGPR_32, f32>;
} // End SubtargetPredicate = HasFlatAtomicFaddF32Inst
760
761defm GLOBAL_LOAD_UBYTE    : FLAT_Global_Load_Pseudo <"global_load_ubyte", VGPR_32>;
762defm GLOBAL_LOAD_SBYTE    : FLAT_Global_Load_Pseudo <"global_load_sbyte", VGPR_32>;
763defm GLOBAL_LOAD_USHORT   : FLAT_Global_Load_Pseudo <"global_load_ushort", VGPR_32>;
764defm GLOBAL_LOAD_SSHORT   : FLAT_Global_Load_Pseudo <"global_load_sshort", VGPR_32>;
765defm GLOBAL_LOAD_DWORD    : FLAT_Global_Load_Pseudo <"global_load_dword", VGPR_32>;
766defm GLOBAL_LOAD_DWORDX2  : FLAT_Global_Load_Pseudo <"global_load_dwordx2", VReg_64>;
767defm GLOBAL_LOAD_DWORDX3  : FLAT_Global_Load_Pseudo <"global_load_dwordx3", VReg_96>;
768defm GLOBAL_LOAD_DWORDX4  : FLAT_Global_Load_Pseudo <"global_load_dwordx4", VReg_128>;
769
// D16 global loads, all marked TiedSourceNotRead.
// NOTE(review): the trailing 1 is presumably the flag that gives the pseudo a
// tied input operand - confirm against FLAT_Global_Load_Pseudo.
let TiedSourceNotRead = 1 in {
  defm GLOBAL_LOAD_UBYTE_D16    : FLAT_Global_Load_Pseudo<"global_load_ubyte_d16",    VGPR_32, 1>;
  defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Global_Load_Pseudo<"global_load_ubyte_d16_hi", VGPR_32, 1>;
  defm GLOBAL_LOAD_SBYTE_D16    : FLAT_Global_Load_Pseudo<"global_load_sbyte_d16",    VGPR_32, 1>;
  defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Global_Load_Pseudo<"global_load_sbyte_d16_hi", VGPR_32, 1>;
  defm GLOBAL_LOAD_SHORT_D16    : FLAT_Global_Load_Pseudo<"global_load_short_d16",    VGPR_32, 1>;
  defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Global_Load_Pseudo<"global_load_short_d16_hi", VGPR_32, 1>;
} // End TiedSourceNotRead = 1
778
// ADDTID form of the global dword load; requires HasGFX10_BEncoding.
let OtherPredicates = [HasGFX10_BEncoding] in {
  defm GLOBAL_LOAD_DWORD_ADDTID : FLAT_Global_Load_AddTid_Pseudo<"global_load_dword_addtid", VGPR_32>;
} // End OtherPredicates = [HasGFX10_BEncoding]
781
// Global stores. The second argument is the register class of the stored
// value.
defm GLOBAL_STORE_BYTE    : FLAT_Global_Store_Pseudo<"global_store_byte",    VGPR_32>;
defm GLOBAL_STORE_SHORT   : FLAT_Global_Store_Pseudo<"global_store_short",   VGPR_32>;
defm GLOBAL_STORE_DWORD   : FLAT_Global_Store_Pseudo<"global_store_dword",   VGPR_32>;
defm GLOBAL_STORE_DWORDX2 : FLAT_Global_Store_Pseudo<"global_store_dwordx2", VReg_64>;
defm GLOBAL_STORE_DWORDX3 : FLAT_Global_Store_Pseudo<"global_store_dwordx3", VReg_96>;
defm GLOBAL_STORE_DWORDX4 : FLAT_Global_Store_Pseudo<"global_store_dwordx4", VReg_128>;

// ADDTID form of the global dword store; requires HasGFX10_BEncoding.
let OtherPredicates = [HasGFX10_BEncoding] in {
  defm GLOBAL_STORE_DWORD_ADDTID : FLAT_Global_Store_AddTid_Pseudo<"global_store_dword_addtid", VGPR_32>;
}

// Store variants whose mnemonics carry the _d16_hi suffix.
defm GLOBAL_STORE_BYTE_D16_HI  : FLAT_Global_Store_Pseudo<"global_store_byte_d16_hi",  VGPR_32>;
defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Global_Store_Pseudo<"global_store_short_d16_hi", VGPR_32>;
793
// Integer global atomics and global-to-LDS loads. Everything in this scope
// has is_flat_global set.
let is_flat_global = 1 in {

  // Compare-and-swap: two extra arguments give a wider data type and register
  // class (presumably the packed data/compare pair - confirm against
  // FLAT_Global_Atomic_Pseudo).
  defm GLOBAL_ATOMIC_CMPSWAP    : FLAT_Global_Atomic_Pseudo<"global_atomic_cmpswap",    VGPR_32, i32, v2i32, VReg_64>;
  defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Atomic_Pseudo<"global_atomic_cmpswap_x2", VReg_64, i64, v2i64, VReg_128>;

  // Swap, 32-bit and 64-bit.
  defm GLOBAL_ATOMIC_SWAP    : FLAT_Global_Atomic_Pseudo<"global_atomic_swap",    VGPR_32, i32>;
  defm GLOBAL_ATOMIC_SWAP_X2 : FLAT_Global_Atomic_Pseudo<"global_atomic_swap_x2", VReg_64, i64>;

  // 32-bit read-modify-write operations.
  defm GLOBAL_ATOMIC_ADD  : FLAT_Global_Atomic_Pseudo<"global_atomic_add",  VGPR_32, i32>;
  defm GLOBAL_ATOMIC_SUB  : FLAT_Global_Atomic_Pseudo<"global_atomic_sub",  VGPR_32, i32>;
  defm GLOBAL_ATOMIC_SMIN : FLAT_Global_Atomic_Pseudo<"global_atomic_smin", VGPR_32, i32>;
  defm GLOBAL_ATOMIC_UMIN : FLAT_Global_Atomic_Pseudo<"global_atomic_umin", VGPR_32, i32>;
  defm GLOBAL_ATOMIC_SMAX : FLAT_Global_Atomic_Pseudo<"global_atomic_smax", VGPR_32, i32>;
  defm GLOBAL_ATOMIC_UMAX : FLAT_Global_Atomic_Pseudo<"global_atomic_umax", VGPR_32, i32>;
  defm GLOBAL_ATOMIC_AND  : FLAT_Global_Atomic_Pseudo<"global_atomic_and",  VGPR_32, i32>;
  defm GLOBAL_ATOMIC_OR   : FLAT_Global_Atomic_Pseudo<"global_atomic_or",   VGPR_32, i32>;
  defm GLOBAL_ATOMIC_XOR  : FLAT_Global_Atomic_Pseudo<"global_atomic_xor",  VGPR_32, i32>;
  defm GLOBAL_ATOMIC_INC  : FLAT_Global_Atomic_Pseudo<"global_atomic_inc",  VGPR_32, i32>;
  defm GLOBAL_ATOMIC_DEC  : FLAT_Global_Atomic_Pseudo<"global_atomic_dec",  VGPR_32, i32>;

  // 64-bit (_X2) read-modify-write operations.
  defm GLOBAL_ATOMIC_ADD_X2  : FLAT_Global_Atomic_Pseudo<"global_atomic_add_x2",  VReg_64, i64>;
  defm GLOBAL_ATOMIC_SUB_X2  : FLAT_Global_Atomic_Pseudo<"global_atomic_sub_x2",  VReg_64, i64>;
  defm GLOBAL_ATOMIC_SMIN_X2 : FLAT_Global_Atomic_Pseudo<"global_atomic_smin_x2", VReg_64, i64>;
  defm GLOBAL_ATOMIC_UMIN_X2 : FLAT_Global_Atomic_Pseudo<"global_atomic_umin_x2", VReg_64, i64>;
  defm GLOBAL_ATOMIC_SMAX_X2 : FLAT_Global_Atomic_Pseudo<"global_atomic_smax_x2", VReg_64, i64>;
  defm GLOBAL_ATOMIC_UMAX_X2 : FLAT_Global_Atomic_Pseudo<"global_atomic_umax_x2", VReg_64, i64>;
  defm GLOBAL_ATOMIC_AND_X2  : FLAT_Global_Atomic_Pseudo<"global_atomic_and_x2",  VReg_64, i64>;
  defm GLOBAL_ATOMIC_OR_X2   : FLAT_Global_Atomic_Pseudo<"global_atomic_or_x2",   VReg_64, i64>;
  defm GLOBAL_ATOMIC_XOR_X2  : FLAT_Global_Atomic_Pseudo<"global_atomic_xor_x2",  VReg_64, i64>;
  defm GLOBAL_ATOMIC_INC_X2  : FLAT_Global_Atomic_Pseudo<"global_atomic_inc_x2",  VReg_64, i64>;
  defm GLOBAL_ATOMIC_DEC_X2  : FLAT_Global_Atomic_Pseudo<"global_atomic_dec_x2",  VReg_64, i64>;

  // CSUB is instantiated only through the _RTN multiclass and is limited to
  // HasGFX10_BEncoding.
  let SubtargetPredicate = HasGFX10_BEncoding in {
    defm GLOBAL_ATOMIC_CSUB : FLAT_Global_Atomic_Pseudo_RTN<"global_atomic_csub", VGPR_32, i32>;
  }

  // Global loads that write to LDS.
  defm GLOBAL_LOAD_LDS_UBYTE  : FLAT_Global_Load_LDS_Pseudo<"global_load_lds_ubyte">;
  defm GLOBAL_LOAD_LDS_SBYTE  : FLAT_Global_Load_LDS_Pseudo<"global_load_lds_sbyte">;
  defm GLOBAL_LOAD_LDS_USHORT : FLAT_Global_Load_LDS_Pseudo<"global_load_lds_ushort">;
  defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Global_Load_LDS_Pseudo<"global_load_lds_sshort">;
  defm GLOBAL_LOAD_LDS_DWORD  : FLAT_Global_Load_LDS_Pseudo<"global_load_lds_dword">;

} // End is_flat_global = 1
884
885
886
// Scratch loads, stores and scratch-to-LDS loads. All of them require
// HasFlatScratchInsts.
let SubtargetPredicate = HasFlatScratchInsts in {

  // Plain loads; the second argument is the destination register class.
  defm SCRATCH_LOAD_UBYTE   : FLAT_Scratch_Load_Pseudo<"scratch_load_ubyte",   VGPR_32>;
  defm SCRATCH_LOAD_SBYTE   : FLAT_Scratch_Load_Pseudo<"scratch_load_sbyte",   VGPR_32>;
  defm SCRATCH_LOAD_USHORT  : FLAT_Scratch_Load_Pseudo<"scratch_load_ushort",  VGPR_32>;
  defm SCRATCH_LOAD_SSHORT  : FLAT_Scratch_Load_Pseudo<"scratch_load_sshort",  VGPR_32>;
  defm SCRATCH_LOAD_DWORD   : FLAT_Scratch_Load_Pseudo<"scratch_load_dword",   VGPR_32>;
  defm SCRATCH_LOAD_DWORDX2 : FLAT_Scratch_Load_Pseudo<"scratch_load_dwordx2", VReg_64>;
  defm SCRATCH_LOAD_DWORDX3 : FLAT_Scratch_Load_Pseudo<"scratch_load_dwordx3", VReg_96>;
  defm SCRATCH_LOAD_DWORDX4 : FLAT_Scratch_Load_Pseudo<"scratch_load_dwordx4", VReg_128>;

  // D16 loads, marked TiedSourceNotRead.
  // NOTE(review): the trailing 1 is presumably the tied-input flag - confirm
  // against FLAT_Scratch_Load_Pseudo.
  let TiedSourceNotRead = 1 in {
    defm SCRATCH_LOAD_UBYTE_D16    : FLAT_Scratch_Load_Pseudo<"scratch_load_ubyte_d16",    VGPR_32, 1>;
    defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Scratch_Load_Pseudo<"scratch_load_ubyte_d16_hi", VGPR_32, 1>;
    defm SCRATCH_LOAD_SBYTE_D16    : FLAT_Scratch_Load_Pseudo<"scratch_load_sbyte_d16",    VGPR_32, 1>;
    defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Scratch_Load_Pseudo<"scratch_load_sbyte_d16_hi", VGPR_32, 1>;
    defm SCRATCH_LOAD_SHORT_D16    : FLAT_Scratch_Load_Pseudo<"scratch_load_short_d16",    VGPR_32, 1>;
    defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Scratch_Load_Pseudo<"scratch_load_short_d16_hi", VGPR_32, 1>;
  } // End TiedSourceNotRead = 1

  // Stores; the second argument is the register class of the stored value.
  defm SCRATCH_STORE_BYTE    : FLAT_Scratch_Store_Pseudo<"scratch_store_byte",    VGPR_32>;
  defm SCRATCH_STORE_SHORT   : FLAT_Scratch_Store_Pseudo<"scratch_store_short",   VGPR_32>;
  defm SCRATCH_STORE_DWORD   : FLAT_Scratch_Store_Pseudo<"scratch_store_dword",   VGPR_32>;
  defm SCRATCH_STORE_DWORDX2 : FLAT_Scratch_Store_Pseudo<"scratch_store_dwordx2", VReg_64>;
  defm SCRATCH_STORE_DWORDX3 : FLAT_Scratch_Store_Pseudo<"scratch_store_dwordx3", VReg_96>;
  defm SCRATCH_STORE_DWORDX4 : FLAT_Scratch_Store_Pseudo<"scratch_store_dwordx4", VReg_128>;

  // Store variants whose mnemonics carry the _d16_hi suffix.
  defm SCRATCH_STORE_BYTE_D16_HI  : FLAT_Scratch_Store_Pseudo<"scratch_store_byte_d16_hi",  VGPR_32>;
  defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Scratch_Store_Pseudo<"scratch_store_short_d16_hi", VGPR_32>;

  // Scratch loads that write to LDS.
  defm SCRATCH_LOAD_LDS_UBYTE  : FLAT_Scratch_Load_LDS_Pseudo<"scratch_load_lds_ubyte">;
  defm SCRATCH_LOAD_LDS_SBYTE  : FLAT_Scratch_Load_LDS_Pseudo<"scratch_load_lds_sbyte">;
  defm SCRATCH_LOAD_LDS_USHORT : FLAT_Scratch_Load_LDS_Pseudo<"scratch_load_lds_ushort">;
  defm SCRATCH_LOAD_LDS_SSHORT : FLAT_Scratch_Load_LDS_Pseudo<"scratch_load_lds_sshort">;
  defm SCRATCH_LOAD_LDS_DWORD  : FLAT_Scratch_Load_LDS_Pseudo<"scratch_load_lds_dword">;

} // End SubtargetPredicate = HasFlatScratchInsts
923
// Floating-point global atomics (compare-and-swap, min, max) in f32 and f64
// (_X2) flavours. Restricted to isGFX10Plus and flagged is_flat_global.
let SubtargetPredicate = isGFX10Plus, is_flat_global = 1 in {
  // 32-bit
  defm GLOBAL_ATOMIC_FCMPSWAP : FLAT_Global_Atomic_Pseudo<
    "global_atomic_fcmpswap", VGPR_32, f32, v2f32, VReg_64>;
  defm GLOBAL_ATOMIC_FMIN : FLAT_Global_Atomic_Pseudo<
    "global_atomic_fmin", VGPR_32, f32>;
  defm GLOBAL_ATOMIC_FMAX : FLAT_Global_Atomic_Pseudo<
    "global_atomic_fmax", VGPR_32, f32>;

  // 64-bit
  defm GLOBAL_ATOMIC_FCMPSWAP_X2 : FLAT_Global_Atomic_Pseudo<
    "global_atomic_fcmpswap_x2", VReg_64, f64, v2f64, VReg_128>;
  defm GLOBAL_ATOMIC_FMIN_X2 : FLAT_Global_Atomic_Pseudo<
    "global_atomic_fmin_x2", VReg_64, f64>;
  defm GLOBAL_ATOMIC_FMAX_X2 : FLAT_Global_Atomic_Pseudo<
    "global_atomic_fmax_x2", VReg_64, f64>;
} // End SubtargetPredicate = isGFX10Plus, is_flat_global = 1
938
// Global floating-point add atomics. The no-return and returning halves of
// each instruction are instantiated separately because each half is guarded
// by its own predicate.
let is_flat_global = 1 in {
  // No-return forms.
  let OtherPredicates = [HasAtomicFaddNoRtnInsts] in {
    defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_NO_RTN<"global_atomic_add_f32", VGPR_32, f32>;
  }
  let OtherPredicates = [HasAtomicBufferGlobalPkAddF16NoRtnInsts] in {
    defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_NO_RTN<"global_atomic_pk_add_f16", VGPR_32, v2f16>;
  }

  // Returning forms.
  let OtherPredicates = [HasAtomicFaddRtnInsts] in {
    defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_RTN<"global_atomic_add_f32", VGPR_32, f32>;
  }
  let OtherPredicates = [HasAtomicBufferGlobalPkAddF16Insts] in {
    defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_RTN<"global_atomic_pk_add_f16", VGPR_32, v2f16>;
  }
} // End is_flat_global = 1
957
958//===----------------------------------------------------------------------===//
959// Flat Patterns
960//===----------------------------------------------------------------------===//
961
// Load pattern for the flat addressing form: the address is matched by
// FlatOffset, which yields a 64-bit vaddr and an i32 immediate offset that
// are passed straight through to `inst`.
class FlatLoadPat<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat<(vt (node (FlatOffset i64:$vaddr, i32:$offset))),
           (inst $vaddr, $offset)>;
967
// D16 flavour of the flat load pattern. `node` takes an additional vt:$in
// value, which is forwarded as the final operand of `inst`.
// NOTE(review): the literal 0 is presumably the cache-policy operand - confirm
// against the pseudo's operand list.
class FlatLoadPat_D16<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat<(node (FlatOffset (i64 VReg_64:$vaddr), i32:$offset), vt:$in),
           (inst $vaddr, $offset, 0, $in)>;
972
// Same shape as FlatLoadPat_D16, but the address is matched with GlobalOffset
// instead of FlatOffset.
// NOTE(review): the literal 0 is presumably the cache-policy operand - confirm.
class FlatSignedLoadPat_D16<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat<(node (GlobalOffset (i64 VReg_64:$vaddr), i32:$offset), vt:$in),
           (inst $vaddr, $offset, 0, $in)>;
977
// D16 global load using the SADDR addressing form: GlobalSAddr splits the
// address into a 64-bit scalar base, a 32-bit VGPR offset and an immediate.
// NOTE(review): the literal 0 is presumably the cache-policy operand - confirm.
class GlobalLoadSaddrPat_D16<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat<(vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset),
                     vt:$in)),
           (inst $saddr, $voffset, $offset, 0, $in)>;
982
// Load pattern whose address is matched by GlobalOffset (64-bit VGPR address
// plus immediate offset).
class FlatLoadSignedPat<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat<(vt (node (GlobalOffset (i64 VReg_64:$vaddr), i32:$offset))),
           (inst $vaddr, $offset)>;
987
// Global load using the SADDR addressing form (scalar base + VGPR offset +
// immediate offset, as produced by GlobalSAddr).
// NOTE(review): the trailing 0 is presumably the cache-policy operand - confirm.
class GlobalLoadSaddrPat<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat<(vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset))),
           (inst $saddr, $voffset, $offset, 0)>;
992
// Global store using the SADDR addressing form. `node` takes the data first
// and the address second; the output operands are ordered
// (voffset, data, saddr, offset).
class GlobalStoreSaddrPat<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat<(node vt:$data,
                 (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset)),
           (inst $voffset, getVregSrcForVT<vt>.ret:$data, $saddr, $offset)>;
998
// SADDR-form pattern for atomic stores. Unlike GlobalStoreSaddrPat, `node`
// receives the address first and the data second.
class GlobalAtomicStoreSaddrPat<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat<(node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset),
                 vt:$data),
           (inst $voffset, getVregSrcForVT<vt>.ret:$data, $saddr, $offset)>;
1004
// SADDR-form pattern for global atomics that produce a value of type `vt`.
// `data_vt` is the type of the data operand and defaults to `vt`; it differs
// for operations such as compare-and-swap that take a wider operand.
class GlobalAtomicSaddrPat<FLAT_Pseudo inst, SDPatternOperator node,
                           ValueType vt, ValueType data_vt = vt>
  : GCNPat<(vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset),
                     data_vt:$data)),
           (inst $voffset, getVregSrcForVT<data_vt>.ret:$data, $saddr, $offset)>;
1010
// SADDR-form pattern for global atomics whose source pattern has no result
// type: `node` takes the address followed by the data.
class GlobalAtomicNoRtnSaddrPat<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat<(node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset),
                 vt:$data),
           (inst $voffset, getVregSrcForVT<vt>.ret:$data, $saddr, $offset)>;
1016
// Store pattern for the flat addressing form: data first, then an address
// matched by FlatOffset.
class FlatStorePat<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat<(node vt:$data, (FlatOffset i64:$vaddr, i32:$offset)),
           (inst $vaddr, getVregSrcForVT<vt>.ret:$data, $offset)>;
1021
// Store pattern whose address is matched by GlobalOffset: data first, then
// the address.
class FlatStoreSignedPat<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat<(node vt:$data, (GlobalOffset i64:$vaddr, i32:$offset)),
           (inst $vaddr, getVregSrcForVT<vt>.ret:$data, $offset)>;
1026
// Atomic store pattern for the flat addressing form. Atomic stores follow the
// atomic binop operand convention, so the address comes before the data
// (the reverse of FlatStorePat).
class FlatStoreAtomicPat<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat<(node (FlatOffset i64:$vaddr, i32:$offset), vt:$data),
           (inst $vaddr, getVregSrcForVT<vt>.ret:$data, $offset)>;
1033
// Atomic store pattern whose address is matched by GlobalOffset. Atomic
// stores follow the atomic binop operand convention, so the address comes
// before the data. `data_vt` (default `vt`) is the type of the stored value.
class FlatStoreSignedAtomicPat<FLAT_Pseudo inst, SDPatternOperator node,
                               ValueType vt, ValueType data_vt = vt>
  : GCNPat<(node (GlobalOffset i64:$vaddr, i32:$offset), data_vt:$data),
           (inst $vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)>;
1041
// Emits the two selection patterns for one flat atomic operation:
//   * <node>_<size>        -> <inst>_RTN (result is used)
//   * <node>_noret_<size>  -> <inst>     (result is unused)
// `inst` and `node` are record-name prefixes resolved with !cast; <size> is
// vt.Size. The no-return pattern gets AddedComplexity = 1 so that it is
// preferred over the returning one when both could match.
multiclass FlatAtomicPat<string inst, string node, ValueType vt,
                         ValueType data_vt = vt> {
  defvar retFrag   = !cast<PatFrags>(node # "_" # vt.Size);
  defvar noRetFrag = !cast<PatFrags>(node # "_noret_" # vt.Size);

  def : GCNPat<
    (vt (retFrag (FlatOffset i64:$vaddr, i32:$offset), data_vt:$data)),
    (!cast<FLAT_Pseudo>(inst # "_RTN") VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)
  >;

  def : GCNPat<
    (vt (noRetFrag (FlatOffset i64:$vaddr, i32:$offset), data_vt:$data)),
    (!cast<FLAT_Pseudo>(inst) VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)
  > {
    let AddedComplexity = 1;
  }
}
1054
// Shared shape for atomics whose address is matched by GlobalOffset: `node`
// takes (address, data) and produces `vt`; `data_vt` (default `vt`) is the
// type of the data operand.
class FlatSignedAtomicPatBase<FLAT_Pseudo inst, SDPatternOperator node,
                              ValueType vt, ValueType data_vt = vt>
  : GCNPat<(vt (node (GlobalOffset i64:$vaddr, i32:$offset), data_vt:$data)),
           (inst VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)>;
1060
// Emits the returning and no-return GlobalOffset patterns for one atomic.
//   inst       - pseudo name prefix; the returning form is inst#"_RTN".
//   node       - pattern-operator name prefix. Unless isIntr is set, the
//                "_<vt.Size>" suffix is appended; the no-return operator
//                additionally has "_noret" inserted before that suffix.
//   complexity - AddedComplexity of the returning pattern; the no-return
//                pattern uses complexity + 1.
//   isIntr     - when set, the node names carry no size suffix.
multiclass FlatSignedAtomicPat<string inst, string node, ValueType vt,
                               ValueType data_vt = vt, int complexity = 0,
                               bit isIntr = 0> {
  defvar retOp   = !cast<SDPatternOperator>(node # !if(isIntr, "", "_" # vt.Size));
  defvar noRetOp = !cast<PatFrags>(node # "_noret" # !if(isIntr, "", "_" # vt.Size));

  def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst # "_RTN"), retOp, vt, data_vt> {
    let AddedComplexity = complexity;
  }

  def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst), noRetOp, vt, data_vt> {
    let AddedComplexity = !add(complexity, 1);
  }
}
1073
// Convenience wrapper around FlatSignedAtomicPat with complexity 0 and
// isIntr = 1, i.e. the node names are used without a size suffix.
multiclass FlatSignedAtomicIntrPat<string inst, string node, ValueType vt,
                                   ValueType data_vt = vt>
  : FlatSignedAtomicPat<inst, node, vt, data_vt, /* complexity */ 0, /* isIntr */ 1>;
1078
// Like FlatSignedAtomicPat, but the pattern fragments are named
// <intr>_noret_<addrSpaceSuffix> and <intr>_<addrSpaceSuffix>. Only the
// no-return pattern gets AddedComplexity = 1.
multiclass FlatSignedAtomicPatWithAddrSpace<string inst, string intr, string addrSpaceSuffix,
                                            ValueType vt, ValueType data_vt = vt> {
  defvar noRetFrag = !cast<PatFrags>(intr # "_noret_" # addrSpaceSuffix);
  defvar retFrag   = !cast<PatFrags>(intr # "_" # addrSpaceSuffix);

  def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst), noRetFrag, vt, data_vt> {
    let AddedComplexity = 1;
  }

  def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst # "_RTN"), retFrag, vt, data_vt>;
}
1088
// Scratch load addressed by a 32-bit VGPR plus immediate offset, as matched
// by ScratchOffset.
class ScratchLoadSignedPat<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat<(vt (node (ScratchOffset (i32 VGPR_32:$vaddr), i32:$offset))),
           (inst $vaddr, $offset)>;
1093
// D16 flavour of ScratchLoadSignedPat: `node` takes an extra vt:$in value that
// is forwarded as the last operand of `inst`.
// NOTE(review): the literal 0 is presumably the cache-policy operand - confirm.
class ScratchLoadSignedPat_D16<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat<(node (ScratchOffset (i32 VGPR_32:$vaddr), i32:$offset), vt:$in),
           (inst $vaddr, $offset, 0, $in)>;
1098
// Scratch store addressed by a 32-bit VGPR plus immediate offset. Note that
// the data operand comes first in the output instruction.
class ScratchStoreSignedPat<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat<(node vt:$data, (ScratchOffset (i32 VGPR_32:$vaddr), i32:$offset)),
           (inst getVregSrcForVT<vt>.ret:$data, $vaddr, $offset)>;
1103
// Scratch load addressed by a 32-bit SGPR plus immediate offset, as matched
// by ScratchSAddr.
class ScratchLoadSaddrPat<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat<(vt (node (ScratchSAddr (i32 SGPR_32:$saddr), i32:$offset))),
           (inst $saddr, $offset)>;
1108
// D16 flavour of ScratchLoadSaddrPat: `node` takes an extra vt:$in value that
// is forwarded as the last operand of `inst`.
// NOTE(review): the literal 0 is presumably the cache-policy operand - confirm.
class ScratchLoadSaddrPat_D16<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat<(vt (node (ScratchSAddr (i32 SGPR_32:$saddr), i32:$offset), vt:$in)),
           (inst $saddr, $offset, 0, $in)>;
1113
// Scratch store addressed by a 32-bit SGPR plus immediate offset. The data
// operand comes first in the output instruction.
class ScratchStoreSaddrPat<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat<(node vt:$data, (ScratchSAddr (i32 SGPR_32:$saddr), i32:$offset)),
           (inst getVregSrcForVT<vt>.ret:$data, $saddr, $offset)>;
1119
// Scratch load using both a VGPR and an SGPR address component plus an
// immediate offset, as matched by ScratchSVAddr.
// NOTE(review): the trailing 0 is presumably the cache-policy operand - confirm.
class ScratchLoadSVaddrPat<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat<(vt (node (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i32:$offset))),
           (inst $vaddr, $saddr, $offset, 0)>;
1124
// Scratch store using both a VGPR and an SGPR address component plus an
// immediate offset. The data operand comes first in the output instruction.
class ScratchStoreSVaddrPat<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat<(node vt:$data,
                 (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i32:$offset)),
           (inst getVregSrcForVT<vt>.ret:$data, $vaddr, $saddr, $offset)>;
1130
// D16 flavour of ScratchLoadSVaddrPat: `node` takes an extra vt:$in value that
// is forwarded as the last operand of `inst`.
// NOTE(review): the literal 0 is presumably the cache-policy operand - confirm.
class ScratchLoadSVaddrPat_D16<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat<(vt (node (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i32:$offset),
                     vt:$in)),
           (inst $vaddr, $saddr, $offset, 0, $in)>;
1135
1136let OtherPredicates = [HasFlatAddressSpace] in {
1137
// Sub-dword atomic loads, selected to the unsigned byte/short loads.
def : FlatLoadPat<FLAT_LOAD_UBYTE,   atomic_load_8_flat,  i32>;
def : FlatLoadPat<FLAT_LOAD_UBYTE,   atomic_load_8_flat,  i16>;
def : FlatLoadPat<FLAT_LOAD_USHORT,  atomic_load_16_flat, i32>;
def : FlatLoadPat<FLAT_LOAD_USHORT,  atomic_load_16_flat, i16>;
// Extending byte loads: any-extend and zero-extend use UBYTE, sign-extend
// uses SBYTE.
def : FlatLoadPat<FLAT_LOAD_UBYTE,   extloadi8_flat,      i32>;
def : FlatLoadPat<FLAT_LOAD_UBYTE,   zextloadi8_flat,     i32>;
def : FlatLoadPat<FLAT_LOAD_SBYTE,   sextloadi8_flat,     i32>;
def : FlatLoadPat<FLAT_LOAD_UBYTE,   extloadi8_flat,      i16>;
def : FlatLoadPat<FLAT_LOAD_UBYTE,   zextloadi8_flat,     i16>;
def : FlatLoadPat<FLAT_LOAD_SBYTE,   sextloadi8_flat,     i16>;
// 16-bit loads.
def : FlatLoadPat<FLAT_LOAD_USHORT,  extloadi16_flat,     i32>;
def : FlatLoadPat<FLAT_LOAD_USHORT,  zextloadi16_flat,    i32>;
def : FlatLoadPat<FLAT_LOAD_USHORT,  load_flat,           i16>;
def : FlatLoadPat<FLAT_LOAD_SSHORT,  sextloadi16_flat,    i32>;
// Three-dword load.
def : FlatLoadPat<FLAT_LOAD_DWORDX3, load_flat,           v3i32>;

// 32- and 64-bit atomic loads use the ordinary dword loads.
def : FlatLoadPat<FLAT_LOAD_DWORD,   atomic_load_32_flat, i32>;
def : FlatLoadPat<FLAT_LOAD_DWORDX2, atomic_load_64_flat, i64>;
1156
// Truncating stores of a 32-bit value.
def : FlatStorePat<FLAT_STORE_BYTE,  truncstorei8_flat,  i32>;
def : FlatStorePat<FLAT_STORE_SHORT, truncstorei16_flat, i32>;

// Full-width loads and stores, one pair per value type of each register size.
foreach vt = Reg32Types.types in {
  def : FlatLoadPat <FLAT_LOAD_DWORD,  load_flat,  vt>;
  def : FlatStorePat<FLAT_STORE_DWORD, store_flat, vt>;
}

foreach vt = VReg_64.RegTypes in {
  def : FlatStorePat<FLAT_STORE_DWORDX2, store_flat, vt>;
  def : FlatLoadPat <FLAT_LOAD_DWORDX2,  load_flat,  vt>;
}

def : FlatStorePat<FLAT_STORE_DWORDX3, store_flat, v3i32>;

foreach vt = VReg_128.RegTypes in {
  def : FlatLoadPat <FLAT_LOAD_DWORDX4,  load_flat,  vt>;
  def : FlatStorePat<FLAT_STORE_DWORDX4, store_flat, vt>;
}

// Atomic stores (address-first operand order, see FlatStoreAtomicPat).
def : FlatStoreAtomicPat<FLAT_STORE_DWORD,   atomic_store_32_flat, i32>;
def : FlatStoreAtomicPat<FLAT_STORE_DWORDX2, atomic_store_64_flat, i64>;
def : FlatStoreAtomicPat<FLAT_STORE_BYTE,    atomic_store_8_flat,  i32>;
def : FlatStoreAtomicPat<FLAT_STORE_BYTE,    atomic_store_8_flat,  i16>;
def : FlatStoreAtomicPat<FLAT_STORE_SHORT,   atomic_store_16_flat, i32>;
def : FlatStoreAtomicPat<FLAT_STORE_SHORT,   atomic_store_16_flat, i16>;
1183
// Read-modify-write atomics selected to FLAT_ATOMIC_* pseudos. The same set
// of patterns is emitted for the "flat" and the "global" node-name suffix.
foreach as = [ "flat", "global" ] in {
  // 32-bit operations.
  defm : FlatAtomicPat<"FLAT_ATOMIC_ADD",     "atomic_load_add_" # as,        i32>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_SUB",     "atomic_load_sub_" # as,        i32>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_INC",     "atomic_load_uinc_wrap_" # as,  i32>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_DEC",     "atomic_load_udec_wrap_" # as,  i32>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_AND",     "atomic_load_and_" # as,        i32>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_SMAX",    "atomic_load_max_" # as,        i32>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_UMAX",    "atomic_load_umax_" # as,       i32>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_SMIN",    "atomic_load_min_" # as,        i32>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_UMIN",    "atomic_load_umin_" # as,       i32>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_OR",      "atomic_load_or_" # as,         i32>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_SWAP",    "atomic_swap_" # as,            i32>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_CMPSWAP", "AMDGPUatomic_cmp_swap_" # as,  i32, v2i32>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_XOR",     "atomic_load_xor_" # as,        i32>;

  // 64-bit (_X2) operations.
  defm : FlatAtomicPat<"FLAT_ATOMIC_ADD_X2",     "atomic_load_add_" # as,        i64>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_SUB_X2",     "atomic_load_sub_" # as,        i64>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_INC_X2",     "atomic_load_uinc_wrap_" # as,  i64>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_DEC_X2",     "atomic_load_udec_wrap_" # as,  i64>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_AND_X2",     "atomic_load_and_" # as,        i64>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_SMAX_X2",    "atomic_load_max_" # as,        i64>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_UMAX_X2",    "atomic_load_umax_" # as,       i64>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_SMIN_X2",    "atomic_load_min_" # as,        i64>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_UMIN_X2",    "atomic_load_umin_" # as,       i64>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_OR_X2",      "atomic_load_or_" # as,         i64>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_SWAP_X2",    "atomic_swap_" # as,            i64>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_CMPSWAP_X2", "AMDGPUatomic_cmp_swap_" # as,  i64, v2i64>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_XOR_X2",     "atomic_load_xor_" # as,        i64>;
} // end foreach as
1213
// Stores of a 16-bit value.
def : FlatStorePat<FLAT_STORE_BYTE,  truncstorei8_flat, i16>;
def : FlatStorePat<FLAT_STORE_SHORT, store_flat,        i16>;

// Truncating stores matched by the *_hi16 fragments, selected to the D16_HI
// store pseudos.
// NOTE(review): a nested `let` replaces (does not append to) any enclosing
// OtherPredicates list.
let OtherPredicates = [HasD16LoadStore] in {
  def : FlatStorePat<FLAT_STORE_SHORT_D16_HI, truncstorei16_hi16_flat, i32>;
  def : FlatStorePat<FLAT_STORE_BYTE_D16_HI,  truncstorei8_hi16_flat,  i32>;
} // End OtherPredicates = [HasD16LoadStore]
1221
// D16 flat loads. Each fragment is instantiated for both v2i16 and v2f16.
// NOTE(review): a nested `let` replaces (does not append to) any enclosing
// OtherPredicates list.
let OtherPredicates = [D16PreservesUnusedBits] in {
  // *_d16_hi fragments.
  def : FlatLoadPat_D16<FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2i16>;
  def : FlatLoadPat_D16<FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2f16>;
  def : FlatLoadPat_D16<FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat,   v2i16>;
  def : FlatLoadPat_D16<FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat,   v2f16>;
  def : FlatLoadPat_D16<FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat,         v2i16>;
  def : FlatLoadPat_D16<FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat,         v2f16>;

  // *_d16_lo fragments.
  def : FlatLoadPat_D16<FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2i16>;
  def : FlatLoadPat_D16<FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2f16>;
  def : FlatLoadPat_D16<FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat,   v2i16>;
  def : FlatLoadPat_D16<FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat,   v2f16>;
  def : FlatLoadPat_D16<FLAT_LOAD_SHORT_D16, load_d16_lo_flat,         v2i16>;
  def : FlatLoadPat_D16<FLAT_LOAD_SHORT_D16, load_d16_lo_flat,         v2f16>;
} // End OtherPredicates = [D16PreservesUnusedBits]
1237
1238} // End OtherPredicates = [HasFlatAddressSpace]
1239
1240
// Emits both addressing forms of a global load pattern: the VGPR-address form
// for `inst` (AddedComplexity 10) and the SADDR form for the pseudo named
// <inst>_SADDR (AddedComplexity 11, so it is tried first).
multiclass GlobalFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
  let AddedComplexity = 10 in
  def : FlatLoadSignedPat<inst, node, vt>;

  let AddedComplexity = 11 in
  def : GlobalLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst) # "_SADDR"), node, vt>;
}
1250
// D16 counterpart of GlobalFLATLoadPats: VGPR-address form at AddedComplexity
// 10, SADDR form (<inst>_SADDR) at AddedComplexity 11.
multiclass GlobalFLATLoadPats_D16<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
  let AddedComplexity = 10 in
  def : FlatSignedLoadPat_D16<inst, node, vt>;

  let AddedComplexity = 11 in
  def : GlobalLoadSaddrPat_D16<!cast<FLAT_Pseudo>(!cast<string>(inst) # "_SADDR"), node, vt>;
}
1260
// Emits both addressing forms of a global store pattern: VGPR-address form at
// AddedComplexity 10, SADDR form (<inst>_SADDR) at AddedComplexity 11.
multiclass GlobalFLATStorePats<FLAT_Pseudo inst, SDPatternOperator node,
                               ValueType vt> {
  let AddedComplexity = 10 in
  def : FlatStoreSignedPat<inst, node, vt>;

  let AddedComplexity = 11 in
  def : GlobalStoreSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst) # "_SADDR"), node, vt>;
}
1271
// Atomic-store counterpart of GlobalFLATStorePats. Atomic stores take their
// operands in the opposite order from regular stores (address first), so the
// dedicated atomic pattern classes are used. Complexities are 10 for the
// VGPR-address form and 11 for the SADDR form.
multiclass GlobalFLATAtomicStorePats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
  let AddedComplexity = 10 in
  def : FlatStoreSignedAtomicPat<inst, node, vt>;

  let AddedComplexity = 11 in
  def : GlobalAtomicStoreSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst) # "_SADDR"), node, vt>;
}
1282
// No-return global atomic patterns. `inst` is the pseudo name and `node` the
// full PatFrags name; both are resolved with !cast. The VGPR-address form
// gets AddedComplexity 11 and the SADDR form (<inst>_SADDR) gets 13.
multiclass GlobalFLATAtomicPatsNoRtnBase<string inst, string node, ValueType vt,
                                         ValueType data_vt = vt> {
  def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst), !cast<PatFrags>(node), vt, data_vt> {
    let AddedComplexity = 11;
  }

  def : GlobalAtomicSaddrPat<!cast<FLAT_Pseudo>(inst # "_SADDR"), !cast<PatFrags>(node), vt, data_vt> {
    let AddedComplexity = 13;
  }
}
1291
// Returning global atomic patterns. `node` is resolved as a PatFrags when
// isPatFrags is set and as an SDPatternOperator otherwise. The VGPR-address
// form (<inst>_RTN) gets AddedComplexity 10 and the SADDR form
// (<inst>_SADDR_RTN) gets 12.
multiclass GlobalFLATAtomicPatsRtnBase<string inst, string node, ValueType vt,
                                       ValueType data_vt = vt, bit isPatFrags = 0> {
  defvar retOp = !if(isPatFrags, !cast<PatFrags>(node), !cast<SDPatternOperator>(node));

  def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst # "_RTN"), retOp, vt, data_vt> {
    let AddedComplexity = 10;
  }

  def : GlobalAtomicSaddrPat<!cast<FLAT_Pseudo>(inst # "_SADDR_RTN"), retOp, vt, data_vt> {
    let AddedComplexity = 12;
  }
}
1302
// No-return patterns for `node`: the fragment name is <node>_noret, followed
// by _<vt.Size> unless isIntr is set.
multiclass GlobalFLATAtomicPatsNoRtn<string inst, string node, ValueType vt,
                                     ValueType data_vt = vt, bit isIntr = 0> {
  defm : GlobalFLATAtomicPatsNoRtnBase<inst, node # "_noret" # !if(isIntr, "", "_" # vt.Size),
                                       vt, data_vt>;
}
1306
// Returning patterns for `node`: the operator name is <node>, followed by
// _<vt.Size> unless isIntr is set.
multiclass GlobalFLATAtomicPatsRtn<string inst, string node, ValueType vt,
                                   ValueType data_vt = vt, bit isIntr = 0> {
  defm : GlobalFLATAtomicPatsRtnBase<inst, node # !if(isIntr, "", "_" # vt.Size),
                                     vt, data_vt>;
}
1310
// Emits the no-return patterns followed by the returning patterns for one
// global atomic.
multiclass GlobalFLATAtomicPats<string inst, string node, ValueType vt,
                                ValueType data_vt = vt, bit isIntr = 0> {
  defm : GlobalFLATAtomicPatsNoRtn<inst, node, vt, data_vt, isIntr>;
  defm : GlobalFLATAtomicPatsRtn<inst, node, vt, data_vt, isIntr>;
}
1315
// No-return patterns for the fragment named <intr>_noret_<addrSpaceSuffix>.
multiclass GlobalFLATAtomicPatsNoRtnWithAddrSpace<string inst, string intr, string addrSpaceSuffix,
                                                  ValueType vt, ValueType data_vt = vt> {
  defm : GlobalFLATAtomicPatsNoRtnBase<inst, intr # "_noret_" # addrSpaceSuffix, vt, data_vt>;
}
1319
// Returning patterns for the fragment named <intr>_<addrSpaceSuffix>; the
// name is resolved as a PatFrags (isPatFrags = 1).
multiclass GlobalFLATAtomicPatsRtnWithAddrSpace<string inst, string intr, string addrSpaceSuffix,
                                                ValueType vt, ValueType data_vt = vt> {
  defm : GlobalFLATAtomicPatsRtnBase<inst, intr # "_" # addrSpaceSuffix, vt, data_vt,
                                     /*isPatFrags*/ 1>;
}
1323
// Emits the no-return patterns followed by the returning patterns for an
// atomic whose fragments are named with an address-space suffix.
multiclass GlobalFLATAtomicPatsWithAddrSpace<string inst, string intr, string addrSpaceSuffix,
                                             ValueType vt, ValueType data_vt = vt> {
  defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace<inst, intr, addrSpaceSuffix, vt, data_vt>;
  defm : GlobalFLATAtomicPatsRtnWithAddrSpace<inst, intr, addrSpaceSuffix, vt, data_vt>;
}
1328
// Convenience wrapper around GlobalFLATAtomicPats with isIntr = 1, i.e. the
// node names are used without a size suffix.
multiclass GlobalFLATAtomicIntrPats<string inst, string node, ValueType vt,
                                    ValueType data_vt = vt>
  : GlobalFLATAtomicPats<inst, node, vt, data_vt, /* isIntr */ 1>;
1333
// Emits the three addressing forms of a scratch load pattern, in increasing
// AddedComplexity:
//   25 - VGPR address          (inst)
//   26 - SGPR address          (<inst>_SADDR)
//   27 - VGPR + SGPR address   (<inst>_SVS), only with HasFlatScratchSVSMode
multiclass ScratchFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
  let AddedComplexity = 25 in
  def : ScratchLoadSignedPat<inst, node, vt>;

  let AddedComplexity = 26 in
  def : ScratchLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst) # "_SADDR"), node, vt>;

  let SubtargetPredicate = HasFlatScratchSVSMode, AddedComplexity = 27 in
  def : ScratchLoadSVaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst) # "_SVS"), node, vt>;
}
1348
// Emits the three addressing forms of a scratch store pattern, in increasing
// AddedComplexity:
//   25 - VGPR address          (inst)
//   26 - SGPR address          (<inst>_SADDR)
//   27 - VGPR + SGPR address   (<inst>_SVS), only with HasFlatScratchSVSMode
multiclass ScratchFLATStorePats<FLAT_Pseudo inst, SDPatternOperator node,
                                ValueType vt> {
  let AddedComplexity = 25 in
  def : ScratchStoreSignedPat<inst, node, vt>;

  let AddedComplexity = 26 in
  def : ScratchStoreSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst) # "_SADDR"), node, vt>;

  let SubtargetPredicate = HasFlatScratchSVSMode, AddedComplexity = 27 in
  def : ScratchStoreSVaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst) # "_SVS"), node, vt>;
}
1364
// D16 counterpart of ScratchFLATLoadPats, with the same three addressing
// forms and complexities:
//   25 - VGPR address          (inst)
//   26 - SGPR address          (<inst>_SADDR)
//   27 - VGPR + SGPR address   (<inst>_SVS), only with HasFlatScratchSVSMode
multiclass ScratchFLATLoadPats_D16<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
  let AddedComplexity = 25 in
  def : ScratchLoadSignedPat_D16<inst, node, vt>;

  let AddedComplexity = 26 in
  def : ScratchLoadSaddrPat_D16<!cast<FLAT_Pseudo>(!cast<string>(inst) # "_SADDR"), node, vt>;

  let SubtargetPredicate = HasFlatScratchSVSMode, AddedComplexity = 27 in
  def : ScratchLoadSVaddrPat_D16<!cast<FLAT_Pseudo>(!cast<string>(inst) # "_SVS"), node, vt>;
}
1379
1380let OtherPredicates = [HasFlatGlobalInsts] in {
1381
// Sub-dword atomic loads, selected to the unsigned byte/short loads.
defm : GlobalFLATLoadPats<GLOBAL_LOAD_UBYTE,  atomic_load_8_global,  i32>;
defm : GlobalFLATLoadPats<GLOBAL_LOAD_UBYTE,  atomic_load_8_global,  i16>;
defm : GlobalFLATLoadPats<GLOBAL_LOAD_USHORT, atomic_load_16_global, i32>;
defm : GlobalFLATLoadPats<GLOBAL_LOAD_USHORT, atomic_load_16_global, i16>;
// Extending byte loads: any-extend and zero-extend use UBYTE, sign-extend
// uses SBYTE.
defm : GlobalFLATLoadPats<GLOBAL_LOAD_UBYTE,  extloadi8_global,      i32>;
defm : GlobalFLATLoadPats<GLOBAL_LOAD_UBYTE,  zextloadi8_global,     i32>;
defm : GlobalFLATLoadPats<GLOBAL_LOAD_SBYTE,  sextloadi8_global,     i32>;
defm : GlobalFLATLoadPats<GLOBAL_LOAD_UBYTE,  extloadi8_global,      i16>;
defm : GlobalFLATLoadPats<GLOBAL_LOAD_UBYTE,  zextloadi8_global,     i16>;
defm : GlobalFLATLoadPats<GLOBAL_LOAD_SBYTE,  sextloadi8_global,     i16>;
// 16-bit loads.
defm : GlobalFLATLoadPats<GLOBAL_LOAD_USHORT, extloadi16_global,     i32>;
defm : GlobalFLATLoadPats<GLOBAL_LOAD_USHORT, zextloadi16_global,    i32>;
defm : GlobalFLATLoadPats<GLOBAL_LOAD_SSHORT, sextloadi16_global,    i32>;
defm : GlobalFLATLoadPats<GLOBAL_LOAD_USHORT, load_global,           i16>;
1396
// Full-width loads and stores, one pair per value type of each register size.
foreach vt = Reg32Types.types in {
  defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORD,  load_global,  vt>;
  defm : GlobalFLATStorePats<GLOBAL_STORE_DWORD, store_global, vt>;
}

foreach vt = VReg_64.RegTypes in {
  defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX2,  load_global,  vt>;
  defm : GlobalFLATStorePats<GLOBAL_STORE_DWORDX2, store_global, vt>;
}

defm : GlobalFLATLoadPats<GLOBAL_LOAD_DWORDX3, load_global, v3i32>;

foreach vt = VReg_128.RegTypes in {
  defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX4,  load_global,  vt>;
  defm : GlobalFLATStorePats<GLOBAL_STORE_DWORDX4, store_global, vt>;
}
1413
1414// There is no distinction for atomic load lowering during selection;
1415// the memory legalizer will set the cache bits and insert the
1416// appropriate waits.
1417defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORD, atomic_load_32_global, i32>;
1418defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX2, atomic_load_64_global, i64>;
1419
1420defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, truncstorei8_global, i32>;
1421defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, truncstorei8_global, i16>;
1422defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, truncstorei16_global, i32>;
1423defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, store_global, i16>;
1424defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX3, store_global, v3i32>;
1425
1426let OtherPredicates = [HasD16LoadStore] in {
1427defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT_D16_HI, truncstorei16_hi16_global, i32>;
1428defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE_D16_HI, truncstorei8_hi16_global, i32>;
1429}
1430
1431let OtherPredicates = [D16PreservesUnusedBits] in {
1432defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2i16>;
1433defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2f16>;
1434defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global, v2i16>;
1435defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global, v2f16>;
1436defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16_HI, load_d16_hi_global, v2i16>;
1437defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16_HI, load_d16_hi_global, v2f16>;
1438
1439defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_d16_lo_global, v2i16>;
1440defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_d16_lo_global, v2f16>;
1441defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_d16_lo_global, v2i16>;
1442defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_d16_lo_global, v2f16>;
1443defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2i16>;
1444defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2f16>;
1445}
1446
1447defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_BYTE, atomic_store_8_global, i32>;
1448defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_BYTE, atomic_store_8_global, i16>;
1449defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_SHORT, atomic_store_16_global, i32>;
1450defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_SHORT, atomic_store_16_global, i16>;
1451defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_DWORD, atomic_store_32_global, i32>;
1452defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_DWORDX2, atomic_store_64_global, i64>;
1453
1454defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD", "atomic_load_add_global", i32>;
1455defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SUB", "atomic_load_sub_global", i32>;
1456defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC", "atomic_load_uinc_wrap_global", i32>;
1457defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_DEC", "atomic_load_udec_wrap_global", i32>;
1458defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_AND", "atomic_load_and_global", i32>;
1459defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMAX", "atomic_load_max_global", i32>;
1460defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMAX", "atomic_load_umax_global", i32>;
1461defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMIN", "atomic_load_min_global", i32>;
1462defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMIN", "atomic_load_umin_global", i32>;
1463defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_OR", "atomic_load_or_global", i32>;
1464defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SWAP", "atomic_swap_global", i32>;
1465defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CMPSWAP", "AMDGPUatomic_cmp_swap_global", i32, v2i32>;
1466defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR", "atomic_load_xor_global", i32>;
1467defm : GlobalFLATAtomicPatsRtn <"GLOBAL_ATOMIC_CSUB", "int_amdgcn_global_atomic_csub", i32, i32, /* isIntr */ 1>;
1468
1469defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_X2", "atomic_load_add_global", i64>;
1470defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SUB_X2", "atomic_load_sub_global", i64>;
1471defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC_X2", "atomic_load_uinc_wrap_global", i64>;
1472defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_DEC_X2", "atomic_load_udec_wrap_global", i64>;
1473defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_AND_X2", "atomic_load_and_global", i64>;
1474defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMAX_X2", "atomic_load_max_global", i64>;
1475defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMAX_X2", "atomic_load_umax_global", i64>;
1476defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMIN_X2", "atomic_load_min_global", i64>;
1477defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMIN_X2", "atomic_load_umin_global", i64>;
1478defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_OR_X2", "atomic_load_or_global", i64>;
1479defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SWAP_X2", "atomic_swap_global", i64>;
1480defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CMPSWAP_X2", "AMDGPUatomic_cmp_swap_global", i64, v2i64>;
1481defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR_X2", "atomic_load_xor_global", i64>;
1482
1483let OtherPredicates = [isGFX10Plus] in {
1484defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN", "atomic_load_fmin_global", f32>;
1485defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX", "atomic_load_fmax_global", f32>;
1486defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMIN", "int_amdgcn_global_atomic_fmin", f32>;
1487defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMAX", "int_amdgcn_global_atomic_fmax", f32>;
1488defm : FlatSignedAtomicPat <"FLAT_ATOMIC_FMIN", "atomic_load_fmin_flat", f32>;
1489defm : FlatSignedAtomicPat <"FLAT_ATOMIC_FMAX", "atomic_load_fmax_flat", f32>;
1490defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMIN", "int_amdgcn_flat_atomic_fmin", f32>;
1491defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMAX", "int_amdgcn_flat_atomic_fmax", f32>;
1492}
1493
1494let OtherPredicates = [isGFX10Only] in {
1495defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN_X2", "atomic_load_fmin_global", f64>;
1496defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX_X2", "atomic_load_fmax_global", f64>;
1497defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMIN_X2", "int_amdgcn_global_atomic_fmin", f64>;
1498defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMAX_X2", "int_amdgcn_global_atomic_fmax", f64>;
1499defm : FlatSignedAtomicPat <"FLAT_ATOMIC_FMIN_X2", "atomic_load_fmin_flat", f64>;
1500defm : FlatSignedAtomicPat <"FLAT_ATOMIC_FMAX_X2", "atomic_load_fmax_flat", f64>;
1501defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMIN_X2", "int_amdgcn_flat_atomic_fmin", f64>;
1502defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMAX_X2", "int_amdgcn_flat_atomic_fmax", f64>;
1503}
1504
1505let OtherPredicates = [HasAtomicFaddNoRtnInsts] in {
1506defm : GlobalFLATAtomicPatsNoRtn <"GLOBAL_ATOMIC_ADD_F32", "atomic_load_fadd_global", f32>;
1507defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_ADD_F32", "int_amdgcn_flat_atomic_fadd", "global_addrspace", f32>;
1508defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_ADD_F32", "int_amdgcn_global_atomic_fadd", "global_addrspace", f32>;
1509}
1510
1511let OtherPredicates = [HasAtomicBufferGlobalPkAddF16NoRtnInsts] in {
1512defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_flat_atomic_fadd", "global_addrspace", v2f16>;
1513defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_global_atomic_fadd", "global_addrspace", v2f16>;
1514}
1515
1516let OtherPredicates = [HasAtomicFaddRtnInsts] in {
1517defm : GlobalFLATAtomicPatsRtn <"GLOBAL_ATOMIC_ADD_F32", "atomic_load_fadd_global", f32>;
1518defm : GlobalFLATAtomicPatsRtnWithAddrSpace <"GLOBAL_ATOMIC_ADD_F32", "int_amdgcn_flat_atomic_fadd", "global_addrspace", f32>;
1519defm : GlobalFLATAtomicPatsRtnWithAddrSpace <"GLOBAL_ATOMIC_ADD_F32", "int_amdgcn_global_atomic_fadd", "global_addrspace", f32>;
1520}
1521
1522let OtherPredicates = [HasAtomicBufferGlobalPkAddF16Insts] in {
1523defm : GlobalFLATAtomicPatsRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_flat_atomic_fadd", "global_addrspace", v2f16>;
1524defm : GlobalFLATAtomicPatsRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_global_atomic_fadd", "global_addrspace", v2f16>;
1525}
1526
1527let OtherPredicates = [isGFX90APlus] in {
1528defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_F64", "atomic_load_fadd_global", f64>;
1529defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MIN_F64", "atomic_load_fmin_global", f64>;
1530defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MAX_F64", "atomic_load_fmax_global", f64>;
1531defm : GlobalFLATAtomicPatsWithAddrSpace<"GLOBAL_ATOMIC_ADD_F64", "int_amdgcn_flat_atomic_fadd", "global_addrspace", f64>;
1532defm : GlobalFLATAtomicPatsWithAddrSpace<"GLOBAL_ATOMIC_ADD_F64", "int_amdgcn_global_atomic_fadd", "global_addrspace", f64>;
1533defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_MIN_F64", "int_amdgcn_global_atomic_fmin", f64>;
1534defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_MAX_F64", "int_amdgcn_global_atomic_fmax", f64>;
1535defm : FlatSignedAtomicPat <"FLAT_ATOMIC_ADD_F64", "atomic_load_fadd_flat", f64>;
1536defm : FlatSignedAtomicPat <"FLAT_ATOMIC_MIN_F64", "atomic_load_fmin_flat", f64>;
1537defm : FlatSignedAtomicPat <"FLAT_ATOMIC_MAX_F64", "atomic_load_fmax_flat", f64>;
1538defm : FlatSignedAtomicPatWithAddrSpace <"FLAT_ATOMIC_ADD_F64", "int_amdgcn_flat_atomic_fadd", "flat_addrspace", f64>;
1539defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_MIN_F64", "int_amdgcn_flat_atomic_fmin", f64>;
1540defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_MAX_F64", "int_amdgcn_flat_atomic_fmax", f64>;
1541}
1542
1543let OtherPredicates = [HasFlatAtomicFaddF32Inst] in {
1544defm : FlatSignedAtomicPat <"FLAT_ATOMIC_ADD_F32", "atomic_load_fadd_flat", f32>;
1545defm : FlatSignedAtomicPatWithAddrSpace <"FLAT_ATOMIC_ADD_F32", "int_amdgcn_flat_atomic_fadd", "flat_addrspace", f32>;
1546}
1547
1548let OtherPredicates = [HasAtomicFlatPkAdd16Insts] in {
1549defm : FlatSignedAtomicPatWithAddrSpace <"FLAT_ATOMIC_PK_ADD_F16", "int_amdgcn_flat_atomic_fadd", "flat_addrspace", v2f16>;
1550defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_PK_ADD_BF16", "int_amdgcn_flat_atomic_fadd_v2bf16", v2i16>;
1551}
1552
1553let OtherPredicates = [HasAtomicGlobalPkAddBF16Inst] in
1554defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_PK_ADD_BF16", "int_amdgcn_global_atomic_fadd_v2bf16", v2i16>;
1555
1556} // End OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10
1557
// Instruction-selection patterns for private (scratch) memory operations via
// the SCRATCH_* pseudos. The nested `let OtherPredicates` lists below restate
// HasFlatScratchInsts and EnableFlatScratch because an inner `let` replaces
// the outer value rather than extending it.
1558let OtherPredicates = [HasFlatScratchInsts, EnableFlatScratch] in {
1559
// Sub-dword any/zero/sign-extending loads and the plain i16 load.
1560defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, extloadi8_private, i32>;
1561defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, zextloadi8_private, i32>;
1562defm : ScratchFLATLoadPats <SCRATCH_LOAD_SBYTE, sextloadi8_private, i32>;
1563defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, extloadi8_private, i16>;
1564defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, zextloadi8_private, i16>;
1565defm : ScratchFLATLoadPats <SCRATCH_LOAD_SBYTE, sextloadi8_private, i16>;
1566defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, extloadi16_private, i32>;
1567defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, zextloadi16_private, i32>;
1568defm : ScratchFLATLoadPats <SCRATCH_LOAD_SSHORT, sextloadi16_private, i32>;
1569defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, load_private, i16>;
1570
// Full-width loads/stores: one load and one store pattern per value type in
// the 32-, 64- and 128-bit type lists; the 96-bit load is v3i32 only.
1571foreach vt = Reg32Types.types in {
1572defm : ScratchFLATLoadPats <SCRATCH_LOAD_DWORD, load_private, vt>;
1573defm : ScratchFLATStorePats <SCRATCH_STORE_DWORD, store_private, vt>;
1574}
1575
1576foreach vt = VReg_64.RegTypes in {
1577defm : ScratchFLATLoadPats <SCRATCH_LOAD_DWORDX2, load_private, vt>;
1578defm : ScratchFLATStorePats <SCRATCH_STORE_DWORDX2, store_private, vt>;
1579}
1580
1581defm : ScratchFLATLoadPats <SCRATCH_LOAD_DWORDX3, load_private, v3i32>;
1582
1583foreach vt = VReg_128.RegTypes in {
1584defm : ScratchFLATLoadPats <SCRATCH_LOAD_DWORDX4, load_private, vt>;
1585defm : ScratchFLATStorePats <SCRATCH_STORE_DWORDX4, store_private, vt>;
1586}
1587
// Truncating and 16-bit stores, plus the 96-bit (v3i32) store.
1588defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE, truncstorei8_private, i32>;
1589defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE, truncstorei8_private, i16>;
1590defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT, truncstorei16_private, i32>;
1591defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT, store_private, i16>;
1592defm : ScratchFLATStorePats <SCRATCH_STORE_DWORDX3, store_private, v3i32>;
1593
// D16 "hi" stores (selected from the *_hi16 truncating-store fragments).
1594let OtherPredicates = [HasD16LoadStore, HasFlatScratchInsts, EnableFlatScratch] in {
1595defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT_D16_HI, truncstorei16_hi16_private, i32>;
1596defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE_D16_HI, truncstorei8_hi16_private, i32>;
1597}
1598
// D16 loads producing v2i16 / v2f16: first the *_d16_hi fragments, then the
// *_d16_lo fragments.
1599let OtherPredicates = [D16PreservesUnusedBits, HasFlatScratchInsts, EnableFlatScratch] in {
1600defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_private, v2i16>;
1601defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_private, v2f16>;
1602defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_private, v2i16>;
1603defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_private, v2f16>;
1604defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16_HI, load_d16_hi_private, v2i16>;
1605defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16_HI, load_d16_hi_private, v2f16>;
1606
1607defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16, az_extloadi8_d16_lo_private, v2i16>;
1608defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16, az_extloadi8_d16_lo_private, v2f16>;
1609defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16, sextloadi8_d16_lo_private, v2i16>;
1610defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16, sextloadi8_d16_lo_private, v2f16>;
1611defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16, load_d16_lo_private, v2i16>;
1612defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16, load_d16_lo_private, v2f16>;
1613}
1614
1615} // End OtherPredicates = [HasFlatScratchInsts,EnableFlatScratch]
1616
1617//===----------------------------------------------------------------------===//
1618// Target
1619//===----------------------------------------------------------------------===//
1620
1621//===----------------------------------------------------------------------===//
1622// CI
1623//===----------------------------------------------------------------------===//
1624
// Real (encoded) FLAT instruction for the GFX7-only assembler predicate.
//   op - 7-bit opcode forwarded to FLAT_Real.
//   ps - pseudo instruction this encoding is derived from; its PseudoInstr
//        name ties the real instruction to the SI encoding family.
// Decoding uses the "GFX7" decoder namespace.
class FLAT_Real_ci<bits<7> op, FLAT_Pseudo ps>
    : FLAT_Real<op, ps>,
      SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SI> {
  let DecoderNamespace = "GFX7";
  let AssemblerPredicate = isGFX7Only;
}
1631
// GFX7 encodings of the flat loads. Note the opcode values, not the listing
// order, define the encoding: DWORDX4 is 0xe and DWORDX3 is 0xf.
1632def FLAT_LOAD_UBYTE_ci         : FLAT_Real_ci <0x8,  FLAT_LOAD_UBYTE>;
1633def FLAT_LOAD_SBYTE_ci         : FLAT_Real_ci <0x9,  FLAT_LOAD_SBYTE>;
1634def FLAT_LOAD_USHORT_ci        : FLAT_Real_ci <0xa,  FLAT_LOAD_USHORT>;
1635def FLAT_LOAD_SSHORT_ci        : FLAT_Real_ci <0xb,  FLAT_LOAD_SSHORT>;
1636def FLAT_LOAD_DWORD_ci         : FLAT_Real_ci <0xc,  FLAT_LOAD_DWORD>;
1637def FLAT_LOAD_DWORDX2_ci       : FLAT_Real_ci <0xd,  FLAT_LOAD_DWORDX2>;
1638def FLAT_LOAD_DWORDX4_ci       : FLAT_Real_ci <0xe,  FLAT_LOAD_DWORDX4>;
1639def FLAT_LOAD_DWORDX3_ci       : FLAT_Real_ci <0xf,  FLAT_LOAD_DWORDX3>;
1640
// GFX7 encodings of the flat stores (DWORDX4 is 0x1e, DWORDX3 is 0x1f).
1641def FLAT_STORE_BYTE_ci         : FLAT_Real_ci <0x18, FLAT_STORE_BYTE>;
1642def FLAT_STORE_SHORT_ci        : FLAT_Real_ci <0x1a, FLAT_STORE_SHORT>;
1643def FLAT_STORE_DWORD_ci        : FLAT_Real_ci <0x1c, FLAT_STORE_DWORD>;
1644def FLAT_STORE_DWORDX2_ci      : FLAT_Real_ci <0x1d, FLAT_STORE_DWORDX2>;
1645def FLAT_STORE_DWORDX4_ci      : FLAT_Real_ci <0x1e, FLAT_STORE_DWORDX4>;
1646def FLAT_STORE_DWORDX3_ci      : FLAT_Real_ci <0x1f, FLAT_STORE_DWORDX3>;
1647
// Defines the GFX7 real instructions for one flat atomic: `_ci` for the pseudo
// named by ps.PseudoInstr and `_RTN_ci` for the pseudo with "_RTN" appended to
// that name. Both share the same opcode `op`; the pseudos are looked up by
// name via !cast.
1648multiclass FLAT_Real_Atomics_ci <bits<7> op, FLAT_Pseudo ps> {
1649  def _ci     : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>;
1650  def _RTN_ci : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>;
1651}
1652
// GFX7 encodings of the flat atomics. Each defm yields a `_ci` and a `_RTN_ci`
// record. 32-bit operations use opcodes 0x30-0x3d, the 64-bit (_X2) variants
// use 0x50-0x5d; 0x34 and 0x54 are not assigned in this table.
1653defm FLAT_ATOMIC_SWAP          : FLAT_Real_Atomics_ci <0x30, FLAT_ATOMIC_SWAP>;
1654defm FLAT_ATOMIC_CMPSWAP       : FLAT_Real_Atomics_ci <0x31, FLAT_ATOMIC_CMPSWAP>;
1655defm FLAT_ATOMIC_ADD           : FLAT_Real_Atomics_ci <0x32, FLAT_ATOMIC_ADD>;
1656defm FLAT_ATOMIC_SUB           : FLAT_Real_Atomics_ci <0x33, FLAT_ATOMIC_SUB>;
1657defm FLAT_ATOMIC_SMIN          : FLAT_Real_Atomics_ci <0x35, FLAT_ATOMIC_SMIN>;
1658defm FLAT_ATOMIC_UMIN          : FLAT_Real_Atomics_ci <0x36, FLAT_ATOMIC_UMIN>;
1659defm FLAT_ATOMIC_SMAX          : FLAT_Real_Atomics_ci <0x37, FLAT_ATOMIC_SMAX>;
1660defm FLAT_ATOMIC_UMAX          : FLAT_Real_Atomics_ci <0x38, FLAT_ATOMIC_UMAX>;
1661defm FLAT_ATOMIC_AND           : FLAT_Real_Atomics_ci <0x39, FLAT_ATOMIC_AND>;
1662defm FLAT_ATOMIC_OR            : FLAT_Real_Atomics_ci <0x3a, FLAT_ATOMIC_OR>;
1663defm FLAT_ATOMIC_XOR           : FLAT_Real_Atomics_ci <0x3b, FLAT_ATOMIC_XOR>;
1664defm FLAT_ATOMIC_INC           : FLAT_Real_Atomics_ci <0x3c, FLAT_ATOMIC_INC>;
1665defm FLAT_ATOMIC_DEC           : FLAT_Real_Atomics_ci <0x3d, FLAT_ATOMIC_DEC>;
1666defm FLAT_ATOMIC_SWAP_X2       : FLAT_Real_Atomics_ci <0x50, FLAT_ATOMIC_SWAP_X2>;
1667defm FLAT_ATOMIC_CMPSWAP_X2    : FLAT_Real_Atomics_ci <0x51, FLAT_ATOMIC_CMPSWAP_X2>;
1668defm FLAT_ATOMIC_ADD_X2        : FLAT_Real_Atomics_ci <0x52, FLAT_ATOMIC_ADD_X2>;
1669defm FLAT_ATOMIC_SUB_X2        : FLAT_Real_Atomics_ci <0x53, FLAT_ATOMIC_SUB_X2>;
1670defm FLAT_ATOMIC_SMIN_X2       : FLAT_Real_Atomics_ci <0x55, FLAT_ATOMIC_SMIN_X2>;
1671defm FLAT_ATOMIC_UMIN_X2       : FLAT_Real_Atomics_ci <0x56, FLAT_ATOMIC_UMIN_X2>;
1672defm FLAT_ATOMIC_SMAX_X2       : FLAT_Real_Atomics_ci <0x57, FLAT_ATOMIC_SMAX_X2>;
1673defm FLAT_ATOMIC_UMAX_X2       : FLAT_Real_Atomics_ci <0x58, FLAT_ATOMIC_UMAX_X2>;
1674defm FLAT_ATOMIC_AND_X2        : FLAT_Real_Atomics_ci <0x59, FLAT_ATOMIC_AND_X2>;
1675defm FLAT_ATOMIC_OR_X2         : FLAT_Real_Atomics_ci <0x5a, FLAT_ATOMIC_OR_X2>;
1676defm FLAT_ATOMIC_XOR_X2        : FLAT_Real_Atomics_ci <0x5b, FLAT_ATOMIC_XOR_X2>;
1677defm FLAT_ATOMIC_INC_X2        : FLAT_Real_Atomics_ci <0x5c, FLAT_ATOMIC_INC_X2>;
1678defm FLAT_ATOMIC_DEC_X2        : FLAT_Real_Atomics_ci <0x5d, FLAT_ATOMIC_DEC_X2>;
1679
1680// CI Only flat instructions
// Floating-point compare-swap / min / max atomics, 32-bit then 64-bit (_X2).
1681defm FLAT_ATOMIC_FCMPSWAP      : FLAT_Real_Atomics_ci <0x3e, FLAT_ATOMIC_FCMPSWAP>;
1682defm FLAT_ATOMIC_FMIN          : FLAT_Real_Atomics_ci <0x3f, FLAT_ATOMIC_FMIN>;
1683defm FLAT_ATOMIC_FMAX          : FLAT_Real_Atomics_ci <0x40, FLAT_ATOMIC_FMAX>;
1684defm FLAT_ATOMIC_FCMPSWAP_X2   : FLAT_Real_Atomics_ci <0x5e, FLAT_ATOMIC_FCMPSWAP_X2>;
1685defm FLAT_ATOMIC_FMIN_X2       : FLAT_Real_Atomics_ci <0x5f, FLAT_ATOMIC_FMIN_X2>;
1686defm FLAT_ATOMIC_FMAX_X2       : FLAT_Real_Atomics_ci <0x60, FLAT_ATOMIC_FMAX_X2>;
1687
1688
1689//===----------------------------------------------------------------------===//
1690// VI
1691//===----------------------------------------------------------------------===//
1692
// Real (encoded) FLAT instruction for GFX8/GFX9 (VI encoding family).
//   op       - 7-bit opcode forwarded to FLAT_Real.
//   ps       - pseudo instruction this encoding is derived from.
//   has_sccb - whether the SCC cache-policy bit is exposed; defaults to the
//              pseudo's own has_sccb setting.
class FLAT_Real_vi<bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb>
    : FLAT_Real<op, ps>,
      SIMCInstr<ps.PseudoInstr, SIEncodingFamily.VI> {
  let DecoderNamespace = "GFX8";
  let AssemblerPredicate = isGFX8GFX9;

  // Assembly text is the mnemonic followed by the pseudo's operand string;
  // the "$sccb" placeholder is dropped from it when has_sccb is off.
  let AsmString =
      ps.Mnemonic #
      !subst("$sccb", !if(has_sccb, "$sccb", ""), ps.AsmOperands);

  // Bit 25 encodes the SCC bit of cpol when enabled, otherwise the fixed
  // value supplied by the pseudo.
  let Inst{25} = !if(has_sccb, cpol{CPolBit.SCC}, ps.sccbValue);
}
1703
// Defines the GFX8/GFX9 real instructions for the pseudo named NAME and for
// its "_SADDR" sibling (both found by name via !cast), sharing opcode `op`.
// has_sccb defaults to the base pseudo's has_sccb and is applied to both
// records.
1704multiclass FLAT_Real_AllAddr_vi<bits<7> op,
1705  bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
1706  def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME), has_sccb>;
1707  def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb>;
1708}
1709
// Real (encoded) FLAT instruction for the GFX940 encoding family.
//   op - 7-bit opcode forwarded to FLAT_Real.
//   ps - pseudo instruction this encoding is derived from.
// These records are assembled under isGFX940Plus and decoded from the "GFX9"
// decoder namespace.
class FLAT_Real_gfx940<bits<7> op, FLAT_Pseudo ps>
    : FLAT_Real<op, ps>,
      SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX940> {
  let DecoderNamespace = "GFX9";
  let AssemblerPredicate = isGFX940Plus;

  // Bit 25: SCC bit of cpol when the pseudo exposes it, otherwise the
  // pseudo's fixed sccbValue.
  let Inst{25} = !if(ps.has_sccb, cpol{CPolBit.SCC}, ps.sccbValue);
  // Bit 13 is taken from the pseudo's `sve` field.
  let Inst{13} = ps.sve;
}
1718
// Real instructions for a pseudo NAME that has GFX8/GFX9 and GFX940 encodings:
//   _vi / _SADDR_vi                        - VI-family records for NAME and
//                                            NAME#"_SADDR".
//   _VE_gfx940 / _SVS_gfx940 / _ST_gfx940  - GFX940-family records for NAME,
//                                            NAME#"_SVS" and NAME#"_ST".
// All records share opcode `op`; the pseudos are looked up by name via !cast.
multiclass FLAT_Real_AllAddr_SVE_vi<bits<7> op> {
  // The base form is restricted to targets matching isGFX8GFX9NotGFX940,
  // both for the assembler and as an extra predicate.
  def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME)> {
    let OtherPredicates = [isGFX8GFX9NotGFX940];
    let AssemblerPredicate = isGFX8GFX9NotGFX940;
  }

  // The _SADDR form overrides the decoder namespace to "GFX9".
  def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME # "_SADDR")> {
    let DecoderNamespace = "GFX9";
  }

  let SubtargetPredicate = isGFX940Plus in
  let AssemblerPredicate = isGFX940Plus in {
    def _VE_gfx940  : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME)>;
    def _SVS_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME # "_SVS")>;
    def _ST_gfx940  : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME # "_ST")>;
  }
}
1733
// Real instructions for a load-to-LDS pseudo NAME and its "_SADDR" sibling.
//   op              - opcode used for the GFX940 records.
//   pre_gfx940_op   - opcode used for the GFX8/GFX9 (_vi) records.
//   pre_gfx940_name - mnemonic for the _vi records; defaults to the pseudo's
//                     PseudoInstr name with "_lds" removed.
//   has_sccb        - forwarded to FLAT_Real_vi; defaults to the pseudo's
//                     has_sccb.
1734multiclass FLAT_Real_AllAddr_LDS<bits<7> op, bits<7> pre_gfx940_op,
1735  string pre_gfx940_name = !subst("_lds", "", !cast<FLAT_Pseudo>(NAME).PseudoInstr),
1736  bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
1737
  // Pre-GFX940 form: AsmString is rebuilt as
  // <pre_gfx940_name><operands> followed by a trailing " lds".
1738  let OtherPredicates = [isGFX8GFX9NotGFX940] in {
1739    def _vi : FLAT_Real_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME), has_sccb> {
1740      let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME).AsmOperands # " lds";
1741    }
1742    def _SADDR_vi : FLAT_Real_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb> {
1743      let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME#"_SADDR").AsmOperands # " lds";
1744    }
1745  }
1746
  // GFX940 form: dedicated opcode `op`; AsmString is not overridden here.
1747  let SubtargetPredicate = isGFX940Plus in {
1748    def _gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME)>;
1749    def _SADDR_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
1750  }
1751}
1752
// Extends FLAT_Real_AllAddr_LDS (instantiated with an empty name suffix, so
// its records keep their own suffixes) with GFX940 records for the
// NAME#"_SVS" and NAME#"_ST" pseudos.
1753multiclass FLAT_Real_AllAddr_SVE_LDS<bits<7> op, bits<7> pre_gfx940_op> {
1754  defm "" : FLAT_Real_AllAddr_LDS<op, pre_gfx940_op>;
1755  let SubtargetPredicate = isGFX940Plus in {
1756    def _SVS_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SVS")>;
1757    def _ST_gfx940  : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_ST")>;
1758  }
1759}
1760
// GFX8/GFX9 encodings of the flat loads (0x10-0x17). DWORDX4 (0x17) is listed
// before DWORDX3 (0x16); the opcode values define the encoding.
1761def FLAT_LOAD_UBYTE_vi         : FLAT_Real_vi <0x10, FLAT_LOAD_UBYTE>;
1762def FLAT_LOAD_SBYTE_vi         : FLAT_Real_vi <0x11, FLAT_LOAD_SBYTE>;
1763def FLAT_LOAD_USHORT_vi        : FLAT_Real_vi <0x12, FLAT_LOAD_USHORT>;
1764def FLAT_LOAD_SSHORT_vi        : FLAT_Real_vi <0x13, FLAT_LOAD_SSHORT>;
1765def FLAT_LOAD_DWORD_vi         : FLAT_Real_vi <0x14, FLAT_LOAD_DWORD>;
1766def FLAT_LOAD_DWORDX2_vi       : FLAT_Real_vi <0x15, FLAT_LOAD_DWORDX2>;
1767def FLAT_LOAD_DWORDX4_vi       : FLAT_Real_vi <0x17, FLAT_LOAD_DWORDX4>;
1768def FLAT_LOAD_DWORDX3_vi       : FLAT_Real_vi <0x16, FLAT_LOAD_DWORDX3>;
1769
// GFX8/GFX9 encodings of the flat stores (0x18-0x1f), including the D16_HI
// byte/short stores at 0x19 and 0x1b.
1770def FLAT_STORE_BYTE_vi         : FLAT_Real_vi <0x18, FLAT_STORE_BYTE>;
1771def FLAT_STORE_BYTE_D16_HI_vi  : FLAT_Real_vi <0x19, FLAT_STORE_BYTE_D16_HI>;
1772def FLAT_STORE_SHORT_vi        : FLAT_Real_vi <0x1a, FLAT_STORE_SHORT>;
1773def FLAT_STORE_SHORT_D16_HI_vi : FLAT_Real_vi <0x1b, FLAT_STORE_SHORT_D16_HI>;
1774def FLAT_STORE_DWORD_vi        : FLAT_Real_vi <0x1c, FLAT_STORE_DWORD>;
1775def FLAT_STORE_DWORDX2_vi      : FLAT_Real_vi <0x1d, FLAT_STORE_DWORDX2>;
1776def FLAT_STORE_DWORDX4_vi      : FLAT_Real_vi <0x1f, FLAT_STORE_DWORDX4>;
1777def FLAT_STORE_DWORDX3_vi      : FLAT_Real_vi <0x1e, FLAT_STORE_DWORDX3>;
1778
// GFX8/GFX9 encodings of the D16 flat loads (0x20-0x25).
1779def FLAT_LOAD_UBYTE_D16_vi    : FLAT_Real_vi <0x20, FLAT_LOAD_UBYTE_D16>;
1780def FLAT_LOAD_UBYTE_D16_HI_vi : FLAT_Real_vi <0x21, FLAT_LOAD_UBYTE_D16_HI>;
1781def FLAT_LOAD_SBYTE_D16_vi    : FLAT_Real_vi <0x22, FLAT_LOAD_SBYTE_D16>;
1782def FLAT_LOAD_SBYTE_D16_HI_vi : FLAT_Real_vi <0x23, FLAT_LOAD_SBYTE_D16_HI>;
1783def FLAT_LOAD_SHORT_D16_vi    : FLAT_Real_vi <0x24, FLAT_LOAD_SHORT_D16>;
1784def FLAT_LOAD_SHORT_D16_HI_vi : FLAT_Real_vi <0x25, FLAT_LOAD_SHORT_D16_HI>;
1785
// Defines the GFX8/GFX9 real instructions for one flat atomic: `_vi` for the
// pseudo named by ps.PseudoInstr and `_RTN_vi` for the "_RTN" pseudo, sharing
// opcode `op`.
// Note: the has_sccb default is read from the pseudo named NAME (the defm
// name), not from `ps`; every use visible in this file passes a `ps` whose
// name matches the defm name.
1786multiclass FLAT_Real_Atomics_vi <bits<7> op, FLAT_Pseudo ps,
1787  bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
1788  def _vi     : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr), has_sccb>;
1789  def _RTN_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN"), has_sccb>;
1790}
1791
// GFX8/GFX9 real instructions for a global atomic NAME: inherits the `_vi`
// and `_SADDR_vi` records from FLAT_Real_AllAddr_vi and adds the returning
// variants `_RTN_vi` and `_SADDR_RTN_vi`. All four share opcode `op` and the
// same has_sccb setting.
1792multiclass FLAT_Global_Real_Atomics_vi<bits<7> op,
1793  bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> :
1794  FLAT_Real_AllAddr_vi<op, has_sccb> {
1795  def _RTN_vi  : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_RTN"), has_sccb>;
1796  def _SADDR_RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN"), has_sccb>;
1797}
1798
1799
// GFX8/GFX9 encodings of the flat integer atomics. Each defm yields a `_vi`
// and a `_RTN_vi` record. 32-bit operations use 0x40-0x4c, the 64-bit (_X2)
// variants use 0x60-0x6c.
1800defm FLAT_ATOMIC_SWAP       : FLAT_Real_Atomics_vi <0x40, FLAT_ATOMIC_SWAP>;
1801defm FLAT_ATOMIC_CMPSWAP    : FLAT_Real_Atomics_vi <0x41, FLAT_ATOMIC_CMPSWAP>;
1802defm FLAT_ATOMIC_ADD        : FLAT_Real_Atomics_vi <0x42, FLAT_ATOMIC_ADD>;
1803defm FLAT_ATOMIC_SUB        : FLAT_Real_Atomics_vi <0x43, FLAT_ATOMIC_SUB>;
1804defm FLAT_ATOMIC_SMIN       : FLAT_Real_Atomics_vi <0x44, FLAT_ATOMIC_SMIN>;
1805defm FLAT_ATOMIC_UMIN       : FLAT_Real_Atomics_vi <0x45, FLAT_ATOMIC_UMIN>;
1806defm FLAT_ATOMIC_SMAX       : FLAT_Real_Atomics_vi <0x46, FLAT_ATOMIC_SMAX>;
1807defm FLAT_ATOMIC_UMAX       : FLAT_Real_Atomics_vi <0x47, FLAT_ATOMIC_UMAX>;
1808defm FLAT_ATOMIC_AND        : FLAT_Real_Atomics_vi <0x48, FLAT_ATOMIC_AND>;
1809defm FLAT_ATOMIC_OR         : FLAT_Real_Atomics_vi <0x49, FLAT_ATOMIC_OR>;
1810defm FLAT_ATOMIC_XOR        : FLAT_Real_Atomics_vi <0x4a, FLAT_ATOMIC_XOR>;
1811defm FLAT_ATOMIC_INC        : FLAT_Real_Atomics_vi <0x4b, FLAT_ATOMIC_INC>;
1812defm FLAT_ATOMIC_DEC        : FLAT_Real_Atomics_vi <0x4c, FLAT_ATOMIC_DEC>;
1813defm FLAT_ATOMIC_SWAP_X2    : FLAT_Real_Atomics_vi <0x60, FLAT_ATOMIC_SWAP_X2>;
1814defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_vi <0x61, FLAT_ATOMIC_CMPSWAP_X2>;
1815defm FLAT_ATOMIC_ADD_X2     : FLAT_Real_Atomics_vi <0x62, FLAT_ATOMIC_ADD_X2>;
1816defm FLAT_ATOMIC_SUB_X2     : FLAT_Real_Atomics_vi <0x63, FLAT_ATOMIC_SUB_X2>;
1817defm FLAT_ATOMIC_SMIN_X2    : FLAT_Real_Atomics_vi <0x64, FLAT_ATOMIC_SMIN_X2>;
1818defm FLAT_ATOMIC_UMIN_X2    : FLAT_Real_Atomics_vi <0x65, FLAT_ATOMIC_UMIN_X2>;
1819defm FLAT_ATOMIC_SMAX_X2    : FLAT_Real_Atomics_vi <0x66, FLAT_ATOMIC_SMAX_X2>;
1820defm FLAT_ATOMIC_UMAX_X2    : FLAT_Real_Atomics_vi <0x67, FLAT_ATOMIC_UMAX_X2>;
1821defm FLAT_ATOMIC_AND_X2     : FLAT_Real_Atomics_vi <0x68, FLAT_ATOMIC_AND_X2>;
1822defm FLAT_ATOMIC_OR_X2      : FLAT_Real_Atomics_vi <0x69, FLAT_ATOMIC_OR_X2>;
1823defm FLAT_ATOMIC_XOR_X2     : FLAT_Real_Atomics_vi <0x6a, FLAT_ATOMIC_XOR_X2>;
1824defm FLAT_ATOMIC_INC_X2     : FLAT_Real_Atomics_vi <0x6b, FLAT_ATOMIC_INC_X2>;
1825defm FLAT_ATOMIC_DEC_X2     : FLAT_Real_Atomics_vi <0x6c, FLAT_ATOMIC_DEC_X2>;
1826
// GFX8/GFX9 encodings of the global loads. The defm name selects the pseudo
// (FLAT_Real_AllAddr_vi casts NAME), so only the opcode is passed. The opcode
// values match the FLAT_LOAD_*_vi records with the same suffix.
1827defm GLOBAL_LOAD_UBYTE : FLAT_Real_AllAddr_vi <0x10>;
1828defm GLOBAL_LOAD_SBYTE : FLAT_Real_AllAddr_vi <0x11>;
1829defm GLOBAL_LOAD_USHORT : FLAT_Real_AllAddr_vi <0x12>;
1830defm GLOBAL_LOAD_SSHORT : FLAT_Real_AllAddr_vi <0x13>;
1831defm GLOBAL_LOAD_DWORD : FLAT_Real_AllAddr_vi <0x14>;
1832defm GLOBAL_LOAD_DWORDX2 : FLAT_Real_AllAddr_vi <0x15>;
1833defm GLOBAL_LOAD_DWORDX3 : FLAT_Real_AllAddr_vi <0x16>;
1834defm GLOBAL_LOAD_DWORDX4 : FLAT_Real_AllAddr_vi <0x17>;
1835
// D16 global loads (0x20-0x25).
1836defm GLOBAL_LOAD_UBYTE_D16    : FLAT_Real_AllAddr_vi <0x20>;
1837defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_vi <0x21>;
1838defm GLOBAL_LOAD_SBYTE_D16    : FLAT_Real_AllAddr_vi <0x22>;
1839defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_vi <0x23>;
1840defm GLOBAL_LOAD_SHORT_D16    : FLAT_Real_AllAddr_vi <0x24>;
1841defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x25>;
1842
// Global stores (0x18-0x1f).
1843defm GLOBAL_STORE_BYTE : FLAT_Real_AllAddr_vi <0x18>;
1844defm GLOBAL_STORE_BYTE_D16_HI : FLAT_Real_AllAddr_vi <0x19>;
1845defm GLOBAL_STORE_SHORT : FLAT_Real_AllAddr_vi <0x1a>;
1846defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x1b>;
1847defm GLOBAL_STORE_DWORD : FLAT_Real_AllAddr_vi <0x1c>;
1848defm GLOBAL_STORE_DWORDX2 : FLAT_Real_AllAddr_vi <0x1d>;
1849defm GLOBAL_STORE_DWORDX3 : FLAT_Real_AllAddr_vi <0x1e>;
1850defm GLOBAL_STORE_DWORDX4 : FLAT_Real_AllAddr_vi <0x1f>;
1851
// Global load-to-LDS. First argument is the GFX940 opcode, second is the
// pre-GFX940 opcode, which equals the opcode of the corresponding plain
// GLOBAL_LOAD_* record above.
1852defm GLOBAL_LOAD_LDS_UBYTE  : FLAT_Real_AllAddr_LDS <0x026, 0x10>;
1853defm GLOBAL_LOAD_LDS_SBYTE  : FLAT_Real_AllAddr_LDS <0x027, 0x11>;
1854defm GLOBAL_LOAD_LDS_USHORT : FLAT_Real_AllAddr_LDS <0x028, 0x12>;
1855defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Real_AllAddr_LDS <0x029, 0x13>;
1856defm GLOBAL_LOAD_LDS_DWORD  : FLAT_Real_AllAddr_LDS <0x02a, 0x14>;
1857
// GFX8/GFX9 encodings of the global integer atomics. Each defm yields `_vi`,
// `_SADDR_vi`, `_RTN_vi` and `_SADDR_RTN_vi` records. The opcode values match
// the FLAT_ATOMIC_*_vi table: 0x40-0x4c for 32-bit, 0x60-0x6c for _X2.
1858defm GLOBAL_ATOMIC_SWAP       : FLAT_Global_Real_Atomics_vi <0x40>;
1859defm GLOBAL_ATOMIC_CMPSWAP    : FLAT_Global_Real_Atomics_vi <0x41>;
1860defm GLOBAL_ATOMIC_ADD        : FLAT_Global_Real_Atomics_vi <0x42>;
1861defm GLOBAL_ATOMIC_SUB        : FLAT_Global_Real_Atomics_vi <0x43>;
1862defm GLOBAL_ATOMIC_SMIN       : FLAT_Global_Real_Atomics_vi <0x44>;
1863defm GLOBAL_ATOMIC_UMIN       : FLAT_Global_Real_Atomics_vi <0x45>;
1864defm GLOBAL_ATOMIC_SMAX       : FLAT_Global_Real_Atomics_vi <0x46>;
1865defm GLOBAL_ATOMIC_UMAX       : FLAT_Global_Real_Atomics_vi <0x47>;
1866defm GLOBAL_ATOMIC_AND        : FLAT_Global_Real_Atomics_vi <0x48>;
1867defm GLOBAL_ATOMIC_OR         : FLAT_Global_Real_Atomics_vi <0x49>;
1868defm GLOBAL_ATOMIC_XOR        : FLAT_Global_Real_Atomics_vi <0x4a>;
1869defm GLOBAL_ATOMIC_INC        : FLAT_Global_Real_Atomics_vi <0x4b>;
1870defm GLOBAL_ATOMIC_DEC        : FLAT_Global_Real_Atomics_vi <0x4c>;
1871defm GLOBAL_ATOMIC_SWAP_X2    : FLAT_Global_Real_Atomics_vi <0x60>;
1872defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Real_Atomics_vi <0x61>;
1873defm GLOBAL_ATOMIC_ADD_X2     : FLAT_Global_Real_Atomics_vi <0x62>;
1874defm GLOBAL_ATOMIC_SUB_X2     : FLAT_Global_Real_Atomics_vi <0x63>;
1875defm GLOBAL_ATOMIC_SMIN_X2    : FLAT_Global_Real_Atomics_vi <0x64>;
1876defm GLOBAL_ATOMIC_UMIN_X2    : FLAT_Global_Real_Atomics_vi <0x65>;
1877defm GLOBAL_ATOMIC_SMAX_X2    : FLAT_Global_Real_Atomics_vi <0x66>;
1878defm GLOBAL_ATOMIC_UMAX_X2    : FLAT_Global_Real_Atomics_vi <0x67>;
1879defm GLOBAL_ATOMIC_AND_X2     : FLAT_Global_Real_Atomics_vi <0x68>;
1880defm GLOBAL_ATOMIC_OR_X2      : FLAT_Global_Real_Atomics_vi <0x69>;
1881defm GLOBAL_ATOMIC_XOR_X2     : FLAT_Global_Real_Atomics_vi <0x6a>;
1882defm GLOBAL_ATOMIC_INC_X2     : FLAT_Global_Real_Atomics_vi <0x6b>;
1883defm GLOBAL_ATOMIC_DEC_X2     : FLAT_Global_Real_Atomics_vi <0x6c>;
1884
// Scratch load-to-LDS. First argument is the GFX940 opcode, second is the
// pre-GFX940 opcode; the values are the same as for the GLOBAL_LOAD_LDS_*
// records.
1885defm SCRATCH_LOAD_LDS_UBYTE  : FLAT_Real_AllAddr_SVE_LDS <0x026, 0x10>;
1886defm SCRATCH_LOAD_LDS_SBYTE  : FLAT_Real_AllAddr_SVE_LDS <0x027, 0x11>;
1887defm SCRATCH_LOAD_LDS_USHORT : FLAT_Real_AllAddr_SVE_LDS <0x028, 0x12>;
1888defm SCRATCH_LOAD_LDS_SSHORT : FLAT_Real_AllAddr_SVE_LDS <0x029, 0x13>;
1889defm SCRATCH_LOAD_LDS_DWORD  : FLAT_Real_AllAddr_SVE_LDS <0x02a, 0x14>;
1890
// Scratch loads and stores. Each defm yields the GFX8/GFX9 `_vi`/`_SADDR_vi`
// records and the GFX940 `_VE`/`_SVS`/`_ST` records. Entries are not in opcode
// order: the D16 loads (0x20-0x25) sit between the byte and short stores.
1891defm SCRATCH_LOAD_UBYTE         : FLAT_Real_AllAddr_SVE_vi <0x10>;
1892defm SCRATCH_LOAD_SBYTE         : FLAT_Real_AllAddr_SVE_vi <0x11>;
1893defm SCRATCH_LOAD_USHORT        : FLAT_Real_AllAddr_SVE_vi <0x12>;
1894defm SCRATCH_LOAD_SSHORT        : FLAT_Real_AllAddr_SVE_vi <0x13>;
1895defm SCRATCH_LOAD_DWORD         : FLAT_Real_AllAddr_SVE_vi <0x14>;
1896defm SCRATCH_LOAD_DWORDX2       : FLAT_Real_AllAddr_SVE_vi <0x15>;
1897defm SCRATCH_LOAD_DWORDX3       : FLAT_Real_AllAddr_SVE_vi <0x16>;
1898defm SCRATCH_LOAD_DWORDX4       : FLAT_Real_AllAddr_SVE_vi <0x17>;
1899defm SCRATCH_STORE_BYTE         : FLAT_Real_AllAddr_SVE_vi <0x18>;
1900defm SCRATCH_STORE_BYTE_D16_HI  : FLAT_Real_AllAddr_SVE_vi <0x19>;
1901defm SCRATCH_LOAD_UBYTE_D16     : FLAT_Real_AllAddr_SVE_vi <0x20>;
1902defm SCRATCH_LOAD_UBYTE_D16_HI  : FLAT_Real_AllAddr_SVE_vi <0x21>;
1903defm SCRATCH_LOAD_SBYTE_D16     : FLAT_Real_AllAddr_SVE_vi <0x22>;
1904defm SCRATCH_LOAD_SBYTE_D16_HI  : FLAT_Real_AllAddr_SVE_vi <0x23>;
1905defm SCRATCH_LOAD_SHORT_D16     : FLAT_Real_AllAddr_SVE_vi <0x24>;
1906defm SCRATCH_LOAD_SHORT_D16_HI  : FLAT_Real_AllAddr_SVE_vi <0x25>;
1907defm SCRATCH_STORE_SHORT        : FLAT_Real_AllAddr_SVE_vi <0x1a>;
1908defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_SVE_vi <0x1b>;
1909defm SCRATCH_STORE_DWORD        : FLAT_Real_AllAddr_SVE_vi <0x1c>;
1910defm SCRATCH_STORE_DWORDX2      : FLAT_Real_AllAddr_SVE_vi <0x1d>;
1911defm SCRATCH_STORE_DWORDX3      : FLAT_Real_AllAddr_SVE_vi <0x1e>;
1912defm SCRATCH_STORE_DWORDX4      : FLAT_Real_AllAddr_SVE_vi <0x1f>;
1913
// GLOBAL_ATOMIC_ADD_F32 / GLOBAL_ATOMIC_PK_ADD_F16 for subtargets matching
// isGFX8GFX9NotGFX940. The same two names are instantiated a second time
// further down, under isGFX940Plus, through FLAT_Global_Real_Atomics_gfx940
// with the same opcode values.
// NOTE(review): the trailing 0 is a second template argument of
// FLAT_Global_Real_Atomics_vi; its meaning is not visible in this section.
1914let SubtargetPredicate = isGFX8GFX9NotGFX940 in {
1915  // These instructions are encoded differently on gfx90* and gfx940.
1916  defm GLOBAL_ATOMIC_ADD_F32    : FLAT_Global_Real_Atomics_vi <0x04d, 0>;
1917  defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_vi <0x04e, 0>;
1918}
1919
// F64 add/min/max atomics for subtargets matching isGFX90AOnly. The FLAT and
// GLOBAL variants use the same three values (0x4f-0x51). The same six names
// are instantiated again under isGFX940Plus through the *_gfx940 multiclasses.
// NOTE(review): the trailing 0 is an extra template argument of the *_vi
// multiclasses; its meaning is not visible in this section.
1920let SubtargetPredicate = isGFX90AOnly in {
1921  defm FLAT_ATOMIC_ADD_F64   : FLAT_Real_Atomics_vi<0x4f, FLAT_ATOMIC_ADD_F64, 0>;
1922  defm FLAT_ATOMIC_MIN_F64   : FLAT_Real_Atomics_vi<0x50, FLAT_ATOMIC_MIN_F64, 0>;
1923  defm FLAT_ATOMIC_MAX_F64   : FLAT_Real_Atomics_vi<0x51, FLAT_ATOMIC_MAX_F64, 0>;
1924  defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Real_Atomics_vi<0x4f, 0>;
1925  defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Real_Atomics_vi<0x50, 0>;
1926  defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_vi<0x51, 0>;
1927} // End SubtargetPredicate = isGFX90AOnly
1928
// Defines two gfx940 real instructions that share opcode `op`:
//   <NAME>_gfx940        from the pseudo named <NAME>
//   <NAME>_SADDR_gfx940  from the pseudo named <NAME>_SADDR
// where NAME is the name given to the instantiating defm. Both pseudos are
// looked up by name with !cast, so they must already be defined.
1929multiclass FLAT_Real_AllAddr_gfx940<bits<7> op> {
1930  def _gfx940       : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME)>;
1931  def _SADDR_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
1932}
1933
// Defines the gfx940 real instructions for an atomic and its "_RTN" variant,
// both with opcode `op`. Unlike FLAT_Real_AllAddr_gfx940, the pseudo is not
// derived from NAME: the caller passes the pseudo record `ps`, and the
// pseudos are looked up through the string ps.PseudoInstr (plus "_RTN").
1934multiclass FLAT_Real_Atomics_gfx940 <bits<7> op, FLAT_Pseudo ps> {
1935  def _gfx940     : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>;
1936  def _RTN_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>;
1937}
1938
// Global atomic on gfx940: four real instructions per instantiation, all with
// opcode `op`. The base and _SADDR forms come from the inherited
// FLAT_Real_AllAddr_gfx940; this body adds the _RTN and _SADDR_RTN forms,
// each bound to the pseudo of the matching name.
1939multiclass FLAT_Global_Real_Atomics_gfx940<bits<7> op> :
1940  FLAT_Real_AllAddr_gfx940<op> {
1941  def _RTN_gfx940       : FLAT_Real_gfx940 <op, !cast<FLAT_Pseudo>(NAME#"_RTN")>;
1942  def _SADDR_RTN_gfx940 : FLAT_Real_gfx940 <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN")>;
1943}
1944
// Floating-point atomics for subtargets matching isGFX940Plus. Two multiclass
// families are mixed in this block: the first eight entries go through the
// *_gfx940 multiclasses defined just above, the last four through the *_vi
// multiclasses.
1945let SubtargetPredicate = isGFX940Plus in {
1946  // These instructions are encoded differently on gfx90* and gfx940.
1947  defm GLOBAL_ATOMIC_ADD_F32     : FLAT_Global_Real_Atomics_gfx940 <0x04d>;
1948  defm GLOBAL_ATOMIC_PK_ADD_F16  : FLAT_Global_Real_Atomics_gfx940 <0x04e>;
1949
  // F64 atomics: same names and opcode values as the isGFX90AOnly block, but
  // expanded through the *_gfx940 multiclasses.
1950  defm FLAT_ATOMIC_ADD_F64       : FLAT_Real_Atomics_gfx940<0x4f, FLAT_ATOMIC_ADD_F64>;
1951  defm FLAT_ATOMIC_MIN_F64       : FLAT_Real_Atomics_gfx940<0x50, FLAT_ATOMIC_MIN_F64>;
1952  defm FLAT_ATOMIC_MAX_F64       : FLAT_Real_Atomics_gfx940<0x51, FLAT_ATOMIC_MAX_F64>;
1953  defm GLOBAL_ATOMIC_ADD_F64     : FLAT_Global_Real_Atomics_gfx940<0x4f>;
1954  defm GLOBAL_ATOMIC_MIN_F64     : FLAT_Global_Real_Atomics_gfx940<0x50>;
1955  defm GLOBAL_ATOMIC_MAX_F64     : FLAT_Global_Real_Atomics_gfx940<0x51>;
  // These four are expanded through the *_vi multiclasses, relying on the
  // defaults of any trailing template arguments.
1956  defm FLAT_ATOMIC_ADD_F32       : FLAT_Real_Atomics_vi<0x4d, FLAT_ATOMIC_ADD_F32>;
1957  defm FLAT_ATOMIC_PK_ADD_F16    : FLAT_Real_Atomics_vi<0x4e, FLAT_ATOMIC_PK_ADD_F16>;
1958  defm FLAT_ATOMIC_PK_ADD_BF16   : FLAT_Real_Atomics_vi<0x52, FLAT_ATOMIC_PK_ADD_BF16>;
1959  defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi<0x52>;
1960} // End SubtargetPredicate = isGFX940Plus
1961
1962//===----------------------------------------------------------------------===//
1963// GFX10.
1964//===----------------------------------------------------------------------===//
1965
// GFX10 real (encodable) form of the FLAT pseudo `ps` with opcode `op`.
// Registers the instruction under SIEncodingFamily.GFX10 keyed by
// ps.PseudoInstr, makes it available to the assembler only when isGFX10Only
// holds, places it in the "GFX10" decoder namespace, and fills the
// GFX10-specific encoding bits below.
1966class FLAT_Real_gfx10<bits<7> op, FLAT_Pseudo ps> :
1967    FLAT_Real<op, ps>, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX10> {
1968  let AssemblerPredicate = isGFX10Only;
1969  let DecoderNamespace = "GFX10";
1970
  // Only the low 12 bits of the offset operand are encoded.
1971  let Inst{11-0}  = offset{11-0};
  // DLC bit: taken from the cpol operand when the pseudo has a DLC modifier,
  // otherwise the pseudo's fixed dlcValue.
1972  let Inst{12}    = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlcValue);
  // SADDR field: the saddr operand only when the pseudo both has and enables
  // saddr; in every other case the constant 0x7d.
  // NOTE(review): 0x7d is presumably the "no SGPR" (null) register encoding
  // on GFX10 -- confirm against the ISA documentation.
1973  let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7d), 0x7d);
1974  let Inst{55}    = 0;
1975}
1976
1977
// Defines <NAME>_gfx10: the GFX10 real instruction for the pseudo whose
// record name equals the instantiating defm's NAME.
1978multiclass FLAT_Real_Base_gfx10<bits<7> op> {
1979  def _gfx10 :
1980    FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME)>;
1981}
1982
// Defines <NAME>_RTN_gfx10: the GFX10 real instruction for the pseudo named
// <NAME>_RTN.
1983multiclass FLAT_Real_RTN_gfx10<bits<7> op> {
1984  def _RTN_gfx10 :
1985    FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_RTN")>;
1986}
1987
// Defines <NAME>_SADDR_gfx10: the GFX10 real instruction for the pseudo named
// <NAME>_SADDR.
1988multiclass FLAT_Real_SADDR_gfx10<bits<7> op> {
1989  def _SADDR_gfx10 :
1990    FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
1991}
1992
// Defines <NAME>_SADDR_RTN_gfx10: the GFX10 real instruction for the pseudo
// named <NAME>_SADDR_RTN.
1993multiclass FLAT_Real_SADDR_RTN_gfx10<bits<7> op> {
1994  def _SADDR_RTN_gfx10 :
1995    FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN")>;
1996}
1997
// Defines <NAME>_ST_gfx10: the GFX10 real instruction for the pseudo named
// <NAME>_ST. It differs from a plain FLAT_Real_gfx10 in two ways: the SADDR
// field Inst{54-48} is overridden with the hardware encoding of EXEC_HI, and
// the instruction additionally requires HasFlatScratchSTMode.
// NOTE(review): presumably the EXEC_HI value in the SADDR field is how the
// hardware recognizes this addressing form -- confirm against the ISA docs.
1998multiclass FLAT_Real_ST_gfx10<bits<7> op> {
1999  def _ST_gfx10 :
2000    FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_ST")> {
2001      let Inst{54-48} = !cast<int>(EXEC_HI.HWEncoding);
2002      let OtherPredicates = [HasFlatScratchSTMode];
2003    }
2004}
2005
// Base + SADDR: defines <NAME>_gfx10 and <NAME>_SADDR_gfx10 with opcode `op`.
2006multiclass FLAT_Real_AllAddr_gfx10<bits<7> op> :
2007  FLAT_Real_Base_gfx10<op>,
2008  FLAT_Real_SADDR_gfx10<op>;
2009
// Base + RTN: defines <NAME>_gfx10 and <NAME>_RTN_gfx10 with opcode `op`.
2010multiclass FLAT_Real_Atomics_gfx10<bits<7> op> :
2011  FLAT_Real_Base_gfx10<op>,
2012  FLAT_Real_RTN_gfx10<op>;
2013
// All four forms of a global atomic, sharing opcode `op`: <NAME>_gfx10 and
// <NAME>_SADDR_gfx10 (via FLAT_Real_AllAddr_gfx10), plus <NAME>_RTN_gfx10 and
// <NAME>_SADDR_RTN_gfx10.
2014multiclass FLAT_Real_GlblAtomics_gfx10<bits<7> op> :
2015  FLAT_Real_AllAddr_gfx10<op>,
2016  FLAT_Real_RTN_gfx10<op>,
2017  FLAT_Real_SADDR_RTN_gfx10<op>;
2018
// Only the two "_RTN" forms of a global atomic: <NAME>_RTN_gfx10 and
// <NAME>_SADDR_RTN_gfx10. No base or plain _SADDR definition is produced.
2019multiclass FLAT_Real_GlblAtomics_RTN_gfx10<bits<7> op> :
2020  FLAT_Real_RTN_gfx10<op>,
2021  FLAT_Real_SADDR_RTN_gfx10<op>;
2022
// The three scratch forms sharing opcode `op`: <NAME>_gfx10,
// <NAME>_SADDR_gfx10 and <NAME>_ST_gfx10.
2023multiclass FLAT_Real_ScratchAllAddr_gfx10<bits<7> op> :
2024  FLAT_Real_Base_gfx10<op>,
2025  FLAT_Real_SADDR_gfx10<op>,
2026  FLAT_Real_ST_gfx10<op>;
2027
// Base + SADDR real instructions for an LDS-variant pseudo, with a rewritten
// assembly string. `opname` defaults to the PseudoInstr string of the pseudo
// named <NAME> with every "_lds" substring removed; the AsmString of each
// definition is then  opname # <that pseudo's AsmOperands> # " lds".
// `defm ""` expands the inner multiclass with an empty name, so the resulting
// records are still <NAME>_gfx10 and <NAME>_SADDR_gfx10.
2028multiclass FLAT_Real_AllAddr_LDS_gfx10<bits<7> op,
2029  string opname = !subst("_lds", "", !cast<FLAT_Pseudo>(NAME).PseudoInstr)> {
2030  let AsmString = opname # !cast<FLAT_Pseudo>(NAME).AsmOperands # " lds" in
2031  defm "" : FLAT_Real_Base_gfx10<op>;
2032
2033  let AsmString = opname # !cast<FLAT_Pseudo>(NAME#"_SADDR").AsmOperands # " lds" in
2034  defm "" : FLAT_Real_SADDR_gfx10<op>;
2035}
2036
// Scratch LDS variant: the base and SADDR forms come from
// FLAT_Real_AllAddr_LDS_gfx10, and an _ST form is added with the same
// AsmString scheme (opname # AsmOperands of the <NAME>_ST pseudo # " lds").
// `opname` is not forwarded to FLAT_Real_AllAddr_LDS_gfx10; that multiclass
// recomputes its own default from NAME using the same expression.
2037multiclass FLAT_Real_ScratchAllAddr_LDS_gfx10<bits<7> op,
2038  string opname = !subst("_lds", "", !cast<FLAT_Pseudo>(NAME).PseudoInstr)> {
2039  defm "" : FLAT_Real_AllAddr_LDS_gfx10<op>;
2040
2041  let AsmString = opname # !cast<FLAT_Pseudo>(NAME#"_ST").AsmOperands # " lds" in
2042  defm "" : FLAT_Real_ST_gfx10<op>;
2043}
2044
2045// ENC_FLAT.
// GFX10 opcode table for the FLAT_* instructions. Loads and stores use
// FLAT_Real_Base_gfx10 (one definition each); atomics use
// FLAT_Real_Atomics_gfx10 (base + _RTN).
// Things to keep in mind when editing:
//  * DWORDX4 is listed before DWORDX3 (0x00e/0x00f for loads, 0x01e/0x01f for
//    stores), so the list is in ascending opcode order, not size order.
//  * 0x034 and 0x054 are not assigned in this table; 0x034 is used by
//    GLOBAL_ATOMIC_CSUB in the GFX10 global table.
2046defm FLAT_LOAD_UBYTE            : FLAT_Real_Base_gfx10<0x008>;
2047defm FLAT_LOAD_SBYTE            : FLAT_Real_Base_gfx10<0x009>;
2048defm FLAT_LOAD_USHORT           : FLAT_Real_Base_gfx10<0x00a>;
2049defm FLAT_LOAD_SSHORT           : FLAT_Real_Base_gfx10<0x00b>;
2050defm FLAT_LOAD_DWORD            : FLAT_Real_Base_gfx10<0x00c>;
2051defm FLAT_LOAD_DWORDX2          : FLAT_Real_Base_gfx10<0x00d>;
2052defm FLAT_LOAD_DWORDX4          : FLAT_Real_Base_gfx10<0x00e>;
2053defm FLAT_LOAD_DWORDX3          : FLAT_Real_Base_gfx10<0x00f>;
2054defm FLAT_STORE_BYTE            : FLAT_Real_Base_gfx10<0x018>;
2055defm FLAT_STORE_BYTE_D16_HI     : FLAT_Real_Base_gfx10<0x019>;
2056defm FLAT_STORE_SHORT           : FLAT_Real_Base_gfx10<0x01a>;
2057defm FLAT_STORE_SHORT_D16_HI    : FLAT_Real_Base_gfx10<0x01b>;
2058defm FLAT_STORE_DWORD           : FLAT_Real_Base_gfx10<0x01c>;
2059defm FLAT_STORE_DWORDX2         : FLAT_Real_Base_gfx10<0x01d>;
2060defm FLAT_STORE_DWORDX4         : FLAT_Real_Base_gfx10<0x01e>;
2061defm FLAT_STORE_DWORDX3         : FLAT_Real_Base_gfx10<0x01f>;
2062defm FLAT_LOAD_UBYTE_D16        : FLAT_Real_Base_gfx10<0x020>;
2063defm FLAT_LOAD_UBYTE_D16_HI     : FLAT_Real_Base_gfx10<0x021>;
2064defm FLAT_LOAD_SBYTE_D16        : FLAT_Real_Base_gfx10<0x022>;
2065defm FLAT_LOAD_SBYTE_D16_HI     : FLAT_Real_Base_gfx10<0x023>;
2066defm FLAT_LOAD_SHORT_D16        : FLAT_Real_Base_gfx10<0x024>;
2067defm FLAT_LOAD_SHORT_D16_HI     : FLAT_Real_Base_gfx10<0x025>;
2068defm FLAT_ATOMIC_SWAP           : FLAT_Real_Atomics_gfx10<0x030>;
2069defm FLAT_ATOMIC_CMPSWAP        : FLAT_Real_Atomics_gfx10<0x031>;
2070defm FLAT_ATOMIC_ADD            : FLAT_Real_Atomics_gfx10<0x032>;
2071defm FLAT_ATOMIC_SUB            : FLAT_Real_Atomics_gfx10<0x033>;
2072defm FLAT_ATOMIC_SMIN           : FLAT_Real_Atomics_gfx10<0x035>;
2073defm FLAT_ATOMIC_UMIN           : FLAT_Real_Atomics_gfx10<0x036>;
2074defm FLAT_ATOMIC_SMAX           : FLAT_Real_Atomics_gfx10<0x037>;
2075defm FLAT_ATOMIC_UMAX           : FLAT_Real_Atomics_gfx10<0x038>;
2076defm FLAT_ATOMIC_AND            : FLAT_Real_Atomics_gfx10<0x039>;
2077defm FLAT_ATOMIC_OR             : FLAT_Real_Atomics_gfx10<0x03a>;
2078defm FLAT_ATOMIC_XOR            : FLAT_Real_Atomics_gfx10<0x03b>;
2079defm FLAT_ATOMIC_INC            : FLAT_Real_Atomics_gfx10<0x03c>;
2080defm FLAT_ATOMIC_DEC            : FLAT_Real_Atomics_gfx10<0x03d>;
2081defm FLAT_ATOMIC_FCMPSWAP       : FLAT_Real_Atomics_gfx10<0x03e>;
2082defm FLAT_ATOMIC_FMIN           : FLAT_Real_Atomics_gfx10<0x03f>;
2083defm FLAT_ATOMIC_FMAX           : FLAT_Real_Atomics_gfx10<0x040>;
2084defm FLAT_ATOMIC_SWAP_X2        : FLAT_Real_Atomics_gfx10<0x050>;
2085defm FLAT_ATOMIC_CMPSWAP_X2     : FLAT_Real_Atomics_gfx10<0x051>;
2086defm FLAT_ATOMIC_ADD_X2         : FLAT_Real_Atomics_gfx10<0x052>;
2087defm FLAT_ATOMIC_SUB_X2         : FLAT_Real_Atomics_gfx10<0x053>;
2088defm FLAT_ATOMIC_SMIN_X2        : FLAT_Real_Atomics_gfx10<0x055>;
2089defm FLAT_ATOMIC_UMIN_X2        : FLAT_Real_Atomics_gfx10<0x056>;
2090defm FLAT_ATOMIC_SMAX_X2        : FLAT_Real_Atomics_gfx10<0x057>;
2091defm FLAT_ATOMIC_UMAX_X2        : FLAT_Real_Atomics_gfx10<0x058>;
2092defm FLAT_ATOMIC_AND_X2         : FLAT_Real_Atomics_gfx10<0x059>;
2093defm FLAT_ATOMIC_OR_X2          : FLAT_Real_Atomics_gfx10<0x05a>;
2094defm FLAT_ATOMIC_XOR_X2         : FLAT_Real_Atomics_gfx10<0x05b>;
2095defm FLAT_ATOMIC_INC_X2         : FLAT_Real_Atomics_gfx10<0x05c>;
2096defm FLAT_ATOMIC_DEC_X2         : FLAT_Real_Atomics_gfx10<0x05d>;
2097defm FLAT_ATOMIC_FCMPSWAP_X2    : FLAT_Real_Atomics_gfx10<0x05e>;
2098defm FLAT_ATOMIC_FMIN_X2        : FLAT_Real_Atomics_gfx10<0x05f>;
2099defm FLAT_ATOMIC_FMAX_X2        : FLAT_Real_Atomics_gfx10<0x060>;
2100
2101
2102// ENC_FLAT_GLBL.
// GFX10 opcode table for the GLOBAL_* instructions. The opcode values mirror
// the FLAT_* table above entry for entry; the differences are:
//  * loads/stores use FLAT_Real_AllAddr_gfx10 (base + _SADDR) and atomics use
//    FLAT_Real_GlblAtomics_gfx10 (base, _SADDR, _RTN, _SADDR_RTN);
//  * GLOBAL_ATOMIC_CSUB (0x034) has no FLAT counterpart and is instantiated
//    through FLAT_Real_GlblAtomics_RTN_gfx10, i.e. only its _RTN forms;
//  * GLOBAL_{LOAD,STORE}_DWORD_ADDTID (0x016/0x017) are appended at the end,
//    out of opcode order.
2103defm GLOBAL_LOAD_UBYTE          : FLAT_Real_AllAddr_gfx10<0x008>;
2104defm GLOBAL_LOAD_SBYTE          : FLAT_Real_AllAddr_gfx10<0x009>;
2105defm GLOBAL_LOAD_USHORT         : FLAT_Real_AllAddr_gfx10<0x00a>;
2106defm GLOBAL_LOAD_SSHORT         : FLAT_Real_AllAddr_gfx10<0x00b>;
2107defm GLOBAL_LOAD_DWORD          : FLAT_Real_AllAddr_gfx10<0x00c>;
2108defm GLOBAL_LOAD_DWORDX2        : FLAT_Real_AllAddr_gfx10<0x00d>;
2109defm GLOBAL_LOAD_DWORDX4        : FLAT_Real_AllAddr_gfx10<0x00e>;
2110defm GLOBAL_LOAD_DWORDX3        : FLAT_Real_AllAddr_gfx10<0x00f>;
2111defm GLOBAL_STORE_BYTE          : FLAT_Real_AllAddr_gfx10<0x018>;
2112defm GLOBAL_STORE_BYTE_D16_HI   : FLAT_Real_AllAddr_gfx10<0x019>;
2113defm GLOBAL_STORE_SHORT         : FLAT_Real_AllAddr_gfx10<0x01a>;
2114defm GLOBAL_STORE_SHORT_D16_HI  : FLAT_Real_AllAddr_gfx10<0x01b>;
2115defm GLOBAL_STORE_DWORD         : FLAT_Real_AllAddr_gfx10<0x01c>;
2116defm GLOBAL_STORE_DWORDX2       : FLAT_Real_AllAddr_gfx10<0x01d>;
2117defm GLOBAL_STORE_DWORDX4       : FLAT_Real_AllAddr_gfx10<0x01e>;
2118defm GLOBAL_STORE_DWORDX3       : FLAT_Real_AllAddr_gfx10<0x01f>;
2119defm GLOBAL_LOAD_UBYTE_D16      : FLAT_Real_AllAddr_gfx10<0x020>;
2120defm GLOBAL_LOAD_UBYTE_D16_HI   : FLAT_Real_AllAddr_gfx10<0x021>;
2121defm GLOBAL_LOAD_SBYTE_D16      : FLAT_Real_AllAddr_gfx10<0x022>;
2122defm GLOBAL_LOAD_SBYTE_D16_HI   : FLAT_Real_AllAddr_gfx10<0x023>;
2123defm GLOBAL_LOAD_SHORT_D16      : FLAT_Real_AllAddr_gfx10<0x024>;
2124defm GLOBAL_LOAD_SHORT_D16_HI   : FLAT_Real_AllAddr_gfx10<0x025>;
2125defm GLOBAL_ATOMIC_SWAP         : FLAT_Real_GlblAtomics_gfx10<0x030>;
2126defm GLOBAL_ATOMIC_CMPSWAP      : FLAT_Real_GlblAtomics_gfx10<0x031>;
2127defm GLOBAL_ATOMIC_ADD          : FLAT_Real_GlblAtomics_gfx10<0x032>;
2128defm GLOBAL_ATOMIC_SUB          : FLAT_Real_GlblAtomics_gfx10<0x033>;
2129defm GLOBAL_ATOMIC_CSUB         : FLAT_Real_GlblAtomics_RTN_gfx10<0x034>;
2130defm GLOBAL_ATOMIC_SMIN         : FLAT_Real_GlblAtomics_gfx10<0x035>;
2131defm GLOBAL_ATOMIC_UMIN         : FLAT_Real_GlblAtomics_gfx10<0x036>;
2132defm GLOBAL_ATOMIC_SMAX         : FLAT_Real_GlblAtomics_gfx10<0x037>;
2133defm GLOBAL_ATOMIC_UMAX         : FLAT_Real_GlblAtomics_gfx10<0x038>;
2134defm GLOBAL_ATOMIC_AND          : FLAT_Real_GlblAtomics_gfx10<0x039>;
2135defm GLOBAL_ATOMIC_OR           : FLAT_Real_GlblAtomics_gfx10<0x03a>;
2136defm GLOBAL_ATOMIC_XOR          : FLAT_Real_GlblAtomics_gfx10<0x03b>;
2137defm GLOBAL_ATOMIC_INC          : FLAT_Real_GlblAtomics_gfx10<0x03c>;
2138defm GLOBAL_ATOMIC_DEC          : FLAT_Real_GlblAtomics_gfx10<0x03d>;
2139defm GLOBAL_ATOMIC_FCMPSWAP     : FLAT_Real_GlblAtomics_gfx10<0x03e>;
2140defm GLOBAL_ATOMIC_FMIN         : FLAT_Real_GlblAtomics_gfx10<0x03f>;
2141defm GLOBAL_ATOMIC_FMAX         : FLAT_Real_GlblAtomics_gfx10<0x040>;
2142defm GLOBAL_ATOMIC_SWAP_X2      : FLAT_Real_GlblAtomics_gfx10<0x050>;
2143defm GLOBAL_ATOMIC_CMPSWAP_X2   : FLAT_Real_GlblAtomics_gfx10<0x051>;
2144defm GLOBAL_ATOMIC_ADD_X2       : FLAT_Real_GlblAtomics_gfx10<0x052>;
2145defm GLOBAL_ATOMIC_SUB_X2       : FLAT_Real_GlblAtomics_gfx10<0x053>;
2146defm GLOBAL_ATOMIC_SMIN_X2      : FLAT_Real_GlblAtomics_gfx10<0x055>;
2147defm GLOBAL_ATOMIC_UMIN_X2      : FLAT_Real_GlblAtomics_gfx10<0x056>;
2148defm GLOBAL_ATOMIC_SMAX_X2      : FLAT_Real_GlblAtomics_gfx10<0x057>;
2149defm GLOBAL_ATOMIC_UMAX_X2      : FLAT_Real_GlblAtomics_gfx10<0x058>;
2150defm GLOBAL_ATOMIC_AND_X2       : FLAT_Real_GlblAtomics_gfx10<0x059>;
2151defm GLOBAL_ATOMIC_OR_X2        : FLAT_Real_GlblAtomics_gfx10<0x05a>;
2152defm GLOBAL_ATOMIC_XOR_X2       : FLAT_Real_GlblAtomics_gfx10<0x05b>;
2153defm GLOBAL_ATOMIC_INC_X2       : FLAT_Real_GlblAtomics_gfx10<0x05c>;
2154defm GLOBAL_ATOMIC_DEC_X2       : FLAT_Real_GlblAtomics_gfx10<0x05d>;
2155defm GLOBAL_ATOMIC_FCMPSWAP_X2  : FLAT_Real_GlblAtomics_gfx10<0x05e>;
2156defm GLOBAL_ATOMIC_FMIN_X2      : FLAT_Real_GlblAtomics_gfx10<0x05f>;
2157defm GLOBAL_ATOMIC_FMAX_X2      : FLAT_Real_GlblAtomics_gfx10<0x060>;
2158defm GLOBAL_LOAD_DWORD_ADDTID   : FLAT_Real_AllAddr_gfx10<0x016>;
2159defm GLOBAL_STORE_DWORD_ADDTID  : FLAT_Real_AllAddr_gfx10<0x017>;
2160
// GFX10 GLOBAL_LOAD_LDS_*: these reuse the opcode values of the corresponding
// GLOBAL_LOAD_* entries (0x008-0x00c). FLAT_Real_AllAddr_LDS_gfx10 gives them
// an AsmString built from the name with "_lds" removed plus a trailing " lds".
// NOTE(review): whatever distinguishes the two in the binary encoding must
// come from the LDS pseudos or FLAT_Real, neither visible here -- confirm.
2161defm GLOBAL_LOAD_LDS_UBYTE      : FLAT_Real_AllAddr_LDS_gfx10 <0x008>;
2162defm GLOBAL_LOAD_LDS_SBYTE      : FLAT_Real_AllAddr_LDS_gfx10 <0x009>;
2163defm GLOBAL_LOAD_LDS_USHORT     : FLAT_Real_AllAddr_LDS_gfx10 <0x00a>;
2164defm GLOBAL_LOAD_LDS_SSHORT     : FLAT_Real_AllAddr_LDS_gfx10 <0x00b>;
2165defm GLOBAL_LOAD_LDS_DWORD      : FLAT_Real_AllAddr_LDS_gfx10 <0x00c>;
2166
2167// ENC_FLAT_SCRATCH.
// GFX10 opcode table for the SCRATCH_* loads and stores. Opcode values match
// the FLAT_*/GLOBAL_* load/store entries above (including DWORDX4 before
// DWORDX3). Each entry expands FLAT_Real_ScratchAllAddr_gfx10, producing the
// base, _SADDR and _ST definitions.
2168defm SCRATCH_LOAD_UBYTE         : FLAT_Real_ScratchAllAddr_gfx10<0x008>;
2169defm SCRATCH_LOAD_SBYTE         : FLAT_Real_ScratchAllAddr_gfx10<0x009>;
2170defm SCRATCH_LOAD_USHORT        : FLAT_Real_ScratchAllAddr_gfx10<0x00a>;
2171defm SCRATCH_LOAD_SSHORT        : FLAT_Real_ScratchAllAddr_gfx10<0x00b>;
2172defm SCRATCH_LOAD_DWORD         : FLAT_Real_ScratchAllAddr_gfx10<0x00c>;
2173defm SCRATCH_LOAD_DWORDX2       : FLAT_Real_ScratchAllAddr_gfx10<0x00d>;
2174defm SCRATCH_LOAD_DWORDX4       : FLAT_Real_ScratchAllAddr_gfx10<0x00e>;
2175defm SCRATCH_LOAD_DWORDX3       : FLAT_Real_ScratchAllAddr_gfx10<0x00f>;
2176defm SCRATCH_STORE_BYTE         : FLAT_Real_ScratchAllAddr_gfx10<0x018>;
2177defm SCRATCH_STORE_BYTE_D16_HI  : FLAT_Real_ScratchAllAddr_gfx10<0x019>;
2178defm SCRATCH_STORE_SHORT        : FLAT_Real_ScratchAllAddr_gfx10<0x01a>;
2179defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_ScratchAllAddr_gfx10<0x01b>;
2180defm SCRATCH_STORE_DWORD        : FLAT_Real_ScratchAllAddr_gfx10<0x01c>;
2181defm SCRATCH_STORE_DWORDX2      : FLAT_Real_ScratchAllAddr_gfx10<0x01d>;
2182defm SCRATCH_STORE_DWORDX4      : FLAT_Real_ScratchAllAddr_gfx10<0x01e>;
2183defm SCRATCH_STORE_DWORDX3      : FLAT_Real_ScratchAllAddr_gfx10<0x01f>;
2184defm SCRATCH_LOAD_UBYTE_D16     : FLAT_Real_ScratchAllAddr_gfx10<0x020>;
2185defm SCRATCH_LOAD_UBYTE_D16_HI  : FLAT_Real_ScratchAllAddr_gfx10<0x021>;
2186defm SCRATCH_LOAD_SBYTE_D16     : FLAT_Real_ScratchAllAddr_gfx10<0x022>;
2187defm SCRATCH_LOAD_SBYTE_D16_HI  : FLAT_Real_ScratchAllAddr_gfx10<0x023>;
2188defm SCRATCH_LOAD_SHORT_D16     : FLAT_Real_ScratchAllAddr_gfx10<0x024>;
2189defm SCRATCH_LOAD_SHORT_D16_HI  : FLAT_Real_ScratchAllAddr_gfx10<0x025>;
2190
// GFX10 SCRATCH_LOAD_LDS_*: same opcode values as the matching SCRATCH_LOAD_*
// entries (0x008-0x00c). FLAT_Real_ScratchAllAddr_LDS_gfx10 produces the base,
// _SADDR and _ST definitions, each with the " lds"-suffixed AsmString.
2191defm SCRATCH_LOAD_LDS_UBYTE     : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x008>;
2192defm SCRATCH_LOAD_LDS_SBYTE     : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x009>;
2193defm SCRATCH_LOAD_LDS_USHORT    : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x00a>;
2194defm SCRATCH_LOAD_LDS_SSHORT    : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x00b>;
2195defm SCRATCH_LOAD_LDS_DWORD     : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x00c>;
2196
2197//===----------------------------------------------------------------------===//
2198// GFX11
2199//===----------------------------------------------------------------------===//
2200
// GFX11 real (encodable) form of the FLAT pseudo `ps` with opcode `op`.
// `opName` is forwarded to FLAT_Real as a third argument and defaults to the
// pseudo's own Mnemonic, so a caller can give the instruction a different
// assembly name. The instruction is registered under SIEncodingFamily.GFX11,
// is available to the assembler when isGFX11Plus holds, and is decoded in the
// "GFX11" namespace.
// Unlike FLAT_Real_gfx10, this class does not assign Inst{54-48}; the
// multiclasses below override that field for the forms that need it.
2201class FLAT_Real_gfx11 <bits<7> op, FLAT_Pseudo ps, string opName = ps.Mnemonic> :
2202  FLAT_Real <op, ps, opName>,
2203  SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX11> {
2204  let AssemblerPredicate = isGFX11Plus;
2205  let DecoderNamespace = "GFX11";
2206
  // DLC / GLC: taken from the cpol operand when the pseudo has the modifier,
  // otherwise the pseudo's fixed dlcValue / glcValue. SLC always comes from
  // cpol.
2207  let Inst{13}    = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlcValue);
2208  let Inst{14}    = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glcValue);
2209  let Inst{15}    = cpol{CPolBit.SLC};
2210  let Inst{17-16} = seg;
2211  let Inst{55}    = ps.sve;
2212}
2213
// When `renamed` is non-zero, defines <NAME>_renamed_gfx11: a MnemonicAlias
// from the Mnemonic of the pseudo named `ps` to `opName`, restricted to
// isGFX11Plus. When `renamed` is zero nothing is defined.
2214multiclass FLAT_Aliases_gfx11<string ps, string opName, int renamed> {
2215  if renamed then
2216    def _renamed_gfx11 : MnemonicAlias<!cast<FLAT_Pseudo>(ps).Mnemonic, opName>, Requires<[isGFX11Plus]>;
2217}
2218
// Defines <NAME>_gfx11 for the pseudo named `ps`, printed as `opName`, with
// the SADDR field Inst{54-48} fixed to the hardware encoding of
// SGPR_NULL_gfx11plus. Via FLAT_Aliases_gfx11 it also emits the mnemonic
// alias when `renamed` is set. Note that the pseudo is selected by the `ps`
// string, not by NAME, so the defm name may differ from the pseudo name.
2219multiclass FLAT_Real_Base_gfx11<bits<7> op, string ps, string opName, int renamed = false> :
2220  FLAT_Aliases_gfx11<ps, opName, renamed> {
2221  def _gfx11 : FLAT_Real_gfx11<op, !cast<FLAT_Pseudo>(ps), opName> {
2222    let Inst{54-48} = !cast<int>(SGPR_NULL_gfx11plus.HWEncoding);
2223  }
2224}
2225
// Defines <NAME>_RTN_gfx11 for the pseudo named <ps>_RTN, printed as
// `opName`, with Inst{54-48} fixed to the SGPR_NULL_gfx11plus encoding.
2226multiclass FLAT_Real_RTN_gfx11<bits<7> op, string ps, string opName> {
2227  def _RTN_gfx11 : FLAT_Real_gfx11<op, !cast<FLAT_Pseudo>(ps#"_RTN"), opName> {
2228    let Inst{54-48} = !cast<int>(SGPR_NULL_gfx11plus.HWEncoding);
2229  }
2230}
2231
// Defines <NAME>_SADDR_gfx11 for the pseudo named <ps>_SADDR, printed as
// `opName`. Inst{54-48} is not overridden here.
2232multiclass FLAT_Real_SADDR_gfx11<bits<7> op, string ps, string opName> {
2233  def _SADDR_gfx11 : FLAT_Real_gfx11<op, !cast<FLAT_Pseudo>(ps#"_SADDR"), opName>;
2234}
2235
// Defines <NAME>_SADDR_RTN_gfx11 for the pseudo named <ps>_SADDR_RTN, printed
// as `opName`. Inst{54-48} is not overridden here.
2236multiclass FLAT_Real_SADDR_RTN_gfx11<bits<7> op, string ps, string opName> {
2237  def _SADDR_RTN_gfx11 : FLAT_Real_gfx11<op, !cast<FLAT_Pseudo>(ps#"_SADDR_RTN"), opName>;
2238}
2239
// Defines <NAME>_ST_gfx11 for the pseudo named <ps>_ST, printed as `opName`.
// Inst{54-48} is fixed to the SGPR_NULL_gfx11plus encoding (the GFX10
// counterpart uses EXEC_HI instead), and the instruction additionally
// requires HasFlatScratchSTMode.
2240multiclass FLAT_Real_ST_gfx11<bits<7> op, string ps, string opName> {
2241  def _ST_gfx11 : FLAT_Real_gfx11<op, !cast<FLAT_Pseudo>(ps#"_ST"), opName> {
2242    let Inst{54-48} = !cast<int>(SGPR_NULL_gfx11plus.HWEncoding);
2243    let OtherPredicates = [HasFlatScratchSTMode];
2244  }
2245}
2246
// Defines <NAME>_SVS_gfx11 for the pseudo named <ps>_SVS, printed as
// `opName`. The instruction additionally requires HasFlatScratchSVSMode;
// Inst{54-48} is not overridden here.
2247multiclass FLAT_Real_SVS_gfx11<bits<7> op, string ps, string opName> {
2248  def _SVS_gfx11 : FLAT_Real_gfx11<op, !cast<FLAT_Pseudo>(ps#"_SVS"), opName> {
2249    let OtherPredicates = [HasFlatScratchSVSMode];
2250  }
2251}
2252
// Base + SADDR: defines <NAME>_gfx11 and <NAME>_SADDR_gfx11 (plus the
// mnemonic alias when `renamed` is set, via FLAT_Real_Base_gfx11).
2253multiclass FLAT_Real_AllAddr_gfx11<bits<7> op, string ps, string opName, int renamed = false> :
2254  FLAT_Real_Base_gfx11<op, ps, opName, renamed>,
2255  FLAT_Real_SADDR_gfx11<op, ps, opName>;
2256
// Base + RTN: defines <NAME>_gfx11 and <NAME>_RTN_gfx11 (plus the mnemonic
// alias when `renamed` is set, via FLAT_Real_Base_gfx11).
2257multiclass FLAT_Real_Atomics_gfx11<bits<7> op, string ps, string opName, int renamed = false> :
2258  FLAT_Real_Base_gfx11<op, ps, opName, renamed>,
2259  FLAT_Real_RTN_gfx11<op, ps, opName>;
2260
// All four forms of a global atomic on GFX11: base and _SADDR (via
// FLAT_Real_AllAddr_gfx11, which also handles the optional alias), plus _RTN
// and _SADDR_RTN.
2261multiclass FLAT_Real_GlblAtomics_gfx11<bits<7> op, string ps, string opName, int renamed = false> :
2262  FLAT_Real_AllAddr_gfx11<op, ps, opName, renamed>,
2263  FLAT_Real_RTN_gfx11<op, ps, opName>,
2264  FLAT_Real_SADDR_RTN_gfx11<op, ps, opName>;
2265
// Only the _RTN and _SADDR_RTN forms of a global atomic on GFX11. Because
// FLAT_Real_Base_gfx11 is not used, the optional mnemonic alias is requested
// directly from FLAT_Aliases_gfx11, and it reads the old mnemonic from the
// pseudo named <ps>_RTN rather than <ps>.
// NOTE(review): presumably because no non-returning pseudo exists for the
// instructions instantiated through this multiclass -- confirm.
2266multiclass FLAT_Real_GlblAtomics_RTN_gfx11<bits<7> op, string ps, string opName, int renamed = false> :
2267  FLAT_Aliases_gfx11<ps#"_RTN", opName, renamed>,
2268  FLAT_Real_RTN_gfx11<op, ps, opName>,
2269  FLAT_Real_SADDR_RTN_gfx11<op, ps, opName>;
2270
// The four scratch forms on GFX11 sharing opcode `op`: base (with optional
// alias), _SADDR, _ST and _SVS. GFX10's scratch multiclass has no _SVS form.
2271multiclass FLAT_Real_ScratchAllAddr_gfx11<bits<7> op, string ps, string opName, int renamed = false> :
2272  FLAT_Real_Base_gfx11<op, ps, opName, renamed>,
2273  FLAT_Real_SADDR_gfx11<op, ps, opName>,
2274  FLAT_Real_ST_gfx11<op, ps, opName>,
2275  FLAT_Real_SVS_gfx11<op, ps, opName>;
2276
2277// ENC_FLAT.
// GFX11 opcode table for the FLAT_* instructions. Template arguments, in
// order: opcode; record name of the existing pseudo (looked up with !cast
// inside the multiclass); the GFX11 assembly mnemonic; and optionally `true`
// to also emit a MnemonicAlias from the pseudo's mnemonic to the new one.
// The defm name on the left only names the generated records (e.g.
// FLAT_LOAD_U8_gfx11); it does not have to match the pseudo name.
// Entries without the trailing `true` (the D16 loads/stores and the
// floating-point atomics) get no alias.
// 0x037 is not assigned in this table; it is used by GLOBAL_ATOMIC_CSUB_U32
// in the GFX11 global table.
2278defm FLAT_LOAD_U8               : FLAT_Real_Base_gfx11<0x010, "FLAT_LOAD_UBYTE", "flat_load_u8", true>;
2279defm FLAT_LOAD_I8               : FLAT_Real_Base_gfx11<0x011, "FLAT_LOAD_SBYTE", "flat_load_i8", true>;
2280defm FLAT_LOAD_U16              : FLAT_Real_Base_gfx11<0x012, "FLAT_LOAD_USHORT", "flat_load_u16", true>;
2281defm FLAT_LOAD_I16              : FLAT_Real_Base_gfx11<0x013, "FLAT_LOAD_SSHORT", "flat_load_i16", true>;
2282defm FLAT_LOAD_B32              : FLAT_Real_Base_gfx11<0x014, "FLAT_LOAD_DWORD", "flat_load_b32", true>;
2283defm FLAT_LOAD_B64              : FLAT_Real_Base_gfx11<0x015, "FLAT_LOAD_DWORDX2", "flat_load_b64", true>;
2284defm FLAT_LOAD_B96              : FLAT_Real_Base_gfx11<0x016, "FLAT_LOAD_DWORDX3", "flat_load_b96", true>;
2285defm FLAT_LOAD_B128             : FLAT_Real_Base_gfx11<0x017, "FLAT_LOAD_DWORDX4", "flat_load_b128", true>;
2286defm FLAT_STORE_B8              : FLAT_Real_Base_gfx11<0x018, "FLAT_STORE_BYTE", "flat_store_b8", true>;
2287defm FLAT_STORE_B16             : FLAT_Real_Base_gfx11<0x019, "FLAT_STORE_SHORT", "flat_store_b16", true>;
2288defm FLAT_STORE_B32             : FLAT_Real_Base_gfx11<0x01a, "FLAT_STORE_DWORD", "flat_store_b32", true>;
2289defm FLAT_STORE_B64             : FLAT_Real_Base_gfx11<0x01b, "FLAT_STORE_DWORDX2", "flat_store_b64", true>;
2290defm FLAT_STORE_B96             : FLAT_Real_Base_gfx11<0x01c, "FLAT_STORE_DWORDX3", "flat_store_b96", true>;
2291defm FLAT_STORE_B128            : FLAT_Real_Base_gfx11<0x01d, "FLAT_STORE_DWORDX4", "flat_store_b128", true>;
2292defm FLAT_LOAD_D16_U8           : FLAT_Real_Base_gfx11<0x01e, "FLAT_LOAD_UBYTE_D16", "flat_load_d16_u8">;
2293defm FLAT_LOAD_D16_I8           : FLAT_Real_Base_gfx11<0x01f, "FLAT_LOAD_SBYTE_D16", "flat_load_d16_i8">;
2294defm FLAT_LOAD_D16_B16          : FLAT_Real_Base_gfx11<0x020, "FLAT_LOAD_SHORT_D16", "flat_load_d16_b16">;
2295defm FLAT_LOAD_D16_HI_U8        : FLAT_Real_Base_gfx11<0x021, "FLAT_LOAD_UBYTE_D16_HI", "flat_load_d16_hi_u8">;
2296defm FLAT_LOAD_D16_HI_I8        : FLAT_Real_Base_gfx11<0x022, "FLAT_LOAD_SBYTE_D16_HI", "flat_load_d16_hi_i8">;
2297defm FLAT_LOAD_D16_HI_B16       : FLAT_Real_Base_gfx11<0x023, "FLAT_LOAD_SHORT_D16_HI", "flat_load_d16_hi_b16">;
2298defm FLAT_STORE_D16_HI_B8       : FLAT_Real_Base_gfx11<0x024, "FLAT_STORE_BYTE_D16_HI", "flat_store_d16_hi_b8">;
2299defm FLAT_STORE_D16_HI_B16      : FLAT_Real_Base_gfx11<0x025, "FLAT_STORE_SHORT_D16_HI", "flat_store_d16_hi_b16">;
2300defm FLAT_ATOMIC_SWAP_B32       : FLAT_Real_Atomics_gfx11<0x033, "FLAT_ATOMIC_SWAP", "flat_atomic_swap_b32", true>;
2301defm FLAT_ATOMIC_CMPSWAP_B32    : FLAT_Real_Atomics_gfx11<0x034, "FLAT_ATOMIC_CMPSWAP", "flat_atomic_cmpswap_b32", true>;
2302defm FLAT_ATOMIC_ADD_U32        : FLAT_Real_Atomics_gfx11<0x035, "FLAT_ATOMIC_ADD", "flat_atomic_add_u32", true>;
2303defm FLAT_ATOMIC_SUB_U32        : FLAT_Real_Atomics_gfx11<0x036, "FLAT_ATOMIC_SUB", "flat_atomic_sub_u32", true>;
2304defm FLAT_ATOMIC_MIN_I32        : FLAT_Real_Atomics_gfx11<0x038, "FLAT_ATOMIC_SMIN", "flat_atomic_min_i32", true>;
2305defm FLAT_ATOMIC_MIN_U32        : FLAT_Real_Atomics_gfx11<0x039, "FLAT_ATOMIC_UMIN", "flat_atomic_min_u32", true>;
2306defm FLAT_ATOMIC_MAX_I32        : FLAT_Real_Atomics_gfx11<0x03a, "FLAT_ATOMIC_SMAX", "flat_atomic_max_i32", true>;
2307defm FLAT_ATOMIC_MAX_U32        : FLAT_Real_Atomics_gfx11<0x03b, "FLAT_ATOMIC_UMAX", "flat_atomic_max_u32", true>;
2308defm FLAT_ATOMIC_AND_B32        : FLAT_Real_Atomics_gfx11<0x03c, "FLAT_ATOMIC_AND", "flat_atomic_and_b32", true>;
2309defm FLAT_ATOMIC_OR_B32         : FLAT_Real_Atomics_gfx11<0x03d, "FLAT_ATOMIC_OR", "flat_atomic_or_b32", true>;
2310defm FLAT_ATOMIC_XOR_B32        : FLAT_Real_Atomics_gfx11<0x03e, "FLAT_ATOMIC_XOR", "flat_atomic_xor_b32", true>;
2311defm FLAT_ATOMIC_INC_U32        : FLAT_Real_Atomics_gfx11<0x03f, "FLAT_ATOMIC_INC", "flat_atomic_inc_u32", true>;
2312defm FLAT_ATOMIC_DEC_U32        : FLAT_Real_Atomics_gfx11<0x040, "FLAT_ATOMIC_DEC", "flat_atomic_dec_u32", true>;
2313defm FLAT_ATOMIC_SWAP_B64       : FLAT_Real_Atomics_gfx11<0x041, "FLAT_ATOMIC_SWAP_X2", "flat_atomic_swap_b64", true>;
2314defm FLAT_ATOMIC_CMPSWAP_B64    : FLAT_Real_Atomics_gfx11<0x042, "FLAT_ATOMIC_CMPSWAP_X2", "flat_atomic_cmpswap_b64", true>;
2315defm FLAT_ATOMIC_ADD_U64        : FLAT_Real_Atomics_gfx11<0x043, "FLAT_ATOMIC_ADD_X2", "flat_atomic_add_u64", true>;
2316defm FLAT_ATOMIC_SUB_U64        : FLAT_Real_Atomics_gfx11<0x044, "FLAT_ATOMIC_SUB_X2", "flat_atomic_sub_u64", true>;
2317defm FLAT_ATOMIC_MIN_I64        : FLAT_Real_Atomics_gfx11<0x045, "FLAT_ATOMIC_SMIN_X2", "flat_atomic_min_i64", true>;
2318defm FLAT_ATOMIC_MIN_U64        : FLAT_Real_Atomics_gfx11<0x046, "FLAT_ATOMIC_UMIN_X2", "flat_atomic_min_u64", true>;
2319defm FLAT_ATOMIC_MAX_I64        : FLAT_Real_Atomics_gfx11<0x047, "FLAT_ATOMIC_SMAX_X2", "flat_atomic_max_i64", true>;
2320defm FLAT_ATOMIC_MAX_U64        : FLAT_Real_Atomics_gfx11<0x048, "FLAT_ATOMIC_UMAX_X2", "flat_atomic_max_u64", true>;
2321defm FLAT_ATOMIC_AND_B64        : FLAT_Real_Atomics_gfx11<0x049, "FLAT_ATOMIC_AND_X2", "flat_atomic_and_b64", true>;
2322defm FLAT_ATOMIC_OR_B64         : FLAT_Real_Atomics_gfx11<0x04a, "FLAT_ATOMIC_OR_X2", "flat_atomic_or_b64", true>;
2323defm FLAT_ATOMIC_XOR_B64        : FLAT_Real_Atomics_gfx11<0x04b, "FLAT_ATOMIC_XOR_X2", "flat_atomic_xor_b64", true>;
2324defm FLAT_ATOMIC_INC_U64        : FLAT_Real_Atomics_gfx11<0x04c, "FLAT_ATOMIC_INC_X2", "flat_atomic_inc_u64", true>;
2325defm FLAT_ATOMIC_DEC_U64        : FLAT_Real_Atomics_gfx11<0x04d, "FLAT_ATOMIC_DEC_X2", "flat_atomic_dec_u64", true>;
2326defm FLAT_ATOMIC_CMPSWAP_F32    : FLAT_Real_Atomics_gfx11<0x050, "FLAT_ATOMIC_FCMPSWAP", "flat_atomic_cmpswap_f32">;
2327defm FLAT_ATOMIC_MIN_F32        : FLAT_Real_Atomics_gfx11<0x051, "FLAT_ATOMIC_FMIN", "flat_atomic_min_f32">;
2328defm FLAT_ATOMIC_MAX_F32        : FLAT_Real_Atomics_gfx11<0x052, "FLAT_ATOMIC_FMAX", "flat_atomic_max_f32">;
2329defm FLAT_ATOMIC_ADD_F32        : FLAT_Real_Atomics_gfx11<0x056, "FLAT_ATOMIC_ADD_F32", "flat_atomic_add_f32">;
2330
2331// ENC_FLAT_GLBL.
// GFX11 opcode table for the GLOBAL_* instructions. Argument order is the
// same as in the FLAT table: opcode, pseudo record name, GFX11 mnemonic,
// optional `true` to emit a MnemonicAlias from the old mnemonic.
// Opcode values mirror the GFX11 FLAT_* table, with three additions that have
// no FLAT counterpart: GLOBAL_LOAD_ADDTID_B32 (0x028),
// GLOBAL_STORE_ADDTID_B32 (0x029) and GLOBAL_ATOMIC_CSUB_U32 (0x037). The
// latter goes through FLAT_Real_GlblAtomics_RTN_gfx11, i.e. only its _RTN
// forms are defined.
2332defm GLOBAL_LOAD_U8             : FLAT_Real_AllAddr_gfx11<0x010, "GLOBAL_LOAD_UBYTE", "global_load_u8", true>;
2333defm GLOBAL_LOAD_I8             : FLAT_Real_AllAddr_gfx11<0x011, "GLOBAL_LOAD_SBYTE", "global_load_i8", true>;
2334defm GLOBAL_LOAD_U16            : FLAT_Real_AllAddr_gfx11<0x012, "GLOBAL_LOAD_USHORT", "global_load_u16", true>;
2335defm GLOBAL_LOAD_I16            : FLAT_Real_AllAddr_gfx11<0x013, "GLOBAL_LOAD_SSHORT", "global_load_i16", true>;
2336defm GLOBAL_LOAD_B32            : FLAT_Real_AllAddr_gfx11<0x014, "GLOBAL_LOAD_DWORD", "global_load_b32", true>;
2337defm GLOBAL_LOAD_B64            : FLAT_Real_AllAddr_gfx11<0x015, "GLOBAL_LOAD_DWORDX2", "global_load_b64", true>;
2338defm GLOBAL_LOAD_B96            : FLAT_Real_AllAddr_gfx11<0x016, "GLOBAL_LOAD_DWORDX3", "global_load_b96", true>;
2339defm GLOBAL_LOAD_B128           : FLAT_Real_AllAddr_gfx11<0x017, "GLOBAL_LOAD_DWORDX4", "global_load_b128", true>;
2340defm GLOBAL_STORE_B8            : FLAT_Real_AllAddr_gfx11<0x018, "GLOBAL_STORE_BYTE", "global_store_b8", true>;
2341defm GLOBAL_STORE_B16           : FLAT_Real_AllAddr_gfx11<0x019, "GLOBAL_STORE_SHORT", "global_store_b16", true>;
2342defm GLOBAL_STORE_B32           : FLAT_Real_AllAddr_gfx11<0x01a, "GLOBAL_STORE_DWORD", "global_store_b32", true>;
2343defm GLOBAL_STORE_B64           : FLAT_Real_AllAddr_gfx11<0x01b, "GLOBAL_STORE_DWORDX2", "global_store_b64", true>;
2344defm GLOBAL_STORE_B96           : FLAT_Real_AllAddr_gfx11<0x01c, "GLOBAL_STORE_DWORDX3", "global_store_b96", true>;
2345defm GLOBAL_STORE_B128          : FLAT_Real_AllAddr_gfx11<0x01d, "GLOBAL_STORE_DWORDX4", "global_store_b128", true>;
2346defm GLOBAL_LOAD_D16_U8         : FLAT_Real_AllAddr_gfx11<0x01e, "GLOBAL_LOAD_UBYTE_D16", "global_load_d16_u8">;
2347defm GLOBAL_LOAD_D16_I8         : FLAT_Real_AllAddr_gfx11<0x01f, "GLOBAL_LOAD_SBYTE_D16", "global_load_d16_i8">;
2348defm GLOBAL_LOAD_D16_B16        : FLAT_Real_AllAddr_gfx11<0x020, "GLOBAL_LOAD_SHORT_D16", "global_load_d16_b16">;
2349defm GLOBAL_LOAD_D16_HI_U8      : FLAT_Real_AllAddr_gfx11<0x021, "GLOBAL_LOAD_UBYTE_D16_HI", "global_load_d16_hi_u8">;
2350defm GLOBAL_LOAD_D16_HI_I8      : FLAT_Real_AllAddr_gfx11<0x022, "GLOBAL_LOAD_SBYTE_D16_HI", "global_load_d16_hi_i8">;
2351defm GLOBAL_LOAD_D16_HI_B16     : FLAT_Real_AllAddr_gfx11<0x023, "GLOBAL_LOAD_SHORT_D16_HI", "global_load_d16_hi_b16">;
2352defm GLOBAL_STORE_D16_HI_B8     : FLAT_Real_AllAddr_gfx11<0x024, "GLOBAL_STORE_BYTE_D16_HI", "global_store_d16_hi_b8">;
2353defm GLOBAL_STORE_D16_HI_B16    : FLAT_Real_AllAddr_gfx11<0x025, "GLOBAL_STORE_SHORT_D16_HI", "global_store_d16_hi_b16">;
2354defm GLOBAL_LOAD_ADDTID_B32     : FLAT_Real_AllAddr_gfx11<0x028, "GLOBAL_LOAD_DWORD_ADDTID", "global_load_addtid_b32">;
2355defm GLOBAL_STORE_ADDTID_B32    : FLAT_Real_AllAddr_gfx11<0x029, "GLOBAL_STORE_DWORD_ADDTID", "global_store_addtid_b32">;
2356defm GLOBAL_ATOMIC_SWAP_B32     : FLAT_Real_GlblAtomics_gfx11<0x033, "GLOBAL_ATOMIC_SWAP", "global_atomic_swap_b32", true>;
2357defm GLOBAL_ATOMIC_CMPSWAP_B32  : FLAT_Real_GlblAtomics_gfx11<0x034, "GLOBAL_ATOMIC_CMPSWAP", "global_atomic_cmpswap_b32", true>;
2358defm GLOBAL_ATOMIC_ADD_U32      : FLAT_Real_GlblAtomics_gfx11<0x035, "GLOBAL_ATOMIC_ADD", "global_atomic_add_u32", true>;
2359defm GLOBAL_ATOMIC_SUB_U32      : FLAT_Real_GlblAtomics_gfx11<0x036, "GLOBAL_ATOMIC_SUB", "global_atomic_sub_u32", true>;
2360defm GLOBAL_ATOMIC_CSUB_U32     : FLAT_Real_GlblAtomics_RTN_gfx11<0x037, "GLOBAL_ATOMIC_CSUB", "global_atomic_csub_u32", true>;
2361defm GLOBAL_ATOMIC_MIN_I32      : FLAT_Real_GlblAtomics_gfx11<0x038, "GLOBAL_ATOMIC_SMIN", "global_atomic_min_i32", true>;
2362defm GLOBAL_ATOMIC_MIN_U32      : FLAT_Real_GlblAtomics_gfx11<0x039, "GLOBAL_ATOMIC_UMIN", "global_atomic_min_u32", true>;
2363defm GLOBAL_ATOMIC_MAX_I32      : FLAT_Real_GlblAtomics_gfx11<0x03a, "GLOBAL_ATOMIC_SMAX", "global_atomic_max_i32", true>;
2364defm GLOBAL_ATOMIC_MAX_U32      : FLAT_Real_GlblAtomics_gfx11<0x03b, "GLOBAL_ATOMIC_UMAX", "global_atomic_max_u32", true>;
2365defm GLOBAL_ATOMIC_AND_B32      : FLAT_Real_GlblAtomics_gfx11<0x03c, "GLOBAL_ATOMIC_AND", "global_atomic_and_b32", true>;
2366defm GLOBAL_ATOMIC_OR_B32       : FLAT_Real_GlblAtomics_gfx11<0x03d, "GLOBAL_ATOMIC_OR", "global_atomic_or_b32", true>;
2367defm GLOBAL_ATOMIC_XOR_B32      : FLAT_Real_GlblAtomics_gfx11<0x03e, "GLOBAL_ATOMIC_XOR", "global_atomic_xor_b32", true>;
2368defm GLOBAL_ATOMIC_INC_U32      : FLAT_Real_GlblAtomics_gfx11<0x03f, "GLOBAL_ATOMIC_INC", "global_atomic_inc_u32", true>;
2369defm GLOBAL_ATOMIC_DEC_U32      : FLAT_Real_GlblAtomics_gfx11<0x040, "GLOBAL_ATOMIC_DEC", "global_atomic_dec_u32", true>;
2370defm GLOBAL_ATOMIC_SWAP_B64     : FLAT_Real_GlblAtomics_gfx11<0x041, "GLOBAL_ATOMIC_SWAP_X2", "global_atomic_swap_b64", true>;
2371defm GLOBAL_ATOMIC_CMPSWAP_B64  : FLAT_Real_GlblAtomics_gfx11<0x042, "GLOBAL_ATOMIC_CMPSWAP_X2", "global_atomic_cmpswap_b64", true>;
2372defm GLOBAL_ATOMIC_ADD_U64      : FLAT_Real_GlblAtomics_gfx11<0x043, "GLOBAL_ATOMIC_ADD_X2", "global_atomic_add_u64", true>;
2373defm GLOBAL_ATOMIC_SUB_U64      : FLAT_Real_GlblAtomics_gfx11<0x044, "GLOBAL_ATOMIC_SUB_X2", "global_atomic_sub_u64", true>;
2374defm GLOBAL_ATOMIC_MIN_I64      : FLAT_Real_GlblAtomics_gfx11<0x045, "GLOBAL_ATOMIC_SMIN_X2", "global_atomic_min_i64", true>;
2375defm GLOBAL_ATOMIC_MIN_U64      : FLAT_Real_GlblAtomics_gfx11<0x046, "GLOBAL_ATOMIC_UMIN_X2", "global_atomic_min_u64", true>;
2376defm GLOBAL_ATOMIC_MAX_I64      : FLAT_Real_GlblAtomics_gfx11<0x047, "GLOBAL_ATOMIC_SMAX_X2", "global_atomic_max_i64", true>;
2377defm GLOBAL_ATOMIC_MAX_U64      : FLAT_Real_GlblAtomics_gfx11<0x048, "GLOBAL_ATOMIC_UMAX_X2", "global_atomic_max_u64", true>;
2378defm GLOBAL_ATOMIC_AND_B64      : FLAT_Real_GlblAtomics_gfx11<0x049, "GLOBAL_ATOMIC_AND_X2", "global_atomic_and_b64", true>;
2379defm GLOBAL_ATOMIC_OR_B64       : FLAT_Real_GlblAtomics_gfx11<0x04a, "GLOBAL_ATOMIC_OR_X2", "global_atomic_or_b64", true>;
2380defm GLOBAL_ATOMIC_XOR_B64      : FLAT_Real_GlblAtomics_gfx11<0x04b, "GLOBAL_ATOMIC_XOR_X2", "global_atomic_xor_b64", true>;
2381defm GLOBAL_ATOMIC_INC_U64      : FLAT_Real_GlblAtomics_gfx11<0x04c, "GLOBAL_ATOMIC_INC_X2", "global_atomic_inc_u64", true>;
2382defm GLOBAL_ATOMIC_DEC_U64      : FLAT_Real_GlblAtomics_gfx11<0x04d, "GLOBAL_ATOMIC_DEC_X2", "global_atomic_dec_u64", true>;
2383defm GLOBAL_ATOMIC_CMPSWAP_F32  : FLAT_Real_GlblAtomics_gfx11<0x050, "GLOBAL_ATOMIC_FCMPSWAP", "global_atomic_cmpswap_f32">;
2384defm GLOBAL_ATOMIC_MIN_F32      : FLAT_Real_GlblAtomics_gfx11<0x051, "GLOBAL_ATOMIC_FMIN", "global_atomic_min_f32">;
2385defm GLOBAL_ATOMIC_MAX_F32      : FLAT_Real_GlblAtomics_gfx11<0x052, "GLOBAL_ATOMIC_FMAX", "global_atomic_max_f32">;
2386defm GLOBAL_ATOMIC_ADD_F32      : FLAT_Real_GlblAtomics_gfx11<0x056, "GLOBAL_ATOMIC_ADD_F32", "global_atomic_add_f32">;
2387
2388// ENC_FLAT_SCRATCH.
// Scratch load/store entries, one defm per instruction, each instantiating
// the FLAT_Real_ScratchAllAddr_gfx11 multiclass. Arguments as they appear
// here:
//   1. a hex value - presumably the GFX11 encoding opcode; the values in this
//      list run contiguously from 0x10 to 0x25,
//   2. an upper-case name string - presumably the pseudo instruction that
//      receives this encoding; confirm against the multiclass definition,
//   3. a lower-case string that is the defm name in lower case - presumably
//      the GFX11 assembly mnemonic,
//   4. an optional trailing `true`.
// NOTE(review): the meaning of the fourth argument is set by the multiclass,
// which is defined outside this section. It is passed for 0x10-0x1d and
// omitted for the D16 entries (0x1e-0x25) - confirm before changing it.
// NOTE(review): the multiclass name suggests each defm expands to one record
// per scratch addressing form - confirm against its definition.
//
// Loads, values 0x10-0x17.
2389defm SCRATCH_LOAD_U8            : FLAT_Real_ScratchAllAddr_gfx11<0x10, "SCRATCH_LOAD_UBYTE", "scratch_load_u8", true>;
2390defm SCRATCH_LOAD_I8            : FLAT_Real_ScratchAllAddr_gfx11<0x11, "SCRATCH_LOAD_SBYTE", "scratch_load_i8", true>;
2391defm SCRATCH_LOAD_U16           : FLAT_Real_ScratchAllAddr_gfx11<0x12, "SCRATCH_LOAD_USHORT", "scratch_load_u16", true>;
2392defm SCRATCH_LOAD_I16           : FLAT_Real_ScratchAllAddr_gfx11<0x13, "SCRATCH_LOAD_SSHORT", "scratch_load_i16", true>;
2393defm SCRATCH_LOAD_B32           : FLAT_Real_ScratchAllAddr_gfx11<0x14, "SCRATCH_LOAD_DWORD", "scratch_load_b32", true>;
2394defm SCRATCH_LOAD_B64           : FLAT_Real_ScratchAllAddr_gfx11<0x15, "SCRATCH_LOAD_DWORDX2", "scratch_load_b64", true>;
2395defm SCRATCH_LOAD_B96           : FLAT_Real_ScratchAllAddr_gfx11<0x16, "SCRATCH_LOAD_DWORDX3", "scratch_load_b96", true>;
2396defm SCRATCH_LOAD_B128          : FLAT_Real_ScratchAllAddr_gfx11<0x17, "SCRATCH_LOAD_DWORDX4", "scratch_load_b128", true>;
// Stores, values 0x18-0x1d.
2397defm SCRATCH_STORE_B8           : FLAT_Real_ScratchAllAddr_gfx11<0x18, "SCRATCH_STORE_BYTE", "scratch_store_b8", true>;
2398defm SCRATCH_STORE_B16          : FLAT_Real_ScratchAllAddr_gfx11<0x19, "SCRATCH_STORE_SHORT", "scratch_store_b16", true>;
2399defm SCRATCH_STORE_B32          : FLAT_Real_ScratchAllAddr_gfx11<0x1a, "SCRATCH_STORE_DWORD", "scratch_store_b32", true>;
2400defm SCRATCH_STORE_B64          : FLAT_Real_ScratchAllAddr_gfx11<0x1b, "SCRATCH_STORE_DWORDX2", "scratch_store_b64", true>;
2401defm SCRATCH_STORE_B96          : FLAT_Real_ScratchAllAddr_gfx11<0x1c, "SCRATCH_STORE_DWORDX3", "scratch_store_b96", true>;
2402defm SCRATCH_STORE_B128         : FLAT_Real_ScratchAllAddr_gfx11<0x1d, "SCRATCH_STORE_DWORDX4", "scratch_store_b128", true>;
// D16 and D16_HI loads, values 0x1e-0x23; the fourth argument is omitted.
2403defm SCRATCH_LOAD_D16_U8        : FLAT_Real_ScratchAllAddr_gfx11<0x1e, "SCRATCH_LOAD_UBYTE_D16", "scratch_load_d16_u8">;
2404defm SCRATCH_LOAD_D16_I8        : FLAT_Real_ScratchAllAddr_gfx11<0x1f, "SCRATCH_LOAD_SBYTE_D16", "scratch_load_d16_i8">;
2405defm SCRATCH_LOAD_D16_B16       : FLAT_Real_ScratchAllAddr_gfx11<0x20, "SCRATCH_LOAD_SHORT_D16", "scratch_load_d16_b16">;
2406defm SCRATCH_LOAD_D16_HI_U8     : FLAT_Real_ScratchAllAddr_gfx11<0x21, "SCRATCH_LOAD_UBYTE_D16_HI", "scratch_load_d16_hi_u8">;
2407defm SCRATCH_LOAD_D16_HI_I8     : FLAT_Real_ScratchAllAddr_gfx11<0x22, "SCRATCH_LOAD_SBYTE_D16_HI", "scratch_load_d16_hi_i8">;
2408defm SCRATCH_LOAD_D16_HI_B16    : FLAT_Real_ScratchAllAddr_gfx11<0x23, "SCRATCH_LOAD_SHORT_D16_HI", "scratch_load_d16_hi_b16">;
// D16_HI stores, values 0x24-0x25; the fourth argument is omitted.
2409defm SCRATCH_STORE_D16_HI_B8    : FLAT_Real_ScratchAllAddr_gfx11<0x24, "SCRATCH_STORE_BYTE_D16_HI", "scratch_store_d16_hi_b8">;
2410defm SCRATCH_STORE_D16_HI_B16   : FLAT_Real_ScratchAllAddr_gfx11<0x25, "SCRATCH_STORE_SHORT_D16_HI", "scratch_store_d16_hi_b16">;
2411