xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/FLATInstructions.td (revision 90ec6a30353aa7caaf995ea50e2e23aa5a099600)
1//===-- FLATInstructions.td - FLAT Instruction Definitions ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
// Addressing-mode complex patterns. Each one matches an i64 address and, via
// the named C++ selector, produces three operands; the patterns in this file
// bind them as (vaddr, offset, slc). All four are registered with
// SDNPWantRoot and a complexity of -10.

// Selectors named without a "Signed" suffix / instantiated with <false>.
def FLATAtomic :
  ComplexPattern<i64, 3, "SelectFlatAtomic", [], [SDNPWantRoot], -10>;
def FLATOffset :
  ComplexPattern<i64, 3, "SelectFlatOffset<false>", [], [SDNPWantRoot], -10>;

// Selectors named with a "Signed" suffix / instantiated with <true>.
def FLATOffsetSigned :
  ComplexPattern<i64, 3, "SelectFlatOffset<true>", [], [SDNPWantRoot], -10>;
def FLATSignedAtomic :
  ComplexPattern<i64, 3, "SelectFlatAtomicSigned", [], [SDNPWantRoot], -10>;
14
15//===----------------------------------------------------------------------===//
16// FLAT classes
17//===----------------------------------------------------------------------===//
18
// Base class of every FLAT-encoded pseudo instruction (flat, global and
// scratch segments). It carries the mnemonic and assembly operand string
// plus a set of has_*/is_* flags that FLAT_Real consults when laying out
// the encoding.
//
//   opName  - mnemonic; also used as the SIMCInstr key.
//   outs    - output operand dag.
//   ins     - input operand dag.
//   asmOps  - assembly operand string appended to the mnemonic by FLAT_Real.
//   pattern - optional selection patterns.
class FLAT_Pseudo<string opName, dag outs, dag ins,
                  string asmOps, list<dag> pattern=[]> :
  InstSI<outs, ins, "", pattern>,
  SIMCInstr<opName, SIEncodingFamily.NONE> {

  // Pseudos are codegen-only; no assembly string is attached here.
  let isPseudo = 1;
  let isCodeGenOnly = 1;

  let FLAT = 1;
  let UseNamedOperandTable = 1;
  let hasSideEffects = 0;
  let SchedRW = [WriteVMEM];

  string Mnemonic = opName;
  string AsmOperands = asmOps;

  // Segment selection. Both clear means the plain flat segment.
  bits<1> is_flat_global = 0;
  bits<1> is_flat_scratch = 0;

  // Which register operands the instruction has.
  bits<1> has_vdst = 1;
  bits<1> has_vaddr = 1;
  bits<1> has_data = 1;

  // We need to distinguish having saddr and enabling saddr because
  // saddr is only valid for scratch and global instructions. Pre-gfx9
  // these bits were reserved, so we also don't necessarily want to
  // set these bits to the disabled value for the original flat
  // segment instructions.
  bits<1> has_saddr = 0;
  bits<1> enabled_saddr = 0;
  bits<7> saddr_value = 0;

  // Cache-control bits: when has_* is 0 the corresponding *Value is the
  // constant to use in place of an operand.
  bits<1> has_glc  = 1;
  bits<1> glcValue = 0;
  bits<1> has_dlc  = 1;
  bits<1> dlcValue = 0;

  // Global and scratch instructions get their own subtarget predicates;
  // everything else only requires a flat address space.
  let SubtargetPredicate =
    !if(is_flat_global, HasFlatGlobalInsts,
        !if(is_flat_scratch, HasFlatScratchInsts, HasFlatAddressSpace));

  // TODO: M0 if it could possibly access LDS (before gfx9? only)?
  let Uses = !if(is_flat_global, [EXEC], [EXEC, FLAT_SCR]);

  // Internally, FLAT instructions are executed as both an LDS and a
  // Buffer instruction; so, they increment both VM_CNT and LGKM_CNT
  // and are not considered done until both have been decremented.
  // Global and scratch accesses do not count against LGKM_CNT.
  let VM_CNT = 1;
  let LGKM_CNT = !if(!or(is_flat_global, is_flat_scratch), 0, 1);

  let IsNonFlatSeg = !if(!or(is_flat_global, is_flat_scratch), 1, 0);
}
71
// Encodable (64-bit) form of a FLAT pseudo. Operand lists, mnemonic and
// assembly operands are taken from the pseudo `ps`; `op` is the 7-bit opcode
// placed in Inst{24-18}.
class FLAT_Real <bits<7> op, FLAT_Pseudo ps> :
  InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
  Enc64 {

  let isPseudo = 0;
  let isCodeGenOnly = 0;

  // Flags mirrored from the pseudo.
  let SubtargetPredicate = ps.SubtargetPredicate;
  let AsmMatchConverter  = ps.AsmMatchConverter;
  let TSFlags = ps.TSFlags;
  let UseNamedOperandTable = ps.UseNamedOperandTable;

  // Register operand fields.
  bits<8> vaddr;
  bits<8> vdata;
  bits<7> saddr;
  bits<8> vdst;

  // Cache-control operand fields. dlc is declared here but not placed in
  // Inst by this class.
  bits<1> slc;
  bits<1> glc;
  bits<1> dlc;

  // Only valid on gfx9
  bits<1> lds = 0; // XXX - What does this actually do?

  // Segment, 00=flat, 01=scratch, 10=global, 11=reserved
  bits<2> seg = !if(ps.is_flat_global, 0b10,
                  !if(ps.is_flat_scratch, 0b01, 0));

  // Signed offset. Highest bit ignored for flat and treated as 12-bit
  // unsigned for flat accesses.
  bits<13> offset;
  bits<1> nv = 0; // XXX - What does this actually do?

  // We don't use tfe right now, and it was removed in gfx9.
  bits<1> tfe = 0;

  // Low dword. offset/lds/seg are only valid on GFX9+.
  let Inst{12-0}  = offset;
  let Inst{13}    = lds;
  let Inst{15-14} = seg;

  // glc is a constant taken from the pseudo when it is not an operand.
  let Inst{16}    = !if(ps.has_glc, glc, ps.glcValue);
  let Inst{17}    = slc;
  let Inst{24-18} = op;
  let Inst{31-26} = 0x37; // Encoding.

  // High dword. Absent register operands are left unset (?).
  let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
  let Inst{47-40} = !if(ps.has_data, vdata, ?);

  // saddr field: 0 when the instruction has no saddr at all (per the note in
  // FLAT_Pseudo these bits were reserved pre-gfx9), 0x7f when it has one but
  // it is disabled, otherwise the register.
  let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7f), 0);

  let Inst{55}    = nv; // nv on GFX9+, TFE before.
  let Inst{63-56} = !if(ps.has_vdst, vdst, ?);
}
127
// Mix-in that tags an instruction with the name shared by its saddr and
// non-saddr variants (SaddrOp) and records which of the two it is (IsSaddr).
class GlobalSaddrTable <bit is_saddr, string Name = ""> {
  string SaddrOp = Name;
  bit IsSaddr = is_saddr;
}
132
133// TODO: Is exec allowed for saddr? The disabled value 0x7f is the
134// same encoding value as exec_hi, so it isn't possible to use that if
135// saddr is 32-bit (which isn't handled here yet).
// Load pseudo.
//
//   regClass      - register class of the loaded value ($vdst).
//   HasTiedOutput - adds a $vdst_in input that is tied to $vdst and excluded
//                   from the encoding.
//   HasSaddr      - the instruction has an saddr slot in its syntax ("off"
//                   is printed when the slot is not enabled).
//   EnableSaddr   - the saddr slot is an actual SReg_64 operand.
//
// Input operand order:
//   $vaddr [, $saddr], $offset, $glc, $slc, $dlc [, $vdst_in]
class FLAT_Load_Pseudo <string opName, RegisterClass regClass,
  bit HasTiedOutput = 0,
  bit HasSaddr = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
  opName,
  (outs regClass:$vdst),
  !con(
    !con(
      !con(
        (ins VReg_64:$vaddr),
        !if(EnableSaddr, (ins SReg_64:$saddr), (ins))),
      (ins flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
    !if(HasTiedOutput, (ins regClass:$vdst_in), (ins))),
  " $vdst, $vaddr"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc$dlc"> {

  let mayLoad = 1;
  let maybeAtomic = 1;

  // Loads have no vdata operand.
  let has_data = 0;
  let has_saddr = HasSaddr;
  let enabled_saddr = EnableSaddr;

  // Only the variant with a real saddr operand gets the _SADDR suffix.
  let PseudoInstr = opName#!if(!and(HasSaddr, EnableSaddr), "_SADDR", "");

  let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", "");
  let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
}
158
// Store pseudo.
//
//   vdataClass  - register class of the stored value ($vdata).
//   HasSaddr    - the instruction has an saddr slot in its syntax ("off" is
//                 printed when the slot is not enabled).
//   EnableSaddr - the saddr slot is an actual SReg_64 operand.
//
// Input operand order:
//   $vaddr, $vdata [, $saddr], $offset, $glc, $slc, $dlc
class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass,
  bit HasSaddr = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
  opName,
  (outs),
  !con(
    !con(
      (ins VReg_64:$vaddr, vdataClass:$vdata),
      !if(EnableSaddr, (ins SReg_64:$saddr), (ins))),
    (ins flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
  " $vaddr, $vdata"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc$dlc"> {

  let mayStore = 1;
  let mayLoad  = 0;
  let maybeAtomic = 1;

  // Stores produce no result register.
  let has_vdst = 0;
  let has_saddr = HasSaddr;
  let enabled_saddr = EnableSaddr;

  // Only the variant with a real saddr operand gets the _SADDR suffix.
  let PseudoInstr = opName#!if(!and(HasSaddr, EnableSaddr), "_SADDR", "");
}
176
// Defines the two forms of a global load: the base record, whose saddr slot
// is present but disabled, and <NAME>_SADDR, whose saddr slot is a real
// operand. HasTiedInput is forwarded as FLAT_Load_Pseudo's HasTiedOutput.
multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass, bit HasTiedInput = 0> {
  let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in {
    // HasSaddr = 1, EnableSaddr left at its default of 0.
    def "" :
      FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1>,
      GlobalSaddrTable<0, opName>;

    // HasSaddr = 1, EnableSaddr = 1.
    def _SADDR :
      FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1, 1>,
      GlobalSaddrTable<1, opName>;
  }
}
185
// Global load that takes only an saddr base (no vaddr operand).
//
//   regClass        - register class of the loaded value ($vdst).
//   HasTiedOutput   - adds a $vdst_in input tied to $vdst and excluded from
//                     the encoding.
//   HasSignedOffset - accepted but not referenced in this class body.
class FLAT_Global_Load_AddTid_Pseudo <string opName, RegisterClass regClass,
  bit HasTiedOutput = 0, bit HasSignedOffset = 0> : FLAT_Pseudo<
  opName,
  (outs regClass:$vdst),
  !con(
    (ins SReg_64:$saddr, flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc),
    !if(HasTiedOutput, (ins regClass:$vdst_in), (ins))),
  " $vdst, $saddr$offset$glc$slc$dlc"> {

  let is_flat_global = 1;
  let mayLoad = 1;
  let maybeAtomic = 1;

  // No vdata and no vaddr; saddr is always a real operand.
  let has_data = 0;
  let has_vaddr = 0;
  let has_saddr = 1;
  let enabled_saddr = 1;

  let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", "");
  let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
}
204
// Defines the two forms of a global store: the base record, whose saddr slot
// is present but disabled, and <NAME>_SADDR, whose saddr slot is a real
// operand.
multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> {
  let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in {
    // HasSaddr = 1, EnableSaddr left at its default of 0.
    def "" :
      FLAT_Store_Pseudo<opName, regClass, 1>,
      GlobalSaddrTable<0, opName>;

    // HasSaddr = 1, EnableSaddr = 1.
    def _SADDR :
      FLAT_Store_Pseudo<opName, regClass, 1, 1>,
      GlobalSaddrTable<1, opName>;
  }
}
213
// Global store that takes only an saddr base (no vaddr operand).
//
//   vdataClass      - register class of the stored value ($vdata).
//   HasSignedOffset - accepted but not referenced in this class body.
class FLAT_Global_Store_AddTid_Pseudo <string opName, RegisterClass vdataClass,
  bit HasSignedOffset = 0> : FLAT_Pseudo<
  opName,
  (outs),
  (ins vdataClass:$vdata, SReg_64:$saddr,
       flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc),
  " $vdata, $saddr$offset$glc$slc$dlc"> {

  let is_flat_global = 1;
  let mayStore = 1;
  let mayLoad  = 0;
  let maybeAtomic = 1;

  // No result register and no vaddr; saddr is always a real operand.
  let has_vdst = 0;
  let has_vaddr = 0;
  let has_saddr = 1;
  let enabled_saddr = 1;
}
231
// Scratch load pseudo. The address comes from exactly one register: a 32-bit
// SGPR ($saddr) when EnableSaddr is set, otherwise a 32-bit VGPR ($vaddr).
// The unused slot is printed as "off".
class FLAT_Scratch_Load_Pseudo <string opName, RegisterClass regClass,
  bit EnableSaddr = 0>: FLAT_Pseudo<
  opName,
  (outs regClass:$vdst),
  !con(
    !if(EnableSaddr, (ins SReg_32_XEXEC_HI:$saddr), (ins VGPR_32:$vaddr)),
    (ins flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
  " $vdst, "#!if(EnableSaddr, "off", "$vaddr")#!if(EnableSaddr, ", $saddr", ", off")#"$offset$glc$slc$dlc"> {

  let mayLoad = 1;
  let maybeAtomic = 1;

  // Loads have no vdata operand.
  let has_data = 0;

  // The saddr slot always exists; vaddr exists only when saddr is disabled.
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;
  let has_vaddr = !if(EnableSaddr, 0, 1);

  let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", "");
}
248
// Scratch store pseudo. The address comes from exactly one register: a 32-bit
// SGPR ($saddr) when EnableSaddr is set, otherwise a 32-bit VGPR ($vaddr).
// The unused slot is printed as "off".
class FLAT_Scratch_Store_Pseudo <string opName, RegisterClass vdataClass, bit EnableSaddr = 0> : FLAT_Pseudo<
  opName,
  (outs),
  !con(
    (ins vdataClass:$vdata),
    !con(
      !if(EnableSaddr, (ins SReg_32_XEXEC_HI:$saddr), (ins VGPR_32:$vaddr)),
      (ins flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc))),
  " "#!if(EnableSaddr, "off", "$vaddr")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc"> {

  let mayStore = 1;
  let mayLoad  = 0;
  let maybeAtomic = 1;

  // Stores produce no result register.
  let has_vdst = 0;

  // The saddr slot always exists; vaddr exists only when saddr is disabled.
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;
  let has_vaddr = !if(EnableSaddr, 0, 1);

  let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", "");
}
265
// Defines both addressing forms of a scratch load: the base record (vaddr)
// and <NAME>_SADDR (saddr).
multiclass FLAT_Scratch_Load_Pseudo<string opName, RegisterClass regClass> {
  let is_flat_scratch = 1 in {
    def ""     : FLAT_Scratch_Load_Pseudo<opName, regClass, 0>;
    def _SADDR : FLAT_Scratch_Load_Pseudo<opName, regClass, 1>;
  }
}
272
// Defines both addressing forms of a scratch store: the base record (vaddr)
// and <NAME>_SADDR (saddr).
multiclass FLAT_Scratch_Store_Pseudo<string opName, RegisterClass regClass> {
  let is_flat_scratch = 1 in {
    def ""     : FLAT_Scratch_Store_Pseudo<opName, regClass, 0>;
    def _SADDR : FLAT_Scratch_Store_Pseudo<opName, regClass, 1>;
  }
}
279
// Atomic read-modify-write that does not return the old value. glc and dlc
// are not operands; both are fixed to 0. There is no vdst.
class FLAT_AtomicNoRet_Pseudo<string opName, dag outs, dag ins,
                               string asm, list<dag> pattern = []> :
  FLAT_Pseudo<opName, outs, ins, asm, pattern> {
  // An atomic both reads and writes memory.
  let mayLoad = 1;
  let mayStore = 1;
  let maybeAtomic = 1;

  let has_vdst = 0;

  // Constant cache-control bits.
  let has_glc  = 0;
  let glcValue = 0;
  let has_dlc  = 0;
  let dlcValue = 0;
}
292
// Atomic read-modify-write that returns a value in vdst. Same as the no-return
// form except that vdst is present and the constant glc bit is 1.
class FLAT_AtomicRet_Pseudo<string opName, dag outs, dag ins,
                            string asm, list<dag> pattern = []>
  : FLAT_AtomicNoRet_Pseudo<opName, outs, ins, asm, pattern> {
  let has_vdst = 1;

  // glc is still not an operand (has_glc stays 0); its fixed value becomes 1.
  let glcValue = 1;
  let dlcValue = 0;

  let hasPostISelHook = 1;
  let PseudoInstr = NAME # "_RTN";
}
302
// Defines a flat-segment atomic in two forms: the base record (no return
// value) and <NAME>_RTN (returns a value in $vdst).
//
//   vdst_rc / vt      - register class and value type of the returned value.
//   atomic            - node matched by the _RTN selection pattern.
//   data_rc / data_vt - register class and value type of $vdata; they default
//                       to the result's.
//   isFP              - value for FPAtomic; defaults to whether data_vt is a
//                       floating-point type.
multiclass FLAT_Atomic_Pseudo<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  SDPatternOperator atomic = null_frag,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc,
  bit isFP = isFloatType<data_vt>.ret> {

  // No-return form; carries no selection pattern here.
  def "" :
    FLAT_AtomicNoRet_Pseudo <opName,
      (outs),
      (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC:$slc),
      " $vaddr, $vdata$offset$slc">,
    GlobalSaddrTable<0, opName>,
    AtomicNoRet <opName, 0> {
    let AddedComplexity = -1; // Prefer global atomics if available
    let FPAtomic = isFP;
    let PseudoInstr = NAME;
  }

  // Returning form; "glc" is a literal part of the assembly string.
  def _RTN :
    FLAT_AtomicRet_Pseudo <opName,
      (outs vdst_rc:$vdst),
      (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC:$slc),
      " $vdst, $vaddr, $vdata$offset glc$slc",
      [(set vt:$vdst,
        (atomic (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$vdata))]>,
    GlobalSaddrTable<0, opName#"_rtn">,
    AtomicNoRet <opName, 1> {
    let AddedComplexity = -1; // Prefer global atomics if available
    let FPAtomic = isFP;
  }
}
334
// No-return forms of a global atomic: the base record (saddr slot present but
// disabled, printed as "off") and <NAME>_SADDR (saddr is a real SReg_64
// operand). Neither record carries a selection pattern here; the `atomic`
// parameter is not referenced in this body.
multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  SDPatternOperator atomic = null_frag,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc,
  bit isFP = isFloatType<data_vt>.ret> {

  def "" :
    FLAT_AtomicNoRet_Pseudo <opName,
      (outs),
      (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC:$slc),
      " $vaddr, $vdata, off$offset$slc">,
    GlobalSaddrTable<0, opName>,
    AtomicNoRet <opName, 0> {
    let FPAtomic = isFP;
    let PseudoInstr = NAME;
    let has_saddr = 1;
  }

  def _SADDR :
    FLAT_AtomicNoRet_Pseudo <opName,
      (outs),
      (ins VReg_64:$vaddr, data_rc:$vdata, SReg_64:$saddr, flat_offset:$offset, SLC:$slc),
      " $vaddr, $vdata, $saddr$offset$slc">,
    GlobalSaddrTable<1, opName>,
    AtomicNoRet <opName#"_saddr", 0> {
    let FPAtomic = isFP;
    let PseudoInstr = NAME#"_SADDR";
    let has_saddr = 1;
    let enabled_saddr = 1;
  }
}
367
// Returning forms of a global atomic: <NAME>_RTN (saddr slot present but
// disabled, printed as "off") and <NAME>_SADDR_RTN (saddr is a real SReg_64
// operand). Only _RTN carries a selection pattern, matched through
// FLATSignedAtomic.
multiclass FLAT_Global_Atomic_Pseudo_RTN<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  SDPatternOperator atomic = null_frag,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc,
  bit isFP = isFloatType<data_vt>.ret> {

  def _RTN :
    FLAT_AtomicRet_Pseudo <opName,
      (outs vdst_rc:$vdst),
      (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC:$slc),
      " $vdst, $vaddr, $vdata, off$offset glc$slc",
      [(set vt:$vdst,
        (atomic (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$vdata))]>,
    GlobalSaddrTable<0, opName#"_rtn">,
    AtomicNoRet <opName, 1> {
    let FPAtomic = isFP;
    let has_saddr = 1;
  }

  def _SADDR_RTN :
    FLAT_AtomicRet_Pseudo <opName,
      (outs vdst_rc:$vdst),
      (ins VReg_64:$vaddr, data_rc:$vdata, SReg_64:$saddr, flat_offset:$offset, SLC:$slc),
      " $vdst, $vaddr, $vdata, $saddr$offset glc$slc">,
    GlobalSaddrTable<1, opName#"_rtn">,
    AtomicNoRet <opName#"_saddr", 1> {
    let FPAtomic = isFP;
    let PseudoInstr = NAME#"_SADDR_RTN";
    let has_saddr = 1;
    let enabled_saddr = 1;
  }
}
401
// Instantiates all four forms of a global atomic (base, _SADDR, _RTN and
// _SADDR_RTN) by combining the no-return and returning multiclasses.
// atomic_no_rtn and atomic_rtn are forwarded to them as their `atomic` node.
multiclass FLAT_Global_Atomic_Pseudo<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  SDPatternOperator atomic_rtn = null_frag,
  SDPatternOperator atomic_no_rtn = null_frag,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc> {
  let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in {
    defm "" : FLAT_Global_Atomic_Pseudo_NO_RTN<
      opName, vdst_rc, vt, atomic_no_rtn, data_vt, data_rc>;
    defm "" : FLAT_Global_Atomic_Pseudo_RTN<
      opName, vdst_rc, vt, atomic_rtn, data_vt, data_rc>;
  }
}
415
416//===----------------------------------------------------------------------===//
417// Flat Instructions
418//===----------------------------------------------------------------------===//
419
// Flat-segment loads.
def FLAT_LOAD_UBYTE : FLAT_Load_Pseudo <"flat_load_ubyte", VGPR_32>;
def FLAT_LOAD_SBYTE : FLAT_Load_Pseudo <"flat_load_sbyte", VGPR_32>;
def FLAT_LOAD_USHORT : FLAT_Load_Pseudo <"flat_load_ushort", VGPR_32>;
def FLAT_LOAD_SSHORT : FLAT_Load_Pseudo <"flat_load_sshort", VGPR_32>;
def FLAT_LOAD_DWORD : FLAT_Load_Pseudo <"flat_load_dword", VGPR_32>;
def FLAT_LOAD_DWORDX2 : FLAT_Load_Pseudo <"flat_load_dwordx2", VReg_64>;
def FLAT_LOAD_DWORDX4 : FLAT_Load_Pseudo <"flat_load_dwordx4", VReg_128>;
def FLAT_LOAD_DWORDX3 : FLAT_Load_Pseudo <"flat_load_dwordx3", VReg_96>;

// Flat-segment stores.
def FLAT_STORE_BYTE : FLAT_Store_Pseudo <"flat_store_byte", VGPR_32>;
def FLAT_STORE_SHORT : FLAT_Store_Pseudo <"flat_store_short", VGPR_32>;
def FLAT_STORE_DWORD : FLAT_Store_Pseudo <"flat_store_dword", VGPR_32>;
def FLAT_STORE_DWORDX2 : FLAT_Store_Pseudo <"flat_store_dwordx2", VReg_64>;
def FLAT_STORE_DWORDX4 : FLAT_Store_Pseudo <"flat_store_dwordx4", VReg_128>;
def FLAT_STORE_DWORDX3 : FLAT_Store_Pseudo <"flat_store_dwordx3", VReg_96>;

let SubtargetPredicate = HasD16LoadStore in {

// D16 loads pass HasTiedOutput = 1, so each takes a $vdst_in input tied to
// $vdst.
def FLAT_LOAD_UBYTE_D16 : FLAT_Load_Pseudo <"flat_load_ubyte_d16", VGPR_32, 1>;
def FLAT_LOAD_UBYTE_D16_HI : FLAT_Load_Pseudo <"flat_load_ubyte_d16_hi", VGPR_32, 1>;
def FLAT_LOAD_SBYTE_D16 : FLAT_Load_Pseudo <"flat_load_sbyte_d16", VGPR_32, 1>;
def FLAT_LOAD_SBYTE_D16_HI : FLAT_Load_Pseudo <"flat_load_sbyte_d16_hi", VGPR_32, 1>;
def FLAT_LOAD_SHORT_D16 : FLAT_Load_Pseudo <"flat_load_short_d16", VGPR_32, 1>;
def FLAT_LOAD_SHORT_D16_HI : FLAT_Load_Pseudo <"flat_load_short_d16_hi", VGPR_32, 1>;

// D16 high-half stores.
def FLAT_STORE_BYTE_D16_HI : FLAT_Store_Pseudo <"flat_store_byte_d16_hi", VGPR_32>;
def FLAT_STORE_SHORT_D16_HI : FLAT_Store_Pseudo <"flat_store_short_d16_hi", VGPR_32>;

} // End SubtargetPredicate = HasD16LoadStore
447
// Flat-segment atomics. Each defm yields a no-return record and an _RTN
// record whose pattern matches the listed node.

// Compare-and-swap: $vdata is twice the width of the result.
defm FLAT_ATOMIC_CMPSWAP : FLAT_Atomic_Pseudo <
  "flat_atomic_cmpswap", VGPR_32, i32, AMDGPUatomic_cmp_swap_flat_32,
  v2i32, VReg_64>;
defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Atomic_Pseudo <
  "flat_atomic_cmpswap_x2", VReg_64, i64, AMDGPUatomic_cmp_swap_flat_64,
  v2i64, VReg_128>;

// Swap.
defm FLAT_ATOMIC_SWAP : FLAT_Atomic_Pseudo <
  "flat_atomic_swap", VGPR_32, i32, atomic_swap_flat_32>;
defm FLAT_ATOMIC_SWAP_X2 : FLAT_Atomic_Pseudo <
  "flat_atomic_swap_x2", VReg_64, i64, atomic_swap_flat_64>;

// 32-bit integer read-modify-write operations.
defm FLAT_ATOMIC_ADD : FLAT_Atomic_Pseudo <
  "flat_atomic_add", VGPR_32, i32, atomic_load_add_flat_32>;
defm FLAT_ATOMIC_SUB : FLAT_Atomic_Pseudo <
  "flat_atomic_sub", VGPR_32, i32, atomic_load_sub_flat_32>;
defm FLAT_ATOMIC_SMIN : FLAT_Atomic_Pseudo <
  "flat_atomic_smin", VGPR_32, i32, atomic_load_min_flat_32>;
defm FLAT_ATOMIC_UMIN : FLAT_Atomic_Pseudo <
  "flat_atomic_umin", VGPR_32, i32, atomic_load_umin_flat_32>;
defm FLAT_ATOMIC_SMAX : FLAT_Atomic_Pseudo <
  "flat_atomic_smax", VGPR_32, i32, atomic_load_max_flat_32>;
defm FLAT_ATOMIC_UMAX : FLAT_Atomic_Pseudo <
  "flat_atomic_umax", VGPR_32, i32, atomic_load_umax_flat_32>;
defm FLAT_ATOMIC_AND : FLAT_Atomic_Pseudo <
  "flat_atomic_and", VGPR_32, i32, atomic_load_and_flat_32>;
defm FLAT_ATOMIC_OR : FLAT_Atomic_Pseudo <
  "flat_atomic_or", VGPR_32, i32, atomic_load_or_flat_32>;
defm FLAT_ATOMIC_XOR : FLAT_Atomic_Pseudo <
  "flat_atomic_xor", VGPR_32, i32, atomic_load_xor_flat_32>;
defm FLAT_ATOMIC_INC : FLAT_Atomic_Pseudo <
  "flat_atomic_inc", VGPR_32, i32, atomic_inc_flat_32>;
defm FLAT_ATOMIC_DEC : FLAT_Atomic_Pseudo <
  "flat_atomic_dec", VGPR_32, i32, atomic_dec_flat_32>;

// 64-bit integer read-modify-write operations.
defm FLAT_ATOMIC_ADD_X2 : FLAT_Atomic_Pseudo <
  "flat_atomic_add_x2", VReg_64, i64, atomic_load_add_flat_64>;
defm FLAT_ATOMIC_SUB_X2 : FLAT_Atomic_Pseudo <
  "flat_atomic_sub_x2", VReg_64, i64, atomic_load_sub_flat_64>;
defm FLAT_ATOMIC_SMIN_X2 : FLAT_Atomic_Pseudo <
  "flat_atomic_smin_x2", VReg_64, i64, atomic_load_min_flat_64>;
defm FLAT_ATOMIC_UMIN_X2 : FLAT_Atomic_Pseudo <
  "flat_atomic_umin_x2", VReg_64, i64, atomic_load_umin_flat_64>;
defm FLAT_ATOMIC_SMAX_X2 : FLAT_Atomic_Pseudo <
  "flat_atomic_smax_x2", VReg_64, i64, atomic_load_max_flat_64>;
defm FLAT_ATOMIC_UMAX_X2 : FLAT_Atomic_Pseudo <
  "flat_atomic_umax_x2", VReg_64, i64, atomic_load_umax_flat_64>;
defm FLAT_ATOMIC_AND_X2 : FLAT_Atomic_Pseudo <
  "flat_atomic_and_x2", VReg_64, i64, atomic_load_and_flat_64>;
defm FLAT_ATOMIC_OR_X2 : FLAT_Atomic_Pseudo <
  "flat_atomic_or_x2", VReg_64, i64, atomic_load_or_flat_64>;
defm FLAT_ATOMIC_XOR_X2 : FLAT_Atomic_Pseudo <
  "flat_atomic_xor_x2", VReg_64, i64, atomic_load_xor_flat_64>;
defm FLAT_ATOMIC_INC_X2 : FLAT_Atomic_Pseudo <
  "flat_atomic_inc_x2", VReg_64, i64, atomic_inc_flat_64>;
defm FLAT_ATOMIC_DEC_X2 : FLAT_Atomic_Pseudo <
  "flat_atomic_dec_x2", VReg_64, i64, atomic_dec_flat_64>;
527
// GFX7-, GFX10-only flat instructions. None of these has a selection node
// attached (the atomic argument is null_frag, explicitly or by default).
let SubtargetPredicate = isGFX7GFX10 in {

// Floating-point compare-and-swap: $vdata is twice the width of the result.
defm FLAT_ATOMIC_FCMPSWAP : FLAT_Atomic_Pseudo <
  "flat_atomic_fcmpswap", VGPR_32, f32, null_frag, v2f32, VReg_64>;
defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Atomic_Pseudo <
  "flat_atomic_fcmpswap_x2", VReg_64, f64, null_frag, v2f64, VReg_128>;

// Floating-point min/max, 32-bit.
defm FLAT_ATOMIC_FMIN : FLAT_Atomic_Pseudo <"flat_atomic_fmin", VGPR_32, f32>;
defm FLAT_ATOMIC_FMAX : FLAT_Atomic_Pseudo <"flat_atomic_fmax", VGPR_32, f32>;

// Floating-point min/max, 64-bit.
defm FLAT_ATOMIC_FMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fmin_x2", VReg_64, f64>;
defm FLAT_ATOMIC_FMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fmax_x2", VReg_64, f64>;

} // End SubtargetPredicate = isGFX7GFX10
550
// Global loads. Each defm yields the base record and a _SADDR record.
defm GLOBAL_LOAD_UBYTE : FLAT_Global_Load_Pseudo <"global_load_ubyte", VGPR_32>;
defm GLOBAL_LOAD_SBYTE : FLAT_Global_Load_Pseudo <"global_load_sbyte", VGPR_32>;
defm GLOBAL_LOAD_USHORT : FLAT_Global_Load_Pseudo <"global_load_ushort", VGPR_32>;
defm GLOBAL_LOAD_SSHORT : FLAT_Global_Load_Pseudo <"global_load_sshort", VGPR_32>;
defm GLOBAL_LOAD_DWORD : FLAT_Global_Load_Pseudo <"global_load_dword", VGPR_32>;
defm GLOBAL_LOAD_DWORDX2 : FLAT_Global_Load_Pseudo <"global_load_dwordx2", VReg_64>;
defm GLOBAL_LOAD_DWORDX3 : FLAT_Global_Load_Pseudo <"global_load_dwordx3", VReg_96>;
defm GLOBAL_LOAD_DWORDX4 : FLAT_Global_Load_Pseudo <"global_load_dwordx4", VReg_128>;

// Global D16 loads pass HasTiedInput = 1, so each takes a tied $vdst_in.
defm GLOBAL_LOAD_UBYTE_D16 : FLAT_Global_Load_Pseudo <"global_load_ubyte_d16", VGPR_32, 1>;
defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Global_Load_Pseudo <"global_load_ubyte_d16_hi", VGPR_32, 1>;
defm GLOBAL_LOAD_SBYTE_D16 : FLAT_Global_Load_Pseudo <"global_load_sbyte_d16", VGPR_32, 1>;
defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Global_Load_Pseudo <"global_load_sbyte_d16_hi", VGPR_32, 1>;
defm GLOBAL_LOAD_SHORT_D16 : FLAT_Global_Load_Pseudo <"global_load_short_d16", VGPR_32, 1>;
defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Global_Load_Pseudo <"global_load_short_d16_hi", VGPR_32, 1>;

// saddr-only load, additionally gated on HasGFX10_BEncoding.
let OtherPredicates = [HasGFX10_BEncoding] in
def GLOBAL_LOAD_DWORD_ADDTID : FLAT_Global_Load_AddTid_Pseudo <"global_load_dword_addtid", VGPR_32>;

// Global stores. Each defm yields the base record and a _SADDR record.
defm GLOBAL_STORE_BYTE : FLAT_Global_Store_Pseudo <"global_store_byte", VGPR_32>;
defm GLOBAL_STORE_SHORT : FLAT_Global_Store_Pseudo <"global_store_short", VGPR_32>;
defm GLOBAL_STORE_DWORD : FLAT_Global_Store_Pseudo <"global_store_dword", VGPR_32>;
defm GLOBAL_STORE_DWORDX2 : FLAT_Global_Store_Pseudo <"global_store_dwordx2", VReg_64>;
defm GLOBAL_STORE_DWORDX3 : FLAT_Global_Store_Pseudo <"global_store_dwordx3", VReg_96>;
defm GLOBAL_STORE_DWORDX4 : FLAT_Global_Store_Pseudo <"global_store_dwordx4", VReg_128>;

// saddr-only store, additionally gated on HasGFX10_BEncoding.
let OtherPredicates = [HasGFX10_BEncoding] in
def GLOBAL_STORE_DWORD_ADDTID : FLAT_Global_Store_AddTid_Pseudo <"global_store_dword_addtid", VGPR_32>;

// Global D16 high-half stores.
defm GLOBAL_STORE_BYTE_D16_HI : FLAT_Global_Store_Pseudo <"global_store_byte_d16_hi", VGPR_32>;
defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Global_Store_Pseudo <"global_store_short_d16_hi", VGPR_32>;
580
// Global atomics. Each FLAT_Global_Atomic_Pseudo defm yields four records
// (base, _SADDR, _RTN, _SADDR_RTN). The node given is the returning-form
// pattern node; the no-return node is null_frag, explicitly or by default.
let is_flat_global = 1 in {

// Compare-and-swap: $vdata is twice the width of the result.
defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Global_Atomic_Pseudo <
  "global_atomic_cmpswap", VGPR_32, i32, AMDGPUatomic_cmp_swap_global_32,
  null_frag, v2i32, VReg_64>;
defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Atomic_Pseudo <
  "global_atomic_cmpswap_x2", VReg_64, i64, AMDGPUatomic_cmp_swap_global_64,
  null_frag, v2i64, VReg_128>;

// Swap.
defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Atomic_Pseudo <
  "global_atomic_swap", VGPR_32, i32, atomic_swap_global_32>;
defm GLOBAL_ATOMIC_SWAP_X2 : FLAT_Global_Atomic_Pseudo <
  "global_atomic_swap_x2", VReg_64, i64, atomic_swap_global_64>;

// 32-bit integer read-modify-write operations.
defm GLOBAL_ATOMIC_ADD : FLAT_Global_Atomic_Pseudo <
  "global_atomic_add", VGPR_32, i32, atomic_load_add_global_32>;
defm GLOBAL_ATOMIC_SUB : FLAT_Global_Atomic_Pseudo <
  "global_atomic_sub", VGPR_32, i32, atomic_load_sub_global_32>;
defm GLOBAL_ATOMIC_SMIN : FLAT_Global_Atomic_Pseudo <
  "global_atomic_smin", VGPR_32, i32, atomic_load_min_global_32>;
defm GLOBAL_ATOMIC_UMIN : FLAT_Global_Atomic_Pseudo <
  "global_atomic_umin", VGPR_32, i32, atomic_load_umin_global_32>;
defm GLOBAL_ATOMIC_SMAX : FLAT_Global_Atomic_Pseudo <
  "global_atomic_smax", VGPR_32, i32, atomic_load_max_global_32>;
defm GLOBAL_ATOMIC_UMAX : FLAT_Global_Atomic_Pseudo <
  "global_atomic_umax", VGPR_32, i32, atomic_load_umax_global_32>;
defm GLOBAL_ATOMIC_AND : FLAT_Global_Atomic_Pseudo <
  "global_atomic_and", VGPR_32, i32, atomic_load_and_global_32>;
defm GLOBAL_ATOMIC_OR : FLAT_Global_Atomic_Pseudo <
  "global_atomic_or", VGPR_32, i32, atomic_load_or_global_32>;
defm GLOBAL_ATOMIC_XOR : FLAT_Global_Atomic_Pseudo <
  "global_atomic_xor", VGPR_32, i32, atomic_load_xor_global_32>;
defm GLOBAL_ATOMIC_INC : FLAT_Global_Atomic_Pseudo <
  "global_atomic_inc", VGPR_32, i32, atomic_inc_global_32>;
defm GLOBAL_ATOMIC_DEC : FLAT_Global_Atomic_Pseudo <
  "global_atomic_dec", VGPR_32, i32, atomic_dec_global_32>;

// 64-bit integer read-modify-write operations.
defm GLOBAL_ATOMIC_ADD_X2 : FLAT_Global_Atomic_Pseudo <
  "global_atomic_add_x2", VReg_64, i64, atomic_load_add_global_64>;
defm GLOBAL_ATOMIC_SUB_X2 : FLAT_Global_Atomic_Pseudo <
  "global_atomic_sub_x2", VReg_64, i64, atomic_load_sub_global_64>;
defm GLOBAL_ATOMIC_SMIN_X2 : FLAT_Global_Atomic_Pseudo <
  "global_atomic_smin_x2", VReg_64, i64, atomic_load_min_global_64>;
defm GLOBAL_ATOMIC_UMIN_X2 : FLAT_Global_Atomic_Pseudo <
  "global_atomic_umin_x2", VReg_64, i64, atomic_load_umin_global_64>;
defm GLOBAL_ATOMIC_SMAX_X2 : FLAT_Global_Atomic_Pseudo <
  "global_atomic_smax_x2", VReg_64, i64, atomic_load_max_global_64>;
defm GLOBAL_ATOMIC_UMAX_X2 : FLAT_Global_Atomic_Pseudo <
  "global_atomic_umax_x2", VReg_64, i64, atomic_load_umax_global_64>;
defm GLOBAL_ATOMIC_AND_X2 : FLAT_Global_Atomic_Pseudo <
  "global_atomic_and_x2", VReg_64, i64, atomic_load_and_global_64>;
defm GLOBAL_ATOMIC_OR_X2 : FLAT_Global_Atomic_Pseudo <
  "global_atomic_or_x2", VReg_64, i64, atomic_load_or_global_64>;
defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Global_Atomic_Pseudo <
  "global_atomic_xor_x2", VReg_64, i64, atomic_load_xor_global_64>;
defm GLOBAL_ATOMIC_INC_X2 : FLAT_Global_Atomic_Pseudo <
  "global_atomic_inc_x2", VReg_64, i64, atomic_inc_global_64>;
defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Atomic_Pseudo <
  "global_atomic_dec_x2", VReg_64, i64, atomic_dec_global_64>;

// Instantiated from the returning multiclass only, so just the _RTN and
// _SADDR_RTN records exist for this one.
let SubtargetPredicate = HasGFX10_BEncoding in
defm GLOBAL_ATOMIC_CSUB : FLAT_Global_Atomic_Pseudo_RTN <
  "global_atomic_csub", VGPR_32, i32, atomic_csub_global_32>;

} // End is_flat_global = 1
667
668
669
// Scratch loads and stores. Each defm yields the base (vaddr) record and a
// _SADDR record.
let SubtargetPredicate = HasFlatScratchInsts in {

// Loads.
defm SCRATCH_LOAD_UBYTE : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte", VGPR_32>;
defm SCRATCH_LOAD_SBYTE : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte", VGPR_32>;
defm SCRATCH_LOAD_USHORT : FLAT_Scratch_Load_Pseudo <"scratch_load_ushort", VGPR_32>;
defm SCRATCH_LOAD_SSHORT : FLAT_Scratch_Load_Pseudo <"scratch_load_sshort", VGPR_32>;
defm SCRATCH_LOAD_DWORD : FLAT_Scratch_Load_Pseudo <"scratch_load_dword", VGPR_32>;
defm SCRATCH_LOAD_DWORDX2 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx2", VReg_64>;
defm SCRATCH_LOAD_DWORDX3 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx3", VReg_96>;
defm SCRATCH_LOAD_DWORDX4 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx4", VReg_128>;

// D16 loads.
// NOTE(review): the flat and global *_d16 loads in this file are defined with
// a tied $vdst_in input, while the scratch load class has no such parameter,
// so these are not tied. Confirm whether that is intended.
defm SCRATCH_LOAD_UBYTE_D16 : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte_d16", VGPR_32>;
defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte_d16_hi", VGPR_32>;
defm SCRATCH_LOAD_SBYTE_D16 : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte_d16", VGPR_32>;
defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte_d16_hi", VGPR_32>;
defm SCRATCH_LOAD_SHORT_D16 : FLAT_Scratch_Load_Pseudo <"scratch_load_short_d16", VGPR_32>;
defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_short_d16_hi", VGPR_32>;

// Stores.
defm SCRATCH_STORE_BYTE : FLAT_Scratch_Store_Pseudo <"scratch_store_byte", VGPR_32>;
defm SCRATCH_STORE_SHORT : FLAT_Scratch_Store_Pseudo <"scratch_store_short", VGPR_32>;
defm SCRATCH_STORE_DWORD : FLAT_Scratch_Store_Pseudo <"scratch_store_dword", VGPR_32>;
defm SCRATCH_STORE_DWORDX2 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx2", VReg_64>;
defm SCRATCH_STORE_DWORDX3 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx3", VReg_96>;
defm SCRATCH_STORE_DWORDX4 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx4", VReg_128>;

// D16 high-half stores.
defm SCRATCH_STORE_BYTE_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_byte_d16_hi", VGPR_32>;
defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_short_d16_hi", VGPR_32>;

} // End SubtargetPredicate = HasFlatScratchInsts
698
// GFX10+ global floating-point atomics. No selection nodes are attached
// (both pattern operators are null_frag).
let SubtargetPredicate = isGFX10Plus, is_flat_global = 1 in {
  // Compare-and-swap takes its data operand in a register tuple twice the
  // width of the result, exactly like the flat FCMPSWAP and the integer
  // CMPSWAP definitions elsewhere in this file. The data type and register
  // class must therefore be given explicitly; leaving them defaulted would
  // make $vdata the same width as $vdst.
  defm GLOBAL_ATOMIC_FCMPSWAP :
    FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap", VGPR_32, f32,
                              null_frag, null_frag, v2f32, VReg_64>;
  defm GLOBAL_ATOMIC_FMIN :
    FLAT_Global_Atomic_Pseudo<"global_atomic_fmin", VGPR_32, f32>;
  defm GLOBAL_ATOMIC_FMAX :
    FLAT_Global_Atomic_Pseudo<"global_atomic_fmax", VGPR_32, f32>;
  defm GLOBAL_ATOMIC_FCMPSWAP_X2 :
    FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap_x2", VReg_64, f64,
                              null_frag, null_frag, v2f64, VReg_128>;
  defm GLOBAL_ATOMIC_FMIN_X2 :
    FLAT_Global_Atomic_Pseudo<"global_atomic_fmin_x2", VReg_64, f64>;
  defm GLOBAL_ATOMIC_FMAX_X2 :
    FLAT_Global_Atomic_Pseudo<"global_atomic_fmax_x2", VReg_64, f64>;
} // End SubtargetPredicate = isGFX10Plus, is_flat_global = 1
713
// Floating-point add atomics on the global segment (scalar f32 and packed
// v2f16). They are instantiated through the _NO_RTN multiclass, and the
// "noret" pattern operator is handed in as the last template argument.
// NOTE(review): presumably only non-returning forms are created here --
// confirm in FLAT_Global_Atomic_Pseudo_NO_RTN, which is outside this section.
714let SubtargetPredicate = HasAtomicFaddInsts, is_flat_global = 1 in {
715
716defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_NO_RTN <
717  "global_atomic_add_f32", VGPR_32, f32, atomic_fadd_global_noret
718>;
719defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_NO_RTN <
720  "global_atomic_pk_add_f16", VGPR_32, v2f16, atomic_pk_fadd_global_noret
721>;
722
723} // End SubtargetPredicate = HasAtomicFaddInsts, is_flat_global = 1
724
725//===----------------------------------------------------------------------===//
726// Flat Patterns
727//===----------------------------------------------------------------------===//
728
729// Load pattern using the FLATOffset complex pattern (SelectFlatOffset<false>).
// The address is split into $vaddr, $offset and $slc, all of which are
// forwarded to `inst`; the loaded value has type `vt`.
// NOTE(review): the two literal 0 operands between $offset and $slc are
// presumably the remaining cache-control bits of the pseudo (glc/dlc) --
// confirm against the load pseudo's operand list.
730class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
731  (vt (node (FLATOffset i64:$vaddr, i16:$offset, i1:$slc))),
732  (inst $vaddr, $offset, 0, 0, $slc)
733>;
734
// D16 load pattern using FLATOffset. Unlike FlatLoadPat, `node` takes a second
// operand, vt:$in, which is passed through as the last operand of `inst`.
// NOTE(review): $in is presumably the previous register contents whose other
// 16-bit half must be preserved (tied input) -- confirm in the D16 pseudos.
735class FlatLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
736  (node (FLATOffset (i64 VReg_64:$vaddr), i16:$offset, i1:$slc), vt:$in),
737  (inst $vaddr, $offset, 0, 0, $slc, $in)
738>;
739
// Same shape as FlatLoadPat_D16, but the address is matched with
// FLATOffsetSigned (SelectFlatOffset<true>) instead of FLATOffset.
740class FlatSignedLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
741  (node (FLATOffsetSigned (i64 VReg_64:$vaddr), i16:$offset, i1:$slc), vt:$in),
742  (inst $vaddr, $offset, 0, 0, $slc, $in)
743>;
744
// Atomic load pattern. The address is matched with FLATAtomic
// (SelectFlatAtomic); the emitted operand list is the same as FlatLoadPat's.
745class FlatLoadAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
746  (vt (node (FLATAtomic (i64 VReg_64:$vaddr), i16:$offset, i1:$slc))),
747  (inst $vaddr, $offset, 0, 0, $slc)
748>;
749
// Load pattern whose address is matched with FLATOffsetSigned
// (SelectFlatOffset<true>); otherwise identical in shape to FlatLoadPat.
750class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
751  (vt (node (FLATOffsetSigned (i64 VReg_64:$vaddr), i16:$offset, i1:$slc))),
752  (inst $vaddr, $offset, 0, 0, $slc)
753>;
754
// Store pattern using FLATOffset. In the source DAG the stored value comes
// first and the address second; in the emitted instruction the order is
// $vaddr, then the data constrained to register class `rc` (VGPR_32 unless
// overridden), then $offset, two literal 0 operands and $slc.
755class FlatStorePat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, RegisterClass rc = VGPR_32> : GCNPat <
756  (node vt:$data, (FLATOffset i64:$vaddr, i16:$offset, i1:$slc)),
757  (inst $vaddr, rc:$data, $offset, 0, 0, $slc)
758>;
759
// Same as FlatStorePat, but the address is matched with FLATOffsetSigned
// (SelectFlatOffset<true>).
760class FlatStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, RegisterClass rc = VGPR_32> : GCNPat <
761  (node vt:$data, (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc)),
762  (inst $vaddr, rc:$data, $offset, 0, 0, $slc)
763>;
764
// Atomic store pattern using FLATAtomic (SelectFlatAtomic). The emitted
// operand list matches FlatStorePat; only the source operand order differs.
765class FlatStoreAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, RegisterClass rc = VGPR_32> : GCNPat <
766  // atomic store follows atomic binop convention so the address comes
767  // first.
768  (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
769  (inst $vaddr, rc:$data, $offset, 0, 0, $slc)
770>;
771
// Atomic store pattern using FLATSignedAtomic (SelectFlatAtomicSigned);
// otherwise identical in shape to FlatStoreAtomicPat.
772class FlatStoreSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, RegisterClass rc = VGPR_32> : GCNPat <
773  // atomic store follows atomic binop convention so the address comes
774  // first.
775  (node (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
776  (inst $vaddr, rc:$data, $offset, 0, 0, $slc)
777>;
778
// Returning atomic read-modify-write pattern using FLATAtomic
// (SelectFlatAtomic). `vt` is the result type and `data_vt` the type of the
// data operand; data_vt defaults to vt and is only given separately where the
// two differ (the cmpswap patterns below pass a two-element vector).
// The emitted operands are $vaddr, $data, $offset, $slc -- no literal 0
// operands, unlike the load/store patterns.
779class FlatAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
780                     ValueType data_vt = vt> : GCNPat <
781  (vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$data)),
782  (inst $vaddr, $data, $offset, $slc)
783>;
784
// Atomic read-modify-write pattern for operations whose result is not used:
// the source DAG has no result type, and `vt` is the type of the data operand.
// The address is matched with FLATAtomic (SelectFlatAtomic).
785class FlatAtomicPatNoRtn <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
786  (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
787  (inst $vaddr, $data, $offset, $slc)
788>;
789
// Same as FlatAtomicPat, but the address is matched with FLATSignedAtomic
// (SelectFlatAtomicSigned).
790class FlatSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
791                     ValueType data_vt = vt> : GCNPat <
792  (vt (node (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$data)),
793  (inst $vaddr, $data, $offset, $slc)
794>;
795
// Selection patterns that map flat-address-space loads, stores and atomics
// onto the FLAT_* pseudos. Everything in this scope is guarded by
// OtherPredicates = [HasFlatAddressSpace], except the nested D16 scope below,
// which sets OtherPredicates again.
796let OtherPredicates = [HasFlatAddressSpace] in {
797
// Extending and sub-dword loads. Any-extending and zero-extending loads both
// use the unsigned instruction; sign-extending loads use the signed one.
798def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i32>;
799def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i32>;
800def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i32>;
801def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i16>;
802def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i16>;
803def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i16>;
804def : FlatLoadPat <FLAT_LOAD_USHORT, extloadi16_flat, i32>;
805def : FlatLoadPat <FLAT_LOAD_USHORT, zextloadi16_flat, i32>;
806def : FlatLoadPat <FLAT_LOAD_USHORT, load_flat, i16>;
807def : FlatLoadPat <FLAT_LOAD_SSHORT, sextloadi16_flat, i32>;
808def : FlatLoadPat <FLAT_LOAD_DWORDX3, load_flat, v3i32>;
809
810def : FlatLoadAtomicPat <FLAT_LOAD_DWORD, atomic_load_32_flat, i32>;
811def : FlatLoadAtomicPat <FLAT_LOAD_DWORDX2, atomic_load_64_flat, i64>;
812
813def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i32>;
814def : FlatStorePat <FLAT_STORE_SHORT, truncstorei16_flat, i32>;
815
// Full-width loads/stores: one pattern per value type of each register size.
816foreach vt = Reg32Types.types in {
817def : FlatLoadPat <FLAT_LOAD_DWORD, load_flat, vt>;
818def : FlatStorePat <FLAT_STORE_DWORD, store_flat, vt>;
819}
820
821foreach vt = VReg_64.RegTypes in {
822def : FlatStorePat <FLAT_STORE_DWORDX2, store_flat, vt, VReg_64>;
823def : FlatLoadPat <FLAT_LOAD_DWORDX2, load_flat, vt>;
824}
825
826def : FlatStorePat <FLAT_STORE_DWORDX3, store_flat, v3i32, VReg_96>;
827
828foreach vt = VReg_128.RegTypes in {
829def : FlatLoadPat <FLAT_LOAD_DWORDX4, load_flat, vt>;
830def : FlatStorePat <FLAT_STORE_DWORDX4, store_flat, vt, VReg_128>;
831}
832
833def : FlatStoreAtomicPat <FLAT_STORE_DWORD, atomic_store_flat_32, i32>;
834def : FlatStoreAtomicPat <FLAT_STORE_DWORDX2, atomic_store_flat_64, i64, VReg_64>;
835
// Returning 32-bit atomics.
// NOTE(review): these FLAT_ATOMIC_* patterns are keyed on the *_global_*
// pattern operators, the same ones the GLOBAL_ATOMIC_* patterns use further
// down -- confirm this is intentional.
836def : FlatAtomicPat <FLAT_ATOMIC_ADD_RTN, atomic_load_add_global_32, i32>;
837def : FlatAtomicPat <FLAT_ATOMIC_SUB_RTN, atomic_load_sub_global_32, i32>;
838def : FlatAtomicPat <FLAT_ATOMIC_INC_RTN, atomic_inc_global_32, i32>;
839def : FlatAtomicPat <FLAT_ATOMIC_DEC_RTN, atomic_dec_global_32, i32>;
840def : FlatAtomicPat <FLAT_ATOMIC_AND_RTN, atomic_load_and_global_32, i32>;
841def : FlatAtomicPat <FLAT_ATOMIC_SMAX_RTN, atomic_load_max_global_32, i32>;
842def : FlatAtomicPat <FLAT_ATOMIC_UMAX_RTN, atomic_load_umax_global_32, i32>;
843def : FlatAtomicPat <FLAT_ATOMIC_SMIN_RTN, atomic_load_min_global_32, i32>;
844def : FlatAtomicPat <FLAT_ATOMIC_UMIN_RTN, atomic_load_umin_global_32, i32>;
845def : FlatAtomicPat <FLAT_ATOMIC_OR_RTN, atomic_load_or_global_32, i32>;
846def : FlatAtomicPat <FLAT_ATOMIC_SWAP_RTN, atomic_swap_global_32, i32>;
847def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global_32, i32, v2i32>;
848def : FlatAtomicPat <FLAT_ATOMIC_XOR_RTN, atomic_load_xor_global_32, i32>;
849
// Returning 64-bit atomics.
850def : FlatAtomicPat <FLAT_ATOMIC_ADD_X2_RTN, atomic_load_add_global_64, i64>;
851def : FlatAtomicPat <FLAT_ATOMIC_SUB_X2_RTN, atomic_load_sub_global_64, i64>;
852def : FlatAtomicPat <FLAT_ATOMIC_INC_X2_RTN, atomic_inc_global_64, i64>;
853def : FlatAtomicPat <FLAT_ATOMIC_DEC_X2_RTN, atomic_dec_global_64, i64>;
854def : FlatAtomicPat <FLAT_ATOMIC_AND_X2_RTN, atomic_load_and_global_64, i64>;
855def : FlatAtomicPat <FLAT_ATOMIC_SMAX_X2_RTN, atomic_load_max_global_64, i64>;
856def : FlatAtomicPat <FLAT_ATOMIC_UMAX_X2_RTN, atomic_load_umax_global_64, i64>;
857def : FlatAtomicPat <FLAT_ATOMIC_SMIN_X2_RTN, atomic_load_min_global_64, i64>;
858def : FlatAtomicPat <FLAT_ATOMIC_UMIN_X2_RTN, atomic_load_umin_global_64, i64>;
859def : FlatAtomicPat <FLAT_ATOMIC_OR_X2_RTN, atomic_load_or_global_64, i64>;
860def : FlatAtomicPat <FLAT_ATOMIC_SWAP_X2_RTN, atomic_swap_global_64, i64>;
861def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global_64, i64, v2i64>;
862def : FlatAtomicPat <FLAT_ATOMIC_XOR_X2_RTN, atomic_load_xor_global_64, i64>;
863
864def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i16>;
865def : FlatStorePat <FLAT_STORE_SHORT, store_flat, i16>;
866
// D16 stores and loads, guarded by D16PreservesUnusedBits.
// NOTE(review): this nested `let` assigns OtherPredicates again, so inside it
// the list is [D16PreservesUnusedBits] only and HasFlatAddressSpace from the
// enclosing scope no longer applies -- confirm that is acceptable.
867let OtherPredicates = [D16PreservesUnusedBits] in {
868def : FlatStorePat <FLAT_STORE_SHORT_D16_HI, truncstorei16_hi16_flat, i32>;
869def : FlatStorePat <FLAT_STORE_BYTE_D16_HI, truncstorei8_hi16_flat, i32>;
870
871def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2i16>;
872def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2f16>;
873def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2i16>;
874def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2f16>;
875def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat, v2i16>;
876def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat, v2f16>;
877
878def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2i16>;
879def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2f16>;
880def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2i16>;
881def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2f16>;
882def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2i16>;
883def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2f16>;
884}
885
886} // End OtherPredicates = [HasFlatAddressSpace]
887
// Selection patterns that map global-address-space loads, stores and atomics
// onto the GLOBAL_* pseudos. Everything in this scope is guarded by
// OtherPredicates = [HasFlatGlobalInsts] (except the nested D16 scope, which
// sets OtherPredicates again) and carries AddedComplexity = 10.
// NOTE(review): the added complexity presumably makes these win over other
// patterns for the same operators (e.g. the FLAT_ATOMIC_* patterns keyed on
// the same *_global_* operators) -- confirm.
888let OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10 in {
889
// Extending and sub-dword loads, matched with the signed-offset pattern class.
890def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, extloadi8_global, i32>;
891def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, zextloadi8_global, i32>;
892def : FlatLoadSignedPat <GLOBAL_LOAD_SBYTE, sextloadi8_global, i32>;
893def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, extloadi8_global, i16>;
894def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, zextloadi8_global, i16>;
895def : FlatLoadSignedPat <GLOBAL_LOAD_SBYTE, sextloadi8_global, i16>;
896def : FlatLoadSignedPat <GLOBAL_LOAD_USHORT, extloadi16_global, i32>;
897def : FlatLoadSignedPat <GLOBAL_LOAD_USHORT, zextloadi16_global, i32>;
898def : FlatLoadSignedPat <GLOBAL_LOAD_SSHORT, sextloadi16_global, i32>;
899def : FlatLoadSignedPat <GLOBAL_LOAD_USHORT, load_global, i16>;
900
// Full-width loads/stores: one pattern per value type of each register size.
901foreach vt = Reg32Types.types in {
902def : FlatLoadSignedPat <GLOBAL_LOAD_DWORD, load_global, vt>;
903def : FlatStoreSignedPat <GLOBAL_STORE_DWORD, store_global, vt, VGPR_32>;
904}
905
906foreach vt = VReg_64.RegTypes in {
907def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX2, load_global, vt>;
908def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX2, store_global, vt, VReg_64>;
909}
910
911def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX3, load_global, v3i32>;
912
913foreach vt = VReg_128.RegTypes in {
914def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX4, load_global, vt>;
915def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX4, store_global, vt, VReg_128>;
916}
917
// NOTE(review): the atomic loads use FlatLoadAtomicPat, i.e. the FLATAtomic
// (SelectFlatAtomic) address matcher rather than a signed-offset one like the
// other global patterns here -- confirm this is intentional.
918def : FlatLoadAtomicPat <GLOBAL_LOAD_DWORD, atomic_load_32_global, i32>;
919def : FlatLoadAtomicPat <GLOBAL_LOAD_DWORDX2, atomic_load_64_global, i64>;
920
921def : FlatStoreSignedPat <GLOBAL_STORE_BYTE, truncstorei8_global, i32, VGPR_32>;
922def : FlatStoreSignedPat <GLOBAL_STORE_BYTE, truncstorei8_global, i16, VGPR_32>;
923def : FlatStoreSignedPat <GLOBAL_STORE_SHORT, truncstorei16_global, i32, VGPR_32>;
924def : FlatStoreSignedPat <GLOBAL_STORE_SHORT, store_global, i16, VGPR_32>;
925def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX3, store_global, v3i32, VReg_96>;
926
// D16 stores and loads, guarded by D16PreservesUnusedBits.
// NOTE(review): this nested `let` assigns OtherPredicates again, so inside it
// the list is [D16PreservesUnusedBits] only and HasFlatGlobalInsts from the
// enclosing scope no longer applies -- confirm that is acceptable.
927let OtherPredicates = [D16PreservesUnusedBits] in {
928def : FlatStoreSignedPat <GLOBAL_STORE_SHORT_D16_HI, truncstorei16_hi16_global, i32>;
929def : FlatStoreSignedPat <GLOBAL_STORE_BYTE_D16_HI, truncstorei8_hi16_global, i32>;
930
931def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2i16>;
932def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2f16>;
933def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global, v2i16>;
934def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global, v2f16>;
935def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SHORT_D16_HI, load_d16_hi_global, v2i16>;
936def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SHORT_D16_HI, load_d16_hi_global, v2f16>;
937
938def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_d16_lo_global, v2i16>;
939def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_d16_lo_global, v2f16>;
940def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_d16_lo_global, v2i16>;
941def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_d16_lo_global, v2f16>;
942def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2i16>;
943def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2f16>;
944}
945
946def : FlatStoreSignedAtomicPat <GLOBAL_STORE_DWORD, atomic_store_global_32, i32>;
947def : FlatStoreSignedAtomicPat <GLOBAL_STORE_DWORDX2, atomic_store_global_64, i64, VReg_64>;
948
// Returning 32-bit atomics.
949def : FlatSignedAtomicPat <GLOBAL_ATOMIC_ADD_RTN, atomic_load_add_global_32, i32>;
950def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SUB_RTN, atomic_load_sub_global_32, i32>;
951def : FlatSignedAtomicPat <GLOBAL_ATOMIC_INC_RTN, atomic_inc_global_32, i32>;
952def : FlatSignedAtomicPat <GLOBAL_ATOMIC_DEC_RTN, atomic_dec_global_32, i32>;
953def : FlatSignedAtomicPat <GLOBAL_ATOMIC_AND_RTN, atomic_load_and_global_32, i32>;
954def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMAX_RTN, atomic_load_max_global_32, i32>;
955def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMAX_RTN, atomic_load_umax_global_32, i32>;
956def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMIN_RTN, atomic_load_min_global_32, i32>;
957def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMIN_RTN, atomic_load_umin_global_32, i32>;
958def : FlatSignedAtomicPat <GLOBAL_ATOMIC_OR_RTN, atomic_load_or_global_32, i32>;
959def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SWAP_RTN, atomic_swap_global_32, i32>;
960def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global_32, i32, v2i32>;
961def : FlatSignedAtomicPat <GLOBAL_ATOMIC_XOR_RTN, atomic_load_xor_global_32, i32>;
962def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CSUB_RTN, atomic_csub_global_32, i32>;
963
// Returning 64-bit atomics.
964def : FlatSignedAtomicPat <GLOBAL_ATOMIC_ADD_X2_RTN, atomic_load_add_global_64, i64>;
965def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SUB_X2_RTN, atomic_load_sub_global_64, i64>;
966def : FlatSignedAtomicPat <GLOBAL_ATOMIC_INC_X2_RTN, atomic_inc_global_64, i64>;
967def : FlatSignedAtomicPat <GLOBAL_ATOMIC_DEC_X2_RTN, atomic_dec_global_64, i64>;
968def : FlatSignedAtomicPat <GLOBAL_ATOMIC_AND_X2_RTN, atomic_load_and_global_64, i64>;
969def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMAX_X2_RTN, atomic_load_max_global_64, i64>;
970def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMAX_X2_RTN, atomic_load_umax_global_64, i64>;
971def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMIN_X2_RTN, atomic_load_min_global_64, i64>;
972def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMIN_X2_RTN, atomic_load_umin_global_64, i64>;
973def : FlatSignedAtomicPat <GLOBAL_ATOMIC_OR_X2_RTN, atomic_load_or_global_64, i64>;
974def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SWAP_X2_RTN, atomic_swap_global_64, i64>;
975def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global_64, i64, v2i64>;
976def : FlatSignedAtomicPat <GLOBAL_ATOMIC_XOR_X2_RTN, atomic_load_xor_global_64, i64>;
977
// Non-returning floating-point add atomics.
// NOTE(review): FlatAtomicPatNoRtn matches the address with FLATAtomic, not
// FLATSignedAtomic like the returning global atomics above; the same noret
// operators are also passed to the pseudo definitions of these instructions
// -- confirm both points are intentional.
978def : FlatAtomicPatNoRtn <GLOBAL_ATOMIC_ADD_F32,    atomic_fadd_global_noret, f32>;
979def : FlatAtomicPatNoRtn <GLOBAL_ATOMIC_PK_ADD_F16, atomic_pk_fadd_global_noret, v2f16>;
980
981} // End OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10
982
983
984//===----------------------------------------------------------------------===//
985// Target
986//===----------------------------------------------------------------------===//
987
988//===----------------------------------------------------------------------===//
989// CI
990//===----------------------------------------------------------------------===//
991
// Real (encoded) form of a FLAT pseudo for CI. `op` is the 7-bit opcode and
// `ps` the pseudo being encoded. The record is linked to the pseudo through
// SIMCInstr using ps.PseudoInstr and the SI encoding family, is accepted by
// the assembler only under isGFX7Only, and is decoded in the "GFX7" namespace.
992class FLAT_Real_ci <bits<7> op, FLAT_Pseudo ps> :
993  FLAT_Real <op, ps>,
994  SIMCInstr <ps.PseudoInstr, SIEncodingFamily.SI> {
995  let AssemblerPredicate = isGFX7Only;
996  let DecoderNamespace="GFX7";
997}
998
// CI opcodes for flat loads (0x8-0xf) and stores (0x18-0x1f).
// Note the ordering: in this encoding DWORDX4 (0xe / 0x1e) comes before
// DWORDX3 (0xf / 0x1f), unlike the VI table further down.
999def FLAT_LOAD_UBYTE_ci         : FLAT_Real_ci <0x8,  FLAT_LOAD_UBYTE>;
1000def FLAT_LOAD_SBYTE_ci         : FLAT_Real_ci <0x9,  FLAT_LOAD_SBYTE>;
1001def FLAT_LOAD_USHORT_ci        : FLAT_Real_ci <0xa,  FLAT_LOAD_USHORT>;
1002def FLAT_LOAD_SSHORT_ci        : FLAT_Real_ci <0xb,  FLAT_LOAD_SSHORT>;
1003def FLAT_LOAD_DWORD_ci         : FLAT_Real_ci <0xc,  FLAT_LOAD_DWORD>;
1004def FLAT_LOAD_DWORDX2_ci       : FLAT_Real_ci <0xd,  FLAT_LOAD_DWORDX2>;
1005def FLAT_LOAD_DWORDX4_ci       : FLAT_Real_ci <0xe,  FLAT_LOAD_DWORDX4>;
1006def FLAT_LOAD_DWORDX3_ci       : FLAT_Real_ci <0xf,  FLAT_LOAD_DWORDX3>;
1007
1008def FLAT_STORE_BYTE_ci         : FLAT_Real_ci <0x18, FLAT_STORE_BYTE>;
1009def FLAT_STORE_SHORT_ci        : FLAT_Real_ci <0x1a, FLAT_STORE_SHORT>;
1010def FLAT_STORE_DWORD_ci        : FLAT_Real_ci <0x1c, FLAT_STORE_DWORD>;
1011def FLAT_STORE_DWORDX2_ci      : FLAT_Real_ci <0x1d, FLAT_STORE_DWORDX2>;
1012def FLAT_STORE_DWORDX4_ci      : FLAT_Real_ci <0x1e, FLAT_STORE_DWORDX4>;
1013def FLAT_STORE_DWORDX3_ci      : FLAT_Real_ci <0x1f, FLAT_STORE_DWORDX3>;
1014
// Emits the two CI encodings of one flat atomic, both with opcode `op`:
//   <NAME>_ci      for the pseudo named ps.PseudoInstr
//   <NAME>_RTN_ci  for the pseudo named ps.PseudoInstr # "_RTN"
// The pseudos are looked up by name with !cast.
1015multiclass FLAT_Real_Atomics_ci <bits<7> op, FLAT_Pseudo ps> {
1016  def _ci     : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>;
1017  def _RTN_ci : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>;
1018}
1019
// CI opcodes for flat atomics: 0x30-0x3d for the 32-bit forms and 0x50-0x5d
// for the 64-bit ("_X2") forms. 0x34 and 0x54 are not assigned here.
1020defm FLAT_ATOMIC_SWAP          : FLAT_Real_Atomics_ci <0x30, FLAT_ATOMIC_SWAP>;
1021defm FLAT_ATOMIC_CMPSWAP       : FLAT_Real_Atomics_ci <0x31, FLAT_ATOMIC_CMPSWAP>;
1022defm FLAT_ATOMIC_ADD           : FLAT_Real_Atomics_ci <0x32, FLAT_ATOMIC_ADD>;
1023defm FLAT_ATOMIC_SUB           : FLAT_Real_Atomics_ci <0x33, FLAT_ATOMIC_SUB>;
1024defm FLAT_ATOMIC_SMIN          : FLAT_Real_Atomics_ci <0x35, FLAT_ATOMIC_SMIN>;
1025defm FLAT_ATOMIC_UMIN          : FLAT_Real_Atomics_ci <0x36, FLAT_ATOMIC_UMIN>;
1026defm FLAT_ATOMIC_SMAX          : FLAT_Real_Atomics_ci <0x37, FLAT_ATOMIC_SMAX>;
1027defm FLAT_ATOMIC_UMAX          : FLAT_Real_Atomics_ci <0x38, FLAT_ATOMIC_UMAX>;
1028defm FLAT_ATOMIC_AND           : FLAT_Real_Atomics_ci <0x39, FLAT_ATOMIC_AND>;
1029defm FLAT_ATOMIC_OR            : FLAT_Real_Atomics_ci <0x3a, FLAT_ATOMIC_OR>;
1030defm FLAT_ATOMIC_XOR           : FLAT_Real_Atomics_ci <0x3b, FLAT_ATOMIC_XOR>;
1031defm FLAT_ATOMIC_INC           : FLAT_Real_Atomics_ci <0x3c, FLAT_ATOMIC_INC>;
1032defm FLAT_ATOMIC_DEC           : FLAT_Real_Atomics_ci <0x3d, FLAT_ATOMIC_DEC>;
1033defm FLAT_ATOMIC_SWAP_X2       : FLAT_Real_Atomics_ci <0x50, FLAT_ATOMIC_SWAP_X2>;
1034defm FLAT_ATOMIC_CMPSWAP_X2    : FLAT_Real_Atomics_ci <0x51, FLAT_ATOMIC_CMPSWAP_X2>;
1035defm FLAT_ATOMIC_ADD_X2        : FLAT_Real_Atomics_ci <0x52, FLAT_ATOMIC_ADD_X2>;
1036defm FLAT_ATOMIC_SUB_X2        : FLAT_Real_Atomics_ci <0x53, FLAT_ATOMIC_SUB_X2>;
1037defm FLAT_ATOMIC_SMIN_X2       : FLAT_Real_Atomics_ci <0x55, FLAT_ATOMIC_SMIN_X2>;
1038defm FLAT_ATOMIC_UMIN_X2       : FLAT_Real_Atomics_ci <0x56, FLAT_ATOMIC_UMIN_X2>;
1039defm FLAT_ATOMIC_SMAX_X2       : FLAT_Real_Atomics_ci <0x57, FLAT_ATOMIC_SMAX_X2>;
1040defm FLAT_ATOMIC_UMAX_X2       : FLAT_Real_Atomics_ci <0x58, FLAT_ATOMIC_UMAX_X2>;
1041defm FLAT_ATOMIC_AND_X2        : FLAT_Real_Atomics_ci <0x59, FLAT_ATOMIC_AND_X2>;
1042defm FLAT_ATOMIC_OR_X2         : FLAT_Real_Atomics_ci <0x5a, FLAT_ATOMIC_OR_X2>;
1043defm FLAT_ATOMIC_XOR_X2        : FLAT_Real_Atomics_ci <0x5b, FLAT_ATOMIC_XOR_X2>;
1044defm FLAT_ATOMIC_INC_X2        : FLAT_Real_Atomics_ci <0x5c, FLAT_ATOMIC_INC_X2>;
1045defm FLAT_ATOMIC_DEC_X2        : FLAT_Real_Atomics_ci <0x5d, FLAT_ATOMIC_DEC_X2>;
1046
1047// Floating-point flat atomics. No VI encodings are defined for these in this
// file, but FLAT_ATOMIC_FCMPSWAP/FMIN/FMAX also get GFX10 encodings further
// down, so they are "CI only" only in the sense of not existing on VI.
1048defm FLAT_ATOMIC_FCMPSWAP      : FLAT_Real_Atomics_ci <0x3e, FLAT_ATOMIC_FCMPSWAP>;
1049defm FLAT_ATOMIC_FMIN          : FLAT_Real_Atomics_ci <0x3f, FLAT_ATOMIC_FMIN>;
1050defm FLAT_ATOMIC_FMAX          : FLAT_Real_Atomics_ci <0x40, FLAT_ATOMIC_FMAX>;
1051defm FLAT_ATOMIC_FCMPSWAP_X2   : FLAT_Real_Atomics_ci <0x5e, FLAT_ATOMIC_FCMPSWAP_X2>;
1052defm FLAT_ATOMIC_FMIN_X2       : FLAT_Real_Atomics_ci <0x5f, FLAT_ATOMIC_FMIN_X2>;
1053defm FLAT_ATOMIC_FMAX_X2       : FLAT_Real_Atomics_ci <0x60, FLAT_ATOMIC_FMAX_X2>;
1054
1055
1056//===----------------------------------------------------------------------===//
1057// VI
1058//===----------------------------------------------------------------------===//
1059
// Real (encoded) form of a FLAT pseudo for the VI encoding. `op` is the 7-bit
// opcode and `ps` the pseudo being encoded. The record uses the VI encoding
// family, is accepted by the assembler under isGFX8GFX9, and is decoded in
// the "GFX8" namespace.
1060class FLAT_Real_vi <bits<7> op, FLAT_Pseudo ps> :
1061  FLAT_Real <op, ps>,
1062  SIMCInstr <ps.PseudoInstr, SIEncodingFamily.VI> {
1063  let AssemblerPredicate = isGFX8GFX9;
1064  let DecoderNamespace = "GFX8";
1065}
1066
// Emits VI encodings for both addressing variants of the pseudo whose name
// equals the defm name: <NAME>_vi for <NAME> and <NAME>_SADDR_vi for
// <NAME>_SADDR. Both share opcode `op`; the pseudos are looked up by name.
1067multiclass FLAT_Real_AllAddr_vi<bits<7> op> {
1068  def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME)>;
1069  def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
1070}
1071
// VI opcodes for flat loads (0x10-0x17), stores (0x18-0x1f, including the
// D16_HI stores at 0x19 and 0x1b) and D16 loads (0x20-0x25).
// The DWORDX4 lines are listed before DWORDX3, but here DWORDX3 has the
// lower opcode (0x16 / 0x1e) -- the opposite of the CI table.
1072def FLAT_LOAD_UBYTE_vi         : FLAT_Real_vi <0x10, FLAT_LOAD_UBYTE>;
1073def FLAT_LOAD_SBYTE_vi         : FLAT_Real_vi <0x11, FLAT_LOAD_SBYTE>;
1074def FLAT_LOAD_USHORT_vi        : FLAT_Real_vi <0x12, FLAT_LOAD_USHORT>;
1075def FLAT_LOAD_SSHORT_vi        : FLAT_Real_vi <0x13, FLAT_LOAD_SSHORT>;
1076def FLAT_LOAD_DWORD_vi         : FLAT_Real_vi <0x14, FLAT_LOAD_DWORD>;
1077def FLAT_LOAD_DWORDX2_vi       : FLAT_Real_vi <0x15, FLAT_LOAD_DWORDX2>;
1078def FLAT_LOAD_DWORDX4_vi       : FLAT_Real_vi <0x17, FLAT_LOAD_DWORDX4>;
1079def FLAT_LOAD_DWORDX3_vi       : FLAT_Real_vi <0x16, FLAT_LOAD_DWORDX3>;
1080
1081def FLAT_STORE_BYTE_vi         : FLAT_Real_vi <0x18, FLAT_STORE_BYTE>;
1082def FLAT_STORE_BYTE_D16_HI_vi  : FLAT_Real_vi <0x19, FLAT_STORE_BYTE_D16_HI>;
1083def FLAT_STORE_SHORT_vi        : FLAT_Real_vi <0x1a, FLAT_STORE_SHORT>;
1084def FLAT_STORE_SHORT_D16_HI_vi : FLAT_Real_vi <0x1b, FLAT_STORE_SHORT_D16_HI>;
1085def FLAT_STORE_DWORD_vi        : FLAT_Real_vi <0x1c, FLAT_STORE_DWORD>;
1086def FLAT_STORE_DWORDX2_vi      : FLAT_Real_vi <0x1d, FLAT_STORE_DWORDX2>;
1087def FLAT_STORE_DWORDX4_vi      : FLAT_Real_vi <0x1f, FLAT_STORE_DWORDX4>;
1088def FLAT_STORE_DWORDX3_vi      : FLAT_Real_vi <0x1e, FLAT_STORE_DWORDX3>;
1089
1090def FLAT_LOAD_UBYTE_D16_vi    : FLAT_Real_vi <0x20, FLAT_LOAD_UBYTE_D16>;
1091def FLAT_LOAD_UBYTE_D16_HI_vi : FLAT_Real_vi <0x21, FLAT_LOAD_UBYTE_D16_HI>;
1092def FLAT_LOAD_SBYTE_D16_vi    : FLAT_Real_vi <0x22, FLAT_LOAD_SBYTE_D16>;
1093def FLAT_LOAD_SBYTE_D16_HI_vi : FLAT_Real_vi <0x23, FLAT_LOAD_SBYTE_D16_HI>;
1094def FLAT_LOAD_SHORT_D16_vi    : FLAT_Real_vi <0x24, FLAT_LOAD_SHORT_D16>;
1095def FLAT_LOAD_SHORT_D16_HI_vi : FLAT_Real_vi <0x25, FLAT_LOAD_SHORT_D16_HI>;
1096
// Emits the two VI encodings of one flat atomic, both with opcode `op`:
//   <NAME>_vi      for the pseudo named ps.PseudoInstr
//   <NAME>_RTN_vi  for the pseudo named ps.PseudoInstr # "_RTN"
1097multiclass FLAT_Real_Atomics_vi <bits<7> op, FLAT_Pseudo ps> {
1098  def _vi     : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>;
1099  def _RTN_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>;
1100}
1101
// VI encodings for a global atomic. On top of the _vi and _SADDR_vi records
// inherited from FLAT_Real_AllAddr_vi, this adds _RTN_vi and _SADDR_RTN_vi
// for the pseudos <NAME>_RTN and <NAME>_SADDR_RTN. All four share `op`.
1102multiclass FLAT_Global_Real_Atomics_vi<bits<7> op> :
1103  FLAT_Real_AllAddr_vi<op> {
1104  def _RTN_vi  : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_RTN")>;
1105  def _SADDR_RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN")>;
1106}
1107
1108
// VI opcodes for flat atomics: 0x40-0x4c for the 32-bit forms and 0x60-0x6c
// for the 64-bit ("_X2") forms, assigned contiguously.
1109defm FLAT_ATOMIC_SWAP       : FLAT_Real_Atomics_vi <0x40, FLAT_ATOMIC_SWAP>;
1110defm FLAT_ATOMIC_CMPSWAP    : FLAT_Real_Atomics_vi <0x41, FLAT_ATOMIC_CMPSWAP>;
1111defm FLAT_ATOMIC_ADD        : FLAT_Real_Atomics_vi <0x42, FLAT_ATOMIC_ADD>;
1112defm FLAT_ATOMIC_SUB        : FLAT_Real_Atomics_vi <0x43, FLAT_ATOMIC_SUB>;
1113defm FLAT_ATOMIC_SMIN       : FLAT_Real_Atomics_vi <0x44, FLAT_ATOMIC_SMIN>;
1114defm FLAT_ATOMIC_UMIN       : FLAT_Real_Atomics_vi <0x45, FLAT_ATOMIC_UMIN>;
1115defm FLAT_ATOMIC_SMAX       : FLAT_Real_Atomics_vi <0x46, FLAT_ATOMIC_SMAX>;
1116defm FLAT_ATOMIC_UMAX       : FLAT_Real_Atomics_vi <0x47, FLAT_ATOMIC_UMAX>;
1117defm FLAT_ATOMIC_AND        : FLAT_Real_Atomics_vi <0x48, FLAT_ATOMIC_AND>;
1118defm FLAT_ATOMIC_OR         : FLAT_Real_Atomics_vi <0x49, FLAT_ATOMIC_OR>;
1119defm FLAT_ATOMIC_XOR        : FLAT_Real_Atomics_vi <0x4a, FLAT_ATOMIC_XOR>;
1120defm FLAT_ATOMIC_INC        : FLAT_Real_Atomics_vi <0x4b, FLAT_ATOMIC_INC>;
1121defm FLAT_ATOMIC_DEC        : FLAT_Real_Atomics_vi <0x4c, FLAT_ATOMIC_DEC>;
1122defm FLAT_ATOMIC_SWAP_X2    : FLAT_Real_Atomics_vi <0x60, FLAT_ATOMIC_SWAP_X2>;
1123defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_vi <0x61, FLAT_ATOMIC_CMPSWAP_X2>;
1124defm FLAT_ATOMIC_ADD_X2     : FLAT_Real_Atomics_vi <0x62, FLAT_ATOMIC_ADD_X2>;
1125defm FLAT_ATOMIC_SUB_X2     : FLAT_Real_Atomics_vi <0x63, FLAT_ATOMIC_SUB_X2>;
1126defm FLAT_ATOMIC_SMIN_X2    : FLAT_Real_Atomics_vi <0x64, FLAT_ATOMIC_SMIN_X2>;
1127defm FLAT_ATOMIC_UMIN_X2    : FLAT_Real_Atomics_vi <0x65, FLAT_ATOMIC_UMIN_X2>;
1128defm FLAT_ATOMIC_SMAX_X2    : FLAT_Real_Atomics_vi <0x66, FLAT_ATOMIC_SMAX_X2>;
1129defm FLAT_ATOMIC_UMAX_X2    : FLAT_Real_Atomics_vi <0x67, FLAT_ATOMIC_UMAX_X2>;
1130defm FLAT_ATOMIC_AND_X2     : FLAT_Real_Atomics_vi <0x68, FLAT_ATOMIC_AND_X2>;
1131defm FLAT_ATOMIC_OR_X2      : FLAT_Real_Atomics_vi <0x69, FLAT_ATOMIC_OR_X2>;
1132defm FLAT_ATOMIC_XOR_X2     : FLAT_Real_Atomics_vi <0x6a, FLAT_ATOMIC_XOR_X2>;
1133defm FLAT_ATOMIC_INC_X2     : FLAT_Real_Atomics_vi <0x6b, FLAT_ATOMIC_INC_X2>;
1134defm FLAT_ATOMIC_DEC_X2     : FLAT_Real_Atomics_vi <0x6c, FLAT_ATOMIC_DEC_X2>;
1135
// VI encodings for global loads and stores (plain and _SADDR variants).
// The opcode of each operation is the same number used by the corresponding
// FLAT_*_vi record above.
// NOTE(review): the segment is therefore presumably distinguished by other
// encoding bits derived from the pseudo -- confirm in FLAT_Real.
1136defm GLOBAL_LOAD_UBYTE : FLAT_Real_AllAddr_vi <0x10>;
1137defm GLOBAL_LOAD_SBYTE : FLAT_Real_AllAddr_vi <0x11>;
1138defm GLOBAL_LOAD_USHORT : FLAT_Real_AllAddr_vi <0x12>;
1139defm GLOBAL_LOAD_SSHORT : FLAT_Real_AllAddr_vi <0x13>;
1140defm GLOBAL_LOAD_DWORD : FLAT_Real_AllAddr_vi <0x14>;
1141defm GLOBAL_LOAD_DWORDX2 : FLAT_Real_AllAddr_vi <0x15>;
1142defm GLOBAL_LOAD_DWORDX3 : FLAT_Real_AllAddr_vi <0x16>;
1143defm GLOBAL_LOAD_DWORDX4 : FLAT_Real_AllAddr_vi <0x17>;
1144
1145defm GLOBAL_LOAD_UBYTE_D16    : FLAT_Real_AllAddr_vi <0x20>;
1146defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_vi <0x21>;
1147defm GLOBAL_LOAD_SBYTE_D16    : FLAT_Real_AllAddr_vi <0x22>;
1148defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_vi <0x23>;
1149defm GLOBAL_LOAD_SHORT_D16    : FLAT_Real_AllAddr_vi <0x24>;
1150defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x25>;
1151
1152defm GLOBAL_STORE_BYTE : FLAT_Real_AllAddr_vi <0x18>;
1153defm GLOBAL_STORE_BYTE_D16_HI : FLAT_Real_AllAddr_vi <0x19>;
1154defm GLOBAL_STORE_SHORT : FLAT_Real_AllAddr_vi <0x1a>;
1155defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x1b>;
1156defm GLOBAL_STORE_DWORD : FLAT_Real_AllAddr_vi <0x1c>;
1157defm GLOBAL_STORE_DWORDX2 : FLAT_Real_AllAddr_vi <0x1d>;
1158defm GLOBAL_STORE_DWORDX3 : FLAT_Real_AllAddr_vi <0x1e>;
1159defm GLOBAL_STORE_DWORDX4 : FLAT_Real_AllAddr_vi <0x1f>;
1160
1161
// VI encodings for global atomics. Each defm produces four records (plain,
// _SADDR, _RTN, _SADDR_RTN) through FLAT_Global_Real_Atomics_vi. The opcode
// numbers are the same as those of the matching FLAT_ATOMIC_* VI records.
1162defm GLOBAL_ATOMIC_SWAP       : FLAT_Global_Real_Atomics_vi <0x40>;
1163defm GLOBAL_ATOMIC_CMPSWAP    : FLAT_Global_Real_Atomics_vi <0x41>;
1164defm GLOBAL_ATOMIC_ADD        : FLAT_Global_Real_Atomics_vi <0x42>;
1165defm GLOBAL_ATOMIC_SUB        : FLAT_Global_Real_Atomics_vi <0x43>;
1166defm GLOBAL_ATOMIC_SMIN       : FLAT_Global_Real_Atomics_vi <0x44>;
1167defm GLOBAL_ATOMIC_UMIN       : FLAT_Global_Real_Atomics_vi <0x45>;
1168defm GLOBAL_ATOMIC_SMAX       : FLAT_Global_Real_Atomics_vi <0x46>;
1169defm GLOBAL_ATOMIC_UMAX       : FLAT_Global_Real_Atomics_vi <0x47>;
1170defm GLOBAL_ATOMIC_AND        : FLAT_Global_Real_Atomics_vi <0x48>;
1171defm GLOBAL_ATOMIC_OR         : FLAT_Global_Real_Atomics_vi <0x49>;
1172defm GLOBAL_ATOMIC_XOR        : FLAT_Global_Real_Atomics_vi <0x4a>;
1173defm GLOBAL_ATOMIC_INC        : FLAT_Global_Real_Atomics_vi <0x4b>;
1174defm GLOBAL_ATOMIC_DEC        : FLAT_Global_Real_Atomics_vi <0x4c>;
1175defm GLOBAL_ATOMIC_SWAP_X2    : FLAT_Global_Real_Atomics_vi <0x60>;
1176defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Real_Atomics_vi <0x61>;
1177defm GLOBAL_ATOMIC_ADD_X2     : FLAT_Global_Real_Atomics_vi <0x62>;
1178defm GLOBAL_ATOMIC_SUB_X2     : FLAT_Global_Real_Atomics_vi <0x63>;
1179defm GLOBAL_ATOMIC_SMIN_X2    : FLAT_Global_Real_Atomics_vi <0x64>;
1180defm GLOBAL_ATOMIC_UMIN_X2    : FLAT_Global_Real_Atomics_vi <0x65>;
1181defm GLOBAL_ATOMIC_SMAX_X2    : FLAT_Global_Real_Atomics_vi <0x66>;
1182defm GLOBAL_ATOMIC_UMAX_X2    : FLAT_Global_Real_Atomics_vi <0x67>;
1183defm GLOBAL_ATOMIC_AND_X2     : FLAT_Global_Real_Atomics_vi <0x68>;
1184defm GLOBAL_ATOMIC_OR_X2      : FLAT_Global_Real_Atomics_vi <0x69>;
1185defm GLOBAL_ATOMIC_XOR_X2     : FLAT_Global_Real_Atomics_vi <0x6a>;
1186defm GLOBAL_ATOMIC_INC_X2     : FLAT_Global_Real_Atomics_vi <0x6b>;
1187defm GLOBAL_ATOMIC_DEC_X2     : FLAT_Global_Real_Atomics_vi <0x6c>;
1188
// VI encodings for scratch loads and stores (plain and _SADDR variants).
// The opcode numbers are the same as for the FLAT and GLOBAL records of the
// same operation. The list is not in opcode order: the D16 loads (0x20-0x25)
// sit between the byte stores and the short/dword stores.
1189defm SCRATCH_LOAD_UBYTE         : FLAT_Real_AllAddr_vi <0x10>;
1190defm SCRATCH_LOAD_SBYTE         : FLAT_Real_AllAddr_vi <0x11>;
1191defm SCRATCH_LOAD_USHORT        : FLAT_Real_AllAddr_vi <0x12>;
1192defm SCRATCH_LOAD_SSHORT        : FLAT_Real_AllAddr_vi <0x13>;
1193defm SCRATCH_LOAD_DWORD         : FLAT_Real_AllAddr_vi <0x14>;
1194defm SCRATCH_LOAD_DWORDX2       : FLAT_Real_AllAddr_vi <0x15>;
1195defm SCRATCH_LOAD_DWORDX3       : FLAT_Real_AllAddr_vi <0x16>;
1196defm SCRATCH_LOAD_DWORDX4       : FLAT_Real_AllAddr_vi <0x17>;
1197defm SCRATCH_STORE_BYTE         : FLAT_Real_AllAddr_vi <0x18>;
1198defm SCRATCH_STORE_BYTE_D16_HI  : FLAT_Real_AllAddr_vi <0x19>;
1199defm SCRATCH_LOAD_UBYTE_D16     : FLAT_Real_AllAddr_vi <0x20>;
1200defm SCRATCH_LOAD_UBYTE_D16_HI  : FLAT_Real_AllAddr_vi <0x21>;
1201defm SCRATCH_LOAD_SBYTE_D16     : FLAT_Real_AllAddr_vi <0x22>;
1202defm SCRATCH_LOAD_SBYTE_D16_HI  : FLAT_Real_AllAddr_vi <0x23>;
1203defm SCRATCH_LOAD_SHORT_D16     : FLAT_Real_AllAddr_vi <0x24>;
1204defm SCRATCH_LOAD_SHORT_D16_HI  : FLAT_Real_AllAddr_vi <0x25>;
1205defm SCRATCH_STORE_SHORT        : FLAT_Real_AllAddr_vi <0x1a>;
1206defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x1b>;
1207defm SCRATCH_STORE_DWORD        : FLAT_Real_AllAddr_vi <0x1c>;
1208defm SCRATCH_STORE_DWORDX2      : FLAT_Real_AllAddr_vi <0x1d>;
1209defm SCRATCH_STORE_DWORDX3      : FLAT_Real_AllAddr_vi <0x1e>;
1210defm SCRATCH_STORE_DWORDX4      : FLAT_Real_AllAddr_vi <0x1f>;
1211
1212
1213//===----------------------------------------------------------------------===//
1214// GFX10.
1215//===----------------------------------------------------------------------===//
1216
// Real (encoded) form of a FLAT pseudo for GFX10. It uses the GFX10 encoding
// family, is accepted by the assembler under isGFX10Plus, is decoded in the
// "GFX10" namespace, and overrides these encoding fields:
//   Inst{11-0}  - the low 12 bits of the offset operand.
//   Inst{12}    - the dlc operand when the pseudo has one, else ps.dlcValue.
//   Inst{54-48} - the saddr operand only when the pseudo both has and enables
//                 saddr; 0x7d in every other case.
//   Inst{55}    - always 0.
// NOTE(review): 0x7d presumably is the register encoding that means "no
// scalar address" on GFX10 -- confirm against the ISA documentation.
1217class FLAT_Real_gfx10<bits<7> op, FLAT_Pseudo ps> :
1218    FLAT_Real<op, ps>, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX10> {
1219  let AssemblerPredicate = isGFX10Plus;
1220  let DecoderNamespace = "GFX10";
1221
1222  let Inst{11-0}  = offset{11-0};
1223  let Inst{12}    = !if(ps.has_dlc, dlc, ps.dlcValue);
1224  let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7d), 0x7d);
1225  let Inst{55}    = 0;
1226}
1227
1228
// Emits <NAME>_gfx10, the GFX10 encoding of the pseudo named <NAME>.
1229multiclass FLAT_Real_Base_gfx10<bits<7> op> {
1230  def _gfx10 :
1231    FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME)>;
1232}
1233
// Emits <NAME>_RTN_gfx10, the GFX10 encoding of the pseudo named <NAME>_RTN.
1234multiclass FLAT_Real_RTN_gfx10<bits<7> op> {
1235  def _RTN_gfx10 :
1236    FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_RTN")>;
1237}
1238
// Emits <NAME>_SADDR_gfx10, the GFX10 encoding of the pseudo named
// <NAME>_SADDR.
1239multiclass FLAT_Real_SADDR_gfx10<bits<7> op> {
1240  def _SADDR_gfx10 :
1241    FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
1242}
1243
// Emits <NAME>_SADDR_RTN_gfx10, the GFX10 encoding of the pseudo named
// <NAME>_SADDR_RTN.
1244multiclass FLAT_Real_SADDR_RTN_gfx10<bits<7> op> {
1245  def _SADDR_RTN_gfx10 :
1246    FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN")>;
1247}
1248
1249
// Both addressing variants: combines the _gfx10 and _SADDR_gfx10 records.
1250multiclass FLAT_Real_AllAddr_gfx10<bits<7> op> :
1251  FLAT_Real_Base_gfx10<op>,
1252  FLAT_Real_SADDR_gfx10<op>;
1253
// Flat atomic: combines the _gfx10 and _RTN_gfx10 records (no _SADDR forms).
1254multiclass FLAT_Real_Atomics_gfx10<bits<7> op> :
1255  FLAT_Real_Base_gfx10<op>,
1256  FLAT_Real_RTN_gfx10<op>;
1257
// Global atomic: all four records -- _gfx10, _SADDR_gfx10, _RTN_gfx10 and
// _SADDR_RTN_gfx10.
1258multiclass FLAT_Real_GlblAtomics_gfx10<bits<7> op> :
1259  FLAT_Real_AllAddr_gfx10<op>,
1260  FLAT_Real_RTN_gfx10<op>,
1261  FLAT_Real_SADDR_RTN_gfx10<op>;
1262
// Global atomic with returning forms only: _RTN_gfx10 and _SADDR_RTN_gfx10,
// without the non-returning records.
1263multiclass FLAT_Real_GlblAtomics_RTN_gfx10<bits<7> op> :
1264  FLAT_Real_RTN_gfx10<op>,
1265  FLAT_Real_SADDR_RTN_gfx10<op>;
1266
1267// ENC_FLAT.
// gfx10 real records for the FLAT_* pseudos. The hex literal on each line is
// the bits<7> `op` argument forwarded to FLAT_Real_gfx10.
//   FLAT_Real_Base_gfx10    -> <NAME>_gfx10 only (loads and stores).
//   FLAT_Real_Atomics_gfx10 -> <NAME>_gfx10 and <NAME>_RTN_gfx10.
// Each defm requires a like-named FLAT_Pseudo record (and, for atomics, a
// <NAME>_RTN one) because the multiclasses resolve it by name with !cast.
// The op values are not contiguous; e.g. 0x034 and 0x054 are not used in this
// list.
1268defm FLAT_LOAD_UBYTE            : FLAT_Real_Base_gfx10<0x008>;
1269defm FLAT_LOAD_SBYTE            : FLAT_Real_Base_gfx10<0x009>;
1270defm FLAT_LOAD_USHORT           : FLAT_Real_Base_gfx10<0x00a>;
1271defm FLAT_LOAD_SSHORT           : FLAT_Real_Base_gfx10<0x00b>;
1272defm FLAT_LOAD_DWORD            : FLAT_Real_Base_gfx10<0x00c>;
1273defm FLAT_LOAD_DWORDX2          : FLAT_Real_Base_gfx10<0x00d>;
1274defm FLAT_LOAD_DWORDX4          : FLAT_Real_Base_gfx10<0x00e>;
1275defm FLAT_LOAD_DWORDX3          : FLAT_Real_Base_gfx10<0x00f>;
1276defm FLAT_STORE_BYTE            : FLAT_Real_Base_gfx10<0x018>;
1277defm FLAT_STORE_BYTE_D16_HI     : FLAT_Real_Base_gfx10<0x019>;
1278defm FLAT_STORE_SHORT           : FLAT_Real_Base_gfx10<0x01a>;
1279defm FLAT_STORE_SHORT_D16_HI    : FLAT_Real_Base_gfx10<0x01b>;
1280defm FLAT_STORE_DWORD           : FLAT_Real_Base_gfx10<0x01c>;
1281defm FLAT_STORE_DWORDX2         : FLAT_Real_Base_gfx10<0x01d>;
1282defm FLAT_STORE_DWORDX4         : FLAT_Real_Base_gfx10<0x01e>;
1283defm FLAT_STORE_DWORDX3         : FLAT_Real_Base_gfx10<0x01f>;
1284defm FLAT_LOAD_UBYTE_D16        : FLAT_Real_Base_gfx10<0x020>;
1285defm FLAT_LOAD_UBYTE_D16_HI     : FLAT_Real_Base_gfx10<0x021>;
1286defm FLAT_LOAD_SBYTE_D16        : FLAT_Real_Base_gfx10<0x022>;
1287defm FLAT_LOAD_SBYTE_D16_HI     : FLAT_Real_Base_gfx10<0x023>;
1288defm FLAT_LOAD_SHORT_D16        : FLAT_Real_Base_gfx10<0x024>;
1289defm FLAT_LOAD_SHORT_D16_HI     : FLAT_Real_Base_gfx10<0x025>;
1290defm FLAT_ATOMIC_SWAP           : FLAT_Real_Atomics_gfx10<0x030>;
1291defm FLAT_ATOMIC_CMPSWAP        : FLAT_Real_Atomics_gfx10<0x031>;
1292defm FLAT_ATOMIC_ADD            : FLAT_Real_Atomics_gfx10<0x032>;
1293defm FLAT_ATOMIC_SUB            : FLAT_Real_Atomics_gfx10<0x033>;
1294defm FLAT_ATOMIC_SMIN           : FLAT_Real_Atomics_gfx10<0x035>;
1295defm FLAT_ATOMIC_UMIN           : FLAT_Real_Atomics_gfx10<0x036>;
1296defm FLAT_ATOMIC_SMAX           : FLAT_Real_Atomics_gfx10<0x037>;
1297defm FLAT_ATOMIC_UMAX           : FLAT_Real_Atomics_gfx10<0x038>;
1298defm FLAT_ATOMIC_AND            : FLAT_Real_Atomics_gfx10<0x039>;
1299defm FLAT_ATOMIC_OR             : FLAT_Real_Atomics_gfx10<0x03a>;
1300defm FLAT_ATOMIC_XOR            : FLAT_Real_Atomics_gfx10<0x03b>;
1301defm FLAT_ATOMIC_INC            : FLAT_Real_Atomics_gfx10<0x03c>;
1302defm FLAT_ATOMIC_DEC            : FLAT_Real_Atomics_gfx10<0x03d>;
1303defm FLAT_ATOMIC_FCMPSWAP       : FLAT_Real_Atomics_gfx10<0x03e>;
1304defm FLAT_ATOMIC_FMIN           : FLAT_Real_Atomics_gfx10<0x03f>;
1305defm FLAT_ATOMIC_FMAX           : FLAT_Real_Atomics_gfx10<0x040>;
1306defm FLAT_ATOMIC_SWAP_X2        : FLAT_Real_Atomics_gfx10<0x050>;
1307defm FLAT_ATOMIC_CMPSWAP_X2     : FLAT_Real_Atomics_gfx10<0x051>;
1308defm FLAT_ATOMIC_ADD_X2         : FLAT_Real_Atomics_gfx10<0x052>;
1309defm FLAT_ATOMIC_SUB_X2         : FLAT_Real_Atomics_gfx10<0x053>;
1310defm FLAT_ATOMIC_SMIN_X2        : FLAT_Real_Atomics_gfx10<0x055>;
1311defm FLAT_ATOMIC_UMIN_X2        : FLAT_Real_Atomics_gfx10<0x056>;
1312defm FLAT_ATOMIC_SMAX_X2        : FLAT_Real_Atomics_gfx10<0x057>;
1313defm FLAT_ATOMIC_UMAX_X2        : FLAT_Real_Atomics_gfx10<0x058>;
1314defm FLAT_ATOMIC_AND_X2         : FLAT_Real_Atomics_gfx10<0x059>;
1315defm FLAT_ATOMIC_OR_X2          : FLAT_Real_Atomics_gfx10<0x05a>;
1316defm FLAT_ATOMIC_XOR_X2         : FLAT_Real_Atomics_gfx10<0x05b>;
1317defm FLAT_ATOMIC_INC_X2         : FLAT_Real_Atomics_gfx10<0x05c>;
1318defm FLAT_ATOMIC_DEC_X2         : FLAT_Real_Atomics_gfx10<0x05d>;
1319defm FLAT_ATOMIC_FCMPSWAP_X2    : FLAT_Real_Atomics_gfx10<0x05e>;
1320defm FLAT_ATOMIC_FMIN_X2        : FLAT_Real_Atomics_gfx10<0x05f>;
1321defm FLAT_ATOMIC_FMAX_X2        : FLAT_Real_Atomics_gfx10<0x060>;
1322
1323
1324// ENC_FLAT_GLBL.
// gfx10 real records for the GLOBAL_* pseudos. The hex literal is the bits<7>
// `op` argument forwarded to FLAT_Real_gfx10; for same-named operations the
// values match the FLAT_* entries in the ENC_FLAT list.
//   FLAT_Real_AllAddr_gfx10     -> <NAME>_gfx10 and <NAME>_SADDR_gfx10.
//   FLAT_Real_GlblAtomics_gfx10 -> those two plus <NAME>_RTN_gfx10 and
//                                  <NAME>_SADDR_RTN_gfx10.
1325defm GLOBAL_LOAD_UBYTE          : FLAT_Real_AllAddr_gfx10<0x008>;
1326defm GLOBAL_LOAD_SBYTE          : FLAT_Real_AllAddr_gfx10<0x009>;
1327defm GLOBAL_LOAD_USHORT         : FLAT_Real_AllAddr_gfx10<0x00a>;
1328defm GLOBAL_LOAD_SSHORT         : FLAT_Real_AllAddr_gfx10<0x00b>;
1329defm GLOBAL_LOAD_DWORD          : FLAT_Real_AllAddr_gfx10<0x00c>;
1330defm GLOBAL_LOAD_DWORDX2        : FLAT_Real_AllAddr_gfx10<0x00d>;
1331defm GLOBAL_LOAD_DWORDX4        : FLAT_Real_AllAddr_gfx10<0x00e>;
1332defm GLOBAL_LOAD_DWORDX3        : FLAT_Real_AllAddr_gfx10<0x00f>;
1333defm GLOBAL_STORE_BYTE          : FLAT_Real_AllAddr_gfx10<0x018>;
1334defm GLOBAL_STORE_BYTE_D16_HI   : FLAT_Real_AllAddr_gfx10<0x019>;
1335defm GLOBAL_STORE_SHORT         : FLAT_Real_AllAddr_gfx10<0x01a>;
1336defm GLOBAL_STORE_SHORT_D16_HI  : FLAT_Real_AllAddr_gfx10<0x01b>;
1337defm GLOBAL_STORE_DWORD         : FLAT_Real_AllAddr_gfx10<0x01c>;
1338defm GLOBAL_STORE_DWORDX2       : FLAT_Real_AllAddr_gfx10<0x01d>;
1339defm GLOBAL_STORE_DWORDX4       : FLAT_Real_AllAddr_gfx10<0x01e>;
1340defm GLOBAL_STORE_DWORDX3       : FLAT_Real_AllAddr_gfx10<0x01f>;
1341defm GLOBAL_LOAD_UBYTE_D16      : FLAT_Real_AllAddr_gfx10<0x020>;
1342defm GLOBAL_LOAD_UBYTE_D16_HI   : FLAT_Real_AllAddr_gfx10<0x021>;
1343defm GLOBAL_LOAD_SBYTE_D16      : FLAT_Real_AllAddr_gfx10<0x022>;
1344defm GLOBAL_LOAD_SBYTE_D16_HI   : FLAT_Real_AllAddr_gfx10<0x023>;
1345defm GLOBAL_LOAD_SHORT_D16      : FLAT_Real_AllAddr_gfx10<0x024>;
1346defm GLOBAL_LOAD_SHORT_D16_HI   : FLAT_Real_AllAddr_gfx10<0x025>;
1347defm GLOBAL_ATOMIC_SWAP         : FLAT_Real_GlblAtomics_gfx10<0x030>;
1348defm GLOBAL_ATOMIC_CMPSWAP      : FLAT_Real_GlblAtomics_gfx10<0x031>;
1349defm GLOBAL_ATOMIC_ADD          : FLAT_Real_GlblAtomics_gfx10<0x032>;
1350defm GLOBAL_ATOMIC_SUB          : FLAT_Real_GlblAtomics_gfx10<0x033>;
// CSUB uses the RTN-only multiclass: only GLOBAL_ATOMIC_CSUB_RTN_gfx10 and
// GLOBAL_ATOMIC_CSUB_SADDR_RTN_gfx10 are created.
1351defm GLOBAL_ATOMIC_CSUB         : FLAT_Real_GlblAtomics_RTN_gfx10<0x034>;
1352defm GLOBAL_ATOMIC_SMIN         : FLAT_Real_GlblAtomics_gfx10<0x035>;
1353defm GLOBAL_ATOMIC_UMIN         : FLAT_Real_GlblAtomics_gfx10<0x036>;
1354defm GLOBAL_ATOMIC_SMAX         : FLAT_Real_GlblAtomics_gfx10<0x037>;
1355defm GLOBAL_ATOMIC_UMAX         : FLAT_Real_GlblAtomics_gfx10<0x038>;
1356defm GLOBAL_ATOMIC_AND          : FLAT_Real_GlblAtomics_gfx10<0x039>;
1357defm GLOBAL_ATOMIC_OR           : FLAT_Real_GlblAtomics_gfx10<0x03a>;
1358defm GLOBAL_ATOMIC_XOR          : FLAT_Real_GlblAtomics_gfx10<0x03b>;
1359defm GLOBAL_ATOMIC_INC          : FLAT_Real_GlblAtomics_gfx10<0x03c>;
1360defm GLOBAL_ATOMIC_DEC          : FLAT_Real_GlblAtomics_gfx10<0x03d>;
1361defm GLOBAL_ATOMIC_FCMPSWAP     : FLAT_Real_GlblAtomics_gfx10<0x03e>;
1362defm GLOBAL_ATOMIC_FMIN         : FLAT_Real_GlblAtomics_gfx10<0x03f>;
1363defm GLOBAL_ATOMIC_FMAX         : FLAT_Real_GlblAtomics_gfx10<0x040>;
1364defm GLOBAL_ATOMIC_SWAP_X2      : FLAT_Real_GlblAtomics_gfx10<0x050>;
1365defm GLOBAL_ATOMIC_CMPSWAP_X2   : FLAT_Real_GlblAtomics_gfx10<0x051>;
1366defm GLOBAL_ATOMIC_ADD_X2       : FLAT_Real_GlblAtomics_gfx10<0x052>;
1367defm GLOBAL_ATOMIC_SUB_X2       : FLAT_Real_GlblAtomics_gfx10<0x053>;
1368defm GLOBAL_ATOMIC_SMIN_X2      : FLAT_Real_GlblAtomics_gfx10<0x055>;
1369defm GLOBAL_ATOMIC_UMIN_X2      : FLAT_Real_GlblAtomics_gfx10<0x056>;
1370defm GLOBAL_ATOMIC_SMAX_X2      : FLAT_Real_GlblAtomics_gfx10<0x057>;
1371defm GLOBAL_ATOMIC_UMAX_X2      : FLAT_Real_GlblAtomics_gfx10<0x058>;
1372defm GLOBAL_ATOMIC_AND_X2       : FLAT_Real_GlblAtomics_gfx10<0x059>;
1373defm GLOBAL_ATOMIC_OR_X2        : FLAT_Real_GlblAtomics_gfx10<0x05a>;
1374defm GLOBAL_ATOMIC_XOR_X2       : FLAT_Real_GlblAtomics_gfx10<0x05b>;
1375defm GLOBAL_ATOMIC_INC_X2       : FLAT_Real_GlblAtomics_gfx10<0x05c>;
1376defm GLOBAL_ATOMIC_DEC_X2       : FLAT_Real_GlblAtomics_gfx10<0x05d>;
1377defm GLOBAL_ATOMIC_FCMPSWAP_X2  : FLAT_Real_GlblAtomics_gfx10<0x05e>;
1378defm GLOBAL_ATOMIC_FMIN_X2      : FLAT_Real_GlblAtomics_gfx10<0x05f>;
1379defm GLOBAL_ATOMIC_FMAX_X2      : FLAT_Real_GlblAtomics_gfx10<0x060>;
// ADDTID forms use the Base multiclass: only <NAME>_gfx10 is created here, no
// _SADDR_gfx10 record.
1380defm GLOBAL_LOAD_DWORD_ADDTID   : FLAT_Real_Base_gfx10<0x016>;
1381defm GLOBAL_STORE_DWORD_ADDTID  : FLAT_Real_Base_gfx10<0x017>;
1382
1383// ENC_FLAT_SCRATCH.
// gfx10 real records for the SCRATCH_* pseudos. Every entry goes through
// FLAT_Real_AllAddr_gfx10, so each creates <NAME>_gfx10 and
// <NAME>_SADDR_gfx10. Only loads and stores are listed; this list has no
// atomics. The `op` values match the same-named GLOBAL_*/FLAT_* load and
// store entries.
1384defm SCRATCH_LOAD_UBYTE         : FLAT_Real_AllAddr_gfx10<0x008>;
1385defm SCRATCH_LOAD_SBYTE         : FLAT_Real_AllAddr_gfx10<0x009>;
1386defm SCRATCH_LOAD_USHORT        : FLAT_Real_AllAddr_gfx10<0x00a>;
1387defm SCRATCH_LOAD_SSHORT        : FLAT_Real_AllAddr_gfx10<0x00b>;
1388defm SCRATCH_LOAD_DWORD         : FLAT_Real_AllAddr_gfx10<0x00c>;
1389defm SCRATCH_LOAD_DWORDX2       : FLAT_Real_AllAddr_gfx10<0x00d>;
1390defm SCRATCH_LOAD_DWORDX4       : FLAT_Real_AllAddr_gfx10<0x00e>;
1391defm SCRATCH_LOAD_DWORDX3       : FLAT_Real_AllAddr_gfx10<0x00f>;
1392defm SCRATCH_STORE_BYTE         : FLAT_Real_AllAddr_gfx10<0x018>;
1393defm SCRATCH_STORE_BYTE_D16_HI  : FLAT_Real_AllAddr_gfx10<0x019>;
1394defm SCRATCH_STORE_SHORT        : FLAT_Real_AllAddr_gfx10<0x01a>;
1395defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x01b>;
1396defm SCRATCH_STORE_DWORD        : FLAT_Real_AllAddr_gfx10<0x01c>;
1397defm SCRATCH_STORE_DWORDX2      : FLAT_Real_AllAddr_gfx10<0x01d>;
1398defm SCRATCH_STORE_DWORDX4      : FLAT_Real_AllAddr_gfx10<0x01e>;
1399defm SCRATCH_STORE_DWORDX3      : FLAT_Real_AllAddr_gfx10<0x01f>;
1400defm SCRATCH_LOAD_UBYTE_D16     : FLAT_Real_AllAddr_gfx10<0x020>;
1401defm SCRATCH_LOAD_UBYTE_D16_HI  : FLAT_Real_AllAddr_gfx10<0x021>;
1402defm SCRATCH_LOAD_SBYTE_D16     : FLAT_Real_AllAddr_gfx10<0x022>;
1403defm SCRATCH_LOAD_SBYTE_D16_HI  : FLAT_Real_AllAddr_gfx10<0x023>;
1404defm SCRATCH_LOAD_SHORT_D16     : FLAT_Real_AllAddr_gfx10<0x024>;
1405defm SCRATCH_LOAD_SHORT_D16_HI  : FLAT_Real_AllAddr_gfx10<0x025>;
1406
// Real records for the floating-point global atomic adds. Everything inside
// the `let` gets SubtargetPredicate = HasAtomicFaddInsts.
// Unlike the gfx10 lists in this part of the file, these are instantiated
// through FLAT_Real_AllAddr_vi, which is defined outside this excerpt.
// NOTE(review): presumably these are deliberately not gfx10 encodings --
// confirm against FLAT_Real_AllAddr_vi before adding gfx10 variants.
1407let SubtargetPredicate = HasAtomicFaddInsts in {
1408
1409defm GLOBAL_ATOMIC_ADD_F32    : FLAT_Real_AllAddr_vi <0x04d>;
1410defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Real_AllAddr_vi <0x04e>;
1411
1412} // End SubtargetPredicate = HasAtomicFaddInsts
1413