xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/FLATInstructions.td (revision 2f513db72b034fd5ef7f080b11be5c711c15186a)
1//===-- FLATInstructions.td - FLAT Instruction Defintions -----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9def FLATAtomic : ComplexPattern<i64, 3, "SelectFlatAtomic", [], [SDNPWantRoot], -10>;
10def FLATOffset : ComplexPattern<i64, 3, "SelectFlatOffset<false>", [], [SDNPWantRoot], -10>;
11
12def FLATOffsetSigned : ComplexPattern<i64, 3, "SelectFlatOffset<true>", [], [SDNPWantRoot], -10>;
13def FLATSignedAtomic : ComplexPattern<i64, 3, "SelectFlatAtomicSigned", [], [SDNPWantRoot], -10>;
14
15//===----------------------------------------------------------------------===//
16// FLAT classes
17//===----------------------------------------------------------------------===//
18
19class FLAT_Pseudo<string opName, dag outs, dag ins,
20                  string asmOps, list<dag> pattern=[]> :
21  InstSI<outs, ins, "", pattern>,
22  SIMCInstr<opName, SIEncodingFamily.NONE> {
23
24  let isPseudo = 1;
25  let isCodeGenOnly = 1;
26
27  let FLAT = 1;
28
29  let UseNamedOperandTable = 1;
30  let hasSideEffects = 0;
31  let SchedRW = [WriteVMEM];
32
33  string Mnemonic = opName;
34  string AsmOperands = asmOps;
35
36  bits<1> is_flat_global = 0;
37  bits<1> is_flat_scratch = 0;
38
39  bits<1> has_vdst = 1;
40
41  // We need to distinguish having saddr and enabling saddr because
42  // saddr is only valid for scratch and global instructions. Pre-gfx9
43  // these bits were reserved, so we also don't necessarily want to
44  // set these bits to the disabled value for the original flat
45  // segment instructions.
46  bits<1> has_saddr = 0;
47  bits<1> enabled_saddr = 0;
48  bits<7> saddr_value = 0;
49  bits<1> has_vaddr = 1;
50
51  bits<1> has_data = 1;
52  bits<1> has_glc  = 1;
53  bits<1> glcValue = 0;
54  bits<1> has_dlc  = 1;
55  bits<1> dlcValue = 0;
56
57  let SubtargetPredicate = !if(is_flat_global, HasFlatGlobalInsts,
58    !if(is_flat_scratch, HasFlatScratchInsts, HasFlatAddressSpace));
59
60  // TODO: M0 if it could possibly access LDS (before gfx9? only)?
61  let Uses = !if(is_flat_global, [EXEC], [EXEC, FLAT_SCR]);
62
63  // Internally, FLAT instruction are executed as both an LDS and a
64  // Buffer instruction; so, they increment both VM_CNT and LGKM_CNT
65  // and are not considered done until both have been decremented.
66  let VM_CNT = 1;
67  let LGKM_CNT = !if(!or(is_flat_global, is_flat_scratch), 0, 1);
68
69  let IsNonFlatSeg = !if(!or(is_flat_global, is_flat_scratch), 1, 0);
70}
71
72class FLAT_Real <bits<7> op, FLAT_Pseudo ps> :
73  InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
74  Enc64 {
75
76  let isPseudo = 0;
77  let isCodeGenOnly = 0;
78
79  // copy relevant pseudo op flags
80  let SubtargetPredicate = ps.SubtargetPredicate;
81  let AsmMatchConverter  = ps.AsmMatchConverter;
82  let TSFlags = ps.TSFlags;
83  let UseNamedOperandTable = ps.UseNamedOperandTable;
84
85  // encoding fields
86  bits<8> vaddr;
87  bits<8> vdata;
88  bits<7> saddr;
89  bits<8> vdst;
90
91  bits<1> slc;
92  bits<1> glc;
93  bits<1> dlc;
94
95  // Only valid on gfx9
96  bits<1> lds = 0; // XXX - What does this actually do?
97
98  // Segment, 00=flat, 01=scratch, 10=global, 11=reserved
99  bits<2> seg = !if(ps.is_flat_global, 0b10,
100                  !if(ps.is_flat_scratch, 0b01, 0));
101
102  // Signed offset. Highest bit ignored for flat and treated as 12-bit
103  // unsigned for flat acceses.
104  bits<13> offset;
105  bits<1> nv = 0; // XXX - What does this actually do?
106
107  // We don't use tfe right now, and it was removed in gfx9.
108  bits<1> tfe = 0;
109
110  // Only valid on GFX9+
111  let Inst{12-0} = offset;
112  let Inst{13} = lds;
113  let Inst{15-14} = seg;
114
115  let Inst{16}    = !if(ps.has_glc, glc, ps.glcValue);
116  let Inst{17}    = slc;
117  let Inst{24-18} = op;
118  let Inst{31-26} = 0x37; // Encoding.
119  let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
120  let Inst{47-40} = !if(ps.has_data, vdata, ?);
121  let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7f), 0);
122
123  // 54-48 is reserved.
124  let Inst{55}    = nv; // nv on GFX9+, TFE before.
125  let Inst{63-56} = !if(ps.has_vdst, vdst, ?);
126}
127
128class GlobalSaddrTable <bit is_saddr, string Name = ""> {
129  bit IsSaddr = is_saddr;
130  string SaddrOp = Name;
131}
132
133// TODO: Is exec allowed for saddr? The disabled value 0x7f is the
134// same encoding value as exec_hi, so it isn't possible to use that if
135// saddr is 32-bit (which isn't handled here yet).
136class FLAT_Load_Pseudo <string opName, RegisterClass regClass,
137  bit HasTiedOutput = 0,
138  bit HasSaddr = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
139  opName,
140  (outs regClass:$vdst),
141  !con(
142    !con(
143      !con((ins VReg_64:$vaddr),
144        !if(EnableSaddr, (ins SReg_64:$saddr), (ins))),
145          (ins flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
146          !if(HasTiedOutput, (ins regClass:$vdst_in), (ins))),
147  " $vdst, $vaddr"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc$dlc"> {
148  let has_data = 0;
149  let mayLoad = 1;
150  let has_saddr = HasSaddr;
151  let enabled_saddr = EnableSaddr;
152  let PseudoInstr = opName#!if(!and(HasSaddr, EnableSaddr), "_SADDR", "");
153  let maybeAtomic = 1;
154
155  let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", "");
156  let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
157}
158
159class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass,
160  bit HasSaddr = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
161  opName,
162  (outs),
163  !con(
164    !con((ins VReg_64:$vaddr, vdataClass:$vdata),
165      !if(EnableSaddr, (ins SReg_64:$saddr), (ins))),
166        (ins flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
167  " $vaddr, $vdata"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc$dlc"> {
168  let mayLoad  = 0;
169  let mayStore = 1;
170  let has_vdst = 0;
171  let has_saddr = HasSaddr;
172  let enabled_saddr = EnableSaddr;
173  let PseudoInstr = opName#!if(!and(HasSaddr, EnableSaddr), "_SADDR", "");
174  let maybeAtomic = 1;
175}
176
177multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass, bit HasTiedInput = 0> {
178  let is_flat_global = 1 in {
179    def "" : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1>,
180      GlobalSaddrTable<0, opName>;
181    def _SADDR : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1, 1>,
182      GlobalSaddrTable<1, opName>;
183  }
184}
185
186multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> {
187  let is_flat_global = 1 in {
188    def "" : FLAT_Store_Pseudo<opName, regClass, 1>,
189      GlobalSaddrTable<0, opName>;
190    def _SADDR : FLAT_Store_Pseudo<opName, regClass, 1, 1>,
191      GlobalSaddrTable<1, opName>;
192  }
193}
194
195class FLAT_Scratch_Load_Pseudo <string opName, RegisterClass regClass,
196  bit EnableSaddr = 0>: FLAT_Pseudo<
197  opName,
198  (outs regClass:$vdst),
199  !if(EnableSaddr,
200      (ins SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc),
201      (ins VGPR_32:$vaddr, flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
202  " $vdst, "#!if(EnableSaddr, "off", "$vaddr")#!if(EnableSaddr, ", $saddr", ", off")#"$offset$glc$slc$dlc"> {
203  let has_data = 0;
204  let mayLoad = 1;
205  let has_saddr = 1;
206  let enabled_saddr = EnableSaddr;
207  let has_vaddr = !if(EnableSaddr, 0, 1);
208  let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", "");
209  let maybeAtomic = 1;
210}
211
212class FLAT_Scratch_Store_Pseudo <string opName, RegisterClass vdataClass, bit EnableSaddr = 0> : FLAT_Pseudo<
213  opName,
214  (outs),
215  !if(EnableSaddr,
216    (ins vdataClass:$vdata, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc),
217    (ins vdataClass:$vdata, VGPR_32:$vaddr, flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
218  " "#!if(EnableSaddr, "off", "$vaddr")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc"> {
219  let mayLoad  = 0;
220  let mayStore = 1;
221  let has_vdst = 0;
222  let has_saddr = 1;
223  let enabled_saddr = EnableSaddr;
224  let has_vaddr = !if(EnableSaddr, 0, 1);
225  let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", "");
226  let maybeAtomic = 1;
227}
228
229multiclass FLAT_Scratch_Load_Pseudo<string opName, RegisterClass regClass> {
230  let is_flat_scratch = 1 in {
231    def "" : FLAT_Scratch_Load_Pseudo<opName, regClass>;
232    def _SADDR : FLAT_Scratch_Load_Pseudo<opName, regClass, 1>;
233  }
234}
235
236multiclass FLAT_Scratch_Store_Pseudo<string opName, RegisterClass regClass> {
237  let is_flat_scratch = 1 in {
238    def "" : FLAT_Scratch_Store_Pseudo<opName, regClass>;
239    def _SADDR : FLAT_Scratch_Store_Pseudo<opName, regClass, 1>;
240  }
241}
242
243class FLAT_AtomicNoRet_Pseudo<string opName, dag outs, dag ins,
244                               string asm, list<dag> pattern = []> :
245  FLAT_Pseudo<opName, outs, ins, asm, pattern> {
246    let mayLoad = 1;
247    let mayStore = 1;
248    let has_glc  = 0;
249    let glcValue = 0;
250    let has_dlc  = 0;
251    let dlcValue = 0;
252    let has_vdst = 0;
253    let maybeAtomic = 1;
254}
255
256class FLAT_AtomicRet_Pseudo<string opName, dag outs, dag ins,
257                            string asm, list<dag> pattern = []>
258  : FLAT_AtomicNoRet_Pseudo<opName, outs, ins, asm, pattern> {
259  let hasPostISelHook = 1;
260  let has_vdst = 1;
261  let glcValue = 1;
262  let dlcValue = 0;
263  let PseudoInstr = NAME # "_RTN";
264}
265
266multiclass FLAT_Atomic_Pseudo<
267  string opName,
268  RegisterClass vdst_rc,
269  ValueType vt,
270  SDPatternOperator atomic = null_frag,
271  ValueType data_vt = vt,
272  RegisterClass data_rc = vdst_rc,
273  bit isFP = getIsFP<data_vt>.ret> {
274  def "" : FLAT_AtomicNoRet_Pseudo <opName,
275    (outs),
276    (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC:$slc),
277    " $vaddr, $vdata$offset$slc">,
278    GlobalSaddrTable<0, opName>,
279    AtomicNoRet <opName, 0> {
280    let PseudoInstr = NAME;
281    let FPAtomic = isFP;
282  }
283
284  def _RTN : FLAT_AtomicRet_Pseudo <opName,
285    (outs vdst_rc:$vdst),
286    (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC:$slc),
287    " $vdst, $vaddr, $vdata$offset glc$slc",
288    [(set vt:$vdst,
289      (atomic (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$vdata))]>,
290       GlobalSaddrTable<0, opName#"_rtn">,
291       AtomicNoRet <opName, 1>{
292    let FPAtomic = isFP;
293  }
294}
295
296multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<
297  string opName,
298  RegisterClass vdst_rc,
299  ValueType vt,
300  SDPatternOperator atomic = null_frag,
301  ValueType data_vt = vt,
302  RegisterClass data_rc = vdst_rc,
303  bit isFP = getIsFP<data_vt>.ret> {
304
305  def "" : FLAT_AtomicNoRet_Pseudo <opName,
306    (outs),
307    (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC:$slc),
308    " $vaddr, $vdata, off$offset$slc">,
309    GlobalSaddrTable<0, opName>,
310    AtomicNoRet <opName, 0> {
311    let has_saddr = 1;
312    let PseudoInstr = NAME;
313    let FPAtomic = isFP;
314  }
315
316  def _SADDR : FLAT_AtomicNoRet_Pseudo <opName,
317    (outs),
318    (ins VReg_64:$vaddr, data_rc:$vdata, SReg_64:$saddr, flat_offset:$offset, SLC:$slc),
319    " $vaddr, $vdata, $saddr$offset$slc">,
320    GlobalSaddrTable<1, opName>,
321    AtomicNoRet <opName#"_saddr", 0> {
322    let has_saddr = 1;
323    let enabled_saddr = 1;
324    let PseudoInstr = NAME#"_SADDR";
325    let FPAtomic = isFP;
326  }
327}
328
329multiclass FLAT_Global_Atomic_Pseudo_RTN<
330  string opName,
331  RegisterClass vdst_rc,
332  ValueType vt,
333  SDPatternOperator atomic = null_frag,
334  ValueType data_vt = vt,
335  RegisterClass data_rc = vdst_rc,
336  bit isFP = getIsFP<data_vt>.ret> {
337
338  def _RTN : FLAT_AtomicRet_Pseudo <opName,
339    (outs vdst_rc:$vdst),
340      (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC:$slc),
341    " $vdst, $vaddr, $vdata, off$offset glc$slc",
342    [(set vt:$vdst,
343      (atomic (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$vdata))]>,
344      GlobalSaddrTable<0, opName#"_rtn">,
345      AtomicNoRet <opName, 1> {
346    let has_saddr = 1;
347    let FPAtomic = isFP;
348  }
349
350  def _SADDR_RTN : FLAT_AtomicRet_Pseudo <opName,
351    (outs vdst_rc:$vdst),
352      (ins VReg_64:$vaddr, data_rc:$vdata, SReg_64:$saddr, flat_offset:$offset, SLC:$slc),
353    " $vdst, $vaddr, $vdata, $saddr$offset glc$slc">,
354    GlobalSaddrTable<1, opName#"_rtn">,
355    AtomicNoRet <opName#"_saddr", 1> {
356     let has_saddr = 1;
357     let enabled_saddr = 1;
358     let PseudoInstr = NAME#"_SADDR_RTN";
359     let FPAtomic = isFP;
360  }
361}
362
363multiclass FLAT_Global_Atomic_Pseudo<
364  string opName,
365  RegisterClass vdst_rc,
366  ValueType vt,
367  SDPatternOperator atomic = null_frag,
368  ValueType data_vt = vt,
369  RegisterClass data_rc = vdst_rc> :
370    FLAT_Global_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt, atomic, data_vt, data_rc>,
371    FLAT_Global_Atomic_Pseudo_RTN<opName, vdst_rc, vt, atomic, data_vt, data_rc>;
372
373class flat_binary_atomic_op<SDNode atomic_op> : PatFrag<
374  (ops node:$ptr, node:$value),
375  (atomic_op node:$ptr, node:$value),
376  [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;}]
377>;
378
379def atomic_cmp_swap_flat : flat_binary_atomic_op<AMDGPUatomic_cmp_swap>;
380def atomic_swap_flat     : flat_binary_atomic_op<atomic_swap>;
381def atomic_add_flat      : flat_binary_atomic_op<atomic_load_add>;
382def atomic_and_flat      : flat_binary_atomic_op<atomic_load_and>;
383def atomic_max_flat      : flat_binary_atomic_op<atomic_load_max>;
384def atomic_min_flat      : flat_binary_atomic_op<atomic_load_min>;
385def atomic_or_flat       : flat_binary_atomic_op<atomic_load_or>;
386def atomic_sub_flat      : flat_binary_atomic_op<atomic_load_sub>;
387def atomic_umax_flat     : flat_binary_atomic_op<atomic_load_umax>;
388def atomic_umin_flat     : flat_binary_atomic_op<atomic_load_umin>;
389def atomic_xor_flat      : flat_binary_atomic_op<atomic_load_xor>;
390def atomic_inc_flat      : flat_binary_atomic_op<SIatomic_inc>;
391def atomic_dec_flat      : flat_binary_atomic_op<SIatomic_dec>;
392
393
394
395//===----------------------------------------------------------------------===//
396// Flat Instructions
397//===----------------------------------------------------------------------===//
398
399def FLAT_LOAD_UBYTE    : FLAT_Load_Pseudo <"flat_load_ubyte", VGPR_32>;
400def FLAT_LOAD_SBYTE    : FLAT_Load_Pseudo <"flat_load_sbyte", VGPR_32>;
401def FLAT_LOAD_USHORT   : FLAT_Load_Pseudo <"flat_load_ushort", VGPR_32>;
402def FLAT_LOAD_SSHORT   : FLAT_Load_Pseudo <"flat_load_sshort", VGPR_32>;
403def FLAT_LOAD_DWORD    : FLAT_Load_Pseudo <"flat_load_dword", VGPR_32>;
404def FLAT_LOAD_DWORDX2  : FLAT_Load_Pseudo <"flat_load_dwordx2", VReg_64>;
405def FLAT_LOAD_DWORDX4  : FLAT_Load_Pseudo <"flat_load_dwordx4", VReg_128>;
406def FLAT_LOAD_DWORDX3  : FLAT_Load_Pseudo <"flat_load_dwordx3", VReg_96>;
407
408def FLAT_STORE_BYTE    : FLAT_Store_Pseudo <"flat_store_byte", VGPR_32>;
409def FLAT_STORE_SHORT   : FLAT_Store_Pseudo <"flat_store_short", VGPR_32>;
410def FLAT_STORE_DWORD   : FLAT_Store_Pseudo <"flat_store_dword", VGPR_32>;
411def FLAT_STORE_DWORDX2 : FLAT_Store_Pseudo <"flat_store_dwordx2", VReg_64>;
412def FLAT_STORE_DWORDX4 : FLAT_Store_Pseudo <"flat_store_dwordx4", VReg_128>;
413def FLAT_STORE_DWORDX3 : FLAT_Store_Pseudo <"flat_store_dwordx3", VReg_96>;
414
415let SubtargetPredicate = HasD16LoadStore in {
416def FLAT_LOAD_UBYTE_D16     : FLAT_Load_Pseudo <"flat_load_ubyte_d16", VGPR_32, 1>;
417def FLAT_LOAD_UBYTE_D16_HI  : FLAT_Load_Pseudo <"flat_load_ubyte_d16_hi", VGPR_32, 1>;
418def FLAT_LOAD_SBYTE_D16     : FLAT_Load_Pseudo <"flat_load_sbyte_d16", VGPR_32, 1>;
419def FLAT_LOAD_SBYTE_D16_HI  : FLAT_Load_Pseudo <"flat_load_sbyte_d16_hi", VGPR_32, 1>;
420def FLAT_LOAD_SHORT_D16     : FLAT_Load_Pseudo <"flat_load_short_d16", VGPR_32, 1>;
421def FLAT_LOAD_SHORT_D16_HI  : FLAT_Load_Pseudo <"flat_load_short_d16_hi", VGPR_32, 1>;
422
423def FLAT_STORE_BYTE_D16_HI  : FLAT_Store_Pseudo <"flat_store_byte_d16_hi", VGPR_32>;
424def FLAT_STORE_SHORT_D16_HI : FLAT_Store_Pseudo <"flat_store_short_d16_hi", VGPR_32>;
425}
426
427defm FLAT_ATOMIC_CMPSWAP    : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap",
428                                VGPR_32, i32, atomic_cmp_swap_flat,
429                                v2i32, VReg_64>;
430
431defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap_x2",
432                                VReg_64, i64, atomic_cmp_swap_flat,
433                                v2i64, VReg_128>;
434
435defm FLAT_ATOMIC_SWAP       : FLAT_Atomic_Pseudo <"flat_atomic_swap",
436                                VGPR_32, i32, atomic_swap_flat>;
437
438defm FLAT_ATOMIC_SWAP_X2    : FLAT_Atomic_Pseudo <"flat_atomic_swap_x2",
439                                VReg_64, i64, atomic_swap_flat>;
440
441defm FLAT_ATOMIC_ADD        : FLAT_Atomic_Pseudo <"flat_atomic_add",
442                                VGPR_32, i32, atomic_add_flat>;
443
444defm FLAT_ATOMIC_SUB        : FLAT_Atomic_Pseudo <"flat_atomic_sub",
445                                VGPR_32, i32, atomic_sub_flat>;
446
447defm FLAT_ATOMIC_SMIN       : FLAT_Atomic_Pseudo <"flat_atomic_smin",
448                                VGPR_32, i32, atomic_min_flat>;
449
450defm FLAT_ATOMIC_UMIN       : FLAT_Atomic_Pseudo <"flat_atomic_umin",
451                                VGPR_32, i32, atomic_umin_flat>;
452
453defm FLAT_ATOMIC_SMAX       : FLAT_Atomic_Pseudo <"flat_atomic_smax",
454                                VGPR_32, i32, atomic_max_flat>;
455
456defm FLAT_ATOMIC_UMAX       : FLAT_Atomic_Pseudo <"flat_atomic_umax",
457                                VGPR_32, i32, atomic_umax_flat>;
458
459defm FLAT_ATOMIC_AND        : FLAT_Atomic_Pseudo <"flat_atomic_and",
460                                VGPR_32, i32, atomic_and_flat>;
461
462defm FLAT_ATOMIC_OR         : FLAT_Atomic_Pseudo <"flat_atomic_or",
463                                VGPR_32, i32, atomic_or_flat>;
464
465defm FLAT_ATOMIC_XOR        : FLAT_Atomic_Pseudo <"flat_atomic_xor",
466                                VGPR_32, i32, atomic_xor_flat>;
467
468defm FLAT_ATOMIC_INC        : FLAT_Atomic_Pseudo <"flat_atomic_inc",
469                                VGPR_32, i32, atomic_inc_flat>;
470
471defm FLAT_ATOMIC_DEC        : FLAT_Atomic_Pseudo <"flat_atomic_dec",
472                                VGPR_32, i32, atomic_dec_flat>;
473
474defm FLAT_ATOMIC_ADD_X2     : FLAT_Atomic_Pseudo <"flat_atomic_add_x2",
475                                VReg_64, i64, atomic_add_flat>;
476
477defm FLAT_ATOMIC_SUB_X2     : FLAT_Atomic_Pseudo <"flat_atomic_sub_x2",
478                                VReg_64, i64, atomic_sub_flat>;
479
480defm FLAT_ATOMIC_SMIN_X2    : FLAT_Atomic_Pseudo <"flat_atomic_smin_x2",
481                                VReg_64, i64, atomic_min_flat>;
482
483defm FLAT_ATOMIC_UMIN_X2    : FLAT_Atomic_Pseudo <"flat_atomic_umin_x2",
484                                VReg_64, i64, atomic_umin_flat>;
485
486defm FLAT_ATOMIC_SMAX_X2    : FLAT_Atomic_Pseudo <"flat_atomic_smax_x2",
487                                VReg_64, i64, atomic_max_flat>;
488
489defm FLAT_ATOMIC_UMAX_X2    : FLAT_Atomic_Pseudo <"flat_atomic_umax_x2",
490                                VReg_64, i64, atomic_umax_flat>;
491
492defm FLAT_ATOMIC_AND_X2     : FLAT_Atomic_Pseudo <"flat_atomic_and_x2",
493                                VReg_64, i64, atomic_and_flat>;
494
495defm FLAT_ATOMIC_OR_X2      : FLAT_Atomic_Pseudo <"flat_atomic_or_x2",
496                                VReg_64, i64, atomic_or_flat>;
497
498defm FLAT_ATOMIC_XOR_X2     : FLAT_Atomic_Pseudo <"flat_atomic_xor_x2",
499                                VReg_64, i64, atomic_xor_flat>;
500
501defm FLAT_ATOMIC_INC_X2     : FLAT_Atomic_Pseudo <"flat_atomic_inc_x2",
502                                VReg_64, i64, atomic_inc_flat>;
503
504defm FLAT_ATOMIC_DEC_X2     : FLAT_Atomic_Pseudo <"flat_atomic_dec_x2",
505                                VReg_64, i64, atomic_dec_flat>;
506
507// GFX7-, GFX10-only flat instructions.
508let SubtargetPredicate = isGFX7GFX10 in {
509
510defm FLAT_ATOMIC_FCMPSWAP    : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap",
511                                VGPR_32, f32, null_frag, v2f32, VReg_64>;
512
513defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap_x2",
514                                VReg_64, f64, null_frag, v2f64, VReg_128>;
515
516defm FLAT_ATOMIC_FMIN        : FLAT_Atomic_Pseudo <"flat_atomic_fmin",
517                                VGPR_32, f32>;
518
519defm FLAT_ATOMIC_FMAX        : FLAT_Atomic_Pseudo <"flat_atomic_fmax",
520                                VGPR_32, f32>;
521
522defm FLAT_ATOMIC_FMIN_X2     : FLAT_Atomic_Pseudo <"flat_atomic_fmin_x2",
523                                VReg_64, f64>;
524
525defm FLAT_ATOMIC_FMAX_X2     : FLAT_Atomic_Pseudo <"flat_atomic_fmax_x2",
526                                VReg_64, f64>;
527
528} // End SubtargetPredicate = isGFX7GFX10
529
530let SubtargetPredicate = HasFlatGlobalInsts in {
531defm GLOBAL_LOAD_UBYTE    : FLAT_Global_Load_Pseudo <"global_load_ubyte", VGPR_32>;
532defm GLOBAL_LOAD_SBYTE    : FLAT_Global_Load_Pseudo <"global_load_sbyte", VGPR_32>;
533defm GLOBAL_LOAD_USHORT   : FLAT_Global_Load_Pseudo <"global_load_ushort", VGPR_32>;
534defm GLOBAL_LOAD_SSHORT   : FLAT_Global_Load_Pseudo <"global_load_sshort", VGPR_32>;
535defm GLOBAL_LOAD_DWORD    : FLAT_Global_Load_Pseudo <"global_load_dword", VGPR_32>;
536defm GLOBAL_LOAD_DWORDX2  : FLAT_Global_Load_Pseudo <"global_load_dwordx2", VReg_64>;
537defm GLOBAL_LOAD_DWORDX3  : FLAT_Global_Load_Pseudo <"global_load_dwordx3", VReg_96>;
538defm GLOBAL_LOAD_DWORDX4  : FLAT_Global_Load_Pseudo <"global_load_dwordx4", VReg_128>;
539
540defm GLOBAL_LOAD_UBYTE_D16    : FLAT_Global_Load_Pseudo <"global_load_ubyte_d16", VGPR_32, 1>;
541defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Global_Load_Pseudo <"global_load_ubyte_d16_hi", VGPR_32, 1>;
542defm GLOBAL_LOAD_SBYTE_D16    : FLAT_Global_Load_Pseudo <"global_load_sbyte_d16", VGPR_32, 1>;
543defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Global_Load_Pseudo <"global_load_sbyte_d16_hi", VGPR_32, 1>;
544defm GLOBAL_LOAD_SHORT_D16    : FLAT_Global_Load_Pseudo <"global_load_short_d16", VGPR_32, 1>;
545defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Global_Load_Pseudo <"global_load_short_d16_hi", VGPR_32, 1>;
546
547defm GLOBAL_STORE_BYTE    : FLAT_Global_Store_Pseudo <"global_store_byte", VGPR_32>;
548defm GLOBAL_STORE_SHORT   : FLAT_Global_Store_Pseudo <"global_store_short", VGPR_32>;
549defm GLOBAL_STORE_DWORD   : FLAT_Global_Store_Pseudo <"global_store_dword", VGPR_32>;
550defm GLOBAL_STORE_DWORDX2 : FLAT_Global_Store_Pseudo <"global_store_dwordx2", VReg_64>;
551defm GLOBAL_STORE_DWORDX3 : FLAT_Global_Store_Pseudo <"global_store_dwordx3", VReg_96>;
552defm GLOBAL_STORE_DWORDX4 : FLAT_Global_Store_Pseudo <"global_store_dwordx4", VReg_128>;
553
554defm GLOBAL_STORE_BYTE_D16_HI  : FLAT_Global_Store_Pseudo <"global_store_byte_d16_hi", VGPR_32>;
555defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Global_Store_Pseudo <"global_store_short_d16_hi", VGPR_32>;
556
557let is_flat_global = 1 in {
558defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap",
559                               VGPR_32, i32, AMDGPUatomic_cmp_swap_global,
560                               v2i32, VReg_64>;
561
562defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap_x2",
563                                  VReg_64, i64, AMDGPUatomic_cmp_swap_global,
564                                  v2i64, VReg_128>;
565
566defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_swap",
567                             VGPR_32, i32, atomic_swap_global>;
568
569defm GLOBAL_ATOMIC_SWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_swap_x2",
570                                VReg_64, i64, atomic_swap_global>;
571
572defm GLOBAL_ATOMIC_ADD : FLAT_Global_Atomic_Pseudo <"global_atomic_add",
573                           VGPR_32, i32, atomic_add_global>;
574
575defm GLOBAL_ATOMIC_SUB : FLAT_Global_Atomic_Pseudo <"global_atomic_sub",
576                           VGPR_32, i32, atomic_sub_global>;
577
578defm GLOBAL_ATOMIC_SMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_smin",
579                            VGPR_32, i32, atomic_min_global>;
580
581defm GLOBAL_ATOMIC_UMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_umin",
582                            VGPR_32, i32, atomic_umin_global>;
583
584defm GLOBAL_ATOMIC_SMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_smax",
585                            VGPR_32, i32, atomic_max_global>;
586
587defm GLOBAL_ATOMIC_UMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_umax",
588                            VGPR_32, i32, atomic_umax_global>;
589
590defm GLOBAL_ATOMIC_AND : FLAT_Global_Atomic_Pseudo <"global_atomic_and",
591                           VGPR_32, i32, atomic_and_global>;
592
593defm GLOBAL_ATOMIC_OR : FLAT_Global_Atomic_Pseudo <"global_atomic_or",
594                          VGPR_32, i32, atomic_or_global>;
595
596defm GLOBAL_ATOMIC_XOR : FLAT_Global_Atomic_Pseudo <"global_atomic_xor",
597                           VGPR_32, i32, atomic_xor_global>;
598
599defm GLOBAL_ATOMIC_INC : FLAT_Global_Atomic_Pseudo <"global_atomic_inc",
600                           VGPR_32, i32, atomic_inc_global>;
601
602defm GLOBAL_ATOMIC_DEC : FLAT_Global_Atomic_Pseudo <"global_atomic_dec",
603                           VGPR_32, i32, atomic_dec_global>;
604
605defm GLOBAL_ATOMIC_ADD_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_add_x2",
606                              VReg_64, i64, atomic_add_global>;
607
608defm GLOBAL_ATOMIC_SUB_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_sub_x2",
609                              VReg_64, i64, atomic_sub_global>;
610
611defm GLOBAL_ATOMIC_SMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smin_x2",
612                               VReg_64, i64, atomic_min_global>;
613
614defm GLOBAL_ATOMIC_UMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umin_x2",
615                               VReg_64, i64, atomic_umin_global>;
616
617defm GLOBAL_ATOMIC_SMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smax_x2",
618                               VReg_64, i64, atomic_max_global>;
619
620defm GLOBAL_ATOMIC_UMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umax_x2",
621                               VReg_64, i64, atomic_umax_global>;
622
623defm GLOBAL_ATOMIC_AND_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_and_x2",
624                              VReg_64, i64, atomic_and_global>;
625
626defm GLOBAL_ATOMIC_OR_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_or_x2",
627                             VReg_64, i64, atomic_or_global>;
628
629defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_xor_x2",
630                              VReg_64, i64, atomic_xor_global>;
631
632defm GLOBAL_ATOMIC_INC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_inc_x2",
633                              VReg_64, i64, atomic_inc_global>;
634
635defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_dec_x2",
636                              VReg_64, i64, atomic_dec_global>;
637} // End is_flat_global = 1
638
639} // End SubtargetPredicate = HasFlatGlobalInsts
640
641
642let SubtargetPredicate = HasFlatScratchInsts in {
643defm SCRATCH_LOAD_UBYTE    : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte", VGPR_32>;
644defm SCRATCH_LOAD_SBYTE    : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte", VGPR_32>;
645defm SCRATCH_LOAD_USHORT   : FLAT_Scratch_Load_Pseudo <"scratch_load_ushort", VGPR_32>;
646defm SCRATCH_LOAD_SSHORT   : FLAT_Scratch_Load_Pseudo <"scratch_load_sshort", VGPR_32>;
647defm SCRATCH_LOAD_DWORD    : FLAT_Scratch_Load_Pseudo <"scratch_load_dword", VGPR_32>;
648defm SCRATCH_LOAD_DWORDX2  : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx2", VReg_64>;
649defm SCRATCH_LOAD_DWORDX3  : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx3", VReg_96>;
650defm SCRATCH_LOAD_DWORDX4  : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx4", VReg_128>;
651
652defm SCRATCH_LOAD_UBYTE_D16    : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte_d16", VGPR_32>;
653defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte_d16_hi", VGPR_32>;
654defm SCRATCH_LOAD_SBYTE_D16    : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte_d16", VGPR_32>;
655defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte_d16_hi", VGPR_32>;
656defm SCRATCH_LOAD_SHORT_D16    : FLAT_Scratch_Load_Pseudo <"scratch_load_short_d16", VGPR_32>;
657defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_short_d16_hi", VGPR_32>;
658
659defm SCRATCH_STORE_BYTE    : FLAT_Scratch_Store_Pseudo <"scratch_store_byte", VGPR_32>;
660defm SCRATCH_STORE_SHORT   : FLAT_Scratch_Store_Pseudo <"scratch_store_short", VGPR_32>;
661defm SCRATCH_STORE_DWORD   : FLAT_Scratch_Store_Pseudo <"scratch_store_dword", VGPR_32>;
662defm SCRATCH_STORE_DWORDX2 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx2", VReg_64>;
663defm SCRATCH_STORE_DWORDX3 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx3", VReg_96>;
664defm SCRATCH_STORE_DWORDX4 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx4", VReg_128>;
665
666defm SCRATCH_STORE_BYTE_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_byte_d16_hi", VGPR_32>;
667defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_short_d16_hi", VGPR_32>;
668
669} // End SubtargetPredicate = HasFlatScratchInsts
670
671let SubtargetPredicate = isGFX10Plus, is_flat_global = 1 in {
672  defm GLOBAL_ATOMIC_FCMPSWAP :
673    FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap", VGPR_32, f32>;
674  defm GLOBAL_ATOMIC_FMIN :
675    FLAT_Global_Atomic_Pseudo<"global_atomic_fmin", VGPR_32, f32>;
676  defm GLOBAL_ATOMIC_FMAX :
677    FLAT_Global_Atomic_Pseudo<"global_atomic_fmax", VGPR_32, f32>;
678  defm GLOBAL_ATOMIC_FCMPSWAP_X2 :
679    FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap_x2", VReg_64, f64>;
680  defm GLOBAL_ATOMIC_FMIN_X2 :
681    FLAT_Global_Atomic_Pseudo<"global_atomic_fmin_x2", VReg_64, f64>;
682  defm GLOBAL_ATOMIC_FMAX_X2 :
683    FLAT_Global_Atomic_Pseudo<"global_atomic_fmax_x2", VReg_64, f64>;
684} // End SubtargetPredicate = isGFX10Plus, is_flat_global = 1
685
686let SubtargetPredicate = HasAtomicFaddInsts, is_flat_global = 1 in {
687
688defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_NO_RTN <
689  "global_atomic_add_f32", VGPR_32, f32, atomic_add_global
690>;
691defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_NO_RTN <
692  "global_atomic_pk_add_f16", VGPR_32, v2f16, atomic_add_global
693>;
694
695} // End SubtargetPredicate = HasAtomicFaddInsts
696
697//===----------------------------------------------------------------------===//
698// Flat Patterns
699//===----------------------------------------------------------------------===//
700
701// Patterns for global loads with no offset.
702class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
703  (vt (node (FLATOffset i64:$vaddr, i16:$offset, i1:$slc))),
704  (inst $vaddr, $offset, 0, 0, $slc)
705>;
706
707class FlatLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
708  (node (FLATOffset (i64 VReg_64:$vaddr), i16:$offset, i1:$slc), vt:$in),
709  (inst $vaddr, $offset, 0, 0, $slc, $in)
710>;
711
712class FlatSignedLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
713  (node (FLATOffsetSigned (i64 VReg_64:$vaddr), i16:$offset, i1:$slc), vt:$in),
714  (inst $vaddr, $offset, 0, 0, $slc, $in)
715>;
716
717class FlatLoadAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
718  (vt (node (FLATAtomic (i64 VReg_64:$vaddr), i16:$offset, i1:$slc))),
719  (inst $vaddr, $offset, 0, 0, $slc)
720>;
721
722class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
723  (vt (node (FLATOffsetSigned (i64 VReg_64:$vaddr), i16:$offset, i1:$slc))),
724  (inst $vaddr, $offset, 0, 0, $slc)
725>;
726
727class FlatStorePat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, RegisterClass rc = VGPR_32> : GCNPat <
728  (node vt:$data, (FLATOffset i64:$vaddr, i16:$offset, i1:$slc)),
729  (inst $vaddr, rc:$data, $offset, 0, 0, $slc)
730>;
731
732class FlatStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, RegisterClass rc = VGPR_32> : GCNPat <
733  (node vt:$data, (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc)),
734  (inst $vaddr, rc:$data, $offset, 0, 0, $slc)
735>;
736
737class FlatStoreAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, RegisterClass rc = VGPR_32> : GCNPat <
738  // atomic store follows atomic binop convention so the address comes
739  // first.
740  (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
741  (inst $vaddr, rc:$data, $offset, 0, 0, $slc)
742>;
743
744class FlatStoreSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, RegisterClass rc = VGPR_32> : GCNPat <
745  // atomic store follows atomic binop convention so the address comes
746  // first.
747  (node (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
748  (inst $vaddr, rc:$data, $offset, 0, 0, $slc)
749>;
750
751class FlatAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
752                     ValueType data_vt = vt> : GCNPat <
753  (vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$data)),
754  (inst $vaddr, $data, $offset, $slc)
755>;
756
757class FlatAtomicPatNoRtn <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
758  (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
759  (inst $vaddr, $data, $offset, $slc)
760>;
761
762class FlatSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
763                     ValueType data_vt = vt> : GCNPat <
764  (vt (node (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$data)),
765  (inst $vaddr, $data, $offset, $slc)
766>;
767
768let OtherPredicates = [HasFlatAddressSpace] in {
769
770def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i32>;
771def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i32>;
772def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i32>;
773def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i16>;
774def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i16>;
775def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i16>;
776def : FlatLoadPat <FLAT_LOAD_USHORT, extloadi16_flat, i32>;
777def : FlatLoadPat <FLAT_LOAD_USHORT, zextloadi16_flat, i32>;
778def : FlatLoadPat <FLAT_LOAD_USHORT, load_flat, i16>;
779def : FlatLoadPat <FLAT_LOAD_SSHORT, sextloadi16_flat, i32>;
780def : FlatLoadPat <FLAT_LOAD_DWORD, load_flat, i32>;
781def : FlatLoadPat <FLAT_LOAD_DWORDX2, load_flat, v2i32>;
782def : FlatLoadPat <FLAT_LOAD_DWORDX3, load_flat, v3i32>;
783def : FlatLoadPat <FLAT_LOAD_DWORDX4, load_flat, v4i32>;
784
785def : FlatLoadAtomicPat <FLAT_LOAD_DWORD, atomic_load_32_flat, i32>;
786def : FlatLoadAtomicPat <FLAT_LOAD_DWORDX2, atomic_load_64_flat, i64>;
787
788def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i32>;
789def : FlatStorePat <FLAT_STORE_SHORT, truncstorei16_flat, i32>;
790def : FlatStorePat <FLAT_STORE_DWORD, store_flat, i32>;
791def : FlatStorePat <FLAT_STORE_DWORDX2, store_flat, v2i32, VReg_64>;
792def : FlatStorePat <FLAT_STORE_DWORDX3, store_flat, v3i32, VReg_96>;
793def : FlatStorePat <FLAT_STORE_DWORDX4, store_flat, v4i32, VReg_128>;
794
795def : FlatStoreAtomicPat <FLAT_STORE_DWORD, atomic_store_flat_32, i32>;
796def : FlatStoreAtomicPat <FLAT_STORE_DWORDX2, atomic_store_flat_64, i64, VReg_64>;
797
798def : FlatAtomicPat <FLAT_ATOMIC_ADD_RTN, atomic_add_global, i32>;
799def : FlatAtomicPat <FLAT_ATOMIC_SUB_RTN, atomic_sub_global, i32>;
800def : FlatAtomicPat <FLAT_ATOMIC_INC_RTN, atomic_inc_global, i32>;
801def : FlatAtomicPat <FLAT_ATOMIC_DEC_RTN, atomic_dec_global, i32>;
802def : FlatAtomicPat <FLAT_ATOMIC_AND_RTN, atomic_and_global, i32>;
803def : FlatAtomicPat <FLAT_ATOMIC_SMAX_RTN, atomic_max_global, i32>;
804def : FlatAtomicPat <FLAT_ATOMIC_UMAX_RTN, atomic_umax_global, i32>;
805def : FlatAtomicPat <FLAT_ATOMIC_SMIN_RTN, atomic_min_global, i32>;
806def : FlatAtomicPat <FLAT_ATOMIC_UMIN_RTN, atomic_umin_global, i32>;
807def : FlatAtomicPat <FLAT_ATOMIC_OR_RTN, atomic_or_global, i32>;
808def : FlatAtomicPat <FLAT_ATOMIC_SWAP_RTN, atomic_swap_global, i32>;
809def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global, i32, v2i32>;
810def : FlatAtomicPat <FLAT_ATOMIC_XOR_RTN, atomic_xor_global, i32>;
811
812def : FlatAtomicPat <FLAT_ATOMIC_ADD_X2_RTN, atomic_add_global, i64>;
813def : FlatAtomicPat <FLAT_ATOMIC_SUB_X2_RTN, atomic_sub_global, i64>;
814def : FlatAtomicPat <FLAT_ATOMIC_INC_X2_RTN, atomic_inc_global, i64>;
815def : FlatAtomicPat <FLAT_ATOMIC_DEC_X2_RTN, atomic_dec_global, i64>;
816def : FlatAtomicPat <FLAT_ATOMIC_AND_X2_RTN, atomic_and_global, i64>;
817def : FlatAtomicPat <FLAT_ATOMIC_SMAX_X2_RTN, atomic_max_global, i64>;
818def : FlatAtomicPat <FLAT_ATOMIC_UMAX_X2_RTN, atomic_umax_global, i64>;
819def : FlatAtomicPat <FLAT_ATOMIC_SMIN_X2_RTN, atomic_min_global, i64>;
820def : FlatAtomicPat <FLAT_ATOMIC_UMIN_X2_RTN, atomic_umin_global, i64>;
821def : FlatAtomicPat <FLAT_ATOMIC_OR_X2_RTN, atomic_or_global, i64>;
822def : FlatAtomicPat <FLAT_ATOMIC_SWAP_X2_RTN, atomic_swap_global, i64>;
823def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global, i64, v2i64>;
824def : FlatAtomicPat <FLAT_ATOMIC_XOR_X2_RTN, atomic_xor_global, i64>;
825
826def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i16>;
827def : FlatStorePat <FLAT_STORE_SHORT, store_flat, i16>;
828
829let OtherPredicates = [D16PreservesUnusedBits] in {
830def : FlatStorePat <FLAT_STORE_SHORT_D16_HI, truncstorei16_hi16_flat, i32>;
831def : FlatStorePat <FLAT_STORE_BYTE_D16_HI, truncstorei8_hi16_flat, i32>;
832
833def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2i16>;
834def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2f16>;
835def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2i16>;
836def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2f16>;
837def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat, v2i16>;
838def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat, v2f16>;
839
840def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2i16>;
841def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2f16>;
842def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2i16>;
843def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2f16>;
844def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2i16>;
845def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2f16>;
846}
847
848} // End OtherPredicates = [HasFlatAddressSpace]
849
850def atomic_fadd_global    : global_binary_atomic_op_frag<SIglobal_atomic_fadd>;
851def atomic_pk_fadd_global : global_binary_atomic_op_frag<SIglobal_atomic_pk_fadd>;
852
853let OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10 in {
854
855def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, extloadi8_global, i32>;
856def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, zextloadi8_global, i32>;
857def : FlatLoadSignedPat <GLOBAL_LOAD_SBYTE, sextloadi8_global, i32>;
858def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, extloadi8_global, i16>;
859def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, zextloadi8_global, i16>;
860def : FlatLoadSignedPat <GLOBAL_LOAD_SBYTE, sextloadi8_global, i16>;
861def : FlatLoadSignedPat <GLOBAL_LOAD_USHORT, extloadi16_global, i32>;
862def : FlatLoadSignedPat <GLOBAL_LOAD_USHORT, zextloadi16_global, i32>;
863def : FlatLoadSignedPat <GLOBAL_LOAD_SSHORT, sextloadi16_global, i32>;
864def : FlatLoadSignedPat <GLOBAL_LOAD_USHORT, load_global, i16>;
865
866def : FlatLoadSignedPat <GLOBAL_LOAD_DWORD, load_global, i32>;
867def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX2, load_global, v2i32>;
868def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX3, load_global, v3i32>;
869def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX4, load_global, v4i32>;
870
871def : FlatLoadAtomicPat <GLOBAL_LOAD_DWORD, atomic_load_32_global, i32>;
872def : FlatLoadAtomicPat <GLOBAL_LOAD_DWORDX2, atomic_load_64_global, i64>;
873
874def : FlatStoreSignedPat <GLOBAL_STORE_BYTE, truncstorei8_global, i32, VGPR_32>;
875def : FlatStoreSignedPat <GLOBAL_STORE_BYTE, truncstorei8_global, i16, VGPR_32>;
876def : FlatStoreSignedPat <GLOBAL_STORE_SHORT, truncstorei16_global, i32, VGPR_32>;
877def : FlatStoreSignedPat <GLOBAL_STORE_SHORT, store_global, i16, VGPR_32>;
878def : FlatStoreSignedPat <GLOBAL_STORE_DWORD, store_global, i32, VGPR_32>;
879def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX2, store_global, v2i32, VReg_64>;
880def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX3, store_global, v3i32, VReg_96>;
881def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX4, store_global, v4i32, VReg_128>;
882
883let OtherPredicates = [D16PreservesUnusedBits] in {
884def : FlatStoreSignedPat <GLOBAL_STORE_SHORT_D16_HI, truncstorei16_hi16_global, i32>;
885def : FlatStoreSignedPat <GLOBAL_STORE_BYTE_D16_HI, truncstorei8_hi16_global, i32>;
886
887def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2i16>;
888def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2f16>;
889def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global, v2i16>;
890def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global, v2f16>;
891def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SHORT_D16_HI, load_d16_hi_global, v2i16>;
892def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SHORT_D16_HI, load_d16_hi_global, v2f16>;
893
894def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_d16_lo_global, v2i16>;
895def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_d16_lo_global, v2f16>;
896def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_d16_lo_global, v2i16>;
897def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_d16_lo_global, v2f16>;
898def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2i16>;
899def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2f16>;
900}
901
902def : FlatStoreSignedAtomicPat <GLOBAL_STORE_DWORD, store_atomic_global, i32>;
903def : FlatStoreSignedAtomicPat <GLOBAL_STORE_DWORDX2, store_atomic_global, i64, VReg_64>;
904
905def : FlatSignedAtomicPat <GLOBAL_ATOMIC_ADD_RTN, atomic_add_global, i32>;
906def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SUB_RTN, atomic_sub_global, i32>;
907def : FlatSignedAtomicPat <GLOBAL_ATOMIC_INC_RTN, atomic_inc_global, i32>;
908def : FlatSignedAtomicPat <GLOBAL_ATOMIC_DEC_RTN, atomic_dec_global, i32>;
909def : FlatSignedAtomicPat <GLOBAL_ATOMIC_AND_RTN, atomic_and_global, i32>;
910def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMAX_RTN, atomic_max_global, i32>;
911def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMAX_RTN, atomic_umax_global, i32>;
912def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMIN_RTN, atomic_min_global, i32>;
913def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMIN_RTN, atomic_umin_global, i32>;
914def : FlatSignedAtomicPat <GLOBAL_ATOMIC_OR_RTN, atomic_or_global, i32>;
915def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SWAP_RTN, atomic_swap_global, i32>;
916def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global, i32, v2i32>;
917def : FlatSignedAtomicPat <GLOBAL_ATOMIC_XOR_RTN, atomic_xor_global, i32>;
918
919def : FlatSignedAtomicPat <GLOBAL_ATOMIC_ADD_X2_RTN, atomic_add_global, i64>;
920def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SUB_X2_RTN, atomic_sub_global, i64>;
921def : FlatSignedAtomicPat <GLOBAL_ATOMIC_INC_X2_RTN, atomic_inc_global, i64>;
922def : FlatSignedAtomicPat <GLOBAL_ATOMIC_DEC_X2_RTN, atomic_dec_global, i64>;
923def : FlatSignedAtomicPat <GLOBAL_ATOMIC_AND_X2_RTN, atomic_and_global, i64>;
924def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMAX_X2_RTN, atomic_max_global, i64>;
925def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMAX_X2_RTN, atomic_umax_global, i64>;
926def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMIN_X2_RTN, atomic_min_global, i64>;
927def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMIN_X2_RTN, atomic_umin_global, i64>;
928def : FlatSignedAtomicPat <GLOBAL_ATOMIC_OR_X2_RTN, atomic_or_global, i64>;
929def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SWAP_X2_RTN, atomic_swap_global, i64>;
930def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global, i64, v2i64>;
931def : FlatSignedAtomicPat <GLOBAL_ATOMIC_XOR_X2_RTN, atomic_xor_global, i64>;
932
933def : FlatAtomicPatNoRtn <GLOBAL_ATOMIC_ADD_F32,    atomic_fadd_global, f32>;
934def : FlatAtomicPatNoRtn <GLOBAL_ATOMIC_PK_ADD_F16, atomic_pk_fadd_global, v2f16>;
935
936} // End OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10
937
938
939//===----------------------------------------------------------------------===//
940// Target
941//===----------------------------------------------------------------------===//
942
943//===----------------------------------------------------------------------===//
944// CI
945//===----------------------------------------------------------------------===//
946
947class FLAT_Real_ci <bits<7> op, FLAT_Pseudo ps> :
948  FLAT_Real <op, ps>,
949  SIMCInstr <ps.PseudoInstr, SIEncodingFamily.SI> {
950  let AssemblerPredicate = isGFX7Only;
951  let DecoderNamespace="GFX7";
952}
953
954def FLAT_LOAD_UBYTE_ci         : FLAT_Real_ci <0x8,  FLAT_LOAD_UBYTE>;
955def FLAT_LOAD_SBYTE_ci         : FLAT_Real_ci <0x9,  FLAT_LOAD_SBYTE>;
956def FLAT_LOAD_USHORT_ci        : FLAT_Real_ci <0xa,  FLAT_LOAD_USHORT>;
957def FLAT_LOAD_SSHORT_ci        : FLAT_Real_ci <0xb,  FLAT_LOAD_SSHORT>;
958def FLAT_LOAD_DWORD_ci         : FLAT_Real_ci <0xc,  FLAT_LOAD_DWORD>;
959def FLAT_LOAD_DWORDX2_ci       : FLAT_Real_ci <0xd,  FLAT_LOAD_DWORDX2>;
960def FLAT_LOAD_DWORDX4_ci       : FLAT_Real_ci <0xe,  FLAT_LOAD_DWORDX4>;
961def FLAT_LOAD_DWORDX3_ci       : FLAT_Real_ci <0xf,  FLAT_LOAD_DWORDX3>;
962
963def FLAT_STORE_BYTE_ci         : FLAT_Real_ci <0x18, FLAT_STORE_BYTE>;
964def FLAT_STORE_SHORT_ci        : FLAT_Real_ci <0x1a, FLAT_STORE_SHORT>;
965def FLAT_STORE_DWORD_ci        : FLAT_Real_ci <0x1c, FLAT_STORE_DWORD>;
966def FLAT_STORE_DWORDX2_ci      : FLAT_Real_ci <0x1d, FLAT_STORE_DWORDX2>;
967def FLAT_STORE_DWORDX4_ci      : FLAT_Real_ci <0x1e, FLAT_STORE_DWORDX4>;
968def FLAT_STORE_DWORDX3_ci      : FLAT_Real_ci <0x1f, FLAT_STORE_DWORDX3>;
969
970multiclass FLAT_Real_Atomics_ci <bits<7> op, FLAT_Pseudo ps> {
971  def _ci     : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>;
972  def _RTN_ci : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>;
973}
974
975defm FLAT_ATOMIC_SWAP          : FLAT_Real_Atomics_ci <0x30, FLAT_ATOMIC_SWAP>;
976defm FLAT_ATOMIC_CMPSWAP       : FLAT_Real_Atomics_ci <0x31, FLAT_ATOMIC_CMPSWAP>;
977defm FLAT_ATOMIC_ADD           : FLAT_Real_Atomics_ci <0x32, FLAT_ATOMIC_ADD>;
978defm FLAT_ATOMIC_SUB           : FLAT_Real_Atomics_ci <0x33, FLAT_ATOMIC_SUB>;
979defm FLAT_ATOMIC_SMIN          : FLAT_Real_Atomics_ci <0x35, FLAT_ATOMIC_SMIN>;
980defm FLAT_ATOMIC_UMIN          : FLAT_Real_Atomics_ci <0x36, FLAT_ATOMIC_UMIN>;
981defm FLAT_ATOMIC_SMAX          : FLAT_Real_Atomics_ci <0x37, FLAT_ATOMIC_SMAX>;
982defm FLAT_ATOMIC_UMAX          : FLAT_Real_Atomics_ci <0x38, FLAT_ATOMIC_UMAX>;
983defm FLAT_ATOMIC_AND           : FLAT_Real_Atomics_ci <0x39, FLAT_ATOMIC_AND>;
984defm FLAT_ATOMIC_OR            : FLAT_Real_Atomics_ci <0x3a, FLAT_ATOMIC_OR>;
985defm FLAT_ATOMIC_XOR           : FLAT_Real_Atomics_ci <0x3b, FLAT_ATOMIC_XOR>;
986defm FLAT_ATOMIC_INC           : FLAT_Real_Atomics_ci <0x3c, FLAT_ATOMIC_INC>;
987defm FLAT_ATOMIC_DEC           : FLAT_Real_Atomics_ci <0x3d, FLAT_ATOMIC_DEC>;
988defm FLAT_ATOMIC_SWAP_X2       : FLAT_Real_Atomics_ci <0x50, FLAT_ATOMIC_SWAP_X2>;
989defm FLAT_ATOMIC_CMPSWAP_X2    : FLAT_Real_Atomics_ci <0x51, FLAT_ATOMIC_CMPSWAP_X2>;
990defm FLAT_ATOMIC_ADD_X2        : FLAT_Real_Atomics_ci <0x52, FLAT_ATOMIC_ADD_X2>;
991defm FLAT_ATOMIC_SUB_X2        : FLAT_Real_Atomics_ci <0x53, FLAT_ATOMIC_SUB_X2>;
992defm FLAT_ATOMIC_SMIN_X2       : FLAT_Real_Atomics_ci <0x55, FLAT_ATOMIC_SMIN_X2>;
993defm FLAT_ATOMIC_UMIN_X2       : FLAT_Real_Atomics_ci <0x56, FLAT_ATOMIC_UMIN_X2>;
994defm FLAT_ATOMIC_SMAX_X2       : FLAT_Real_Atomics_ci <0x57, FLAT_ATOMIC_SMAX_X2>;
995defm FLAT_ATOMIC_UMAX_X2       : FLAT_Real_Atomics_ci <0x58, FLAT_ATOMIC_UMAX_X2>;
996defm FLAT_ATOMIC_AND_X2        : FLAT_Real_Atomics_ci <0x59, FLAT_ATOMIC_AND_X2>;
997defm FLAT_ATOMIC_OR_X2         : FLAT_Real_Atomics_ci <0x5a, FLAT_ATOMIC_OR_X2>;
998defm FLAT_ATOMIC_XOR_X2        : FLAT_Real_Atomics_ci <0x5b, FLAT_ATOMIC_XOR_X2>;
999defm FLAT_ATOMIC_INC_X2        : FLAT_Real_Atomics_ci <0x5c, FLAT_ATOMIC_INC_X2>;
1000defm FLAT_ATOMIC_DEC_X2        : FLAT_Real_Atomics_ci <0x5d, FLAT_ATOMIC_DEC_X2>;
1001
1002// CI Only flat instructions
1003defm FLAT_ATOMIC_FCMPSWAP      : FLAT_Real_Atomics_ci <0x3e, FLAT_ATOMIC_FCMPSWAP>;
1004defm FLAT_ATOMIC_FMIN          : FLAT_Real_Atomics_ci <0x3f, FLAT_ATOMIC_FMIN>;
1005defm FLAT_ATOMIC_FMAX          : FLAT_Real_Atomics_ci <0x40, FLAT_ATOMIC_FMAX>;
1006defm FLAT_ATOMIC_FCMPSWAP_X2   : FLAT_Real_Atomics_ci <0x5e, FLAT_ATOMIC_FCMPSWAP_X2>;
1007defm FLAT_ATOMIC_FMIN_X2       : FLAT_Real_Atomics_ci <0x5f, FLAT_ATOMIC_FMIN_X2>;
1008defm FLAT_ATOMIC_FMAX_X2       : FLAT_Real_Atomics_ci <0x60, FLAT_ATOMIC_FMAX_X2>;
1009
1010
1011//===----------------------------------------------------------------------===//
1012// VI
1013//===----------------------------------------------------------------------===//
1014
1015class FLAT_Real_vi <bits<7> op, FLAT_Pseudo ps> :
1016  FLAT_Real <op, ps>,
1017  SIMCInstr <ps.PseudoInstr, SIEncodingFamily.VI> {
1018  let AssemblerPredicate = isGFX8GFX9;
1019  let DecoderNamespace = "GFX8";
1020}
1021
1022multiclass FLAT_Real_AllAddr_vi<bits<7> op> {
1023  def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME)>;
1024  def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
1025}
1026
1027def FLAT_LOAD_UBYTE_vi         : FLAT_Real_vi <0x10, FLAT_LOAD_UBYTE>;
1028def FLAT_LOAD_SBYTE_vi         : FLAT_Real_vi <0x11, FLAT_LOAD_SBYTE>;
1029def FLAT_LOAD_USHORT_vi        : FLAT_Real_vi <0x12, FLAT_LOAD_USHORT>;
1030def FLAT_LOAD_SSHORT_vi        : FLAT_Real_vi <0x13, FLAT_LOAD_SSHORT>;
1031def FLAT_LOAD_DWORD_vi         : FLAT_Real_vi <0x14, FLAT_LOAD_DWORD>;
1032def FLAT_LOAD_DWORDX2_vi       : FLAT_Real_vi <0x15, FLAT_LOAD_DWORDX2>;
1033def FLAT_LOAD_DWORDX4_vi       : FLAT_Real_vi <0x17, FLAT_LOAD_DWORDX4>;
1034def FLAT_LOAD_DWORDX3_vi       : FLAT_Real_vi <0x16, FLAT_LOAD_DWORDX3>;
1035
1036def FLAT_STORE_BYTE_vi         : FLAT_Real_vi <0x18, FLAT_STORE_BYTE>;
1037def FLAT_STORE_BYTE_D16_HI_vi  : FLAT_Real_vi <0x19, FLAT_STORE_BYTE_D16_HI>;
1038def FLAT_STORE_SHORT_vi        : FLAT_Real_vi <0x1a, FLAT_STORE_SHORT>;
1039def FLAT_STORE_SHORT_D16_HI_vi : FLAT_Real_vi <0x1b, FLAT_STORE_SHORT_D16_HI>;
1040def FLAT_STORE_DWORD_vi        : FLAT_Real_vi <0x1c, FLAT_STORE_DWORD>;
1041def FLAT_STORE_DWORDX2_vi      : FLAT_Real_vi <0x1d, FLAT_STORE_DWORDX2>;
1042def FLAT_STORE_DWORDX4_vi      : FLAT_Real_vi <0x1f, FLAT_STORE_DWORDX4>;
1043def FLAT_STORE_DWORDX3_vi      : FLAT_Real_vi <0x1e, FLAT_STORE_DWORDX3>;
1044
1045def FLAT_LOAD_UBYTE_D16_vi    : FLAT_Real_vi <0x20, FLAT_LOAD_UBYTE_D16>;
1046def FLAT_LOAD_UBYTE_D16_HI_vi : FLAT_Real_vi <0x21, FLAT_LOAD_UBYTE_D16_HI>;
1047def FLAT_LOAD_SBYTE_D16_vi    : FLAT_Real_vi <0x22, FLAT_LOAD_SBYTE_D16>;
1048def FLAT_LOAD_SBYTE_D16_HI_vi : FLAT_Real_vi <0x23, FLAT_LOAD_SBYTE_D16_HI>;
1049def FLAT_LOAD_SHORT_D16_vi    : FLAT_Real_vi <0x24, FLAT_LOAD_SHORT_D16>;
1050def FLAT_LOAD_SHORT_D16_HI_vi : FLAT_Real_vi <0x25, FLAT_LOAD_SHORT_D16_HI>;
1051
1052multiclass FLAT_Real_Atomics_vi <bits<7> op, FLAT_Pseudo ps> {
1053  def _vi     : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>;
1054  def _RTN_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>;
1055}
1056
1057multiclass FLAT_Global_Real_Atomics_vi<bits<7> op> :
1058  FLAT_Real_AllAddr_vi<op> {
1059  def _RTN_vi  : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_RTN")>;
1060  def _SADDR_RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN")>;
1061}
1062
1063
1064defm FLAT_ATOMIC_SWAP       : FLAT_Real_Atomics_vi <0x40, FLAT_ATOMIC_SWAP>;
1065defm FLAT_ATOMIC_CMPSWAP    : FLAT_Real_Atomics_vi <0x41, FLAT_ATOMIC_CMPSWAP>;
1066defm FLAT_ATOMIC_ADD        : FLAT_Real_Atomics_vi <0x42, FLAT_ATOMIC_ADD>;
1067defm FLAT_ATOMIC_SUB        : FLAT_Real_Atomics_vi <0x43, FLAT_ATOMIC_SUB>;
1068defm FLAT_ATOMIC_SMIN       : FLAT_Real_Atomics_vi <0x44, FLAT_ATOMIC_SMIN>;
1069defm FLAT_ATOMIC_UMIN       : FLAT_Real_Atomics_vi <0x45, FLAT_ATOMIC_UMIN>;
1070defm FLAT_ATOMIC_SMAX       : FLAT_Real_Atomics_vi <0x46, FLAT_ATOMIC_SMAX>;
1071defm FLAT_ATOMIC_UMAX       : FLAT_Real_Atomics_vi <0x47, FLAT_ATOMIC_UMAX>;
1072defm FLAT_ATOMIC_AND        : FLAT_Real_Atomics_vi <0x48, FLAT_ATOMIC_AND>;
1073defm FLAT_ATOMIC_OR         : FLAT_Real_Atomics_vi <0x49, FLAT_ATOMIC_OR>;
1074defm FLAT_ATOMIC_XOR        : FLAT_Real_Atomics_vi <0x4a, FLAT_ATOMIC_XOR>;
1075defm FLAT_ATOMIC_INC        : FLAT_Real_Atomics_vi <0x4b, FLAT_ATOMIC_INC>;
1076defm FLAT_ATOMIC_DEC        : FLAT_Real_Atomics_vi <0x4c, FLAT_ATOMIC_DEC>;
1077defm FLAT_ATOMIC_SWAP_X2    : FLAT_Real_Atomics_vi <0x60, FLAT_ATOMIC_SWAP_X2>;
1078defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_vi <0x61, FLAT_ATOMIC_CMPSWAP_X2>;
1079defm FLAT_ATOMIC_ADD_X2     : FLAT_Real_Atomics_vi <0x62, FLAT_ATOMIC_ADD_X2>;
1080defm FLAT_ATOMIC_SUB_X2     : FLAT_Real_Atomics_vi <0x63, FLAT_ATOMIC_SUB_X2>;
1081defm FLAT_ATOMIC_SMIN_X2    : FLAT_Real_Atomics_vi <0x64, FLAT_ATOMIC_SMIN_X2>;
1082defm FLAT_ATOMIC_UMIN_X2    : FLAT_Real_Atomics_vi <0x65, FLAT_ATOMIC_UMIN_X2>;
1083defm FLAT_ATOMIC_SMAX_X2    : FLAT_Real_Atomics_vi <0x66, FLAT_ATOMIC_SMAX_X2>;
1084defm FLAT_ATOMIC_UMAX_X2    : FLAT_Real_Atomics_vi <0x67, FLAT_ATOMIC_UMAX_X2>;
1085defm FLAT_ATOMIC_AND_X2     : FLAT_Real_Atomics_vi <0x68, FLAT_ATOMIC_AND_X2>;
1086defm FLAT_ATOMIC_OR_X2      : FLAT_Real_Atomics_vi <0x69, FLAT_ATOMIC_OR_X2>;
1087defm FLAT_ATOMIC_XOR_X2     : FLAT_Real_Atomics_vi <0x6a, FLAT_ATOMIC_XOR_X2>;
1088defm FLAT_ATOMIC_INC_X2     : FLAT_Real_Atomics_vi <0x6b, FLAT_ATOMIC_INC_X2>;
1089defm FLAT_ATOMIC_DEC_X2     : FLAT_Real_Atomics_vi <0x6c, FLAT_ATOMIC_DEC_X2>;
1090
1091defm GLOBAL_LOAD_UBYTE : FLAT_Real_AllAddr_vi <0x10>;
1092defm GLOBAL_LOAD_SBYTE : FLAT_Real_AllAddr_vi <0x11>;
1093defm GLOBAL_LOAD_USHORT : FLAT_Real_AllAddr_vi <0x12>;
1094defm GLOBAL_LOAD_SSHORT : FLAT_Real_AllAddr_vi <0x13>;
1095defm GLOBAL_LOAD_DWORD : FLAT_Real_AllAddr_vi <0x14>;
1096defm GLOBAL_LOAD_DWORDX2 : FLAT_Real_AllAddr_vi <0x15>;
1097defm GLOBAL_LOAD_DWORDX3 : FLAT_Real_AllAddr_vi <0x16>;
1098defm GLOBAL_LOAD_DWORDX4 : FLAT_Real_AllAddr_vi <0x17>;
1099
1100defm GLOBAL_LOAD_UBYTE_D16    : FLAT_Real_AllAddr_vi <0x20>;
1101defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_vi <0x21>;
1102defm GLOBAL_LOAD_SBYTE_D16    : FLAT_Real_AllAddr_vi <0x22>;
1103defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_vi <0x23>;
1104defm GLOBAL_LOAD_SHORT_D16    : FLAT_Real_AllAddr_vi <0x24>;
1105defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x25>;
1106
1107defm GLOBAL_STORE_BYTE : FLAT_Real_AllAddr_vi <0x18>;
1108defm GLOBAL_STORE_BYTE_D16_HI : FLAT_Real_AllAddr_vi <0x19>;
1109defm GLOBAL_STORE_SHORT : FLAT_Real_AllAddr_vi <0x1a>;
1110defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x1b>;
1111defm GLOBAL_STORE_DWORD : FLAT_Real_AllAddr_vi <0x1c>;
1112defm GLOBAL_STORE_DWORDX2 : FLAT_Real_AllAddr_vi <0x1d>;
1113defm GLOBAL_STORE_DWORDX3 : FLAT_Real_AllAddr_vi <0x1e>;
1114defm GLOBAL_STORE_DWORDX4 : FLAT_Real_AllAddr_vi <0x1f>;
1115
1116
1117defm GLOBAL_ATOMIC_SWAP       : FLAT_Global_Real_Atomics_vi <0x40>;
1118defm GLOBAL_ATOMIC_CMPSWAP    : FLAT_Global_Real_Atomics_vi <0x41>;
1119defm GLOBAL_ATOMIC_ADD        : FLAT_Global_Real_Atomics_vi <0x42>;
1120defm GLOBAL_ATOMIC_SUB        : FLAT_Global_Real_Atomics_vi <0x43>;
1121defm GLOBAL_ATOMIC_SMIN       : FLAT_Global_Real_Atomics_vi <0x44>;
1122defm GLOBAL_ATOMIC_UMIN       : FLAT_Global_Real_Atomics_vi <0x45>;
1123defm GLOBAL_ATOMIC_SMAX       : FLAT_Global_Real_Atomics_vi <0x46>;
1124defm GLOBAL_ATOMIC_UMAX       : FLAT_Global_Real_Atomics_vi <0x47>;
1125defm GLOBAL_ATOMIC_AND        : FLAT_Global_Real_Atomics_vi <0x48>;
1126defm GLOBAL_ATOMIC_OR         : FLAT_Global_Real_Atomics_vi <0x49>;
1127defm GLOBAL_ATOMIC_XOR        : FLAT_Global_Real_Atomics_vi <0x4a>;
1128defm GLOBAL_ATOMIC_INC        : FLAT_Global_Real_Atomics_vi <0x4b>;
1129defm GLOBAL_ATOMIC_DEC        : FLAT_Global_Real_Atomics_vi <0x4c>;
1130defm GLOBAL_ATOMIC_SWAP_X2    : FLAT_Global_Real_Atomics_vi <0x60>;
1131defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Real_Atomics_vi <0x61>;
1132defm GLOBAL_ATOMIC_ADD_X2     : FLAT_Global_Real_Atomics_vi <0x62>;
1133defm GLOBAL_ATOMIC_SUB_X2     : FLAT_Global_Real_Atomics_vi <0x63>;
1134defm GLOBAL_ATOMIC_SMIN_X2    : FLAT_Global_Real_Atomics_vi <0x64>;
1135defm GLOBAL_ATOMIC_UMIN_X2    : FLAT_Global_Real_Atomics_vi <0x65>;
1136defm GLOBAL_ATOMIC_SMAX_X2    : FLAT_Global_Real_Atomics_vi <0x66>;
1137defm GLOBAL_ATOMIC_UMAX_X2    : FLAT_Global_Real_Atomics_vi <0x67>;
1138defm GLOBAL_ATOMIC_AND_X2     : FLAT_Global_Real_Atomics_vi <0x68>;
1139defm GLOBAL_ATOMIC_OR_X2      : FLAT_Global_Real_Atomics_vi <0x69>;
1140defm GLOBAL_ATOMIC_XOR_X2     : FLAT_Global_Real_Atomics_vi <0x6a>;
1141defm GLOBAL_ATOMIC_INC_X2     : FLAT_Global_Real_Atomics_vi <0x6b>;
1142defm GLOBAL_ATOMIC_DEC_X2     : FLAT_Global_Real_Atomics_vi <0x6c>;
1143
1144defm SCRATCH_LOAD_UBYTE         : FLAT_Real_AllAddr_vi <0x10>;
1145defm SCRATCH_LOAD_SBYTE         : FLAT_Real_AllAddr_vi <0x11>;
1146defm SCRATCH_LOAD_USHORT        : FLAT_Real_AllAddr_vi <0x12>;
1147defm SCRATCH_LOAD_SSHORT        : FLAT_Real_AllAddr_vi <0x13>;
1148defm SCRATCH_LOAD_DWORD         : FLAT_Real_AllAddr_vi <0x14>;
1149defm SCRATCH_LOAD_DWORDX2       : FLAT_Real_AllAddr_vi <0x15>;
1150defm SCRATCH_LOAD_DWORDX3       : FLAT_Real_AllAddr_vi <0x16>;
1151defm SCRATCH_LOAD_DWORDX4       : FLAT_Real_AllAddr_vi <0x17>;
1152defm SCRATCH_STORE_BYTE         : FLAT_Real_AllAddr_vi <0x18>;
1153defm SCRATCH_STORE_BYTE_D16_HI  : FLAT_Real_AllAddr_vi <0x19>;
1154defm SCRATCH_LOAD_UBYTE_D16     : FLAT_Real_AllAddr_vi <0x20>;
1155defm SCRATCH_LOAD_UBYTE_D16_HI  : FLAT_Real_AllAddr_vi <0x21>;
1156defm SCRATCH_LOAD_SBYTE_D16     : FLAT_Real_AllAddr_vi <0x22>;
1157defm SCRATCH_LOAD_SBYTE_D16_HI  : FLAT_Real_AllAddr_vi <0x23>;
1158defm SCRATCH_LOAD_SHORT_D16     : FLAT_Real_AllAddr_vi <0x24>;
1159defm SCRATCH_LOAD_SHORT_D16_HI  : FLAT_Real_AllAddr_vi <0x25>;
1160defm SCRATCH_STORE_SHORT        : FLAT_Real_AllAddr_vi <0x1a>;
1161defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x1b>;
1162defm SCRATCH_STORE_DWORD        : FLAT_Real_AllAddr_vi <0x1c>;
1163defm SCRATCH_STORE_DWORDX2      : FLAT_Real_AllAddr_vi <0x1d>;
1164defm SCRATCH_STORE_DWORDX3      : FLAT_Real_AllAddr_vi <0x1e>;
1165defm SCRATCH_STORE_DWORDX4      : FLAT_Real_AllAddr_vi <0x1f>;
1166
1167
1168//===----------------------------------------------------------------------===//
1169// GFX10.
1170//===----------------------------------------------------------------------===//
1171
1172class FLAT_Real_gfx10<bits<7> op, FLAT_Pseudo ps> :
1173    FLAT_Real<op, ps>, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX10> {
1174  let AssemblerPredicate = isGFX10Plus;
1175  let DecoderNamespace = "GFX10";
1176
1177  let Inst{11-0}  = {offset{12}, offset{10-0}};
1178  let Inst{12}    = !if(ps.has_dlc, dlc, ps.dlcValue);
1179  let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7d), 0x7d);
1180  let Inst{55}    = 0;
1181}
1182
1183
1184multiclass FLAT_Real_Base_gfx10<bits<7> op> {
1185  def _gfx10 :
1186    FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME)>;
1187}
1188
1189multiclass FLAT_Real_RTN_gfx10<bits<7> op> {
1190  def _RTN_gfx10 :
1191    FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_RTN")>;
1192}
1193
1194multiclass FLAT_Real_SADDR_gfx10<bits<7> op> {
1195  def _SADDR_gfx10 :
1196    FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
1197}
1198
1199multiclass FLAT_Real_SADDR_RTN_gfx10<bits<7> op> {
1200  def _SADDR_RTN_gfx10 :
1201    FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN")>;
1202}
1203
1204
1205multiclass FLAT_Real_AllAddr_gfx10<bits<7> op> :
1206  FLAT_Real_Base_gfx10<op>,
1207  FLAT_Real_SADDR_gfx10<op>;
1208
1209multiclass FLAT_Real_Atomics_gfx10<bits<7> op> :
1210  FLAT_Real_Base_gfx10<op>,
1211  FLAT_Real_RTN_gfx10<op>;
1212
1213multiclass FLAT_Real_GlblAtomics_gfx10<bits<7> op> :
1214  FLAT_Real_AllAddr_gfx10<op>,
1215  FLAT_Real_RTN_gfx10<op>,
1216  FLAT_Real_SADDR_RTN_gfx10<op>;
1217
1218
1219// ENC_FLAT.
1220defm FLAT_LOAD_UBYTE            : FLAT_Real_Base_gfx10<0x008>;
1221defm FLAT_LOAD_SBYTE            : FLAT_Real_Base_gfx10<0x009>;
1222defm FLAT_LOAD_USHORT           : FLAT_Real_Base_gfx10<0x00a>;
1223defm FLAT_LOAD_SSHORT           : FLAT_Real_Base_gfx10<0x00b>;
1224defm FLAT_LOAD_DWORD            : FLAT_Real_Base_gfx10<0x00c>;
1225defm FLAT_LOAD_DWORDX2          : FLAT_Real_Base_gfx10<0x00d>;
1226defm FLAT_LOAD_DWORDX4          : FLAT_Real_Base_gfx10<0x00e>;
1227defm FLAT_LOAD_DWORDX3          : FLAT_Real_Base_gfx10<0x00f>;
1228defm FLAT_STORE_BYTE            : FLAT_Real_Base_gfx10<0x018>;
1229defm FLAT_STORE_BYTE_D16_HI     : FLAT_Real_Base_gfx10<0x019>;
1230defm FLAT_STORE_SHORT           : FLAT_Real_Base_gfx10<0x01a>;
1231defm FLAT_STORE_SHORT_D16_HI    : FLAT_Real_Base_gfx10<0x01b>;
1232defm FLAT_STORE_DWORD           : FLAT_Real_Base_gfx10<0x01c>;
1233defm FLAT_STORE_DWORDX2         : FLAT_Real_Base_gfx10<0x01d>;
1234defm FLAT_STORE_DWORDX4         : FLAT_Real_Base_gfx10<0x01e>;
1235defm FLAT_STORE_DWORDX3         : FLAT_Real_Base_gfx10<0x01f>;
1236defm FLAT_LOAD_UBYTE_D16        : FLAT_Real_Base_gfx10<0x020>;
1237defm FLAT_LOAD_UBYTE_D16_HI     : FLAT_Real_Base_gfx10<0x021>;
1238defm FLAT_LOAD_SBYTE_D16        : FLAT_Real_Base_gfx10<0x022>;
1239defm FLAT_LOAD_SBYTE_D16_HI     : FLAT_Real_Base_gfx10<0x023>;
1240defm FLAT_LOAD_SHORT_D16        : FLAT_Real_Base_gfx10<0x024>;
1241defm FLAT_LOAD_SHORT_D16_HI     : FLAT_Real_Base_gfx10<0x025>;
1242defm FLAT_ATOMIC_SWAP           : FLAT_Real_Atomics_gfx10<0x030>;
1243defm FLAT_ATOMIC_CMPSWAP        : FLAT_Real_Atomics_gfx10<0x031>;
1244defm FLAT_ATOMIC_ADD            : FLAT_Real_Atomics_gfx10<0x032>;
1245defm FLAT_ATOMIC_SUB            : FLAT_Real_Atomics_gfx10<0x033>;
1246defm FLAT_ATOMIC_SMIN           : FLAT_Real_Atomics_gfx10<0x035>;
1247defm FLAT_ATOMIC_UMIN           : FLAT_Real_Atomics_gfx10<0x036>;
1248defm FLAT_ATOMIC_SMAX           : FLAT_Real_Atomics_gfx10<0x037>;
1249defm FLAT_ATOMIC_UMAX           : FLAT_Real_Atomics_gfx10<0x038>;
1250defm FLAT_ATOMIC_AND            : FLAT_Real_Atomics_gfx10<0x039>;
1251defm FLAT_ATOMIC_OR             : FLAT_Real_Atomics_gfx10<0x03a>;
1252defm FLAT_ATOMIC_XOR            : FLAT_Real_Atomics_gfx10<0x03b>;
1253defm FLAT_ATOMIC_INC            : FLAT_Real_Atomics_gfx10<0x03c>;
1254defm FLAT_ATOMIC_DEC            : FLAT_Real_Atomics_gfx10<0x03d>;
1255defm FLAT_ATOMIC_FCMPSWAP       : FLAT_Real_Atomics_gfx10<0x03e>;
1256defm FLAT_ATOMIC_FMIN           : FLAT_Real_Atomics_gfx10<0x03f>;
1257defm FLAT_ATOMIC_FMAX           : FLAT_Real_Atomics_gfx10<0x040>;
1258defm FLAT_ATOMIC_SWAP_X2        : FLAT_Real_Atomics_gfx10<0x050>;
1259defm FLAT_ATOMIC_CMPSWAP_X2     : FLAT_Real_Atomics_gfx10<0x051>;
1260defm FLAT_ATOMIC_ADD_X2         : FLAT_Real_Atomics_gfx10<0x052>;
1261defm FLAT_ATOMIC_SUB_X2         : FLAT_Real_Atomics_gfx10<0x053>;
1262defm FLAT_ATOMIC_SMIN_X2        : FLAT_Real_Atomics_gfx10<0x055>;
1263defm FLAT_ATOMIC_UMIN_X2        : FLAT_Real_Atomics_gfx10<0x056>;
1264defm FLAT_ATOMIC_SMAX_X2        : FLAT_Real_Atomics_gfx10<0x057>;
1265defm FLAT_ATOMIC_UMAX_X2        : FLAT_Real_Atomics_gfx10<0x058>;
1266defm FLAT_ATOMIC_AND_X2         : FLAT_Real_Atomics_gfx10<0x059>;
1267defm FLAT_ATOMIC_OR_X2          : FLAT_Real_Atomics_gfx10<0x05a>;
1268defm FLAT_ATOMIC_XOR_X2         : FLAT_Real_Atomics_gfx10<0x05b>;
1269defm FLAT_ATOMIC_INC_X2         : FLAT_Real_Atomics_gfx10<0x05c>;
1270defm FLAT_ATOMIC_DEC_X2         : FLAT_Real_Atomics_gfx10<0x05d>;
1271defm FLAT_ATOMIC_FCMPSWAP_X2    : FLAT_Real_Atomics_gfx10<0x05e>;
1272defm FLAT_ATOMIC_FMIN_X2        : FLAT_Real_Atomics_gfx10<0x05f>;
1273defm FLAT_ATOMIC_FMAX_X2        : FLAT_Real_Atomics_gfx10<0x060>;
1274
1275
1276// ENC_FLAT_GLBL.
1277defm GLOBAL_LOAD_UBYTE          : FLAT_Real_AllAddr_gfx10<0x008>;
1278defm GLOBAL_LOAD_SBYTE          : FLAT_Real_AllAddr_gfx10<0x009>;
1279defm GLOBAL_LOAD_USHORT         : FLAT_Real_AllAddr_gfx10<0x00a>;
1280defm GLOBAL_LOAD_SSHORT         : FLAT_Real_AllAddr_gfx10<0x00b>;
1281defm GLOBAL_LOAD_DWORD          : FLAT_Real_AllAddr_gfx10<0x00c>;
1282defm GLOBAL_LOAD_DWORDX2        : FLAT_Real_AllAddr_gfx10<0x00d>;
1283defm GLOBAL_LOAD_DWORDX4        : FLAT_Real_AllAddr_gfx10<0x00e>;
1284defm GLOBAL_LOAD_DWORDX3        : FLAT_Real_AllAddr_gfx10<0x00f>;
1285defm GLOBAL_STORE_BYTE          : FLAT_Real_AllAddr_gfx10<0x018>;
1286defm GLOBAL_STORE_BYTE_D16_HI   : FLAT_Real_AllAddr_gfx10<0x019>;
1287defm GLOBAL_STORE_SHORT         : FLAT_Real_AllAddr_gfx10<0x01a>;
1288defm GLOBAL_STORE_SHORT_D16_HI  : FLAT_Real_AllAddr_gfx10<0x01b>;
1289defm GLOBAL_STORE_DWORD         : FLAT_Real_AllAddr_gfx10<0x01c>;
1290defm GLOBAL_STORE_DWORDX2       : FLAT_Real_AllAddr_gfx10<0x01d>;
1291defm GLOBAL_STORE_DWORDX4       : FLAT_Real_AllAddr_gfx10<0x01e>;
1292defm GLOBAL_STORE_DWORDX3       : FLAT_Real_AllAddr_gfx10<0x01f>;
1293defm GLOBAL_LOAD_UBYTE_D16      : FLAT_Real_AllAddr_gfx10<0x020>;
1294defm GLOBAL_LOAD_UBYTE_D16_HI   : FLAT_Real_AllAddr_gfx10<0x021>;
1295defm GLOBAL_LOAD_SBYTE_D16      : FLAT_Real_AllAddr_gfx10<0x022>;
1296defm GLOBAL_LOAD_SBYTE_D16_HI   : FLAT_Real_AllAddr_gfx10<0x023>;
1297defm GLOBAL_LOAD_SHORT_D16      : FLAT_Real_AllAddr_gfx10<0x024>;
1298defm GLOBAL_LOAD_SHORT_D16_HI   : FLAT_Real_AllAddr_gfx10<0x025>;
1299defm GLOBAL_ATOMIC_SWAP         : FLAT_Real_GlblAtomics_gfx10<0x030>;
1300defm GLOBAL_ATOMIC_CMPSWAP      : FLAT_Real_GlblAtomics_gfx10<0x031>;
1301defm GLOBAL_ATOMIC_ADD          : FLAT_Real_GlblAtomics_gfx10<0x032>;
1302defm GLOBAL_ATOMIC_SUB          : FLAT_Real_GlblAtomics_gfx10<0x033>;
1303defm GLOBAL_ATOMIC_SMIN         : FLAT_Real_GlblAtomics_gfx10<0x035>;
1304defm GLOBAL_ATOMIC_UMIN         : FLAT_Real_GlblAtomics_gfx10<0x036>;
1305defm GLOBAL_ATOMIC_SMAX         : FLAT_Real_GlblAtomics_gfx10<0x037>;
1306defm GLOBAL_ATOMIC_UMAX         : FLAT_Real_GlblAtomics_gfx10<0x038>;
1307defm GLOBAL_ATOMIC_AND          : FLAT_Real_GlblAtomics_gfx10<0x039>;
1308defm GLOBAL_ATOMIC_OR           : FLAT_Real_GlblAtomics_gfx10<0x03a>;
1309defm GLOBAL_ATOMIC_XOR          : FLAT_Real_GlblAtomics_gfx10<0x03b>;
1310defm GLOBAL_ATOMIC_INC          : FLAT_Real_GlblAtomics_gfx10<0x03c>;
1311defm GLOBAL_ATOMIC_DEC          : FLAT_Real_GlblAtomics_gfx10<0x03d>;
1312defm GLOBAL_ATOMIC_FCMPSWAP     : FLAT_Real_GlblAtomics_gfx10<0x03e>;
1313defm GLOBAL_ATOMIC_FMIN         : FLAT_Real_GlblAtomics_gfx10<0x03f>;
1314defm GLOBAL_ATOMIC_FMAX         : FLAT_Real_GlblAtomics_gfx10<0x040>;
1315defm GLOBAL_ATOMIC_SWAP_X2      : FLAT_Real_GlblAtomics_gfx10<0x050>;
1316defm GLOBAL_ATOMIC_CMPSWAP_X2   : FLAT_Real_GlblAtomics_gfx10<0x051>;
1317defm GLOBAL_ATOMIC_ADD_X2       : FLAT_Real_GlblAtomics_gfx10<0x052>;
1318defm GLOBAL_ATOMIC_SUB_X2       : FLAT_Real_GlblAtomics_gfx10<0x053>;
1319defm GLOBAL_ATOMIC_SMIN_X2      : FLAT_Real_GlblAtomics_gfx10<0x055>;
1320defm GLOBAL_ATOMIC_UMIN_X2      : FLAT_Real_GlblAtomics_gfx10<0x056>;
1321defm GLOBAL_ATOMIC_SMAX_X2      : FLAT_Real_GlblAtomics_gfx10<0x057>;
1322defm GLOBAL_ATOMIC_UMAX_X2      : FLAT_Real_GlblAtomics_gfx10<0x058>;
1323defm GLOBAL_ATOMIC_AND_X2       : FLAT_Real_GlblAtomics_gfx10<0x059>;
1324defm GLOBAL_ATOMIC_OR_X2        : FLAT_Real_GlblAtomics_gfx10<0x05a>;
1325defm GLOBAL_ATOMIC_XOR_X2       : FLAT_Real_GlblAtomics_gfx10<0x05b>;
1326defm GLOBAL_ATOMIC_INC_X2       : FLAT_Real_GlblAtomics_gfx10<0x05c>;
1327defm GLOBAL_ATOMIC_DEC_X2       : FLAT_Real_GlblAtomics_gfx10<0x05d>;
1328defm GLOBAL_ATOMIC_FCMPSWAP_X2  : FLAT_Real_GlblAtomics_gfx10<0x05e>;
1329defm GLOBAL_ATOMIC_FMIN_X2      : FLAT_Real_GlblAtomics_gfx10<0x05f>;
1330defm GLOBAL_ATOMIC_FMAX_X2      : FLAT_Real_GlblAtomics_gfx10<0x060>;
1331
1332
1333// ENC_FLAT_SCRATCH.
1334defm SCRATCH_LOAD_UBYTE         : FLAT_Real_AllAddr_gfx10<0x008>;
1335defm SCRATCH_LOAD_SBYTE         : FLAT_Real_AllAddr_gfx10<0x009>;
1336defm SCRATCH_LOAD_USHORT        : FLAT_Real_AllAddr_gfx10<0x00a>;
1337defm SCRATCH_LOAD_SSHORT        : FLAT_Real_AllAddr_gfx10<0x00b>;
1338defm SCRATCH_LOAD_DWORD         : FLAT_Real_AllAddr_gfx10<0x00c>;
1339defm SCRATCH_LOAD_DWORDX2       : FLAT_Real_AllAddr_gfx10<0x00d>;
1340defm SCRATCH_LOAD_DWORDX4       : FLAT_Real_AllAddr_gfx10<0x00e>;
1341defm SCRATCH_LOAD_DWORDX3       : FLAT_Real_AllAddr_gfx10<0x00f>;
1342defm SCRATCH_STORE_BYTE         : FLAT_Real_AllAddr_gfx10<0x018>;
1343defm SCRATCH_STORE_BYTE_D16_HI  : FLAT_Real_AllAddr_gfx10<0x019>;
1344defm SCRATCH_STORE_SHORT        : FLAT_Real_AllAddr_gfx10<0x01a>;
1345defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x01b>;
1346defm SCRATCH_STORE_DWORD        : FLAT_Real_AllAddr_gfx10<0x01c>;
1347defm SCRATCH_STORE_DWORDX2      : FLAT_Real_AllAddr_gfx10<0x01d>;
1348defm SCRATCH_STORE_DWORDX4      : FLAT_Real_AllAddr_gfx10<0x01e>;
1349defm SCRATCH_STORE_DWORDX3      : FLAT_Real_AllAddr_gfx10<0x01f>;
1350defm SCRATCH_LOAD_UBYTE_D16     : FLAT_Real_AllAddr_gfx10<0x020>;
1351defm SCRATCH_LOAD_UBYTE_D16_HI  : FLAT_Real_AllAddr_gfx10<0x021>;
1352defm SCRATCH_LOAD_SBYTE_D16     : FLAT_Real_AllAddr_gfx10<0x022>;
1353defm SCRATCH_LOAD_SBYTE_D16_HI  : FLAT_Real_AllAddr_gfx10<0x023>;
1354defm SCRATCH_LOAD_SHORT_D16     : FLAT_Real_AllAddr_gfx10<0x024>;
1355defm SCRATCH_LOAD_SHORT_D16_HI  : FLAT_Real_AllAddr_gfx10<0x025>;
1356
1357let SubtargetPredicate = HasAtomicFaddInsts in {
1358
1359defm GLOBAL_ATOMIC_ADD_F32    : FLAT_Real_AllAddr_vi <0x04d>;
1360defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Real_AllAddr_vi <0x04e>;
1361
1362} // End SubtargetPredicate = HasAtomicFaddInsts
1363