xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/FLATInstructions.td (revision e40139ff33b48b56a24c808b166b04b8ee6f5b21)
//===-- FLATInstructions.td - FLAT Instruction Definitions ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
// Address-matching complex patterns for the FLAT family. Each matches an i64
// address, yields three result operands, and delegates to the named selector
// function. All four request the root node (SDNPWantRoot).
def FLATAtomic : ComplexPattern<
  i64, 3, "SelectFlatAtomic", [], [SDNPWantRoot], -10>;
def FLATOffset : ComplexPattern<
  i64, 3, "SelectFlatOffset<false>", [], [SDNPWantRoot], -10>;

// Variants that use the "signed" selectors.
def FLATOffsetSigned : ComplexPattern<
  i64, 3, "SelectFlatOffset<true>", [], [SDNPWantRoot], -10>;
def FLATSignedAtomic : ComplexPattern<
  i64, 3, "SelectFlatAtomicSigned", [], [SDNPWantRoot], -10>;
14
15//===----------------------------------------------------------------------===//
16// FLAT classes
17//===----------------------------------------------------------------------===//
18
// Shared base of every FLAT-family pseudo instruction (flat, global and
// scratch segments). It carries no encoding of its own; instead it records
// the mnemonic, the assembly operand string and a set of has_* / is_* flags
// that FLAT_Real consults when laying out the encoding.
class FLAT_Pseudo<string opName, dag outs, dag ins,
                  string asmOps, list<dag> pattern=[]> :
  InstSI<outs, ins, "", pattern>,
  SIMCInstr<opName, SIEncodingFamily.NONE> {

  // Generic instruction properties.
  let isPseudo = 1;
  let isCodeGenOnly = 1;
  let FLAT = 1;
  let UseNamedOperandTable = 1;
  let hasSideEffects = 0;
  let SchedRW = [WriteVMEM];

  // Kept separately so the real instruction can rebuild its asm string.
  string Mnemonic = opName;
  string AsmOperands = asmOps;

  // Segment selection; both clear means the plain flat segment.
  bits<1> is_flat_global = 0;
  bits<1> is_flat_scratch = 0;

  bits<1> has_vdst = 1;

  // "Having" an saddr field and "enabling" it are tracked separately:
  // saddr is only valid for scratch and global instructions. Before gfx9
  // these bits were reserved, so the original flat-segment instructions
  // should not necessarily get the "disabled" value written into them.
  bits<1> has_saddr = 0;
  bits<1> enabled_saddr = 0;
  bits<7> saddr_value = 0;
  bits<1> has_vaddr = 1;

  bits<1> has_data = 1;
  bits<1> has_glc  = 1;
  bits<1> glcValue = 0;
  bits<1> has_dlc  = 1;
  bits<1> dlcValue = 0;

  // Pick the subtarget feature that matches the segment.
  let SubtargetPredicate = !if(is_flat_global, HasFlatGlobalInsts,
    !if(is_flat_scratch, HasFlatScratchInsts, HasFlatAddressSpace));

  // TODO: M0 if it could possibly access LDS (before gfx9? only)?
  let Uses = !if(is_flat_global, [EXEC], [EXEC, FLAT_SCR]);

  // Internally, FLAT instructions are executed as both an LDS and a
  // buffer instruction, so they increment both VM_CNT and LGKM_CNT and
  // are not considered done until both have been decremented. Global and
  // scratch instructions do not set LGKM_CNT.
  let VM_CNT = 1;
  let LGKM_CNT = !if(is_flat_global, 0, !if(is_flat_scratch, 0, 1));

  // Set for the global and scratch segments, clear for plain flat.
  let IsNonFlatSeg = !if(is_flat_global, 1, !if(is_flat_scratch, 1, 0));
}
71
// Encodable counterpart of a FLAT_Pseudo. Operand lists and the assembly
// string are taken from the pseudo `ps`; `op` is the 7-bit opcode placed in
// the 64-bit encoding.
class FLAT_Real <bits<7> op, FLAT_Pseudo ps> :
  InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
  Enc64 {

  let isPseudo = 0;
  let isCodeGenOnly = 0;

  // Properties inherited from the pseudo.
  let SubtargetPredicate   = ps.SubtargetPredicate;
  let AsmMatchConverter    = ps.AsmMatchConverter;
  let TSFlags              = ps.TSFlags;
  let UseNamedOperandTable = ps.UseNamedOperandTable;

  // Register operand fields.
  bits<8> vaddr;
  bits<8> vdata;
  bits<7> saddr;
  bits<8> vdst;

  // Cache-policy bits. dlc is declared here but not placed in the encoding
  // by this class.
  bits<1> slc;
  bits<1> glc;
  bits<1> dlc;

  // Only valid on gfx9
  bits<1> lds = 0; // XXX - What does this actually do?

  // Segment, 00=flat, 01=scratch, 10=global, 11=reserved
  bits<2> seg = !if(ps.is_flat_global, 0b10,
                  !if(ps.is_flat_scratch, 0b01, 0));

  // Signed offset. For flat-segment accesses the highest bit is ignored and
  // the value is treated as a 12-bit unsigned offset.
  bits<13> offset;
  bits<1> nv = 0; // XXX - What does this actually do?

  // We don't use tfe right now, and it was removed in gfx9.
  bits<1> tfe = 0;

  // Low dword. Bits 15-0 are only valid on GFX9+.
  let Inst{12-0}  = offset;
  let Inst{13}    = lds;
  let Inst{15-14} = seg;
  let Inst{16}    = !if(ps.has_glc, glc, ps.glcValue);
  let Inst{17}    = slc;
  let Inst{24-18} = op;
  // Bit 25 is not assigned by this class.
  let Inst{31-26} = 0x37; // Encoding.

  // High dword. Register fields the pseudo does not have are left unset.
  let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
  let Inst{47-40} = !if(ps.has_data, vdata, ?);

  // saddr: the register when enabled, 0x7f when the field exists but is
  // disabled, and 0 when the pseudo has no saddr field at all.
  let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7f), 0);

  let Inst{55}    = nv; // nv on GFX9+, TFE before.
  let Inst{63-56} = !if(ps.has_vdst, vdst, ?);
}
127
// Mix-in that tags an instruction with a shared name (SaddrOp) and a flag
// saying whether it is the saddr form (IsSaddr).
class GlobalSaddrTable <bit is_saddr, string Name = ""> {
  string SaddrOp = Name;
  bit IsSaddr = is_saddr;
}
132
// TODO: Is exec allowed for saddr? The disabled value 0x7f is the
// same encoding value as exec_hi, so it isn't possible to use that if
// saddr is 32-bit (which isn't handled here yet).
//
// Load pseudo for the flat and global segments.
//   HasTiedOutput - append a $vdst_in input tied to $vdst.
//   HasSaddr      - the instruction has an saddr field; the asm string then
//                   shows either the register or "off".
//   EnableSaddr   - an SReg_64 $saddr operand is actually present.
class FLAT_Load_Pseudo <string opName, RegisterClass regClass,
  bit HasTiedOutput = 0,
  bit HasSaddr = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
  opName,
  (outs regClass:$vdst),
  // Inputs: address operand(s), then offset and cache bits, then the
  // optional tied input.
  !con(
    !con(
      !if(EnableSaddr,
          (ins VReg_64:$vaddr, SReg_64:$saddr),
          (ins VReg_64:$vaddr)),
      (ins flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
    !if(HasTiedOutput, (ins regClass:$vdst_in), (ins))),
  " $vdst, $vaddr"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc$dlc"> {
  let mayLoad = 1;
  let maybeAtomic = 1;
  let has_data = 0;
  let has_saddr = HasSaddr;
  let enabled_saddr = EnableSaddr;

  // The saddr form gets a "_SADDR" suffix on its pseudo name.
  let PseudoInstr = opName#!if(HasSaddr, !if(EnableSaddr, "_SADDR", ""), "");

  // Tie the extra input to the result and keep it out of the encoding.
  let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", "");
  let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
}
158
// Store pseudo for the flat and global segments. HasSaddr / EnableSaddr have
// the same meaning as in FLAT_Load_Pseudo: the first says an saddr field
// exists, the second says an SReg_64 $saddr operand is actually present.
class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass,
  bit HasSaddr = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
  opName,
  (outs),
  // Inputs: address and data (plus optional saddr), then offset and cache
  // bits.
  !con(
    !if(EnableSaddr,
        (ins VReg_64:$vaddr, vdataClass:$vdata, SReg_64:$saddr),
        (ins VReg_64:$vaddr, vdataClass:$vdata)),
    (ins flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
  " $vaddr, $vdata"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc$dlc"> {
  let mayStore = 1;
  let mayLoad  = 0;
  let maybeAtomic = 1;
  let has_vdst = 0;
  let has_saddr = HasSaddr;
  let enabled_saddr = EnableSaddr;

  // The saddr form gets a "_SADDR" suffix on its pseudo name.
  let PseudoInstr = opName#!if(HasSaddr, !if(EnableSaddr, "_SADDR", ""), "");
}
176
// Emits the two global-segment forms of a load: the base record with saddr
// disabled ("off") and a _SADDR record with saddr enabled. Both are entered
// in GlobalSaddrTable under the same opName.
multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass, bit HasTiedInput = 0> {
  let is_flat_global = 1 in
  def "" : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1, 0>,
           GlobalSaddrTable<0, opName>;

  let is_flat_global = 1 in
  def _SADDR : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1, 1>,
               GlobalSaddrTable<1, opName>;
}
185
// Emits the two global-segment forms of a store: the base record with saddr
// disabled ("off") and a _SADDR record with saddr enabled. Both are entered
// in GlobalSaddrTable under the same opName.
multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> {
  let is_flat_global = 1 in
  def "" : FLAT_Store_Pseudo<opName, regClass, 1, 0>,
           GlobalSaddrTable<0, opName>;

  let is_flat_global = 1 in
  def _SADDR : FLAT_Store_Pseudo<opName, regClass, 1, 1>,
               GlobalSaddrTable<1, opName>;
}
194
// Scratch-segment load pseudo. Exactly one address operand is present:
// an SReg_32_XEXEC_HI $saddr when EnableSaddr is set, otherwise a VGPR_32
// $vaddr. The absent one is printed as "off".
class FLAT_Scratch_Load_Pseudo <string opName, RegisterClass regClass,
  bit EnableSaddr = 0>: FLAT_Pseudo<
  opName,
  (outs regClass:$vdst),
  !con(
    !if(EnableSaddr,
        (ins SReg_32_XEXEC_HI:$saddr),
        (ins VGPR_32:$vaddr)),
    (ins flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
  " $vdst, "#!if(EnableSaddr, "off", "$vaddr")#!if(EnableSaddr, ", $saddr", ", off")#"$offset$glc$slc$dlc"> {
  let mayLoad = 1;
  let maybeAtomic = 1;
  let has_data = 0;

  // The saddr field always exists for scratch; vaddr exists only when saddr
  // is not in use.
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;
  let has_vaddr = !if(EnableSaddr, 0, 1);

  let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", "");
}
211
// Scratch-segment store pseudo. The data operand comes first, followed by
// exactly one address operand: an SReg_32_XEXEC_HI $saddr when EnableSaddr is
// set, otherwise a VGPR_32 $vaddr. The absent one is printed as "off".
class FLAT_Scratch_Store_Pseudo <string opName, RegisterClass vdataClass, bit EnableSaddr = 0> : FLAT_Pseudo<
  opName,
  (outs),
  !con(
    !if(EnableSaddr,
        (ins vdataClass:$vdata, SReg_32_XEXEC_HI:$saddr),
        (ins vdataClass:$vdata, VGPR_32:$vaddr)),
    (ins flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
  " "#!if(EnableSaddr, "off", "$vaddr")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc"> {
  let mayStore = 1;
  let mayLoad  = 0;
  let maybeAtomic = 1;
  let has_vdst = 0;

  // The saddr field always exists for scratch; vaddr exists only when saddr
  // is not in use.
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;
  let has_vaddr = !if(EnableSaddr, 0, 1);

  let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", "");
}
228
// Emits the vaddr form (base name) and the saddr form (_SADDR) of a scratch
// load, both flagged as scratch-segment instructions.
multiclass FLAT_Scratch_Load_Pseudo<string opName, RegisterClass regClass> {
  let is_flat_scratch = 1 in
  def "" : FLAT_Scratch_Load_Pseudo<opName, regClass, 0>;

  let is_flat_scratch = 1 in
  def _SADDR : FLAT_Scratch_Load_Pseudo<opName, regClass, 1>;
}
235
// Emits the vaddr form (base name) and the saddr form (_SADDR) of a scratch
// store, both flagged as scratch-segment instructions.
multiclass FLAT_Scratch_Store_Pseudo<string opName, RegisterClass regClass> {
  let is_flat_scratch = 1 in
  def "" : FLAT_Scratch_Store_Pseudo<opName, regClass, 0>;

  let is_flat_scratch = 1 in
  def _SADDR : FLAT_Scratch_Store_Pseudo<opName, regClass, 1>;
}
242
// Base for atomics that do not return the previous value: no $vdst, no
// explicit glc/dlc operands (both encode as 0), and the instruction both
// loads and stores.
class FLAT_AtomicNoRet_Pseudo<string opName, dag outs, dag ins,
                               string asm, list<dag> pattern = []> :
  FLAT_Pseudo<opName, outs, ins, asm, pattern> {
  let mayLoad = 1;
  let mayStore = 1;
  let maybeAtomic = 1;

  let has_vdst = 0;

  // glc and dlc are not operands; fixed values are encoded instead.
  let has_glc  = 0;
  let glcValue = 0;
  let has_dlc  = 0;
  let dlcValue = 0;
}
255
// Returning flavour of a FLAT atomic: restores $vdst and encodes glc as 1
// (glc is still not an operand). The pseudo name gets an "_RTN" suffix.
class FLAT_AtomicRet_Pseudo<string opName, dag outs, dag ins,
                            string asm, list<dag> pattern = []>
  : FLAT_AtomicNoRet_Pseudo<opName, outs, ins, asm, pattern> {
  let has_vdst = 1;
  let glcValue = 1;
  let dlcValue = 0;
  let hasPostISelHook = 1;
  let PseudoInstr = NAME # "_RTN";
}
265
// Emits the flat-segment pair for one atomic operation:
//   ""   - no-return form, no selection pattern;
//   _RTN - returning form, selected from `atomic` via the FLATAtomic
//          address pattern.
// data_vt / data_rc default to the result type and register class; isFP is
// derived from data_vt and stored in FPAtomic.
multiclass FLAT_Atomic_Pseudo<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  SDPatternOperator atomic = null_frag,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc,
  bit isFP = isFloatType<data_vt>.ret> {

  def "" : FLAT_AtomicNoRet_Pseudo<
             opName,
             (outs),
             (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC:$slc),
             " $vaddr, $vdata$offset$slc">,
           GlobalSaddrTable<0, opName>,
           AtomicNoRet<opName, 0> {
    let PseudoInstr = NAME;
    let FPAtomic = isFP;
  }

  def _RTN : FLAT_AtomicRet_Pseudo<
               opName,
               (outs vdst_rc:$vdst),
               (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC:$slc),
               " $vdst, $vaddr, $vdata$offset glc$slc",
               [(set vt:$vdst,
                 (atomic (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc),
                         data_vt:$vdata))]>,
             GlobalSaddrTable<0, opName#"_rtn">,
             AtomicNoRet<opName, 1> {
    let FPAtomic = isFP;
  }
}
295
// Emits the two no-return global atomic forms for one operation:
//   ""     - saddr field present but disabled (printed as "off");
//   _SADDR - saddr field present and enabled, with an SReg_64 $saddr operand.
// Neither form carries a selection pattern here.
multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  SDPatternOperator atomic = null_frag,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc,
  bit isFP = isFloatType<data_vt>.ret> {

  def "" : FLAT_AtomicNoRet_Pseudo<
             opName,
             (outs),
             (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC:$slc),
             " $vaddr, $vdata, off$offset$slc">,
           GlobalSaddrTable<0, opName>,
           AtomicNoRet<opName, 0> {
    let has_saddr = 1;
    let PseudoInstr = NAME;
    let FPAtomic = isFP;
  }

  def _SADDR : FLAT_AtomicNoRet_Pseudo<
                 opName,
                 (outs),
                 (ins VReg_64:$vaddr, data_rc:$vdata, SReg_64:$saddr,
                      flat_offset:$offset, SLC:$slc),
                 " $vaddr, $vdata, $saddr$offset$slc">,
               GlobalSaddrTable<1, opName>,
               AtomicNoRet<opName#"_saddr", 0> {
    let has_saddr = 1;
    let enabled_saddr = 1;
    let PseudoInstr = NAME#"_SADDR";
    let FPAtomic = isFP;
  }
}
328
// Emits the two returning global atomic forms for one operation:
//   _RTN       - saddr disabled ("off"); selected from `atomic` through the
//                FLATSignedAtomic address pattern;
//   _SADDR_RTN - saddr enabled; no selection pattern.
multiclass FLAT_Global_Atomic_Pseudo_RTN<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  SDPatternOperator atomic = null_frag,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc,
  bit isFP = isFloatType<data_vt>.ret> {

  def _RTN : FLAT_AtomicRet_Pseudo<
               opName,
               (outs vdst_rc:$vdst),
               (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC:$slc),
               " $vdst, $vaddr, $vdata, off$offset glc$slc",
               [(set vt:$vdst,
                 (atomic (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc),
                         data_vt:$vdata))]>,
             GlobalSaddrTable<0, opName#"_rtn">,
             AtomicNoRet<opName, 1> {
    let has_saddr = 1;
    let FPAtomic = isFP;
  }

  def _SADDR_RTN : FLAT_AtomicRet_Pseudo<
                     opName,
                     (outs vdst_rc:$vdst),
                     (ins VReg_64:$vaddr, data_rc:$vdata, SReg_64:$saddr,
                          flat_offset:$offset, SLC:$slc),
                     " $vdst, $vaddr, $vdata, $saddr$offset glc$slc">,
                   GlobalSaddrTable<1, opName#"_rtn">,
                   AtomicNoRet<opName#"_saddr", 1> {
    let has_saddr = 1;
    let enabled_saddr = 1;
    let PseudoInstr = NAME#"_SADDR_RTN";
    let FPAtomic = isFP;
  }
}
362
// Convenience wrapper: instantiates both the no-return and the returning
// global atomic multiclasses with the same arguments.
multiclass FLAT_Global_Atomic_Pseudo<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  SDPatternOperator atomic = null_frag,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc>
  : FLAT_Global_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt, atomic,
                                     data_vt, data_rc>,
    FLAT_Global_Atomic_Pseudo_RTN<opName, vdst_rc, vt, atomic,
                                  data_vt, data_rc>;
372
// Wraps a two-operand atomic node so that it only matches when the memory
// access is in the flat address space.
class flat_binary_atomic_op<SDNode atomic_op>
  : PatFrag<(ops node:$ptr, node:$value),
            (atomic_op node:$ptr, node:$value),
            [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;}]>;
378
// Flat-address-space-restricted fragments for each atomic operation.
def atomic_cmp_swap_flat : flat_binary_atomic_op<AMDGPUatomic_cmp_swap>;
def atomic_swap_flat : flat_binary_atomic_op<atomic_swap>;
def atomic_add_flat : flat_binary_atomic_op<atomic_load_add>;
def atomic_and_flat : flat_binary_atomic_op<atomic_load_and>;
def atomic_max_flat : flat_binary_atomic_op<atomic_load_max>;
def atomic_min_flat : flat_binary_atomic_op<atomic_load_min>;
def atomic_or_flat : flat_binary_atomic_op<atomic_load_or>;
def atomic_sub_flat : flat_binary_atomic_op<atomic_load_sub>;
def atomic_umax_flat : flat_binary_atomic_op<atomic_load_umax>;
def atomic_umin_flat : flat_binary_atomic_op<atomic_load_umin>;
def atomic_xor_flat : flat_binary_atomic_op<atomic_load_xor>;
def atomic_inc_flat : flat_binary_atomic_op<SIatomic_inc>;
def atomic_dec_flat : flat_binary_atomic_op<SIatomic_dec>;
392
393
394
395//===----------------------------------------------------------------------===//
396// Flat Instructions
397//===----------------------------------------------------------------------===//
398
// Flat-segment loads.
def FLAT_LOAD_UBYTE : FLAT_Load_Pseudo<"flat_load_ubyte", VGPR_32>;
def FLAT_LOAD_SBYTE : FLAT_Load_Pseudo<"flat_load_sbyte", VGPR_32>;
def FLAT_LOAD_USHORT : FLAT_Load_Pseudo<"flat_load_ushort", VGPR_32>;
def FLAT_LOAD_SSHORT : FLAT_Load_Pseudo<"flat_load_sshort", VGPR_32>;
def FLAT_LOAD_DWORD : FLAT_Load_Pseudo<"flat_load_dword", VGPR_32>;
def FLAT_LOAD_DWORDX2 : FLAT_Load_Pseudo<"flat_load_dwordx2", VReg_64>;
def FLAT_LOAD_DWORDX4 : FLAT_Load_Pseudo<"flat_load_dwordx4", VReg_128>;
def FLAT_LOAD_DWORDX3 : FLAT_Load_Pseudo<"flat_load_dwordx3", VReg_96>;

// Flat-segment stores.
def FLAT_STORE_BYTE : FLAT_Store_Pseudo<"flat_store_byte", VGPR_32>;
def FLAT_STORE_SHORT : FLAT_Store_Pseudo<"flat_store_short", VGPR_32>;
def FLAT_STORE_DWORD : FLAT_Store_Pseudo<"flat_store_dword", VGPR_32>;
def FLAT_STORE_DWORDX2 : FLAT_Store_Pseudo<"flat_store_dwordx2", VReg_64>;
def FLAT_STORE_DWORDX4 : FLAT_Store_Pseudo<"flat_store_dwordx4", VReg_128>;
def FLAT_STORE_DWORDX3 : FLAT_Store_Pseudo<"flat_store_dwordx3", VReg_96>;
414
// D16 flat loads and stores, gated on HasD16LoadStore. The loads pass
// HasTiedOutput = 1, so each one ties $vdst to a $vdst_in input.
let SubtargetPredicate = HasD16LoadStore in {
  def FLAT_LOAD_UBYTE_D16 : FLAT_Load_Pseudo<"flat_load_ubyte_d16", VGPR_32, 1>;
  def FLAT_LOAD_UBYTE_D16_HI : FLAT_Load_Pseudo<"flat_load_ubyte_d16_hi", VGPR_32, 1>;
  def FLAT_LOAD_SBYTE_D16 : FLAT_Load_Pseudo<"flat_load_sbyte_d16", VGPR_32, 1>;
  def FLAT_LOAD_SBYTE_D16_HI : FLAT_Load_Pseudo<"flat_load_sbyte_d16_hi", VGPR_32, 1>;
  def FLAT_LOAD_SHORT_D16 : FLAT_Load_Pseudo<"flat_load_short_d16", VGPR_32, 1>;
  def FLAT_LOAD_SHORT_D16_HI : FLAT_Load_Pseudo<"flat_load_short_d16_hi", VGPR_32, 1>;

  def FLAT_STORE_BYTE_D16_HI : FLAT_Store_Pseudo<"flat_store_byte_d16_hi", VGPR_32>;
  def FLAT_STORE_SHORT_D16_HI : FLAT_Store_Pseudo<"flat_store_short_d16_hi", VGPR_32>;
} // End SubtargetPredicate = HasD16LoadStore
426
// Flat-segment integer atomics. Each defm yields a no-return record and an
// _RTN record. The cmpswap forms take a data operand twice the width of the
// result.

// Compare-and-swap and swap.
defm FLAT_ATOMIC_CMPSWAP : FLAT_Atomic_Pseudo<
  "flat_atomic_cmpswap", VGPR_32, i32, atomic_cmp_swap_flat, v2i32, VReg_64>;
defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Atomic_Pseudo<
  "flat_atomic_cmpswap_x2", VReg_64, i64, atomic_cmp_swap_flat, v2i64, VReg_128>;
defm FLAT_ATOMIC_SWAP : FLAT_Atomic_Pseudo<
  "flat_atomic_swap", VGPR_32, i32, atomic_swap_flat>;
defm FLAT_ATOMIC_SWAP_X2 : FLAT_Atomic_Pseudo<
  "flat_atomic_swap_x2", VReg_64, i64, atomic_swap_flat>;

// 32-bit read-modify-write operations.
defm FLAT_ATOMIC_ADD : FLAT_Atomic_Pseudo<
  "flat_atomic_add", VGPR_32, i32, atomic_add_flat>;
defm FLAT_ATOMIC_SUB : FLAT_Atomic_Pseudo<
  "flat_atomic_sub", VGPR_32, i32, atomic_sub_flat>;
defm FLAT_ATOMIC_SMIN : FLAT_Atomic_Pseudo<
  "flat_atomic_smin", VGPR_32, i32, atomic_min_flat>;
defm FLAT_ATOMIC_UMIN : FLAT_Atomic_Pseudo<
  "flat_atomic_umin", VGPR_32, i32, atomic_umin_flat>;
defm FLAT_ATOMIC_SMAX : FLAT_Atomic_Pseudo<
  "flat_atomic_smax", VGPR_32, i32, atomic_max_flat>;
defm FLAT_ATOMIC_UMAX : FLAT_Atomic_Pseudo<
  "flat_atomic_umax", VGPR_32, i32, atomic_umax_flat>;
defm FLAT_ATOMIC_AND : FLAT_Atomic_Pseudo<
  "flat_atomic_and", VGPR_32, i32, atomic_and_flat>;
defm FLAT_ATOMIC_OR : FLAT_Atomic_Pseudo<
  "flat_atomic_or", VGPR_32, i32, atomic_or_flat>;
defm FLAT_ATOMIC_XOR : FLAT_Atomic_Pseudo<
  "flat_atomic_xor", VGPR_32, i32, atomic_xor_flat>;
defm FLAT_ATOMIC_INC : FLAT_Atomic_Pseudo<
  "flat_atomic_inc", VGPR_32, i32, atomic_inc_flat>;
defm FLAT_ATOMIC_DEC : FLAT_Atomic_Pseudo<
  "flat_atomic_dec", VGPR_32, i32, atomic_dec_flat>;

// 64-bit read-modify-write operations.
defm FLAT_ATOMIC_ADD_X2 : FLAT_Atomic_Pseudo<
  "flat_atomic_add_x2", VReg_64, i64, atomic_add_flat>;
defm FLAT_ATOMIC_SUB_X2 : FLAT_Atomic_Pseudo<
  "flat_atomic_sub_x2", VReg_64, i64, atomic_sub_flat>;
defm FLAT_ATOMIC_SMIN_X2 : FLAT_Atomic_Pseudo<
  "flat_atomic_smin_x2", VReg_64, i64, atomic_min_flat>;
defm FLAT_ATOMIC_UMIN_X2 : FLAT_Atomic_Pseudo<
  "flat_atomic_umin_x2", VReg_64, i64, atomic_umin_flat>;
defm FLAT_ATOMIC_SMAX_X2 : FLAT_Atomic_Pseudo<
  "flat_atomic_smax_x2", VReg_64, i64, atomic_max_flat>;
defm FLAT_ATOMIC_UMAX_X2 : FLAT_Atomic_Pseudo<
  "flat_atomic_umax_x2", VReg_64, i64, atomic_umax_flat>;
defm FLAT_ATOMIC_AND_X2 : FLAT_Atomic_Pseudo<
  "flat_atomic_and_x2", VReg_64, i64, atomic_and_flat>;
defm FLAT_ATOMIC_OR_X2 : FLAT_Atomic_Pseudo<
  "flat_atomic_or_x2", VReg_64, i64, atomic_or_flat>;
defm FLAT_ATOMIC_XOR_X2 : FLAT_Atomic_Pseudo<
  "flat_atomic_xor_x2", VReg_64, i64, atomic_xor_flat>;
defm FLAT_ATOMIC_INC_X2 : FLAT_Atomic_Pseudo<
  "flat_atomic_inc_x2", VReg_64, i64, atomic_inc_flat>;
defm FLAT_ATOMIC_DEC_X2 : FLAT_Atomic_Pseudo<
  "flat_atomic_dec_x2", VReg_64, i64, atomic_dec_flat>;
506
// Flat floating-point atomics, gated on isGFX7GFX10. No selection fragment
// is attached (the atomic argument is null_frag, explicitly or by default).
// The fcmpswap forms take a data operand twice the width of the result.
let SubtargetPredicate = isGFX7GFX10 in {
  defm FLAT_ATOMIC_FCMPSWAP : FLAT_Atomic_Pseudo<
    "flat_atomic_fcmpswap", VGPR_32, f32, null_frag, v2f32, VReg_64>;
  defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Atomic_Pseudo<
    "flat_atomic_fcmpswap_x2", VReg_64, f64, null_frag, v2f64, VReg_128>;

  defm FLAT_ATOMIC_FMIN : FLAT_Atomic_Pseudo<"flat_atomic_fmin", VGPR_32, f32>;
  defm FLAT_ATOMIC_FMAX : FLAT_Atomic_Pseudo<"flat_atomic_fmax", VGPR_32, f32>;

  defm FLAT_ATOMIC_FMIN_X2 : FLAT_Atomic_Pseudo<"flat_atomic_fmin_x2", VReg_64, f64>;
  defm FLAT_ATOMIC_FMAX_X2 : FLAT_Atomic_Pseudo<"flat_atomic_fmax_x2", VReg_64, f64>;
} // End SubtargetPredicate = isGFX7GFX10
529
// Global-segment instructions, gated on HasFlatGlobalInsts. The load/store
// multiclasses set is_flat_global themselves; the atomic multiclasses do not,
// so the atomics sit inside an explicit `let is_flat_global = 1`.
let SubtargetPredicate = HasFlatGlobalInsts in {

  // Loads.
  defm GLOBAL_LOAD_UBYTE : FLAT_Global_Load_Pseudo<"global_load_ubyte", VGPR_32>;
  defm GLOBAL_LOAD_SBYTE : FLAT_Global_Load_Pseudo<"global_load_sbyte", VGPR_32>;
  defm GLOBAL_LOAD_USHORT : FLAT_Global_Load_Pseudo<"global_load_ushort", VGPR_32>;
  defm GLOBAL_LOAD_SSHORT : FLAT_Global_Load_Pseudo<"global_load_sshort", VGPR_32>;
  defm GLOBAL_LOAD_DWORD : FLAT_Global_Load_Pseudo<"global_load_dword", VGPR_32>;
  defm GLOBAL_LOAD_DWORDX2 : FLAT_Global_Load_Pseudo<"global_load_dwordx2", VReg_64>;
  defm GLOBAL_LOAD_DWORDX3 : FLAT_Global_Load_Pseudo<"global_load_dwordx3", VReg_96>;
  defm GLOBAL_LOAD_DWORDX4 : FLAT_Global_Load_Pseudo<"global_load_dwordx4", VReg_128>;

  // D16 loads (tied output).
  defm GLOBAL_LOAD_UBYTE_D16 : FLAT_Global_Load_Pseudo<"global_load_ubyte_d16", VGPR_32, 1>;
  defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Global_Load_Pseudo<"global_load_ubyte_d16_hi", VGPR_32, 1>;
  defm GLOBAL_LOAD_SBYTE_D16 : FLAT_Global_Load_Pseudo<"global_load_sbyte_d16", VGPR_32, 1>;
  defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Global_Load_Pseudo<"global_load_sbyte_d16_hi", VGPR_32, 1>;
  defm GLOBAL_LOAD_SHORT_D16 : FLAT_Global_Load_Pseudo<"global_load_short_d16", VGPR_32, 1>;
  defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Global_Load_Pseudo<"global_load_short_d16_hi", VGPR_32, 1>;

  // Stores.
  defm GLOBAL_STORE_BYTE : FLAT_Global_Store_Pseudo<"global_store_byte", VGPR_32>;
  defm GLOBAL_STORE_SHORT : FLAT_Global_Store_Pseudo<"global_store_short", VGPR_32>;
  defm GLOBAL_STORE_DWORD : FLAT_Global_Store_Pseudo<"global_store_dword", VGPR_32>;
  defm GLOBAL_STORE_DWORDX2 : FLAT_Global_Store_Pseudo<"global_store_dwordx2", VReg_64>;
  defm GLOBAL_STORE_DWORDX3 : FLAT_Global_Store_Pseudo<"global_store_dwordx3", VReg_96>;
  defm GLOBAL_STORE_DWORDX4 : FLAT_Global_Store_Pseudo<"global_store_dwordx4", VReg_128>;

  // D16 stores.
  defm GLOBAL_STORE_BYTE_D16_HI : FLAT_Global_Store_Pseudo<"global_store_byte_d16_hi", VGPR_32>;
  defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Global_Store_Pseudo<"global_store_short_d16_hi", VGPR_32>;

  // Integer atomics.
  let is_flat_global = 1 in {
    // Compare-and-swap and swap.
    defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Global_Atomic_Pseudo<
      "global_atomic_cmpswap", VGPR_32, i32, AMDGPUatomic_cmp_swap_global,
      v2i32, VReg_64>;
    defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Atomic_Pseudo<
      "global_atomic_cmpswap_x2", VReg_64, i64, AMDGPUatomic_cmp_swap_global,
      v2i64, VReg_128>;
    defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Atomic_Pseudo<
      "global_atomic_swap", VGPR_32, i32, atomic_swap_global_32>;
    defm GLOBAL_ATOMIC_SWAP_X2 : FLAT_Global_Atomic_Pseudo<
      "global_atomic_swap_x2", VReg_64, i64, atomic_swap_global_64>;

    // 32-bit read-modify-write operations.
    defm GLOBAL_ATOMIC_ADD : FLAT_Global_Atomic_Pseudo<
      "global_atomic_add", VGPR_32, i32, atomic_load_add_global_32>;
    defm GLOBAL_ATOMIC_SUB : FLAT_Global_Atomic_Pseudo<
      "global_atomic_sub", VGPR_32, i32, atomic_load_sub_global_32>;
    defm GLOBAL_ATOMIC_SMIN : FLAT_Global_Atomic_Pseudo<
      "global_atomic_smin", VGPR_32, i32, atomic_load_min_global_32>;
    defm GLOBAL_ATOMIC_UMIN : FLAT_Global_Atomic_Pseudo<
      "global_atomic_umin", VGPR_32, i32, atomic_load_umin_global_32>;
    defm GLOBAL_ATOMIC_SMAX : FLAT_Global_Atomic_Pseudo<
      "global_atomic_smax", VGPR_32, i32, atomic_load_max_global_32>;
    defm GLOBAL_ATOMIC_UMAX : FLAT_Global_Atomic_Pseudo<
      "global_atomic_umax", VGPR_32, i32, atomic_load_umax_global_32>;
    defm GLOBAL_ATOMIC_AND : FLAT_Global_Atomic_Pseudo<
      "global_atomic_and", VGPR_32, i32, atomic_load_and_global_32>;
    defm GLOBAL_ATOMIC_OR : FLAT_Global_Atomic_Pseudo<
      "global_atomic_or", VGPR_32, i32, atomic_load_or_global_32>;
    defm GLOBAL_ATOMIC_XOR : FLAT_Global_Atomic_Pseudo<
      "global_atomic_xor", VGPR_32, i32, atomic_load_xor_global_32>;
    defm GLOBAL_ATOMIC_INC : FLAT_Global_Atomic_Pseudo<
      "global_atomic_inc", VGPR_32, i32, atomic_inc_global_32>;
    defm GLOBAL_ATOMIC_DEC : FLAT_Global_Atomic_Pseudo<
      "global_atomic_dec", VGPR_32, i32, atomic_dec_global_32>;

    // 64-bit read-modify-write operations.
    defm GLOBAL_ATOMIC_ADD_X2 : FLAT_Global_Atomic_Pseudo<
      "global_atomic_add_x2", VReg_64, i64, atomic_load_add_global_64>;
    defm GLOBAL_ATOMIC_SUB_X2 : FLAT_Global_Atomic_Pseudo<
      "global_atomic_sub_x2", VReg_64, i64, atomic_load_sub_global_64>;
    defm GLOBAL_ATOMIC_SMIN_X2 : FLAT_Global_Atomic_Pseudo<
      "global_atomic_smin_x2", VReg_64, i64, atomic_load_min_global_64>;
    defm GLOBAL_ATOMIC_UMIN_X2 : FLAT_Global_Atomic_Pseudo<
      "global_atomic_umin_x2", VReg_64, i64, atomic_load_umin_global_64>;
    defm GLOBAL_ATOMIC_SMAX_X2 : FLAT_Global_Atomic_Pseudo<
      "global_atomic_smax_x2", VReg_64, i64, atomic_load_max_global_64>;
    defm GLOBAL_ATOMIC_UMAX_X2 : FLAT_Global_Atomic_Pseudo<
      "global_atomic_umax_x2", VReg_64, i64, atomic_load_umax_global_64>;
    defm GLOBAL_ATOMIC_AND_X2 : FLAT_Global_Atomic_Pseudo<
      "global_atomic_and_x2", VReg_64, i64, atomic_load_and_global_64>;
    defm GLOBAL_ATOMIC_OR_X2 : FLAT_Global_Atomic_Pseudo<
      "global_atomic_or_x2", VReg_64, i64, atomic_load_or_global_64>;
    defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Global_Atomic_Pseudo<
      "global_atomic_xor_x2", VReg_64, i64, atomic_load_xor_global_64>;
    defm GLOBAL_ATOMIC_INC_X2 : FLAT_Global_Atomic_Pseudo<
      "global_atomic_inc_x2", VReg_64, i64, atomic_inc_global_64>;
    defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Atomic_Pseudo<
      "global_atomic_dec_x2", VReg_64, i64, atomic_dec_global_64>;
  } // End is_flat_global = 1

} // End SubtargetPredicate = HasFlatGlobalInsts
640
641
// Scratch-segment loads and stores, gated on HasFlatScratchInsts. Each defm
// yields a vaddr record and a _SADDR record.
let SubtargetPredicate = HasFlatScratchInsts in {

  // Loads.
  defm SCRATCH_LOAD_UBYTE : FLAT_Scratch_Load_Pseudo<"scratch_load_ubyte", VGPR_32>;
  defm SCRATCH_LOAD_SBYTE : FLAT_Scratch_Load_Pseudo<"scratch_load_sbyte", VGPR_32>;
  defm SCRATCH_LOAD_USHORT : FLAT_Scratch_Load_Pseudo<"scratch_load_ushort", VGPR_32>;
  defm SCRATCH_LOAD_SSHORT : FLAT_Scratch_Load_Pseudo<"scratch_load_sshort", VGPR_32>;
  defm SCRATCH_LOAD_DWORD : FLAT_Scratch_Load_Pseudo<"scratch_load_dword", VGPR_32>;
  defm SCRATCH_LOAD_DWORDX2 : FLAT_Scratch_Load_Pseudo<"scratch_load_dwordx2", VReg_64>;
  defm SCRATCH_LOAD_DWORDX3 : FLAT_Scratch_Load_Pseudo<"scratch_load_dwordx3", VReg_96>;
  defm SCRATCH_LOAD_DWORDX4 : FLAT_Scratch_Load_Pseudo<"scratch_load_dwordx4", VReg_128>;

  // D16 loads.
  // NOTE(review): unlike the flat and global D16 loads, these do not tie
  // $vdst to an input operand (the scratch load class has no such option);
  // confirm whether that is intended.
  defm SCRATCH_LOAD_UBYTE_D16 : FLAT_Scratch_Load_Pseudo<"scratch_load_ubyte_d16", VGPR_32>;
  defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Scratch_Load_Pseudo<"scratch_load_ubyte_d16_hi", VGPR_32>;
  defm SCRATCH_LOAD_SBYTE_D16 : FLAT_Scratch_Load_Pseudo<"scratch_load_sbyte_d16", VGPR_32>;
  defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Scratch_Load_Pseudo<"scratch_load_sbyte_d16_hi", VGPR_32>;
  defm SCRATCH_LOAD_SHORT_D16 : FLAT_Scratch_Load_Pseudo<"scratch_load_short_d16", VGPR_32>;
  defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Scratch_Load_Pseudo<"scratch_load_short_d16_hi", VGPR_32>;

  // Stores.
  defm SCRATCH_STORE_BYTE : FLAT_Scratch_Store_Pseudo<"scratch_store_byte", VGPR_32>;
  defm SCRATCH_STORE_SHORT : FLAT_Scratch_Store_Pseudo<"scratch_store_short", VGPR_32>;
  defm SCRATCH_STORE_DWORD : FLAT_Scratch_Store_Pseudo<"scratch_store_dword", VGPR_32>;
  defm SCRATCH_STORE_DWORDX2 : FLAT_Scratch_Store_Pseudo<"scratch_store_dwordx2", VReg_64>;
  defm SCRATCH_STORE_DWORDX3 : FLAT_Scratch_Store_Pseudo<"scratch_store_dwordx3", VReg_96>;
  defm SCRATCH_STORE_DWORDX4 : FLAT_Scratch_Store_Pseudo<"scratch_store_dwordx4", VReg_128>;

  // D16 stores.
  defm SCRATCH_STORE_BYTE_D16_HI : FLAT_Scratch_Store_Pseudo<"scratch_store_byte_d16_hi", VGPR_32>;
  defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Scratch_Store_Pseudo<"scratch_store_short_d16_hi", VGPR_32>;

} // End SubtargetPredicate = HasFlatScratchInsts
670
// Global-segment floating-point atomics, gated on isGFX10Plus. No selection
// fragment is attached (null_frag).
//
// The compare-and-swap forms carry both the compare and the swap value in
// $vdata, so their data type and register class must be twice the width of
// the result. This matches the flat fcmpswap definitions
// (FLAT_ATOMIC_FCMPSWAP / _X2) and the integer GLOBAL_ATOMIC_CMPSWAP / _X2
// definitions in this file. Leaving data_vt / data_rc at their defaults
// would give $vdata the same width as $vdst.
let SubtargetPredicate = isGFX10Plus, is_flat_global = 1 in {
  defm GLOBAL_ATOMIC_FCMPSWAP :
    FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap", VGPR_32, f32,
                              null_frag, v2f32, VReg_64>;
  defm GLOBAL_ATOMIC_FMIN :
    FLAT_Global_Atomic_Pseudo<"global_atomic_fmin", VGPR_32, f32>;
  defm GLOBAL_ATOMIC_FMAX :
    FLAT_Global_Atomic_Pseudo<"global_atomic_fmax", VGPR_32, f32>;
  defm GLOBAL_ATOMIC_FCMPSWAP_X2 :
    FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap_x2", VReg_64, f64,
                              null_frag, v2f64, VReg_128>;
  defm GLOBAL_ATOMIC_FMIN_X2 :
    FLAT_Global_Atomic_Pseudo<"global_atomic_fmin_x2", VReg_64, f64>;
  defm GLOBAL_ATOMIC_FMAX_X2 :
    FLAT_Global_Atomic_Pseudo<"global_atomic_fmax_x2", VReg_64, f64>;
} // End SubtargetPredicate = isGFX10Plus, is_flat_global = 1
685
// Global floating-point add atomics, gated on HasAtomicFaddInsts. Only the
// no-return forms (base and _SADDR) are defined.
let SubtargetPredicate = HasAtomicFaddInsts, is_flat_global = 1 in {
  defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_NO_RTN<
    "global_atomic_add_f32", VGPR_32, f32, atomic_fadd_global_noret>;

  defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_NO_RTN<
    "global_atomic_pk_add_f16", VGPR_32, v2f16, atomic_pk_fadd_global_noret>;
} // End SubtargetPredicate = HasAtomicFaddInsts, is_flat_global = 1
696
697//===----------------------------------------------------------------------===//
698// Flat Patterns
699//===----------------------------------------------------------------------===//
700
// Selects a load whose address is matched by FLATOffset (base address plus
// immediate offset).
//
// Result operands are positional and follow the load pseudo's input list:
// vaddr, offset, glc, slc, dlc. The slc bit produced by the address pattern
// therefore belongs in the fourth slot; glc and dlc are left clear.
class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  (vt (node (FLATOffset i64:$vaddr, i16:$offset, i1:$slc))),
  (inst $vaddr, $offset, 0, $slc, 0)
>;
706
// Selects a D16 load (FLATOffset addressing) that merges into an existing
// value $in.
//
// Result operands are positional and follow the tied-output load pseudo's
// input list: vaddr, offset, glc, slc, dlc, vdst_in. The slc bit from the
// address pattern goes in the slc slot; glc and dlc are left clear.
class FlatLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  (node (FLATOffset (i64 VReg_64:$vaddr), i16:$offset, i1:$slc), vt:$in),
  (inst $vaddr, $offset, 0, $slc, 0, $in)
>;
711
// Same shape as FlatLoadPat_D16, but the address is matched with
// FLATOffsetSigned instead of FLATOffset.
class FlatSignedLoadPat_D16 <FLAT_Pseudo inst,
                             SDPatternOperator node,
                             ValueType vt>
  : GCNPat <
      (node (FLATOffsetSigned (i64 VReg_64:$vaddr), i16:$offset, i1:$slc),
            vt:$in),
      (inst $vaddr, $offset, 0, 0, $slc, $in)
    >;
716
// Atomic load pattern: the address is matched with FLATAtomic and the result
// has type `vt`. Output operands mirror the non-atomic load patterns.
class FlatLoadAtomicPat <FLAT_Pseudo inst,
                         SDPatternOperator node,
                         ValueType vt>
  : GCNPat <
      (vt (node (FLATAtomic (i64 VReg_64:$vaddr), i16:$offset, i1:$slc))),
      (inst $vaddr, $offset, 0, 0, $slc)
    >;
721
// Load pattern whose address is matched with FLATOffsetSigned; otherwise the
// same result type and output operand list as FlatLoadPat.
class FlatLoadSignedPat <FLAT_Pseudo inst,
                         SDPatternOperator node,
                         ValueType vt>
  : GCNPat <
      (vt (node (FLATOffsetSigned (i64 VReg_64:$vaddr), i16:$offset,
                                  i1:$slc))),
      (inst $vaddr, $offset, 0, 0, $slc)
    >;
726
// Store pattern: `node` takes the data (type `vt`) first and then an address
// matched by FLATOffset. The data operand is emitted in register class `rc`,
// which defaults to VGPR_32.
class FlatStorePat <FLAT_Pseudo inst,
                    SDPatternOperator node,
                    ValueType vt,
                    RegisterClass rc = VGPR_32>
  : GCNPat <
      (node vt:$data, (FLATOffset i64:$vaddr, i16:$offset, i1:$slc)),
      (inst $vaddr, rc:$data, $offset, 0, 0, $slc)
    >;
731
// Store pattern identical in shape to FlatStorePat, except that the address
// is matched with FLATOffsetSigned.
class FlatStoreSignedPat <FLAT_Pseudo inst,
                          SDPatternOperator node,
                          ValueType vt,
                          RegisterClass rc = VGPR_32>
  : GCNPat <
      (node vt:$data, (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc)),
      (inst $vaddr, rc:$data, $offset, 0, 0, $slc)
    >;
736
// Atomic store pattern with the address matched by FLATAtomic. Unlike the
// plain store patterns, the source node lists the address before the data,
// following the operand order of the atomic binary operations.
class FlatStoreAtomicPat <FLAT_Pseudo inst,
                          SDPatternOperator node,
                          ValueType vt,
                          RegisterClass rc = VGPR_32>
  : GCNPat <
      (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
      (inst $vaddr, rc:$data, $offset, 0, 0, $slc)
    >;
743
// Atomic store pattern with the address matched by FLATSignedAtomic. As with
// the atomic binary operations, the source node lists the address before the
// data.
class FlatStoreSignedAtomicPat <FLAT_Pseudo inst,
                                SDPatternOperator node,
                                ValueType vt,
                                RegisterClass rc = VGPR_32>
  : GCNPat <
      (node (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
      (inst $vaddr, rc:$data, $offset, 0, 0, $slc)
    >;
750
// Returning atomic read-modify-write pattern. The address is matched with
// FLATAtomic, the result has type `vt`, and the data operand has type
// `data_vt`, which defaults to `vt` (the compare-swap users pass a wider
// vector type).
class FlatAtomicPat <FLAT_Pseudo inst,
                     SDPatternOperator node,
                     ValueType vt,
                     ValueType data_vt = vt>
  : GCNPat <
      (vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc),
                data_vt:$data)),
      (inst $vaddr, $data, $offset, $slc)
    >;
756
// Atomic read-modify-write pattern without a result value: the source node
// is not wrapped in a result type. The address is matched with FLATAtomic.
class FlatAtomicPatNoRtn <FLAT_Pseudo inst,
                          SDPatternOperator node,
                          ValueType vt>
  : GCNPat <
      (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
      (inst $vaddr, $data, $offset, $slc)
    >;
761
// Returning atomic read-modify-write pattern whose address is matched with
// FLATSignedAtomic. `data_vt` defaults to the result type `vt`.
class FlatSignedAtomicPat <FLAT_Pseudo inst,
                           SDPatternOperator node,
                           ValueType vt,
                           ValueType data_vt = vt>
  : GCNPat <
      (vt (node (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc),
                data_vt:$data)),
      (inst $vaddr, $data, $offset, $slc)
    >;
767
// Selection patterns that target the FLAT_* pseudos. Everything in this scope
// is predicated on HasFlatAddressSpace unless an inner `let` says otherwise.
let OtherPredicates = [HasFlatAddressSpace] in {

// Byte, short and wide-vector loads.
def : FlatLoadPat<FLAT_LOAD_UBYTE,   extloadi8_flat,   i32>;
def : FlatLoadPat<FLAT_LOAD_UBYTE,   zextloadi8_flat,  i32>;
def : FlatLoadPat<FLAT_LOAD_SBYTE,   sextloadi8_flat,  i32>;
def : FlatLoadPat<FLAT_LOAD_UBYTE,   extloadi8_flat,   i16>;
def : FlatLoadPat<FLAT_LOAD_UBYTE,   zextloadi8_flat,  i16>;
def : FlatLoadPat<FLAT_LOAD_SBYTE,   sextloadi8_flat,  i16>;
def : FlatLoadPat<FLAT_LOAD_USHORT,  extloadi16_flat,  i32>;
def : FlatLoadPat<FLAT_LOAD_USHORT,  zextloadi16_flat, i32>;
def : FlatLoadPat<FLAT_LOAD_USHORT,  load_flat,        i16>;
def : FlatLoadPat<FLAT_LOAD_SSHORT,  sextloadi16_flat, i32>;
def : FlatLoadPat<FLAT_LOAD_DWORDX3, load_flat,        v3i32>;
def : FlatLoadPat<FLAT_LOAD_DWORDX4, load_flat,        v4i32>;

// Atomic loads.
def : FlatLoadAtomicPat<FLAT_LOAD_DWORD,   atomic_load_32_flat, i32>;
def : FlatLoadAtomicPat<FLAT_LOAD_DWORDX2, atomic_load_64_flat, i64>;

// Truncating stores from an i32 source.
def : FlatStorePat<FLAT_STORE_BYTE,  truncstorei8_flat,  i32>;
def : FlatStorePat<FLAT_STORE_SHORT, truncstorei16_flat, i32>;

// One dword load/store pair per type listed in Reg32Types.
foreach vt = Reg32Types.types in {
  def : FlatLoadPat<FLAT_LOAD_DWORD, load_flat, vt>;
  def : FlatStorePat<FLAT_STORE_DWORD, store_flat, vt>;
}

// One dwordx2 store/load pair per type carried by VReg_64.
foreach vt = VReg_64.RegTypes in {
  def : FlatStorePat<FLAT_STORE_DWORDX2, store_flat, vt, VReg_64>;
  def : FlatLoadPat<FLAT_LOAD_DWORDX2, load_flat, vt>;
}

// Wide-vector stores.
def : FlatStorePat<FLAT_STORE_DWORDX3, store_flat, v3i32, VReg_96>;
def : FlatStorePat<FLAT_STORE_DWORDX4, store_flat, v4i32, VReg_128>;

// Atomic stores.
def : FlatStoreAtomicPat<FLAT_STORE_DWORD,   atomic_store_flat_32, i32>;
def : FlatStoreAtomicPat<FLAT_STORE_DWORDX2, atomic_store_flat_64, i64,
                         VReg_64>;

// Returning 32-bit atomics. These reuse the *_global_32 nodes for the FLAT
// pseudos.
def : FlatAtomicPat<FLAT_ATOMIC_ADD_RTN,     atomic_load_add_global_32,  i32>;
def : FlatAtomicPat<FLAT_ATOMIC_SUB_RTN,     atomic_load_sub_global_32,  i32>;
def : FlatAtomicPat<FLAT_ATOMIC_INC_RTN,     atomic_inc_global_32,       i32>;
def : FlatAtomicPat<FLAT_ATOMIC_DEC_RTN,     atomic_dec_global_32,       i32>;
def : FlatAtomicPat<FLAT_ATOMIC_AND_RTN,     atomic_load_and_global_32,  i32>;
def : FlatAtomicPat<FLAT_ATOMIC_SMAX_RTN,    atomic_load_max_global_32,  i32>;
def : FlatAtomicPat<FLAT_ATOMIC_UMAX_RTN,    atomic_load_umax_global_32, i32>;
def : FlatAtomicPat<FLAT_ATOMIC_SMIN_RTN,    atomic_load_min_global_32,  i32>;
def : FlatAtomicPat<FLAT_ATOMIC_UMIN_RTN,    atomic_load_umin_global_32, i32>;
def : FlatAtomicPat<FLAT_ATOMIC_OR_RTN,      atomic_load_or_global_32,   i32>;
def : FlatAtomicPat<FLAT_ATOMIC_SWAP_RTN,    atomic_swap_global_32,      i32>;
def : FlatAtomicPat<FLAT_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global,
                    i32, v2i32>;
def : FlatAtomicPat<FLAT_ATOMIC_XOR_RTN,     atomic_load_xor_global_32,  i32>;

// Returning 64-bit atomics.
def : FlatAtomicPat<FLAT_ATOMIC_ADD_X2_RTN,     atomic_load_add_global_64,  i64>;
def : FlatAtomicPat<FLAT_ATOMIC_SUB_X2_RTN,     atomic_load_sub_global_64,  i64>;
def : FlatAtomicPat<FLAT_ATOMIC_INC_X2_RTN,     atomic_inc_global_64,       i64>;
def : FlatAtomicPat<FLAT_ATOMIC_DEC_X2_RTN,     atomic_dec_global_64,       i64>;
def : FlatAtomicPat<FLAT_ATOMIC_AND_X2_RTN,     atomic_load_and_global_64,  i64>;
def : FlatAtomicPat<FLAT_ATOMIC_SMAX_X2_RTN,    atomic_load_max_global_64,  i64>;
def : FlatAtomicPat<FLAT_ATOMIC_UMAX_X2_RTN,    atomic_load_umax_global_64, i64>;
def : FlatAtomicPat<FLAT_ATOMIC_SMIN_X2_RTN,    atomic_load_min_global_64,  i64>;
def : FlatAtomicPat<FLAT_ATOMIC_UMIN_X2_RTN,    atomic_load_umin_global_64, i64>;
def : FlatAtomicPat<FLAT_ATOMIC_OR_X2_RTN,      atomic_load_or_global_64,   i64>;
def : FlatAtomicPat<FLAT_ATOMIC_SWAP_X2_RTN,    atomic_swap_global_64,      i64>;
def : FlatAtomicPat<FLAT_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global,
                    i64, v2i64>;
def : FlatAtomicPat<FLAT_ATOMIC_XOR_X2_RTN,     atomic_load_xor_global_64,  i64>;

// Stores from an i16 source.
def : FlatStorePat<FLAT_STORE_BYTE,  truncstorei8_flat, i16>;
def : FlatStorePat<FLAT_STORE_SHORT, store_flat,        i16>;

// D16 loads and stores.
// NOTE(review): this inner `let` assigns the same OtherPredicates field as the
// enclosing scope; confirm that replacing [HasFlatAddressSpace] for these
// patterns is intended.
let OtherPredicates = [D16PreservesUnusedBits] in {
  def : FlatStorePat<FLAT_STORE_SHORT_D16_HI, truncstorei16_hi16_flat, i32>;
  def : FlatStorePat<FLAT_STORE_BYTE_D16_HI,  truncstorei8_hi16_flat,  i32>;

  // Loads into the "_HI" D16 variants.
  def : FlatLoadPat_D16<FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2i16>;
  def : FlatLoadPat_D16<FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2f16>;
  def : FlatLoadPat_D16<FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat,   v2i16>;
  def : FlatLoadPat_D16<FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat,   v2f16>;
  def : FlatLoadPat_D16<FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat,         v2i16>;
  def : FlatLoadPat_D16<FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat,         v2f16>;

  // Loads into the plain D16 variants.
  def : FlatLoadPat_D16<FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2i16>;
  def : FlatLoadPat_D16<FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2f16>;
  def : FlatLoadPat_D16<FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat,   v2i16>;
  def : FlatLoadPat_D16<FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat,   v2f16>;
  def : FlatLoadPat_D16<FLAT_LOAD_SHORT_D16, load_d16_lo_flat,         v2i16>;
  def : FlatLoadPat_D16<FLAT_LOAD_SHORT_D16, load_d16_lo_flat,         v2f16>;
}

} // End OtherPredicates = [HasFlatAddressSpace]
856
// Selection patterns that target the GLOBAL_* pseudos. Everything in this
// scope is predicated on HasFlatGlobalInsts (unless an inner `let` says
// otherwise) and carries AddedComplexity = 10.
let OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10 in {

// Byte and short loads.
def : FlatLoadSignedPat<GLOBAL_LOAD_UBYTE,  extloadi8_global,   i32>;
def : FlatLoadSignedPat<GLOBAL_LOAD_UBYTE,  zextloadi8_global,  i32>;
def : FlatLoadSignedPat<GLOBAL_LOAD_SBYTE,  sextloadi8_global,  i32>;
def : FlatLoadSignedPat<GLOBAL_LOAD_UBYTE,  extloadi8_global,   i16>;
def : FlatLoadSignedPat<GLOBAL_LOAD_UBYTE,  zextloadi8_global,  i16>;
def : FlatLoadSignedPat<GLOBAL_LOAD_SBYTE,  sextloadi8_global,  i16>;
def : FlatLoadSignedPat<GLOBAL_LOAD_USHORT, extloadi16_global,  i32>;
def : FlatLoadSignedPat<GLOBAL_LOAD_USHORT, zextloadi16_global, i32>;
def : FlatLoadSignedPat<GLOBAL_LOAD_SSHORT, sextloadi16_global, i32>;
def : FlatLoadSignedPat<GLOBAL_LOAD_USHORT, load_global,        i16>;

// One dword load/store pair per type listed in Reg32Types.
foreach vt = Reg32Types.types in {
  def : FlatLoadSignedPat<GLOBAL_LOAD_DWORD, load_global, vt>;
  def : FlatStoreSignedPat<GLOBAL_STORE_DWORD, store_global, vt, VGPR_32>;
}

// One dwordx2 load/store pair per type carried by VReg_64.
foreach vt = VReg_64.RegTypes in {
  def : FlatLoadSignedPat<GLOBAL_LOAD_DWORDX2, load_global, vt>;
  def : FlatStoreSignedPat<GLOBAL_STORE_DWORDX2, store_global, vt, VReg_64>;
}

// Wide-vector loads.
def : FlatLoadSignedPat<GLOBAL_LOAD_DWORDX3, load_global, v3i32>;
def : FlatLoadSignedPat<GLOBAL_LOAD_DWORDX4, load_global, v4i32>;

// Atomic loads. These use FlatLoadAtomicPat, i.e. the FLATAtomic address
// match, rather than a signed-offset variant.
def : FlatLoadAtomicPat<GLOBAL_LOAD_DWORD,   atomic_load_32_global, i32>;
def : FlatLoadAtomicPat<GLOBAL_LOAD_DWORDX2, atomic_load_64_global, i64>;

// Stores.
def : FlatStoreSignedPat<GLOBAL_STORE_BYTE,    truncstorei8_global,  i32,
                         VGPR_32>;
def : FlatStoreSignedPat<GLOBAL_STORE_BYTE,    truncstorei8_global,  i16,
                         VGPR_32>;
def : FlatStoreSignedPat<GLOBAL_STORE_SHORT,   truncstorei16_global, i32,
                         VGPR_32>;
def : FlatStoreSignedPat<GLOBAL_STORE_SHORT,   store_global,         i16,
                         VGPR_32>;
def : FlatStoreSignedPat<GLOBAL_STORE_DWORDX3, store_global,         v3i32,
                         VReg_96>;
def : FlatStoreSignedPat<GLOBAL_STORE_DWORDX4, store_global,         v4i32,
                         VReg_128>;

// D16 loads and stores.
// NOTE(review): this inner `let` assigns the same OtherPredicates field as the
// enclosing scope; confirm that replacing [HasFlatGlobalInsts] for these
// patterns is intended.
let OtherPredicates = [D16PreservesUnusedBits] in {
  def : FlatStoreSignedPat<GLOBAL_STORE_SHORT_D16_HI,
                           truncstorei16_hi16_global, i32>;
  def : FlatStoreSignedPat<GLOBAL_STORE_BYTE_D16_HI,
                           truncstorei8_hi16_global, i32>;

  // Loads into the "_HI" D16 variants.
  def : FlatSignedLoadPat_D16<GLOBAL_LOAD_UBYTE_D16_HI,
                              az_extloadi8_d16_hi_global, v2i16>;
  def : FlatSignedLoadPat_D16<GLOBAL_LOAD_UBYTE_D16_HI,
                              az_extloadi8_d16_hi_global, v2f16>;
  def : FlatSignedLoadPat_D16<GLOBAL_LOAD_SBYTE_D16_HI,
                              sextloadi8_d16_hi_global, v2i16>;
  def : FlatSignedLoadPat_D16<GLOBAL_LOAD_SBYTE_D16_HI,
                              sextloadi8_d16_hi_global, v2f16>;
  def : FlatSignedLoadPat_D16<GLOBAL_LOAD_SHORT_D16_HI,
                              load_d16_hi_global, v2i16>;
  def : FlatSignedLoadPat_D16<GLOBAL_LOAD_SHORT_D16_HI,
                              load_d16_hi_global, v2f16>;

  // Loads into the plain D16 variants.
  def : FlatSignedLoadPat_D16<GLOBAL_LOAD_UBYTE_D16,
                              az_extloadi8_d16_lo_global, v2i16>;
  def : FlatSignedLoadPat_D16<GLOBAL_LOAD_UBYTE_D16,
                              az_extloadi8_d16_lo_global, v2f16>;
  def : FlatSignedLoadPat_D16<GLOBAL_LOAD_SBYTE_D16,
                              sextloadi8_d16_lo_global, v2i16>;
  def : FlatSignedLoadPat_D16<GLOBAL_LOAD_SBYTE_D16,
                              sextloadi8_d16_lo_global, v2f16>;
  def : FlatSignedLoadPat_D16<GLOBAL_LOAD_SHORT_D16,
                              load_d16_lo_global, v2i16>;
  def : FlatSignedLoadPat_D16<GLOBAL_LOAD_SHORT_D16,
                              load_d16_lo_global, v2f16>;
}

// Atomic stores.
def : FlatStoreSignedAtomicPat<GLOBAL_STORE_DWORD,   store_atomic_global, i32>;
def : FlatStoreSignedAtomicPat<GLOBAL_STORE_DWORDX2, store_atomic_global, i64,
                               VReg_64>;

// Returning 32-bit atomics.
def : FlatSignedAtomicPat<GLOBAL_ATOMIC_ADD_RTN,     atomic_load_add_global_32,  i32>;
def : FlatSignedAtomicPat<GLOBAL_ATOMIC_SUB_RTN,     atomic_load_sub_global_32,  i32>;
def : FlatSignedAtomicPat<GLOBAL_ATOMIC_INC_RTN,     atomic_inc_global_32,       i32>;
def : FlatSignedAtomicPat<GLOBAL_ATOMIC_DEC_RTN,     atomic_dec_global_32,       i32>;
def : FlatSignedAtomicPat<GLOBAL_ATOMIC_AND_RTN,     atomic_load_and_global_32,  i32>;
def : FlatSignedAtomicPat<GLOBAL_ATOMIC_SMAX_RTN,    atomic_load_max_global_32,  i32>;
def : FlatSignedAtomicPat<GLOBAL_ATOMIC_UMAX_RTN,    atomic_load_umax_global_32, i32>;
def : FlatSignedAtomicPat<GLOBAL_ATOMIC_SMIN_RTN,    atomic_load_min_global_32,  i32>;
def : FlatSignedAtomicPat<GLOBAL_ATOMIC_UMIN_RTN,    atomic_load_umin_global_32, i32>;
def : FlatSignedAtomicPat<GLOBAL_ATOMIC_OR_RTN,      atomic_load_or_global_32,   i32>;
def : FlatSignedAtomicPat<GLOBAL_ATOMIC_SWAP_RTN,    atomic_swap_global_32,      i32>;
def : FlatSignedAtomicPat<GLOBAL_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global,
                          i32, v2i32>;
def : FlatSignedAtomicPat<GLOBAL_ATOMIC_XOR_RTN,     atomic_load_xor_global_32,  i32>;

// Returning 64-bit atomics.
def : FlatSignedAtomicPat<GLOBAL_ATOMIC_ADD_X2_RTN,     atomic_load_add_global_64,  i64>;
def : FlatSignedAtomicPat<GLOBAL_ATOMIC_SUB_X2_RTN,     atomic_load_sub_global_64,  i64>;
def : FlatSignedAtomicPat<GLOBAL_ATOMIC_INC_X2_RTN,     atomic_inc_global_64,       i64>;
def : FlatSignedAtomicPat<GLOBAL_ATOMIC_DEC_X2_RTN,     atomic_dec_global_64,       i64>;
def : FlatSignedAtomicPat<GLOBAL_ATOMIC_AND_X2_RTN,     atomic_load_and_global_64,  i64>;
def : FlatSignedAtomicPat<GLOBAL_ATOMIC_SMAX_X2_RTN,    atomic_load_max_global_64,  i64>;
def : FlatSignedAtomicPat<GLOBAL_ATOMIC_UMAX_X2_RTN,    atomic_load_umax_global_64, i64>;
def : FlatSignedAtomicPat<GLOBAL_ATOMIC_SMIN_X2_RTN,    atomic_load_min_global_64,  i64>;
def : FlatSignedAtomicPat<GLOBAL_ATOMIC_UMIN_X2_RTN,    atomic_load_umin_global_64, i64>;
def : FlatSignedAtomicPat<GLOBAL_ATOMIC_OR_X2_RTN,      atomic_load_or_global_64,   i64>;
def : FlatSignedAtomicPat<GLOBAL_ATOMIC_SWAP_X2_RTN,    atomic_swap_global_64,      i64>;
def : FlatSignedAtomicPat<GLOBAL_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global,
                          i64, v2i64>;
def : FlatSignedAtomicPat<GLOBAL_ATOMIC_XOR_X2_RTN,     atomic_load_xor_global_64,  i64>;

// No-return floating-point add atomics.
def : FlatAtomicPatNoRtn<GLOBAL_ATOMIC_ADD_F32,    atomic_fadd_global_noret,    f32>;
def : FlatAtomicPatNoRtn<GLOBAL_ATOMIC_PK_ADD_F16, atomic_pk_fadd_global_noret, v2f16>;

} // End OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10
947
948
949//===----------------------------------------------------------------------===//
950// Target
951//===----------------------------------------------------------------------===//
952
953//===----------------------------------------------------------------------===//
954// CI
955//===----------------------------------------------------------------------===//
956
// Real (encodable) counterpart of FLAT pseudo `ps` for the CI target, using
// the 7-bit `op`. The record is registered under SIEncodingFamily.SI, is
// assembled only when isGFX7Only holds, and is decoded in the "GFX7"
// namespace.
class FLAT_Real_ci <bits<7> op, FLAT_Pseudo ps>
  : FLAT_Real <op, ps>,
    SIMCInstr <ps.PseudoInstr, SIEncodingFamily.SI> {
  let AssemblerPredicate = isGFX7Only;
  let DecoderNamespace   = "GFX7";
}
963
// CI reals for the FLAT loads.
def FLAT_LOAD_UBYTE_ci    : FLAT_Real_ci<0x8,  FLAT_LOAD_UBYTE>;
def FLAT_LOAD_SBYTE_ci    : FLAT_Real_ci<0x9,  FLAT_LOAD_SBYTE>;
def FLAT_LOAD_USHORT_ci   : FLAT_Real_ci<0xa,  FLAT_LOAD_USHORT>;
def FLAT_LOAD_SSHORT_ci   : FLAT_Real_ci<0xb,  FLAT_LOAD_SSHORT>;
def FLAT_LOAD_DWORD_ci    : FLAT_Real_ci<0xc,  FLAT_LOAD_DWORD>;
def FLAT_LOAD_DWORDX2_ci  : FLAT_Real_ci<0xd,  FLAT_LOAD_DWORDX2>;
def FLAT_LOAD_DWORDX4_ci  : FLAT_Real_ci<0xe,  FLAT_LOAD_DWORDX4>;
def FLAT_LOAD_DWORDX3_ci  : FLAT_Real_ci<0xf,  FLAT_LOAD_DWORDX3>;

// CI reals for the FLAT stores.
def FLAT_STORE_BYTE_ci    : FLAT_Real_ci<0x18, FLAT_STORE_BYTE>;
def FLAT_STORE_SHORT_ci   : FLAT_Real_ci<0x1a, FLAT_STORE_SHORT>;
def FLAT_STORE_DWORD_ci   : FLAT_Real_ci<0x1c, FLAT_STORE_DWORD>;
def FLAT_STORE_DWORDX2_ci : FLAT_Real_ci<0x1d, FLAT_STORE_DWORDX2>;
def FLAT_STORE_DWORDX4_ci : FLAT_Real_ci<0x1e, FLAT_STORE_DWORDX4>;
def FLAT_STORE_DWORDX3_ci : FLAT_Real_ci<0x1f, FLAT_STORE_DWORDX3>;
979
// Emits two CI reals that share `op`: one for the pseudo named by
// ps.PseudoInstr and one for the pseudo with the same name plus "_RTN".
// Both pseudos are looked up by name with !cast.
multiclass FLAT_Real_Atomics_ci <bits<7> op, FLAT_Pseudo ps> {
  def _ci :
    FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>;
  def _RTN_ci :
    FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>;
}
984
// CI reals for the 32-bit FLAT atomics; each defm yields a _ci and a _RTN_ci
// record.
defm FLAT_ATOMIC_SWAP       : FLAT_Real_Atomics_ci<0x30, FLAT_ATOMIC_SWAP>;
defm FLAT_ATOMIC_CMPSWAP    : FLAT_Real_Atomics_ci<0x31, FLAT_ATOMIC_CMPSWAP>;
defm FLAT_ATOMIC_ADD        : FLAT_Real_Atomics_ci<0x32, FLAT_ATOMIC_ADD>;
defm FLAT_ATOMIC_SUB        : FLAT_Real_Atomics_ci<0x33, FLAT_ATOMIC_SUB>;
defm FLAT_ATOMIC_SMIN       : FLAT_Real_Atomics_ci<0x35, FLAT_ATOMIC_SMIN>;
defm FLAT_ATOMIC_UMIN       : FLAT_Real_Atomics_ci<0x36, FLAT_ATOMIC_UMIN>;
defm FLAT_ATOMIC_SMAX       : FLAT_Real_Atomics_ci<0x37, FLAT_ATOMIC_SMAX>;
defm FLAT_ATOMIC_UMAX       : FLAT_Real_Atomics_ci<0x38, FLAT_ATOMIC_UMAX>;
defm FLAT_ATOMIC_AND        : FLAT_Real_Atomics_ci<0x39, FLAT_ATOMIC_AND>;
defm FLAT_ATOMIC_OR         : FLAT_Real_Atomics_ci<0x3a, FLAT_ATOMIC_OR>;
defm FLAT_ATOMIC_XOR        : FLAT_Real_Atomics_ci<0x3b, FLAT_ATOMIC_XOR>;
defm FLAT_ATOMIC_INC        : FLAT_Real_Atomics_ci<0x3c, FLAT_ATOMIC_INC>;
defm FLAT_ATOMIC_DEC        : FLAT_Real_Atomics_ci<0x3d, FLAT_ATOMIC_DEC>;
// CI reals for the "_X2" FLAT atomics.
defm FLAT_ATOMIC_SWAP_X2    : FLAT_Real_Atomics_ci<0x50, FLAT_ATOMIC_SWAP_X2>;
defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_ci<0x51, FLAT_ATOMIC_CMPSWAP_X2>;
defm FLAT_ATOMIC_ADD_X2     : FLAT_Real_Atomics_ci<0x52, FLAT_ATOMIC_ADD_X2>;
defm FLAT_ATOMIC_SUB_X2     : FLAT_Real_Atomics_ci<0x53, FLAT_ATOMIC_SUB_X2>;
defm FLAT_ATOMIC_SMIN_X2    : FLAT_Real_Atomics_ci<0x55, FLAT_ATOMIC_SMIN_X2>;
defm FLAT_ATOMIC_UMIN_X2    : FLAT_Real_Atomics_ci<0x56, FLAT_ATOMIC_UMIN_X2>;
defm FLAT_ATOMIC_SMAX_X2    : FLAT_Real_Atomics_ci<0x57, FLAT_ATOMIC_SMAX_X2>;
defm FLAT_ATOMIC_UMAX_X2    : FLAT_Real_Atomics_ci<0x58, FLAT_ATOMIC_UMAX_X2>;
defm FLAT_ATOMIC_AND_X2     : FLAT_Real_Atomics_ci<0x59, FLAT_ATOMIC_AND_X2>;
defm FLAT_ATOMIC_OR_X2      : FLAT_Real_Atomics_ci<0x5a, FLAT_ATOMIC_OR_X2>;
defm FLAT_ATOMIC_XOR_X2     : FLAT_Real_Atomics_ci<0x5b, FLAT_ATOMIC_XOR_X2>;
defm FLAT_ATOMIC_INC_X2     : FLAT_Real_Atomics_ci<0x5c, FLAT_ATOMIC_INC_X2>;
defm FLAT_ATOMIC_DEC_X2     : FLAT_Real_Atomics_ci<0x5d, FLAT_ATOMIC_DEC_X2>;
1011
// Floating-point FLAT atomics that are given reals for CI only within the
// CI/VI sections of this file.
defm FLAT_ATOMIC_FCMPSWAP    : FLAT_Real_Atomics_ci<0x3e, FLAT_ATOMIC_FCMPSWAP>;
defm FLAT_ATOMIC_FMIN        : FLAT_Real_Atomics_ci<0x3f, FLAT_ATOMIC_FMIN>;
defm FLAT_ATOMIC_FMAX        : FLAT_Real_Atomics_ci<0x40, FLAT_ATOMIC_FMAX>;
defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Real_Atomics_ci<0x5e, FLAT_ATOMIC_FCMPSWAP_X2>;
defm FLAT_ATOMIC_FMIN_X2     : FLAT_Real_Atomics_ci<0x5f, FLAT_ATOMIC_FMIN_X2>;
defm FLAT_ATOMIC_FMAX_X2     : FLAT_Real_Atomics_ci<0x60, FLAT_ATOMIC_FMAX_X2>;
1019
1020
1021//===----------------------------------------------------------------------===//
1022// VI
1023//===----------------------------------------------------------------------===//
1024
// Real (encodable) counterpart of FLAT pseudo `ps` for the VI target, using
// the 7-bit `op`. The record is registered under SIEncodingFamily.VI, is
// assembled only when isGFX8GFX9 holds, and is decoded in the "GFX8"
// namespace.
class FLAT_Real_vi <bits<7> op, FLAT_Pseudo ps>
  : FLAT_Real <op, ps>,
    SIMCInstr <ps.PseudoInstr, SIEncodingFamily.VI> {
  let AssemblerPredicate = isGFX8GFX9;
  let DecoderNamespace   = "GFX8";
}
1031
// Emits two VI reals that share `op`: one for the pseudo named NAME and one
// for the pseudo named NAME#"_SADDR". Both are looked up by name with !cast.
multiclass FLAT_Real_AllAddr_vi<bits<7> op> {
  def _vi :
    FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME)>;
  def _SADDR_vi :
    FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
}
1036
// VI reals for the FLAT loads.
def FLAT_LOAD_UBYTE_vi         : FLAT_Real_vi<0x10, FLAT_LOAD_UBYTE>;
def FLAT_LOAD_SBYTE_vi         : FLAT_Real_vi<0x11, FLAT_LOAD_SBYTE>;
def FLAT_LOAD_USHORT_vi        : FLAT_Real_vi<0x12, FLAT_LOAD_USHORT>;
def FLAT_LOAD_SSHORT_vi        : FLAT_Real_vi<0x13, FLAT_LOAD_SSHORT>;
def FLAT_LOAD_DWORD_vi         : FLAT_Real_vi<0x14, FLAT_LOAD_DWORD>;
def FLAT_LOAD_DWORDX2_vi       : FLAT_Real_vi<0x15, FLAT_LOAD_DWORDX2>;
def FLAT_LOAD_DWORDX4_vi       : FLAT_Real_vi<0x17, FLAT_LOAD_DWORDX4>;
def FLAT_LOAD_DWORDX3_vi       : FLAT_Real_vi<0x16, FLAT_LOAD_DWORDX3>;

// VI reals for the FLAT stores, including the D16_HI variants.
def FLAT_STORE_BYTE_vi         : FLAT_Real_vi<0x18, FLAT_STORE_BYTE>;
def FLAT_STORE_BYTE_D16_HI_vi  : FLAT_Real_vi<0x19, FLAT_STORE_BYTE_D16_HI>;
def FLAT_STORE_SHORT_vi        : FLAT_Real_vi<0x1a, FLAT_STORE_SHORT>;
def FLAT_STORE_SHORT_D16_HI_vi : FLAT_Real_vi<0x1b, FLAT_STORE_SHORT_D16_HI>;
def FLAT_STORE_DWORD_vi        : FLAT_Real_vi<0x1c, FLAT_STORE_DWORD>;
def FLAT_STORE_DWORDX2_vi      : FLAT_Real_vi<0x1d, FLAT_STORE_DWORDX2>;
def FLAT_STORE_DWORDX4_vi      : FLAT_Real_vi<0x1f, FLAT_STORE_DWORDX4>;
def FLAT_STORE_DWORDX3_vi      : FLAT_Real_vi<0x1e, FLAT_STORE_DWORDX3>;

// VI reals for the FLAT D16 loads.
def FLAT_LOAD_UBYTE_D16_vi     : FLAT_Real_vi<0x20, FLAT_LOAD_UBYTE_D16>;
def FLAT_LOAD_UBYTE_D16_HI_vi  : FLAT_Real_vi<0x21, FLAT_LOAD_UBYTE_D16_HI>;
def FLAT_LOAD_SBYTE_D16_vi     : FLAT_Real_vi<0x22, FLAT_LOAD_SBYTE_D16>;
def FLAT_LOAD_SBYTE_D16_HI_vi  : FLAT_Real_vi<0x23, FLAT_LOAD_SBYTE_D16_HI>;
def FLAT_LOAD_SHORT_D16_vi     : FLAT_Real_vi<0x24, FLAT_LOAD_SHORT_D16>;
def FLAT_LOAD_SHORT_D16_HI_vi  : FLAT_Real_vi<0x25, FLAT_LOAD_SHORT_D16_HI>;
1061
// Emits two VI reals that share `op`: one for the pseudo named by
// ps.PseudoInstr and one for the pseudo with the same name plus "_RTN".
multiclass FLAT_Real_Atomics_vi <bits<7> op, FLAT_Pseudo ps> {
  def _vi :
    FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>;
  def _RTN_vi :
    FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>;
}
1066
// Extends FLAT_Real_AllAddr_vi (which supplies _vi and _SADDR_vi) with VI
// reals for the NAME#"_RTN" and NAME#"_SADDR_RTN" pseudos, so each use
// yields four records that share `op`.
multiclass FLAT_Global_Real_Atomics_vi<bits<7> op>
  : FLAT_Real_AllAddr_vi<op> {
  def _RTN_vi :
    FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_RTN")>;
  def _SADDR_RTN_vi :
    FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN")>;
}
1072
1073
// VI reals for the 32-bit FLAT atomics; each defm yields a _vi and a _RTN_vi
// record.
defm FLAT_ATOMIC_SWAP       : FLAT_Real_Atomics_vi<0x40, FLAT_ATOMIC_SWAP>;
defm FLAT_ATOMIC_CMPSWAP    : FLAT_Real_Atomics_vi<0x41, FLAT_ATOMIC_CMPSWAP>;
defm FLAT_ATOMIC_ADD        : FLAT_Real_Atomics_vi<0x42, FLAT_ATOMIC_ADD>;
defm FLAT_ATOMIC_SUB        : FLAT_Real_Atomics_vi<0x43, FLAT_ATOMIC_SUB>;
defm FLAT_ATOMIC_SMIN       : FLAT_Real_Atomics_vi<0x44, FLAT_ATOMIC_SMIN>;
defm FLAT_ATOMIC_UMIN       : FLAT_Real_Atomics_vi<0x45, FLAT_ATOMIC_UMIN>;
defm FLAT_ATOMIC_SMAX       : FLAT_Real_Atomics_vi<0x46, FLAT_ATOMIC_SMAX>;
defm FLAT_ATOMIC_UMAX       : FLAT_Real_Atomics_vi<0x47, FLAT_ATOMIC_UMAX>;
defm FLAT_ATOMIC_AND        : FLAT_Real_Atomics_vi<0x48, FLAT_ATOMIC_AND>;
defm FLAT_ATOMIC_OR         : FLAT_Real_Atomics_vi<0x49, FLAT_ATOMIC_OR>;
defm FLAT_ATOMIC_XOR        : FLAT_Real_Atomics_vi<0x4a, FLAT_ATOMIC_XOR>;
defm FLAT_ATOMIC_INC        : FLAT_Real_Atomics_vi<0x4b, FLAT_ATOMIC_INC>;
defm FLAT_ATOMIC_DEC        : FLAT_Real_Atomics_vi<0x4c, FLAT_ATOMIC_DEC>;
// VI reals for the "_X2" FLAT atomics.
defm FLAT_ATOMIC_SWAP_X2    : FLAT_Real_Atomics_vi<0x60, FLAT_ATOMIC_SWAP_X2>;
defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_vi<0x61, FLAT_ATOMIC_CMPSWAP_X2>;
defm FLAT_ATOMIC_ADD_X2     : FLAT_Real_Atomics_vi<0x62, FLAT_ATOMIC_ADD_X2>;
defm FLAT_ATOMIC_SUB_X2     : FLAT_Real_Atomics_vi<0x63, FLAT_ATOMIC_SUB_X2>;
defm FLAT_ATOMIC_SMIN_X2    : FLAT_Real_Atomics_vi<0x64, FLAT_ATOMIC_SMIN_X2>;
defm FLAT_ATOMIC_UMIN_X2    : FLAT_Real_Atomics_vi<0x65, FLAT_ATOMIC_UMIN_X2>;
defm FLAT_ATOMIC_SMAX_X2    : FLAT_Real_Atomics_vi<0x66, FLAT_ATOMIC_SMAX_X2>;
defm FLAT_ATOMIC_UMAX_X2    : FLAT_Real_Atomics_vi<0x67, FLAT_ATOMIC_UMAX_X2>;
defm FLAT_ATOMIC_AND_X2     : FLAT_Real_Atomics_vi<0x68, FLAT_ATOMIC_AND_X2>;
defm FLAT_ATOMIC_OR_X2      : FLAT_Real_Atomics_vi<0x69, FLAT_ATOMIC_OR_X2>;
defm FLAT_ATOMIC_XOR_X2     : FLAT_Real_Atomics_vi<0x6a, FLAT_ATOMIC_XOR_X2>;
defm FLAT_ATOMIC_INC_X2     : FLAT_Real_Atomics_vi<0x6b, FLAT_ATOMIC_INC_X2>;
defm FLAT_ATOMIC_DEC_X2     : FLAT_Real_Atomics_vi<0x6c, FLAT_ATOMIC_DEC_X2>;
1100
// VI reals for the GLOBAL loads; each defm yields a _vi and a _SADDR_vi
// record.
defm GLOBAL_LOAD_UBYTE         : FLAT_Real_AllAddr_vi<0x10>;
defm GLOBAL_LOAD_SBYTE         : FLAT_Real_AllAddr_vi<0x11>;
defm GLOBAL_LOAD_USHORT        : FLAT_Real_AllAddr_vi<0x12>;
defm GLOBAL_LOAD_SSHORT        : FLAT_Real_AllAddr_vi<0x13>;
defm GLOBAL_LOAD_DWORD         : FLAT_Real_AllAddr_vi<0x14>;
defm GLOBAL_LOAD_DWORDX2       : FLAT_Real_AllAddr_vi<0x15>;
defm GLOBAL_LOAD_DWORDX3       : FLAT_Real_AllAddr_vi<0x16>;
defm GLOBAL_LOAD_DWORDX4       : FLAT_Real_AllAddr_vi<0x17>;

// VI reals for the GLOBAL D16 loads.
defm GLOBAL_LOAD_UBYTE_D16     : FLAT_Real_AllAddr_vi<0x20>;
defm GLOBAL_LOAD_UBYTE_D16_HI  : FLAT_Real_AllAddr_vi<0x21>;
defm GLOBAL_LOAD_SBYTE_D16     : FLAT_Real_AllAddr_vi<0x22>;
defm GLOBAL_LOAD_SBYTE_D16_HI  : FLAT_Real_AllAddr_vi<0x23>;
defm GLOBAL_LOAD_SHORT_D16     : FLAT_Real_AllAddr_vi<0x24>;
defm GLOBAL_LOAD_SHORT_D16_HI  : FLAT_Real_AllAddr_vi<0x25>;

// VI reals for the GLOBAL stores.
defm GLOBAL_STORE_BYTE         : FLAT_Real_AllAddr_vi<0x18>;
defm GLOBAL_STORE_BYTE_D16_HI  : FLAT_Real_AllAddr_vi<0x19>;
defm GLOBAL_STORE_SHORT        : FLAT_Real_AllAddr_vi<0x1a>;
defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_vi<0x1b>;
defm GLOBAL_STORE_DWORD        : FLAT_Real_AllAddr_vi<0x1c>;
defm GLOBAL_STORE_DWORDX2      : FLAT_Real_AllAddr_vi<0x1d>;
defm GLOBAL_STORE_DWORDX3      : FLAT_Real_AllAddr_vi<0x1e>;
defm GLOBAL_STORE_DWORDX4      : FLAT_Real_AllAddr_vi<0x1f>;
1125
1126
// VI reals for the 32-bit GLOBAL atomics; each defm yields _vi, _SADDR_vi,
// _RTN_vi and _SADDR_RTN_vi records.
defm GLOBAL_ATOMIC_SWAP       : FLAT_Global_Real_Atomics_vi<0x40>;
defm GLOBAL_ATOMIC_CMPSWAP    : FLAT_Global_Real_Atomics_vi<0x41>;
defm GLOBAL_ATOMIC_ADD        : FLAT_Global_Real_Atomics_vi<0x42>;
defm GLOBAL_ATOMIC_SUB        : FLAT_Global_Real_Atomics_vi<0x43>;
defm GLOBAL_ATOMIC_SMIN       : FLAT_Global_Real_Atomics_vi<0x44>;
defm GLOBAL_ATOMIC_UMIN       : FLAT_Global_Real_Atomics_vi<0x45>;
defm GLOBAL_ATOMIC_SMAX       : FLAT_Global_Real_Atomics_vi<0x46>;
defm GLOBAL_ATOMIC_UMAX       : FLAT_Global_Real_Atomics_vi<0x47>;
defm GLOBAL_ATOMIC_AND        : FLAT_Global_Real_Atomics_vi<0x48>;
defm GLOBAL_ATOMIC_OR         : FLAT_Global_Real_Atomics_vi<0x49>;
defm GLOBAL_ATOMIC_XOR        : FLAT_Global_Real_Atomics_vi<0x4a>;
defm GLOBAL_ATOMIC_INC        : FLAT_Global_Real_Atomics_vi<0x4b>;
defm GLOBAL_ATOMIC_DEC        : FLAT_Global_Real_Atomics_vi<0x4c>;
// VI reals for the "_X2" GLOBAL atomics.
defm GLOBAL_ATOMIC_SWAP_X2    : FLAT_Global_Real_Atomics_vi<0x60>;
defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Real_Atomics_vi<0x61>;
defm GLOBAL_ATOMIC_ADD_X2     : FLAT_Global_Real_Atomics_vi<0x62>;
defm GLOBAL_ATOMIC_SUB_X2     : FLAT_Global_Real_Atomics_vi<0x63>;
defm GLOBAL_ATOMIC_SMIN_X2    : FLAT_Global_Real_Atomics_vi<0x64>;
defm GLOBAL_ATOMIC_UMIN_X2    : FLAT_Global_Real_Atomics_vi<0x65>;
defm GLOBAL_ATOMIC_SMAX_X2    : FLAT_Global_Real_Atomics_vi<0x66>;
defm GLOBAL_ATOMIC_UMAX_X2    : FLAT_Global_Real_Atomics_vi<0x67>;
defm GLOBAL_ATOMIC_AND_X2     : FLAT_Global_Real_Atomics_vi<0x68>;
defm GLOBAL_ATOMIC_OR_X2      : FLAT_Global_Real_Atomics_vi<0x69>;
defm GLOBAL_ATOMIC_XOR_X2     : FLAT_Global_Real_Atomics_vi<0x6a>;
defm GLOBAL_ATOMIC_INC_X2     : FLAT_Global_Real_Atomics_vi<0x6b>;
defm GLOBAL_ATOMIC_DEC_X2     : FLAT_Global_Real_Atomics_vi<0x6c>;
1153
// VI reals for the SCRATCH loads and stores; each defm yields a _vi and a
// _SADDR_vi record. The `op` values match those used for the same-named
// FLAT and GLOBAL operations above.
defm SCRATCH_LOAD_UBYTE         : FLAT_Real_AllAddr_vi<0x10>;
defm SCRATCH_LOAD_SBYTE         : FLAT_Real_AllAddr_vi<0x11>;
defm SCRATCH_LOAD_USHORT        : FLAT_Real_AllAddr_vi<0x12>;
defm SCRATCH_LOAD_SSHORT        : FLAT_Real_AllAddr_vi<0x13>;
defm SCRATCH_LOAD_DWORD         : FLAT_Real_AllAddr_vi<0x14>;
defm SCRATCH_LOAD_DWORDX2       : FLAT_Real_AllAddr_vi<0x15>;
defm SCRATCH_LOAD_DWORDX3       : FLAT_Real_AllAddr_vi<0x16>;
defm SCRATCH_LOAD_DWORDX4       : FLAT_Real_AllAddr_vi<0x17>;
defm SCRATCH_STORE_BYTE         : FLAT_Real_AllAddr_vi<0x18>;
defm SCRATCH_STORE_BYTE_D16_HI  : FLAT_Real_AllAddr_vi<0x19>;
defm SCRATCH_LOAD_UBYTE_D16     : FLAT_Real_AllAddr_vi<0x20>;
defm SCRATCH_LOAD_UBYTE_D16_HI  : FLAT_Real_AllAddr_vi<0x21>;
defm SCRATCH_LOAD_SBYTE_D16     : FLAT_Real_AllAddr_vi<0x22>;
defm SCRATCH_LOAD_SBYTE_D16_HI  : FLAT_Real_AllAddr_vi<0x23>;
defm SCRATCH_LOAD_SHORT_D16     : FLAT_Real_AllAddr_vi<0x24>;
defm SCRATCH_LOAD_SHORT_D16_HI  : FLAT_Real_AllAddr_vi<0x25>;
defm SCRATCH_STORE_SHORT        : FLAT_Real_AllAddr_vi<0x1a>;
defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_vi<0x1b>;
defm SCRATCH_STORE_DWORD        : FLAT_Real_AllAddr_vi<0x1c>;
defm SCRATCH_STORE_DWORDX2      : FLAT_Real_AllAddr_vi<0x1d>;
defm SCRATCH_STORE_DWORDX3      : FLAT_Real_AllAddr_vi<0x1e>;
defm SCRATCH_STORE_DWORDX4      : FLAT_Real_AllAddr_vi<0x1f>;
1176
1177
1178//===----------------------------------------------------------------------===//
1179// GFX10.
1180//===----------------------------------------------------------------------===//
1181
// Real (encodable) counterpart of FLAT pseudo `ps` for GFX10, using the 7-bit
// `op`. The record is registered under SIEncodingFamily.GFX10, is assembled
// only when isGFX10Plus holds, and is decoded in the "GFX10" namespace.
//
// On top of what FLAT_Real provides, this class assigns these encoding bits:
//   Inst{11-0}  - the low 12 bits of the offset operand.
//   Inst{12}    - the dlc operand when ps.has_dlc is set, else ps.dlcValue.
//   Inst{54-48} - the saddr operand when ps.has_saddr and ps.enabled_saddr
//                 are both set, else the constant 0x7d (presumably the
//                 "no saddr" encoding - TODO confirm).
//   Inst{55}    - always 0.
class FLAT_Real_gfx10<bits<7> op, FLAT_Pseudo ps>
  : FLAT_Real<op, ps>,
    SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX10> {
  let AssemblerPredicate = isGFX10Plus;
  let DecoderNamespace   = "GFX10";

  let Inst{11-0}  = offset{11-0};
  let Inst{12}    = !if(ps.has_dlc, dlc, ps.dlcValue);
  let Inst{54-48} = !if(ps.has_saddr,
                        !if(ps.enabled_saddr, saddr, 0x7d),
                        0x7d);
  let Inst{55}    = 0;
}
1192
1193
// Emits the GFX10 real (suffix _gfx10) for the pseudo named NAME.
multiclass FLAT_Real_Base_gfx10<bits<7> op> {
  def _gfx10 : FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME)>;
}
1198
// Emits the GFX10 real (suffix _RTN_gfx10) for the pseudo named NAME#"_RTN".
multiclass FLAT_Real_RTN_gfx10<bits<7> op> {
  def _RTN_gfx10 : FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_RTN")>;
}
1203
// Emits the GFX10 real (suffix _SADDR_gfx10) for the pseudo named
// NAME#"_SADDR".
multiclass FLAT_Real_SADDR_gfx10<bits<7> op> {
  def _SADDR_gfx10 : FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
}
1208
// Emits the GFX10 real (suffix _SADDR_RTN_gfx10) for the pseudo named
// NAME#"_SADDR_RTN".
multiclass FLAT_Real_SADDR_RTN_gfx10<bits<7> op> {
  def _SADDR_RTN_gfx10
    : FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN")>;
}
1213
1214
// Base and SADDR GFX10 reals for one name: _gfx10 and _SADDR_gfx10.
multiclass FLAT_Real_AllAddr_gfx10<bits<7> op>
  : FLAT_Real_Base_gfx10<op>, FLAT_Real_SADDR_gfx10<op>;
1218
// Base and returning GFX10 reals for one name: _gfx10 and _RTN_gfx10.
multiclass FLAT_Real_Atomics_gfx10<bits<7> op>
  : FLAT_Real_Base_gfx10<op>, FLAT_Real_RTN_gfx10<op>;
1222
// All four GFX10 reals for one name: _gfx10 and _SADDR_gfx10 (through
// FLAT_Real_AllAddr_gfx10), plus _RTN_gfx10 and _SADDR_RTN_gfx10.
multiclass FLAT_Real_GlblAtomics_gfx10<bits<7> op>
  : FLAT_Real_AllAddr_gfx10<op>,
    FLAT_Real_RTN_gfx10<op>,
    FLAT_Real_SADDR_RTN_gfx10<op>;
1227
1228
// ENC_FLAT.
// GFX10 reals for the FLAT loads and stores (one _gfx10 record each).
defm FLAT_LOAD_UBYTE         : FLAT_Real_Base_gfx10<0x008>;
defm FLAT_LOAD_SBYTE         : FLAT_Real_Base_gfx10<0x009>;
defm FLAT_LOAD_USHORT        : FLAT_Real_Base_gfx10<0x00a>;
defm FLAT_LOAD_SSHORT        : FLAT_Real_Base_gfx10<0x00b>;
defm FLAT_LOAD_DWORD         : FLAT_Real_Base_gfx10<0x00c>;
defm FLAT_LOAD_DWORDX2       : FLAT_Real_Base_gfx10<0x00d>;
defm FLAT_LOAD_DWORDX4       : FLAT_Real_Base_gfx10<0x00e>;
defm FLAT_LOAD_DWORDX3       : FLAT_Real_Base_gfx10<0x00f>;
defm FLAT_STORE_BYTE         : FLAT_Real_Base_gfx10<0x018>;
defm FLAT_STORE_BYTE_D16_HI  : FLAT_Real_Base_gfx10<0x019>;
defm FLAT_STORE_SHORT        : FLAT_Real_Base_gfx10<0x01a>;
defm FLAT_STORE_SHORT_D16_HI : FLAT_Real_Base_gfx10<0x01b>;
defm FLAT_STORE_DWORD        : FLAT_Real_Base_gfx10<0x01c>;
defm FLAT_STORE_DWORDX2      : FLAT_Real_Base_gfx10<0x01d>;
defm FLAT_STORE_DWORDX4      : FLAT_Real_Base_gfx10<0x01e>;
defm FLAT_STORE_DWORDX3      : FLAT_Real_Base_gfx10<0x01f>;
defm FLAT_LOAD_UBYTE_D16     : FLAT_Real_Base_gfx10<0x020>;
defm FLAT_LOAD_UBYTE_D16_HI  : FLAT_Real_Base_gfx10<0x021>;
defm FLAT_LOAD_SBYTE_D16     : FLAT_Real_Base_gfx10<0x022>;
defm FLAT_LOAD_SBYTE_D16_HI  : FLAT_Real_Base_gfx10<0x023>;
defm FLAT_LOAD_SHORT_D16     : FLAT_Real_Base_gfx10<0x024>;
defm FLAT_LOAD_SHORT_D16_HI  : FLAT_Real_Base_gfx10<0x025>;
// GFX10 reals for the 32-bit FLAT atomics (_gfx10 and _RTN_gfx10 each).
defm FLAT_ATOMIC_SWAP        : FLAT_Real_Atomics_gfx10<0x030>;
defm FLAT_ATOMIC_CMPSWAP     : FLAT_Real_Atomics_gfx10<0x031>;
defm FLAT_ATOMIC_ADD         : FLAT_Real_Atomics_gfx10<0x032>;
defm FLAT_ATOMIC_SUB         : FLAT_Real_Atomics_gfx10<0x033>;
defm FLAT_ATOMIC_SMIN        : FLAT_Real_Atomics_gfx10<0x035>;
defm FLAT_ATOMIC_UMIN        : FLAT_Real_Atomics_gfx10<0x036>;
defm FLAT_ATOMIC_SMAX        : FLAT_Real_Atomics_gfx10<0x037>;
defm FLAT_ATOMIC_UMAX        : FLAT_Real_Atomics_gfx10<0x038>;
defm FLAT_ATOMIC_AND         : FLAT_Real_Atomics_gfx10<0x039>;
defm FLAT_ATOMIC_OR          : FLAT_Real_Atomics_gfx10<0x03a>;
defm FLAT_ATOMIC_XOR         : FLAT_Real_Atomics_gfx10<0x03b>;
defm FLAT_ATOMIC_INC         : FLAT_Real_Atomics_gfx10<0x03c>;
defm FLAT_ATOMIC_DEC         : FLAT_Real_Atomics_gfx10<0x03d>;
defm FLAT_ATOMIC_FCMPSWAP    : FLAT_Real_Atomics_gfx10<0x03e>;
defm FLAT_ATOMIC_FMIN        : FLAT_Real_Atomics_gfx10<0x03f>;
defm FLAT_ATOMIC_FMAX        : FLAT_Real_Atomics_gfx10<0x040>;
// GFX10 reals for the "_X2" FLAT atomics.
defm FLAT_ATOMIC_SWAP_X2     : FLAT_Real_Atomics_gfx10<0x050>;
defm FLAT_ATOMIC_CMPSWAP_X2  : FLAT_Real_Atomics_gfx10<0x051>;
defm FLAT_ATOMIC_ADD_X2      : FLAT_Real_Atomics_gfx10<0x052>;
defm FLAT_ATOMIC_SUB_X2      : FLAT_Real_Atomics_gfx10<0x053>;
defm FLAT_ATOMIC_SMIN_X2     : FLAT_Real_Atomics_gfx10<0x055>;
defm FLAT_ATOMIC_UMIN_X2     : FLAT_Real_Atomics_gfx10<0x056>;
defm FLAT_ATOMIC_SMAX_X2     : FLAT_Real_Atomics_gfx10<0x057>;
defm FLAT_ATOMIC_UMAX_X2     : FLAT_Real_Atomics_gfx10<0x058>;
defm FLAT_ATOMIC_AND_X2      : FLAT_Real_Atomics_gfx10<0x059>;
defm FLAT_ATOMIC_OR_X2       : FLAT_Real_Atomics_gfx10<0x05a>;
defm FLAT_ATOMIC_XOR_X2      : FLAT_Real_Atomics_gfx10<0x05b>;
defm FLAT_ATOMIC_INC_X2      : FLAT_Real_Atomics_gfx10<0x05c>;
defm FLAT_ATOMIC_DEC_X2      : FLAT_Real_Atomics_gfx10<0x05d>;
defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Real_Atomics_gfx10<0x05e>;
defm FLAT_ATOMIC_FMIN_X2     : FLAT_Real_Atomics_gfx10<0x05f>;
defm FLAT_ATOMIC_FMAX_X2     : FLAT_Real_Atomics_gfx10<0x060>;
1284
1285
1286// ENC_FLAT_GLBL.
// GLOBAL_* entries. Loads, stores and D16 variants are instantiated through
// FLAT_Real_AllAddr_gfx10; atomics go through FLAT_Real_GlblAtomics_gfx10.
// Every hex value here equals the value given to the FLAT_* entry with the
// same suffix, including the DWORDX4 (0x00e) / DWORDX3 (0x00f) ordering.
// NOTE(review): the hex argument is presumably the gfx10 opcode, with the
// segment distinguished elsewhere in the encoding - confirm against the
// multiclass definitions, which are outside this section.
1287defm GLOBAL_LOAD_UBYTE          : FLAT_Real_AllAddr_gfx10<0x008>;
1288defm GLOBAL_LOAD_SBYTE          : FLAT_Real_AllAddr_gfx10<0x009>;
1289defm GLOBAL_LOAD_USHORT         : FLAT_Real_AllAddr_gfx10<0x00a>;
1290defm GLOBAL_LOAD_SSHORT         : FLAT_Real_AllAddr_gfx10<0x00b>;
1291defm GLOBAL_LOAD_DWORD          : FLAT_Real_AllAddr_gfx10<0x00c>;
1292defm GLOBAL_LOAD_DWORDX2        : FLAT_Real_AllAddr_gfx10<0x00d>;
1293defm GLOBAL_LOAD_DWORDX4        : FLAT_Real_AllAddr_gfx10<0x00e>;
1294defm GLOBAL_LOAD_DWORDX3        : FLAT_Real_AllAddr_gfx10<0x00f>;
1295defm GLOBAL_STORE_BYTE          : FLAT_Real_AllAddr_gfx10<0x018>;
1296defm GLOBAL_STORE_BYTE_D16_HI   : FLAT_Real_AllAddr_gfx10<0x019>;
1297defm GLOBAL_STORE_SHORT         : FLAT_Real_AllAddr_gfx10<0x01a>;
1298defm GLOBAL_STORE_SHORT_D16_HI  : FLAT_Real_AllAddr_gfx10<0x01b>;
1299defm GLOBAL_STORE_DWORD         : FLAT_Real_AllAddr_gfx10<0x01c>;
1300defm GLOBAL_STORE_DWORDX2       : FLAT_Real_AllAddr_gfx10<0x01d>;
1301defm GLOBAL_STORE_DWORDX4       : FLAT_Real_AllAddr_gfx10<0x01e>;
1302defm GLOBAL_STORE_DWORDX3       : FLAT_Real_AllAddr_gfx10<0x01f>;
1303defm GLOBAL_LOAD_UBYTE_D16      : FLAT_Real_AllAddr_gfx10<0x020>;
1304defm GLOBAL_LOAD_UBYTE_D16_HI   : FLAT_Real_AllAddr_gfx10<0x021>;
1305defm GLOBAL_LOAD_SBYTE_D16      : FLAT_Real_AllAddr_gfx10<0x022>;
1306defm GLOBAL_LOAD_SBYTE_D16_HI   : FLAT_Real_AllAddr_gfx10<0x023>;
1307defm GLOBAL_LOAD_SHORT_D16      : FLAT_Real_AllAddr_gfx10<0x024>;
1308defm GLOBAL_LOAD_SHORT_D16_HI   : FLAT_Real_AllAddr_gfx10<0x025>;
// 32-bit atomics; 0x034 is not used by any entry in this list.
1309defm GLOBAL_ATOMIC_SWAP         : FLAT_Real_GlblAtomics_gfx10<0x030>;
1310defm GLOBAL_ATOMIC_CMPSWAP      : FLAT_Real_GlblAtomics_gfx10<0x031>;
1311defm GLOBAL_ATOMIC_ADD          : FLAT_Real_GlblAtomics_gfx10<0x032>;
1312defm GLOBAL_ATOMIC_SUB          : FLAT_Real_GlblAtomics_gfx10<0x033>;
1313defm GLOBAL_ATOMIC_SMIN         : FLAT_Real_GlblAtomics_gfx10<0x035>;
1314defm GLOBAL_ATOMIC_UMIN         : FLAT_Real_GlblAtomics_gfx10<0x036>;
1315defm GLOBAL_ATOMIC_SMAX         : FLAT_Real_GlblAtomics_gfx10<0x037>;
1316defm GLOBAL_ATOMIC_UMAX         : FLAT_Real_GlblAtomics_gfx10<0x038>;
1317defm GLOBAL_ATOMIC_AND          : FLAT_Real_GlblAtomics_gfx10<0x039>;
1318defm GLOBAL_ATOMIC_OR           : FLAT_Real_GlblAtomics_gfx10<0x03a>;
1319defm GLOBAL_ATOMIC_XOR          : FLAT_Real_GlblAtomics_gfx10<0x03b>;
1320defm GLOBAL_ATOMIC_INC          : FLAT_Real_GlblAtomics_gfx10<0x03c>;
1321defm GLOBAL_ATOMIC_DEC          : FLAT_Real_GlblAtomics_gfx10<0x03d>;
1322defm GLOBAL_ATOMIC_FCMPSWAP     : FLAT_Real_GlblAtomics_gfx10<0x03e>;
1323defm GLOBAL_ATOMIC_FMIN         : FLAT_Real_GlblAtomics_gfx10<0x03f>;
1324defm GLOBAL_ATOMIC_FMAX         : FLAT_Real_GlblAtomics_gfx10<0x040>;
// _X2 atomics; each value is the matching 32-bit entry plus 0x20, so 0x054
// is likewise unused.
1325defm GLOBAL_ATOMIC_SWAP_X2      : FLAT_Real_GlblAtomics_gfx10<0x050>;
1326defm GLOBAL_ATOMIC_CMPSWAP_X2   : FLAT_Real_GlblAtomics_gfx10<0x051>;
1327defm GLOBAL_ATOMIC_ADD_X2       : FLAT_Real_GlblAtomics_gfx10<0x052>;
1328defm GLOBAL_ATOMIC_SUB_X2       : FLAT_Real_GlblAtomics_gfx10<0x053>;
1329defm GLOBAL_ATOMIC_SMIN_X2      : FLAT_Real_GlblAtomics_gfx10<0x055>;
1330defm GLOBAL_ATOMIC_UMIN_X2      : FLAT_Real_GlblAtomics_gfx10<0x056>;
1331defm GLOBAL_ATOMIC_SMAX_X2      : FLAT_Real_GlblAtomics_gfx10<0x057>;
1332defm GLOBAL_ATOMIC_UMAX_X2      : FLAT_Real_GlblAtomics_gfx10<0x058>;
1333defm GLOBAL_ATOMIC_AND_X2       : FLAT_Real_GlblAtomics_gfx10<0x059>;
1334defm GLOBAL_ATOMIC_OR_X2        : FLAT_Real_GlblAtomics_gfx10<0x05a>;
1335defm GLOBAL_ATOMIC_XOR_X2       : FLAT_Real_GlblAtomics_gfx10<0x05b>;
1336defm GLOBAL_ATOMIC_INC_X2       : FLAT_Real_GlblAtomics_gfx10<0x05c>;
1337defm GLOBAL_ATOMIC_DEC_X2       : FLAT_Real_GlblAtomics_gfx10<0x05d>;
1338defm GLOBAL_ATOMIC_FCMPSWAP_X2  : FLAT_Real_GlblAtomics_gfx10<0x05e>;
1339defm GLOBAL_ATOMIC_FMIN_X2      : FLAT_Real_GlblAtomics_gfx10<0x05f>;
1340defm GLOBAL_ATOMIC_FMAX_X2      : FLAT_Real_GlblAtomics_gfx10<0x060>;
1341
1342
1343// ENC_FLAT_SCRATCH.
// SCRATCH_* entries, all instantiated through FLAT_Real_AllAddr_gfx10 (the
// same multiclass the GLOBAL_* loads and stores use) and with the same hex
// values as the GLOBAL_* entries of the same suffix. This group contains
// loads, stores and D16 variants only; no SCRATCH atomic entries are listed.
// NOTE(review): the hex argument is presumably the gfx10 opcode - confirm
// against the multiclass definition, which is outside this section.
1344defm SCRATCH_LOAD_UBYTE         : FLAT_Real_AllAddr_gfx10<0x008>;
1345defm SCRATCH_LOAD_SBYTE         : FLAT_Real_AllAddr_gfx10<0x009>;
1346defm SCRATCH_LOAD_USHORT        : FLAT_Real_AllAddr_gfx10<0x00a>;
1347defm SCRATCH_LOAD_SSHORT        : FLAT_Real_AllAddr_gfx10<0x00b>;
1348defm SCRATCH_LOAD_DWORD         : FLAT_Real_AllAddr_gfx10<0x00c>;
1349defm SCRATCH_LOAD_DWORDX2       : FLAT_Real_AllAddr_gfx10<0x00d>;
// As in the other groups, DWORDX4 (0x00e) is numbered before DWORDX3 (0x00f).
1350defm SCRATCH_LOAD_DWORDX4       : FLAT_Real_AllAddr_gfx10<0x00e>;
1351defm SCRATCH_LOAD_DWORDX3       : FLAT_Real_AllAddr_gfx10<0x00f>;
1352defm SCRATCH_STORE_BYTE         : FLAT_Real_AllAddr_gfx10<0x018>;
1353defm SCRATCH_STORE_BYTE_D16_HI  : FLAT_Real_AllAddr_gfx10<0x019>;
1354defm SCRATCH_STORE_SHORT        : FLAT_Real_AllAddr_gfx10<0x01a>;
1355defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x01b>;
1356defm SCRATCH_STORE_DWORD        : FLAT_Real_AllAddr_gfx10<0x01c>;
1357defm SCRATCH_STORE_DWORDX2      : FLAT_Real_AllAddr_gfx10<0x01d>;
1358defm SCRATCH_STORE_DWORDX4      : FLAT_Real_AllAddr_gfx10<0x01e>;
1359defm SCRATCH_STORE_DWORDX3      : FLAT_Real_AllAddr_gfx10<0x01f>;
1360defm SCRATCH_LOAD_UBYTE_D16     : FLAT_Real_AllAddr_gfx10<0x020>;
1361defm SCRATCH_LOAD_UBYTE_D16_HI  : FLAT_Real_AllAddr_gfx10<0x021>;
1362defm SCRATCH_LOAD_SBYTE_D16     : FLAT_Real_AllAddr_gfx10<0x022>;
1363defm SCRATCH_LOAD_SBYTE_D16_HI  : FLAT_Real_AllAddr_gfx10<0x023>;
1364defm SCRATCH_LOAD_SHORT_D16     : FLAT_Real_AllAddr_gfx10<0x024>;
1365defm SCRATCH_LOAD_SHORT_D16_HI  : FLAT_Real_AllAddr_gfx10<0x025>;
1366
// Floating-point global atomic adds. The enclosing `let` sets
// SubtargetPredicate = HasAtomicFaddInsts on every record produced by the two
// defms below.
// NOTE(review): although this follows the gfx10 tables, both entries use
// FLAT_Real_AllAddr_vi rather than a *_gfx10 multiclass - presumably because
// these instructions are encoded in the VI family; confirm before moving or
// regrouping this block.
1367let SubtargetPredicate = HasAtomicFaddInsts in {
1368
1369defm GLOBAL_ATOMIC_ADD_F32    : FLAT_Real_AllAddr_vi <0x04d>;
1370defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Real_AllAddr_vi <0x04e>;
1371
1372} // End SubtargetPredicate = HasAtomicFaddInsts
1373