xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/FLATInstructions.td (revision a3266ba2697a383d2ede56803320d941866c7e76)
1//===-- FLATInstructions.td - FLAT Instruction Definitions ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
// Address-selection ComplexPatterns used by the FLAT-family patterns. The
// string names the C++ selector routine; every entry requests the root node
// (SDNPWantRoot) and passes -10 as the final (complexity) argument.

// i64 address, 2 result operands, selector instantiated with <false>.
def FLATOffset : ComplexPattern<
  i64, 2, "SelectFlatOffset<false>", [], [SDNPWantRoot], -10>;

// i64 address, 2 result operands, selector instantiated with <true>.
def FLATOffsetSigned : ComplexPattern<
  i64, 2, "SelectFlatOffset<true>", [], [SDNPWantRoot], -10>;

// i32 address, 2 result operands, same <true> selector as FLATOffsetSigned.
def ScratchOffset : ComplexPattern<
  i32, 2, "SelectFlatOffset<true>", [], [SDNPWantRoot], -10>;

// i64 address, 3 result operands, handled by SelectGlobalSAddr.
def GlobalSAddr : ComplexPattern<
  i64, 3, "SelectGlobalSAddr", [], [SDNPWantRoot], -10>;

// i32 address, 2 result operands, handled by SelectScratchSAddr.
def ScratchSAddr : ComplexPattern<
  i32, 2, "SelectScratchSAddr", [], [SDNPWantRoot], -10>;
15
16//===----------------------------------------------------------------------===//
17// FLAT classes
18//===----------------------------------------------------------------------===//
19
// Base class shared by every FLAT-family pseudo instruction.
//
//   opName  - mnemonic; stored in Mnemonic and forwarded to SIMCInstr.
//   outs    - output operand dag.
//   ins     - input operand dag.
//   asmOps  - operand part of the assembly text; stored in AsmOperands
//             (the asm string handed to InstSI itself is empty).
//   pattern - optional selection patterns, empty by default.
class FLAT_Pseudo<string opName, dag outs, dag ins,
                  string asmOps, list<dag> pattern=[]> :
  InstSI<outs, ins, "", pattern>,
  SIMCInstr<opName, SIEncodingFamily.NONE> {

  // Text pieces; FLAT_Real concatenates them into its asm string.
  string Mnemonic = opName;
  string AsmOperands = asmOps;

  // Segment selectors, overridden by the global/scratch definitions.
  bits<1> is_flat_global = 0;
  bits<1> is_flat_scratch = 0;

  // Which register operands the instruction carries.
  bits<1> has_vdst = 1;
  bits<1> has_vaddr = 1;
  bits<1> has_data = 1;

  // "Has saddr" and "saddr enabled" are tracked separately because
  // saddr is only valid for scratch and global instructions. Pre-gfx9
  // these bits were reserved, so the original flat segment instructions
  // should not necessarily have the disabled value written into them.
  bits<1> has_saddr = 0;
  bits<1> enabled_saddr = 0;
  bits<7> saddr_value = 0;

  // has_glc says whether glc is a real operand; when it is not,
  // FLAT_Real encodes glcValue instead. The dlc pair presumably works the
  // same way (its encoding is not part of FLAT_Real).
  bits<1> has_glc  = 1;
  bits<1> glcValue = 0;
  bits<1> has_dlc  = 1;
  bits<1> dlcValue = 0;

  let isCodeGenOnly = 1;
  let isPseudo = 1;

  let FLAT = 1;

  let hasSideEffects = 0;
  let UseNamedOperandTable = 1;
  let SchedRW = [WriteVMEM];

  // Global wins over scratch; plain flat needs the flat address space.
  let SubtargetPredicate = !if(is_flat_global, HasFlatGlobalInsts,
    !if(is_flat_scratch, HasFlatScratchInsts, HasFlatAddressSpace));

  // TODO: M0 if it could possibly access LDS (before gfx9? only)?
  let Uses = !if(is_flat_global, [EXEC], [EXEC, FLAT_SCR]);

  // Internally, FLAT instructions are executed as both an LDS and a
  // Buffer instruction; so, they increment both VM_CNT and LGKM_CNT
  // and are not considered done until both have been decremented.
  // Global and scratch accesses leave LGKM_CNT clear.
  let VM_CNT = 1;
  let LGKM_CNT = !not(!or(is_flat_global, is_flat_scratch));

  // Mirror the segment selectors into the instruction flags.
  let IsFlatScratch = is_flat_scratch;
  let IsFlatGlobal = is_flat_global;
}
74
// Encodable counterpart of a FLAT_Pseudo. It reuses the pseudo's operand
// lists, builds the asm string from the pseudo's Mnemonic and AsmOperands,
// and lays the operands out in a 64-bit instruction word (Enc64).
//
//   op - 7-bit opcode, placed in Inst{24-18}.
//   ps - the pseudo this real instruction is derived from.
class FLAT_Real <bits<7> op, FLAT_Pseudo ps> :
  InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
  Enc64 {

  let isCodeGenOnly = 0;
  let isPseudo = 0;

  // Flags carried over from the pseudo.
  let TSFlags = ps.TSFlags;
  let UseNamedOperandTable = ps.UseNamedOperandTable;
  let SubtargetPredicate = ps.SubtargetPredicate;
  let OtherPredicates = ps.OtherPredicates;
  let AsmMatchConverter  = ps.AsmMatchConverter;

  // Register operand fields.
  bits<8> vaddr;
  bits<8> vdata;
  bits<7> saddr;
  bits<8> vdst;

  // Cache-policy operand fields (dlc is declared but not placed here).
  bits<1> slc;
  bits<1> glc;
  bits<1> dlc;

  // Only valid on gfx9
  bits<1> lds = 0; // XXX - What does this actually do?

  // Segment, 00=flat, 01=scratch, 10=global, 11=reserved
  bits<2> seg = !if(ps.is_flat_global, 0b10,
                  !if(ps.is_flat_scratch, 0b01, 0));

  // Signed offset. For flat accesses the highest bit is ignored and the
  // field is treated as 12-bit unsigned.
  bits<13> offset;
  bits<1> nv = 0; // XXX - What does this actually do?

  // We don't use tfe right now, and it was removed in gfx9.
  bits<1> tfe = 0;

  // Only valid on GFX9+
  let Inst{12-0}  = offset;
  let Inst{13}    = lds;
  let Inst{15-14} = seg;

  // glc falls back to the pseudo's fixed value when it is not an operand.
  let Inst{16}    = !if(ps.has_glc, glc, ps.glcValue);
  let Inst{17}    = slc;
  let Inst{24-18} = op;
  let Inst{31-26} = 0x37; // Encoding.

  // Register slots the pseudo does not have are left unset (?).
  let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
  let Inst{47-40} = !if(ps.has_data, vdata, ?);

  // saddr slot: the register when enabled, 0x7f when present but
  // disabled, 0 when the pseudo has no saddr at all.
  let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7f), 0);

  let Inst{55}    = nv; // nv on GFX9+, TFE before.
  let Inst{63-56} = !if(ps.has_vdst, vdst, ?);
}
131
// Mix-in that tags a record with an saddr flag and a shared key name.
//   is_saddr - 1 for the saddr-enabled variant, 0 otherwise.
//   Name     - key stored in SaddrOp; presumably used elsewhere to pair the
//              two variants of one operation.
class GlobalSaddrTable <bit is_saddr, string Name = ""> {
  string SaddrOp = Name;
  bit IsSaddr = is_saddr;
}
136
// TODO: Is exec allowed for saddr? The disabled value 0x7f is the
// same encoding value as exec_hi, so it isn't possible to use that if
// saddr is 32-bit (which isn't handled here yet).
//
// Load pseudo used by the flat and global segments.
//   opName        - mnemonic.
//   regClass      - register class of the result ($vdst).
//   HasTiedOutput - adds a $vdst_in input tied to $vdst; glc/slc/dlc then
//                   use the GLC/SLC/DLC operand classes instead of the _0
//                   ones (see the FIXME below).
//   HasSaddr      - the instruction has an saddr slot; ", off" is printed
//                   when the slot is not enabled.
//   EnableSaddr   - $saddr is a real SReg_64 operand and $vaddr becomes a
//                   VGPR_32 instead of a VReg_64.
class FLAT_Load_Pseudo <string opName, RegisterClass regClass,
  bit HasTiedOutput = 0,
  bit HasSaddr = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
  opName,
  (outs regClass:$vdst),
  !con(
    !if(EnableSaddr,
      (ins SReg_64:$saddr, VGPR_32:$vaddr),
      (ins VReg_64:$vaddr)),
    (ins flat_offset:$offset),
    // FIXME: Operands with default values do not work with following non-optional operands.
    !if(HasTiedOutput,
      (ins GLC:$glc, SLC:$slc, DLC:$dlc, regClass:$vdst_in),
      (ins GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc))),
  " $vdst, $vaddr"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc$dlc"> {
  let mayLoad = 1;
  let maybeAtomic = 1;
  let has_data = 0;

  let has_saddr = HasSaddr;
  let enabled_saddr = EnableSaddr;

  // "_SADDR" is appended only when the slot both exists and is enabled.
  let PseudoInstr = opName#!if(HasSaddr, !if(EnableSaddr, "_SADDR", ""), "");

  // The tied input shares $vdst's register and takes no encoding bits.
  let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
  let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", "");
}
165
// Store pseudo used by the flat and global segments.
//   opName      - mnemonic.
//   vdataClass  - register class of the stored value ($vdata).
//   HasSaddr    - the instruction has an saddr slot; ", off" is printed
//                 when the slot is not enabled.
//   EnableSaddr - $saddr is a real SReg_64 operand and $vaddr becomes a
//                 VGPR_32 instead of a VReg_64.
class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass,
  bit HasSaddr = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
  opName,
  (outs),
  !con(
    !if(EnableSaddr,
      (ins VGPR_32:$vaddr, vdataClass:$vdata, SReg_64:$saddr),
      (ins VReg_64:$vaddr, vdataClass:$vdata)),
    (ins flat_offset:$offset, GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc)),
  " $vaddr, $vdata"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc$dlc"> {
  let mayStore = 1;
  let mayLoad  = 0;
  let maybeAtomic = 1;

  // Stores produce no register result.
  let has_vdst = 0;

  let has_saddr = HasSaddr;
  let enabled_saddr = EnableSaddr;

  // "_SADDR" is appended only when the slot both exists and is enabled.
  let PseudoInstr = opName#!if(HasSaddr, !if(EnableSaddr, "_SADDR", ""), "");
}
184
// Global-segment load pair: <NAME> (saddr slot present but disabled) and
// <NAME>_SADDR (saddr enabled). HasTiedInput is forwarded as the load's
// HasTiedOutput argument.
multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass, bit HasTiedInput = 0> {
  let SubtargetPredicate = HasFlatGlobalInsts, is_flat_global = 1 in {
    def "" :
      FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1, 0>,
      GlobalSaddrTable<0, opName>;

    def _SADDR :
      FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1, 1>,
      GlobalSaddrTable<1, opName>;
  }
}
193
// Global load without a $vaddr operand (has_vaddr = 0); the only address
// operand is the optional $saddr.
//   opName          - mnemonic.
//   regClass        - register class of the result ($vdst).
//   HasTiedOutput   - appends a $vdst_in input tied to $vdst.
//   HasSignedOffset - accepted but not referenced in this class.
//   EnableSaddr     - adds the SReg_64 $saddr operand; otherwise "off" is
//                     printed in its place.
class FLAT_Global_Load_AddTid_Pseudo <string opName, RegisterClass regClass,
  bit HasTiedOutput = 0, bit HasSignedOffset = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
  opName,
  (outs regClass:$vdst),
  !con(
    !if(EnableSaddr, (ins SReg_64:$saddr), (ins)),
    (ins flat_offset:$offset, GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc),
    !if(HasTiedOutput, (ins regClass:$vdst_in), (ins))),
  " $vdst, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc"> {
  let is_flat_global = 1;

  let mayLoad = 1;
  let maybeAtomic = 1;

  let has_data = 0;
  let has_vaddr = 0;

  // The saddr slot always exists; EnableSaddr decides whether it is used.
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;

  let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", "");

  // The tied input shares $vdst's register and takes no encoding bits.
  let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
  let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", "");
}
214
// AddTid global load pair: <NAME> (saddr disabled) and <NAME>_SADDR
// (saddr enabled). All other arguments are forwarded unchanged.
multiclass FLAT_Global_Load_AddTid_Pseudo<string opName, RegisterClass regClass,
  bit HasTiedOutput = 0, bit HasSignedOffset = 0> {
  def "" :
    FLAT_Global_Load_AddTid_Pseudo<opName, regClass, HasTiedOutput,
                                   HasSignedOffset, 0>,
    GlobalSaddrTable<0, opName>;

  def _SADDR :
    FLAT_Global_Load_AddTid_Pseudo<opName, regClass, HasTiedOutput,
                                   HasSignedOffset, 1>,
    GlobalSaddrTable<1, opName>;
}
222
// Global-segment store pair: <NAME> (saddr slot present but disabled) and
// <NAME>_SADDR (saddr enabled).
multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> {
  let SubtargetPredicate = HasFlatGlobalInsts, is_flat_global = 1 in {
    def "" :
      FLAT_Store_Pseudo<opName, regClass, 1, 0>,
      GlobalSaddrTable<0, opName>;

    def _SADDR :
      FLAT_Store_Pseudo<opName, regClass, 1, 1>,
      GlobalSaddrTable<1, opName>;
  }
}
231
// Global store without a $vaddr operand (has_vaddr = 0); the only address
// operand is the optional $saddr.
//   opName          - mnemonic.
//   vdataClass      - register class of the stored value ($vdata).
//   HasSignedOffset - accepted but not referenced in this class.
//   EnableSaddr     - adds the SReg_64 $saddr operand; otherwise "off" is
//                     printed in its place.
// Note: unlike the other stores in this file, glc/slc/dlc use the
// GLC/SLC/DLC operand classes rather than the _0 variants.
class FLAT_Global_Store_AddTid_Pseudo <string opName, RegisterClass vdataClass,
  bit HasSignedOffset = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
  opName,
  (outs),
  !con(
    !if(EnableSaddr,
      (ins vdataClass:$vdata, SReg_64:$saddr),
      (ins vdataClass:$vdata)),
    (ins flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
  " $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc"> {
  let is_flat_global = 1;

  let mayStore = 1;
  let mayLoad  = 0;
  let maybeAtomic = 1;

  let has_vdst = 0;
  let has_vaddr = 0;

  // The saddr slot always exists; EnableSaddr decides whether it is used.
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;

  let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", "");
}
249
// AddTid global store pair: <NAME> (saddr disabled) and <NAME>_SADDR
// (saddr enabled). HasSignedOffset is forwarded unchanged.
multiclass FLAT_Global_Store_AddTid_Pseudo<string opName, RegisterClass regClass,
  bit HasSignedOffset = 0> {
  def "" :
    FLAT_Global_Store_AddTid_Pseudo<opName, regClass, HasSignedOffset, 0>,
    GlobalSaddrTable<0, opName>;

  def _SADDR :
    FLAT_Global_Store_AddTid_Pseudo<opName, regClass, HasSignedOffset, 1>,
    GlobalSaddrTable<1, opName>;
}
257
// Mix-in that tags a scratch instruction with a shared operation name and
// an addressing-mode string. Within this file the mode is one of "SV",
// "SS" or "ST".
class FlatScratchInst <string sv_op, string mode> {
  string Mode = mode;
  string SVOp = sv_op;
}
262
// Scratch-segment load pseudo.
//   opName        - mnemonic.
//   regClass      - register class of the result ($vdst).
//   HasTiedOutput - adds a $vdst_in input tied to $vdst; glc/slc/dlc then
//                   use the GLC/SLC/DLC operand classes instead of the _0
//                   ones.
//   EnableSaddr   - address comes from an SReg_32_XEXEC_HI $saddr; no
//                   $vaddr operand is added in that case.
//   EnableVaddr   - address comes from a VGPR_32 $vaddr; defaults to the
//                   opposite of EnableSaddr. With both off only $offset
//                   remains.
class FLAT_Scratch_Load_Pseudo <string opName, RegisterClass regClass,
  bit HasTiedOutput = 0,
  bit EnableSaddr = 0,
  bit EnableVaddr = !not(EnableSaddr)>
  : FLAT_Pseudo<
  opName,
  (outs regClass:$vdst),
  !con(
     // Address register, if any: saddr takes precedence over vaddr.
     !if(EnableSaddr,
       (ins SReg_32_XEXEC_HI:$saddr),
       !if(EnableVaddr,
         (ins VGPR_32:$vaddr),
         (ins))),
     (ins flat_offset:$offset),
     !if(HasTiedOutput,
       (ins GLC:$glc, SLC:$slc, DLC:$dlc, regClass:$vdst_in),
       (ins GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc))),
  " $vdst, "#!if(EnableVaddr, "$vaddr, ", "off, ")#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc"> {
  let mayLoad = 1;
  let maybeAtomic = 1;
  let has_data = 0;

  // The saddr slot always exists for scratch; the flags pick what is used.
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;
  let has_vaddr = EnableVaddr;

  // Suffix: "_SADDR" for saddr, none for vaddr, "_ST" when neither is used.
  let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", !if(EnableVaddr, "", "_ST"));

  // The tied input shares $vdst's register and takes no encoding bits.
  let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
  let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", "");
}
290
// Scratch-segment store pseudo.
//   opName      - mnemonic.
//   vdataClass  - register class of the stored value ($vdata).
//   EnableSaddr - address comes from an SReg_32_XEXEC_HI $saddr; no $vaddr
//                 operand is added in that case.
//   EnableVaddr - address comes from a VGPR_32 $vaddr; defaults to the
//                 opposite of EnableSaddr. With both off only $offset
//                 remains.
class FLAT_Scratch_Store_Pseudo <string opName, RegisterClass vdataClass, bit EnableSaddr = 0,
  bit EnableVaddr = !not(EnableSaddr)> : FLAT_Pseudo<
  opName,
  (outs),
  !con(
    (ins vdataClass:$vdata),
    // Address register, if any: saddr takes precedence over vaddr.
    !if(EnableSaddr,
      (ins SReg_32_XEXEC_HI:$saddr),
      !if(EnableVaddr,
        (ins VGPR_32:$vaddr),
        (ins))),
    (ins flat_offset:$offset, GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc)),
  " "#!if(EnableVaddr, "$vaddr", "off")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc"> {
  let mayStore = 1;
  let mayLoad  = 0;
  let maybeAtomic = 1;

  // Stores produce no register result.
  let has_vdst = 0;

  // The saddr slot always exists for scratch; the flags pick what is used.
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;
  let has_vaddr = EnableVaddr;

  // Suffix: "_SADDR" for saddr, none for vaddr, "_ST" when neither is used.
  let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", !if(EnableVaddr, "", "_ST"));
}
310
// Scratch load triple, one record per addressing mode:
//   <NAME>        - "SV": vaddr enabled, saddr disabled.
//   <NAME>_SADDR  - "SS": saddr enabled, vaddr disabled.
//   <NAME>_ST     - "ST": neither enabled; gated on HasFlatScratchSTMode.
multiclass FLAT_Scratch_Load_Pseudo<string opName, RegisterClass regClass, bit HasTiedOutput = 0> {
  let is_flat_scratch = 1 in {
    def "" :
      FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput, 0, 1>,
      FlatScratchInst<opName, "SV">;

    def _SADDR :
      FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput, 1, 0>,
      FlatScratchInst<opName, "SS">;

    let SubtargetPredicate = HasFlatScratchSTMode in
    def _ST :
      FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput, 0, 0>,
      FlatScratchInst<opName, "ST">;
  }
}
323
// Scratch store triple, one record per addressing mode:
//   <NAME>        - "SV": vaddr enabled, saddr disabled.
//   <NAME>_SADDR  - "SS": saddr enabled, vaddr disabled.
//   <NAME>_ST     - "ST": neither enabled; gated on HasFlatScratchSTMode.
multiclass FLAT_Scratch_Store_Pseudo<string opName, RegisterClass regClass> {
  let is_flat_scratch = 1 in {
    def "" :
      FLAT_Scratch_Store_Pseudo<opName, regClass, 0, 1>,
      FlatScratchInst<opName, "SV">;

    def _SADDR :
      FLAT_Scratch_Store_Pseudo<opName, regClass, 1, 0>,
      FlatScratchInst<opName, "SS">;

    let SubtargetPredicate = HasFlatScratchSTMode in
    def _ST :
      FLAT_Scratch_Store_Pseudo<opName, regClass, 0, 0>,
      FlatScratchInst<opName, "ST">;
  }
}
336
// Base for atomics that produce no result: both loads and stores memory,
// has no $vdst, and has no glc/dlc operands (both fixed at 0).
// Arguments are forwarded unchanged to FLAT_Pseudo.
class FLAT_AtomicNoRet_Pseudo<string opName, dag outs, dag ins,
                               string asm, list<dag> pattern = []> :
  FLAT_Pseudo<opName, outs, ins, asm, pattern> {
    let mayStore = 1;
    let mayLoad = 1;
    let maybeAtomic = 1;

    let has_vdst = 0;

    // glc and dlc are not operands here; these fixed values stand in.
    let has_glc  = 0;
    let has_dlc  = 0;
    let glcValue = 0;
    let dlcValue = 0;
}
349
// Base for atomics that return a value: starts from the no-return base,
// re-enables $vdst and fixes the glc value at 1 (glc is still not an
// operand of its own). Arguments are forwarded unchanged.
class FLAT_AtomicRet_Pseudo<string opName, dag outs, dag ins,
                            string asm, list<dag> pattern = []>
  : FLAT_AtomicNoRet_Pseudo<opName, outs, ins, asm, pattern> {
  let has_vdst = 1;
  let glcValue = 1;
  let dlcValue = 0;

  let hasPostISelHook = 1;
  let PseudoInstr = NAME # "_RTN";
}
359
// Flat-segment atomic pair: <NAME> (no result) and <NAME>_RTN (result in
// $vdst, with a selection pattern).
//   opName            - mnemonic.
//   vdst_rc / vt      - register class and value type of the result.
//   atomic            - pattern operator matched by the _RTN form;
//                       null_frag by default.
//   data_vt / data_rc - value type and register class of $vdata; default
//                       to the result's.
//   isFP              - forwarded to FPAtomic; defaults to
//                       isFloatType<data_vt>.ret.
multiclass FLAT_Atomic_Pseudo<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  SDPatternOperator atomic = null_frag,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc,
  bit isFP = isFloatType<data_vt>.ret> {

  def "" :
    FLAT_AtomicNoRet_Pseudo <
      opName,
      (outs),
      (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC_0:$slc),
      " $vaddr, $vdata$offset$slc">,
    GlobalSaddrTable<0, opName>,
    AtomicNoRet <opName, 0> {
    let AddedComplexity = -1; // Prefer global atomics if available
    let FPAtomic = isFP;
    let PseudoInstr = NAME;
  }

  def _RTN :
    FLAT_AtomicRet_Pseudo <
      opName,
      (outs vdst_rc:$vdst),
      (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, GLC_1:$glc1, SLC_0:$slc),
      " $vdst, $vaddr, $vdata$offset$glc1$slc",
      [(set vt:$vdst,
        (atomic (FLATOffset i64:$vaddr, i16:$offset), data_vt:$vdata))]>,
    GlobalSaddrTable<0, opName#"_rtn">,
    AtomicNoRet <opName, 1> {
    let AddedComplexity = -1; // Prefer global atomics if available
    let FPAtomic = isFP;
  }
}
391
// Global-segment atomics without a result: <NAME> (64-bit $vaddr, saddr
// printed as "off") and <NAME>_SADDR (32-bit $vaddr plus SReg_64 $saddr).
//   opName            - mnemonic.
//   vdst_rc / vt      - register class and value type of the operation.
//   atomic            - accepted for signature parity; no pattern is
//                       attached in this multiclass.
//   data_vt / data_rc - value type and register class of $vdata; default
//                       to vt / vdst_rc.
//   isFP              - forwarded to FPAtomic; defaults to
//                       isFloatType<data_vt>.ret.
multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  SDPatternOperator atomic = null_frag,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc,
  bit isFP = isFloatType<data_vt>.ret> {

  def "" :
    FLAT_AtomicNoRet_Pseudo <
      opName,
      (outs),
      (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC_0:$slc),
      " $vaddr, $vdata, off$offset$slc">,
    GlobalSaddrTable<0, opName>,
    AtomicNoRet <opName, 0> {
    let FPAtomic = isFP;
    let PseudoInstr = NAME;
    // saddr slot exists but stays disabled.
    let has_saddr = 1;
  }

  def _SADDR :
    FLAT_AtomicNoRet_Pseudo <
      opName,
      (outs),
      (ins VGPR_32:$vaddr, data_rc:$vdata, SReg_64:$saddr, flat_offset:$offset, SLC_0:$slc),
      " $vaddr, $vdata, $saddr$offset$slc">,
    GlobalSaddrTable<1, opName>,
    AtomicNoRet <opName#"_saddr", 0> {
    let FPAtomic = isFP;
    let PseudoInstr = NAME#"_SADDR";
    let has_saddr = 1;
    let enabled_saddr = 1;
  }
}
424
// Global-segment atomics with a result: <NAME>_RTN (64-bit $vaddr, saddr
// printed as "off", carries the selection pattern) and <NAME>_SADDR_RTN
// (32-bit $vaddr plus SReg_64 $saddr, no pattern).
//   opName            - mnemonic.
//   vdst_rc / vt      - register class and value type of the result.
//   atomic            - pattern operator matched by the _RTN form;
//                       null_frag by default.
//   data_vt / data_rc - value type and register class of $vdata; default
//                       to the result's.
//   isFP              - forwarded to FPAtomic; defaults to
//                       isFloatType<data_vt>.ret.
multiclass FLAT_Global_Atomic_Pseudo_RTN<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  SDPatternOperator atomic = null_frag,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc,
  bit isFP = isFloatType<data_vt>.ret> {

  def _RTN :
    FLAT_AtomicRet_Pseudo <
      opName,
      (outs vdst_rc:$vdst),
      (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, GLC_1:$glc1, SLC_0:$slc),
      " $vdst, $vaddr, $vdata, off$offset$glc1$slc",
      [(set vt:$vdst,
        (atomic (FLATOffsetSigned i64:$vaddr, i16:$offset), data_vt:$vdata))]>,
    GlobalSaddrTable<0, opName#"_rtn">,
    AtomicNoRet <opName, 1> {
    let FPAtomic = isFP;
    // saddr slot exists but stays disabled.
    let has_saddr = 1;
  }

  def _SADDR_RTN :
    FLAT_AtomicRet_Pseudo <
      opName,
      (outs vdst_rc:$vdst),
      (ins VGPR_32:$vaddr, data_rc:$vdata, SReg_64:$saddr, flat_offset:$offset, GLC_1:$glc1, SLC_0:$slc),
      " $vdst, $vaddr, $vdata, $saddr$offset$glc1$slc">,
    GlobalSaddrTable<1, opName#"_rtn">,
    AtomicNoRet <opName#"_saddr", 1> {
    let FPAtomic = isFP;
    // Overrides the "_RTN" name set by FLAT_AtomicRet_Pseudo.
    let PseudoInstr = NAME#"_SADDR_RTN";
    let has_saddr = 1;
    let enabled_saddr = 1;
  }
}
458
// Full global atomic family for one operation: instantiates both the
// no-return and the returning multiclasses under the same NAME, marked as
// global and gated on HasFlatGlobalInsts.
//   atomic_rtn    - pattern operator handed to the returning variants.
//   atomic_no_rtn - pattern operator handed to the no-return variants.
// The remaining arguments are forwarded as-is; isFP is left at each
// multiclass's default.
multiclass FLAT_Global_Atomic_Pseudo<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  SDPatternOperator atomic_rtn = null_frag,
  SDPatternOperator atomic_no_rtn = null_frag,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc> {
  let SubtargetPredicate = HasFlatGlobalInsts, is_flat_global = 1 in {
    defm "" :
      FLAT_Global_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt, atomic_no_rtn,
                                       data_vt, data_rc>;
    defm "" :
      FLAT_Global_Atomic_Pseudo_RTN<opName, vdst_rc, vt, atomic_rtn,
                                    data_vt, data_rc>;
  }
}
472
473//===----------------------------------------------------------------------===//
474// Flat Instructions
475//===----------------------------------------------------------------------===//
476
// Flat-segment loads: mnemonic and destination register class.
def FLAT_LOAD_UBYTE :
  FLAT_Load_Pseudo<"flat_load_ubyte", VGPR_32>;
def FLAT_LOAD_SBYTE :
  FLAT_Load_Pseudo<"flat_load_sbyte", VGPR_32>;
def FLAT_LOAD_USHORT :
  FLAT_Load_Pseudo<"flat_load_ushort", VGPR_32>;
def FLAT_LOAD_SSHORT :
  FLAT_Load_Pseudo<"flat_load_sshort", VGPR_32>;
def FLAT_LOAD_DWORD :
  FLAT_Load_Pseudo<"flat_load_dword", VGPR_32>;
def FLAT_LOAD_DWORDX2 :
  FLAT_Load_Pseudo<"flat_load_dwordx2", VReg_64>;
def FLAT_LOAD_DWORDX4 :
  FLAT_Load_Pseudo<"flat_load_dwordx4", VReg_128>;
def FLAT_LOAD_DWORDX3 :
  FLAT_Load_Pseudo<"flat_load_dwordx3", VReg_96>;

// Flat-segment stores: mnemonic and source data register class.
def FLAT_STORE_BYTE :
  FLAT_Store_Pseudo<"flat_store_byte", VGPR_32>;
def FLAT_STORE_SHORT :
  FLAT_Store_Pseudo<"flat_store_short", VGPR_32>;
def FLAT_STORE_DWORD :
  FLAT_Store_Pseudo<"flat_store_dword", VGPR_32>;
def FLAT_STORE_DWORDX2 :
  FLAT_Store_Pseudo<"flat_store_dwordx2", VReg_64>;
def FLAT_STORE_DWORDX4 :
  FLAT_Store_Pseudo<"flat_store_dwordx4", VReg_128>;
def FLAT_STORE_DWORDX3 :
  FLAT_Store_Pseudo<"flat_store_dwordx3", VReg_96>;
492
// D16 flat loads and stores, gated on HasD16LoadStore. The loads pass
// HasTiedOutput = 1, so each takes a $vdst_in input tied to $vdst.
let SubtargetPredicate = HasD16LoadStore in {
def FLAT_LOAD_UBYTE_D16 :
  FLAT_Load_Pseudo<"flat_load_ubyte_d16", VGPR_32, 1>;
def FLAT_LOAD_UBYTE_D16_HI :
  FLAT_Load_Pseudo<"flat_load_ubyte_d16_hi", VGPR_32, 1>;
def FLAT_LOAD_SBYTE_D16 :
  FLAT_Load_Pseudo<"flat_load_sbyte_d16", VGPR_32, 1>;
def FLAT_LOAD_SBYTE_D16_HI :
  FLAT_Load_Pseudo<"flat_load_sbyte_d16_hi", VGPR_32, 1>;
def FLAT_LOAD_SHORT_D16 :
  FLAT_Load_Pseudo<"flat_load_short_d16", VGPR_32, 1>;
def FLAT_LOAD_SHORT_D16_HI :
  FLAT_Load_Pseudo<"flat_load_short_d16_hi", VGPR_32, 1>;

def FLAT_STORE_BYTE_D16_HI :
  FLAT_Store_Pseudo<"flat_store_byte_d16_hi", VGPR_32>;
def FLAT_STORE_SHORT_D16_HI :
  FLAT_Store_Pseudo<"flat_store_short_d16_hi", VGPR_32>;
}
504
// Flat-segment atomics. Each defm yields <NAME> and <NAME>_RTN.
// Arguments: mnemonic, result register class, result type, pattern
// operator, and (cmpswap only) a wider data type / register class.

// Compare-and-swap: $vdata is twice as wide as the result.
defm FLAT_ATOMIC_CMPSWAP :
  FLAT_Atomic_Pseudo<"flat_atomic_cmpswap", VGPR_32, i32,
                     AMDGPUatomic_cmp_swap_flat_32, v2i32, VReg_64>;
defm FLAT_ATOMIC_CMPSWAP_X2 :
  FLAT_Atomic_Pseudo<"flat_atomic_cmpswap_x2", VReg_64, i64,
                     AMDGPUatomic_cmp_swap_flat_64, v2i64, VReg_128>;

defm FLAT_ATOMIC_SWAP :
  FLAT_Atomic_Pseudo<"flat_atomic_swap", VGPR_32, i32, atomic_swap_flat_32>;
defm FLAT_ATOMIC_SWAP_X2 :
  FLAT_Atomic_Pseudo<"flat_atomic_swap_x2", VReg_64, i64, atomic_swap_flat_64>;

// 32-bit read-modify-write operations.
defm FLAT_ATOMIC_ADD :
  FLAT_Atomic_Pseudo<"flat_atomic_add", VGPR_32, i32, atomic_load_add_flat_32>;
defm FLAT_ATOMIC_SUB :
  FLAT_Atomic_Pseudo<"flat_atomic_sub", VGPR_32, i32, atomic_load_sub_flat_32>;
defm FLAT_ATOMIC_SMIN :
  FLAT_Atomic_Pseudo<"flat_atomic_smin", VGPR_32, i32, atomic_load_min_flat_32>;
defm FLAT_ATOMIC_UMIN :
  FLAT_Atomic_Pseudo<"flat_atomic_umin", VGPR_32, i32, atomic_load_umin_flat_32>;
defm FLAT_ATOMIC_SMAX :
  FLAT_Atomic_Pseudo<"flat_atomic_smax", VGPR_32, i32, atomic_load_max_flat_32>;
defm FLAT_ATOMIC_UMAX :
  FLAT_Atomic_Pseudo<"flat_atomic_umax", VGPR_32, i32, atomic_load_umax_flat_32>;
defm FLAT_ATOMIC_AND :
  FLAT_Atomic_Pseudo<"flat_atomic_and", VGPR_32, i32, atomic_load_and_flat_32>;
defm FLAT_ATOMIC_OR :
  FLAT_Atomic_Pseudo<"flat_atomic_or", VGPR_32, i32, atomic_load_or_flat_32>;
defm FLAT_ATOMIC_XOR :
  FLAT_Atomic_Pseudo<"flat_atomic_xor", VGPR_32, i32, atomic_load_xor_flat_32>;
defm FLAT_ATOMIC_INC :
  FLAT_Atomic_Pseudo<"flat_atomic_inc", VGPR_32, i32, atomic_inc_flat_32>;
defm FLAT_ATOMIC_DEC :
  FLAT_Atomic_Pseudo<"flat_atomic_dec", VGPR_32, i32, atomic_dec_flat_32>;

// 64-bit read-modify-write operations.
defm FLAT_ATOMIC_ADD_X2 :
  FLAT_Atomic_Pseudo<"flat_atomic_add_x2", VReg_64, i64, atomic_load_add_flat_64>;
defm FLAT_ATOMIC_SUB_X2 :
  FLAT_Atomic_Pseudo<"flat_atomic_sub_x2", VReg_64, i64, atomic_load_sub_flat_64>;
defm FLAT_ATOMIC_SMIN_X2 :
  FLAT_Atomic_Pseudo<"flat_atomic_smin_x2", VReg_64, i64, atomic_load_min_flat_64>;
defm FLAT_ATOMIC_UMIN_X2 :
  FLAT_Atomic_Pseudo<"flat_atomic_umin_x2", VReg_64, i64, atomic_load_umin_flat_64>;
defm FLAT_ATOMIC_SMAX_X2 :
  FLAT_Atomic_Pseudo<"flat_atomic_smax_x2", VReg_64, i64, atomic_load_max_flat_64>;
defm FLAT_ATOMIC_UMAX_X2 :
  FLAT_Atomic_Pseudo<"flat_atomic_umax_x2", VReg_64, i64, atomic_load_umax_flat_64>;
defm FLAT_ATOMIC_AND_X2 :
  FLAT_Atomic_Pseudo<"flat_atomic_and_x2", VReg_64, i64, atomic_load_and_flat_64>;
defm FLAT_ATOMIC_OR_X2 :
  FLAT_Atomic_Pseudo<"flat_atomic_or_x2", VReg_64, i64, atomic_load_or_flat_64>;
defm FLAT_ATOMIC_XOR_X2 :
  FLAT_Atomic_Pseudo<"flat_atomic_xor_x2", VReg_64, i64, atomic_load_xor_flat_64>;
defm FLAT_ATOMIC_INC_X2 :
  FLAT_Atomic_Pseudo<"flat_atomic_inc_x2", VReg_64, i64, atomic_inc_flat_64>;
defm FLAT_ATOMIC_DEC_X2 :
  FLAT_Atomic_Pseudo<"flat_atomic_dec_x2", VReg_64, i64, atomic_dec_flat_64>;
584
// GFX7-, GFX10-only flat instructions.
// Floating-point flat atomics; none of them supplies a pattern operator
// (null_frag is passed explicitly or left as the default).
let SubtargetPredicate = isGFX7GFX10 in {

defm FLAT_ATOMIC_FCMPSWAP :
  FLAT_Atomic_Pseudo<"flat_atomic_fcmpswap", VGPR_32, f32,
                     null_frag, v2f32, VReg_64>;
defm FLAT_ATOMIC_FCMPSWAP_X2 :
  FLAT_Atomic_Pseudo<"flat_atomic_fcmpswap_x2", VReg_64, f64,
                     null_frag, v2f64, VReg_128>;

defm FLAT_ATOMIC_FMIN :
  FLAT_Atomic_Pseudo<"flat_atomic_fmin", VGPR_32, f32>;
defm FLAT_ATOMIC_FMAX :
  FLAT_Atomic_Pseudo<"flat_atomic_fmax", VGPR_32, f32>;

defm FLAT_ATOMIC_FMIN_X2 :
  FLAT_Atomic_Pseudo<"flat_atomic_fmin_x2", VReg_64, f64>;
defm FLAT_ATOMIC_FMAX_X2 :
  FLAT_Atomic_Pseudo<"flat_atomic_fmax_x2", VReg_64, f64>;

} // End SubtargetPredicate = isGFX7GFX10
607
// Global-segment loads. Each defm yields <NAME> and <NAME>_SADDR.
defm GLOBAL_LOAD_UBYTE :
  FLAT_Global_Load_Pseudo<"global_load_ubyte", VGPR_32>;
defm GLOBAL_LOAD_SBYTE :
  FLAT_Global_Load_Pseudo<"global_load_sbyte", VGPR_32>;
defm GLOBAL_LOAD_USHORT :
  FLAT_Global_Load_Pseudo<"global_load_ushort", VGPR_32>;
defm GLOBAL_LOAD_SSHORT :
  FLAT_Global_Load_Pseudo<"global_load_sshort", VGPR_32>;
defm GLOBAL_LOAD_DWORD :
  FLAT_Global_Load_Pseudo<"global_load_dword", VGPR_32>;
defm GLOBAL_LOAD_DWORDX2 :
  FLAT_Global_Load_Pseudo<"global_load_dwordx2", VReg_64>;
defm GLOBAL_LOAD_DWORDX3 :
  FLAT_Global_Load_Pseudo<"global_load_dwordx3", VReg_96>;
defm GLOBAL_LOAD_DWORDX4 :
  FLAT_Global_Load_Pseudo<"global_load_dwordx4", VReg_128>;

// D16 global loads; the trailing 1 requests the tied $vdst_in input.
defm GLOBAL_LOAD_UBYTE_D16 :
  FLAT_Global_Load_Pseudo<"global_load_ubyte_d16", VGPR_32, 1>;
defm GLOBAL_LOAD_UBYTE_D16_HI :
  FLAT_Global_Load_Pseudo<"global_load_ubyte_d16_hi", VGPR_32, 1>;
defm GLOBAL_LOAD_SBYTE_D16 :
  FLAT_Global_Load_Pseudo<"global_load_sbyte_d16", VGPR_32, 1>;
defm GLOBAL_LOAD_SBYTE_D16_HI :
  FLAT_Global_Load_Pseudo<"global_load_sbyte_d16_hi", VGPR_32, 1>;
defm GLOBAL_LOAD_SHORT_D16 :
  FLAT_Global_Load_Pseudo<"global_load_short_d16", VGPR_32, 1>;
defm GLOBAL_LOAD_SHORT_D16_HI :
  FLAT_Global_Load_Pseudo<"global_load_short_d16_hi", VGPR_32, 1>;

// AddTid load, additionally gated on HasGFX10_BEncoding.
let OtherPredicates = [HasGFX10_BEncoding] in
defm GLOBAL_LOAD_DWORD_ADDTID :
  FLAT_Global_Load_AddTid_Pseudo<"global_load_dword_addtid", VGPR_32>;
625
// Global-segment stores. Each defm yields <NAME> and <NAME>_SADDR.
defm GLOBAL_STORE_BYTE :
  FLAT_Global_Store_Pseudo<"global_store_byte", VGPR_32>;
defm GLOBAL_STORE_SHORT :
  FLAT_Global_Store_Pseudo<"global_store_short", VGPR_32>;
defm GLOBAL_STORE_DWORD :
  FLAT_Global_Store_Pseudo<"global_store_dword", VGPR_32>;
defm GLOBAL_STORE_DWORDX2 :
  FLAT_Global_Store_Pseudo<"global_store_dwordx2", VReg_64>;
defm GLOBAL_STORE_DWORDX3 :
  FLAT_Global_Store_Pseudo<"global_store_dwordx3", VReg_96>;
defm GLOBAL_STORE_DWORDX4 :
  FLAT_Global_Store_Pseudo<"global_store_dwordx4", VReg_128>;

// AddTid store, additionally gated on HasGFX10_BEncoding.
let OtherPredicates = [HasGFX10_BEncoding] in
defm GLOBAL_STORE_DWORD_ADDTID :
  FLAT_Global_Store_AddTid_Pseudo<"global_store_dword_addtid", VGPR_32>;

// D16 high-half global stores.
defm GLOBAL_STORE_BYTE_D16_HI :
  FLAT_Global_Store_Pseudo<"global_store_byte_d16_hi", VGPR_32>;
defm GLOBAL_STORE_SHORT_D16_HI :
  FLAT_Global_Store_Pseudo<"global_store_short_d16_hi", VGPR_32>;
637
// Global-segment atomics. Each FLAT_Global_Atomic_Pseudo defm yields
// <NAME>, <NAME>_SADDR, <NAME>_RTN and <NAME>_SADDR_RTN.
// Arguments: mnemonic, result register class, result type, returning
// pattern operator, then (cmpswap only) the no-return operator and a wider
// data type / register class.
let is_flat_global = 1 in {

// Compare-and-swap: $vdata is twice as wide as the result.
defm GLOBAL_ATOMIC_CMPSWAP :
  FLAT_Global_Atomic_Pseudo<"global_atomic_cmpswap", VGPR_32, i32,
                            AMDGPUatomic_cmp_swap_global_32, null_frag,
                            v2i32, VReg_64>;
defm GLOBAL_ATOMIC_CMPSWAP_X2 :
  FLAT_Global_Atomic_Pseudo<"global_atomic_cmpswap_x2", VReg_64, i64,
                            AMDGPUatomic_cmp_swap_global_64, null_frag,
                            v2i64, VReg_128>;

defm GLOBAL_ATOMIC_SWAP :
  FLAT_Global_Atomic_Pseudo<"global_atomic_swap", VGPR_32, i32,
                            atomic_swap_global_32>;
defm GLOBAL_ATOMIC_SWAP_X2 :
  FLAT_Global_Atomic_Pseudo<"global_atomic_swap_x2", VReg_64, i64,
                            atomic_swap_global_64>;

// 32-bit read-modify-write operations.
defm GLOBAL_ATOMIC_ADD :
  FLAT_Global_Atomic_Pseudo<"global_atomic_add", VGPR_32, i32,
                            atomic_load_add_global_32>;
defm GLOBAL_ATOMIC_SUB :
  FLAT_Global_Atomic_Pseudo<"global_atomic_sub", VGPR_32, i32,
                            atomic_load_sub_global_32>;
defm GLOBAL_ATOMIC_SMIN :
  FLAT_Global_Atomic_Pseudo<"global_atomic_smin", VGPR_32, i32,
                            atomic_load_min_global_32>;
defm GLOBAL_ATOMIC_UMIN :
  FLAT_Global_Atomic_Pseudo<"global_atomic_umin", VGPR_32, i32,
                            atomic_load_umin_global_32>;
defm GLOBAL_ATOMIC_SMAX :
  FLAT_Global_Atomic_Pseudo<"global_atomic_smax", VGPR_32, i32,
                            atomic_load_max_global_32>;
defm GLOBAL_ATOMIC_UMAX :
  FLAT_Global_Atomic_Pseudo<"global_atomic_umax", VGPR_32, i32,
                            atomic_load_umax_global_32>;
defm GLOBAL_ATOMIC_AND :
  FLAT_Global_Atomic_Pseudo<"global_atomic_and", VGPR_32, i32,
                            atomic_load_and_global_32>;
defm GLOBAL_ATOMIC_OR :
  FLAT_Global_Atomic_Pseudo<"global_atomic_or", VGPR_32, i32,
                            atomic_load_or_global_32>;
defm GLOBAL_ATOMIC_XOR :
  FLAT_Global_Atomic_Pseudo<"global_atomic_xor", VGPR_32, i32,
                            atomic_load_xor_global_32>;
defm GLOBAL_ATOMIC_INC :
  FLAT_Global_Atomic_Pseudo<"global_atomic_inc", VGPR_32, i32,
                            atomic_inc_global_32>;
defm GLOBAL_ATOMIC_DEC :
  FLAT_Global_Atomic_Pseudo<"global_atomic_dec", VGPR_32, i32,
                            atomic_dec_global_32>;

// 64-bit read-modify-write operations.
defm GLOBAL_ATOMIC_ADD_X2 :
  FLAT_Global_Atomic_Pseudo<"global_atomic_add_x2", VReg_64, i64,
                            atomic_load_add_global_64>;
defm GLOBAL_ATOMIC_SUB_X2 :
  FLAT_Global_Atomic_Pseudo<"global_atomic_sub_x2", VReg_64, i64,
                            atomic_load_sub_global_64>;
defm GLOBAL_ATOMIC_SMIN_X2 :
  FLAT_Global_Atomic_Pseudo<"global_atomic_smin_x2", VReg_64, i64,
                            atomic_load_min_global_64>;
defm GLOBAL_ATOMIC_UMIN_X2 :
  FLAT_Global_Atomic_Pseudo<"global_atomic_umin_x2", VReg_64, i64,
                            atomic_load_umin_global_64>;
defm GLOBAL_ATOMIC_SMAX_X2 :
  FLAT_Global_Atomic_Pseudo<"global_atomic_smax_x2", VReg_64, i64,
                            atomic_load_max_global_64>;
defm GLOBAL_ATOMIC_UMAX_X2 :
  FLAT_Global_Atomic_Pseudo<"global_atomic_umax_x2", VReg_64, i64,
                            atomic_load_umax_global_64>;
defm GLOBAL_ATOMIC_AND_X2 :
  FLAT_Global_Atomic_Pseudo<"global_atomic_and_x2", VReg_64, i64,
                            atomic_load_and_global_64>;
defm GLOBAL_ATOMIC_OR_X2 :
  FLAT_Global_Atomic_Pseudo<"global_atomic_or_x2", VReg_64, i64,
                            atomic_load_or_global_64>;
defm GLOBAL_ATOMIC_XOR_X2 :
  FLAT_Global_Atomic_Pseudo<"global_atomic_xor_x2", VReg_64, i64,
                            atomic_load_xor_global_64>;
defm GLOBAL_ATOMIC_INC_X2 :
  FLAT_Global_Atomic_Pseudo<"global_atomic_inc_x2", VReg_64, i64,
                            atomic_inc_global_64>;
defm GLOBAL_ATOMIC_DEC_X2 :
  FLAT_Global_Atomic_Pseudo<"global_atomic_dec_x2", VReg_64, i64,
                            atomic_dec_global_64>;

// csub only gets the returning variants and is gated on
// HasGFX10_BEncoding.
let SubtargetPredicate = HasGFX10_BEncoding in
defm GLOBAL_ATOMIC_CSUB :
  FLAT_Global_Atomic_Pseudo_RTN<"global_atomic_csub", VGPR_32, i32,
                                int_amdgcn_global_atomic_csub>;
} // End is_flat_global = 1
724
725
726
// Scratch load/store pseudos; all of them require HasFlatScratchInsts.
let SubtargetPredicate = HasFlatScratchInsts in {

// Plain loads, from a single byte up to four dwords.
defm SCRATCH_LOAD_UBYTE   : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte", VGPR_32>;
defm SCRATCH_LOAD_SBYTE   : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte", VGPR_32>;
defm SCRATCH_LOAD_USHORT  : FLAT_Scratch_Load_Pseudo <"scratch_load_ushort", VGPR_32>;
defm SCRATCH_LOAD_SSHORT  : FLAT_Scratch_Load_Pseudo <"scratch_load_sshort", VGPR_32>;
defm SCRATCH_LOAD_DWORD   : FLAT_Scratch_Load_Pseudo <"scratch_load_dword", VGPR_32>;
defm SCRATCH_LOAD_DWORDX2 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx2", VReg_64>;
defm SCRATCH_LOAD_DWORDX3 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx3", VReg_96>;
defm SCRATCH_LOAD_DWORDX4 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx4", VReg_128>;

// D16 loads. These pass an extra trailing `1` to the multiclass
// (presumably the tied-input flag -- confirm against FLAT_Scratch_Load_Pseudo).
defm SCRATCH_LOAD_UBYTE_D16 :
  FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte_d16", VGPR_32, 1>;
defm SCRATCH_LOAD_UBYTE_D16_HI :
  FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte_d16_hi", VGPR_32, 1>;
defm SCRATCH_LOAD_SBYTE_D16 :
  FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte_d16", VGPR_32, 1>;
defm SCRATCH_LOAD_SBYTE_D16_HI :
  FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte_d16_hi", VGPR_32, 1>;
defm SCRATCH_LOAD_SHORT_D16 :
  FLAT_Scratch_Load_Pseudo <"scratch_load_short_d16", VGPR_32, 1>;
defm SCRATCH_LOAD_SHORT_D16_HI :
  FLAT_Scratch_Load_Pseudo <"scratch_load_short_d16_hi", VGPR_32, 1>;

// Plain stores.
defm SCRATCH_STORE_BYTE    : FLAT_Scratch_Store_Pseudo <"scratch_store_byte", VGPR_32>;
defm SCRATCH_STORE_SHORT   : FLAT_Scratch_Store_Pseudo <"scratch_store_short", VGPR_32>;
defm SCRATCH_STORE_DWORD   : FLAT_Scratch_Store_Pseudo <"scratch_store_dword", VGPR_32>;
defm SCRATCH_STORE_DWORDX2 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx2", VReg_64>;
defm SCRATCH_STORE_DWORDX3 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx3", VReg_96>;
defm SCRATCH_STORE_DWORDX4 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx4", VReg_128>;

// D16 "hi" stores.
defm SCRATCH_STORE_BYTE_D16_HI :
  FLAT_Scratch_Store_Pseudo <"scratch_store_byte_d16_hi", VGPR_32>;
defm SCRATCH_STORE_SHORT_D16_HI :
  FLAT_Scratch_Store_Pseudo <"scratch_store_short_d16_hi", VGPR_32>;

} // End SubtargetPredicate = HasFlatScratchInsts
755
// Floating-point global atomics available on isGFX10Plus. No selection node
// is passed to the multiclass for any of these.
let SubtargetPredicate = isGFX10Plus, is_flat_global = 1 in {
  // 32-bit (f32) variants.
  defm GLOBAL_ATOMIC_FCMPSWAP
    : FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap", VGPR_32, f32>;
  defm GLOBAL_ATOMIC_FMIN
    : FLAT_Global_Atomic_Pseudo<"global_atomic_fmin", VGPR_32, f32>;
  defm GLOBAL_ATOMIC_FMAX
    : FLAT_Global_Atomic_Pseudo<"global_atomic_fmax", VGPR_32, f32>;
  // 64-bit (f64) variants.
  defm GLOBAL_ATOMIC_FCMPSWAP_X2
    : FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap_x2", VReg_64, f64>;
  defm GLOBAL_ATOMIC_FMIN_X2
    : FLAT_Global_Atomic_Pseudo<"global_atomic_fmin_x2", VReg_64, f64>;
  defm GLOBAL_ATOMIC_FMAX_X2
    : FLAT_Global_Atomic_Pseudo<"global_atomic_fmax_x2", VReg_64, f64>;
} // End SubtargetPredicate = isGFX10Plus, is_flat_global = 1
770
// Floating-point add atomics. Only the no-return multiclass is instantiated
// here, and both entries require HasAtomicFaddInsts.
let is_flat_global = 1, OtherPredicates = [HasAtomicFaddInsts] in {
  defm GLOBAL_ATOMIC_ADD_F32
    : FLAT_Global_Atomic_Pseudo_NO_RTN <"global_atomic_add_f32", VGPR_32, f32>;
  defm GLOBAL_ATOMIC_PK_ADD_F16
    : FLAT_Global_Atomic_Pseudo_NO_RTN <"global_atomic_pk_add_f16", VGPR_32, v2f16>;
} // End is_flat_global = 1, OtherPredicates = [HasAtomicFaddInsts]
781
782//===----------------------------------------------------------------------===//
783// Flat Patterns
784//===----------------------------------------------------------------------===//
785
// Load pattern for FLAT instructions. The address is matched through
// FLATOffset (SelectFlatOffset<false>), which yields a 64-bit base address
// and an immediate offset that are forwarded to `inst`.
class FlatLoadPat <FLAT_Pseudo inst,
                   SDPatternOperator node,
                   ValueType vt>
  : GCNPat <(vt (node (FLATOffset i64:$vaddr, i16:$offset))),
            (inst $vaddr, $offset)>;
791
// D16 FLAT load pattern. `node` takes the FLATOffset address plus an extra
// `$in` value of type `vt`, which is forwarded as the last operand of `inst`.
// The three literal zeros fill the operands between $offset and $in
// (presumably the cache-policy bits -- confirm against the pseudo).
class FlatLoadPat_D16 <FLAT_Pseudo inst,
                       SDPatternOperator node,
                       ValueType vt>
  : GCNPat <(node (FLATOffset (i64 VReg_64:$vaddr), i16:$offset), vt:$in),
            (inst $vaddr, $offset, 0, 0, 0, $in)>;
796
// Same shape as FlatLoadPat_D16, but the address is matched through
// FLATOffsetSigned (SelectFlatOffset<true>).
class FlatSignedLoadPat_D16 <FLAT_Pseudo inst,
                             SDPatternOperator node,
                             ValueType vt>
  : GCNPat <(node (FLATOffsetSigned (i64 VReg_64:$vaddr), i16:$offset), vt:$in),
            (inst $vaddr, $offset, 0, 0, 0, $in)>;
801
// D16 global load through the SADDR form: GlobalSAddr yields a 64-bit scalar
// base, a 32-bit vector offset and an immediate offset. `$in` is forwarded as
// the last operand of `inst`.
class GlobalLoadSaddrPat_D16 <FLAT_Pseudo inst,
                              SDPatternOperator node,
                              ValueType vt>
  : GCNPat <(vt (node (GlobalSAddr (i64 SReg_64:$saddr),
                                   (i32 VGPR_32:$voffset),
                                   i16:$offset),
                      vt:$in)),
            (inst $saddr, $voffset, $offset, 0, 0, 0, $in)>;
806
// Load pattern whose address is matched through FLATOffsetSigned
// (SelectFlatOffset<true>); the base is constrained to VReg_64.
class FlatLoadSignedPat <FLAT_Pseudo inst,
                         SDPatternOperator node,
                         ValueType vt>
  : GCNPat <(vt (node (FLATOffsetSigned (i64 VReg_64:$vaddr), i16:$offset))),
            (inst $vaddr, $offset)>;
811
// Global load through the SADDR form: scalar base, vector offset and
// immediate offset come from GlobalSAddr; three literal zeros follow $offset
// in the output.
class GlobalLoadSaddrPat <FLAT_Pseudo inst,
                          SDPatternOperator node,
                          ValueType vt>
  : GCNPat <(vt (node (GlobalSAddr (i64 SReg_64:$saddr),
                                   (i32 VGPR_32:$voffset),
                                   i16:$offset))),
            (inst $saddr, $voffset, $offset, 0, 0, 0)>;
816
// Global store through the SADDR form. The source node has the data first
// and the address second; the output operand order is
// voffset, data, saddr, offset.
class GlobalStoreSaddrPat <FLAT_Pseudo inst,
                           SDPatternOperator node,
                           ValueType vt>
  : GCNPat <(node vt:$data,
                  (GlobalSAddr (i64 SReg_64:$saddr),
                               (i32 VGPR_32:$voffset),
                               i16:$offset)),
            (inst $voffset, getVregSrcForVT<vt>.ret:$data, $saddr, $offset)>;
822
// Atomic store through the SADDR form. Unlike GlobalStoreSaddrPat, the
// address is the first operand of `node` and the data is the second.
class GlobalAtomicStoreSaddrPat <FLAT_Pseudo inst,
                                 SDPatternOperator node,
                                 ValueType vt>
  : GCNPat <(node (GlobalSAddr (i64 SReg_64:$saddr),
                               (i32 VGPR_32:$voffset),
                               i16:$offset),
                  vt:$data),
            (inst $voffset, getVregSrcForVT<vt>.ret:$data, $saddr, $offset)>;
828
// Returning global atomic through the SADDR form. `vt` is the result type;
// `data_vt` is the type of the data operand and defaults to `vt`.
class GlobalAtomicSaddrPat <FLAT_Pseudo inst,
                            SDPatternOperator node,
                            ValueType vt,
                            ValueType data_vt = vt>
  : GCNPat <(vt (node (GlobalSAddr (i64 SReg_64:$saddr),
                                   (i32 VGPR_32:$voffset),
                                   i16:$offset),
                      data_vt:$data)),
            (inst $voffset, getVregSrcForVT<data_vt>.ret:$data, $saddr, $offset)>;
834
// Global atomic through the SADDR form whose source pattern has no result
// type wrapped around `node`.
class GlobalAtomicNoRtnSaddrPat <FLAT_Pseudo inst,
                                 SDPatternOperator node,
                                 ValueType vt>
  : GCNPat <(node (GlobalSAddr (i64 SReg_64:$saddr),
                               (i32 VGPR_32:$voffset),
                               i16:$offset),
                  vt:$data),
            (inst $voffset, getVregSrcForVT<vt>.ret:$data, $saddr, $offset)>;
840
// Store pattern: data first, then an address matched through FLATOffset
// (SelectFlatOffset<false>). Output operands are vaddr, data, offset.
class FlatStorePat <FLAT_Pseudo inst,
                    SDPatternOperator node,
                    ValueType vt>
  : GCNPat <(node vt:$data, (FLATOffset i64:$vaddr, i16:$offset)),
            (inst $vaddr, getVregSrcForVT<vt>.ret:$data, $offset)>;
845
// Store pattern whose address is matched through FLATOffsetSigned
// (SelectFlatOffset<true>). Output operands are vaddr, data, offset.
class FlatStoreSignedPat <FLAT_Pseudo inst,
                          SDPatternOperator node,
                          ValueType vt>
  : GCNPat <(node vt:$data, (FLATOffsetSigned i64:$vaddr, i16:$offset)),
            (inst $vaddr, getVregSrcForVT<vt>.ret:$data, $offset)>;
850
// Atomic store pattern using FLATOffset. An atomic store uses the same
// operand order as an atomic binop, so the address is the first operand of
// `node` and the data is the second.
class FlatStoreAtomicPat <FLAT_Pseudo inst,
                          SDPatternOperator node,
                          ValueType vt>
  : GCNPat <(node (FLATOffset i64:$vaddr, i16:$offset), vt:$data),
            (inst $vaddr, getVregSrcForVT<vt>.ret:$data, $offset)>;
857
// Atomic store pattern for instructions that accept a signed immediate
// offset. The address is matched through FLATOffsetSigned
// (SelectFlatOffset<true>), in line with the other *Signed* pattern classes
// in this file (FlatStoreSignedPat, FlatSignedAtomicPat, ...).
//   inst    - pseudo to select.
//   node    - atomic store node to match.
//   vt      - nominal value type of the store.
//   data_vt - type of the stored data operand; defaults to vt.
class FlatStoreSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node,
                                ValueType vt, ValueType data_vt = vt> : GCNPat <
  // atomic store follows atomic binop convention so the address comes
  // first.
  (node (FLATOffsetSigned i64:$vaddr, i16:$offset), data_vt:$data),
  (inst $vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)
>;
865
// Returning atomic pattern using FLATOffset. `vt` is the result type;
// `data_vt` is the type of the data operand and defaults to `vt`.
class FlatAtomicPat <FLAT_Pseudo inst,
                     SDPatternOperator node,
                     ValueType vt,
                     ValueType data_vt = vt>
  : GCNPat <(vt (node (FLATOffset i64:$vaddr, i16:$offset), data_vt:$data)),
            (inst $vaddr, $data, $offset)>;
871
// Atomic pattern using FLATOffset whose source pattern has no result type
// wrapped around `node`. The output constrains $vaddr to VReg_64.
class FlatAtomicPatNoRtn <FLAT_Pseudo inst,
                          SDPatternOperator node,
                          ValueType vt>
  : GCNPat <(node (FLATOffset i64:$vaddr, i16:$offset), vt:$data),
            (inst VReg_64:$vaddr, getVregSrcForVT<vt>.ret:$data, $offset)>;
876
// Same shape as FlatAtomicPatNoRtn, but the address is matched through
// FLATOffsetSigned (SelectFlatOffset<true>).
class FlatSignedAtomicPatNoRtn <FLAT_Pseudo inst,
                                SDPatternOperator node,
                                ValueType vt>
  : GCNPat <(node (FLATOffsetSigned i64:$vaddr, i16:$offset), vt:$data),
            (inst VReg_64:$vaddr, getVregSrcForVT<vt>.ret:$data, $offset)>;
881
// Returning atomic pattern using FLATOffsetSigned. `vt` is the result type;
// `data_vt` is the type of the data operand and defaults to `vt`.
class FlatSignedAtomicPat <FLAT_Pseudo inst,
                           SDPatternOperator node,
                           ValueType vt,
                           ValueType data_vt = vt>
  : GCNPat <(vt (node (FLATOffsetSigned i64:$vaddr, i16:$offset), data_vt:$data)),
            (inst $vaddr, $data, $offset)>;
887
// Scratch load with a 32-bit VGPR address matched through ScratchOffset.
class ScratchLoadSignedPat <FLAT_Pseudo inst,
                            SDPatternOperator node,
                            ValueType vt>
  : GCNPat <(vt (node (ScratchOffset (i32 VGPR_32:$vaddr), i16:$offset))),
            (inst $vaddr, $offset)>;
892
// D16 scratch load with a VGPR address. `$in` is an extra operand of `node`
// and is forwarded as the last operand of `inst`, after three literal zeros.
class ScratchLoadSignedPat_D16 <FLAT_Pseudo inst,
                                SDPatternOperator node,
                                ValueType vt>
  : GCNPat <(node (ScratchOffset (i32 VGPR_32:$vaddr), i16:$offset), vt:$in),
            (inst $vaddr, $offset, 0, 0, 0, $in)>;
897
// Scratch store with a VGPR address. Output operands are data, vaddr, offset
// (data comes first, unlike the FLAT/global store patterns).
class ScratchStoreSignedPat <FLAT_Pseudo inst,
                             SDPatternOperator node,
                             ValueType vt>
  : GCNPat <(node vt:$data, (ScratchOffset (i32 VGPR_32:$vaddr), i16:$offset)),
            (inst getVregSrcForVT<vt>.ret:$data, $vaddr, $offset)>;
902
// Scratch load with a 32-bit SGPR address matched through ScratchSAddr.
class ScratchLoadSaddrPat <FLAT_Pseudo inst,
                           SDPatternOperator node,
                           ValueType vt>
  : GCNPat <(vt (node (ScratchSAddr (i32 SGPR_32:$saddr), i16:$offset))),
            (inst $saddr, $offset)>;
907
// D16 scratch load with an SGPR address. `$in` is forwarded as the last
// operand of `inst`, after three literal zeros.
class ScratchLoadSaddrPat_D16 <FLAT_Pseudo inst,
                               SDPatternOperator node,
                               ValueType vt>
  : GCNPat <(vt (node (ScratchSAddr (i32 SGPR_32:$saddr), i16:$offset), vt:$in)),
            (inst $saddr, $offset, 0, 0, 0, $in)>;
912
// Scratch store with an SGPR address. Output operands are data, saddr, offset.
class ScratchStoreSaddrPat <FLAT_Pseudo inst,
                            SDPatternOperator node,
                            ValueType vt>
  : GCNPat <(node vt:$data, (ScratchSAddr (i32 SGPR_32:$saddr), i16:$offset)),
            (inst getVregSrcForVT<vt>.ret:$data, $saddr, $offset)>;
918
919let OtherPredicates = [HasFlatAddressSpace] in {
920
// Sub-dword and dwordx3 FLAT loads. Any-extending and zero-extending byte /
// short loads both select the unsigned instruction.
def : FlatLoadPat <FLAT_LOAD_UBYTE,   extloadi8_flat,   i32>;
def : FlatLoadPat <FLAT_LOAD_UBYTE,   zextloadi8_flat,  i32>;
def : FlatLoadPat <FLAT_LOAD_SBYTE,   sextloadi8_flat,  i32>;
def : FlatLoadPat <FLAT_LOAD_UBYTE,   extloadi8_flat,   i16>;
def : FlatLoadPat <FLAT_LOAD_UBYTE,   zextloadi8_flat,  i16>;
def : FlatLoadPat <FLAT_LOAD_SBYTE,   sextloadi8_flat,  i16>;
def : FlatLoadPat <FLAT_LOAD_USHORT,  extloadi16_flat,  i32>;
def : FlatLoadPat <FLAT_LOAD_USHORT,  zextloadi16_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_USHORT,  load_flat,        i16>;
def : FlatLoadPat <FLAT_LOAD_SSHORT,  sextloadi16_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_DWORDX3, load_flat,        v3i32>;

// Atomic loads select the ordinary load instructions.
def : FlatLoadPat <FLAT_LOAD_DWORD,   atomic_load_32_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_DWORDX2, atomic_load_64_flat, i64>;

// Truncating stores from a 32-bit value.
def : FlatStorePat <FLAT_STORE_BYTE,  truncstorei8_flat,  i32>;
def : FlatStorePat <FLAT_STORE_SHORT, truncstorei16_flat, i32>;
938
// Dword loads/stores for every 32-bit register type.
foreach vt = Reg32Types.types in {
  def : FlatLoadPat  <FLAT_LOAD_DWORD,  load_flat,  vt>;
  def : FlatStorePat <FLAT_STORE_DWORD, store_flat, vt>;
}

// Dwordx2 stores/loads for every type held in VReg_64.
foreach vt = VReg_64.RegTypes in {
  def : FlatStorePat <FLAT_STORE_DWORDX2, store_flat, vt>;
  def : FlatLoadPat  <FLAT_LOAD_DWORDX2,  load_flat,  vt>;
}

// Dwordx3 stores are only selected for v3i32.
def : FlatStorePat <FLAT_STORE_DWORDX3, store_flat, v3i32>;

// Dwordx4 loads/stores for every type held in VReg_128.
foreach vt = VReg_128.RegTypes in {
  def : FlatLoadPat  <FLAT_LOAD_DWORDX4,  load_flat,  vt>;
  def : FlatStorePat <FLAT_STORE_DWORDX4, store_flat, vt>;
}

// Atomic stores select the ordinary store instructions.
def : FlatStoreAtomicPat <FLAT_STORE_DWORD,   atomic_store_flat_32, i32>;
def : FlatStoreAtomicPat <FLAT_STORE_DWORDX2, atomic_store_flat_64, i64>;
958
// 32-bit returning FLAT atomics. Note that the nodes used here are the
// *_global_32 variants. cmpswap takes a v2i32 data operand.
def : FlatAtomicPat <FLAT_ATOMIC_ADD_RTN,     atomic_load_add_global_32,  i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SUB_RTN,     atomic_load_sub_global_32,  i32>;
def : FlatAtomicPat <FLAT_ATOMIC_INC_RTN,     atomic_inc_global_32,       i32>;
def : FlatAtomicPat <FLAT_ATOMIC_DEC_RTN,     atomic_dec_global_32,       i32>;
def : FlatAtomicPat <FLAT_ATOMIC_AND_RTN,     atomic_load_and_global_32,  i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SMAX_RTN,    atomic_load_max_global_32,  i32>;
def : FlatAtomicPat <FLAT_ATOMIC_UMAX_RTN,    atomic_load_umax_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SMIN_RTN,    atomic_load_min_global_32,  i32>;
def : FlatAtomicPat <FLAT_ATOMIC_UMIN_RTN,    atomic_load_umin_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_OR_RTN,      atomic_load_or_global_32,   i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SWAP_RTN,    atomic_swap_global_32,      i32>;
def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global_32, i32, v2i32>;
def : FlatAtomicPat <FLAT_ATOMIC_XOR_RTN,     atomic_load_xor_global_32,  i32>;

// 64-bit returning FLAT atomics, using the *_global_64 nodes. cmpswap takes
// a v2i64 data operand.
def : FlatAtomicPat <FLAT_ATOMIC_ADD_X2_RTN,     atomic_load_add_global_64,  i64>;
def : FlatAtomicPat <FLAT_ATOMIC_SUB_X2_RTN,     atomic_load_sub_global_64,  i64>;
def : FlatAtomicPat <FLAT_ATOMIC_INC_X2_RTN,     atomic_inc_global_64,       i64>;
def : FlatAtomicPat <FLAT_ATOMIC_DEC_X2_RTN,     atomic_dec_global_64,       i64>;
def : FlatAtomicPat <FLAT_ATOMIC_AND_X2_RTN,     atomic_load_and_global_64,  i64>;
def : FlatAtomicPat <FLAT_ATOMIC_SMAX_X2_RTN,    atomic_load_max_global_64,  i64>;
def : FlatAtomicPat <FLAT_ATOMIC_UMAX_X2_RTN,    atomic_load_umax_global_64, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_SMIN_X2_RTN,    atomic_load_min_global_64,  i64>;
def : FlatAtomicPat <FLAT_ATOMIC_UMIN_X2_RTN,    atomic_load_umin_global_64, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_OR_X2_RTN,      atomic_load_or_global_64,   i64>;
def : FlatAtomicPat <FLAT_ATOMIC_SWAP_X2_RTN,    atomic_swap_global_64,      i64>;
def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global_64, i64, v2i64>;
def : FlatAtomicPat <FLAT_ATOMIC_XOR_X2_RTN,     atomic_load_xor_global_64,  i64>;
986
// Byte/short stores from a 16-bit value.
def : FlatStorePat <FLAT_STORE_BYTE,  truncstorei8_flat, i16>;
def : FlatStorePat <FLAT_STORE_SHORT, store_flat,        i16>;

// D16 stores and loads, gated on D16PreservesUnusedBits.
// NOTE(review): this nested `let OtherPredicates` sits inside the enclosing
// HasFlatAddressSpace `let`; presumably it replaces that list rather than
// extending it (the scratch D16 block re-lists its outer predicates) --
// confirm whether HasFlatAddressSpace should be repeated here.
let OtherPredicates = [D16PreservesUnusedBits] in {
def : FlatStorePat <FLAT_STORE_SHORT_D16_HI, truncstorei16_hi16_flat, i32>;
def : FlatStorePat <FLAT_STORE_BYTE_D16_HI,  truncstorei8_hi16_flat,  i32>;

// Loads into the high half.
def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2i16>;
def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2f16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat,   v2i16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat,   v2f16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat,         v2i16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat,         v2f16>;

// Loads into the low half.
def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2i16>;
def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2f16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat,   v2i16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat,   v2f16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat,         v2i16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat,         v2f16>;
} // End OtherPredicates = [D16PreservesUnusedBits]
1008
1009} // End OtherPredicates = [HasFlatAddressSpace]
1010
1011
// Emits both global load patterns for `inst`: the VGPR-address form and the
// "<inst>_SADDR" form. The SADDR pattern gets the higher AddedComplexity
// (11 vs. 10).
multiclass GlobalFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
  let AddedComplexity = 10 in
  def : FlatLoadSignedPat <inst, node, vt>;

  let AddedComplexity = 11 in
  def : GlobalLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt>;
}
1021
// D16 counterpart of GlobalFLATLoadPats: VGPR-address pattern at
// AddedComplexity 10, "<inst>_SADDR" pattern at 11.
multiclass GlobalFLATLoadPats_D16<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
  let AddedComplexity = 10 in
  def : FlatSignedLoadPat_D16 <inst, node, vt>;

  let AddedComplexity = 11 in
  def : GlobalLoadSaddrPat_D16<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt>;
}
1031
// Emits both global store patterns for `inst`: the VGPR-address form
// (AddedComplexity 10) and the "<inst>_SADDR" form (AddedComplexity 11).
multiclass GlobalFLATStorePats<FLAT_Pseudo inst, SDPatternOperator node,
                               ValueType vt> {
  let AddedComplexity = 10 in
  def : FlatStoreSignedPat <inst, node, vt>;

  let AddedComplexity = 11 in
  def : GlobalStoreSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt>;
}
1042
// atomic_store takes its operands in the opposite order from a regular
// store, so it needs its own pair of patterns: VGPR-address form at
// AddedComplexity 10, "<inst>_SADDR" form at 11.
multiclass GlobalFLATAtomicStorePats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
  let AddedComplexity = 10 in
  def : FlatStoreSignedAtomicPat <inst, node, vt>;

  let AddedComplexity = 11 in
  def : GlobalAtomicStoreSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt>;
}
1053
// Emits the returning global atomic patterns. The instruction is given by
// name: "<name>_RTN" is used for the VGPR-address form (AddedComplexity 10)
// and "<name>_SADDR_RTN" for the SADDR form (AddedComplexity 11).
multiclass GlobalFLATAtomicPats<string nortn_inst_name, SDPatternOperator node,
                               ValueType vt, ValueType data_vt = vt> {
  let AddedComplexity = 10 in
  def : FlatSignedAtomicPat <!cast<FLAT_Pseudo>(nortn_inst_name#"_RTN"), node, vt, data_vt>;

  let AddedComplexity = 11 in
  def : GlobalAtomicSaddrPat<!cast<FLAT_Pseudo>(nortn_inst_name#"_SADDR_RTN"), node, vt, data_vt>;
}
1064
// Emits the no-return global atomic patterns for `inst`: VGPR-address form
// at AddedComplexity 10, "<inst>_SADDR" form at 11.
multiclass GlobalFLATNoRtnAtomicPats<FLAT_Pseudo inst, SDPatternOperator node,
                                     ValueType vt> {
  let AddedComplexity = 10 in
  def : FlatSignedAtomicPatNoRtn <inst, node, vt>;

  let AddedComplexity = 11 in
  def : GlobalAtomicNoRtnSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt>;
}
1075
// Emits both scratch load patterns for `inst`: the VGPR-address form
// (AddedComplexity 25) and the "<inst>_SADDR" form (AddedComplexity 26).
multiclass ScratchFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
  let AddedComplexity = 25 in
  def : ScratchLoadSignedPat <inst, node, vt>;

  let AddedComplexity = 26 in
  def : ScratchLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt>;
}
1085
// Emits both scratch store patterns for `inst`: the VGPR-address form
// (AddedComplexity 25) and the "<inst>_SADDR" form (AddedComplexity 26).
multiclass ScratchFLATStorePats<FLAT_Pseudo inst, SDPatternOperator node,
                               ValueType vt> {
  let AddedComplexity = 25 in
  def : ScratchStoreSignedPat <inst, node, vt>;

  let AddedComplexity = 26 in
  def : ScratchStoreSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt>;
}
1096
// D16 counterpart of ScratchFLATLoadPats: VGPR-address pattern at
// AddedComplexity 25, "<inst>_SADDR" pattern at 26.
multiclass ScratchFLATLoadPats_D16<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
  let AddedComplexity = 25 in
  def : ScratchLoadSignedPat_D16 <inst, node, vt>;

  let AddedComplexity = 26 in
  def : ScratchLoadSaddrPat_D16<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt>;
}
1106
1107let OtherPredicates = [HasFlatGlobalInsts] in {
1108
// Sub-dword global loads. Any-extending and zero-extending loads both select
// the unsigned instruction.
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE,  extloadi8_global,   i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE,  zextloadi8_global,  i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE,  sextloadi8_global,  i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE,  extloadi8_global,   i16>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE,  zextloadi8_global,  i16>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE,  sextloadi8_global,  i16>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, extloadi16_global,  i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, zextloadi16_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_SSHORT, sextloadi16_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, load_global,        i16>;
1119
// Dword loads/stores for every 32-bit register type.
foreach vt = Reg32Types.types in {
  defm : GlobalFLATLoadPats  <GLOBAL_LOAD_DWORD,  load_global,  vt>;
  defm : GlobalFLATStorePats <GLOBAL_STORE_DWORD, store_global, vt>;
}

// Dwordx2 loads/stores for every type held in VReg_64.
foreach vt = VReg_64.RegTypes in {
  defm : GlobalFLATLoadPats  <GLOBAL_LOAD_DWORDX2,  load_global,  vt>;
  defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX2, store_global, vt>;
}

// Dwordx3 loads are only selected for v3i32.
defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX3, load_global, v3i32>;

// Dwordx4 loads/stores for every type held in VReg_128.
foreach vt = VReg_128.RegTypes in {
  defm : GlobalFLATLoadPats  <GLOBAL_LOAD_DWORDX4,  load_global,  vt>;
  defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX4, store_global, vt>;
}

// Atomic loads are not treated specially at selection time: the memory
// legalizer sets the cache bits and inserts the required waits afterwards.
defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORD,   atomic_load_32_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX2, atomic_load_64_global, i64>;

// Sub-dword and dwordx3 stores.
defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE,    truncstorei8_global,  i32>;
defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE,    truncstorei8_global,  i16>;
defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT,   truncstorei16_global, i32>;
defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT,   store_global,         i16>;
defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX3, store_global,         v3i32>;
1148
// D16 global stores and loads, gated on D16PreservesUnusedBits.
// NOTE(review): this nested `let OtherPredicates` sits inside the enclosing
// HasFlatGlobalInsts `let`; presumably it replaces that list rather than
// extending it (the scratch D16 block re-lists its outer predicates) --
// confirm whether HasFlatGlobalInsts should be repeated here.
let OtherPredicates = [D16PreservesUnusedBits] in {
defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT_D16_HI, truncstorei16_hi16_global, i32>;
defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE_D16_HI,  truncstorei8_hi16_global,  i32>;

// Loads into the high half.
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2i16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2f16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global,   v2i16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global,   v2f16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16_HI, load_d16_hi_global,         v2i16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16_HI, load_d16_hi_global,         v2f16>;

// Loads into the low half.
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_d16_lo_global, v2i16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_d16_lo_global, v2f16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_d16_lo_global,   v2i16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_d16_lo_global,   v2f16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global,         v2i16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global,         v2f16>;
} // End OtherPredicates = [D16PreservesUnusedBits]
1167
// Atomic stores select the ordinary global store instructions.
defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_DWORD,   atomic_store_global_32, i32>;
defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_DWORDX2, atomic_store_global_64, i64>;

// 32-bit returning global atomics. The instruction is passed by name so the
// multiclass can append "_RTN" / "_SADDR_RTN". cmpswap takes a v2i32 data
// operand.
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD",     atomic_load_add_global_32,  i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SUB",     atomic_load_sub_global_32,  i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC",     atomic_inc_global_32,       i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_DEC",     atomic_dec_global_32,       i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_AND",     atomic_load_and_global_32,  i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMAX",    atomic_load_max_global_32,  i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMAX",    atomic_load_umax_global_32, i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMIN",    atomic_load_min_global_32,  i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMIN",    atomic_load_umin_global_32, i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_OR",      atomic_load_or_global_32,   i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SWAP",    atomic_swap_global_32,      i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CMPSWAP", AMDGPUatomic_cmp_swap_global_32, i32, v2i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR",     atomic_load_xor_global_32,  i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CSUB",    int_amdgcn_global_atomic_csub, i32>;

// 64-bit returning global atomics. cmpswap takes a v2i64 data operand.
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_X2",     atomic_load_add_global_64,  i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SUB_X2",     atomic_load_sub_global_64,  i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC_X2",     atomic_inc_global_64,       i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_DEC_X2",     atomic_dec_global_64,       i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_AND_X2",     atomic_load_and_global_64,  i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMAX_X2",    atomic_load_max_global_64,  i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMAX_X2",    atomic_load_umax_global_64, i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMIN_X2",    atomic_load_min_global_64,  i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMIN_X2",    atomic_load_umin_global_64, i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_OR_X2",      atomic_load_or_global_64,   i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SWAP_X2",    atomic_swap_global_64,      i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CMPSWAP_X2", AMDGPUatomic_cmp_swap_global_64, i64, v2i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR_X2",     atomic_load_xor_global_64,  i64>;
1199
// No-return floating-point add atomics, gated on HasAtomicFaddInsts.
// NOTE(review): this nested `let OtherPredicates` presumably replaces the
// enclosing HasFlatGlobalInsts list rather than extending it -- confirm.
let OtherPredicates = [HasAtomicFaddInsts] in {
defm : GlobalFLATNoRtnAtomicPats <GLOBAL_ATOMIC_ADD_F32, atomic_load_fadd_global_noret_32, f32>;
defm : GlobalFLATNoRtnAtomicPats <GLOBAL_ATOMIC_PK_ADD_F16, atomic_load_fadd_v2f16_global_noret_32, v2f16>;
} // End OtherPredicates = [HasAtomicFaddInsts]
1204
} // End OtherPredicates = [HasFlatGlobalInsts]
1206
1207let OtherPredicates = [HasFlatScratchInsts, EnableFlatScratch] in {
1208
// Sub-dword scratch (private) loads. Any-extending and zero-extending loads
// both select the unsigned instruction.
defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE,  extloadi8_private,   i32>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE,  zextloadi8_private,  i32>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_SBYTE,  sextloadi8_private,  i32>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE,  extloadi8_private,   i16>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE,  zextloadi8_private,  i16>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_SBYTE,  sextloadi8_private,  i16>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, extloadi16_private,  i32>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, zextloadi16_private, i32>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_SSHORT, sextloadi16_private, i32>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, load_private,        i16>;
1219
// Dword loads/stores for every 32-bit register type.
foreach vt = Reg32Types.types in {
  defm : ScratchFLATLoadPats  <SCRATCH_LOAD_DWORD,  load_private,  vt>;
  defm : ScratchFLATStorePats <SCRATCH_STORE_DWORD, store_private, vt>;
}

// Dwordx2 loads/stores for every type held in VReg_64.
foreach vt = VReg_64.RegTypes in {
  defm : ScratchFLATLoadPats  <SCRATCH_LOAD_DWORDX2,  load_private,  vt>;
  defm : ScratchFLATStorePats <SCRATCH_STORE_DWORDX2, store_private, vt>;
}

// Dwordx3 loads are only selected for v3i32.
defm : ScratchFLATLoadPats <SCRATCH_LOAD_DWORDX3, load_private, v3i32>;

// Dwordx4 loads/stores for every type held in VReg_128.
foreach vt = VReg_128.RegTypes in {
  defm : ScratchFLATLoadPats  <SCRATCH_LOAD_DWORDX4,  load_private,  vt>;
  defm : ScratchFLATStorePats <SCRATCH_STORE_DWORDX4, store_private, vt>;
}

// Sub-dword and dwordx3 stores.
defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE,    truncstorei8_private,  i32>;
defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE,    truncstorei8_private,  i16>;
defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT,   truncstorei16_private, i32>;
defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT,   store_private,         i16>;
defm : ScratchFLATStorePats <SCRATCH_STORE_DWORDX3, store_private,         v3i32>;
1242
// D16 scratch stores and loads. The predicate list repeats the enclosing
// HasFlatScratchInsts / EnableFlatScratch entries alongside
// D16PreservesUnusedBits.
let OtherPredicates = [D16PreservesUnusedBits, HasFlatScratchInsts, EnableFlatScratch] in {
defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT_D16_HI, truncstorei16_hi16_private, i32>;
defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE_D16_HI,  truncstorei8_hi16_private,  i32>;

// Loads into the high half.
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_private, v2i16>;
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_private, v2f16>;
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_private,   v2i16>;
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_private,   v2f16>;
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16_HI, load_d16_hi_private,         v2i16>;
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16_HI, load_d16_hi_private,         v2f16>;

// Loads into the low half.
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16, az_extloadi8_d16_lo_private, v2i16>;
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16, az_extloadi8_d16_lo_private, v2f16>;
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16, sextloadi8_d16_lo_private,   v2i16>;
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16, sextloadi8_d16_lo_private,   v2f16>;
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16, load_d16_lo_private,         v2i16>;
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16, load_d16_lo_private,         v2f16>;
} // End OtherPredicates = [D16PreservesUnusedBits, HasFlatScratchInsts, EnableFlatScratch]
1261
1262} // End OtherPredicates = [HasFlatScratchInsts,EnableFlatScratch]
1263
1264//===----------------------------------------------------------------------===//
1265// Target
1266//===----------------------------------------------------------------------===//
1267
1268//===----------------------------------------------------------------------===//
1269// CI
1270//===----------------------------------------------------------------------===//
1271
// Real (encoded) FLAT instruction for CI: wraps pseudo `ps` with the 7-bit
// opcode `op`, uses the SI encoding family, is assembled only under
// isGFX7Only and decoded in the "GFX7" namespace.
class FLAT_Real_ci <bits<7> op, FLAT_Pseudo ps>
  : FLAT_Real <op, ps>,
    SIMCInstr <ps.PseudoInstr, SIEncodingFamily.SI> {
  let DecoderNamespace = "GFX7";
  let AssemblerPredicate = isGFX7Only;
}
1278
// CI load opcodes. Note that DWORDX4 (0xe) is numbered before DWORDX3 (0xf).
def FLAT_LOAD_UBYTE_ci   : FLAT_Real_ci <0x8, FLAT_LOAD_UBYTE>;
def FLAT_LOAD_SBYTE_ci   : FLAT_Real_ci <0x9, FLAT_LOAD_SBYTE>;
def FLAT_LOAD_USHORT_ci  : FLAT_Real_ci <0xa, FLAT_LOAD_USHORT>;
def FLAT_LOAD_SSHORT_ci  : FLAT_Real_ci <0xb, FLAT_LOAD_SSHORT>;
def FLAT_LOAD_DWORD_ci   : FLAT_Real_ci <0xc, FLAT_LOAD_DWORD>;
def FLAT_LOAD_DWORDX2_ci : FLAT_Real_ci <0xd, FLAT_LOAD_DWORDX2>;
def FLAT_LOAD_DWORDX4_ci : FLAT_Real_ci <0xe, FLAT_LOAD_DWORDX4>;
def FLAT_LOAD_DWORDX3_ci : FLAT_Real_ci <0xf, FLAT_LOAD_DWORDX3>;

// CI store opcodes. As with loads, DWORDX4 (0x1e) precedes DWORDX3 (0x1f).
def FLAT_STORE_BYTE_ci    : FLAT_Real_ci <0x18, FLAT_STORE_BYTE>;
def FLAT_STORE_SHORT_ci   : FLAT_Real_ci <0x1a, FLAT_STORE_SHORT>;
def FLAT_STORE_DWORD_ci   : FLAT_Real_ci <0x1c, FLAT_STORE_DWORD>;
def FLAT_STORE_DWORDX2_ci : FLAT_Real_ci <0x1d, FLAT_STORE_DWORDX2>;
def FLAT_STORE_DWORDX4_ci : FLAT_Real_ci <0x1e, FLAT_STORE_DWORDX4>;
def FLAT_STORE_DWORDX3_ci : FLAT_Real_ci <0x1f, FLAT_STORE_DWORDX3>;
1294
// Defines the CI real instructions for an atomic: "_ci" for the pseudo named
// by ps.PseudoInstr and "_RTN_ci" for its "_RTN" sibling. Both share the
// same opcode `op`.
multiclass FLAT_Real_Atomics_ci <bits<7> op, FLAT_Pseudo ps> {
  def _ci
    : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>;
  def _RTN_ci
    : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>;
}
1299
// CI encodings of the 32-bit integer atomics (0x30 - 0x3d; 0x34 is unused
// here).
defm FLAT_ATOMIC_SWAP    : FLAT_Real_Atomics_ci <0x30, FLAT_ATOMIC_SWAP>;
defm FLAT_ATOMIC_CMPSWAP : FLAT_Real_Atomics_ci <0x31, FLAT_ATOMIC_CMPSWAP>;
defm FLAT_ATOMIC_ADD     : FLAT_Real_Atomics_ci <0x32, FLAT_ATOMIC_ADD>;
defm FLAT_ATOMIC_SUB     : FLAT_Real_Atomics_ci <0x33, FLAT_ATOMIC_SUB>;
defm FLAT_ATOMIC_SMIN    : FLAT_Real_Atomics_ci <0x35, FLAT_ATOMIC_SMIN>;
defm FLAT_ATOMIC_UMIN    : FLAT_Real_Atomics_ci <0x36, FLAT_ATOMIC_UMIN>;
defm FLAT_ATOMIC_SMAX    : FLAT_Real_Atomics_ci <0x37, FLAT_ATOMIC_SMAX>;
defm FLAT_ATOMIC_UMAX    : FLAT_Real_Atomics_ci <0x38, FLAT_ATOMIC_UMAX>;
defm FLAT_ATOMIC_AND     : FLAT_Real_Atomics_ci <0x39, FLAT_ATOMIC_AND>;
defm FLAT_ATOMIC_OR      : FLAT_Real_Atomics_ci <0x3a, FLAT_ATOMIC_OR>;
defm FLAT_ATOMIC_XOR     : FLAT_Real_Atomics_ci <0x3b, FLAT_ATOMIC_XOR>;
defm FLAT_ATOMIC_INC     : FLAT_Real_Atomics_ci <0x3c, FLAT_ATOMIC_INC>;
defm FLAT_ATOMIC_DEC     : FLAT_Real_Atomics_ci <0x3d, FLAT_ATOMIC_DEC>;
// CI encodings of the 64-bit integer atomics (0x50 - 0x5d; 0x54 is unused
// here).
defm FLAT_ATOMIC_SWAP_X2    : FLAT_Real_Atomics_ci <0x50, FLAT_ATOMIC_SWAP_X2>;
defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_ci <0x51, FLAT_ATOMIC_CMPSWAP_X2>;
defm FLAT_ATOMIC_ADD_X2     : FLAT_Real_Atomics_ci <0x52, FLAT_ATOMIC_ADD_X2>;
defm FLAT_ATOMIC_SUB_X2     : FLAT_Real_Atomics_ci <0x53, FLAT_ATOMIC_SUB_X2>;
defm FLAT_ATOMIC_SMIN_X2    : FLAT_Real_Atomics_ci <0x55, FLAT_ATOMIC_SMIN_X2>;
defm FLAT_ATOMIC_UMIN_X2    : FLAT_Real_Atomics_ci <0x56, FLAT_ATOMIC_UMIN_X2>;
defm FLAT_ATOMIC_SMAX_X2    : FLAT_Real_Atomics_ci <0x57, FLAT_ATOMIC_SMAX_X2>;
defm FLAT_ATOMIC_UMAX_X2    : FLAT_Real_Atomics_ci <0x58, FLAT_ATOMIC_UMAX_X2>;
defm FLAT_ATOMIC_AND_X2     : FLAT_Real_Atomics_ci <0x59, FLAT_ATOMIC_AND_X2>;
defm FLAT_ATOMIC_OR_X2      : FLAT_Real_Atomics_ci <0x5a, FLAT_ATOMIC_OR_X2>;
defm FLAT_ATOMIC_XOR_X2     : FLAT_Real_Atomics_ci <0x5b, FLAT_ATOMIC_XOR_X2>;
defm FLAT_ATOMIC_INC_X2     : FLAT_Real_Atomics_ci <0x5c, FLAT_ATOMIC_INC_X2>;
defm FLAT_ATOMIC_DEC_X2     : FLAT_Real_Atomics_ci <0x5d, FLAT_ATOMIC_DEC_X2>;
1326
// CI Only flat instructions
// Floating-point compare-swap/min/max atomics. They continue the CI opcode
// ranges directly after the integer atomics (0x3e-0x40 and 0x5e-0x60). The VI
// list of FLAT atomics later in this file has no entries for them.
defm FLAT_ATOMIC_FCMPSWAP      : FLAT_Real_Atomics_ci <0x3e, FLAT_ATOMIC_FCMPSWAP>;
defm FLAT_ATOMIC_FMIN          : FLAT_Real_Atomics_ci <0x3f, FLAT_ATOMIC_FMIN>;
defm FLAT_ATOMIC_FMAX          : FLAT_Real_Atomics_ci <0x40, FLAT_ATOMIC_FMAX>;
defm FLAT_ATOMIC_FCMPSWAP_X2   : FLAT_Real_Atomics_ci <0x5e, FLAT_ATOMIC_FCMPSWAP_X2>;
defm FLAT_ATOMIC_FMIN_X2       : FLAT_Real_Atomics_ci <0x5f, FLAT_ATOMIC_FMIN_X2>;
defm FLAT_ATOMIC_FMAX_X2       : FLAT_Real_Atomics_ci <0x60, FLAT_ATOMIC_FMAX_X2>;


//===----------------------------------------------------------------------===//
// VI
//===----------------------------------------------------------------------===//

// Real FLAT instruction in the SIEncodingFamily.VI encoding family.
//   op - 7-bit opcode forwarded to FLAT_Real.
//   ps - pseudo instruction this encoding is bound to; its PseudoInstr string
//        is what ties the real record back to the pseudo via SIMCInstr.
// The record sets AssemblerPredicate to isGFX8GFX9 and is placed in the
// "GFX8" decoder namespace.
class FLAT_Real_vi <bits<7> op, FLAT_Pseudo ps> :
  FLAT_Real <op, ps>,
  SIMCInstr <ps.PseudoInstr, SIEncodingFamily.VI> {
  let AssemblerPredicate = isGFX8GFX9;
  let DecoderNamespace = "GFX8";
}

// Instantiates both addressing forms of a VI real instruction with opcode
// `op`. The pseudos are found by name from the defm prefix:
//   <NAME>_vi       <- pseudo <NAME>
//   <NAME>_SADDR_vi <- pseudo <NAME>_SADDR
multiclass FLAT_Real_AllAddr_vi<bits<7> op> {
  def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME)>;
  def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
}

// VI real encodings for the plain FLAT loads (opcodes 0x10-0x17).
// Note: DWORDX4 (0x17) is listed before DWORDX3 (0x16); the opcode values,
// not the textual order, are what matter.
def FLAT_LOAD_UBYTE_vi         : FLAT_Real_vi <0x10, FLAT_LOAD_UBYTE>;
def FLAT_LOAD_SBYTE_vi         : FLAT_Real_vi <0x11, FLAT_LOAD_SBYTE>;
def FLAT_LOAD_USHORT_vi        : FLAT_Real_vi <0x12, FLAT_LOAD_USHORT>;
def FLAT_LOAD_SSHORT_vi        : FLAT_Real_vi <0x13, FLAT_LOAD_SSHORT>;
def FLAT_LOAD_DWORD_vi         : FLAT_Real_vi <0x14, FLAT_LOAD_DWORD>;
def FLAT_LOAD_DWORDX2_vi       : FLAT_Real_vi <0x15, FLAT_LOAD_DWORDX2>;
def FLAT_LOAD_DWORDX4_vi       : FLAT_Real_vi <0x17, FLAT_LOAD_DWORDX4>;
def FLAT_LOAD_DWORDX3_vi       : FLAT_Real_vi <0x16, FLAT_LOAD_DWORDX3>;

// VI real encodings for the FLAT stores (opcodes 0x18-0x1f). As with the
// loads, DWORDX4 (0x1f) is listed before DWORDX3 (0x1e).
def FLAT_STORE_BYTE_vi         : FLAT_Real_vi <0x18, FLAT_STORE_BYTE>;
def FLAT_STORE_BYTE_D16_HI_vi  : FLAT_Real_vi <0x19, FLAT_STORE_BYTE_D16_HI>;
def FLAT_STORE_SHORT_vi        : FLAT_Real_vi <0x1a, FLAT_STORE_SHORT>;
def FLAT_STORE_SHORT_D16_HI_vi : FLAT_Real_vi <0x1b, FLAT_STORE_SHORT_D16_HI>;
def FLAT_STORE_DWORD_vi        : FLAT_Real_vi <0x1c, FLAT_STORE_DWORD>;
def FLAT_STORE_DWORDX2_vi      : FLAT_Real_vi <0x1d, FLAT_STORE_DWORDX2>;
def FLAT_STORE_DWORDX4_vi      : FLAT_Real_vi <0x1f, FLAT_STORE_DWORDX4>;
def FLAT_STORE_DWORDX3_vi      : FLAT_Real_vi <0x1e, FLAT_STORE_DWORDX3>;

// VI real encodings for the D16 FLAT loads (opcodes 0x20-0x25).
def FLAT_LOAD_UBYTE_D16_vi     : FLAT_Real_vi <0x20, FLAT_LOAD_UBYTE_D16>;
def FLAT_LOAD_UBYTE_D16_HI_vi  : FLAT_Real_vi <0x21, FLAT_LOAD_UBYTE_D16_HI>;
def FLAT_LOAD_SBYTE_D16_vi     : FLAT_Real_vi <0x22, FLAT_LOAD_SBYTE_D16>;
def FLAT_LOAD_SBYTE_D16_HI_vi  : FLAT_Real_vi <0x23, FLAT_LOAD_SBYTE_D16_HI>;
def FLAT_LOAD_SHORT_D16_vi     : FLAT_Real_vi <0x24, FLAT_LOAD_SHORT_D16>;
def FLAT_LOAD_SHORT_D16_HI_vi  : FLAT_Real_vi <0x25, FLAT_LOAD_SHORT_D16_HI>;

// Instantiates the VI real encodings for one FLAT atomic with opcode `op`:
//   <NAME>_vi     - bound to the pseudo named ps.PseudoInstr
//   <NAME>_RTN_vi - bound to the pseudo named ps.PseudoInstr # "_RTN"
// Mirrors FLAT_Real_Atomics_ci, but builds FLAT_Real_vi records.
multiclass FLAT_Real_Atomics_vi <bits<7> op, FLAT_Pseudo ps> {
  def _vi     : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>;
  def _RTN_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>;
}

// VI real encodings for a global atomic with opcode `op`. Inherits the
// <NAME>_vi and <NAME>_SADDR_vi records from FLAT_Real_AllAddr_vi and adds
// the returning variants, again resolved by pseudo name:
//   <NAME>_RTN_vi       <- pseudo <NAME>_RTN
//   <NAME>_SADDR_RTN_vi <- pseudo <NAME>_SADDR_RTN
multiclass FLAT_Global_Real_Atomics_vi<bits<7> op> :
  FLAT_Real_AllAddr_vi<op> {
  def _RTN_vi  : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_RTN")>;
  def _SADDR_RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN")>;
}


// VI real encodings for the FLAT atomics. Each defm produces a <NAME>_vi and
// a <NAME>_RTN_vi record (see FLAT_Real_Atomics_vi). The non-X2 forms occupy
// the contiguous opcode range 0x40-0x4c and the _X2 forms 0x60-0x6c.
defm FLAT_ATOMIC_SWAP       : FLAT_Real_Atomics_vi <0x40, FLAT_ATOMIC_SWAP>;
defm FLAT_ATOMIC_CMPSWAP    : FLAT_Real_Atomics_vi <0x41, FLAT_ATOMIC_CMPSWAP>;
defm FLAT_ATOMIC_ADD        : FLAT_Real_Atomics_vi <0x42, FLAT_ATOMIC_ADD>;
defm FLAT_ATOMIC_SUB        : FLAT_Real_Atomics_vi <0x43, FLAT_ATOMIC_SUB>;
defm FLAT_ATOMIC_SMIN       : FLAT_Real_Atomics_vi <0x44, FLAT_ATOMIC_SMIN>;
defm FLAT_ATOMIC_UMIN       : FLAT_Real_Atomics_vi <0x45, FLAT_ATOMIC_UMIN>;
defm FLAT_ATOMIC_SMAX       : FLAT_Real_Atomics_vi <0x46, FLAT_ATOMIC_SMAX>;
defm FLAT_ATOMIC_UMAX       : FLAT_Real_Atomics_vi <0x47, FLAT_ATOMIC_UMAX>;
defm FLAT_ATOMIC_AND        : FLAT_Real_Atomics_vi <0x48, FLAT_ATOMIC_AND>;
defm FLAT_ATOMIC_OR         : FLAT_Real_Atomics_vi <0x49, FLAT_ATOMIC_OR>;
defm FLAT_ATOMIC_XOR        : FLAT_Real_Atomics_vi <0x4a, FLAT_ATOMIC_XOR>;
defm FLAT_ATOMIC_INC        : FLAT_Real_Atomics_vi <0x4b, FLAT_ATOMIC_INC>;
defm FLAT_ATOMIC_DEC        : FLAT_Real_Atomics_vi <0x4c, FLAT_ATOMIC_DEC>;
defm FLAT_ATOMIC_SWAP_X2    : FLAT_Real_Atomics_vi <0x60, FLAT_ATOMIC_SWAP_X2>;
defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_vi <0x61, FLAT_ATOMIC_CMPSWAP_X2>;
defm FLAT_ATOMIC_ADD_X2     : FLAT_Real_Atomics_vi <0x62, FLAT_ATOMIC_ADD_X2>;
defm FLAT_ATOMIC_SUB_X2     : FLAT_Real_Atomics_vi <0x63, FLAT_ATOMIC_SUB_X2>;
defm FLAT_ATOMIC_SMIN_X2    : FLAT_Real_Atomics_vi <0x64, FLAT_ATOMIC_SMIN_X2>;
defm FLAT_ATOMIC_UMIN_X2    : FLAT_Real_Atomics_vi <0x65, FLAT_ATOMIC_UMIN_X2>;
defm FLAT_ATOMIC_SMAX_X2    : FLAT_Real_Atomics_vi <0x66, FLAT_ATOMIC_SMAX_X2>;
defm FLAT_ATOMIC_UMAX_X2    : FLAT_Real_Atomics_vi <0x67, FLAT_ATOMIC_UMAX_X2>;
defm FLAT_ATOMIC_AND_X2     : FLAT_Real_Atomics_vi <0x68, FLAT_ATOMIC_AND_X2>;
defm FLAT_ATOMIC_OR_X2      : FLAT_Real_Atomics_vi <0x69, FLAT_ATOMIC_OR_X2>;
defm FLAT_ATOMIC_XOR_X2     : FLAT_Real_Atomics_vi <0x6a, FLAT_ATOMIC_XOR_X2>;
defm FLAT_ATOMIC_INC_X2     : FLAT_Real_Atomics_vi <0x6b, FLAT_ATOMIC_INC_X2>;
defm FLAT_ATOMIC_DEC_X2     : FLAT_Real_Atomics_vi <0x6c, FLAT_ATOMIC_DEC_X2>;

// VI real encodings for the GLOBAL loads and stores. Each defm produces a
// <NAME>_vi and a <NAME>_SADDR_vi record (see FLAT_Real_AllAddr_vi). The
// opcode values are the same ones used by the FLAT_* definitions of the
// corresponding operations above.
defm GLOBAL_LOAD_UBYTE         : FLAT_Real_AllAddr_vi <0x10>;
defm GLOBAL_LOAD_SBYTE         : FLAT_Real_AllAddr_vi <0x11>;
defm GLOBAL_LOAD_USHORT        : FLAT_Real_AllAddr_vi <0x12>;
defm GLOBAL_LOAD_SSHORT        : FLAT_Real_AllAddr_vi <0x13>;
defm GLOBAL_LOAD_DWORD         : FLAT_Real_AllAddr_vi <0x14>;
defm GLOBAL_LOAD_DWORDX2       : FLAT_Real_AllAddr_vi <0x15>;
defm GLOBAL_LOAD_DWORDX3       : FLAT_Real_AllAddr_vi <0x16>;
defm GLOBAL_LOAD_DWORDX4       : FLAT_Real_AllAddr_vi <0x17>;

defm GLOBAL_LOAD_UBYTE_D16     : FLAT_Real_AllAddr_vi <0x20>;
defm GLOBAL_LOAD_UBYTE_D16_HI  : FLAT_Real_AllAddr_vi <0x21>;
defm GLOBAL_LOAD_SBYTE_D16     : FLAT_Real_AllAddr_vi <0x22>;
defm GLOBAL_LOAD_SBYTE_D16_HI  : FLAT_Real_AllAddr_vi <0x23>;
defm GLOBAL_LOAD_SHORT_D16     : FLAT_Real_AllAddr_vi <0x24>;
defm GLOBAL_LOAD_SHORT_D16_HI  : FLAT_Real_AllAddr_vi <0x25>;

defm GLOBAL_STORE_BYTE         : FLAT_Real_AllAddr_vi <0x18>;
defm GLOBAL_STORE_BYTE_D16_HI  : FLAT_Real_AllAddr_vi <0x19>;
defm GLOBAL_STORE_SHORT        : FLAT_Real_AllAddr_vi <0x1a>;
defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x1b>;
defm GLOBAL_STORE_DWORD        : FLAT_Real_AllAddr_vi <0x1c>;
defm GLOBAL_STORE_DWORDX2      : FLAT_Real_AllAddr_vi <0x1d>;
defm GLOBAL_STORE_DWORDX3      : FLAT_Real_AllAddr_vi <0x1e>;
defm GLOBAL_STORE_DWORDX4      : FLAT_Real_AllAddr_vi <0x1f>;


// VI real encodings for the GLOBAL atomics. Each defm produces four records:
// <NAME>_vi, <NAME>_SADDR_vi, <NAME>_RTN_vi and <NAME>_SADDR_RTN_vi (see
// FLAT_Global_Real_Atomics_vi). Opcodes match the FLAT_ATOMIC_* VI list:
// 0x40-0x4c for the non-X2 forms and 0x60-0x6c for the _X2 forms.
defm GLOBAL_ATOMIC_SWAP       : FLAT_Global_Real_Atomics_vi <0x40>;
defm GLOBAL_ATOMIC_CMPSWAP    : FLAT_Global_Real_Atomics_vi <0x41>;
defm GLOBAL_ATOMIC_ADD        : FLAT_Global_Real_Atomics_vi <0x42>;
defm GLOBAL_ATOMIC_SUB        : FLAT_Global_Real_Atomics_vi <0x43>;
defm GLOBAL_ATOMIC_SMIN       : FLAT_Global_Real_Atomics_vi <0x44>;
defm GLOBAL_ATOMIC_UMIN       : FLAT_Global_Real_Atomics_vi <0x45>;
defm GLOBAL_ATOMIC_SMAX       : FLAT_Global_Real_Atomics_vi <0x46>;
defm GLOBAL_ATOMIC_UMAX       : FLAT_Global_Real_Atomics_vi <0x47>;
defm GLOBAL_ATOMIC_AND        : FLAT_Global_Real_Atomics_vi <0x48>;
defm GLOBAL_ATOMIC_OR         : FLAT_Global_Real_Atomics_vi <0x49>;
defm GLOBAL_ATOMIC_XOR        : FLAT_Global_Real_Atomics_vi <0x4a>;
defm GLOBAL_ATOMIC_INC        : FLAT_Global_Real_Atomics_vi <0x4b>;
defm GLOBAL_ATOMIC_DEC        : FLAT_Global_Real_Atomics_vi <0x4c>;
defm GLOBAL_ATOMIC_SWAP_X2    : FLAT_Global_Real_Atomics_vi <0x60>;
defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Real_Atomics_vi <0x61>;
defm GLOBAL_ATOMIC_ADD_X2     : FLAT_Global_Real_Atomics_vi <0x62>;
defm GLOBAL_ATOMIC_SUB_X2     : FLAT_Global_Real_Atomics_vi <0x63>;
defm GLOBAL_ATOMIC_SMIN_X2    : FLAT_Global_Real_Atomics_vi <0x64>;
defm GLOBAL_ATOMIC_UMIN_X2    : FLAT_Global_Real_Atomics_vi <0x65>;
defm GLOBAL_ATOMIC_SMAX_X2    : FLAT_Global_Real_Atomics_vi <0x66>;
defm GLOBAL_ATOMIC_UMAX_X2    : FLAT_Global_Real_Atomics_vi <0x67>;
defm GLOBAL_ATOMIC_AND_X2     : FLAT_Global_Real_Atomics_vi <0x68>;
defm GLOBAL_ATOMIC_OR_X2      : FLAT_Global_Real_Atomics_vi <0x69>;
defm GLOBAL_ATOMIC_XOR_X2     : FLAT_Global_Real_Atomics_vi <0x6a>;
defm GLOBAL_ATOMIC_INC_X2     : FLAT_Global_Real_Atomics_vi <0x6b>;
defm GLOBAL_ATOMIC_DEC_X2     : FLAT_Global_Real_Atomics_vi <0x6c>;

// VI real encodings for the SCRATCH loads and stores. Each defm produces a
// <NAME>_vi and a <NAME>_SADDR_vi record (see FLAT_Real_AllAddr_vi). The
// entries are not in opcode order: the D16 loads (0x20-0x25) sit between the
// byte stores (0x18-0x19) and the remaining stores (0x1a-0x1f).
defm SCRATCH_LOAD_UBYTE         : FLAT_Real_AllAddr_vi <0x10>;
defm SCRATCH_LOAD_SBYTE         : FLAT_Real_AllAddr_vi <0x11>;
defm SCRATCH_LOAD_USHORT        : FLAT_Real_AllAddr_vi <0x12>;
defm SCRATCH_LOAD_SSHORT        : FLAT_Real_AllAddr_vi <0x13>;
defm SCRATCH_LOAD_DWORD         : FLAT_Real_AllAddr_vi <0x14>;
defm SCRATCH_LOAD_DWORDX2       : FLAT_Real_AllAddr_vi <0x15>;
defm SCRATCH_LOAD_DWORDX3       : FLAT_Real_AllAddr_vi <0x16>;
defm SCRATCH_LOAD_DWORDX4       : FLAT_Real_AllAddr_vi <0x17>;
defm SCRATCH_STORE_BYTE         : FLAT_Real_AllAddr_vi <0x18>;
defm SCRATCH_STORE_BYTE_D16_HI  : FLAT_Real_AllAddr_vi <0x19>;
defm SCRATCH_LOAD_UBYTE_D16     : FLAT_Real_AllAddr_vi <0x20>;
defm SCRATCH_LOAD_UBYTE_D16_HI  : FLAT_Real_AllAddr_vi <0x21>;
defm SCRATCH_LOAD_SBYTE_D16     : FLAT_Real_AllAddr_vi <0x22>;
defm SCRATCH_LOAD_SBYTE_D16_HI  : FLAT_Real_AllAddr_vi <0x23>;
defm SCRATCH_LOAD_SHORT_D16     : FLAT_Real_AllAddr_vi <0x24>;
defm SCRATCH_LOAD_SHORT_D16_HI  : FLAT_Real_AllAddr_vi <0x25>;
defm SCRATCH_STORE_SHORT        : FLAT_Real_AllAddr_vi <0x1a>;
defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x1b>;
defm SCRATCH_STORE_DWORD        : FLAT_Real_AllAddr_vi <0x1c>;
defm SCRATCH_STORE_DWORDX2      : FLAT_Real_AllAddr_vi <0x1d>;
defm SCRATCH_STORE_DWORDX3      : FLAT_Real_AllAddr_vi <0x1e>;
defm SCRATCH_STORE_DWORDX4      : FLAT_Real_AllAddr_vi <0x1f>;


//===----------------------------------------------------------------------===//
// GFX10.
//===----------------------------------------------------------------------===//

// Real FLAT instruction in the SIEncodingFamily.GFX10 encoding family.
//   op - 7-bit opcode forwarded to FLAT_Real.
//   ps - pseudo instruction this encoding is bound to.
// The record sets AssemblerPredicate to isGFX10Plus, is placed in the "GFX10"
// decoder namespace, and overrides the encoding bits listed below.
class FLAT_Real_gfx10<bits<7> op, FLAT_Pseudo ps> :
    FLAT_Real<op, ps>, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX10> {
  let AssemblerPredicate = isGFX10Plus;
  let DecoderNamespace = "GFX10";

  // Only the low 12 bits of the offset operand are encoded.
  let Inst{11-0}  = offset{11-0};
  // Use the dlc operand when the pseudo has one, otherwise the pseudo's fixed
  // dlcValue.
  let Inst{12}    = !if(ps.has_dlc, dlc, ps.dlcValue);
  // Encode the saddr operand only when the pseudo both has and enables it;
  // every other case gets the constant 0x7d.
  // NOTE(review): 0x7d is presumably the "no SGPR address" encoding - confirm
  // against the ISA documentation.
  let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7d), 0x7d);
  let Inst{55}    = 0;
}


// Defines <NAME>_gfx10 with opcode `op`, bound to the pseudo named <NAME>.
multiclass FLAT_Real_Base_gfx10<bits<7> op> {
  def _gfx10 :
    FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME)>;
}

// Defines <NAME>_RTN_gfx10 with opcode `op`, bound to the pseudo named
// <NAME>_RTN.
multiclass FLAT_Real_RTN_gfx10<bits<7> op> {
  def _RTN_gfx10 :
    FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_RTN")>;
}

// Defines <NAME>_SADDR_gfx10 with opcode `op`, bound to the pseudo named
// <NAME>_SADDR.
multiclass FLAT_Real_SADDR_gfx10<bits<7> op> {
  def _SADDR_gfx10 :
    FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
}

// Defines <NAME>_SADDR_RTN_gfx10 with opcode `op`, bound to the pseudo named
// <NAME>_SADDR_RTN.
multiclass FLAT_Real_SADDR_RTN_gfx10<bits<7> op> {
  def _SADDR_RTN_gfx10 :
    FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN")>;
}

// Defines <NAME>_ST_gfx10 with opcode `op`, bound to the pseudo named
// <NAME>_ST. Two fields differ from a plain FLAT_Real_gfx10 record:
//   - Inst{54-48} is replaced by the hardware encoding of EXEC_HI instead of
//     the saddr / 0x7d selection done in FLAT_Real_gfx10.
//   - The record is additionally gated on HasFlatScratchSTMode.
multiclass FLAT_Real_ST_gfx10<bits<7> op> {
  def _ST_gfx10 :
    FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_ST")> {
      let Inst{54-48} = !cast<int>(EXEC_HI.HWEncoding);
      let OtherPredicates = [HasFlatScratchSTMode];
    }
}

// Both addressing forms for opcode `op`: <NAME>_gfx10 and <NAME>_SADDR_gfx10.
multiclass FLAT_Real_AllAddr_gfx10<bits<7> op> :
  FLAT_Real_Base_gfx10<op>,
  FLAT_Real_SADDR_gfx10<op>;

// FLAT atomic for opcode `op`: <NAME>_gfx10 and <NAME>_RTN_gfx10.
multiclass FLAT_Real_Atomics_gfx10<bits<7> op> :
  FLAT_Real_Base_gfx10<op>,
  FLAT_Real_RTN_gfx10<op>;

// Global atomic for opcode `op`, all four records: <NAME>_gfx10,
// <NAME>_SADDR_gfx10, <NAME>_RTN_gfx10 and <NAME>_SADDR_RTN_gfx10.
multiclass FLAT_Real_GlblAtomics_gfx10<bits<7> op> :
  FLAT_Real_AllAddr_gfx10<op>,
  FLAT_Real_RTN_gfx10<op>,
  FLAT_Real_SADDR_RTN_gfx10<op>;

// Global atomic for opcode `op` with only the returning records:
// <NAME>_RTN_gfx10 and <NAME>_SADDR_RTN_gfx10 (no no-return forms).
multiclass FLAT_Real_GlblAtomics_RTN_gfx10<bits<7> op> :
  FLAT_Real_RTN_gfx10<op>,
  FLAT_Real_SADDR_RTN_gfx10<op>;

// Scratch instruction for opcode `op`, three records: <NAME>_gfx10,
// <NAME>_SADDR_gfx10 and <NAME>_ST_gfx10.
multiclass FLAT_Real_ScratchAllAddr_gfx10<bits<7> op> :
  FLAT_Real_Base_gfx10<op>,
  FLAT_Real_SADDR_gfx10<op>,
  FLAT_Real_ST_gfx10<op>;

// ENC_FLAT.
// GFX10 real encodings for the FLAT segment. Loads and stores get a single
// <NAME>_gfx10 record; atomics also get <NAME>_RTN_gfx10.
// Note: DWORDX4 (0x00e / 0x01e) precedes DWORDX3 (0x00f / 0x01f) in the GFX10
// opcode numbering used here, and 0x034 / 0x054 are not assigned in this list.
defm FLAT_LOAD_UBYTE            : FLAT_Real_Base_gfx10<0x008>;
defm FLAT_LOAD_SBYTE            : FLAT_Real_Base_gfx10<0x009>;
defm FLAT_LOAD_USHORT           : FLAT_Real_Base_gfx10<0x00a>;
defm FLAT_LOAD_SSHORT           : FLAT_Real_Base_gfx10<0x00b>;
defm FLAT_LOAD_DWORD            : FLAT_Real_Base_gfx10<0x00c>;
defm FLAT_LOAD_DWORDX2          : FLAT_Real_Base_gfx10<0x00d>;
defm FLAT_LOAD_DWORDX4          : FLAT_Real_Base_gfx10<0x00e>;
defm FLAT_LOAD_DWORDX3          : FLAT_Real_Base_gfx10<0x00f>;
defm FLAT_STORE_BYTE            : FLAT_Real_Base_gfx10<0x018>;
defm FLAT_STORE_BYTE_D16_HI     : FLAT_Real_Base_gfx10<0x019>;
defm FLAT_STORE_SHORT           : FLAT_Real_Base_gfx10<0x01a>;
defm FLAT_STORE_SHORT_D16_HI    : FLAT_Real_Base_gfx10<0x01b>;
defm FLAT_STORE_DWORD           : FLAT_Real_Base_gfx10<0x01c>;
defm FLAT_STORE_DWORDX2         : FLAT_Real_Base_gfx10<0x01d>;
defm FLAT_STORE_DWORDX4         : FLAT_Real_Base_gfx10<0x01e>;
defm FLAT_STORE_DWORDX3         : FLAT_Real_Base_gfx10<0x01f>;
defm FLAT_LOAD_UBYTE_D16        : FLAT_Real_Base_gfx10<0x020>;
defm FLAT_LOAD_UBYTE_D16_HI     : FLAT_Real_Base_gfx10<0x021>;
defm FLAT_LOAD_SBYTE_D16        : FLAT_Real_Base_gfx10<0x022>;
defm FLAT_LOAD_SBYTE_D16_HI     : FLAT_Real_Base_gfx10<0x023>;
defm FLAT_LOAD_SHORT_D16        : FLAT_Real_Base_gfx10<0x024>;
defm FLAT_LOAD_SHORT_D16_HI     : FLAT_Real_Base_gfx10<0x025>;
defm FLAT_ATOMIC_SWAP           : FLAT_Real_Atomics_gfx10<0x030>;
defm FLAT_ATOMIC_CMPSWAP        : FLAT_Real_Atomics_gfx10<0x031>;
defm FLAT_ATOMIC_ADD            : FLAT_Real_Atomics_gfx10<0x032>;
defm FLAT_ATOMIC_SUB            : FLAT_Real_Atomics_gfx10<0x033>;
defm FLAT_ATOMIC_SMIN           : FLAT_Real_Atomics_gfx10<0x035>;
defm FLAT_ATOMIC_UMIN           : FLAT_Real_Atomics_gfx10<0x036>;
defm FLAT_ATOMIC_SMAX           : FLAT_Real_Atomics_gfx10<0x037>;
defm FLAT_ATOMIC_UMAX           : FLAT_Real_Atomics_gfx10<0x038>;
defm FLAT_ATOMIC_AND            : FLAT_Real_Atomics_gfx10<0x039>;
defm FLAT_ATOMIC_OR             : FLAT_Real_Atomics_gfx10<0x03a>;
defm FLAT_ATOMIC_XOR            : FLAT_Real_Atomics_gfx10<0x03b>;
defm FLAT_ATOMIC_INC            : FLAT_Real_Atomics_gfx10<0x03c>;
defm FLAT_ATOMIC_DEC            : FLAT_Real_Atomics_gfx10<0x03d>;
defm FLAT_ATOMIC_FCMPSWAP       : FLAT_Real_Atomics_gfx10<0x03e>;
defm FLAT_ATOMIC_FMIN           : FLAT_Real_Atomics_gfx10<0x03f>;
defm FLAT_ATOMIC_FMAX           : FLAT_Real_Atomics_gfx10<0x040>;
defm FLAT_ATOMIC_SWAP_X2        : FLAT_Real_Atomics_gfx10<0x050>;
defm FLAT_ATOMIC_CMPSWAP_X2     : FLAT_Real_Atomics_gfx10<0x051>;
defm FLAT_ATOMIC_ADD_X2         : FLAT_Real_Atomics_gfx10<0x052>;
defm FLAT_ATOMIC_SUB_X2         : FLAT_Real_Atomics_gfx10<0x053>;
defm FLAT_ATOMIC_SMIN_X2        : FLAT_Real_Atomics_gfx10<0x055>;
defm FLAT_ATOMIC_UMIN_X2        : FLAT_Real_Atomics_gfx10<0x056>;
defm FLAT_ATOMIC_SMAX_X2        : FLAT_Real_Atomics_gfx10<0x057>;
defm FLAT_ATOMIC_UMAX_X2        : FLAT_Real_Atomics_gfx10<0x058>;
defm FLAT_ATOMIC_AND_X2         : FLAT_Real_Atomics_gfx10<0x059>;
defm FLAT_ATOMIC_OR_X2          : FLAT_Real_Atomics_gfx10<0x05a>;
defm FLAT_ATOMIC_XOR_X2         : FLAT_Real_Atomics_gfx10<0x05b>;
defm FLAT_ATOMIC_INC_X2         : FLAT_Real_Atomics_gfx10<0x05c>;
defm FLAT_ATOMIC_DEC_X2         : FLAT_Real_Atomics_gfx10<0x05d>;
defm FLAT_ATOMIC_FCMPSWAP_X2    : FLAT_Real_Atomics_gfx10<0x05e>;
defm FLAT_ATOMIC_FMIN_X2        : FLAT_Real_Atomics_gfx10<0x05f>;
defm FLAT_ATOMIC_FMAX_X2        : FLAT_Real_Atomics_gfx10<0x060>;


// ENC_FLAT_GLBL.
// GFX10 real encodings for the GLOBAL segment. Loads and stores get
// <NAME>_gfx10 and <NAME>_SADDR_gfx10; atomics additionally get the _RTN and
// _SADDR_RTN records. Opcode values match the ENC_FLAT list for the
// corresponding operations.
defm GLOBAL_LOAD_UBYTE          : FLAT_Real_AllAddr_gfx10<0x008>;
defm GLOBAL_LOAD_SBYTE          : FLAT_Real_AllAddr_gfx10<0x009>;
defm GLOBAL_LOAD_USHORT         : FLAT_Real_AllAddr_gfx10<0x00a>;
defm GLOBAL_LOAD_SSHORT         : FLAT_Real_AllAddr_gfx10<0x00b>;
defm GLOBAL_LOAD_DWORD          : FLAT_Real_AllAddr_gfx10<0x00c>;
defm GLOBAL_LOAD_DWORDX2        : FLAT_Real_AllAddr_gfx10<0x00d>;
defm GLOBAL_LOAD_DWORDX4        : FLAT_Real_AllAddr_gfx10<0x00e>;
defm GLOBAL_LOAD_DWORDX3        : FLAT_Real_AllAddr_gfx10<0x00f>;
defm GLOBAL_STORE_BYTE          : FLAT_Real_AllAddr_gfx10<0x018>;
defm GLOBAL_STORE_BYTE_D16_HI   : FLAT_Real_AllAddr_gfx10<0x019>;
defm GLOBAL_STORE_SHORT         : FLAT_Real_AllAddr_gfx10<0x01a>;
defm GLOBAL_STORE_SHORT_D16_HI  : FLAT_Real_AllAddr_gfx10<0x01b>;
defm GLOBAL_STORE_DWORD         : FLAT_Real_AllAddr_gfx10<0x01c>;
defm GLOBAL_STORE_DWORDX2       : FLAT_Real_AllAddr_gfx10<0x01d>;
defm GLOBAL_STORE_DWORDX4       : FLAT_Real_AllAddr_gfx10<0x01e>;
defm GLOBAL_STORE_DWORDX3       : FLAT_Real_AllAddr_gfx10<0x01f>;
defm GLOBAL_LOAD_UBYTE_D16      : FLAT_Real_AllAddr_gfx10<0x020>;
defm GLOBAL_LOAD_UBYTE_D16_HI   : FLAT_Real_AllAddr_gfx10<0x021>;
defm GLOBAL_LOAD_SBYTE_D16      : FLAT_Real_AllAddr_gfx10<0x022>;
defm GLOBAL_LOAD_SBYTE_D16_HI   : FLAT_Real_AllAddr_gfx10<0x023>;
defm GLOBAL_LOAD_SHORT_D16      : FLAT_Real_AllAddr_gfx10<0x024>;
defm GLOBAL_LOAD_SHORT_D16_HI   : FLAT_Real_AllAddr_gfx10<0x025>;
defm GLOBAL_ATOMIC_SWAP         : FLAT_Real_GlblAtomics_gfx10<0x030>;
defm GLOBAL_ATOMIC_CMPSWAP      : FLAT_Real_GlblAtomics_gfx10<0x031>;
defm GLOBAL_ATOMIC_ADD          : FLAT_Real_GlblAtomics_gfx10<0x032>;
defm GLOBAL_ATOMIC_SUB          : FLAT_Real_GlblAtomics_gfx10<0x033>;
// CSUB (0x034) is instantiated with the returning records only.
defm GLOBAL_ATOMIC_CSUB         : FLAT_Real_GlblAtomics_RTN_gfx10<0x034>;
defm GLOBAL_ATOMIC_SMIN         : FLAT_Real_GlblAtomics_gfx10<0x035>;
defm GLOBAL_ATOMIC_UMIN         : FLAT_Real_GlblAtomics_gfx10<0x036>;
defm GLOBAL_ATOMIC_SMAX         : FLAT_Real_GlblAtomics_gfx10<0x037>;
defm GLOBAL_ATOMIC_UMAX         : FLAT_Real_GlblAtomics_gfx10<0x038>;
defm GLOBAL_ATOMIC_AND          : FLAT_Real_GlblAtomics_gfx10<0x039>;
defm GLOBAL_ATOMIC_OR           : FLAT_Real_GlblAtomics_gfx10<0x03a>;
defm GLOBAL_ATOMIC_XOR          : FLAT_Real_GlblAtomics_gfx10<0x03b>;
defm GLOBAL_ATOMIC_INC          : FLAT_Real_GlblAtomics_gfx10<0x03c>;
defm GLOBAL_ATOMIC_DEC          : FLAT_Real_GlblAtomics_gfx10<0x03d>;
defm GLOBAL_ATOMIC_FCMPSWAP     : FLAT_Real_GlblAtomics_gfx10<0x03e>;
defm GLOBAL_ATOMIC_FMIN         : FLAT_Real_GlblAtomics_gfx10<0x03f>;
defm GLOBAL_ATOMIC_FMAX         : FLAT_Real_GlblAtomics_gfx10<0x040>;
defm GLOBAL_ATOMIC_SWAP_X2      : FLAT_Real_GlblAtomics_gfx10<0x050>;
defm GLOBAL_ATOMIC_CMPSWAP_X2   : FLAT_Real_GlblAtomics_gfx10<0x051>;
defm GLOBAL_ATOMIC_ADD_X2       : FLAT_Real_GlblAtomics_gfx10<0x052>;
defm GLOBAL_ATOMIC_SUB_X2       : FLAT_Real_GlblAtomics_gfx10<0x053>;
defm GLOBAL_ATOMIC_SMIN_X2      : FLAT_Real_GlblAtomics_gfx10<0x055>;
defm GLOBAL_ATOMIC_UMIN_X2      : FLAT_Real_GlblAtomics_gfx10<0x056>;
defm GLOBAL_ATOMIC_SMAX_X2      : FLAT_Real_GlblAtomics_gfx10<0x057>;
defm GLOBAL_ATOMIC_UMAX_X2      : FLAT_Real_GlblAtomics_gfx10<0x058>;
defm GLOBAL_ATOMIC_AND_X2       : FLAT_Real_GlblAtomics_gfx10<0x059>;
defm GLOBAL_ATOMIC_OR_X2        : FLAT_Real_GlblAtomics_gfx10<0x05a>;
defm GLOBAL_ATOMIC_XOR_X2       : FLAT_Real_GlblAtomics_gfx10<0x05b>;
defm GLOBAL_ATOMIC_INC_X2       : FLAT_Real_GlblAtomics_gfx10<0x05c>;
defm GLOBAL_ATOMIC_DEC_X2       : FLAT_Real_GlblAtomics_gfx10<0x05d>;
defm GLOBAL_ATOMIC_FCMPSWAP_X2  : FLAT_Real_GlblAtomics_gfx10<0x05e>;
defm GLOBAL_ATOMIC_FMIN_X2      : FLAT_Real_GlblAtomics_gfx10<0x05f>;
defm GLOBAL_ATOMIC_FMAX_X2      : FLAT_Real_GlblAtomics_gfx10<0x060>;
// The ADDTID forms use opcodes 0x016 / 0x017 and are listed last, out of
// opcode order.
defm GLOBAL_LOAD_DWORD_ADDTID   : FLAT_Real_AllAddr_gfx10<0x016>;
defm GLOBAL_STORE_DWORD_ADDTID  : FLAT_Real_AllAddr_gfx10<0x017>;

// ENC_FLAT_SCRATCH.
// GFX10 real encodings for the SCRATCH segment. Each defm produces three
// records: <NAME>_gfx10, <NAME>_SADDR_gfx10 and <NAME>_ST_gfx10 (see
// FLAT_Real_ScratchAllAddr_gfx10). Opcode values match the ENC_FLAT list for
// the corresponding operations.
defm SCRATCH_LOAD_UBYTE         : FLAT_Real_ScratchAllAddr_gfx10<0x008>;
defm SCRATCH_LOAD_SBYTE         : FLAT_Real_ScratchAllAddr_gfx10<0x009>;
defm SCRATCH_LOAD_USHORT        : FLAT_Real_ScratchAllAddr_gfx10<0x00a>;
defm SCRATCH_LOAD_SSHORT        : FLAT_Real_ScratchAllAddr_gfx10<0x00b>;
defm SCRATCH_LOAD_DWORD         : FLAT_Real_ScratchAllAddr_gfx10<0x00c>;
defm SCRATCH_LOAD_DWORDX2       : FLAT_Real_ScratchAllAddr_gfx10<0x00d>;
defm SCRATCH_LOAD_DWORDX4       : FLAT_Real_ScratchAllAddr_gfx10<0x00e>;
defm SCRATCH_LOAD_DWORDX3       : FLAT_Real_ScratchAllAddr_gfx10<0x00f>;
defm SCRATCH_STORE_BYTE         : FLAT_Real_ScratchAllAddr_gfx10<0x018>;
defm SCRATCH_STORE_BYTE_D16_HI  : FLAT_Real_ScratchAllAddr_gfx10<0x019>;
defm SCRATCH_STORE_SHORT        : FLAT_Real_ScratchAllAddr_gfx10<0x01a>;
defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_ScratchAllAddr_gfx10<0x01b>;
defm SCRATCH_STORE_DWORD        : FLAT_Real_ScratchAllAddr_gfx10<0x01c>;
defm SCRATCH_STORE_DWORDX2      : FLAT_Real_ScratchAllAddr_gfx10<0x01d>;
defm SCRATCH_STORE_DWORDX4      : FLAT_Real_ScratchAllAddr_gfx10<0x01e>;
defm SCRATCH_STORE_DWORDX3      : FLAT_Real_ScratchAllAddr_gfx10<0x01f>;
defm SCRATCH_LOAD_UBYTE_D16     : FLAT_Real_ScratchAllAddr_gfx10<0x020>;
defm SCRATCH_LOAD_UBYTE_D16_HI  : FLAT_Real_ScratchAllAddr_gfx10<0x021>;
defm SCRATCH_LOAD_SBYTE_D16     : FLAT_Real_ScratchAllAddr_gfx10<0x022>;
defm SCRATCH_LOAD_SBYTE_D16_HI  : FLAT_Real_ScratchAllAddr_gfx10<0x023>;
defm SCRATCH_LOAD_SHORT_D16     : FLAT_Real_ScratchAllAddr_gfx10<0x024>;
defm SCRATCH_LOAD_SHORT_D16_HI  : FLAT_Real_ScratchAllAddr_gfx10<0x025>;

// Floating-point add atomics, gated on HasAtomicFaddInsts. Although they
// follow the GFX10 section, they are instantiated with the VI multiclass
// FLAT_Real_AllAddr_vi, so each defm yields only <NAME>_vi and
// <NAME>_SADDR_vi; no _RTN records are created here.
let SubtargetPredicate = HasAtomicFaddInsts in {

defm GLOBAL_ATOMIC_ADD_F32    : FLAT_Real_AllAddr_vi <0x04d>;
defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Real_AllAddr_vi <0x04e>;

} // End SubtargetPredicate = HasAtomicFaddInsts
