// xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/FLATInstructions.td (revision 647cbc5de815c5651677bf8582797f716ec7b48d)
//===-- FLATInstructions.td - FLAT Instruction Definitions ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Addressing-mode ComplexPattern records for flat, global and scratch
// accesses. Each record names its selector function as a string
// ("Select..."); the integer after iPTR is 2 for the plain offset forms and
// ScratchSAddr, and 3 for GlobalSAddr and ScratchSVAddr.
// NOTE(review): presumably that integer is the number of operands the
// selector produces and -10 is the pattern complexity - confirm against the
// ComplexPattern class, which is declared outside this file.
def FlatOffset : ComplexPattern<iPTR, 2, "SelectFlatOffset", [], [SDNPWantRoot], -10>;
def GlobalOffset : ComplexPattern<iPTR, 2, "SelectGlobalOffset", [], [SDNPWantRoot], -10>;
def ScratchOffset : ComplexPattern<iPTR, 2, "SelectScratchOffset", [], [SDNPWantRoot], -10>;

def GlobalSAddr : ComplexPattern<iPTR, 3, "SelectGlobalSAddr", [], [SDNPWantRoot], -10>;
def ScratchSAddr : ComplexPattern<iPTR, 2, "SelectScratchSAddr", [], [SDNPWantRoot], -10>;
def ScratchSVAddr : ComplexPattern<iPTR, 3, "SelectScratchSVAddr", [], [SDNPWantRoot], -10>;
16
17//===----------------------------------------------------------------------===//
18// FLAT classes
19//===----------------------------------------------------------------------===//
20
// Common base for every FLAT-family pseudo instruction (flat, global and
// scratch segments). It is a codegen-only pseudo that records the mnemonic,
// the assembly operand string and a set of has_*/value knobs that the real
// encoding classes (FLAT_Real, VFLAT_Real) read back through their `ps`
// argument.
//   opName   - mnemonic; also passed to SIMCInstr.
//   outs/ins - operand dags forwarded to InstSI.
//   asmOps   - assembly operand string, stored in AsmOperands.
//   pattern  - optional pattern list forwarded to InstSI.
class FLAT_Pseudo<string opName, dag outs, dag ins,
                  string asmOps, list<dag> pattern=[]> :
  InstSI<outs, ins, "", pattern>,
  SIMCInstr<opName, SIEncodingFamily.NONE> {

  let isPseudo = 1;
  let isCodeGenOnly = 1;

  let FLAT = 1;

  let UseNamedOperandTable = 1;
  let hasSideEffects = 0;
  let SchedRW = [WriteVMEM];

  // The asm string handed to InstSI above is empty; the real classes rebuild
  // it as opName # ps.AsmOperands from these two fields.
  string Mnemonic = opName;
  string AsmOperands = asmOps;

  // Segment selectors. Both zero means the plain flat segment (FLAT_Real
  // encodes seg = 0 in that case).
  bits<1> is_flat_global = 0;
  bits<1> is_flat_scratch = 0;

  bits<1> has_vdst = 1;

  // We need to distinguish having saddr and enabling saddr because
  // saddr is only valid for scratch and global instructions. Pre-gfx9
  // these bits were reserved, so we also don't necessarily want to
  // set these bits to the disabled value for the original flat
  // segment instructions.
  bits<1> has_saddr = 0;
  bits<1> enabled_saddr = 0;
  bits<7> saddr_value = 0;
  bits<1> has_vaddr = 1;

  bits<1> has_data = 1;
  bits<1> has_glc  = 1;
  bits<1> glcValue = 0;
  bits<1> has_dlc  = 1;
  bits<1> dlcValue = 0;
  bits<1> has_sccb  = 1;
  bits<1> sccbValue = 0;
  bits<1> has_sve  = 0; // Scratch VGPR Enable
  bits<1> lds = 0;
  bits<1> sve = 0;

  // Default predicate follows the segment: global, then scratch, otherwise
  // the generic flat address space predicate.
  let SubtargetPredicate = !if(is_flat_global, HasFlatGlobalInsts,
    !if(is_flat_scratch, HasFlatScratchInsts, HasFlatAddressSpace));

  // TODO: M0 if it could possibly access LDS (before gfx9? only)?
  // Global instructions use only EXEC; everything else also uses FLAT_SCR.
  let Uses = !if(is_flat_global, [EXEC], [EXEC, FLAT_SCR]);

  // Internally, FLAT instructions are executed as both an LDS and a
  // Buffer instruction; so, they increment both VM_CNT and LGKM_CNT
  // and are not considered done until both have been decremented.
  // LGKM_CNT is therefore set by default only for the plain flat segment
  // (neither global nor scratch).
  let VM_CNT = 1;
  let LGKM_CNT = !not(!or(is_flat_global, is_flat_scratch));

  let FlatGlobal = is_flat_global;

  let FlatScratch = is_flat_scratch;
}
80
// Real (encodable) counterpart of a FLAT_Pseudo in the 64-bit Enc64 layout.
//   op     - 7-bit opcode placed in Inst{24-18}.
//   ps     - the pseudo whose operand lists, flags and has_* knobs are reused.
//   opName - mnemonic for the asm string; defaults to the pseudo's Mnemonic.
class FLAT_Real <bits<7> op, FLAT_Pseudo ps, string opName = ps.Mnemonic> :
  InstSI <ps.OutOperandList, ps.InOperandList, opName # ps.AsmOperands, []>,
  Enc64 {

  let isPseudo = 0;
  let isCodeGenOnly = 0;

  let FLAT = 1;

  // copy relevant pseudo op flags
  let SubtargetPredicate   = ps.SubtargetPredicate;
  let AsmMatchConverter    = ps.AsmMatchConverter;
  let OtherPredicates      = ps.OtherPredicates;
  let TSFlags              = ps.TSFlags;
  let UseNamedOperandTable = ps.UseNamedOperandTable;
  let SchedRW              = ps.SchedRW;
  let mayLoad              = ps.mayLoad;
  let mayStore             = ps.mayStore;
  let IsAtomicRet          = ps.IsAtomicRet;
  let IsAtomicNoRet        = ps.IsAtomicNoRet;
  let VM_CNT               = ps.VM_CNT;
  let LGKM_CNT             = ps.LGKM_CNT;
  let VALU                 = ps.VALU;

  // encoding fields
  // vdata and vdst are 10 bits wide: the low 8 bits go into the register
  // fields below and bit 9 feeds `acc`.
  bits<8> vaddr;
  bits<10> vdata;
  bits<7> saddr;
  bits<10> vdst;

  bits<5> cpol;

  // Only valid on gfx9
  bits<1> lds = ps.lds; // LDS DMA for global and scratch

  // Segment, 00=flat, 01=scratch, 10=global, 11=reserved
  bits<2> seg = !if(ps.is_flat_global, 0b10,
                  !if(ps.is_flat_scratch, 0b01, 0));

  // Signed offset. For plain flat accesses the highest bit is ignored and
  // the field is treated as a 12-bit unsigned value.
  bits<13> offset;
  // GFX90A+ only: instruction uses AccVGPR for data
  // Taken from bit 9 of vdst when the pseudo has a vdst, else from bit 9 of
  // vdata when it has data, else 0.
  bits<1> acc = !if(ps.has_vdst, vdst{9}, !if(ps.has_data, vdata{9}, 0));

  // We don't use tfe right now, and it was removed in gfx9.
  // (This field is not placed into Inst anywhere in this class.)
  bits<1> tfe = 0;

  // Only valid on GFX9+
  let Inst{12-0} = offset;
  // Bit 13 carries the pseudo's sve value when has_sve is set, otherwise lds.
  let Inst{13} = !if(ps.has_sve, ps.sve, lds);
  let Inst{15-14} = seg;

  // GLC comes from the cpol operand unless the pseudo pins it via glcValue.
  let Inst{16}    = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glcValue);
  let Inst{17}    = cpol{CPolBit.SLC};
  let Inst{24-18} = op;
  let Inst{31-26} = 0x37; // Encoding.
  let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
  let Inst{47-40} = !if(ps.has_data, vdata{7-0}, ?);
  // saddr field: the operand when enabled, 0x7f when the pseudo has an saddr
  // field but it is disabled, and 0 when the pseudo has no saddr at all.
  let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7f), 0);

  // NOTE(review): the original comment here read "54-48 is reserved", yet
  // Inst{54-48} is assigned the saddr field just above. Per the note in
  // FLAT_Pseudo these bits were reserved pre-gfx9 - confirm what was meant.
  let Inst{55}    = acc; // nv on GFX9+, TFE before. AccVGPR for data on GFX90A.
  let Inst{63-56} = !if(ps.has_vdst, vdst{7-0}, ?);
}
146
// Real (encodable) counterpart of a FLAT_Pseudo in the 96-bit Enc96 layout.
//   op     - 8-bit opcode placed in Inst{21-14}.
//   ps     - the pseudo whose operand lists, flags and has_* knobs are reused.
//   opName - mnemonic for the asm string; defaults to the pseudo's Mnemonic.
class VFLAT_Real <bits<8> op, FLAT_Pseudo ps, string opName = ps.Mnemonic> :
  InstSI <ps.OutOperandList, ps.InOperandList, opName # ps.AsmOperands, []>,
  Enc96 {

  let FLAT = 1;

  // Predicates and asm-matcher hook taken from the pseudo.
  let SubtargetPredicate   = ps.SubtargetPredicate;
  let OtherPredicates      = ps.OtherPredicates;
  let AsmMatchConverter    = ps.AsmMatchConverter;

  // Instruction properties taken from the pseudo.
  let TSFlags              = ps.TSFlags;
  let UseNamedOperandTable = ps.UseNamedOperandTable;
  let SchedRW              = ps.SchedRW;
  let mayLoad              = ps.mayLoad;
  let mayStore             = ps.mayStore;
  let VALU                 = ps.VALU;
  let IsAtomicRet          = ps.IsAtomicRet;
  let IsAtomicNoRet        = ps.IsAtomicNoRet;
  let VM_CNT               = ps.VM_CNT;
  let LGKM_CNT             = ps.LGKM_CNT;

  // Operand fields referenced by the encoding below.
  bits<7> saddr;
  bits<8> vdst;
  bits<6> cpol;
  bits<8> vdata; // vsrc
  bits<8> vaddr;
  bits<24> offset;

  // Encoding, listed in ascending bit order.
  // saddr field: the operand when enabled, 0x7f when present but disabled,
  // 0 when the pseudo has no saddr.
  let Inst{6-0}   = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7f), 0);
  let Inst{21-14} = op;
  let Inst{31-26} = 0x3b;
  let Inst{39-32} = !if(ps.has_vdst, vdst, ?);
  let Inst{49}    = ps.sve;
  let Inst{51-50} = cpol{4-3}; // scope
  // Returning atomics force th{0} to 1; everything else takes it from cpol.
  let Inst{52}    = !if(ps.IsAtomicRet, 1, cpol{0}); // th{0}
  let Inst{54-53} = cpol{2-1}; // th{2-1}
  let Inst{62-55} = !if(ps.has_data, vdata{7-0}, ?);
  let Inst{71-64} = !if(ps.has_vaddr, vaddr, ?);
  let Inst{95-72} = offset;
}
187
// Mix-in that tags a record with whether it is the saddr variant (IsSaddr)
// and a shared key string (SaddrOp). In this file the saddr and non-saddr
// variants of one instruction pass the same Name.
// NOTE(review): presumably consumed by a mapping table defined elsewhere.
class GlobalSaddrTable <bit is_saddr, string Name = ""> {
  bit IsSaddr = is_saddr;
  string SaddrOp = Name;
}
192
// TODO: Is exec allowed for saddr? The disabled value 0x7f is the
// same encoding value as exec_hi, so it isn't possible to use that if
// saddr is 32-bit (which isn't handled here yet).
// Load pseudo shared by the flat and global segments.
//   opName        - mnemonic.
//   regClass      - register class of the loaded value; the default vdata_op
//                   is derived from it via getLdStRegisterOperand.
//   HasTiedOutput - adds a $vdst_in input tied to $vdst.
//   HasSaddr      - the instruction has an saddr field (asm prints either
//                   ", $saddr" or ", off").
//   EnableSaddr   - the saddr operand is really present; the address is then
//                   SReg_64:$saddr plus VGPR_32:$vaddr instead of a VReg_64.
//   vdata_op      - operand type used for $vdst / $vdst_in.
class FLAT_Load_Pseudo <string opName, RegisterClass regClass,
  bit HasTiedOutput = 0,
  bit HasSaddr = 0, bit EnableSaddr = 0,
  RegisterOperand vdata_op = getLdStRegisterOperand<regClass>.ret> : FLAT_Pseudo<
  opName,
  (outs vdata_op:$vdst),
  !con(
    // Address operands.
    !if(EnableSaddr,
        (ins SReg_64:$saddr, VGPR_32:$vaddr),
        (ins VReg_64:$vaddr)),
    (ins flat_offset:$offset),
    // FIXME: Operands with default values do not work with following non-optional operands.
    !if(HasTiedOutput,
        (ins CPol:$cpol, vdata_op:$vdst_in),
        (ins CPol_0:$cpol))),
  " $vdst, $vaddr"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$cpol"> {
  // Encoding knobs: a load carries no vdata source.
  let has_data = 0;
  let has_saddr = HasSaddr;
  let enabled_saddr = EnableSaddr;

  // Memory behaviour.
  let mayLoad = 1;
  let maybeAtomic = 1;

  // Only the variant with a live saddr operand gets the _SADDR suffix.
  let PseudoInstr = opName#!if(!and(HasSaddr, EnableSaddr), "_SADDR", "");

  // The tied input is an operand constraint only; it is excluded from the
  // encoding.
  let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", "");
  let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
}
222
// Store pseudo shared by the flat and global segments.
//   opName      - mnemonic.
//   vdataClass  - register class of the stored value; the operand type is
//                 getLdStRegisterOperand<vdataClass>.ret.
//   HasSaddr    - the instruction has an saddr field (asm prints either
//                 ", $saddr" or ", off").
//   EnableSaddr - the saddr operand is really present; $vaddr is then a
//                 VGPR_32 and SReg_64:$saddr follows $vdata.
class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass,
  bit HasSaddr = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
  opName,
  (outs),
  !con(
    !if(EnableSaddr,
        (ins VGPR_32:$vaddr,
             getLdStRegisterOperand<vdataClass>.ret:$vdata,
             SReg_64:$saddr),
        (ins VReg_64:$vaddr,
             getLdStRegisterOperand<vdataClass>.ret:$vdata)),
    (ins flat_offset:$offset, CPol_0:$cpol)),
  " $vaddr, $vdata"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$cpol"> {
  // Encoding knobs: a store has no destination register.
  let has_vdst = 0;
  let has_saddr = HasSaddr;
  let enabled_saddr = EnableSaddr;

  // Memory behaviour.
  let mayStore = 1;
  let mayLoad  = 0;
  let maybeAtomic = 1;

  // Only the variant with a live saddr operand gets the _SADDR suffix.
  let PseudoInstr = opName#!if(!and(HasSaddr, EnableSaddr), "_SADDR", "");
}
241
// Defines the two global-segment variants of a load: the base record
// (HasSaddr = 1, saddr disabled) and NAME_SADDR (saddr enabled). Both share
// opName as their GlobalSaddrTable key. HasTiedInput is forwarded as
// FLAT_Load_Pseudo's HasTiedOutput argument.
multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass, bit HasTiedInput = 0> {
  let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in {
    def "" : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1>,
      GlobalSaddrTable<0, opName>;
    def _SADDR : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1, 1>,
      GlobalSaddrTable<1, opName>;
  }
}
250
// Global "addtid" load pseudo: it has no $vaddr operand at all (has_vaddr
// is cleared), only an optional scalar base.
//   opName        - mnemonic.
//   regClass      - register class used directly for $vdst / $vdst_in.
//   HasTiedOutput - adds a $vdst_in input tied to $vdst.
//   EnableSaddr   - adds SReg_64:$saddr; otherwise the asm prints "off".
class FLAT_Global_Load_AddTid_Pseudo <string opName, RegisterClass regClass,
  bit HasTiedOutput = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
  opName,
  (outs regClass:$vdst),
  !con(
    !if(EnableSaddr, (ins SReg_64:$saddr), (ins)),
    (ins flat_offset:$offset, CPol_0:$cpol),
    !if(HasTiedOutput, (ins regClass:$vdst_in), (ins))),
  " $vdst, "#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> {
  // Segment and encoding knobs.
  let is_flat_global = 1;
  let has_vaddr = 0;
  let has_data = 0;
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;

  // Memory behaviour.
  let mayLoad = 1;
  let maybeAtomic = 1;

  let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", "");

  // The tied input is an operand constraint only; it is excluded from the
  // encoding.
  let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", "");
  let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
}
271
// Defines the base and NAME_SADDR variants of an addtid load; both share
// opName as their GlobalSaddrTable key.
multiclass FLAT_Global_Load_AddTid_Pseudo<string opName, RegisterClass regClass,
  bit HasTiedOutput = 0> {
  def "" : FLAT_Global_Load_AddTid_Pseudo<opName, regClass, HasTiedOutput>,
    GlobalSaddrTable<0, opName>;
  def _SADDR : FLAT_Global_Load_AddTid_Pseudo<opName, regClass, HasTiedOutput, 1>,
    GlobalSaddrTable<1, opName>;
}
279
// Defines the two global-segment variants of a store: the base record
// (HasSaddr = 1, saddr disabled) and NAME_SADDR (saddr enabled). Both share
// opName as their GlobalSaddrTable key.
multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> {
  let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in {
    def "" : FLAT_Store_Pseudo<opName, regClass, 1>,
      GlobalSaddrTable<0, opName>;
    def _SADDR : FLAT_Store_Pseudo<opName, regClass, 1, 1>,
      GlobalSaddrTable<1, opName>;
  }
}
288
// Global load-to-LDS pseudo: no register result and no vdata source; the
// `lds` knob is set instead, and the record is marked as both loading and
// storing.
//   opName      - mnemonic.
//   EnableSaddr - address is SReg_64:$saddr plus VGPR_32:$vaddr; otherwise a
//                 single VReg_64:$vaddr and the asm prints ", off".
class FLAT_Global_Load_LDS_Pseudo <string opName, bit EnableSaddr = 0> : FLAT_Pseudo<
  opName,
  (outs ),
  !con(
    !if(EnableSaddr,
        (ins SReg_64:$saddr, VGPR_32:$vaddr),
        (ins VReg_64:$vaddr)),
    (ins flat_offset:$offset, CPol_0:$cpol)),
  " $vaddr"#!if(EnableSaddr, ", $saddr", ", off")#"$offset$cpol"> {
  // Segment and encoding knobs.
  let is_flat_global = 1;
  let lds = 1;
  let has_vdst = 0;
  let has_data = 0;
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;
  let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", "");

  // Memory behaviour and resources. These override the FLAT_Pseudo defaults
  // for Uses, LGKM_CNT and SchedRW.
  let mayLoad = 1;
  let mayStore = 1;
  let VALU = 1;
  let Uses = [M0, EXEC];
  let LGKM_CNT = 1;
  let SchedRW = [WriteVMEM, WriteLDS];
}
310
// Defines the base and NAME_SADDR variants of a global load-to-LDS; both
// share opName as their GlobalSaddrTable key.
multiclass FLAT_Global_Load_LDS_Pseudo<string opName> {
  def ""     : FLAT_Global_Load_LDS_Pseudo<opName>,
    GlobalSaddrTable<0, opName>;
  def _SADDR : FLAT_Global_Load_LDS_Pseudo<opName, 1>,
    GlobalSaddrTable<1, opName>;
}
317
// Global "addtid" store pseudo: it has no $vaddr operand at all (has_vaddr
// is cleared), only the data and an optional scalar base.
//   opName      - mnemonic.
//   vdataClass  - register class used directly for $vdata.
//   EnableSaddr - adds SReg_64:$saddr after $vdata; otherwise the asm
//                 prints "off".
class FLAT_Global_Store_AddTid_Pseudo <string opName, RegisterClass vdataClass,
  bit EnableSaddr = 0> : FLAT_Pseudo<
  opName,
  (outs),
  !con(
    (ins vdataClass:$vdata),
    !if(EnableSaddr, (ins SReg_64:$saddr), (ins)),
    (ins flat_offset:$offset, CPol:$cpol)),
  " $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> {
  // Segment and encoding knobs.
  let is_flat_global = 1;
  let has_vaddr = 0;
  let has_vdst = 0;
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;

  // Memory behaviour.
  let mayStore = 1;
  let mayLoad  = 0;
  let maybeAtomic = 1;

  let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", "");
}
335
// Defines the base and NAME_SADDR variants of an addtid store; both share
// opName as their GlobalSaddrTable key.
multiclass FLAT_Global_Store_AddTid_Pseudo<string opName, RegisterClass regClass> {
  def "" : FLAT_Global_Store_AddTid_Pseudo<opName, regClass>,
    GlobalSaddrTable<0, opName>;
  def _SADDR : FLAT_Global_Store_AddTid_Pseudo<opName, regClass, 1>,
    GlobalSaddrTable<1, opName>;
}
342
// Mix-in that tags a scratch record with a shared key (SVOp) and its
// addressing mode string (Mode). The scratch multiclasses in this file use
// the modes "SV", "SS", "SVS" and "ST".
// NOTE(review): presumably consumed by a mapping table defined elsewhere.
class FlatScratchInst <string sv_op, string mode> {
  string SVOp = sv_op;
  string Mode = mode;
}
347
// Scratch-segment load pseudo covering four addressing forms. The operand
// list is chosen by priority: EnableSVE (vaddr + saddr), then EnableSaddr
// (saddr only), then EnableVaddr (vaddr only), else offset only.
//   opName        - mnemonic.
//   regClass      - register class of the loaded value.
//   HasTiedOutput - adds a $vdst_in input tied to $vdst.
//   EnableSaddr   - scalar address operand present.
//   EnableSVE     - both vaddr and saddr operands present.
//   EnableVaddr   - vector address operand present; defaults to
//                   EnableSVE || !EnableSaddr.
class FLAT_Scratch_Load_Pseudo <string opName, RegisterClass regClass,
  bit HasTiedOutput = 0,
  bit EnableSaddr = 0,
  bit EnableSVE = 0,
  bit EnableVaddr = !or(EnableSVE, !not(EnableSaddr))>
  : FLAT_Pseudo<
  opName,
  (outs getLdStRegisterOperand<regClass>.ret:$vdst),
  !con(
    !if(EnableSVE,
        (ins VGPR_32:$vaddr, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset),
        !if(EnableSaddr,
          (ins SReg_32_XEXEC_HI:$saddr, flat_offset:$offset),
          !if(EnableVaddr,
            (ins VGPR_32:$vaddr, flat_offset:$offset),
            (ins flat_offset:$offset)))),
     !if(HasTiedOutput, (ins CPol:$cpol, getLdStRegisterOperand<regClass>.ret:$vdst_in),
                        (ins CPol_0:$cpol))),
  " $vdst, "#!if(EnableVaddr, "$vaddr, ", "off, ")#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> {
  let has_data = 0;
  let mayLoad = 1;
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;
  let has_vaddr = EnableVaddr;
  // has_sve gates whether FLAT_Real encodes `sve` (instead of lds) in bit 13;
  // the sve value itself tracks EnableVaddr, not EnableSVE.
  let has_sve = EnableSVE;
  let sve = EnableVaddr;
  // Suffix by form: _SVS, _SADDR, none (vaddr only), or _ST (offset only).
  let PseudoInstr = opName#!if(EnableSVE, "_SVS", !if(EnableSaddr, "_SADDR", !if(EnableVaddr, "", "_ST")));
  let maybeAtomic = 1;

  let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", "");
  let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
}
380
// Scratch-segment store pseudo covering four addressing forms. The operand
// list is chosen by priority: EnableSVE (vaddr + saddr), then EnableSaddr
// (saddr only), then EnableVaddr (vaddr only), else offset only. $vdata is
// always the first input.
//   opName      - mnemonic.
//   vdataClass  - register class of the stored value.
//   EnableSaddr - scalar address operand present.
//   EnableSVE   - both vaddr and saddr operands present.
//   EnableVaddr - vector address operand present; defaults to
//                 EnableSVE || !EnableSaddr.
//   vdata_op    - operand type used for $vdata.
class FLAT_Scratch_Store_Pseudo <string opName, RegisterClass vdataClass, bit EnableSaddr = 0,
  bit EnableSVE = 0,
  bit EnableVaddr = !or(EnableSVE, !not(EnableSaddr)),
  RegisterOperand vdata_op = getLdStRegisterOperand<vdataClass>.ret> : FLAT_Pseudo<
  opName,
  (outs),
  !if(EnableSVE,
    (ins vdata_op:$vdata, VGPR_32:$vaddr, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol_0:$cpol),
    !if(EnableSaddr,
      (ins vdata_op:$vdata, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol_0:$cpol),
      !if(EnableVaddr,
        (ins vdata_op:$vdata, VGPR_32:$vaddr, flat_offset:$offset, CPol_0:$cpol),
        (ins vdata_op:$vdata, flat_offset:$offset, CPol_0:$cpol)))),
  " "#!if(EnableVaddr, "$vaddr", "off")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> {
  let mayLoad  = 0;
  let mayStore = 1;
  let has_vdst = 0;
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;
  let has_vaddr = EnableVaddr;
  // has_sve gates whether FLAT_Real encodes `sve` (instead of lds) in bit 13;
  // the sve value itself tracks EnableVaddr, not EnableSVE.
  let has_sve = EnableSVE;
  let sve = EnableVaddr;
  // Suffix by form: _SVS, _SADDR, none (vaddr only), or _ST (offset only).
  let PseudoInstr = opName#!if(EnableSVE, "_SVS", !if(EnableSaddr, "_SADDR", !if(EnableVaddr, "", "_ST")));
  let maybeAtomic = 1;
}
406
// Defines the four scratch load variants, tagged with their FlatScratchInst
// mode: base ("SV", vaddr only), _SADDR ("SS", saddr only), _SVS ("SVS",
// both) and _ST ("ST", offset only). _SVS and _ST carry their own subtarget
// predicates.
multiclass FLAT_Scratch_Load_Pseudo<string opName, RegisterClass regClass, bit HasTiedOutput = 0> {
  let is_flat_scratch = 1 in {
    def "" : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput>,
             FlatScratchInst<opName, "SV">;
    def _SADDR : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput, 1>,
                 FlatScratchInst<opName, "SS">;

    let SubtargetPredicate = HasFlatScratchSVSMode in
    def _SVS : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput, 1, 1>,
               FlatScratchInst<opName, "SVS">;

    let SubtargetPredicate = HasFlatScratchSTMode in
    def _ST  : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput, 0, 0, 0>,
               FlatScratchInst<opName, "ST">;
  }
}
423
// Defines the four scratch store variants, tagged with their FlatScratchInst
// mode: base ("SV", vaddr only), _SADDR ("SS", saddr only), _SVS ("SVS",
// both) and _ST ("ST", offset only). _SVS and _ST carry their own subtarget
// predicates.
multiclass FLAT_Scratch_Store_Pseudo<string opName, RegisterClass regClass> {
  let is_flat_scratch = 1 in {
    def "" : FLAT_Scratch_Store_Pseudo<opName, regClass>,
             FlatScratchInst<opName, "SV">;
    def _SADDR : FLAT_Scratch_Store_Pseudo<opName, regClass, 1>,
                 FlatScratchInst<opName, "SS">;

    let SubtargetPredicate = HasFlatScratchSVSMode in
    def _SVS : FLAT_Scratch_Store_Pseudo<opName, regClass, 1, 1>,
               FlatScratchInst<opName, "SVS">;

    let SubtargetPredicate = HasFlatScratchSTMode in
    def _ST  : FLAT_Scratch_Store_Pseudo<opName, regClass, 0, 0, 0>,
               FlatScratchInst<opName, "ST">;
  }
}
440
// Scratch load-to-LDS pseudo: no register result and no vdata source; the
// `lds` knob is set instead, and the record is marked as both loading and
// storing. The operand list follows the same priority as the other scratch
// pseudos: EnableSVE, then EnableSaddr, then EnableVaddr, else offset only.
//   opName      - mnemonic.
//   EnableSaddr - scalar address operand present.
//   EnableSVE   - both vaddr and saddr operands present.
//   EnableVaddr - vector address operand present; defaults to
//                 EnableSVE || !EnableSaddr.
class FLAT_Scratch_Load_LDS_Pseudo <string opName, bit EnableSaddr = 0,
  bit EnableSVE = 0,
  bit EnableVaddr = !or(EnableSVE, !not(EnableSaddr))> : FLAT_Pseudo<
  opName,
  (outs ),
  !if(EnableSVE,
    (ins VGPR_32:$vaddr, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol:$cpol),
    !if(EnableSaddr,
      (ins SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol:$cpol),
      !if(EnableVaddr,
        (ins VGPR_32:$vaddr, flat_offset:$offset, CPol:$cpol),
        (ins flat_offset:$offset, CPol:$cpol)))),
  " "#!if(EnableVaddr, "$vaddr, ", "off, ")#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> {

  // Overrides the FLAT_Pseudo default, which would clear LGKM_CNT for a
  // scratch instruction.
  let LGKM_CNT = 1;
  let is_flat_scratch = 1;
  let lds = 1;
  let has_data = 0;
  let has_vdst = 0;
  let mayLoad = 1;
  let mayStore = 1;
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;
  let has_vaddr = EnableVaddr;
  // NOTE(review): when has_sve is set, FLAT_Real encodes `sve` in bit 13 and
  // the `lds` value is not encoded there - confirm this is intended for _SVS.
  let has_sve = EnableSVE;
  let sve = EnableVaddr;
  let VALU = 1;
  // Suffix by form: _SVS, _SADDR, none (vaddr only), or _ST (offset only).
  let PseudoInstr = opName#!if(EnableSVE, "_SVS", !if(EnableSaddr, "_SADDR", !if(EnableVaddr, "", "_ST")));
  let Uses = [M0, EXEC];
  let SchedRW = [WriteVMEM, WriteLDS];
}
472
// Defines the four scratch load-to-LDS variants with their FlatScratchInst
// modes ("SV", "SS", "SVS", "ST").
// NOTE(review): unlike the non-LDS scratch multiclasses, _SVS and _ST are
// not wrapped in HasFlatScratchSVSMode / HasFlatScratchSTMode predicates
// here - confirm that is intended.
multiclass FLAT_Scratch_Load_LDS_Pseudo<string opName> {
  def ""     : FLAT_Scratch_Load_LDS_Pseudo<opName>,
               FlatScratchInst<opName, "SV">;
  def _SADDR : FLAT_Scratch_Load_LDS_Pseudo<opName, 1>,
               FlatScratchInst<opName, "SS">;
  def _SVS   : FLAT_Scratch_Load_LDS_Pseudo<opName, 1, 1>,
               FlatScratchInst<opName, "SVS">;
  def _ST    : FLAT_Scratch_Load_LDS_Pseudo<opName, 0, 0, 0>,
               FlatScratchInst<opName, "ST">;
}
483
// Base for atomic pseudos that do not return the old value. All template
// arguments are forwarded unchanged to FLAT_Pseudo.
class FLAT_AtomicNoRet_Pseudo<string opName, dag outs, dag ins,
                               string asm, list<dag> pattern = []> :
  FLAT_Pseudo<opName, outs, ins, asm, pattern> {
  // An atomic both reads and writes memory.
  let mayLoad = 1;
  let mayStore = 1;
  let maybeAtomic = 1;
  let IsAtomicNoRet = 1;

  // No destination register.
  let has_vdst = 0;

  // GLC is not taken from the cpol operand; the real encoding uses glcValue.
  let has_glc  = 0;
  let glcValue = 0;

  let has_sccb  = 1;
  let sccbValue = 0;
}
497
// Base for atomic pseudos that return the old value. It derives from the
// no-return base and flips the knobs that differ: it has a vdst, GLC is
// pinned to 1, and the IsAtomicRet / IsAtomicNoRet flags are swapped.
class FLAT_AtomicRet_Pseudo<string opName, dag outs, dag ins,
                            string asm, list<dag> pattern = []>
  : FLAT_AtomicNoRet_Pseudo<opName, outs, ins, asm, pattern> {
  // Flag swap relative to the no-return base.
  let IsAtomicRet = 1;
  let IsAtomicNoRet = 0;

  // Encoding knobs.
  let has_vdst = 1;
  let glcValue = 1;
  let sccbValue = 0;

  let hasPostISelHook = 1;
  let PseudoInstr = NAME # "_RTN";
}
509
// Defines the no-return flat-segment atomic record for one operation.
//   opName  - mnemonic.
//   vdst_rc - only used here as the default for data_rc.
//   vt      - not referenced in this multiclass body.
//   data_vt - data value type; only used for the isFP default.
//   data_rc - register class of the data operand.
//   isFP    - value assigned to FPAtomic.
//   data_op - operand type used for $vdata.
multiclass FLAT_Atomic_Pseudo_NO_RTN<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc,
  bit isFP = isFloatType<data_vt>.ret,
  RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret> {
  def "" : FLAT_AtomicNoRet_Pseudo <opName,
    (outs),
    (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_0:$cpol),
    " $vaddr, $vdata$offset$cpol">,
    GlobalSaddrTable<0, opName>,
    AtomicNoRet <opName, 0> {
    let PseudoInstr = NAME;
    let FPAtomic = isFP;
    let AddedComplexity = -1; // Prefer global atomics if available
  }
}
529
// Defines the returning (NAME_RTN) flat-segment atomic record for one
// operation. The result operand type is derived from vdst_rc and the cache
// policy operand is CPol_GLC1. PseudoInstr comes from FLAT_AtomicRet_Pseudo.
//   opName  - mnemonic.
//   vdst_rc - register class of the returned value.
//   vt      - not referenced in this multiclass body.
//   data_vt - data value type; only used for the isFP default.
//   data_rc - register class of the data operand.
//   isFP    - value assigned to FPAtomic.
//   data_op - operand type used for $vdata.
multiclass FLAT_Atomic_Pseudo_RTN<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc,
  bit isFP = isFloatType<data_vt>.ret,
  RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret> {
  def _RTN : FLAT_AtomicRet_Pseudo <opName,
    (outs getLdStRegisterOperand<vdst_rc>.ret:$vdst),
    (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_GLC1:$cpol),
    " $vdst, $vaddr, $vdata$offset$cpol">,
    GlobalSaddrTable<0, opName#"_rtn">,
    AtomicNoRet <opName, 1> {
    let FPAtomic = isFP;
    let AddedComplexity = -1; // Prefer global atomics if available
  }
}
548
// Convenience wrapper: instantiates both the no-return and the returning
// flat atomic multiclasses with the same arguments, yielding NAME and
// NAME_RTN.
multiclass FLAT_Atomic_Pseudo<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc,
  bit isFP = isFloatType<data_vt>.ret,
  RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret> {
  defm "" : FLAT_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt, data_vt, data_rc, isFP, data_op>;
  defm "" : FLAT_Atomic_Pseudo_RTN<opName, vdst_rc, vt, data_vt, data_rc, isFP, data_op>;
}
560
// Defines the no-return global atomic records for one operation: the base
// record (saddr field present but disabled, asm prints "off") and
// NAME_SADDR (VGPR_32 $vaddr plus SReg_64 $saddr). is_flat_global is not
// set here; FLAT_Global_Atomic_Pseudo applies it with an outer let.
//   opName  - mnemonic.
//   vdst_rc - only used here as the default for data_rc.
//   vt      - not referenced in this multiclass body.
//   data_vt - data value type; only used for the isFP default.
//   data_rc - register class of the data operand.
//   isFP    - value assigned to FPAtomic.
//   data_op - operand type used for $vdata.
multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc,
  bit isFP = isFloatType<data_vt>.ret,
  RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret> {

  def "" : FLAT_AtomicNoRet_Pseudo <opName,
    (outs),
    (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_0:$cpol),
    " $vaddr, $vdata, off$offset$cpol">,
    GlobalSaddrTable<0, opName>,
    AtomicNoRet <opName, 0> {
    let has_saddr = 1;
    let PseudoInstr = NAME;
    let FPAtomic = isFP;
  }

  def _SADDR : FLAT_AtomicNoRet_Pseudo <opName,
    (outs),
    (ins VGPR_32:$vaddr, data_op:$vdata, SReg_64:$saddr, flat_offset:$offset, CPol_0:$cpol),
    " $vaddr, $vdata, $saddr$offset$cpol">,
    GlobalSaddrTable<1, opName>,
    AtomicNoRet <opName#"_saddr", 0> {
    let has_saddr = 1;
    let enabled_saddr = 1;
    let PseudoInstr = NAME#"_SADDR";
    let FPAtomic = isFP;
  }
}
593
// Defines the returning global atomic records for one operation: NAME_RTN
// (saddr field present but disabled, asm prints "off") and NAME_SADDR_RTN
// (VGPR_32 $vaddr plus SReg_64 $saddr). Both use CPol_GLC1 for the cache
// policy operand. NAME_RTN keeps the PseudoInstr set by
// FLAT_AtomicRet_Pseudo; NAME_SADDR_RTN overrides it.
//   opName  - mnemonic.
//   vdst_rc - register class of the returned value.
//   vt      - not referenced in this multiclass body.
//   data_vt - data value type; only used for the isFP default.
//   data_rc - register class of the data operand.
//   isFP    - value assigned to FPAtomic.
//   data_op - operand type used for $vdata.
//   vdst_op - operand type used for $vdst.
multiclass FLAT_Global_Atomic_Pseudo_RTN<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc,
  bit isFP = isFloatType<data_vt>.ret,
  RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret,
  RegisterOperand vdst_op = getLdStRegisterOperand<vdst_rc>.ret> {

  def _RTN : FLAT_AtomicRet_Pseudo <opName,
    (outs vdst_op:$vdst),
      (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_GLC1:$cpol),
    " $vdst, $vaddr, $vdata, off$offset$cpol">,
    GlobalSaddrTable<0, opName#"_rtn">,
    AtomicNoRet <opName, 1> {
    let has_saddr = 1;
    let FPAtomic = isFP;
  }

  def _SADDR_RTN : FLAT_AtomicRet_Pseudo <opName,
    (outs vdst_op:$vdst),
      (ins VGPR_32:$vaddr, data_op:$vdata, SReg_64:$saddr, flat_offset:$offset, CPol_GLC1:$cpol),
    " $vdst, $vaddr, $vdata, $saddr$offset$cpol">,
    GlobalSaddrTable<1, opName#"_rtn">,
    AtomicNoRet <opName#"_saddr", 1> {
     let has_saddr = 1;
     let enabled_saddr = 1;
     let PseudoInstr = NAME#"_SADDR_RTN";
     let FPAtomic = isFP;
  }
}
626
// Convenience wrapper: instantiates both global atomic multiclasses with
// is_flat_global = 1 and the HasFlatGlobalInsts predicate, yielding NAME,
// NAME_SADDR, NAME_RTN and NAME_SADDR_RTN. The trailing isFP / operand
// arguments of the inner multiclasses are left at their defaults.
multiclass FLAT_Global_Atomic_Pseudo<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc> {
  let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in {
    defm "" : FLAT_Global_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt, data_vt, data_rc>;
    defm "" : FLAT_Global_Atomic_Pseudo_RTN<opName, vdst_rc, vt, data_vt, data_rc>;
  }
}
638
//===----------------------------------------------------------------------===//
// Flat Instructions
//===----------------------------------------------------------------------===//
642
// Flat-segment loads. FLAT_Load_Pseudo is used with its default
// HasSaddr = 0 / EnableSaddr = 0, so these take a single VReg_64 address and
// have no saddr operand.
def FLAT_LOAD_UBYTE    : FLAT_Load_Pseudo <"flat_load_ubyte", VGPR_32>;
def FLAT_LOAD_SBYTE    : FLAT_Load_Pseudo <"flat_load_sbyte", VGPR_32>;
def FLAT_LOAD_USHORT   : FLAT_Load_Pseudo <"flat_load_ushort", VGPR_32>;
def FLAT_LOAD_SSHORT   : FLAT_Load_Pseudo <"flat_load_sshort", VGPR_32>;
def FLAT_LOAD_DWORD    : FLAT_Load_Pseudo <"flat_load_dword", VGPR_32>;
def FLAT_LOAD_DWORDX2  : FLAT_Load_Pseudo <"flat_load_dwordx2", VReg_64>;
def FLAT_LOAD_DWORDX4  : FLAT_Load_Pseudo <"flat_load_dwordx4", VReg_128>;
def FLAT_LOAD_DWORDX3  : FLAT_Load_Pseudo <"flat_load_dwordx3", VReg_96>;

// Flat-segment stores, likewise without an saddr operand.
def FLAT_STORE_BYTE    : FLAT_Store_Pseudo <"flat_store_byte", VGPR_32>;
def FLAT_STORE_SHORT   : FLAT_Store_Pseudo <"flat_store_short", VGPR_32>;
def FLAT_STORE_DWORD   : FLAT_Store_Pseudo <"flat_store_dword", VGPR_32>;
def FLAT_STORE_DWORDX2 : FLAT_Store_Pseudo <"flat_store_dwordx2", VReg_64>;
def FLAT_STORE_DWORDX4 : FLAT_Store_Pseudo <"flat_store_dwordx4", VReg_128>;
def FLAT_STORE_DWORDX3 : FLAT_Store_Pseudo <"flat_store_dwordx3", VReg_96>;

// D16 variants, gated on HasD16LoadStore. The loads pass HasTiedOutput = 1,
// so they take a $vdst_in operand tied to $vdst.
let SubtargetPredicate = HasD16LoadStore in {
let TiedSourceNotRead = 1 in {
def FLAT_LOAD_UBYTE_D16     : FLAT_Load_Pseudo <"flat_load_ubyte_d16", VGPR_32, 1>;
def FLAT_LOAD_UBYTE_D16_HI  : FLAT_Load_Pseudo <"flat_load_ubyte_d16_hi", VGPR_32, 1>;
def FLAT_LOAD_SBYTE_D16     : FLAT_Load_Pseudo <"flat_load_sbyte_d16", VGPR_32, 1>;
def FLAT_LOAD_SBYTE_D16_HI  : FLAT_Load_Pseudo <"flat_load_sbyte_d16_hi", VGPR_32, 1>;
def FLAT_LOAD_SHORT_D16     : FLAT_Load_Pseudo <"flat_load_short_d16", VGPR_32, 1>;
def FLAT_LOAD_SHORT_D16_HI  : FLAT_Load_Pseudo <"flat_load_short_d16_hi", VGPR_32, 1>;
}

def FLAT_STORE_BYTE_D16_HI  : FLAT_Store_Pseudo <"flat_store_byte_d16_hi", VGPR_32>;
def FLAT_STORE_SHORT_D16_HI : FLAT_Store_Pseudo <"flat_store_short_d16_hi", VGPR_32>;
}
672
// Flat-segment integer atomics. Each defm expands through FLAT_Atomic_Pseudo
// into a no-return record (NAME) and a returning record (NAME_RTN).
// The cmpswap forms pass an explicit data type and register class that are
// twice as wide as the result (v2i32/VReg_64 for i32, v2i64/VReg_128 for
// i64); all other operations use the same class for data and result.
defm FLAT_ATOMIC_CMPSWAP    : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap",
                                VGPR_32, i32, v2i32, VReg_64>;

defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap_x2",
                                VReg_64, i64, v2i64, VReg_128>;

defm FLAT_ATOMIC_SWAP       : FLAT_Atomic_Pseudo <"flat_atomic_swap",
                                VGPR_32, i32>;

defm FLAT_ATOMIC_SWAP_X2    : FLAT_Atomic_Pseudo <"flat_atomic_swap_x2",
                                VReg_64, i64>;

defm FLAT_ATOMIC_ADD        : FLAT_Atomic_Pseudo <"flat_atomic_add",
                                VGPR_32, i32>;

defm FLAT_ATOMIC_SUB        : FLAT_Atomic_Pseudo <"flat_atomic_sub",
                                VGPR_32, i32>;

defm FLAT_ATOMIC_SMIN       : FLAT_Atomic_Pseudo <"flat_atomic_smin",
                                VGPR_32, i32>;

defm FLAT_ATOMIC_UMIN       : FLAT_Atomic_Pseudo <"flat_atomic_umin",
                                VGPR_32, i32>;

defm FLAT_ATOMIC_SMAX       : FLAT_Atomic_Pseudo <"flat_atomic_smax",
                                VGPR_32, i32>;

defm FLAT_ATOMIC_UMAX       : FLAT_Atomic_Pseudo <"flat_atomic_umax",
                                VGPR_32, i32>;

defm FLAT_ATOMIC_AND        : FLAT_Atomic_Pseudo <"flat_atomic_and",
                                VGPR_32, i32>;

defm FLAT_ATOMIC_OR         : FLAT_Atomic_Pseudo <"flat_atomic_or",
                                VGPR_32, i32>;

defm FLAT_ATOMIC_XOR        : FLAT_Atomic_Pseudo <"flat_atomic_xor",
                                VGPR_32, i32>;

defm FLAT_ATOMIC_INC        : FLAT_Atomic_Pseudo <"flat_atomic_inc",
                                VGPR_32, i32>;

defm FLAT_ATOMIC_DEC        : FLAT_Atomic_Pseudo <"flat_atomic_dec",
                                VGPR_32, i32>;

// 64-bit ("_x2") forms of the same operations.
defm FLAT_ATOMIC_ADD_X2     : FLAT_Atomic_Pseudo <"flat_atomic_add_x2",
                                VReg_64, i64>;

defm FLAT_ATOMIC_SUB_X2     : FLAT_Atomic_Pseudo <"flat_atomic_sub_x2",
                                VReg_64, i64>;

defm FLAT_ATOMIC_SMIN_X2    : FLAT_Atomic_Pseudo <"flat_atomic_smin_x2",
                                VReg_64, i64>;

defm FLAT_ATOMIC_UMIN_X2    : FLAT_Atomic_Pseudo <"flat_atomic_umin_x2",
                                VReg_64, i64>;

defm FLAT_ATOMIC_SMAX_X2    : FLAT_Atomic_Pseudo <"flat_atomic_smax_x2",
                                VReg_64, i64>;

defm FLAT_ATOMIC_UMAX_X2    : FLAT_Atomic_Pseudo <"flat_atomic_umax_x2",
                                VReg_64, i64>;

defm FLAT_ATOMIC_AND_X2     : FLAT_Atomic_Pseudo <"flat_atomic_and_x2",
                                VReg_64, i64>;

defm FLAT_ATOMIC_OR_X2      : FLAT_Atomic_Pseudo <"flat_atomic_or_x2",
                                VReg_64, i64>;

defm FLAT_ATOMIC_XOR_X2     : FLAT_Atomic_Pseudo <"flat_atomic_xor_x2",
                                VReg_64, i64>;

defm FLAT_ATOMIC_INC_X2     : FLAT_Atomic_Pseudo <"flat_atomic_inc_x2",
                                VReg_64, i64>;

defm FLAT_ATOMIC_DEC_X2     : FLAT_Atomic_Pseudo <"flat_atomic_dec_x2",
                                VReg_64, i64>;
750
751// GFX7-, GFX10-only flat instructions.
752let SubtargetPredicate = isGFX7GFX10 in {
753
754defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap_x2",
755                                VReg_64, f64, v2f64, VReg_128>;
756
757defm FLAT_ATOMIC_FMIN_X2     : FLAT_Atomic_Pseudo <"flat_atomic_fmin_x2",
758                                VReg_64, f64>;
759
760defm FLAT_ATOMIC_FMAX_X2     : FLAT_Atomic_Pseudo <"flat_atomic_fmax_x2",
761                                VReg_64, f64>;
762
763} // End SubtargetPredicate = isGFX7GFX10
764
765let SubtargetPredicate = isGFX90APlus in {
766  defm FLAT_ATOMIC_ADD_F64   : FLAT_Atomic_Pseudo<"flat_atomic_add_f64", VReg_64, f64>;
767  defm FLAT_ATOMIC_MIN_F64   : FLAT_Atomic_Pseudo<"flat_atomic_min_f64", VReg_64, f64>;
768  defm FLAT_ATOMIC_MAX_F64   : FLAT_Atomic_Pseudo<"flat_atomic_max_f64", VReg_64, f64>;
769  defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_add_f64", VReg_64, f64>;
770  defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_min_f64", VReg_64, f64>;
771  defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_max_f64", VReg_64, f64>;
772} // End SubtargetPredicate = isGFX90APlus
773
774let SubtargetPredicate = HasAtomicFlatPkAdd16Insts in {
775  defm FLAT_ATOMIC_PK_ADD_F16    : FLAT_Atomic_Pseudo<"flat_atomic_pk_add_f16",  VGPR_32, v2f16>;
776  let FPAtomic = 1 in
777    defm FLAT_ATOMIC_PK_ADD_BF16   : FLAT_Atomic_Pseudo<"flat_atomic_pk_add_bf16", VGPR_32, v2i16>;
778} // End SubtargetPredicate = HasAtomicFlatPkAdd16Insts
779
780let SubtargetPredicate = HasAtomicGlobalPkAddBF16Inst, FPAtomic = 1 in
781  defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Atomic_Pseudo<"global_atomic_pk_add_bf16", VGPR_32, v2i16>;
782
// Flat f32 atomics that exist only on GFX7, GFX10 and GFX11.
let SubtargetPredicate = isGFX7GFX10GFX11 in {

// The compare-and-swap form passes two extra arguments: v2f32 and VReg_64.
defm FLAT_ATOMIC_FCMPSWAP :
  FLAT_Atomic_Pseudo<"flat_atomic_fcmpswap", VGPR_32, f32, v2f32, VReg_64>;

defm FLAT_ATOMIC_FMIN :
  FLAT_Atomic_Pseudo<"flat_atomic_fmin", VGPR_32, f32>;

defm FLAT_ATOMIC_FMAX :
  FLAT_Atomic_Pseudo<"flat_atomic_fmax", VGPR_32, f32>;

} // End SubtargetPredicate = isGFX7GFX10GFX11
796
// Flat f32 add atomic (GFX940 and GFX11 only), gated on
// HasFlatAtomicFaddF32Inst.
let SubtargetPredicate = HasFlatAtomicFaddF32Inst in {
  defm FLAT_ATOMIC_ADD_F32 :
    FLAT_Atomic_Pseudo<"flat_atomic_add_f32", VGPR_32, f32>;
} // End SubtargetPredicate = HasFlatAtomicFaddF32Inst
801
// flat_atomic_csub_u32 is defined only under the isGFX12Plus predicate.
let SubtargetPredicate = isGFX12Plus in {
  defm FLAT_ATOMIC_CSUB_U32 :
    FLAT_Atomic_Pseudo<"flat_atomic_csub_u32", VGPR_32, i32>;
} // End SubtargetPredicate = isGFX12Plus
805
// Global loads. The sub-dword and dword forms use VGPR_32; the x2/x3/x4
// forms use VReg_64, VReg_96 and VReg_128 respectively.
defm GLOBAL_LOAD_UBYTE   : FLAT_Global_Load_Pseudo<"global_load_ubyte", VGPR_32>;
defm GLOBAL_LOAD_SBYTE   : FLAT_Global_Load_Pseudo<"global_load_sbyte", VGPR_32>;
defm GLOBAL_LOAD_USHORT  : FLAT_Global_Load_Pseudo<"global_load_ushort", VGPR_32>;
defm GLOBAL_LOAD_SSHORT  : FLAT_Global_Load_Pseudo<"global_load_sshort", VGPR_32>;
defm GLOBAL_LOAD_DWORD   : FLAT_Global_Load_Pseudo<"global_load_dword", VGPR_32>;
defm GLOBAL_LOAD_DWORDX2 : FLAT_Global_Load_Pseudo<"global_load_dwordx2", VReg_64>;
defm GLOBAL_LOAD_DWORDX3 : FLAT_Global_Load_Pseudo<"global_load_dwordx3", VReg_96>;
defm GLOBAL_LOAD_DWORDX4 : FLAT_Global_Load_Pseudo<"global_load_dwordx4", VReg_128>;
814
// D16 global loads. Each passes 1 as the third template argument and is
// defined with TiedSourceNotRead = 1.
// NOTE(review): the third argument presumably requests a tied input operand;
// confirm against FLAT_Global_Load_Pseudo.
let TiedSourceNotRead = 1 in {
  defm GLOBAL_LOAD_UBYTE_D16 :
    FLAT_Global_Load_Pseudo<"global_load_ubyte_d16", VGPR_32, 1>;
  defm GLOBAL_LOAD_UBYTE_D16_HI :
    FLAT_Global_Load_Pseudo<"global_load_ubyte_d16_hi", VGPR_32, 1>;
  defm GLOBAL_LOAD_SBYTE_D16 :
    FLAT_Global_Load_Pseudo<"global_load_sbyte_d16", VGPR_32, 1>;
  defm GLOBAL_LOAD_SBYTE_D16_HI :
    FLAT_Global_Load_Pseudo<"global_load_sbyte_d16_hi", VGPR_32, 1>;
  defm GLOBAL_LOAD_SHORT_D16 :
    FLAT_Global_Load_Pseudo<"global_load_short_d16", VGPR_32, 1>;
  defm GLOBAL_LOAD_SHORT_D16_HI :
    FLAT_Global_Load_Pseudo<"global_load_short_d16_hi", VGPR_32, 1>;
} // End TiedSourceNotRead = 1
823
// The addtid load uses its own pseudo multiclass and is gated on
// HasGFX10_BEncoding.
let OtherPredicates = [HasGFX10_BEncoding] in {
  defm GLOBAL_LOAD_DWORD_ADDTID :
    FLAT_Global_Load_AddTid_Pseudo<"global_load_dword_addtid", VGPR_32>;
}
826
// Global stores, in the same width progression as the global loads.
defm GLOBAL_STORE_BYTE :
  FLAT_Global_Store_Pseudo<"global_store_byte", VGPR_32>;
defm GLOBAL_STORE_SHORT :
  FLAT_Global_Store_Pseudo<"global_store_short", VGPR_32>;
defm GLOBAL_STORE_DWORD :
  FLAT_Global_Store_Pseudo<"global_store_dword", VGPR_32>;
defm GLOBAL_STORE_DWORDX2 :
  FLAT_Global_Store_Pseudo<"global_store_dwordx2", VReg_64>;
defm GLOBAL_STORE_DWORDX3 :
  FLAT_Global_Store_Pseudo<"global_store_dwordx3", VReg_96>;
defm GLOBAL_STORE_DWORDX4 :
  FLAT_Global_Store_Pseudo<"global_store_dwordx4", VReg_128>;

// The addtid store is gated on HasGFX10_BEncoding.
let OtherPredicates = [HasGFX10_BEncoding] in {
  defm GLOBAL_STORE_DWORD_ADDTID :
    FLAT_Global_Store_AddTid_Pseudo<"global_store_dword_addtid", VGPR_32>;
}

// D16 "hi" stores.
defm GLOBAL_STORE_BYTE_D16_HI :
  FLAT_Global_Store_Pseudo<"global_store_byte_d16_hi", VGPR_32>;
defm GLOBAL_STORE_SHORT_D16_HI :
  FLAT_Global_Store_Pseudo<"global_store_short_d16_hi", VGPR_32>;
838
// Global integer atomics and global LDS loads. Every definition in this block
// is created with is_flat_global = 1.
let is_flat_global = 1 in {

// Compare-and-swap forms pass two extra arguments: a two-element vector type
// and a wider register class.
defm GLOBAL_ATOMIC_CMPSWAP :
  FLAT_Global_Atomic_Pseudo<"global_atomic_cmpswap", VGPR_32, i32, v2i32, VReg_64>;

defm GLOBAL_ATOMIC_CMPSWAP_X2 :
  FLAT_Global_Atomic_Pseudo<"global_atomic_cmpswap_x2", VReg_64, i64, v2i64, VReg_128>;

defm GLOBAL_ATOMIC_SWAP :
  FLAT_Global_Atomic_Pseudo<"global_atomic_swap", VGPR_32, i32>;

defm GLOBAL_ATOMIC_SWAP_X2 :
  FLAT_Global_Atomic_Pseudo<"global_atomic_swap_x2", VReg_64, i64>;

// 32-bit read-modify-write atomics (VGPR_32, i32).
defm GLOBAL_ATOMIC_ADD :
  FLAT_Global_Atomic_Pseudo<"global_atomic_add", VGPR_32, i32>;

defm GLOBAL_ATOMIC_SUB :
  FLAT_Global_Atomic_Pseudo<"global_atomic_sub", VGPR_32, i32>;

defm GLOBAL_ATOMIC_SMIN :
  FLAT_Global_Atomic_Pseudo<"global_atomic_smin", VGPR_32, i32>;

defm GLOBAL_ATOMIC_UMIN :
  FLAT_Global_Atomic_Pseudo<"global_atomic_umin", VGPR_32, i32>;

defm GLOBAL_ATOMIC_SMAX :
  FLAT_Global_Atomic_Pseudo<"global_atomic_smax", VGPR_32, i32>;

defm GLOBAL_ATOMIC_UMAX :
  FLAT_Global_Atomic_Pseudo<"global_atomic_umax", VGPR_32, i32>;

defm GLOBAL_ATOMIC_AND :
  FLAT_Global_Atomic_Pseudo<"global_atomic_and", VGPR_32, i32>;

defm GLOBAL_ATOMIC_OR :
  FLAT_Global_Atomic_Pseudo<"global_atomic_or", VGPR_32, i32>;

defm GLOBAL_ATOMIC_XOR :
  FLAT_Global_Atomic_Pseudo<"global_atomic_xor", VGPR_32, i32>;

defm GLOBAL_ATOMIC_INC :
  FLAT_Global_Atomic_Pseudo<"global_atomic_inc", VGPR_32, i32>;

defm GLOBAL_ATOMIC_DEC :
  FLAT_Global_Atomic_Pseudo<"global_atomic_dec", VGPR_32, i32>;

// 64-bit read-modify-write atomics (VReg_64, i64).
defm GLOBAL_ATOMIC_ADD_X2 :
  FLAT_Global_Atomic_Pseudo<"global_atomic_add_x2", VReg_64, i64>;

defm GLOBAL_ATOMIC_SUB_X2 :
  FLAT_Global_Atomic_Pseudo<"global_atomic_sub_x2", VReg_64, i64>;

defm GLOBAL_ATOMIC_SMIN_X2 :
  FLAT_Global_Atomic_Pseudo<"global_atomic_smin_x2", VReg_64, i64>;

defm GLOBAL_ATOMIC_UMIN_X2 :
  FLAT_Global_Atomic_Pseudo<"global_atomic_umin_x2", VReg_64, i64>;

defm GLOBAL_ATOMIC_SMAX_X2 :
  FLAT_Global_Atomic_Pseudo<"global_atomic_smax_x2", VReg_64, i64>;

defm GLOBAL_ATOMIC_UMAX_X2 :
  FLAT_Global_Atomic_Pseudo<"global_atomic_umax_x2", VReg_64, i64>;

defm GLOBAL_ATOMIC_AND_X2 :
  FLAT_Global_Atomic_Pseudo<"global_atomic_and_x2", VReg_64, i64>;

defm GLOBAL_ATOMIC_OR_X2 :
  FLAT_Global_Atomic_Pseudo<"global_atomic_or_x2", VReg_64, i64>;

defm GLOBAL_ATOMIC_XOR_X2 :
  FLAT_Global_Atomic_Pseudo<"global_atomic_xor_x2", VReg_64, i64>;

defm GLOBAL_ATOMIC_INC_X2 :
  FLAT_Global_Atomic_Pseudo<"global_atomic_inc_x2", VReg_64, i64>;

defm GLOBAL_ATOMIC_DEC_X2 :
  FLAT_Global_Atomic_Pseudo<"global_atomic_dec_x2", VReg_64, i64>;

// csub is additionally gated on HasGFX10_BEncoding.
let SubtargetPredicate = HasGFX10_BEncoding in {
  defm GLOBAL_ATOMIC_CSUB :
    FLAT_Global_Atomic_Pseudo<"global_atomic_csub", VGPR_32, i32>;
} // End SubtargetPredicate = HasGFX10_BEncoding

// Global loads that use the LDS pseudo multiclass.
defm GLOBAL_LOAD_LDS_UBYTE  : FLAT_Global_Load_LDS_Pseudo<"global_load_lds_ubyte">;
defm GLOBAL_LOAD_LDS_SBYTE  : FLAT_Global_Load_LDS_Pseudo<"global_load_lds_sbyte">;
defm GLOBAL_LOAD_LDS_USHORT : FLAT_Global_Load_LDS_Pseudo<"global_load_lds_ushort">;
defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Global_Load_LDS_Pseudo<"global_load_lds_sshort">;
defm GLOBAL_LOAD_LDS_DWORD  : FLAT_Global_Load_LDS_Pseudo<"global_load_lds_dword">;

let SubtargetPredicate = isGFX12Plus in {
  defm GLOBAL_ATOMIC_ORDERED_ADD_B64 :
    FLAT_Global_Atomic_Pseudo<"global_atomic_ordered_add_b64", VReg_64, i64>;
} // End SubtargetPredicate = isGFX12Plus

} // End is_flat_global = 1
934
// Scratch loads, stores and LDS loads. Every definition in this block gets
// SubtargetPredicate = HasFlatScratchInsts.
let SubtargetPredicate = HasFlatScratchInsts in {

// Plain loads.
defm SCRATCH_LOAD_UBYTE   : FLAT_Scratch_Load_Pseudo<"scratch_load_ubyte", VGPR_32>;
defm SCRATCH_LOAD_SBYTE   : FLAT_Scratch_Load_Pseudo<"scratch_load_sbyte", VGPR_32>;
defm SCRATCH_LOAD_USHORT  : FLAT_Scratch_Load_Pseudo<"scratch_load_ushort", VGPR_32>;
defm SCRATCH_LOAD_SSHORT  : FLAT_Scratch_Load_Pseudo<"scratch_load_sshort", VGPR_32>;
defm SCRATCH_LOAD_DWORD   : FLAT_Scratch_Load_Pseudo<"scratch_load_dword", VGPR_32>;
defm SCRATCH_LOAD_DWORDX2 : FLAT_Scratch_Load_Pseudo<"scratch_load_dwordx2", VReg_64>;
defm SCRATCH_LOAD_DWORDX3 : FLAT_Scratch_Load_Pseudo<"scratch_load_dwordx3", VReg_96>;
defm SCRATCH_LOAD_DWORDX4 : FLAT_Scratch_Load_Pseudo<"scratch_load_dwordx4", VReg_128>;

// D16 loads: each passes 1 as the third template argument and is defined with
// TiedSourceNotRead = 1.
let TiedSourceNotRead = 1 in {
  defm SCRATCH_LOAD_UBYTE_D16 :
    FLAT_Scratch_Load_Pseudo<"scratch_load_ubyte_d16", VGPR_32, 1>;
  defm SCRATCH_LOAD_UBYTE_D16_HI :
    FLAT_Scratch_Load_Pseudo<"scratch_load_ubyte_d16_hi", VGPR_32, 1>;
  defm SCRATCH_LOAD_SBYTE_D16 :
    FLAT_Scratch_Load_Pseudo<"scratch_load_sbyte_d16", VGPR_32, 1>;
  defm SCRATCH_LOAD_SBYTE_D16_HI :
    FLAT_Scratch_Load_Pseudo<"scratch_load_sbyte_d16_hi", VGPR_32, 1>;
  defm SCRATCH_LOAD_SHORT_D16 :
    FLAT_Scratch_Load_Pseudo<"scratch_load_short_d16", VGPR_32, 1>;
  defm SCRATCH_LOAD_SHORT_D16_HI :
    FLAT_Scratch_Load_Pseudo<"scratch_load_short_d16_hi", VGPR_32, 1>;
} // End TiedSourceNotRead = 1

// Stores.
defm SCRATCH_STORE_BYTE    : FLAT_Scratch_Store_Pseudo<"scratch_store_byte", VGPR_32>;
defm SCRATCH_STORE_SHORT   : FLAT_Scratch_Store_Pseudo<"scratch_store_short", VGPR_32>;
defm SCRATCH_STORE_DWORD   : FLAT_Scratch_Store_Pseudo<"scratch_store_dword", VGPR_32>;
defm SCRATCH_STORE_DWORDX2 : FLAT_Scratch_Store_Pseudo<"scratch_store_dwordx2", VReg_64>;
defm SCRATCH_STORE_DWORDX3 : FLAT_Scratch_Store_Pseudo<"scratch_store_dwordx3", VReg_96>;
defm SCRATCH_STORE_DWORDX4 : FLAT_Scratch_Store_Pseudo<"scratch_store_dwordx4", VReg_128>;

defm SCRATCH_STORE_BYTE_D16_HI :
  FLAT_Scratch_Store_Pseudo<"scratch_store_byte_d16_hi", VGPR_32>;
defm SCRATCH_STORE_SHORT_D16_HI :
  FLAT_Scratch_Store_Pseudo<"scratch_store_short_d16_hi", VGPR_32>;

// Scratch loads that use the LDS pseudo multiclass.
defm SCRATCH_LOAD_LDS_UBYTE  : FLAT_Scratch_Load_LDS_Pseudo<"scratch_load_lds_ubyte">;
defm SCRATCH_LOAD_LDS_SBYTE  : FLAT_Scratch_Load_LDS_Pseudo<"scratch_load_lds_sbyte">;
defm SCRATCH_LOAD_LDS_USHORT : FLAT_Scratch_Load_LDS_Pseudo<"scratch_load_lds_ushort">;
defm SCRATCH_LOAD_LDS_SSHORT : FLAT_Scratch_Load_LDS_Pseudo<"scratch_load_lds_sshort">;
defm SCRATCH_LOAD_LDS_DWORD  : FLAT_Scratch_Load_LDS_Pseudo<"scratch_load_lds_dword">;

} // End SubtargetPredicate = HasFlatScratchInsts
971
// Global floating-point cmpswap/min/max atomics in f32 and f64 (_X2) widths,
// defined under isGFX10Plus with is_flat_global = 1.
let SubtargetPredicate = isGFX10Plus, is_flat_global = 1 in {
  // f32 forms.
  defm GLOBAL_ATOMIC_FCMPSWAP : FLAT_Global_Atomic_Pseudo<
    "global_atomic_fcmpswap", VGPR_32, f32, v2f32, VReg_64>;
  defm GLOBAL_ATOMIC_FMIN : FLAT_Global_Atomic_Pseudo<
    "global_atomic_fmin", VGPR_32, f32>;
  defm GLOBAL_ATOMIC_FMAX : FLAT_Global_Atomic_Pseudo<
    "global_atomic_fmax", VGPR_32, f32>;

  // f64 forms.
  defm GLOBAL_ATOMIC_FCMPSWAP_X2 : FLAT_Global_Atomic_Pseudo<
    "global_atomic_fcmpswap_x2", VReg_64, f64, v2f64, VReg_128>;
  defm GLOBAL_ATOMIC_FMIN_X2 : FLAT_Global_Atomic_Pseudo<
    "global_atomic_fmin_x2", VReg_64, f64>;
  defm GLOBAL_ATOMIC_FMAX_X2 : FLAT_Global_Atomic_Pseudo<
    "global_atomic_fmax_x2", VReg_64, f64>;
} // End SubtargetPredicate = isGFX10Plus, is_flat_global = 1
986
// Global f32 add and packed f16 add. The no-return and returning variants are
// instantiated separately because each is gated on a different predicate.
let is_flat_global = 1 in {
  // No-return variants.
  let OtherPredicates = [HasAtomicFaddNoRtnInsts] in {
    defm GLOBAL_ATOMIC_ADD_F32 :
      FLAT_Global_Atomic_Pseudo_NO_RTN<"global_atomic_add_f32", VGPR_32, f32>;
  }
  let OtherPredicates = [HasAtomicBufferGlobalPkAddF16NoRtnInsts] in {
    defm GLOBAL_ATOMIC_PK_ADD_F16 :
      FLAT_Global_Atomic_Pseudo_NO_RTN<"global_atomic_pk_add_f16", VGPR_32, v2f16>;
  }

  // Returning variants.
  let OtherPredicates = [HasAtomicFaddRtnInsts] in {
    defm GLOBAL_ATOMIC_ADD_F32 :
      FLAT_Global_Atomic_Pseudo_RTN<"global_atomic_add_f32", VGPR_32, f32>;
  }
  let OtherPredicates = [HasAtomicBufferGlobalPkAddF16Insts] in {
    defm GLOBAL_ATOMIC_PK_ADD_F16 :
      FLAT_Global_Atomic_Pseudo_RTN<"global_atomic_pk_add_f16", VGPR_32, v2f16>;
  }
} // End is_flat_global = 1
1005
1006//===----------------------------------------------------------------------===//
1007// Flat Patterns
1008//===----------------------------------------------------------------------===//
1009
// Patterns for flat loads; FlatOffset matches the address into $vaddr plus $offset.
// Matches `node` producing a `vt` from a FlatOffset address and emits `inst`
// with the matched $vaddr and $offset.
class FlatLoadPat<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat<
      (vt (node (FlatOffset i64:$vaddr, i32:$offset))),
      (inst $vaddr, $offset)
    >;
1015
// D16 flat load: `node` takes the FlatOffset address plus an existing value
// $in of type `vt`, which is forwarded as the last operand of `inst`.
// NOTE(review): the literal 0 between $offset and $in is presumably the
// cache-policy operand; confirm against the pseudo's operand list.
class FlatLoadPat_D16<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat<
      (node (FlatOffset (i64 VReg_64:$vaddr), i32:$offset), vt:$in),
      (inst $vaddr, $offset, 0, $in)
    >;
1020
// Same shape as the FlatOffset D16 load pattern, but the address is matched
// with GlobalOffset.
class FlatSignedLoadPat_D16<FLAT_Pseudo inst, SDPatternOperator node,
                            ValueType vt>
  : GCNPat<
      (node (GlobalOffset (i64 VReg_64:$vaddr), i32:$offset), vt:$in),
      (inst $vaddr, $offset, 0, $in)
    >;
1025
// D16 load through GlobalSAddr, which yields three operands: $saddr (SReg_64),
// $voffset (VGPR_32) and $offset. The existing value $in is passed last.
class GlobalLoadSaddrPat_D16<FLAT_Pseudo inst, SDPatternOperator node,
                             ValueType vt>
  : GCNPat<
      (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset), vt:$in)),
      (inst $saddr, $voffset, $offset, 0, $in)
    >;
1030
// Load of a `vt` whose address is matched by GlobalOffset; emits `inst` with
// $vaddr and $offset.
class FlatLoadSignedPat<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat<
      (vt (node (GlobalOffset (i64 VReg_64:$vaddr), i32:$offset))),
      (inst $vaddr, $offset)
    >;
1035
// Load of a `vt` through GlobalSAddr; `inst` receives $saddr, $voffset and
// $offset followed by a literal 0.
class GlobalLoadSaddrPat<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat<
      (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset))),
      (inst $saddr, $voffset, $offset, 0)
    >;
1040
// Store of $data (type `vt`) through GlobalSAddr. The emitted operand order is
// $voffset, $data, $saddr, $offset; $data is typed via getVregSrcForVT<vt>.ret.
class GlobalStoreSaddrPat<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat<
      (node vt:$data, (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset)),
      (inst $voffset, getVregSrcForVT<vt>.ret:$data, $saddr, $offset)
    >;
1046
// Atomic through GlobalSAddr that produces a `vt` result. `data_vt` (default:
// `vt`) is the type of the $data operand and may differ from the result type.
class GlobalAtomicSaddrPat<FLAT_Pseudo inst, SDPatternOperator node,
                           ValueType vt, ValueType data_vt = vt>
  : GCNPat<
      (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset), data_vt:$data)),
      (inst $voffset, getVregSrcForVT<data_vt>.ret:$data, $saddr, $offset)
    >;
1052
// Atomic through GlobalSAddr whose source pattern has no result type wrapper:
// `node` takes the address followed by $data of type `vt`.
class GlobalAtomicNoRtnSaddrPat<FLAT_Pseudo inst, SDPatternOperator node,
                                ValueType vt>
  : GCNPat<
      (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset), vt:$data),
      (inst $voffset, getVregSrcForVT<vt>.ret:$data, $saddr, $offset)
    >;
1058
// Store of $data (type `vt`) to a FlatOffset address. Emits `inst` with
// $vaddr, $data and $offset, in that order.
class FlatStorePat<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat<
      (node vt:$data, (FlatOffset i64:$vaddr, i32:$offset)),
      (inst $vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
    >;
1063
// Store of $data (type `vt`) to a GlobalOffset address. Emits `inst` with
// $vaddr, $data and $offset, in that order.
class FlatStoreSignedPat<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat<
      (node vt:$data, (GlobalOffset i64:$vaddr, i32:$offset)),
      (inst $vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
    >;
1068
// Atomic store to a GlobalOffset address. `data_vt` defaults to `vt`.
class FlatStoreSignedAtomicPat<FLAT_Pseudo inst, SDPatternOperator node,
                               ValueType vt, ValueType data_vt = vt>
  : GCNPat<
      // Unlike a regular store, an atomic store uses the atomic binop operand
      // order: the address is the first operand and the data follows it.
      (node (GlobalOffset i64:$vaddr, i32:$offset), data_vt:$data),
      (inst $vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)
    >;
1076
// Emits the no-return pattern for a flat atomic.
//   inst   - name of the FLAT_Pseudo record to select.
//   node   - base name of the pattern fragment; "_noret" is appended, followed
//            by "_<vt.Size>" unless isIntr is set.
//   vt     - result type used in the source pattern.
//   data_vt- type of the $data operand (defaults to vt).
//   isIntr - when 1, no size suffix is appended to the fragment name.
// The pattern is given AddedComplexity = 1.
multiclass FlatAtomicNoRtnPat<string inst, string node, ValueType vt,
                              ValueType data_vt = vt, bit isIntr = 0> {
  defvar sizeSuffix = !if(isIntr, "", "_"#vt.Size);
  defvar noRetFrag = !cast<PatFrags>(node # "_noret" # sizeSuffix);

  def : GCNPat<
    (vt (noRetFrag (FlatOffset i64:$vaddr, i32:$offset), data_vt:$data)),
    (!cast<FLAT_Pseudo>(inst) VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)
  > {
    let AddedComplexity = 1;
  }
}
1085
// Emits the returning pattern for a flat atomic: selects the record named
// inst#"_RTN". The operator name is `node`, followed by "_<vt.Size>" unless
// isIntr is set.
multiclass FlatAtomicRtnPat<string inst, string node, ValueType vt,
                            ValueType data_vt = vt, bit isIntr = 0> {
  defvar sizeSuffix = !if(isIntr, "", "_"#vt.Size);
  defvar retNode = !cast<SDPatternOperator>(node # sizeSuffix);

  def : GCNPat<
    (vt (retNode (FlatOffset i64:$vaddr, i32:$offset), data_vt:$data)),
    (!cast<FLAT_Pseudo>(inst#"_RTN") VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)
  >;
}
1093
// Both flat atomic patterns for one instruction: the returning pattern first,
// then the no-return pattern.
multiclass FlatAtomicPat<string inst, string node, ValueType vt,
                         ValueType data_vt = vt, bit isIntr = 0>
  : FlatAtomicRtnPat<inst, node, vt, data_vt, isIntr>,
    FlatAtomicNoRtnPat<inst, node, vt, data_vt, isIntr>;
1098
// No-return flat atomic pattern with isIntr = 1, so no size suffix is
// appended to the node name.
multiclass FlatAtomicIntrNoRtnPat<string inst, string node, ValueType vt,
                                  ValueType data_vt = vt> {
  defm : FlatAtomicNoRtnPat<inst, node, vt, data_vt, /* isIntr */ 1>;
}
1103
// Returning flat atomic pattern with isIntr = 1, so no size suffix is
// appended to the node name.
multiclass FlatAtomicIntrRtnPat<string inst, string node, ValueType vt,
                                ValueType data_vt = vt> {
  defm : FlatAtomicRtnPat<inst, node, vt, data_vt, /* isIntr */ 1>;
}
1108
// Returning and no-return flat atomic patterns, both with isIntr = 1.
multiclass FlatAtomicIntrPat<string inst, string node, ValueType vt,
                             ValueType data_vt = vt>
  : FlatAtomicRtnPat<inst, node, vt, data_vt, /* isIntr */ 1>,
    FlatAtomicNoRtnPat<inst, node, vt, data_vt, /* isIntr */ 1>;
1113
// Shared shape of the GlobalOffset-addressed atomic patterns: `node` takes the
// address and $data (type data_vt, default vt) and produces a `vt`.
class FlatSignedAtomicPatBase<FLAT_Pseudo inst, SDPatternOperator node,
                              ValueType vt, ValueType data_vt = vt>
  : GCNPat<
      (vt (node (GlobalOffset i64:$vaddr, i32:$offset), data_vt:$data)),
      (inst VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)
    >;
1119
// Emits the returning and no-return GlobalOffset atomic patterns.
//   complexity - AddedComplexity of the returning pattern; the no-return
//                pattern gets complexity + 1.
//   isIntr     - when 1, no "_<vt.Size>" suffix is appended to the node names.
multiclass FlatSignedAtomicPat<string inst, string node, ValueType vt,
                               ValueType data_vt = vt, int complexity = 0,
                               bit isIntr = 0> {
  defvar sizeSuffix = !if(isIntr, "", "_" # vt.Size);
  defvar retNode = !cast<SDPatternOperator>(node # sizeSuffix);
  defvar noRetFrag = !cast<PatFrags>(node # "_noret" # sizeSuffix);

  def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst#"_RTN"), retNode, vt, data_vt> {
    let AddedComplexity = complexity;
  }

  def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst), noRetFrag, vt, data_vt> {
    let AddedComplexity = !add(complexity, 1);
  }
}
1132
// GlobalOffset atomic patterns with complexity 0 and isIntr = 1.
multiclass FlatSignedAtomicIntrPat<string inst, string node, ValueType vt,
                                   ValueType data_vt = vt> {
  defm : FlatSignedAtomicPat<inst, node, vt, data_vt,
                             /* complexity */ 0, /* isIntr */ 1>;
}
1137
// GlobalOffset atomic patterns whose fragments are named by address space:
// intr#"_noret_"#addrSpaceSuffix for the no-return form and
// intr#"_"#addrSpaceSuffix for the returning form. Only the no-return pattern
// is given AddedComplexity = 1.
multiclass FlatSignedAtomicPatWithAddrSpace<string inst, string intr,
                                            string addrSpaceSuffix,
                                            ValueType vt,
                                            ValueType data_vt = vt> {
  defvar noRetFrag = !cast<PatFrags>(intr # "_noret_" # addrSpaceSuffix);
  defvar retFrag = !cast<PatFrags>(intr # "_" # addrSpaceSuffix);

  def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst), noRetFrag, vt, data_vt> {
    let AddedComplexity = 1;
  }

  def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst#"_RTN"), retFrag, vt, data_vt>;
}
1147
// Scratch load of a `vt` whose address is matched by ScratchOffset into a
// 32-bit VGPR $vaddr and $offset.
class ScratchLoadSignedPat<FLAT_Pseudo inst, SDPatternOperator node,
                           ValueType vt>
  : GCNPat<
      (vt (node (ScratchOffset (i32 VGPR_32:$vaddr), i32:$offset))),
      (inst $vaddr, $offset)
    >;
1152
// D16 scratch load through ScratchOffset; the existing value $in (type `vt`)
// is forwarded as the last operand after a literal 0.
class ScratchLoadSignedPat_D16<FLAT_Pseudo inst, SDPatternOperator node,
                               ValueType vt>
  : GCNPat<
      (node (ScratchOffset (i32 VGPR_32:$vaddr), i32:$offset), vt:$in),
      (inst $vaddr, $offset, 0, $in)
    >;
1157
// Scratch store through ScratchOffset. Here the emitted instruction takes
// $data first, then $vaddr and $offset.
class ScratchStoreSignedPat<FLAT_Pseudo inst, SDPatternOperator node,
                            ValueType vt>
  : GCNPat<
      (node vt:$data, (ScratchOffset (i32 VGPR_32:$vaddr), i32:$offset)),
      (inst getVregSrcForVT<vt>.ret:$data, $vaddr, $offset)
    >;
1162
// Scratch load of a `vt` whose address is matched by ScratchSAddr into a
// 32-bit SGPR $saddr and $offset.
class ScratchLoadSaddrPat<FLAT_Pseudo inst, SDPatternOperator node,
                          ValueType vt>
  : GCNPat<
      (vt (node (ScratchSAddr (i32 SGPR_32:$saddr), i32:$offset))),
      (inst $saddr, $offset)
    >;
1167
// D16 scratch load through ScratchSAddr; $in (type `vt`) is forwarded as the
// last operand after a literal 0.
class ScratchLoadSaddrPat_D16<FLAT_Pseudo inst, SDPatternOperator node,
                              ValueType vt>
  : GCNPat<
      (vt (node (ScratchSAddr (i32 SGPR_32:$saddr), i32:$offset), vt:$in)),
      (inst $saddr, $offset, 0, $in)
    >;
1172
// Scratch store through ScratchSAddr; emits $data, then $saddr and $offset.
class ScratchStoreSaddrPat<FLAT_Pseudo inst, SDPatternOperator node,
                           ValueType vt>
  : GCNPat<
      (node vt:$data, (ScratchSAddr (i32 SGPR_32:$saddr), i32:$offset)),
      (inst getVregSrcForVT<vt>.ret:$data, $saddr, $offset)
    >;
1178
// Scratch load through ScratchSVAddr, which yields both a VGPR $vaddr and an
// SGPR $saddr in addition to $offset. A literal 0 follows $offset.
class ScratchLoadSVaddrPat<FLAT_Pseudo inst, SDPatternOperator node,
                           ValueType vt>
  : GCNPat<
      (vt (node (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i32:$offset))),
      (inst $vaddr, $saddr, $offset, 0)
    >;
1183
// Scratch store through ScratchSVAddr; emits $data, $vaddr, $saddr, $offset.
class ScratchStoreSVaddrPat<FLAT_Pseudo inst, SDPatternOperator node,
                            ValueType vt>
  : GCNPat<
      (node vt:$data, (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i32:$offset)),
      (inst getVregSrcForVT<vt>.ret:$data, $vaddr, $saddr, $offset)
    >;
1189
// D16 scratch load through ScratchSVAddr; $in (type `vt`) is forwarded as the
// last operand after a literal 0.
class ScratchLoadSVaddrPat_D16<FLAT_Pseudo inst, SDPatternOperator node,
                               ValueType vt>
  : GCNPat<
      (vt (node (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i32:$offset), vt:$in)),
      (inst $vaddr, $saddr, $offset, 0, $in)
    >;
1194
// Selection patterns for the FLAT_* pseudos. Unless a nested `let` assigns
// OtherPredicates again, each record here gets
// OtherPredicates = [HasFlatAddressSpace].
let OtherPredicates = [HasFlatAddressSpace] in {

// 8- and 16-bit atomic loads, for both i32 and i16 results.
def : FlatLoadPat<FLAT_LOAD_UBYTE, atomic_load_8_flat, i32>;
def : FlatLoadPat<FLAT_LOAD_UBYTE, atomic_load_8_flat, i16>;
def : FlatLoadPat<FLAT_LOAD_USHORT, atomic_load_16_flat, i32>;
def : FlatLoadPat<FLAT_LOAD_USHORT, atomic_load_16_flat, i16>;

// Extending byte/short loads and the plain i16 and v3i32 loads.
def : FlatLoadPat<FLAT_LOAD_UBYTE, extloadi8_flat, i32>;
def : FlatLoadPat<FLAT_LOAD_UBYTE, zextloadi8_flat, i32>;
def : FlatLoadPat<FLAT_LOAD_SBYTE, sextloadi8_flat, i32>;
def : FlatLoadPat<FLAT_LOAD_UBYTE, extloadi8_flat, i16>;
def : FlatLoadPat<FLAT_LOAD_UBYTE, zextloadi8_flat, i16>;
def : FlatLoadPat<FLAT_LOAD_SBYTE, sextloadi8_flat, i16>;
def : FlatLoadPat<FLAT_LOAD_USHORT, extloadi16_flat, i32>;
def : FlatLoadPat<FLAT_LOAD_USHORT, zextloadi16_flat, i32>;
def : FlatLoadPat<FLAT_LOAD_USHORT, load_flat, i16>;
def : FlatLoadPat<FLAT_LOAD_SSHORT, sextloadi16_flat, i32>;
def : FlatLoadPat<FLAT_LOAD_DWORDX3, load_flat, v3i32>;

// 32- and 64-bit atomic loads.
def : FlatLoadPat<FLAT_LOAD_DWORD, atomic_load_32_flat, i32>;
def : FlatLoadPat<FLAT_LOAD_DWORDX2, atomic_load_64_flat, i64>;

// Truncating stores from i32.
def : FlatStorePat<FLAT_STORE_BYTE, truncstorei8_flat, i32>;
def : FlatStorePat<FLAT_STORE_SHORT, truncstorei16_flat, i32>;

// Plain loads/stores, one pattern per type in each type list.
foreach vt = Reg32Types.types in {
  def : FlatLoadPat<FLAT_LOAD_DWORD, load_flat, vt>;
  def : FlatStorePat<FLAT_STORE_DWORD, store_flat, vt>;
}

foreach vt = VReg_64.RegTypes in {
  def : FlatStorePat<FLAT_STORE_DWORDX2, store_flat, vt>;
  def : FlatLoadPat<FLAT_LOAD_DWORDX2, load_flat, vt>;
}

def : FlatStorePat<FLAT_STORE_DWORDX3, store_flat, v3i32>;

foreach vt = VReg_128.RegTypes in {
  def : FlatLoadPat<FLAT_LOAD_DWORDX4, load_flat, vt>;
  def : FlatStorePat<FLAT_STORE_DWORDX4, store_flat, vt>;
}

// Atomic stores.
def : FlatStorePat<FLAT_STORE_DWORD, atomic_store_32_flat, i32>;
def : FlatStorePat<FLAT_STORE_DWORDX2, atomic_store_64_flat, i64>;
def : FlatStorePat<FLAT_STORE_BYTE, atomic_store_8_flat, i32>;
def : FlatStorePat<FLAT_STORE_BYTE, atomic_store_8_flat, i16>;
def : FlatStorePat<FLAT_STORE_SHORT, atomic_store_16_flat, i32>;
def : FlatStorePat<FLAT_STORE_SHORT, atomic_store_16_flat, i16>;

// Read-modify-write atomics. The node names are built for both the "flat" and
// the "global" suffix; either way the FLAT_ATOMIC_* pseudos are selected.
foreach as = [ "flat", "global" ] in {
  // 32-bit.
  defm : FlatAtomicPat<"FLAT_ATOMIC_ADD", "atomic_load_add_"#as, i32>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_SUB", "atomic_load_sub_"#as, i32>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_INC", "atomic_load_uinc_wrap_"#as, i32>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_DEC", "atomic_load_udec_wrap_"#as, i32>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_AND", "atomic_load_and_"#as, i32>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_SMAX", "atomic_load_max_"#as, i32>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_UMAX", "atomic_load_umax_"#as, i32>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_SMIN", "atomic_load_min_"#as, i32>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_UMIN", "atomic_load_umin_"#as, i32>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_OR", "atomic_load_or_"#as, i32>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_SWAP", "atomic_swap_"#as, i32>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_CMPSWAP", "AMDGPUatomic_cmp_swap_"#as, i32, v2i32>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_XOR", "atomic_load_xor_"#as, i32>;

  // 64-bit.
  defm : FlatAtomicPat<"FLAT_ATOMIC_ADD_X2", "atomic_load_add_"#as, i64>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_SUB_X2", "atomic_load_sub_"#as, i64>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_INC_X2", "atomic_load_uinc_wrap_"#as, i64>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_DEC_X2", "atomic_load_udec_wrap_"#as, i64>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_AND_X2", "atomic_load_and_"#as, i64>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_SMAX_X2", "atomic_load_max_"#as, i64>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_UMAX_X2", "atomic_load_umax_"#as, i64>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_SMIN_X2", "atomic_load_min_"#as, i64>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_UMIN_X2", "atomic_load_umin_"#as, i64>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_OR_X2", "atomic_load_or_"#as, i64>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_SWAP_X2", "atomic_swap_"#as, i64>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_CMPSWAP_X2", "AMDGPUatomic_cmp_swap_"#as, i64, v2i64>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_XOR_X2", "atomic_load_xor_"#as, i64>;
} // end foreach as

// i16 stores.
def : FlatStorePat<FLAT_STORE_BYTE, truncstorei8_flat, i16>;
def : FlatStorePat<FLAT_STORE_SHORT, store_flat, i16>;

// D16 "hi" stores; these records set OtherPredicates to [HasD16LoadStore].
let OtherPredicates = [HasD16LoadStore] in {
  def : FlatStorePat<FLAT_STORE_SHORT_D16_HI, truncstorei16_hi16_flat, i32>;
  def : FlatStorePat<FLAT_STORE_BYTE_D16_HI, truncstorei8_hi16_flat, i32>;
} // End OtherPredicates = [HasD16LoadStore]

// D16 loads for v2i16 and v2f16; these records set OtherPredicates to
// [D16PreservesUnusedBits].
let OtherPredicates = [D16PreservesUnusedBits] in {
  // High-half loads.
  def : FlatLoadPat_D16<FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2i16>;
  def : FlatLoadPat_D16<FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2f16>;
  def : FlatLoadPat_D16<FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2i16>;
  def : FlatLoadPat_D16<FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2f16>;
  def : FlatLoadPat_D16<FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat, v2i16>;
  def : FlatLoadPat_D16<FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat, v2f16>;

  // Low-half loads.
  def : FlatLoadPat_D16<FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2i16>;
  def : FlatLoadPat_D16<FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2f16>;
  def : FlatLoadPat_D16<FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2i16>;
  def : FlatLoadPat_D16<FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2f16>;
  def : FlatLoadPat_D16<FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2i16>;
  def : FlatLoadPat_D16<FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2f16>;
} // End OtherPredicates = [D16PreservesUnusedBits]

} // End OtherPredicates = [HasFlatAddressSpace]
1298
1299
// Global load patterns for `inst`: the GlobalOffset form at AddedComplexity 10
// and the GlobalSAddr form, which selects the record named <inst>_SADDR, at
// AddedComplexity 11.
multiclass GlobalFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node,
                              ValueType vt> {
  let AddedComplexity = 10 in
  def : FlatLoadSignedPat<inst, node, vt>;

  let AddedComplexity = 11 in
  def : GlobalLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt>;
}
1309
// D16 global load patterns for `inst`: the GlobalOffset form at
// AddedComplexity 10 and the <inst>_SADDR form at AddedComplexity 11.
multiclass GlobalFLATLoadPats_D16<FLAT_Pseudo inst, SDPatternOperator node,
                                  ValueType vt> {
  let AddedComplexity = 10 in
  def : FlatSignedLoadPat_D16<inst, node, vt>;

  let AddedComplexity = 11 in
  def : GlobalLoadSaddrPat_D16<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt>;
}
1319
// Global store patterns for `inst`: the GlobalOffset form at AddedComplexity
// 10 and the <inst>_SADDR form at AddedComplexity 11.
multiclass GlobalFLATStorePats<FLAT_Pseudo inst, SDPatternOperator node,
                               ValueType vt> {
  let AddedComplexity = 10 in
  def : FlatStoreSignedPat<inst, node, vt>;

  let AddedComplexity = 11 in
  def : GlobalStoreSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt>;
}
1330
// No-return global atomic patterns. `inst` and `node` are record names looked
// up with !cast. The GlobalOffset form gets AddedComplexity 11 and the
// <inst>_SADDR form gets 13.
multiclass GlobalFLATAtomicPatsNoRtnBase<string inst, string node, ValueType vt,
                                         ValueType data_vt = vt> {
  def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst), !cast<SDPatternOperator>(node), vt, data_vt> {
    let AddedComplexity = 11;
  }

  def : GlobalAtomicSaddrPat<!cast<FLAT_Pseudo>(inst#"_SADDR"), !cast<SDPatternOperator>(node), vt, data_vt> {
    let AddedComplexity = 13;
  }
}
1339
// Returning global atomic patterns. `node` is looked up as a PatFrags when
// isPatFrags is set and as an SDPatternOperator otherwise. The <inst>_RTN form
// gets AddedComplexity 10 and the <inst>_SADDR_RTN form gets 12.
multiclass GlobalFLATAtomicPatsRtnBase<string inst, string node, ValueType vt,
                                       ValueType data_vt = vt,
                                       bit isPatFrags = 0> {
  defvar retNode = !if(isPatFrags, !cast<PatFrags>(node), !cast<SDPatternOperator>(node));

  def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst#"_RTN"), retNode, vt, data_vt> {
    let AddedComplexity = 10;
  }

  def : GlobalAtomicSaddrPat<!cast<FLAT_Pseudo>(inst#"_SADDR_RTN"), retNode, vt, data_vt> {
    let AddedComplexity = 12;
  }
}
1350
// No-return global atomic patterns; the node name is node#"_noret", followed
// by "_<vt.Size>" unless isIntr is set.
multiclass GlobalFLATAtomicPatsNoRtn<string inst, string node, ValueType vt,
                                     ValueType data_vt = vt, bit isIntr = 0>
  : GlobalFLATAtomicPatsNoRtnBase<
      inst, node # "_noret" # !if(isIntr, "", "_" # vt.Size), vt, data_vt>;
1354
// Returning global atomic patterns; the node name is `node`, followed by
// "_<vt.Size>" unless isIntr is set.
multiclass GlobalFLATAtomicPatsRtn<string inst, string node, ValueType vt,
                                   ValueType data_vt = vt, bit isIntr = 0>
  : GlobalFLATAtomicPatsRtnBase<
      inst, node # !if(isIntr, "", "_" # vt.Size), vt, data_vt>;
1358
// Both global atomic pattern sets: no-return first, then returning.
multiclass GlobalFLATAtomicPats<string inst, string node, ValueType vt,
                                ValueType data_vt = vt, bit isIntr = 0>
  : GlobalFLATAtomicPatsNoRtn<inst, node, vt, data_vt, isIntr>,
    GlobalFLATAtomicPatsRtn<inst, node, vt, data_vt, isIntr>;
1363
// No-return global atomic patterns whose node is named
// intr#"_noret_"#addrSpaceSuffix.
multiclass GlobalFLATAtomicPatsNoRtnWithAddrSpace<string inst, string intr,
                                                  string addrSpaceSuffix,
                                                  ValueType vt,
                                                  ValueType data_vt = vt>
  : GlobalFLATAtomicPatsNoRtnBase<
      inst, intr # "_noret_" # addrSpaceSuffix, vt, data_vt>;
1367
// Returning global atomic patterns whose node is named
// intr#"_"#addrSpaceSuffix and is looked up as a PatFrags (isPatFrags = 1).
multiclass GlobalFLATAtomicPatsRtnWithAddrSpace<string inst, string intr,
                                                string addrSpaceSuffix,
                                                ValueType vt,
                                                ValueType data_vt = vt>
  : GlobalFLATAtomicPatsRtnBase<
      inst, intr # "_" # addrSpaceSuffix, vt, data_vt, /*isPatFrags*/ 1>;
1371
// Both address-space-suffixed global atomic pattern sets: no-return first,
// then returning.
multiclass GlobalFLATAtomicPatsWithAddrSpace<string inst, string intr,
                                             string addrSpaceSuffix,
                                             ValueType vt,
                                             ValueType data_vt = vt>
  : GlobalFLATAtomicPatsNoRtnWithAddrSpace<inst, intr, addrSpaceSuffix, vt, data_vt>,
    GlobalFLATAtomicPatsRtnWithAddrSpace<inst, intr, addrSpaceSuffix, vt, data_vt>;
1376
// Global atomic patterns with isIntr = 1, so no size suffix is appended to
// the node names.
multiclass GlobalFLATAtomicIntrPats<string inst, string node, ValueType vt,
                                    ValueType data_vt = vt> {
  defm : GlobalFLATAtomicPats<inst, node, vt, data_vt, /* isIntr */ 1>;
}
1381
// Scratch load patterns for `inst`, in increasing AddedComplexity:
//   25 - ScratchOffset form, selects `inst`.
//   26 - ScratchSAddr form, selects <inst>_SADDR.
//   27 - ScratchSVAddr form, selects <inst>_SVS; additionally gated on
//        HasFlatScratchSVSMode.
multiclass ScratchFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node,
                               ValueType vt> {
  let AddedComplexity = 25 in
  def : ScratchLoadSignedPat<inst, node, vt>;

  let AddedComplexity = 26 in
  def : ScratchLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt>;

  let SubtargetPredicate = HasFlatScratchSVSMode, AddedComplexity = 27 in
  def : ScratchLoadSVaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SVS"), node, vt>;
}
1396
// Scratch store patterns for `inst`, in increasing AddedComplexity:
//   25 - ScratchOffset form, selects `inst`.
//   26 - ScratchSAddr form, selects <inst>_SADDR.
//   27 - ScratchSVAddr form, selects <inst>_SVS; additionally gated on
//        HasFlatScratchSVSMode.
multiclass ScratchFLATStorePats<FLAT_Pseudo inst, SDPatternOperator node,
                                ValueType vt> {
  let AddedComplexity = 25 in
  def : ScratchStoreSignedPat<inst, node, vt>;

  let AddedComplexity = 26 in
  def : ScratchStoreSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt>;

  let SubtargetPredicate = HasFlatScratchSVSMode, AddedComplexity = 27 in
  def : ScratchStoreSVaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SVS"), node, vt>;
}
1412
// D16 flavour of the scratch load pattern set. Uses the *_D16 pattern classes
// for the base, "_SADDR" and "_SVS" pseudos with AddedComplexity 25, 26 and
// 27; the "_SVS" pattern requires HasFlatScratchSVSMode.
multiclass ScratchFLATLoadPats_D16<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
  defvar saddrInst = !cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR");
  defvar svsInst   = !cast<FLAT_Pseudo>(!cast<string>(inst)#"_SVS");

  let AddedComplexity = 25 in
  def : ScratchLoadSignedPat_D16 <inst, node, vt>;

  let AddedComplexity = 26 in
  def : ScratchLoadSaddrPat_D16<saddrInst, node, vt>;

  let SubtargetPredicate = HasFlatScratchSVSMode, AddedComplexity = 27 in
  def : ScratchLoadSVaddrPat_D16 <svsInst, node, vt>;
}
1427
let OtherPredicates = [HasFlatGlobalInsts] in {

// 8- and 16-bit atomic loads select the unsigned byte/short loads, for both
// i32 and i16 results.
foreach vt = [i32, i16] in
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, atomic_load_8_global, vt>;

foreach vt = [i32, i16] in
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_16_global, vt>;

// Extending byte loads: ext/zext select UBYTE, sext selects SBYTE.
foreach vt = [i32, i16] in {
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, extloadi8_global, vt>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, zextloadi8_global, vt>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, sextloadi8_global, vt>;
}

// Extending short loads to i32, followed by the plain i16 load.
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, extloadi16_global,  i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, zextloadi16_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_SSHORT, sextloadi16_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, load_global,        i16>;
1444
// Plain loads and stores: one load/store pattern pair for every value type
// listed by the corresponding register class.
foreach vt = Reg32Types.types in {
defm : GlobalFLATLoadPats  <GLOBAL_LOAD_DWORD,  load_global,  vt>;
defm : GlobalFLATStorePats <GLOBAL_STORE_DWORD, store_global, vt>;
}

foreach vt = VReg_64.RegTypes in {
defm : GlobalFLATLoadPats  <GLOBAL_LOAD_DWORDX2,  load_global,  vt>;
defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX2, store_global, vt>;
}

defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX3, load_global, v3i32>;

foreach vt = VReg_128.RegTypes in {
defm : GlobalFLATLoadPats  <GLOBAL_LOAD_DWORDX4,  load_global,  vt>;
defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX4, store_global, vt>;
}

// Atomic loads are selected exactly like ordinary loads; the memory
// legalizer is responsible for setting the cache bits and inserting the
// required waits afterwards.
defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORD,   atomic_load_32_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX2, atomic_load_64_global, i64>;

// Truncating byte/short stores, the plain i16 store and the v3i32 store.
foreach vt = [i32, i16] in
defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, truncstorei8_global, vt>;

defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT,   truncstorei16_global, i32>;
defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT,   store_global,         i16>;
defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX3, store_global,         v3i32>;
1473
// NOTE(review): a nested `let OtherPredicates = [...]` replaces the enclosing
// predicate list instead of extending it, so the patterns in the two groups
// below are guarded only by the predicate named here. The scratch section of
// this file repeats its outer predicates in the equivalent nested lets;
// confirm the difference is intentional.

// Stores of the high 16 bits of a 32-bit value.
let OtherPredicates = [HasD16LoadStore] in {
defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT_D16_HI, truncstorei16_hi16_global, i32>;
defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE_D16_HI,  truncstorei8_hi16_global,  i32>;
}

// D16 loads into the high (_D16_HI) or low (_D16) half of a v2i16/v2f16.
let OtherPredicates = [D16PreservesUnusedBits] in {
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2i16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2f16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global,   v2i16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global,   v2f16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16_HI, load_d16_hi_global,         v2i16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16_HI, load_d16_hi_global,         v2f16>;

defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_d16_lo_global, v2i16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_d16_lo_global, v2f16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_d16_lo_global,   v2i16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_d16_lo_global,   v2f16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global,         v2i16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global,         v2f16>;
}

// Atomic stores map onto the ordinary store instructions of matching width.
foreach vt = [i32, i16] in
defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, atomic_store_8_global, vt>;

foreach vt = [i32, i16] in
defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, atomic_store_16_global, vt>;

defm : GlobalFLATStorePats <GLOBAL_STORE_DWORD,   atomic_store_32_global, i32>;
defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX2, atomic_store_64_global, i64>;
1501
// 32-bit integer atomics. CMPSWAP passes a v2i32 data type alongside the
// i32 result type.
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD",     "atomic_load_add_global",       i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SUB",     "atomic_load_sub_global",       i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC",     "atomic_load_uinc_wrap_global", i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_DEC",     "atomic_load_udec_wrap_global", i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_AND",     "atomic_load_and_global",       i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMAX",    "atomic_load_max_global",       i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMAX",    "atomic_load_umax_global",      i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMIN",    "atomic_load_min_global",       i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMIN",    "atomic_load_umin_global",      i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_OR",      "atomic_load_or_global",        i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SWAP",    "atomic_swap_global",           i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CMPSWAP", "AMDGPUatomic_cmp_swap_global", i32, v2i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR",     "atomic_load_xor_global",       i32>;

// CSUB: the returning patterns are emitted here; the non-returning ones are
// additionally gated on HasAtomicCSubNoRtnInsts.
defm : GlobalFLATAtomicPatsRtn <"GLOBAL_ATOMIC_CSUB", "int_amdgcn_global_atomic_csub", i32, i32, /* isIntr */ 1>;

let OtherPredicates = [HasAtomicCSubNoRtnInsts] in
defm : GlobalFLATAtomicPatsNoRtn <"GLOBAL_ATOMIC_CSUB", "int_amdgcn_global_atomic_csub", i32, i32, /* isIntr */ 1>;

// 64-bit integer atomics (the _X2 instructions).
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_X2",     "atomic_load_add_global",       i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SUB_X2",     "atomic_load_sub_global",       i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC_X2",     "atomic_load_uinc_wrap_global", i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_DEC_X2",     "atomic_load_udec_wrap_global", i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_AND_X2",     "atomic_load_and_global",       i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMAX_X2",    "atomic_load_max_global",       i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMAX_X2",    "atomic_load_umax_global",      i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMIN_X2",    "atomic_load_min_global",       i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMIN_X2",    "atomic_load_umin_global",      i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_OR_X2",      "atomic_load_or_global",        i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SWAP_X2",    "atomic_swap_global",           i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CMPSWAP_X2", "AMDGPUatomic_cmp_swap_global", i64, v2i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR_X2",     "atomic_load_xor_global",       i64>;

// Ordered 64-bit add: returning form only, GFX12 and later.
let OtherPredicates = [isGFX12Plus] in {
  defm : GlobalFLATAtomicPatsRtn <"GLOBAL_ATOMIC_ORDERED_ADD_B64", "int_amdgcn_global_atomic_ordered_add_b64", i64, i64, /* isIntr */ 1>;
}
1537
// f32 fmin/fmax through the generic atomic nodes (global and flat).
let OtherPredicates = [isGFX10Plus] in {
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN", "atomic_load_fmin_global", f32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX", "atomic_load_fmax_global", f32>;
defm : FlatSignedAtomicPat  <"FLAT_ATOMIC_FMIN",   "atomic_load_fmin_flat",   f32>;
defm : FlatSignedAtomicPat  <"FLAT_ATOMIC_FMAX",   "atomic_load_fmax_flat",   f32>;
}

// f32 fmin/fmax through the amdgcn intrinsics.
let OtherPredicates = [isGFX10GFX11] in {
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMIN", "int_amdgcn_global_atomic_fmin", f32>;
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMAX", "int_amdgcn_global_atomic_fmax", f32>;

defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMIN", "int_amdgcn_flat_atomic_fmin", f32>;
defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMAX", "int_amdgcn_flat_atomic_fmax", f32>;
}

// f64 fmin/fmax on the _X2 instructions, via both generic nodes and
// intrinsics.
let OtherPredicates = [isGFX10Only] in {
defm : GlobalFLATAtomicPats     <"GLOBAL_ATOMIC_FMIN_X2", "atomic_load_fmin_global",       f64>;
defm : GlobalFLATAtomicPats     <"GLOBAL_ATOMIC_FMAX_X2", "atomic_load_fmax_global",       f64>;
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMIN_X2", "int_amdgcn_global_atomic_fmin", f64>;
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMAX_X2", "int_amdgcn_global_atomic_fmax", f64>;
defm : FlatSignedAtomicPat      <"FLAT_ATOMIC_FMIN_X2",   "atomic_load_fmin_flat",         f64>;
defm : FlatSignedAtomicPat      <"FLAT_ATOMIC_FMAX_X2",   "atomic_load_fmax_flat",         f64>;
defm : FlatSignedAtomicIntrPat  <"FLAT_ATOMIC_FMIN_X2",   "int_amdgcn_flat_atomic_fmin",   f64>;
defm : FlatSignedAtomicIntrPat  <"FLAT_ATOMIC_FMAX_X2",   "int_amdgcn_flat_atomic_fmax",   f64>;
}

// The *_num intrinsic variants select the same f32 FMIN/FMAX instructions.
let OtherPredicates = [isGFX12Only] in {
  defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMIN", "int_amdgcn_global_atomic_fmin_num", f32>;
  defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMAX", "int_amdgcn_global_atomic_fmax_num", f32>;
  defm : FlatSignedAtomicIntrPat  <"FLAT_ATOMIC_FMIN",   "int_amdgcn_flat_atomic_fmin_num",   f32>;
  defm : FlatSignedAtomicIntrPat  <"FLAT_ATOMIC_FMAX",   "int_amdgcn_flat_atomic_fmax_num",   f32>;
}
1570
// Non-returning f32 fadd. Both the flat and the global fadd intrinsics are
// matched with the "global_addrspace" suffix.
let OtherPredicates = [HasAtomicFaddNoRtnInsts] in {
defm : GlobalFLATAtomicPatsNoRtn <"GLOBAL_ATOMIC_ADD_F32", "atomic_load_fadd_global", f32>;
defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_ADD_F32", "int_amdgcn_flat_atomic_fadd",   "global_addrspace", f32>;
defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_ADD_F32", "int_amdgcn_global_atomic_fadd", "global_addrspace", f32>;
}

// Non-returning packed v2f16 fadd.
let OtherPredicates = [HasAtomicBufferGlobalPkAddF16NoRtnInsts] in {
defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_flat_atomic_fadd",   "global_addrspace", v2f16>;
defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_global_atomic_fadd", "global_addrspace", v2f16>;
}

// Returning f32 fadd.
let OtherPredicates = [HasAtomicFaddRtnInsts] in {
defm : GlobalFLATAtomicPatsRtn <"GLOBAL_ATOMIC_ADD_F32", "atomic_load_fadd_global", f32>;
defm : GlobalFLATAtomicPatsRtnWithAddrSpace <"GLOBAL_ATOMIC_ADD_F32", "int_amdgcn_flat_atomic_fadd",   "global_addrspace", f32>;
defm : GlobalFLATAtomicPatsRtnWithAddrSpace <"GLOBAL_ATOMIC_ADD_F32", "int_amdgcn_global_atomic_fadd", "global_addrspace", f32>;
}

// Returning packed v2f16 fadd.
let OtherPredicates = [HasAtomicBufferGlobalPkAddF16Insts] in {
defm : GlobalFLATAtomicPatsRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_flat_atomic_fadd",   "global_addrspace", v2f16>;
defm : GlobalFLATAtomicPatsRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_global_atomic_fadd", "global_addrspace", v2f16>;
}

// f64 add/min/max atomics, for both global and flat instructions.
let OtherPredicates = [isGFX90APlus] in {
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_F64", "atomic_load_fadd_global", f64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MIN_F64", "atomic_load_fmin_global", f64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MAX_F64", "atomic_load_fmax_global", f64>;
defm : GlobalFLATAtomicPatsWithAddrSpace<"GLOBAL_ATOMIC_ADD_F64", "int_amdgcn_flat_atomic_fadd",   "global_addrspace", f64>;
defm : GlobalFLATAtomicPatsWithAddrSpace<"GLOBAL_ATOMIC_ADD_F64", "int_amdgcn_global_atomic_fadd", "global_addrspace", f64>;
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_MIN_F64", "int_amdgcn_global_atomic_fmin", f64>;
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_MAX_F64", "int_amdgcn_global_atomic_fmax", f64>;
defm : FlatSignedAtomicPat <"FLAT_ATOMIC_ADD_F64", "atomic_load_fadd_flat", f64>;
defm : FlatSignedAtomicPat <"FLAT_ATOMIC_MIN_F64", "atomic_load_fmin_flat", f64>;
defm : FlatSignedAtomicPat <"FLAT_ATOMIC_MAX_F64", "atomic_load_fmax_flat", f64>;
defm : FlatSignedAtomicPatWithAddrSpace <"FLAT_ATOMIC_ADD_F64", "int_amdgcn_flat_atomic_fadd", "flat_addrspace", f64>;
defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_MIN_F64", "int_amdgcn_flat_atomic_fmin", f64>;
defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_MAX_F64", "int_amdgcn_flat_atomic_fmax", f64>;
}
1608
// f32 fadd on FLAT instructions, via the generic node and via the flat fadd
// intrinsic qualified with "flat_addrspace".
let OtherPredicates = [HasFlatAtomicFaddF32Inst] in {
defm : FlatSignedAtomicPat <"FLAT_ATOMIC_ADD_F32", "atomic_load_fadd_flat", f32>;
defm : FlatSignedAtomicPatWithAddrSpace <"FLAT_ATOMIC_ADD_F32", "int_amdgcn_flat_atomic_fadd", "flat_addrspace", f32>;
}

// Packed 16-bit fadd on FLAT instructions; the bf16 intrinsic uses v2i16 as
// its value type.
let OtherPredicates = [HasAtomicFlatPkAdd16Insts] in {
defm : FlatSignedAtomicPatWithAddrSpace <"FLAT_ATOMIC_PK_ADD_F16", "int_amdgcn_flat_atomic_fadd", "flat_addrspace", v2f16>;
defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_PK_ADD_BF16", "int_amdgcn_flat_atomic_fadd_v2bf16", v2i16>;
}

// Packed bf16 fadd on GLOBAL instructions.
let OtherPredicates = [HasAtomicGlobalPkAddBF16Inst] in
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_PK_ADD_BF16", "int_amdgcn_global_atomic_fadd_v2bf16", v2i16>;

// The scope being closed sets only OtherPredicates; it does not set
// AddedComplexity, so the end marker names the predicate list alone.
} // End OtherPredicates = [HasFlatGlobalInsts]
1623
let OtherPredicates = [HasFlatScratchInsts, EnableFlatScratch] in {

// Extending byte loads: ext/zext select UBYTE, sext selects SBYTE.
foreach vt = [i32, i16] in {
defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, extloadi8_private, vt>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, zextloadi8_private, vt>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_SBYTE, sextloadi8_private, vt>;
}

// Extending short loads to i32, followed by the plain i16 load.
defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, extloadi16_private,  i32>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, zextloadi16_private, i32>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_SSHORT, sextloadi16_private, i32>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, load_private,        i16>;

// Plain loads and stores: one load/store pattern pair for every value type
// listed by the corresponding register class.
foreach vt = Reg32Types.types in {
defm : ScratchFLATLoadPats  <SCRATCH_LOAD_DWORD,  load_private,  vt>;
defm : ScratchFLATStorePats <SCRATCH_STORE_DWORD, store_private, vt>;
}

foreach vt = VReg_64.RegTypes in {
defm : ScratchFLATLoadPats  <SCRATCH_LOAD_DWORDX2,  load_private,  vt>;
defm : ScratchFLATStorePats <SCRATCH_STORE_DWORDX2, store_private, vt>;
}

defm : ScratchFLATLoadPats <SCRATCH_LOAD_DWORDX3, load_private, v3i32>;

foreach vt = VReg_128.RegTypes in {
defm : ScratchFLATLoadPats  <SCRATCH_LOAD_DWORDX4,  load_private,  vt>;
defm : ScratchFLATStorePats <SCRATCH_STORE_DWORDX4, store_private, vt>;
}

// Truncating byte/short stores, the plain i16 store and the v3i32 store.
foreach vt = [i32, i16] in
defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE, truncstorei8_private, vt>;

defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT,   truncstorei16_private, i32>;
defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT,   store_private,         i16>;
defm : ScratchFLATStorePats <SCRATCH_STORE_DWORDX3, store_private,         v3i32>;
1659
// The nested lets below spell out HasFlatScratchInsts and EnableFlatScratch
// again next to the D16 predicate, since each assigns OtherPredicates a
// complete new list.

// Stores of the high 16 bits of a 32-bit value.
let OtherPredicates = [HasD16LoadStore, HasFlatScratchInsts, EnableFlatScratch] in {
defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT_D16_HI, truncstorei16_hi16_private, i32>;
defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE_D16_HI,  truncstorei8_hi16_private,  i32>;
}

// D16 loads into the high (_D16_HI) or low (_D16) half of a v2i16/v2f16.
let OtherPredicates = [D16PreservesUnusedBits, HasFlatScratchInsts, EnableFlatScratch] in {
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_private, v2i16>;
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_private, v2f16>;
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_private,   v2i16>;
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_private,   v2f16>;
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16_HI, load_d16_hi_private,         v2i16>;
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16_HI, load_d16_hi_private,         v2f16>;

defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16, az_extloadi8_d16_lo_private, v2i16>;
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16, az_extloadi8_d16_lo_private, v2f16>;
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16, sextloadi8_d16_lo_private,   v2i16>;
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16, sextloadi8_d16_lo_private,   v2f16>;
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16, load_d16_lo_private,         v2i16>;
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16, load_d16_lo_private,         v2f16>;
}

} // End OtherPredicates = [HasFlatScratchInsts,EnableFlatScratch]
1682
1683//===----------------------------------------------------------------------===//
1684// Target
1685//===----------------------------------------------------------------------===//
1686
1687//===----------------------------------------------------------------------===//
1688// CI
1689//===----------------------------------------------------------------------===//
1690
// Real (encodable) FLAT instruction for the GFX7-only assembler predicate.
// `op` is the 7-bit opcode and `ps` the pseudo it realizes; the record is
// registered under the SI encoding family and decoded in the "GFX7"
// namespace.
class FLAT_Real_ci <bits<7> op, FLAT_Pseudo ps>
  : FLAT_Real <op, ps>,
    SIMCInstr <ps.PseudoInstr, SIEncodingFamily.SI> {
  let AssemblerPredicate = isGFX7Only;
  let DecoderNamespace   = "GFX7";
}
1697
// CI FLAT loads: opcodes 0x08-0x0f.
def FLAT_LOAD_UBYTE_ci         : FLAT_Real_ci <0x08, FLAT_LOAD_UBYTE>;
def FLAT_LOAD_SBYTE_ci         : FLAT_Real_ci <0x09, FLAT_LOAD_SBYTE>;
def FLAT_LOAD_USHORT_ci        : FLAT_Real_ci <0x0a, FLAT_LOAD_USHORT>;
def FLAT_LOAD_SSHORT_ci        : FLAT_Real_ci <0x0b, FLAT_LOAD_SSHORT>;
def FLAT_LOAD_DWORD_ci         : FLAT_Real_ci <0x0c, FLAT_LOAD_DWORD>;
def FLAT_LOAD_DWORDX2_ci       : FLAT_Real_ci <0x0d, FLAT_LOAD_DWORDX2>;
def FLAT_LOAD_DWORDX4_ci       : FLAT_Real_ci <0x0e, FLAT_LOAD_DWORDX4>;
def FLAT_LOAD_DWORDX3_ci       : FLAT_Real_ci <0x0f, FLAT_LOAD_DWORDX3>;

// CI FLAT stores: opcodes 0x18-0x1f (0x19 and 0x1b are not defined here).
def FLAT_STORE_BYTE_ci         : FLAT_Real_ci <0x18, FLAT_STORE_BYTE>;
def FLAT_STORE_SHORT_ci        : FLAT_Real_ci <0x1a, FLAT_STORE_SHORT>;
def FLAT_STORE_DWORD_ci        : FLAT_Real_ci <0x1c, FLAT_STORE_DWORD>;
def FLAT_STORE_DWORDX2_ci      : FLAT_Real_ci <0x1d, FLAT_STORE_DWORDX2>;
def FLAT_STORE_DWORDX4_ci      : FLAT_Real_ci <0x1e, FLAT_STORE_DWORDX4>;
def FLAT_STORE_DWORDX3_ci      : FLAT_Real_ci <0x1f, FLAT_STORE_DWORDX3>;
1713
// Defines the CI real instructions for an atomic: one for the pseudo named
// by ps.PseudoInstr and one for its "_RTN" sibling. Both share opcode `op`.
multiclass FLAT_Real_Atomics_ci <bits<7> op, FLAT_Pseudo ps> {
  defvar noRtnPs = !cast<FLAT_Pseudo>(ps.PseudoInstr);
  defvar rtnPs   = !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN");

  def _ci     : FLAT_Real_ci<op, noRtnPs>;
  def _RTN_ci : FLAT_Real_ci<op, rtnPs>;
}
1718
// 32-bit atomics: opcodes 0x30-0x3d (0x34 is not defined here).
defm FLAT_ATOMIC_SWAP          : FLAT_Real_Atomics_ci <0x30, FLAT_ATOMIC_SWAP>;
defm FLAT_ATOMIC_CMPSWAP       : FLAT_Real_Atomics_ci <0x31, FLAT_ATOMIC_CMPSWAP>;
defm FLAT_ATOMIC_ADD           : FLAT_Real_Atomics_ci <0x32, FLAT_ATOMIC_ADD>;
defm FLAT_ATOMIC_SUB           : FLAT_Real_Atomics_ci <0x33, FLAT_ATOMIC_SUB>;
defm FLAT_ATOMIC_SMIN          : FLAT_Real_Atomics_ci <0x35, FLAT_ATOMIC_SMIN>;
defm FLAT_ATOMIC_UMIN          : FLAT_Real_Atomics_ci <0x36, FLAT_ATOMIC_UMIN>;
defm FLAT_ATOMIC_SMAX          : FLAT_Real_Atomics_ci <0x37, FLAT_ATOMIC_SMAX>;
defm FLAT_ATOMIC_UMAX          : FLAT_Real_Atomics_ci <0x38, FLAT_ATOMIC_UMAX>;
defm FLAT_ATOMIC_AND           : FLAT_Real_Atomics_ci <0x39, FLAT_ATOMIC_AND>;
defm FLAT_ATOMIC_OR            : FLAT_Real_Atomics_ci <0x3a, FLAT_ATOMIC_OR>;
defm FLAT_ATOMIC_XOR           : FLAT_Real_Atomics_ci <0x3b, FLAT_ATOMIC_XOR>;
defm FLAT_ATOMIC_INC           : FLAT_Real_Atomics_ci <0x3c, FLAT_ATOMIC_INC>;
defm FLAT_ATOMIC_DEC           : FLAT_Real_Atomics_ci <0x3d, FLAT_ATOMIC_DEC>;

// 64-bit (_X2) atomics: opcodes 0x50-0x5d (0x54 is not defined here).
defm FLAT_ATOMIC_SWAP_X2       : FLAT_Real_Atomics_ci <0x50, FLAT_ATOMIC_SWAP_X2>;
defm FLAT_ATOMIC_CMPSWAP_X2    : FLAT_Real_Atomics_ci <0x51, FLAT_ATOMIC_CMPSWAP_X2>;
defm FLAT_ATOMIC_ADD_X2        : FLAT_Real_Atomics_ci <0x52, FLAT_ATOMIC_ADD_X2>;
defm FLAT_ATOMIC_SUB_X2        : FLAT_Real_Atomics_ci <0x53, FLAT_ATOMIC_SUB_X2>;
defm FLAT_ATOMIC_SMIN_X2       : FLAT_Real_Atomics_ci <0x55, FLAT_ATOMIC_SMIN_X2>;
defm FLAT_ATOMIC_UMIN_X2       : FLAT_Real_Atomics_ci <0x56, FLAT_ATOMIC_UMIN_X2>;
defm FLAT_ATOMIC_SMAX_X2       : FLAT_Real_Atomics_ci <0x57, FLAT_ATOMIC_SMAX_X2>;
defm FLAT_ATOMIC_UMAX_X2       : FLAT_Real_Atomics_ci <0x58, FLAT_ATOMIC_UMAX_X2>;
defm FLAT_ATOMIC_AND_X2        : FLAT_Real_Atomics_ci <0x59, FLAT_ATOMIC_AND_X2>;
defm FLAT_ATOMIC_OR_X2         : FLAT_Real_Atomics_ci <0x5a, FLAT_ATOMIC_OR_X2>;
defm FLAT_ATOMIC_XOR_X2        : FLAT_Real_Atomics_ci <0x5b, FLAT_ATOMIC_XOR_X2>;
defm FLAT_ATOMIC_INC_X2        : FLAT_Real_Atomics_ci <0x5c, FLAT_ATOMIC_INC_X2>;
defm FLAT_ATOMIC_DEC_X2        : FLAT_Real_Atomics_ci <0x5d, FLAT_ATOMIC_DEC_X2>;

// Floating-point atomics that are given real encodings only for CI.
defm FLAT_ATOMIC_FCMPSWAP      : FLAT_Real_Atomics_ci <0x3e, FLAT_ATOMIC_FCMPSWAP>;
defm FLAT_ATOMIC_FMIN          : FLAT_Real_Atomics_ci <0x3f, FLAT_ATOMIC_FMIN>;
defm FLAT_ATOMIC_FMAX          : FLAT_Real_Atomics_ci <0x40, FLAT_ATOMIC_FMAX>;
defm FLAT_ATOMIC_FCMPSWAP_X2   : FLAT_Real_Atomics_ci <0x5e, FLAT_ATOMIC_FCMPSWAP_X2>;
defm FLAT_ATOMIC_FMIN_X2       : FLAT_Real_Atomics_ci <0x5f, FLAT_ATOMIC_FMIN_X2>;
defm FLAT_ATOMIC_FMAX_X2       : FLAT_Real_Atomics_ci <0x60, FLAT_ATOMIC_FMAX_X2>;
1753
1754
1755//===----------------------------------------------------------------------===//
1756// VI
1757//===----------------------------------------------------------------------===//
1758
// Real FLAT instruction for the VI encoding family (assembler predicate
// isGFX8GFX9, decoder namespace "GFX8").
//
// `has_sccb` defaults to the pseudo's own has_sccb. When set, encoding bit 25
// carries the SCC bit of cpol and "$sccb" stays in the assembly operands;
// otherwise bit 25 is the pseudo's fixed sccbValue and "$sccb" is removed
// from the operand string.
class FLAT_Real_vi <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb>
  : FLAT_Real <op, ps>,
    SIMCInstr <ps.PseudoInstr, SIEncodingFamily.VI> {
  let AssemblerPredicate = isGFX8GFX9;
  let DecoderNamespace = "GFX8";

  let Inst{25} = !if(has_sccb, cpol{CPolBit.SCC}, ps.sccbValue);
  let AsmString =
    ps.Mnemonic # !subst("$sccb", !if(has_sccb, "$sccb",""), ps.AsmOperands);
}
1769
// VI reals for the pseudo called NAME and for its "_SADDR" variant, both with
// opcode `op`. `has_sccb` defaults to the has_sccb of the NAME pseudo and is
// applied to both definitions.
multiclass FLAT_Real_AllAddr_vi<bits<7> op,
  bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
  defvar basePs  = !cast<FLAT_Pseudo>(NAME);
  defvar saddrPs = !cast<FLAT_Pseudo>(NAME#"_SADDR");

  def _vi       : FLAT_Real_vi<op, basePs, has_sccb>;
  def _SADDR_vi : FLAT_Real_vi<op, saddrPs, has_sccb>;
}
1775
// Real FLAT instruction for the GFX940 encoding family (assembler predicate
// isGFX940Plus, decoder namespace "GFX9"). Encoding bit 13 is the pseudo's
// `sve` value; bit 25 is the SCC bit of cpol when the pseudo has_sccb, else
// the pseudo's fixed sccbValue.
class FLAT_Real_gfx940 <bits<7> op, FLAT_Pseudo ps>
  : FLAT_Real <op, ps>,
    SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX940> {
  let AssemblerPredicate = isGFX940Plus;
  let DecoderNamespace = "GFX9";

  let Inst{13} = ps.sve;
  let Inst{25} = !if(ps.has_sccb, cpol{CPolBit.SCC}, ps.sccbValue);
}
1784
// Reals for an instruction that also has "_SVS" and "_ST" pseudos:
//  * _vi        - NAME pseudo, restricted to isGFX8GFX9NotGFX940.
//  * _SADDR_vi  - "_SADDR" pseudo, decoded in the "GFX9" namespace.
//  * _VE_gfx940, _SVS_gfx940, _ST_gfx940 - GFX940 encodings of the NAME,
//    "_SVS" and "_ST" pseudos, all under isGFX940Plus.
multiclass FLAT_Real_AllAddr_SVE_vi<bits<7> op> {
  defvar basePs = !cast<FLAT_Pseudo>(NAME);

  def _vi : FLAT_Real_vi<op, basePs> {
    let AssemblerPredicate = isGFX8GFX9NotGFX940;
    let OtherPredicates = [isGFX8GFX9NotGFX940];
  }

  def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")> {
    let DecoderNamespace = "GFX9";
  }

  let AssemblerPredicate = isGFX940Plus, SubtargetPredicate = isGFX940Plus in {
    def _VE_gfx940  : FLAT_Real_gfx940<op, basePs>;
    def _SVS_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SVS")>;
    def _ST_gfx940  : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_ST")>;
  }
}
1799
// Reals for an LDS-load pseudo and its "_SADDR" variant.
//
// The VI definitions (guarded by isGFX8GFX9NotGFX940) use opcode
// `pre_gfx940_op` and an assembly string of the form
//   pre_gfx940_name # <pseudo AsmOperands> # " lds"
// where pre_gfx940_name defaults to the pseudo's PseudoInstr with "_lds"
// removed. The GFX940 definitions (guarded by isGFX940Plus) use opcode `op`.
multiclass FLAT_Real_AllAddr_LDS<bits<7> op, bits<7> pre_gfx940_op,
  string pre_gfx940_name = !subst("_lds", "", !cast<FLAT_Pseudo>(NAME).PseudoInstr),
  bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {

  defvar basePs  = !cast<FLAT_Pseudo>(NAME);
  defvar saddrPs = !cast<FLAT_Pseudo>(NAME#"_SADDR");

  let OtherPredicates = [isGFX8GFX9NotGFX940] in {
    def _vi : FLAT_Real_vi<pre_gfx940_op, basePs, has_sccb> {
      let AsmString = pre_gfx940_name # basePs.AsmOperands # " lds";
    }
    def _SADDR_vi : FLAT_Real_vi<pre_gfx940_op, saddrPs, has_sccb> {
      let AsmString = pre_gfx940_name # saddrPs.AsmOperands # " lds";
    }
  }

  let SubtargetPredicate = isGFX940Plus in {
    def _gfx940       : FLAT_Real_gfx940<op, basePs>;
    def _SADDR_gfx940 : FLAT_Real_gfx940<op, saddrPs>;
  }
}
1818
// Everything FLAT_Real_AllAddr_LDS defines, plus GFX940 reals for the "_SVS"
// and "_ST" pseudos (opcode `op`, guarded by isGFX940Plus).
multiclass FLAT_Real_AllAddr_SVE_LDS<bits<7> op, bits<7> pre_gfx940_op> {
  defm "" : FLAT_Real_AllAddr_LDS<op, pre_gfx940_op>;

  defvar svsPs = !cast<FLAT_Pseudo>(NAME#"_SVS");
  defvar stPs  = !cast<FLAT_Pseudo>(NAME#"_ST");

  let SubtargetPredicate = isGFX940Plus in {
    def _SVS_gfx940 : FLAT_Real_gfx940<op, svsPs>;
    def _ST_gfx940  : FLAT_Real_gfx940<op, stPs>;
  }
}
1826
// VI FLAT loads: opcodes 0x10-0x17.
def FLAT_LOAD_UBYTE_vi          : FLAT_Real_vi <0x10, FLAT_LOAD_UBYTE>;
def FLAT_LOAD_SBYTE_vi          : FLAT_Real_vi <0x11, FLAT_LOAD_SBYTE>;
def FLAT_LOAD_USHORT_vi         : FLAT_Real_vi <0x12, FLAT_LOAD_USHORT>;
def FLAT_LOAD_SSHORT_vi         : FLAT_Real_vi <0x13, FLAT_LOAD_SSHORT>;
def FLAT_LOAD_DWORD_vi          : FLAT_Real_vi <0x14, FLAT_LOAD_DWORD>;
def FLAT_LOAD_DWORDX2_vi        : FLAT_Real_vi <0x15, FLAT_LOAD_DWORDX2>;
def FLAT_LOAD_DWORDX4_vi        : FLAT_Real_vi <0x17, FLAT_LOAD_DWORDX4>;
def FLAT_LOAD_DWORDX3_vi        : FLAT_Real_vi <0x16, FLAT_LOAD_DWORDX3>;

// VI FLAT stores: opcodes 0x18-0x1f, including the D16_HI variants.
def FLAT_STORE_BYTE_vi          : FLAT_Real_vi <0x18, FLAT_STORE_BYTE>;
def FLAT_STORE_BYTE_D16_HI_vi   : FLAT_Real_vi <0x19, FLAT_STORE_BYTE_D16_HI>;
def FLAT_STORE_SHORT_vi         : FLAT_Real_vi <0x1a, FLAT_STORE_SHORT>;
def FLAT_STORE_SHORT_D16_HI_vi  : FLAT_Real_vi <0x1b, FLAT_STORE_SHORT_D16_HI>;
def FLAT_STORE_DWORD_vi         : FLAT_Real_vi <0x1c, FLAT_STORE_DWORD>;
def FLAT_STORE_DWORDX2_vi       : FLAT_Real_vi <0x1d, FLAT_STORE_DWORDX2>;
def FLAT_STORE_DWORDX4_vi       : FLAT_Real_vi <0x1f, FLAT_STORE_DWORDX4>;
def FLAT_STORE_DWORDX3_vi       : FLAT_Real_vi <0x1e, FLAT_STORE_DWORDX3>;

// VI FLAT D16 loads: opcodes 0x20-0x25.
def FLAT_LOAD_UBYTE_D16_vi      : FLAT_Real_vi <0x20, FLAT_LOAD_UBYTE_D16>;
def FLAT_LOAD_UBYTE_D16_HI_vi   : FLAT_Real_vi <0x21, FLAT_LOAD_UBYTE_D16_HI>;
def FLAT_LOAD_SBYTE_D16_vi      : FLAT_Real_vi <0x22, FLAT_LOAD_SBYTE_D16>;
def FLAT_LOAD_SBYTE_D16_HI_vi   : FLAT_Real_vi <0x23, FLAT_LOAD_SBYTE_D16_HI>;
def FLAT_LOAD_SHORT_D16_vi      : FLAT_Real_vi <0x24, FLAT_LOAD_SHORT_D16>;
def FLAT_LOAD_SHORT_D16_HI_vi   : FLAT_Real_vi <0x25, FLAT_LOAD_SHORT_D16_HI>;
1851
// VI reals for an atomic: one for the pseudo named by ps.PseudoInstr and one
// for its "_RTN" sibling, both with opcode `op`.
// NOTE(review): the has_sccb default is looked up through NAME rather than
// through `ps`, so it relies on the defm name matching the pseudo's name.
multiclass FLAT_Real_Atomics_vi <bits<7> op, FLAT_Pseudo ps,
  bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
  defvar noRtnPs = !cast<FLAT_Pseudo>(ps.PseudoInstr);
  defvar rtnPs   = !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN");

  def _vi     : FLAT_Real_vi<op, noRtnPs, has_sccb>;
  def _RTN_vi : FLAT_Real_vi<op, rtnPs, has_sccb>;
}
1857
// VI reals for a global atomic: the _vi/_SADDR_vi pair inherited from
// FLAT_Real_AllAddr_vi, plus reals for the "_RTN" and "_SADDR_RTN" pseudos.
// All four share opcode `op` and the same has_sccb setting.
multiclass FLAT_Global_Real_Atomics_vi<bits<7> op,
  bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb>
  : FLAT_Real_AllAddr_vi<op, has_sccb> {
  defvar rtnPs      = !cast<FLAT_Pseudo>(NAME#"_RTN");
  defvar saddrRtnPs = !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN");

  def _RTN_vi       : FLAT_Real_vi <op, rtnPs, has_sccb>;
  def _SADDR_RTN_vi : FLAT_Real_vi <op, saddrRtnPs, has_sccb>;
}
1864
1865
// 32-bit FLAT atomics: opcodes 0x40-0x4c.
defm FLAT_ATOMIC_SWAP           : FLAT_Real_Atomics_vi <0x40, FLAT_ATOMIC_SWAP>;
defm FLAT_ATOMIC_CMPSWAP        : FLAT_Real_Atomics_vi <0x41, FLAT_ATOMIC_CMPSWAP>;
defm FLAT_ATOMIC_ADD            : FLAT_Real_Atomics_vi <0x42, FLAT_ATOMIC_ADD>;
defm FLAT_ATOMIC_SUB            : FLAT_Real_Atomics_vi <0x43, FLAT_ATOMIC_SUB>;
defm FLAT_ATOMIC_SMIN           : FLAT_Real_Atomics_vi <0x44, FLAT_ATOMIC_SMIN>;
defm FLAT_ATOMIC_UMIN           : FLAT_Real_Atomics_vi <0x45, FLAT_ATOMIC_UMIN>;
defm FLAT_ATOMIC_SMAX           : FLAT_Real_Atomics_vi <0x46, FLAT_ATOMIC_SMAX>;
defm FLAT_ATOMIC_UMAX           : FLAT_Real_Atomics_vi <0x47, FLAT_ATOMIC_UMAX>;
defm FLAT_ATOMIC_AND            : FLAT_Real_Atomics_vi <0x48, FLAT_ATOMIC_AND>;
defm FLAT_ATOMIC_OR             : FLAT_Real_Atomics_vi <0x49, FLAT_ATOMIC_OR>;
defm FLAT_ATOMIC_XOR            : FLAT_Real_Atomics_vi <0x4a, FLAT_ATOMIC_XOR>;
defm FLAT_ATOMIC_INC            : FLAT_Real_Atomics_vi <0x4b, FLAT_ATOMIC_INC>;
defm FLAT_ATOMIC_DEC            : FLAT_Real_Atomics_vi <0x4c, FLAT_ATOMIC_DEC>;

// 64-bit (_X2) FLAT atomics: opcodes 0x60-0x6c.
defm FLAT_ATOMIC_SWAP_X2        : FLAT_Real_Atomics_vi <0x60, FLAT_ATOMIC_SWAP_X2>;
defm FLAT_ATOMIC_CMPSWAP_X2     : FLAT_Real_Atomics_vi <0x61, FLAT_ATOMIC_CMPSWAP_X2>;
defm FLAT_ATOMIC_ADD_X2         : FLAT_Real_Atomics_vi <0x62, FLAT_ATOMIC_ADD_X2>;
defm FLAT_ATOMIC_SUB_X2         : FLAT_Real_Atomics_vi <0x63, FLAT_ATOMIC_SUB_X2>;
defm FLAT_ATOMIC_SMIN_X2        : FLAT_Real_Atomics_vi <0x64, FLAT_ATOMIC_SMIN_X2>;
defm FLAT_ATOMIC_UMIN_X2        : FLAT_Real_Atomics_vi <0x65, FLAT_ATOMIC_UMIN_X2>;
defm FLAT_ATOMIC_SMAX_X2        : FLAT_Real_Atomics_vi <0x66, FLAT_ATOMIC_SMAX_X2>;
defm FLAT_ATOMIC_UMAX_X2        : FLAT_Real_Atomics_vi <0x67, FLAT_ATOMIC_UMAX_X2>;
defm FLAT_ATOMIC_AND_X2         : FLAT_Real_Atomics_vi <0x68, FLAT_ATOMIC_AND_X2>;
defm FLAT_ATOMIC_OR_X2          : FLAT_Real_Atomics_vi <0x69, FLAT_ATOMIC_OR_X2>;
defm FLAT_ATOMIC_XOR_X2         : FLAT_Real_Atomics_vi <0x6a, FLAT_ATOMIC_XOR_X2>;
defm FLAT_ATOMIC_INC_X2         : FLAT_Real_Atomics_vi <0x6b, FLAT_ATOMIC_INC_X2>;
defm FLAT_ATOMIC_DEC_X2         : FLAT_Real_Atomics_vi <0x6c, FLAT_ATOMIC_DEC_X2>;
1892
// GLOBAL loads: opcodes 0x10-0x17.
defm GLOBAL_LOAD_UBYTE          : FLAT_Real_AllAddr_vi <0x10>;
defm GLOBAL_LOAD_SBYTE          : FLAT_Real_AllAddr_vi <0x11>;
defm GLOBAL_LOAD_USHORT         : FLAT_Real_AllAddr_vi <0x12>;
defm GLOBAL_LOAD_SSHORT         : FLAT_Real_AllAddr_vi <0x13>;
defm GLOBAL_LOAD_DWORD          : FLAT_Real_AllAddr_vi <0x14>;
defm GLOBAL_LOAD_DWORDX2        : FLAT_Real_AllAddr_vi <0x15>;
defm GLOBAL_LOAD_DWORDX3        : FLAT_Real_AllAddr_vi <0x16>;
defm GLOBAL_LOAD_DWORDX4        : FLAT_Real_AllAddr_vi <0x17>;

// GLOBAL D16 loads: opcodes 0x20-0x25.
defm GLOBAL_LOAD_UBYTE_D16      : FLAT_Real_AllAddr_vi <0x20>;
defm GLOBAL_LOAD_UBYTE_D16_HI   : FLAT_Real_AllAddr_vi <0x21>;
defm GLOBAL_LOAD_SBYTE_D16      : FLAT_Real_AllAddr_vi <0x22>;
defm GLOBAL_LOAD_SBYTE_D16_HI   : FLAT_Real_AllAddr_vi <0x23>;
defm GLOBAL_LOAD_SHORT_D16      : FLAT_Real_AllAddr_vi <0x24>;
defm GLOBAL_LOAD_SHORT_D16_HI   : FLAT_Real_AllAddr_vi <0x25>;

// GLOBAL stores: opcodes 0x18-0x1f.
defm GLOBAL_STORE_BYTE          : FLAT_Real_AllAddr_vi <0x18>;
defm GLOBAL_STORE_BYTE_D16_HI   : FLAT_Real_AllAddr_vi <0x19>;
defm GLOBAL_STORE_SHORT         : FLAT_Real_AllAddr_vi <0x1a>;
defm GLOBAL_STORE_SHORT_D16_HI  : FLAT_Real_AllAddr_vi <0x1b>;
defm GLOBAL_STORE_DWORD         : FLAT_Real_AllAddr_vi <0x1c>;
defm GLOBAL_STORE_DWORDX2       : FLAT_Real_AllAddr_vi <0x1d>;
defm GLOBAL_STORE_DWORDX3       : FLAT_Real_AllAddr_vi <0x1e>;
defm GLOBAL_STORE_DWORDX4       : FLAT_Real_AllAddr_vi <0x1f>;

// GLOBAL loads to LDS. The first opcode is the GFX940 encoding; the second
// is the pre-GFX940 one, which equals the opcode of the matching plain load
// above.
defm GLOBAL_LOAD_LDS_UBYTE      : FLAT_Real_AllAddr_LDS <0x26, 0x10>;
defm GLOBAL_LOAD_LDS_SBYTE      : FLAT_Real_AllAddr_LDS <0x27, 0x11>;
defm GLOBAL_LOAD_LDS_USHORT     : FLAT_Real_AllAddr_LDS <0x28, 0x12>;
defm GLOBAL_LOAD_LDS_SSHORT     : FLAT_Real_AllAddr_LDS <0x29, 0x13>;
defm GLOBAL_LOAD_LDS_DWORD      : FLAT_Real_AllAddr_LDS <0x2a, 0x14>;
1923
// 32-bit GLOBAL atomics: opcodes 0x40-0x4c, the same values as the FLAT
// atomics of the same name.
defm GLOBAL_ATOMIC_SWAP         : FLAT_Global_Real_Atomics_vi <0x40>;
defm GLOBAL_ATOMIC_CMPSWAP      : FLAT_Global_Real_Atomics_vi <0x41>;
defm GLOBAL_ATOMIC_ADD          : FLAT_Global_Real_Atomics_vi <0x42>;
defm GLOBAL_ATOMIC_SUB          : FLAT_Global_Real_Atomics_vi <0x43>;
defm GLOBAL_ATOMIC_SMIN         : FLAT_Global_Real_Atomics_vi <0x44>;
defm GLOBAL_ATOMIC_UMIN         : FLAT_Global_Real_Atomics_vi <0x45>;
defm GLOBAL_ATOMIC_SMAX         : FLAT_Global_Real_Atomics_vi <0x46>;
defm GLOBAL_ATOMIC_UMAX         : FLAT_Global_Real_Atomics_vi <0x47>;
defm GLOBAL_ATOMIC_AND          : FLAT_Global_Real_Atomics_vi <0x48>;
defm GLOBAL_ATOMIC_OR           : FLAT_Global_Real_Atomics_vi <0x49>;
defm GLOBAL_ATOMIC_XOR          : FLAT_Global_Real_Atomics_vi <0x4a>;
defm GLOBAL_ATOMIC_INC          : FLAT_Global_Real_Atomics_vi <0x4b>;
defm GLOBAL_ATOMIC_DEC          : FLAT_Global_Real_Atomics_vi <0x4c>;

// 64-bit (_X2) GLOBAL atomics: opcodes 0x60-0x6c.
defm GLOBAL_ATOMIC_SWAP_X2      : FLAT_Global_Real_Atomics_vi <0x60>;
defm GLOBAL_ATOMIC_CMPSWAP_X2   : FLAT_Global_Real_Atomics_vi <0x61>;
defm GLOBAL_ATOMIC_ADD_X2       : FLAT_Global_Real_Atomics_vi <0x62>;
defm GLOBAL_ATOMIC_SUB_X2       : FLAT_Global_Real_Atomics_vi <0x63>;
defm GLOBAL_ATOMIC_SMIN_X2      : FLAT_Global_Real_Atomics_vi <0x64>;
defm GLOBAL_ATOMIC_UMIN_X2      : FLAT_Global_Real_Atomics_vi <0x65>;
defm GLOBAL_ATOMIC_SMAX_X2      : FLAT_Global_Real_Atomics_vi <0x66>;
defm GLOBAL_ATOMIC_UMAX_X2      : FLAT_Global_Real_Atomics_vi <0x67>;
defm GLOBAL_ATOMIC_AND_X2       : FLAT_Global_Real_Atomics_vi <0x68>;
defm GLOBAL_ATOMIC_OR_X2        : FLAT_Global_Real_Atomics_vi <0x69>;
defm GLOBAL_ATOMIC_XOR_X2       : FLAT_Global_Real_Atomics_vi <0x6a>;
defm GLOBAL_ATOMIC_INC_X2       : FLAT_Global_Real_Atomics_vi <0x6b>;
defm GLOBAL_ATOMIC_DEC_X2       : FLAT_Global_Real_Atomics_vi <0x6c>;
1950
// SCRATCH_LOAD_LDS_* real encodings via FLAT_Real_AllAddr_SVE_LDS (declared
// outside this chunk). The numeric argument pairs are identical to those of
// the matching GLOBAL_LOAD_LDS_* entries above.
// NOTE(review): meaning of the two arguments is not visible here - confirm
// against the multiclass definition.
1951defm SCRATCH_LOAD_LDS_UBYTE  : FLAT_Real_AllAddr_SVE_LDS <0x026, 0x10>;
1952defm SCRATCH_LOAD_LDS_SBYTE  : FLAT_Real_AllAddr_SVE_LDS <0x027, 0x11>;
1953defm SCRATCH_LOAD_LDS_USHORT : FLAT_Real_AllAddr_SVE_LDS <0x028, 0x12>;
1954defm SCRATCH_LOAD_LDS_SSHORT : FLAT_Real_AllAddr_SVE_LDS <0x029, 0x13>;
1955defm SCRATCH_LOAD_LDS_DWORD  : FLAT_Real_AllAddr_SVE_LDS <0x02a, 0x14>;
1956
// SCRATCH_LOAD_* / SCRATCH_STORE_* real encodings via FLAT_Real_AllAddr_SVE_vi
// (declared outside this chunk). Each numeric argument equals the one used by
// the GLOBAL_* instruction of the same suffix above.
// Note that the entries are not listed in numeric order: the D16 loads
// (0x20-0x25) sit between the byte stores (0x18-0x19) and the remaining
// stores (0x1a-0x1f).
1957defm SCRATCH_LOAD_UBYTE         : FLAT_Real_AllAddr_SVE_vi <0x10>;
1958defm SCRATCH_LOAD_SBYTE         : FLAT_Real_AllAddr_SVE_vi <0x11>;
1959defm SCRATCH_LOAD_USHORT        : FLAT_Real_AllAddr_SVE_vi <0x12>;
1960defm SCRATCH_LOAD_SSHORT        : FLAT_Real_AllAddr_SVE_vi <0x13>;
1961defm SCRATCH_LOAD_DWORD         : FLAT_Real_AllAddr_SVE_vi <0x14>;
1962defm SCRATCH_LOAD_DWORDX2       : FLAT_Real_AllAddr_SVE_vi <0x15>;
1963defm SCRATCH_LOAD_DWORDX3       : FLAT_Real_AllAddr_SVE_vi <0x16>;
1964defm SCRATCH_LOAD_DWORDX4       : FLAT_Real_AllAddr_SVE_vi <0x17>;
1965defm SCRATCH_STORE_BYTE         : FLAT_Real_AllAddr_SVE_vi <0x18>;
1966defm SCRATCH_STORE_BYTE_D16_HI  : FLAT_Real_AllAddr_SVE_vi <0x19>;
1967defm SCRATCH_LOAD_UBYTE_D16     : FLAT_Real_AllAddr_SVE_vi <0x20>;
1968defm SCRATCH_LOAD_UBYTE_D16_HI  : FLAT_Real_AllAddr_SVE_vi <0x21>;
1969defm SCRATCH_LOAD_SBYTE_D16     : FLAT_Real_AllAddr_SVE_vi <0x22>;
1970defm SCRATCH_LOAD_SBYTE_D16_HI  : FLAT_Real_AllAddr_SVE_vi <0x23>;
1971defm SCRATCH_LOAD_SHORT_D16     : FLAT_Real_AllAddr_SVE_vi <0x24>;
1972defm SCRATCH_LOAD_SHORT_D16_HI  : FLAT_Real_AllAddr_SVE_vi <0x25>;
1973defm SCRATCH_STORE_SHORT        : FLAT_Real_AllAddr_SVE_vi <0x1a>;
1974defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_SVE_vi <0x1b>;
1975defm SCRATCH_STORE_DWORD        : FLAT_Real_AllAddr_SVE_vi <0x1c>;
1976defm SCRATCH_STORE_DWORDX2      : FLAT_Real_AllAddr_SVE_vi <0x1d>;
1977defm SCRATCH_STORE_DWORDX3      : FLAT_Real_AllAddr_SVE_vi <0x1e>;
1978defm SCRATCH_STORE_DWORDX4      : FLAT_Real_AllAddr_SVE_vi <0x1f>;
1979
// Float global atomics for subtargets matching isGFX8GFX9NotGFX940. The same
// two pseudos are instantiated again under isGFX940Plus further down, there
// through FLAT_Global_Real_Atomics_gfx940.
// NOTE(review): the second template argument (0) is defined by
// FLAT_Global_Real_Atomics_vi, which is outside this chunk - confirm its
// meaning there.
1980let SubtargetPredicate = isGFX8GFX9NotGFX940 in {
1981  // These instructions are encoded differently on gfx90* and gfx940.
1982  defm GLOBAL_ATOMIC_ADD_F32    : FLAT_Global_Real_Atomics_vi <0x04d, 0>;
1983  defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_vi <0x04e, 0>;
1984}
1985
// 64-bit float atomics (add/min/max) restricted to isGFX90AOnly, in both FLAT
// and GLOBAL flavours. FLAT and GLOBAL entries for the same operation share
// the same numeric value (0x4f, 0x50, 0x51).
// NOTE(review): the trailing 0 argument is defined by the *_vi multiclasses,
// which are outside this chunk - confirm its meaning there.
1986let SubtargetPredicate = isGFX90AOnly in {
1987  defm FLAT_ATOMIC_ADD_F64   : FLAT_Real_Atomics_vi<0x4f, FLAT_ATOMIC_ADD_F64, 0>;
1988  defm FLAT_ATOMIC_MIN_F64   : FLAT_Real_Atomics_vi<0x50, FLAT_ATOMIC_MIN_F64, 0>;
1989  defm FLAT_ATOMIC_MAX_F64   : FLAT_Real_Atomics_vi<0x51, FLAT_ATOMIC_MAX_F64, 0>;
1990  defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Real_Atomics_vi<0x4f, 0>;
1991  defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Real_Atomics_vi<0x50, 0>;
1992  defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_vi<0x51, 0>;
1993} // End SubtargetPredicate = isGFX90AOnly
1994
// Emits the gfx940 real instructions for both addressing forms of a pseudo:
// the pseudo called NAME and its NAME#"_SADDR" sibling. Both share `op`.
multiclass FLAT_Real_AllAddr_gfx940<bits<7> op> {
  defvar vaddrPs = !cast<FLAT_Pseudo>(NAME);
  defvar saddrPs = !cast<FLAT_Pseudo>(NAME#"_SADDR");

  def _gfx940       : FLAT_Real_gfx940<op, vaddrPs>;
  def _SADDR_gfx940 : FLAT_Real_gfx940<op, saddrPs>;
}
1999
// Emits the gfx940 real instructions for a FLAT atomic: one for the pseudo
// named by ps.PseudoInstr and one for its "_RTN" sibling. Both share `op`.
multiclass FLAT_Real_Atomics_gfx940 <bits<7> op, FLAT_Pseudo ps> {
  defvar pseudoName = ps.PseudoInstr;

  def _gfx940 :
    FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(pseudoName)>;
  def _RTN_gfx940 :
    FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(pseudoName # "_RTN")>;
}
2004
// Emits all four gfx940 real instructions of a global atomic: the plain and
// _SADDR forms (via FLAT_Real_AllAddr_gfx940) plus their _RTN counterparts.
multiclass FLAT_Global_Real_Atomics_gfx940<bits<7> op> {
  defm "" : FLAT_Real_AllAddr_gfx940<op>;

  defvar rtnPs      = !cast<FLAT_Pseudo>(NAME#"_RTN");
  defvar saddrRtnPs = !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN");

  def _RTN_gfx940       : FLAT_Real_gfx940 <op, rtnPs>;
  def _SADDR_RTN_gfx940 : FLAT_Real_gfx940 <op, saddrRtnPs>;
}
2010
// Float atomics for subtargets matching isGFX940Plus. The block mixes two
// families of multiclasses: the GLOBAL F32/PK_F16 and all F64 entries use the
// *_gfx940 multiclasses defined just above, while FLAT_ATOMIC_ADD_F32,
// FLAT_ATOMIC_PK_ADD_F16 and both PK_ADD_BF16 entries use the *_vi ones.
// NOTE(review): presumably the *_vi entries have no conflicting pre-gfx940
// encoding - confirm against the *_vi multiclass definitions.
2011let SubtargetPredicate = isGFX940Plus in {
2012  // These instructions are encoded differently on gfx90* and gfx940.
2013  defm GLOBAL_ATOMIC_ADD_F32     : FLAT_Global_Real_Atomics_gfx940 <0x04d>;
2014  defm GLOBAL_ATOMIC_PK_ADD_F16  : FLAT_Global_Real_Atomics_gfx940 <0x04e>;
2015
2016  defm FLAT_ATOMIC_ADD_F64       : FLAT_Real_Atomics_gfx940<0x4f, FLAT_ATOMIC_ADD_F64>;
2017  defm FLAT_ATOMIC_MIN_F64       : FLAT_Real_Atomics_gfx940<0x50, FLAT_ATOMIC_MIN_F64>;
2018  defm FLAT_ATOMIC_MAX_F64       : FLAT_Real_Atomics_gfx940<0x51, FLAT_ATOMIC_MAX_F64>;
2019  defm GLOBAL_ATOMIC_ADD_F64     : FLAT_Global_Real_Atomics_gfx940<0x4f>;
2020  defm GLOBAL_ATOMIC_MIN_F64     : FLAT_Global_Real_Atomics_gfx940<0x50>;
2021  defm GLOBAL_ATOMIC_MAX_F64     : FLAT_Global_Real_Atomics_gfx940<0x51>;
2022  defm FLAT_ATOMIC_ADD_F32       : FLAT_Real_Atomics_vi<0x4d, FLAT_ATOMIC_ADD_F32>;
2023  defm FLAT_ATOMIC_PK_ADD_F16    : FLAT_Real_Atomics_vi<0x4e, FLAT_ATOMIC_PK_ADD_F16>;
2024  defm FLAT_ATOMIC_PK_ADD_BF16   : FLAT_Real_Atomics_vi<0x52, FLAT_ATOMIC_PK_ADD_BF16>;
2025  defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi<0x52>;
2026} // End SubtargetPredicate = isGFX940Plus
2027
2028//===----------------------------------------------------------------------===//
2029// GFX10.
2030//===----------------------------------------------------------------------===//
2031
// Real (encodable) FLAT instruction for GFX10, derived from pseudo `ps` with
// opcode `op`. Registered in the GFX10 encoding family, assembled only when
// isGFX10Only holds, and decoded from the "GFX10" namespace.
2032class FLAT_Real_gfx10<bits<7> op, FLAT_Pseudo ps> :
2033    FLAT_Real<op, ps>, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX10> {
2034  let AssemblerPredicate = isGFX10Only;
2035  let DecoderNamespace = "GFX10";
2036
  // Bits 11-0: low 12 bits of the offset operand.
2037  let Inst{11-0}  = offset{11-0};
  // Bit 12: DLC taken from the cpol operand when the pseudo has a dlc bit,
  // otherwise the pseudo's fixed dlcValue.
2038  let Inst{12}    = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlcValue);
  // Bits 54-48: the saddr operand only when the pseudo both has and enables
  // saddr; in every other case the constant 0x7d is encoded.
2039  let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7d), 0x7d);
  // Bit 55 is always encoded as zero.
2040  let Inst{55}    = 0;
2041}
2042
2043
// GFX10 real instruction for the pseudo whose record name is NAME.
multiclass FLAT_Real_Base_gfx10<bits<7> op> {
  defvar basePs = !cast<FLAT_Pseudo>(NAME);
  def _gfx10 : FLAT_Real_gfx10<op, basePs>;
}
2048
// GFX10 real instruction for the NAME#"_RTN" pseudo.
multiclass FLAT_Real_RTN_gfx10<bits<7> op> {
  defvar rtnPs = !cast<FLAT_Pseudo>(NAME#"_RTN");
  def _RTN_gfx10 : FLAT_Real_gfx10<op, rtnPs>;
}
2053
// GFX10 real instruction for the NAME#"_SADDR" pseudo.
multiclass FLAT_Real_SADDR_gfx10<bits<7> op> {
  defvar saddrPs = !cast<FLAT_Pseudo>(NAME#"_SADDR");
  def _SADDR_gfx10 : FLAT_Real_gfx10<op, saddrPs>;
}
2058
// GFX10 real instruction for the NAME#"_SADDR_RTN" pseudo.
multiclass FLAT_Real_SADDR_RTN_gfx10<bits<7> op> {
  defvar saddrRtnPs = !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN");
  def _SADDR_RTN_gfx10 : FLAT_Real_gfx10<op, saddrRtnPs>;
}
2063
// GFX10 real instruction for the NAME#"_ST" pseudo. It overrides bits 54-48
// with EXEC_HI.Index and is additionally gated on HasFlatScratchSTMode.
multiclass FLAT_Real_ST_gfx10<bits<7> op> {
  defvar stPs = !cast<FLAT_Pseudo>(NAME#"_ST");

  def _ST_gfx10 : FLAT_Real_gfx10<op, stPs> {
    let Inst{54-48} = EXEC_HI.Index;
    let OtherPredicates = [HasFlatScratchSTMode];
  }
}
2071
// Plain and _SADDR GFX10 real instructions for one pseudo family.
multiclass FLAT_Real_AllAddr_gfx10<bits<7> op> {
  defm "" : FLAT_Real_Base_gfx10<op>;
  defm "" : FLAT_Real_SADDR_gfx10<op>;
}
2075
// Plain and _RTN GFX10 real instructions for a FLAT atomic.
multiclass FLAT_Real_Atomics_gfx10<bits<7> op> {
  defm "" : FLAT_Real_Base_gfx10<op>;
  defm "" : FLAT_Real_RTN_gfx10<op>;
}
2079
// All four GFX10 real instructions of a global atomic: plain, _SADDR, _RTN
// and _SADDR_RTN.
multiclass FLAT_Real_GlblAtomics_gfx10<bits<7> op> {
  defm "" : FLAT_Real_AllAddr_gfx10<op>;
  defm "" : FLAT_Real_RTN_gfx10<op>;
  defm "" : FLAT_Real_SADDR_RTN_gfx10<op>;
}
2084
// Only the returning forms (_RTN and _SADDR_RTN) of a global atomic on GFX10.
multiclass FLAT_Real_GlblAtomics_RTN_gfx10<bits<7> op> {
  defm "" : FLAT_Real_RTN_gfx10<op>;
  defm "" : FLAT_Real_SADDR_RTN_gfx10<op>;
}
2088
// Plain, _SADDR and _ST GFX10 real instructions for a scratch pseudo family.
multiclass FLAT_Real_ScratchAllAddr_gfx10<bits<7> op> {
  defm "" : FLAT_Real_Base_gfx10<op>;
  defm "" : FLAT_Real_SADDR_gfx10<op>;
  defm "" : FLAT_Real_ST_gfx10<op>;
}
2093
// GFX10 real instructions (plain and _SADDR) for an LDS-variant pseudo.
// `opname` defaults to the pseudo's PseudoInstr string with every "_lds"
// substring removed; the assembly string is then rebuilt as
// opname # <pseudo's AsmOperands> # " lds", so "lds" is appended after the
// operands instead of being part of the leading name.
2094multiclass FLAT_Real_AllAddr_LDS_gfx10<bits<7> op,
2095  string opname = !subst("_lds", "", !cast<FLAT_Pseudo>(NAME).PseudoInstr)> {
  // Plain form: operands come from the NAME pseudo.
2096  let AsmString = opname # !cast<FLAT_Pseudo>(NAME).AsmOperands # " lds" in
2097  defm "" : FLAT_Real_Base_gfx10<op>;
2098
  // SADDR form: operands come from the NAME#"_SADDR" pseudo.
2099  let AsmString = opname # !cast<FLAT_Pseudo>(NAME#"_SADDR").AsmOperands # " lds" in
2100  defm "" : FLAT_Real_SADDR_gfx10<op>;
2101}
2102
// Scratch counterpart of FLAT_Real_AllAddr_LDS_gfx10: emits the plain and
// _SADDR LDS forms through that multiclass and adds the _ST form, whose
// assembly string is built the same way from the NAME#"_ST" pseudo.
// `opname` is not forwarded to FLAT_Real_AllAddr_LDS_gfx10, so that multiclass
// uses its own default for the plain and _SADDR forms.
2103multiclass FLAT_Real_ScratchAllAddr_LDS_gfx10<bits<7> op,
2104  string opname = !subst("_lds", "", !cast<FLAT_Pseudo>(NAME).PseudoInstr)> {
2105  defm "" : FLAT_Real_AllAddr_LDS_gfx10<op>;
2106
2107  let AsmString = opname # !cast<FLAT_Pseudo>(NAME#"_ST").AsmOperands # " lds" in
2108  defm "" : FLAT_Real_ST_gfx10<op>;
2109}
2110
2111// ENC_FLAT.
// GFX10 opcode table for FLAT_* instructions. Loads and stores get only the
// plain form (FLAT_Real_Base_gfx10); atomics also get the _RTN form
// (FLAT_Real_Atomics_gfx10).
// Things to watch when editing:
//  - DWORDX4 (0x00e / 0x01e) is numbered before DWORDX3 (0x00f / 0x01f).
//  - 0x034 and 0x054 are unused in this table (the GLBL table below assigns
//    0x034 to GLOBAL_ATOMIC_CSUB).
//  - Each *_X2 atomic uses the value of its base operation plus 0x20.
2112defm FLAT_LOAD_UBYTE            : FLAT_Real_Base_gfx10<0x008>;
2113defm FLAT_LOAD_SBYTE            : FLAT_Real_Base_gfx10<0x009>;
2114defm FLAT_LOAD_USHORT           : FLAT_Real_Base_gfx10<0x00a>;
2115defm FLAT_LOAD_SSHORT           : FLAT_Real_Base_gfx10<0x00b>;
2116defm FLAT_LOAD_DWORD            : FLAT_Real_Base_gfx10<0x00c>;
2117defm FLAT_LOAD_DWORDX2          : FLAT_Real_Base_gfx10<0x00d>;
2118defm FLAT_LOAD_DWORDX4          : FLAT_Real_Base_gfx10<0x00e>;
2119defm FLAT_LOAD_DWORDX3          : FLAT_Real_Base_gfx10<0x00f>;
2120defm FLAT_STORE_BYTE            : FLAT_Real_Base_gfx10<0x018>;
2121defm FLAT_STORE_BYTE_D16_HI     : FLAT_Real_Base_gfx10<0x019>;
2122defm FLAT_STORE_SHORT           : FLAT_Real_Base_gfx10<0x01a>;
2123defm FLAT_STORE_SHORT_D16_HI    : FLAT_Real_Base_gfx10<0x01b>;
2124defm FLAT_STORE_DWORD           : FLAT_Real_Base_gfx10<0x01c>;
2125defm FLAT_STORE_DWORDX2         : FLAT_Real_Base_gfx10<0x01d>;
2126defm FLAT_STORE_DWORDX4         : FLAT_Real_Base_gfx10<0x01e>;
2127defm FLAT_STORE_DWORDX3         : FLAT_Real_Base_gfx10<0x01f>;
2128defm FLAT_LOAD_UBYTE_D16        : FLAT_Real_Base_gfx10<0x020>;
2129defm FLAT_LOAD_UBYTE_D16_HI     : FLAT_Real_Base_gfx10<0x021>;
2130defm FLAT_LOAD_SBYTE_D16        : FLAT_Real_Base_gfx10<0x022>;
2131defm FLAT_LOAD_SBYTE_D16_HI     : FLAT_Real_Base_gfx10<0x023>;
2132defm FLAT_LOAD_SHORT_D16        : FLAT_Real_Base_gfx10<0x024>;
2133defm FLAT_LOAD_SHORT_D16_HI     : FLAT_Real_Base_gfx10<0x025>;
2134defm FLAT_ATOMIC_SWAP           : FLAT_Real_Atomics_gfx10<0x030>;
2135defm FLAT_ATOMIC_CMPSWAP        : FLAT_Real_Atomics_gfx10<0x031>;
2136defm FLAT_ATOMIC_ADD            : FLAT_Real_Atomics_gfx10<0x032>;
2137defm FLAT_ATOMIC_SUB            : FLAT_Real_Atomics_gfx10<0x033>;
2138defm FLAT_ATOMIC_SMIN           : FLAT_Real_Atomics_gfx10<0x035>;
2139defm FLAT_ATOMIC_UMIN           : FLAT_Real_Atomics_gfx10<0x036>;
2140defm FLAT_ATOMIC_SMAX           : FLAT_Real_Atomics_gfx10<0x037>;
2141defm FLAT_ATOMIC_UMAX           : FLAT_Real_Atomics_gfx10<0x038>;
2142defm FLAT_ATOMIC_AND            : FLAT_Real_Atomics_gfx10<0x039>;
2143defm FLAT_ATOMIC_OR             : FLAT_Real_Atomics_gfx10<0x03a>;
2144defm FLAT_ATOMIC_XOR            : FLAT_Real_Atomics_gfx10<0x03b>;
2145defm FLAT_ATOMIC_INC            : FLAT_Real_Atomics_gfx10<0x03c>;
2146defm FLAT_ATOMIC_DEC            : FLAT_Real_Atomics_gfx10<0x03d>;
2147defm FLAT_ATOMIC_FCMPSWAP       : FLAT_Real_Atomics_gfx10<0x03e>;
2148defm FLAT_ATOMIC_FMIN           : FLAT_Real_Atomics_gfx10<0x03f>;
2149defm FLAT_ATOMIC_FMAX           : FLAT_Real_Atomics_gfx10<0x040>;
2150defm FLAT_ATOMIC_SWAP_X2        : FLAT_Real_Atomics_gfx10<0x050>;
2151defm FLAT_ATOMIC_CMPSWAP_X2     : FLAT_Real_Atomics_gfx10<0x051>;
2152defm FLAT_ATOMIC_ADD_X2         : FLAT_Real_Atomics_gfx10<0x052>;
2153defm FLAT_ATOMIC_SUB_X2         : FLAT_Real_Atomics_gfx10<0x053>;
2154defm FLAT_ATOMIC_SMIN_X2        : FLAT_Real_Atomics_gfx10<0x055>;
2155defm FLAT_ATOMIC_UMIN_X2        : FLAT_Real_Atomics_gfx10<0x056>;
2156defm FLAT_ATOMIC_SMAX_X2        : FLAT_Real_Atomics_gfx10<0x057>;
2157defm FLAT_ATOMIC_UMAX_X2        : FLAT_Real_Atomics_gfx10<0x058>;
2158defm FLAT_ATOMIC_AND_X2         : FLAT_Real_Atomics_gfx10<0x059>;
2159defm FLAT_ATOMIC_OR_X2          : FLAT_Real_Atomics_gfx10<0x05a>;
2160defm FLAT_ATOMIC_XOR_X2         : FLAT_Real_Atomics_gfx10<0x05b>;
2161defm FLAT_ATOMIC_INC_X2         : FLAT_Real_Atomics_gfx10<0x05c>;
2162defm FLAT_ATOMIC_DEC_X2         : FLAT_Real_Atomics_gfx10<0x05d>;
2163defm FLAT_ATOMIC_FCMPSWAP_X2    : FLAT_Real_Atomics_gfx10<0x05e>;
2164defm FLAT_ATOMIC_FMIN_X2        : FLAT_Real_Atomics_gfx10<0x05f>;
2165defm FLAT_ATOMIC_FMAX_X2        : FLAT_Real_Atomics_gfx10<0x060>;
2166
2167
2168// ENC_FLAT_GLBL.
// GFX10 opcode table for GLOBAL_* instructions. Loads and stores get the plain
// and _SADDR forms (FLAT_Real_AllAddr_gfx10); atomics additionally get _RTN
// and _SADDR_RTN (FLAT_Real_GlblAtomics_gfx10). Opcode values match the
// FLAT_* table above for instructions with the same suffix.
// Entries that have no counterpart in the FLAT_* table above:
// GLOBAL_ATOMIC_CSUB (0x034) and the *_ADDTID load/store (0x016 / 0x017).
2169defm GLOBAL_LOAD_UBYTE          : FLAT_Real_AllAddr_gfx10<0x008>;
2170defm GLOBAL_LOAD_SBYTE          : FLAT_Real_AllAddr_gfx10<0x009>;
2171defm GLOBAL_LOAD_USHORT         : FLAT_Real_AllAddr_gfx10<0x00a>;
2172defm GLOBAL_LOAD_SSHORT         : FLAT_Real_AllAddr_gfx10<0x00b>;
2173defm GLOBAL_LOAD_DWORD          : FLAT_Real_AllAddr_gfx10<0x00c>;
2174defm GLOBAL_LOAD_DWORDX2        : FLAT_Real_AllAddr_gfx10<0x00d>;
2175defm GLOBAL_LOAD_DWORDX4        : FLAT_Real_AllAddr_gfx10<0x00e>;
2176defm GLOBAL_LOAD_DWORDX3        : FLAT_Real_AllAddr_gfx10<0x00f>;
2177defm GLOBAL_STORE_BYTE          : FLAT_Real_AllAddr_gfx10<0x018>;
2178defm GLOBAL_STORE_BYTE_D16_HI   : FLAT_Real_AllAddr_gfx10<0x019>;
2179defm GLOBAL_STORE_SHORT         : FLAT_Real_AllAddr_gfx10<0x01a>;
2180defm GLOBAL_STORE_SHORT_D16_HI  : FLAT_Real_AllAddr_gfx10<0x01b>;
2181defm GLOBAL_STORE_DWORD         : FLAT_Real_AllAddr_gfx10<0x01c>;
2182defm GLOBAL_STORE_DWORDX2       : FLAT_Real_AllAddr_gfx10<0x01d>;
2183defm GLOBAL_STORE_DWORDX4       : FLAT_Real_AllAddr_gfx10<0x01e>;
2184defm GLOBAL_STORE_DWORDX3       : FLAT_Real_AllAddr_gfx10<0x01f>;
2185defm GLOBAL_LOAD_UBYTE_D16      : FLAT_Real_AllAddr_gfx10<0x020>;
2186defm GLOBAL_LOAD_UBYTE_D16_HI   : FLAT_Real_AllAddr_gfx10<0x021>;
2187defm GLOBAL_LOAD_SBYTE_D16      : FLAT_Real_AllAddr_gfx10<0x022>;
2188defm GLOBAL_LOAD_SBYTE_D16_HI   : FLAT_Real_AllAddr_gfx10<0x023>;
2189defm GLOBAL_LOAD_SHORT_D16      : FLAT_Real_AllAddr_gfx10<0x024>;
2190defm GLOBAL_LOAD_SHORT_D16_HI   : FLAT_Real_AllAddr_gfx10<0x025>;
2191defm GLOBAL_ATOMIC_SWAP         : FLAT_Real_GlblAtomics_gfx10<0x030>;
2192defm GLOBAL_ATOMIC_CMPSWAP      : FLAT_Real_GlblAtomics_gfx10<0x031>;
2193defm GLOBAL_ATOMIC_ADD          : FLAT_Real_GlblAtomics_gfx10<0x032>;
2194defm GLOBAL_ATOMIC_SUB          : FLAT_Real_GlblAtomics_gfx10<0x033>;
2195defm GLOBAL_ATOMIC_CSUB         : FLAT_Real_GlblAtomics_gfx10<0x034>;
2196defm GLOBAL_ATOMIC_SMIN         : FLAT_Real_GlblAtomics_gfx10<0x035>;
2197defm GLOBAL_ATOMIC_UMIN         : FLAT_Real_GlblAtomics_gfx10<0x036>;
2198defm GLOBAL_ATOMIC_SMAX         : FLAT_Real_GlblAtomics_gfx10<0x037>;
2199defm GLOBAL_ATOMIC_UMAX         : FLAT_Real_GlblAtomics_gfx10<0x038>;
2200defm GLOBAL_ATOMIC_AND          : FLAT_Real_GlblAtomics_gfx10<0x039>;
2201defm GLOBAL_ATOMIC_OR           : FLAT_Real_GlblAtomics_gfx10<0x03a>;
2202defm GLOBAL_ATOMIC_XOR          : FLAT_Real_GlblAtomics_gfx10<0x03b>;
2203defm GLOBAL_ATOMIC_INC          : FLAT_Real_GlblAtomics_gfx10<0x03c>;
2204defm GLOBAL_ATOMIC_DEC          : FLAT_Real_GlblAtomics_gfx10<0x03d>;
2205defm GLOBAL_ATOMIC_FCMPSWAP     : FLAT_Real_GlblAtomics_gfx10<0x03e>;
2206defm GLOBAL_ATOMIC_FMIN         : FLAT_Real_GlblAtomics_gfx10<0x03f>;
2207defm GLOBAL_ATOMIC_FMAX         : FLAT_Real_GlblAtomics_gfx10<0x040>;
2208defm GLOBAL_ATOMIC_SWAP_X2      : FLAT_Real_GlblAtomics_gfx10<0x050>;
2209defm GLOBAL_ATOMIC_CMPSWAP_X2   : FLAT_Real_GlblAtomics_gfx10<0x051>;
2210defm GLOBAL_ATOMIC_ADD_X2       : FLAT_Real_GlblAtomics_gfx10<0x052>;
2211defm GLOBAL_ATOMIC_SUB_X2       : FLAT_Real_GlblAtomics_gfx10<0x053>;
2212defm GLOBAL_ATOMIC_SMIN_X2      : FLAT_Real_GlblAtomics_gfx10<0x055>;
2213defm GLOBAL_ATOMIC_UMIN_X2      : FLAT_Real_GlblAtomics_gfx10<0x056>;
2214defm GLOBAL_ATOMIC_SMAX_X2      : FLAT_Real_GlblAtomics_gfx10<0x057>;
2215defm GLOBAL_ATOMIC_UMAX_X2      : FLAT_Real_GlblAtomics_gfx10<0x058>;
2216defm GLOBAL_ATOMIC_AND_X2       : FLAT_Real_GlblAtomics_gfx10<0x059>;
2217defm GLOBAL_ATOMIC_OR_X2        : FLAT_Real_GlblAtomics_gfx10<0x05a>;
2218defm GLOBAL_ATOMIC_XOR_X2       : FLAT_Real_GlblAtomics_gfx10<0x05b>;
2219defm GLOBAL_ATOMIC_INC_X2       : FLAT_Real_GlblAtomics_gfx10<0x05c>;
2220defm GLOBAL_ATOMIC_DEC_X2       : FLAT_Real_GlblAtomics_gfx10<0x05d>;
2221defm GLOBAL_ATOMIC_FCMPSWAP_X2  : FLAT_Real_GlblAtomics_gfx10<0x05e>;
2222defm GLOBAL_ATOMIC_FMIN_X2      : FLAT_Real_GlblAtomics_gfx10<0x05f>;
2223defm GLOBAL_ATOMIC_FMAX_X2      : FLAT_Real_GlblAtomics_gfx10<0x060>;
2224defm GLOBAL_LOAD_DWORD_ADDTID   : FLAT_Real_AllAddr_gfx10<0x016>;
2225defm GLOBAL_STORE_DWORD_ADDTID  : FLAT_Real_AllAddr_gfx10<0x017>;
2226
// LDS variants reuse the opcode of the corresponding plain GLOBAL_LOAD_*
// entry (0x008-0x00c); FLAT_Real_AllAddr_LDS_gfx10 rewrites their assembly
// string to end in " lds".
2227defm GLOBAL_LOAD_LDS_UBYTE      : FLAT_Real_AllAddr_LDS_gfx10 <0x008>;
2228defm GLOBAL_LOAD_LDS_SBYTE      : FLAT_Real_AllAddr_LDS_gfx10 <0x009>;
2229defm GLOBAL_LOAD_LDS_USHORT     : FLAT_Real_AllAddr_LDS_gfx10 <0x00a>;
2230defm GLOBAL_LOAD_LDS_SSHORT     : FLAT_Real_AllAddr_LDS_gfx10 <0x00b>;
2231defm GLOBAL_LOAD_LDS_DWORD      : FLAT_Real_AllAddr_LDS_gfx10 <0x00c>;
2232
2233// ENC_FLAT_SCRATCH.
// GFX10 opcode table for SCRATCH_* instructions. Every entry gets the plain,
// _SADDR and _ST forms (FLAT_Real_ScratchAllAddr_gfx10). Opcode values match
// the FLAT_* / GLOBAL_* tables above for instructions with the same suffix,
// including DWORDX4 being numbered before DWORDX3.
2234defm SCRATCH_LOAD_UBYTE         : FLAT_Real_ScratchAllAddr_gfx10<0x008>;
2235defm SCRATCH_LOAD_SBYTE         : FLAT_Real_ScratchAllAddr_gfx10<0x009>;
2236defm SCRATCH_LOAD_USHORT        : FLAT_Real_ScratchAllAddr_gfx10<0x00a>;
2237defm SCRATCH_LOAD_SSHORT        : FLAT_Real_ScratchAllAddr_gfx10<0x00b>;
2238defm SCRATCH_LOAD_DWORD         : FLAT_Real_ScratchAllAddr_gfx10<0x00c>;
2239defm SCRATCH_LOAD_DWORDX2       : FLAT_Real_ScratchAllAddr_gfx10<0x00d>;
2240defm SCRATCH_LOAD_DWORDX4       : FLAT_Real_ScratchAllAddr_gfx10<0x00e>;
2241defm SCRATCH_LOAD_DWORDX3       : FLAT_Real_ScratchAllAddr_gfx10<0x00f>;
2242defm SCRATCH_STORE_BYTE         : FLAT_Real_ScratchAllAddr_gfx10<0x018>;
2243defm SCRATCH_STORE_BYTE_D16_HI  : FLAT_Real_ScratchAllAddr_gfx10<0x019>;
2244defm SCRATCH_STORE_SHORT        : FLAT_Real_ScratchAllAddr_gfx10<0x01a>;
2245defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_ScratchAllAddr_gfx10<0x01b>;
2246defm SCRATCH_STORE_DWORD        : FLAT_Real_ScratchAllAddr_gfx10<0x01c>;
2247defm SCRATCH_STORE_DWORDX2      : FLAT_Real_ScratchAllAddr_gfx10<0x01d>;
2248defm SCRATCH_STORE_DWORDX4      : FLAT_Real_ScratchAllAddr_gfx10<0x01e>;
2249defm SCRATCH_STORE_DWORDX3      : FLAT_Real_ScratchAllAddr_gfx10<0x01f>;
2250defm SCRATCH_LOAD_UBYTE_D16     : FLAT_Real_ScratchAllAddr_gfx10<0x020>;
2251defm SCRATCH_LOAD_UBYTE_D16_HI  : FLAT_Real_ScratchAllAddr_gfx10<0x021>;
2252defm SCRATCH_LOAD_SBYTE_D16     : FLAT_Real_ScratchAllAddr_gfx10<0x022>;
2253defm SCRATCH_LOAD_SBYTE_D16_HI  : FLAT_Real_ScratchAllAddr_gfx10<0x023>;
2254defm SCRATCH_LOAD_SHORT_D16     : FLAT_Real_ScratchAllAddr_gfx10<0x024>;
2255defm SCRATCH_LOAD_SHORT_D16_HI  : FLAT_Real_ScratchAllAddr_gfx10<0x025>;
2256
// LDS variants reuse the opcode of the corresponding plain SCRATCH_LOAD_*
// entry (0x008-0x00c) and also cover the _ST form.
2257defm SCRATCH_LOAD_LDS_UBYTE     : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x008>;
2258defm SCRATCH_LOAD_LDS_SBYTE     : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x009>;
2259defm SCRATCH_LOAD_LDS_USHORT    : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x00a>;
2260defm SCRATCH_LOAD_LDS_SSHORT    : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x00b>;
2261defm SCRATCH_LOAD_LDS_DWORD     : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x00c>;
2262
2263//===----------------------------------------------------------------------===//
2264// GFX11
2265//===----------------------------------------------------------------------===//
2266
// Real (encodable) FLAT instruction for GFX11, derived from pseudo `ps` with
// opcode `op`. `opName` is forwarded to FLAT_Real and defaults to the
// pseudo's Mnemonic. Registered in the GFX11 encoding family, assembled only
// when isGFX11Only holds, and decoded from the "GFX11" namespace.
2267class FLAT_Real_gfx11 <bits<7> op, FLAT_Pseudo ps, string opName = ps.Mnemonic> :
2268  FLAT_Real <op, ps, opName>,
2269  SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX11> {
2270  let AssemblerPredicate = isGFX11Only;
2271  let DecoderNamespace = "GFX11";
2272
  // Bits 13 / 14: DLC / GLC from the cpol operand when the pseudo has the
  // respective bit, otherwise the pseudo's fixed dlcValue / glcValue.
2273  let Inst{13}    = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlcValue);
2274  let Inst{14}    = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glcValue);
  // Bit 15: SLC is always taken from the cpol operand.
2275  let Inst{15}    = cpol{CPolBit.SLC};
  // Bits 17-16: the `seg` field (declared outside this chunk).
2276  let Inst{17-16} = seg;
  // Bit 55: the pseudo's `sve` flag.
2277  let Inst{55}    = ps.sve;
2278}
2279
// When `renamed` is non-zero, defines a MnemonicAlias (GFX11 only) that maps
// the Mnemonic of the pseudo named `ps` to `opName`. When `renamed` is zero
// nothing is emitted and the pseudo named `ps` is never looked up.
2280multiclass FLAT_Aliases_gfx11<string ps, string opName, int renamed> {
2281  if renamed then
2282    def _renamed_gfx11 : MnemonicAlias<!cast<FLAT_Pseudo>(ps).Mnemonic, opName>, Requires<[isGFX11Only]>;
2283}
2284
// GFX11 real instruction for the pseudo named `ps`, printed as `opName`.
// Bits 54-48 are fixed to SGPR_NULL_gfx11plus.Index. If `renamed` is set, the
// mnemonic alias is emitted as well.
multiclass FLAT_Real_Base_gfx11<bits<7> op, string ps, string opName, int renamed = false> {
  defm "" : FLAT_Aliases_gfx11<ps, opName, renamed>;

  defvar basePs = !cast<FLAT_Pseudo>(ps);
  def _gfx11 : FLAT_Real_gfx11<op, basePs, opName> {
    let Inst{54-48} = SGPR_NULL_gfx11plus.Index;
  }
}
2291
// GFX11 real instruction for the ps#"_RTN" pseudo, printed as `opName`.
// Bits 54-48 are fixed to SGPR_NULL_gfx11plus.Index.
multiclass FLAT_Real_RTN_gfx11<bits<7> op, string ps, string opName> {
  defvar rtnPs = !cast<FLAT_Pseudo>(ps#"_RTN");

  def _RTN_gfx11 : FLAT_Real_gfx11<op, rtnPs, opName> {
    let Inst{54-48} = SGPR_NULL_gfx11plus.Index;
  }
}
2297
// GFX11 real instruction for the ps#"_SADDR" pseudo, printed as `opName`.
multiclass FLAT_Real_SADDR_gfx11<bits<7> op, string ps, string opName> {
  defvar saddrPs = !cast<FLAT_Pseudo>(ps#"_SADDR");
  def _SADDR_gfx11 : FLAT_Real_gfx11<op, saddrPs, opName>;
}
2301
// GFX11 real instruction for the ps#"_SADDR_RTN" pseudo, printed as `opName`.
multiclass FLAT_Real_SADDR_RTN_gfx11<bits<7> op, string ps, string opName> {
  defvar saddrRtnPs = !cast<FLAT_Pseudo>(ps#"_SADDR_RTN");
  def _SADDR_RTN_gfx11 : FLAT_Real_gfx11<op, saddrRtnPs, opName>;
}
2305
// GFX11 real instruction for the ps#"_ST" pseudo, printed as `opName`.
// Bits 54-48 are fixed to SGPR_NULL_gfx11plus.Index and the record is gated
// on HasFlatScratchSTMode.
multiclass FLAT_Real_ST_gfx11<bits<7> op, string ps, string opName> {
  defvar stPs = !cast<FLAT_Pseudo>(ps#"_ST");

  def _ST_gfx11 : FLAT_Real_gfx11<op, stPs, opName> {
    let Inst{54-48} = SGPR_NULL_gfx11plus.Index;
    let OtherPredicates = [HasFlatScratchSTMode];
  }
}
2312
// GFX11 real instruction for the ps#"_SVS" pseudo, printed as `opName` and
// gated on HasFlatScratchSVSMode.
multiclass FLAT_Real_SVS_gfx11<bits<7> op, string ps, string opName> {
  defvar svsPs = !cast<FLAT_Pseudo>(ps#"_SVS");

  def _SVS_gfx11 : FLAT_Real_gfx11<op, svsPs, opName> {
    let OtherPredicates = [HasFlatScratchSVSMode];
  }
}
2318
// Plain and _SADDR GFX11 real instructions for one pseudo family; the
// optional mnemonic alias is handled by FLAT_Real_Base_gfx11.
multiclass FLAT_Real_AllAddr_gfx11<bits<7> op, string ps, string opName, int renamed = false> {
  defm "" : FLAT_Real_Base_gfx11<op, ps, opName, renamed>;
  defm "" : FLAT_Real_SADDR_gfx11<op, ps, opName>;
}
2322
// Plain and _RTN GFX11 real instructions for a FLAT atomic; the optional
// mnemonic alias is handled by FLAT_Real_Base_gfx11.
multiclass FLAT_Real_Atomics_gfx11<bits<7> op, string ps, string opName, int renamed = false> {
  defm "" : FLAT_Real_Base_gfx11<op, ps, opName, renamed>;
  defm "" : FLAT_Real_RTN_gfx11<op, ps, opName>;
}
2326
// All four GFX11 real instructions of a global atomic: plain, _SADDR, _RTN
// and _SADDR_RTN.
multiclass FLAT_Real_GlblAtomics_gfx11<bits<7> op, string ps, string opName, int renamed = false> {
  defm "" : FLAT_Real_AllAddr_gfx11<op, ps, opName, renamed>;
  defm "" : FLAT_Real_RTN_gfx11<op, ps, opName>;
  defm "" : FLAT_Real_SADDR_RTN_gfx11<op, ps, opName>;
}
2331
// Only the returning forms (_RTN and _SADDR_RTN) of a global atomic on GFX11.
// The optional mnemonic alias is taken from the ps#"_RTN" pseudo.
multiclass FLAT_Real_GlblAtomics_RTN_gfx11<bits<7> op, string ps, string opName, int renamed = false> {
  defm "" : FLAT_Aliases_gfx11<ps#"_RTN", opName, renamed>;
  defm "" : FLAT_Real_RTN_gfx11<op, ps, opName>;
  defm "" : FLAT_Real_SADDR_RTN_gfx11<op, ps, opName>;
}
2336
// Plain, _SADDR, _ST and _SVS GFX11 real instructions for a scratch pseudo
// family; the optional mnemonic alias is handled by FLAT_Real_Base_gfx11.
multiclass FLAT_Real_ScratchAllAddr_gfx11<bits<7> op, string ps, string opName, int renamed = false> {
  defm "" : FLAT_Real_Base_gfx11<op, ps, opName, renamed>;
  defm "" : FLAT_Real_SADDR_gfx11<op, ps, opName>;
  defm "" : FLAT_Real_ST_gfx11<op, ps, opName>;
  defm "" : FLAT_Real_SVS_gfx11<op, ps, opName>;
}
2342
2343// ENC_FLAT.
// GFX11 opcode table for FLAT_* instructions. Each entry names the new record
// prefix (defm name), the opcode, the existing pseudo it is built from (second
// argument) and the GFX11 assembly mnemonic (third argument). A trailing
// `true` additionally emits a MnemonicAlias from the pseudo's mnemonic to the
// GFX11 one; entries without it (D16 loads/stores, float atomics) get no
// alias.
// Opcode 0x037 is unused in this table (the GLBL table below assigns it to
// GLOBAL_ATOMIC_CSUB_U32).
2344defm FLAT_LOAD_U8               : FLAT_Real_Base_gfx11<0x010, "FLAT_LOAD_UBYTE", "flat_load_u8", true>;
2345defm FLAT_LOAD_I8               : FLAT_Real_Base_gfx11<0x011, "FLAT_LOAD_SBYTE", "flat_load_i8", true>;
2346defm FLAT_LOAD_U16              : FLAT_Real_Base_gfx11<0x012, "FLAT_LOAD_USHORT", "flat_load_u16", true>;
2347defm FLAT_LOAD_I16              : FLAT_Real_Base_gfx11<0x013, "FLAT_LOAD_SSHORT", "flat_load_i16", true>;
2348defm FLAT_LOAD_B32              : FLAT_Real_Base_gfx11<0x014, "FLAT_LOAD_DWORD", "flat_load_b32", true>;
2349defm FLAT_LOAD_B64              : FLAT_Real_Base_gfx11<0x015, "FLAT_LOAD_DWORDX2", "flat_load_b64", true>;
2350defm FLAT_LOAD_B96              : FLAT_Real_Base_gfx11<0x016, "FLAT_LOAD_DWORDX3", "flat_load_b96", true>;
2351defm FLAT_LOAD_B128             : FLAT_Real_Base_gfx11<0x017, "FLAT_LOAD_DWORDX4", "flat_load_b128", true>;
2352defm FLAT_STORE_B8              : FLAT_Real_Base_gfx11<0x018, "FLAT_STORE_BYTE", "flat_store_b8", true>;
2353defm FLAT_STORE_B16             : FLAT_Real_Base_gfx11<0x019, "FLAT_STORE_SHORT", "flat_store_b16", true>;
2354defm FLAT_STORE_B32             : FLAT_Real_Base_gfx11<0x01a, "FLAT_STORE_DWORD", "flat_store_b32", true>;
2355defm FLAT_STORE_B64             : FLAT_Real_Base_gfx11<0x01b, "FLAT_STORE_DWORDX2", "flat_store_b64", true>;
2356defm FLAT_STORE_B96             : FLAT_Real_Base_gfx11<0x01c, "FLAT_STORE_DWORDX3", "flat_store_b96", true>;
2357defm FLAT_STORE_B128            : FLAT_Real_Base_gfx11<0x01d, "FLAT_STORE_DWORDX4", "flat_store_b128", true>;
2358defm FLAT_LOAD_D16_U8           : FLAT_Real_Base_gfx11<0x01e, "FLAT_LOAD_UBYTE_D16", "flat_load_d16_u8">;
2359defm FLAT_LOAD_D16_I8           : FLAT_Real_Base_gfx11<0x01f, "FLAT_LOAD_SBYTE_D16", "flat_load_d16_i8">;
2360defm FLAT_LOAD_D16_B16          : FLAT_Real_Base_gfx11<0x020, "FLAT_LOAD_SHORT_D16", "flat_load_d16_b16">;
2361defm FLAT_LOAD_D16_HI_U8        : FLAT_Real_Base_gfx11<0x021, "FLAT_LOAD_UBYTE_D16_HI", "flat_load_d16_hi_u8">;
2362defm FLAT_LOAD_D16_HI_I8        : FLAT_Real_Base_gfx11<0x022, "FLAT_LOAD_SBYTE_D16_HI", "flat_load_d16_hi_i8">;
2363defm FLAT_LOAD_D16_HI_B16       : FLAT_Real_Base_gfx11<0x023, "FLAT_LOAD_SHORT_D16_HI", "flat_load_d16_hi_b16">;
2364defm FLAT_STORE_D16_HI_B8       : FLAT_Real_Base_gfx11<0x024, "FLAT_STORE_BYTE_D16_HI", "flat_store_d16_hi_b8">;
2365defm FLAT_STORE_D16_HI_B16      : FLAT_Real_Base_gfx11<0x025, "FLAT_STORE_SHORT_D16_HI", "flat_store_d16_hi_b16">;
2366defm FLAT_ATOMIC_SWAP_B32       : FLAT_Real_Atomics_gfx11<0x033, "FLAT_ATOMIC_SWAP", "flat_atomic_swap_b32", true>;
2367defm FLAT_ATOMIC_CMPSWAP_B32    : FLAT_Real_Atomics_gfx11<0x034, "FLAT_ATOMIC_CMPSWAP", "flat_atomic_cmpswap_b32", true>;
2368defm FLAT_ATOMIC_ADD_U32        : FLAT_Real_Atomics_gfx11<0x035, "FLAT_ATOMIC_ADD", "flat_atomic_add_u32", true>;
2369defm FLAT_ATOMIC_SUB_U32        : FLAT_Real_Atomics_gfx11<0x036, "FLAT_ATOMIC_SUB", "flat_atomic_sub_u32", true>;
2370defm FLAT_ATOMIC_MIN_I32        : FLAT_Real_Atomics_gfx11<0x038, "FLAT_ATOMIC_SMIN", "flat_atomic_min_i32", true>;
2371defm FLAT_ATOMIC_MIN_U32        : FLAT_Real_Atomics_gfx11<0x039, "FLAT_ATOMIC_UMIN", "flat_atomic_min_u32", true>;
2372defm FLAT_ATOMIC_MAX_I32        : FLAT_Real_Atomics_gfx11<0x03a, "FLAT_ATOMIC_SMAX", "flat_atomic_max_i32", true>;
2373defm FLAT_ATOMIC_MAX_U32        : FLAT_Real_Atomics_gfx11<0x03b, "FLAT_ATOMIC_UMAX", "flat_atomic_max_u32", true>;
2374defm FLAT_ATOMIC_AND_B32        : FLAT_Real_Atomics_gfx11<0x03c, "FLAT_ATOMIC_AND", "flat_atomic_and_b32", true>;
2375defm FLAT_ATOMIC_OR_B32         : FLAT_Real_Atomics_gfx11<0x03d, "FLAT_ATOMIC_OR", "flat_atomic_or_b32", true>;
2376defm FLAT_ATOMIC_XOR_B32        : FLAT_Real_Atomics_gfx11<0x03e, "FLAT_ATOMIC_XOR", "flat_atomic_xor_b32", true>;
2377defm FLAT_ATOMIC_INC_U32        : FLAT_Real_Atomics_gfx11<0x03f, "FLAT_ATOMIC_INC", "flat_atomic_inc_u32", true>;
2378defm FLAT_ATOMIC_DEC_U32        : FLAT_Real_Atomics_gfx11<0x040, "FLAT_ATOMIC_DEC", "flat_atomic_dec_u32", true>;
2379defm FLAT_ATOMIC_SWAP_B64       : FLAT_Real_Atomics_gfx11<0x041, "FLAT_ATOMIC_SWAP_X2", "flat_atomic_swap_b64", true>;
2380defm FLAT_ATOMIC_CMPSWAP_B64    : FLAT_Real_Atomics_gfx11<0x042, "FLAT_ATOMIC_CMPSWAP_X2", "flat_atomic_cmpswap_b64", true>;
2381defm FLAT_ATOMIC_ADD_U64        : FLAT_Real_Atomics_gfx11<0x043, "FLAT_ATOMIC_ADD_X2", "flat_atomic_add_u64", true>;
2382defm FLAT_ATOMIC_SUB_U64        : FLAT_Real_Atomics_gfx11<0x044, "FLAT_ATOMIC_SUB_X2", "flat_atomic_sub_u64", true>;
2383defm FLAT_ATOMIC_MIN_I64        : FLAT_Real_Atomics_gfx11<0x045, "FLAT_ATOMIC_SMIN_X2", "flat_atomic_min_i64", true>;
2384defm FLAT_ATOMIC_MIN_U64        : FLAT_Real_Atomics_gfx11<0x046, "FLAT_ATOMIC_UMIN_X2", "flat_atomic_min_u64", true>;
2385defm FLAT_ATOMIC_MAX_I64        : FLAT_Real_Atomics_gfx11<0x047, "FLAT_ATOMIC_SMAX_X2", "flat_atomic_max_i64", true>;
2386defm FLAT_ATOMIC_MAX_U64        : FLAT_Real_Atomics_gfx11<0x048, "FLAT_ATOMIC_UMAX_X2", "flat_atomic_max_u64", true>;
2387defm FLAT_ATOMIC_AND_B64        : FLAT_Real_Atomics_gfx11<0x049, "FLAT_ATOMIC_AND_X2", "flat_atomic_and_b64", true>;
2388defm FLAT_ATOMIC_OR_B64         : FLAT_Real_Atomics_gfx11<0x04a, "FLAT_ATOMIC_OR_X2", "flat_atomic_or_b64", true>;
2389defm FLAT_ATOMIC_XOR_B64        : FLAT_Real_Atomics_gfx11<0x04b, "FLAT_ATOMIC_XOR_X2", "flat_atomic_xor_b64", true>;
2390defm FLAT_ATOMIC_INC_U64        : FLAT_Real_Atomics_gfx11<0x04c, "FLAT_ATOMIC_INC_X2", "flat_atomic_inc_u64", true>;
2391defm FLAT_ATOMIC_DEC_U64        : FLAT_Real_Atomics_gfx11<0x04d, "FLAT_ATOMIC_DEC_X2", "flat_atomic_dec_u64", true>;
2392defm FLAT_ATOMIC_CMPSWAP_F32    : FLAT_Real_Atomics_gfx11<0x050, "FLAT_ATOMIC_FCMPSWAP", "flat_atomic_cmpswap_f32">;
2393defm FLAT_ATOMIC_MIN_F32        : FLAT_Real_Atomics_gfx11<0x051, "FLAT_ATOMIC_FMIN", "flat_atomic_min_f32">;
2394defm FLAT_ATOMIC_MAX_F32        : FLAT_Real_Atomics_gfx11<0x052, "FLAT_ATOMIC_FMAX", "flat_atomic_max_f32">;
2395defm FLAT_ATOMIC_ADD_F32        : FLAT_Real_Atomics_gfx11<0x056, "FLAT_ATOMIC_ADD_F32", "flat_atomic_add_f32">;
2396
2397// ENC_FLAT_GLBL.
// GFX11 opcode table for GLOBAL_* instructions, using the same argument layout
// as the FLAT_* table above: opcode, source pseudo name, GFX11 mnemonic, and
// an optional `true` that requests a MnemonicAlias from the pseudo's mnemonic.
// Loads and stores get the plain and _SADDR forms (FLAT_Real_AllAddr_gfx11);
// atomics additionally get _RTN and _SADDR_RTN (FLAT_Real_GlblAtomics_gfx11).
// Opcode values match the FLAT_* table for the same operation. Entries that
// have no counterpart in the FLAT_* table above: the ADDTID load/store
// (0x028 / 0x029) and GLOBAL_ATOMIC_CSUB_U32 (0x037).
2398defm GLOBAL_LOAD_U8             : FLAT_Real_AllAddr_gfx11<0x010, "GLOBAL_LOAD_UBYTE", "global_load_u8", true>;
2399defm GLOBAL_LOAD_I8             : FLAT_Real_AllAddr_gfx11<0x011, "GLOBAL_LOAD_SBYTE", "global_load_i8", true>;
2400defm GLOBAL_LOAD_U16            : FLAT_Real_AllAddr_gfx11<0x012, "GLOBAL_LOAD_USHORT", "global_load_u16", true>;
2401defm GLOBAL_LOAD_I16            : FLAT_Real_AllAddr_gfx11<0x013, "GLOBAL_LOAD_SSHORT", "global_load_i16", true>;
2402defm GLOBAL_LOAD_B32            : FLAT_Real_AllAddr_gfx11<0x014, "GLOBAL_LOAD_DWORD", "global_load_b32", true>;
2403defm GLOBAL_LOAD_B64            : FLAT_Real_AllAddr_gfx11<0x015, "GLOBAL_LOAD_DWORDX2", "global_load_b64", true>;
2404defm GLOBAL_LOAD_B96            : FLAT_Real_AllAddr_gfx11<0x016, "GLOBAL_LOAD_DWORDX3", "global_load_b96", true>;
2405defm GLOBAL_LOAD_B128           : FLAT_Real_AllAddr_gfx11<0x017, "GLOBAL_LOAD_DWORDX4", "global_load_b128", true>;
2406defm GLOBAL_STORE_B8            : FLAT_Real_AllAddr_gfx11<0x018, "GLOBAL_STORE_BYTE", "global_store_b8", true>;
2407defm GLOBAL_STORE_B16           : FLAT_Real_AllAddr_gfx11<0x019, "GLOBAL_STORE_SHORT", "global_store_b16", true>;
2408defm GLOBAL_STORE_B32           : FLAT_Real_AllAddr_gfx11<0x01a, "GLOBAL_STORE_DWORD", "global_store_b32", true>;
2409defm GLOBAL_STORE_B64           : FLAT_Real_AllAddr_gfx11<0x01b, "GLOBAL_STORE_DWORDX2", "global_store_b64", true>;
2410defm GLOBAL_STORE_B96           : FLAT_Real_AllAddr_gfx11<0x01c, "GLOBAL_STORE_DWORDX3", "global_store_b96", true>;
2411defm GLOBAL_STORE_B128          : FLAT_Real_AllAddr_gfx11<0x01d, "GLOBAL_STORE_DWORDX4", "global_store_b128", true>;
2412defm GLOBAL_LOAD_D16_U8         : FLAT_Real_AllAddr_gfx11<0x01e, "GLOBAL_LOAD_UBYTE_D16", "global_load_d16_u8">;
2413defm GLOBAL_LOAD_D16_I8         : FLAT_Real_AllAddr_gfx11<0x01f, "GLOBAL_LOAD_SBYTE_D16", "global_load_d16_i8">;
2414defm GLOBAL_LOAD_D16_B16        : FLAT_Real_AllAddr_gfx11<0x020, "GLOBAL_LOAD_SHORT_D16", "global_load_d16_b16">;
2415defm GLOBAL_LOAD_D16_HI_U8      : FLAT_Real_AllAddr_gfx11<0x021, "GLOBAL_LOAD_UBYTE_D16_HI", "global_load_d16_hi_u8">;
2416defm GLOBAL_LOAD_D16_HI_I8      : FLAT_Real_AllAddr_gfx11<0x022, "GLOBAL_LOAD_SBYTE_D16_HI", "global_load_d16_hi_i8">;
2417defm GLOBAL_LOAD_D16_HI_B16     : FLAT_Real_AllAddr_gfx11<0x023, "GLOBAL_LOAD_SHORT_D16_HI", "global_load_d16_hi_b16">;
2418defm GLOBAL_STORE_D16_HI_B8     : FLAT_Real_AllAddr_gfx11<0x024, "GLOBAL_STORE_BYTE_D16_HI", "global_store_d16_hi_b8">;
2419defm GLOBAL_STORE_D16_HI_B16    : FLAT_Real_AllAddr_gfx11<0x025, "GLOBAL_STORE_SHORT_D16_HI", "global_store_d16_hi_b16">;
2420defm GLOBAL_LOAD_ADDTID_B32     : FLAT_Real_AllAddr_gfx11<0x028, "GLOBAL_LOAD_DWORD_ADDTID", "global_load_addtid_b32">;
2421defm GLOBAL_STORE_ADDTID_B32    : FLAT_Real_AllAddr_gfx11<0x029, "GLOBAL_STORE_DWORD_ADDTID", "global_store_addtid_b32">;
2422defm GLOBAL_ATOMIC_SWAP_B32     : FLAT_Real_GlblAtomics_gfx11<0x033, "GLOBAL_ATOMIC_SWAP", "global_atomic_swap_b32", true>;
2423defm GLOBAL_ATOMIC_CMPSWAP_B32  : FLAT_Real_GlblAtomics_gfx11<0x034, "GLOBAL_ATOMIC_CMPSWAP", "global_atomic_cmpswap_b32", true>;
2424defm GLOBAL_ATOMIC_ADD_U32      : FLAT_Real_GlblAtomics_gfx11<0x035, "GLOBAL_ATOMIC_ADD", "global_atomic_add_u32", true>;
2425defm GLOBAL_ATOMIC_SUB_U32      : FLAT_Real_GlblAtomics_gfx11<0x036, "GLOBAL_ATOMIC_SUB", "global_atomic_sub_u32", true>;
2426defm GLOBAL_ATOMIC_CSUB_U32     : FLAT_Real_GlblAtomics_gfx11<0x037, "GLOBAL_ATOMIC_CSUB", "global_atomic_csub_u32", true>;
2427defm GLOBAL_ATOMIC_MIN_I32      : FLAT_Real_GlblAtomics_gfx11<0x038, "GLOBAL_ATOMIC_SMIN", "global_atomic_min_i32", true>;
2428defm GLOBAL_ATOMIC_MIN_U32      : FLAT_Real_GlblAtomics_gfx11<0x039, "GLOBAL_ATOMIC_UMIN", "global_atomic_min_u32", true>;
2429defm GLOBAL_ATOMIC_MAX_I32      : FLAT_Real_GlblAtomics_gfx11<0x03a, "GLOBAL_ATOMIC_SMAX", "global_atomic_max_i32", true>;
2430defm GLOBAL_ATOMIC_MAX_U32      : FLAT_Real_GlblAtomics_gfx11<0x03b, "GLOBAL_ATOMIC_UMAX", "global_atomic_max_u32", true>;
2431defm GLOBAL_ATOMIC_AND_B32      : FLAT_Real_GlblAtomics_gfx11<0x03c, "GLOBAL_ATOMIC_AND", "global_atomic_and_b32", true>;
2432defm GLOBAL_ATOMIC_OR_B32       : FLAT_Real_GlblAtomics_gfx11<0x03d, "GLOBAL_ATOMIC_OR", "global_atomic_or_b32", true>;
2433defm GLOBAL_ATOMIC_XOR_B32      : FLAT_Real_GlblAtomics_gfx11<0x03e, "GLOBAL_ATOMIC_XOR", "global_atomic_xor_b32", true>;
2434defm GLOBAL_ATOMIC_INC_U32      : FLAT_Real_GlblAtomics_gfx11<0x03f, "GLOBAL_ATOMIC_INC", "global_atomic_inc_u32", true>;
2435defm GLOBAL_ATOMIC_DEC_U32      : FLAT_Real_GlblAtomics_gfx11<0x040, "GLOBAL_ATOMIC_DEC", "global_atomic_dec_u32", true>;
2436defm GLOBAL_ATOMIC_SWAP_B64     : FLAT_Real_GlblAtomics_gfx11<0x041, "GLOBAL_ATOMIC_SWAP_X2", "global_atomic_swap_b64", true>;
2437defm GLOBAL_ATOMIC_CMPSWAP_B64  : FLAT_Real_GlblAtomics_gfx11<0x042, "GLOBAL_ATOMIC_CMPSWAP_X2", "global_atomic_cmpswap_b64", true>;
2438defm GLOBAL_ATOMIC_ADD_U64      : FLAT_Real_GlblAtomics_gfx11<0x043, "GLOBAL_ATOMIC_ADD_X2", "global_atomic_add_u64", true>;
2439defm GLOBAL_ATOMIC_SUB_U64      : FLAT_Real_GlblAtomics_gfx11<0x044, "GLOBAL_ATOMIC_SUB_X2", "global_atomic_sub_u64", true>;
2440defm GLOBAL_ATOMIC_MIN_I64      : FLAT_Real_GlblAtomics_gfx11<0x045, "GLOBAL_ATOMIC_SMIN_X2", "global_atomic_min_i64", true>;
2441defm GLOBAL_ATOMIC_MIN_U64      : FLAT_Real_GlblAtomics_gfx11<0x046, "GLOBAL_ATOMIC_UMIN_X2", "global_atomic_min_u64", true>;
2442defm GLOBAL_ATOMIC_MAX_I64      : FLAT_Real_GlblAtomics_gfx11<0x047, "GLOBAL_ATOMIC_SMAX_X2", "global_atomic_max_i64", true>;
2443defm GLOBAL_ATOMIC_MAX_U64      : FLAT_Real_GlblAtomics_gfx11<0x048, "GLOBAL_ATOMIC_UMAX_X2", "global_atomic_max_u64", true>;
2444defm GLOBAL_ATOMIC_AND_B64      : FLAT_Real_GlblAtomics_gfx11<0x049, "GLOBAL_ATOMIC_AND_X2", "global_atomic_and_b64", true>;
2445defm GLOBAL_ATOMIC_OR_B64       : FLAT_Real_GlblAtomics_gfx11<0x04a, "GLOBAL_ATOMIC_OR_X2", "global_atomic_or_b64", true>;
2446defm GLOBAL_ATOMIC_XOR_B64      : FLAT_Real_GlblAtomics_gfx11<0x04b, "GLOBAL_ATOMIC_XOR_X2", "global_atomic_xor_b64", true>;
2447defm GLOBAL_ATOMIC_INC_U64      : FLAT_Real_GlblAtomics_gfx11<0x04c, "GLOBAL_ATOMIC_INC_X2", "global_atomic_inc_u64", true>;
2448defm GLOBAL_ATOMIC_DEC_U64      : FLAT_Real_GlblAtomics_gfx11<0x04d, "GLOBAL_ATOMIC_DEC_X2", "global_atomic_dec_u64", true>;
2449defm GLOBAL_ATOMIC_CMPSWAP_F32  : FLAT_Real_GlblAtomics_gfx11<0x050, "GLOBAL_ATOMIC_FCMPSWAP", "global_atomic_cmpswap_f32">;
2450defm GLOBAL_ATOMIC_MIN_F32      : FLAT_Real_GlblAtomics_gfx11<0x051, "GLOBAL_ATOMIC_FMIN", "global_atomic_min_f32">;
2451defm GLOBAL_ATOMIC_MAX_F32      : FLAT_Real_GlblAtomics_gfx11<0x052, "GLOBAL_ATOMIC_FMAX", "global_atomic_max_f32">;
2452defm GLOBAL_ATOMIC_ADD_F32      : FLAT_Real_GlblAtomics_gfx11<0x056, "GLOBAL_ATOMIC_ADD_F32", "global_atomic_add_f32">;
2453
2454// ENC_FLAT_SCRATCH.
// GFX11 scratch loads and stores, opcodes 0x10 through 0x25 with no gaps.
// Each entry passes an opcode, a string in the older BYTE/SHORT/DWORD naming,
// the GFX11 mnemonic, and (for the entries up to SCRATCH_STORE_B128) `true`.
// NOTE(review): the trailing `true` presumably marks the mnemonic as renamed
// so that an alias from the old spelling is emitted -- confirm against
// FLAT_Real_ScratchAllAddr_gfx11, which is defined earlier in the file.
2455defm SCRATCH_LOAD_U8            : FLAT_Real_ScratchAllAddr_gfx11<0x10, "SCRATCH_LOAD_UBYTE", "scratch_load_u8", true>;
2456defm SCRATCH_LOAD_I8            : FLAT_Real_ScratchAllAddr_gfx11<0x11, "SCRATCH_LOAD_SBYTE", "scratch_load_i8", true>;
2457defm SCRATCH_LOAD_U16           : FLAT_Real_ScratchAllAddr_gfx11<0x12, "SCRATCH_LOAD_USHORT", "scratch_load_u16", true>;
2458defm SCRATCH_LOAD_I16           : FLAT_Real_ScratchAllAddr_gfx11<0x13, "SCRATCH_LOAD_SSHORT", "scratch_load_i16", true>;
2459defm SCRATCH_LOAD_B32           : FLAT_Real_ScratchAllAddr_gfx11<0x14, "SCRATCH_LOAD_DWORD", "scratch_load_b32", true>;
2460defm SCRATCH_LOAD_B64           : FLAT_Real_ScratchAllAddr_gfx11<0x15, "SCRATCH_LOAD_DWORDX2", "scratch_load_b64", true>;
2461defm SCRATCH_LOAD_B96           : FLAT_Real_ScratchAllAddr_gfx11<0x16, "SCRATCH_LOAD_DWORDX3", "scratch_load_b96", true>;
2462defm SCRATCH_LOAD_B128          : FLAT_Real_ScratchAllAddr_gfx11<0x17, "SCRATCH_LOAD_DWORDX4", "scratch_load_b128", true>;
2463defm SCRATCH_STORE_B8           : FLAT_Real_ScratchAllAddr_gfx11<0x18, "SCRATCH_STORE_BYTE", "scratch_store_b8", true>;
2464defm SCRATCH_STORE_B16          : FLAT_Real_ScratchAllAddr_gfx11<0x19, "SCRATCH_STORE_SHORT", "scratch_store_b16", true>;
2465defm SCRATCH_STORE_B32          : FLAT_Real_ScratchAllAddr_gfx11<0x1a, "SCRATCH_STORE_DWORD", "scratch_store_b32", true>;
2466defm SCRATCH_STORE_B64          : FLAT_Real_ScratchAllAddr_gfx11<0x1b, "SCRATCH_STORE_DWORDX2", "scratch_store_b64", true>;
2467defm SCRATCH_STORE_B96          : FLAT_Real_ScratchAllAddr_gfx11<0x1c, "SCRATCH_STORE_DWORDX3", "scratch_store_b96", true>;
2468defm SCRATCH_STORE_B128         : FLAT_Real_ScratchAllAddr_gfx11<0x1d, "SCRATCH_STORE_DWORDX4", "scratch_store_b128", true>;
// D16 / D16_HI loads and stores. These entries do not pass the trailing flag.
2469defm SCRATCH_LOAD_D16_U8        : FLAT_Real_ScratchAllAddr_gfx11<0x1e, "SCRATCH_LOAD_UBYTE_D16", "scratch_load_d16_u8">;
2470defm SCRATCH_LOAD_D16_I8        : FLAT_Real_ScratchAllAddr_gfx11<0x1f, "SCRATCH_LOAD_SBYTE_D16", "scratch_load_d16_i8">;
2471defm SCRATCH_LOAD_D16_B16       : FLAT_Real_ScratchAllAddr_gfx11<0x20, "SCRATCH_LOAD_SHORT_D16", "scratch_load_d16_b16">;
2472defm SCRATCH_LOAD_D16_HI_U8     : FLAT_Real_ScratchAllAddr_gfx11<0x21, "SCRATCH_LOAD_UBYTE_D16_HI", "scratch_load_d16_hi_u8">;
2473defm SCRATCH_LOAD_D16_HI_I8     : FLAT_Real_ScratchAllAddr_gfx11<0x22, "SCRATCH_LOAD_SBYTE_D16_HI", "scratch_load_d16_hi_i8">;
2474defm SCRATCH_LOAD_D16_HI_B16    : FLAT_Real_ScratchAllAddr_gfx11<0x23, "SCRATCH_LOAD_SHORT_D16_HI", "scratch_load_d16_hi_b16">;
2475defm SCRATCH_STORE_D16_HI_B8    : FLAT_Real_ScratchAllAddr_gfx11<0x24, "SCRATCH_STORE_BYTE_D16_HI", "scratch_store_d16_hi_b8">;
2476defm SCRATCH_STORE_D16_HI_B16   : FLAT_Real_ScratchAllAddr_gfx11<0x25, "SCRATCH_STORE_SHORT_D16_HI", "scratch_store_d16_hi_b16">;
2477
2478//===----------------------------------------------------------------------===//
2479// GFX12
2480//===----------------------------------------------------------------------===//
2481
// GFX12 encoding of a FLAT-family pseudo instruction.
//
//   op     - 8-bit opcode, forwarded to VFLAT_Real.
//   ps     - the FLAT_Pseudo this record is derived from.
//   opName - mnemonic forwarded to VFLAT_Real; defaults to ps.Mnemonic.
//
// The record is registered under SIEncodingFamily.GFX12 for ps.PseudoInstr,
// is only assembled when isGFX12Plus holds, and is decoded from the "GFX12"
// decoder namespace.
class VFLAT_Real_gfx12<bits<8> op, FLAT_Pseudo ps, string opName = ps.Mnemonic>
    : VFLAT_Real<op, ps, opName>,
      SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX12> {
  let DecoderNamespace = "GFX12";
  let AssemblerPredicate = isGFX12Plus;

  // Encoding bits 25-24 are chosen from the pseudo's flags. The scratch flag
  // is tested first, so it takes precedence if both flags are set.
  let Inst{25-24} = !cond(ps.is_flat_scratch : 0b01,
                          ps.is_flat_global  : 0b10,
                          true               : 0b00);
}
2492
// Emits the optional GFX12 assembler mnemonic aliases for one instruction.
//
//   ps      - name of the FLAT_Pseudo record whose Mnemonic is the old spelling.
//   opName  - mnemonic that every alias resolves to.
//   renamed - when set, alias the pseudo's Mnemonic to opName.
//   alias   - one additional spelling to alias to opName; skipped when empty.
//
// Both aliases are restricted to isGFX12Plus.
multiclass VFLAT_Aliases_gfx12<string ps, string opName, int renamed, string alias> {
  defvar oldMnemonic = !cast<FLAT_Pseudo>(ps).Mnemonic;

  if renamed then
    def _renamed_gfx12 : MnemonicAlias<oldMnemonic, opName>,
                         Requires<[isGFX12Plus]>;

  if !ne(alias, "") then
    def _alias_gfx12 : MnemonicAlias<alias, opName>,
                       Requires<[isGFX12Plus]>;
}
2499
// Base GFX12 form of an instruction: defines <NAME>_gfx12 from the pseudo
// named `ps`, and inherits the mnemonic aliases controlled by `renamed` and
// `alias` from VFLAT_Aliases_gfx12.
multiclass VFLAT_Real_Base_gfx12<bits<8> op, string ps, string opName,
                                 int renamed = false, string alias = "">
    : VFLAT_Aliases_gfx12<ps, opName, renamed, alias> {
  defvar basePseudo = !cast<FLAT_Pseudo>(ps);

  def _gfx12 : VFLAT_Real_gfx12<op, basePseudo, opName> {
    // Encoding bits 6-0 are fixed to SGPR_NULL's hardware encoding.
    // NOTE(review): presumably this is the saddr field being marked unused --
    // confirm against VFLAT_Real.
    let Inst{6-0} = !cast<int>(SGPR_NULL_gfx11plus.HWEncoding);
  }
}
2506
// GFX12 form of the "<ps>_RTN" pseudo, defined as <NAME>_RTN_gfx12.
// No mnemonic aliases are emitted by this multiclass.
multiclass VFLAT_Real_RTN_gfx12<bits<8> op, string ps, string opName> {
  defvar rtnPseudo = !cast<FLAT_Pseudo>(ps#"_RTN");

  def _RTN_gfx12 : VFLAT_Real_gfx12<op, rtnPseudo, opName> {
    // Encoding bits 6-0 are fixed to SGPR_NULL's hardware encoding.
    // NOTE(review): presumably this is the saddr field being marked unused --
    // confirm against VFLAT_Real.
    let Inst{6-0} = !cast<int>(SGPR_NULL_gfx11plus.HWEncoding);
  }
}
2512
// GFX12 form of the "<ps>_SADDR" pseudo, defined as <NAME>_SADDR_gfx12.
// Unlike the base and _RTN forms, encoding bits 6-0 are not overridden here.
multiclass VFLAT_Real_SADDR_gfx12<bits<8> op, string ps, string opName> {
  defvar saddrPseudo = !cast<FLAT_Pseudo>(ps#"_SADDR");

  def _SADDR_gfx12 : VFLAT_Real_gfx12<op, saddrPseudo, opName>;
}
2516
// GFX12 form of the "<ps>_SADDR_RTN" pseudo, defined as <NAME>_SADDR_RTN_gfx12.
// Encoding bits 6-0 are not overridden here.
multiclass VFLAT_Real_SADDR_RTN_gfx12<bits<8> op, string ps, string opName> {
  defvar saddrRtnPseudo = !cast<FLAT_Pseudo>(ps#"_SADDR_RTN");

  def _SADDR_RTN_gfx12 : VFLAT_Real_gfx12<op, saddrRtnPseudo, opName>;
}
2520
// GFX12 form of the "<ps>_ST" pseudo, defined as <NAME>_ST_gfx12.
// The record is additionally gated on HasFlatScratchSTMode.
multiclass VFLAT_Real_ST_gfx12<bits<8> op, string ps, string opName> {
  defvar stPseudo = !cast<FLAT_Pseudo>(ps#"_ST");

  def _ST_gfx12 : VFLAT_Real_gfx12<op, stPseudo, opName> {
    let OtherPredicates = [HasFlatScratchSTMode];

    // Encoding bits 6-0 are fixed to SGPR_NULL's hardware encoding.
    // NOTE(review): presumably this is the saddr field being marked unused --
    // confirm against VFLAT_Real.
    let Inst{6-0} = !cast<int>(SGPR_NULL_gfx11plus.HWEncoding);
  }
}
2527
// GFX12 form of the "<ps>_SVS" pseudo, defined as <NAME>_SVS_gfx12.
// The record is additionally gated on HasFlatScratchSVSMode; encoding bits
// 6-0 are not overridden here.
multiclass VFLAT_Real_SVS_gfx12<bits<8> op, string ps, string opName> {
  defvar svsPseudo = !cast<FLAT_Pseudo>(ps#"_SVS");

  def _SVS_gfx12 : VFLAT_Real_gfx12<op, svsPseudo, opName> {
    let OtherPredicates = [HasFlatScratchSVSMode];
  }
}
2533
// FLAT atomic on GFX12: the base form (including the mnemonic aliases
// selected by `renamed` / `alias`) plus the "_RTN" form. Both share the same
// opcode and mnemonic.
2534multiclass VFLAT_Real_Atomics_gfx12<bits<8> op, string ps, string opName, int renamed = false, string alias = ""> :
2535  VFLAT_Real_Base_gfx12<op, ps, opName, renamed, alias>,
2536  VFLAT_Real_RTN_gfx12<op, ps, opName>;
2537
// Global load/store on GFX12: the base form (including the mnemonic aliases
// selected by `renamed` / `alias`) plus the "_SADDR" form. Both share the
// same opcode and mnemonic.
2538multiclass VGLOBAL_Real_AllAddr_gfx12<bits<8> op, string ps, string opName, int renamed = false, string alias = ""> :
2539  VFLAT_Real_Base_gfx12<op, ps, opName, renamed, alias>,
2540  VFLAT_Real_SADDR_gfx12<op, ps, opName>;
2541
// Global atomic on GFX12: everything VGLOBAL_Real_AllAddr_gfx12 defines (base
// and "_SADDR" forms, plus aliases) together with the "_RTN" and "_SADDR_RTN"
// forms. All four records share the same opcode and mnemonic.
2542multiclass VGLOBAL_Real_Atomics_gfx12<bits<8> op, string ps, string opName, int renamed = false, string alias = ""> :
2543  VGLOBAL_Real_AllAddr_gfx12<op, ps, opName, renamed, alias>,
2544  VFLAT_Real_RTN_gfx12<op, ps, opName>,
2545  VFLAT_Real_SADDR_RTN_gfx12<op, ps, opName>;
2546
// Scratch load/store on GFX12: the base, "_SADDR", "_ST" and "_SVS" forms,
// all sharing the same opcode and mnemonic. There is no `alias` parameter
// here; only `renamed` is forwarded, so VFLAT_Real_Base_gfx12 falls back to
// its default empty alias string.
2547multiclass VSCRATCH_Real_AllAddr_gfx12<bits<8> op, string ps, string opName, int renamed = false> :
2548  VFLAT_Real_Base_gfx12<op, ps, opName, renamed>,
2549  VFLAT_Real_SADDR_gfx12<op, ps, opName>,
2550  VFLAT_Real_ST_gfx12<op, ps, opName>,
2551  VFLAT_Real_SVS_gfx12<op, ps, opName>;
2552
2553// ENC_VFLAT.
// GFX12 FLAT instructions. Arguments are <opcode, pseudo record name, GFX12
// mnemonic[, renamed[, alias]]>. With `renamed = true` a MnemonicAlias from
// the pseudo's Mnemonic to the GFX12 mnemonic is also emitted; a non-empty
// `alias` adds one more alias to the same mnemonic.
//
// Loads and stores: VFLAT_Real_Base_gfx12 defines only the <NAME>_gfx12 form.
2554defm FLAT_LOAD_U8                  : VFLAT_Real_Base_gfx12<0x010, "FLAT_LOAD_UBYTE", "flat_load_u8", true>;
2555defm FLAT_LOAD_I8                  : VFLAT_Real_Base_gfx12<0x011, "FLAT_LOAD_SBYTE", "flat_load_i8", true>;
2556defm FLAT_LOAD_U16                 : VFLAT_Real_Base_gfx12<0x012, "FLAT_LOAD_USHORT", "flat_load_u16", true>;
2557defm FLAT_LOAD_I16                 : VFLAT_Real_Base_gfx12<0x013, "FLAT_LOAD_SSHORT", "flat_load_i16", true>;
2558defm FLAT_LOAD_B32                 : VFLAT_Real_Base_gfx12<0x014, "FLAT_LOAD_DWORD", "flat_load_b32", true>;
2559defm FLAT_LOAD_B64                 : VFLAT_Real_Base_gfx12<0x015, "FLAT_LOAD_DWORDX2", "flat_load_b64", true>;
2560defm FLAT_LOAD_B96                 : VFLAT_Real_Base_gfx12<0x016, "FLAT_LOAD_DWORDX3", "flat_load_b96", true>;
2561defm FLAT_LOAD_B128                : VFLAT_Real_Base_gfx12<0x017, "FLAT_LOAD_DWORDX4", "flat_load_b128", true>;
2562defm FLAT_STORE_B8                 : VFLAT_Real_Base_gfx12<0x018, "FLAT_STORE_BYTE", "flat_store_b8", true>;
2563defm FLAT_STORE_B16                : VFLAT_Real_Base_gfx12<0x019, "FLAT_STORE_SHORT", "flat_store_b16", true>;
2564defm FLAT_STORE_B32                : VFLAT_Real_Base_gfx12<0x01a, "FLAT_STORE_DWORD", "flat_store_b32", true>;
2565defm FLAT_STORE_B64                : VFLAT_Real_Base_gfx12<0x01b, "FLAT_STORE_DWORDX2", "flat_store_b64", true>;
2566defm FLAT_STORE_B96                : VFLAT_Real_Base_gfx12<0x01c, "FLAT_STORE_DWORDX3", "flat_store_b96", true>;
2567defm FLAT_STORE_B128               : VFLAT_Real_Base_gfx12<0x01d, "FLAT_STORE_DWORDX4", "flat_store_b128", true>;
// D16 / D16_HI loads and stores: `renamed` is left at its default (false), so
// no alias from the pseudo's Mnemonic is emitted for these.
2568defm FLAT_LOAD_D16_U8              : VFLAT_Real_Base_gfx12<0x01e, "FLAT_LOAD_UBYTE_D16", "flat_load_d16_u8">;
2569defm FLAT_LOAD_D16_I8              : VFLAT_Real_Base_gfx12<0x01f, "FLAT_LOAD_SBYTE_D16", "flat_load_d16_i8">;
2570defm FLAT_LOAD_D16_B16             : VFLAT_Real_Base_gfx12<0x020, "FLAT_LOAD_SHORT_D16", "flat_load_d16_b16">;
2571defm FLAT_LOAD_D16_HI_U8           : VFLAT_Real_Base_gfx12<0x021, "FLAT_LOAD_UBYTE_D16_HI", "flat_load_d16_hi_u8">;
2572defm FLAT_LOAD_D16_HI_I8           : VFLAT_Real_Base_gfx12<0x022, "FLAT_LOAD_SBYTE_D16_HI", "flat_load_d16_hi_i8">;
2573defm FLAT_LOAD_D16_HI_B16          : VFLAT_Real_Base_gfx12<0x023, "FLAT_LOAD_SHORT_D16_HI", "flat_load_d16_hi_b16">;
2574defm FLAT_STORE_D16_HI_B8          : VFLAT_Real_Base_gfx12<0x024, "FLAT_STORE_BYTE_D16_HI", "flat_store_d16_hi_b8">;
2575defm FLAT_STORE_D16_HI_B16         : VFLAT_Real_Base_gfx12<0x025, "FLAT_STORE_SHORT_D16_HI", "flat_store_d16_hi_b16">;
// Atomics: VFLAT_Real_Atomics_gfx12 defines both the <NAME>_gfx12 and
// <NAME>_RTN_gfx12 forms. The opcode jumps from 0x025 to 0x033 here.
2576defm FLAT_ATOMIC_SWAP_B32          : VFLAT_Real_Atomics_gfx12<0x033, "FLAT_ATOMIC_SWAP", "flat_atomic_swap_b32", true>;
2577defm FLAT_ATOMIC_CMPSWAP_B32       : VFLAT_Real_Atomics_gfx12<0x034, "FLAT_ATOMIC_CMPSWAP", "flat_atomic_cmpswap_b32", true>;
2578defm FLAT_ATOMIC_ADD_U32           : VFLAT_Real_Atomics_gfx12<0x035, "FLAT_ATOMIC_ADD", "flat_atomic_add_u32", true>;
2579defm FLAT_ATOMIC_SUB_U32           : VFLAT_Real_Atomics_gfx12<0x036, "FLAT_ATOMIC_SUB", "flat_atomic_sub_u32", true>;
// The pseudo for this entry is named FLAT_ATOMIC_CSUB_U32; only the GFX12
// record name and mnemonic use the SUB_CLAMP spelling.
2580defm FLAT_ATOMIC_SUB_CLAMP_U32     : VFLAT_Real_Atomics_gfx12<0x037, "FLAT_ATOMIC_CSUB_U32", "flat_atomic_sub_clamp_u32", true>;
2581defm FLAT_ATOMIC_MIN_I32           : VFLAT_Real_Atomics_gfx12<0x038, "FLAT_ATOMIC_SMIN", "flat_atomic_min_i32", true>;
2582defm FLAT_ATOMIC_MIN_U32           : VFLAT_Real_Atomics_gfx12<0x039, "FLAT_ATOMIC_UMIN", "flat_atomic_min_u32", true>;
2583defm FLAT_ATOMIC_MAX_I32           : VFLAT_Real_Atomics_gfx12<0x03a, "FLAT_ATOMIC_SMAX", "flat_atomic_max_i32", true>;
2584defm FLAT_ATOMIC_MAX_U32           : VFLAT_Real_Atomics_gfx12<0x03b, "FLAT_ATOMIC_UMAX", "flat_atomic_max_u32", true>;
2585defm FLAT_ATOMIC_AND_B32           : VFLAT_Real_Atomics_gfx12<0x03c, "FLAT_ATOMIC_AND", "flat_atomic_and_b32", true>;
2586defm FLAT_ATOMIC_OR_B32            : VFLAT_Real_Atomics_gfx12<0x03d, "FLAT_ATOMIC_OR", "flat_atomic_or_b32", true>;
2587defm FLAT_ATOMIC_XOR_B32           : VFLAT_Real_Atomics_gfx12<0x03e, "FLAT_ATOMIC_XOR", "flat_atomic_xor_b32", true>;
2588defm FLAT_ATOMIC_INC_U32           : VFLAT_Real_Atomics_gfx12<0x03f, "FLAT_ATOMIC_INC", "flat_atomic_inc_u32", true>;
2589defm FLAT_ATOMIC_DEC_U32           : VFLAT_Real_Atomics_gfx12<0x040, "FLAT_ATOMIC_DEC", "flat_atomic_dec_u32", true>;
// 64-bit atomics; the pseudo names carry an _X2 suffix.
2590defm FLAT_ATOMIC_SWAP_B64          : VFLAT_Real_Atomics_gfx12<0x041, "FLAT_ATOMIC_SWAP_X2", "flat_atomic_swap_b64", true>;
2591defm FLAT_ATOMIC_CMPSWAP_B64       : VFLAT_Real_Atomics_gfx12<0x042, "FLAT_ATOMIC_CMPSWAP_X2", "flat_atomic_cmpswap_b64", true>;
2592defm FLAT_ATOMIC_ADD_U64           : VFLAT_Real_Atomics_gfx12<0x043, "FLAT_ATOMIC_ADD_X2", "flat_atomic_add_u64", true>;
2593defm FLAT_ATOMIC_SUB_U64           : VFLAT_Real_Atomics_gfx12<0x044, "FLAT_ATOMIC_SUB_X2", "flat_atomic_sub_u64", true>;
2594defm FLAT_ATOMIC_MIN_I64           : VFLAT_Real_Atomics_gfx12<0x045, "FLAT_ATOMIC_SMIN_X2", "flat_atomic_min_i64", true>;
2595defm FLAT_ATOMIC_MIN_U64           : VFLAT_Real_Atomics_gfx12<0x046, "FLAT_ATOMIC_UMIN_X2", "flat_atomic_min_u64", true>;
2596defm FLAT_ATOMIC_MAX_I64           : VFLAT_Real_Atomics_gfx12<0x047, "FLAT_ATOMIC_SMAX_X2", "flat_atomic_max_i64", true>;
2597defm FLAT_ATOMIC_MAX_U64           : VFLAT_Real_Atomics_gfx12<0x048, "FLAT_ATOMIC_UMAX_X2", "flat_atomic_max_u64", true>;
2598defm FLAT_ATOMIC_AND_B64           : VFLAT_Real_Atomics_gfx12<0x049, "FLAT_ATOMIC_AND_X2", "flat_atomic_and_b64", true>;
2599defm FLAT_ATOMIC_OR_B64            : VFLAT_Real_Atomics_gfx12<0x04a, "FLAT_ATOMIC_OR_X2", "flat_atomic_or_b64", true>;
2600defm FLAT_ATOMIC_XOR_B64           : VFLAT_Real_Atomics_gfx12<0x04b, "FLAT_ATOMIC_XOR_X2", "flat_atomic_xor_b64", true>;
2601defm FLAT_ATOMIC_INC_U64           : VFLAT_Real_Atomics_gfx12<0x04c, "FLAT_ATOMIC_INC_X2", "flat_atomic_inc_u64", true>;
2602defm FLAT_ATOMIC_DEC_U64           : VFLAT_Real_Atomics_gfx12<0x04d, "FLAT_ATOMIC_DEC_X2", "flat_atomic_dec_u64", true>;
// F32 atomics. min/max use the *_num_f32 mnemonic on GFX12 and additionally
// accept the *_min_f32 / *_max_f32 spelling through the `alias` argument.
// Opcodes are not contiguous here (0x04d -> 0x051, then 0x052 -> 0x056).
2603defm FLAT_ATOMIC_MIN_NUM_F32       : VFLAT_Real_Atomics_gfx12<0x051, "FLAT_ATOMIC_FMIN", "flat_atomic_min_num_f32", true, "flat_atomic_min_f32">;
2604defm FLAT_ATOMIC_MAX_NUM_F32       : VFLAT_Real_Atomics_gfx12<0x052, "FLAT_ATOMIC_FMAX", "flat_atomic_max_num_f32", true, "flat_atomic_max_f32">;
2605defm FLAT_ATOMIC_ADD_F32           : VFLAT_Real_Atomics_gfx12<0x056, "FLAT_ATOMIC_ADD_F32", "flat_atomic_add_f32">;
2606
2607// ENC_VGLOBAL.
// GFX12 global instructions. Arguments are <opcode, pseudo record name, GFX12
// mnemonic[, renamed[, alias]]>. With `renamed = true` a MnemonicAlias from
// the pseudo's Mnemonic to the GFX12 mnemonic is also emitted; a non-empty
// `alias` adds one more alias to the same mnemonic.
//
// Loads and stores: VGLOBAL_Real_AllAddr_gfx12 defines the <NAME>_gfx12 and
// <NAME>_SADDR_gfx12 forms.
2608defm GLOBAL_LOAD_U8                : VGLOBAL_Real_AllAddr_gfx12<0x010, "GLOBAL_LOAD_UBYTE", "global_load_u8", true>;
2609defm GLOBAL_LOAD_I8                : VGLOBAL_Real_AllAddr_gfx12<0x011, "GLOBAL_LOAD_SBYTE", "global_load_i8", true>;
2610defm GLOBAL_LOAD_U16               : VGLOBAL_Real_AllAddr_gfx12<0x012, "GLOBAL_LOAD_USHORT", "global_load_u16", true>;
2611defm GLOBAL_LOAD_I16               : VGLOBAL_Real_AllAddr_gfx12<0x013, "GLOBAL_LOAD_SSHORT", "global_load_i16", true>;
2612defm GLOBAL_LOAD_B32               : VGLOBAL_Real_AllAddr_gfx12<0x014, "GLOBAL_LOAD_DWORD", "global_load_b32", true>;
2613defm GLOBAL_LOAD_B64               : VGLOBAL_Real_AllAddr_gfx12<0x015, "GLOBAL_LOAD_DWORDX2", "global_load_b64", true>;
2614defm GLOBAL_LOAD_B96               : VGLOBAL_Real_AllAddr_gfx12<0x016, "GLOBAL_LOAD_DWORDX3", "global_load_b96", true>;
2615defm GLOBAL_LOAD_B128              : VGLOBAL_Real_AllAddr_gfx12<0x017, "GLOBAL_LOAD_DWORDX4", "global_load_b128", true>;
2616defm GLOBAL_STORE_B8               : VGLOBAL_Real_AllAddr_gfx12<0x018, "GLOBAL_STORE_BYTE", "global_store_b8", true>;
2617defm GLOBAL_STORE_B16              : VGLOBAL_Real_AllAddr_gfx12<0x019, "GLOBAL_STORE_SHORT", "global_store_b16", true>;
2618defm GLOBAL_STORE_B32              : VGLOBAL_Real_AllAddr_gfx12<0x01a, "GLOBAL_STORE_DWORD", "global_store_b32", true>;
2619defm GLOBAL_STORE_B64              : VGLOBAL_Real_AllAddr_gfx12<0x01b, "GLOBAL_STORE_DWORDX2", "global_store_b64", true>;
2620defm GLOBAL_STORE_B96              : VGLOBAL_Real_AllAddr_gfx12<0x01c, "GLOBAL_STORE_DWORDX3", "global_store_b96", true>;
2621defm GLOBAL_STORE_B128             : VGLOBAL_Real_AllAddr_gfx12<0x01d, "GLOBAL_STORE_DWORDX4", "global_store_b128", true>;
// D16 / D16_HI and ADDTID loads and stores: `renamed` is left at its default
// (false), so no alias from the pseudo's Mnemonic is emitted for these.
2622defm GLOBAL_LOAD_D16_U8            : VGLOBAL_Real_AllAddr_gfx12<0x01e, "GLOBAL_LOAD_UBYTE_D16", "global_load_d16_u8">;
2623defm GLOBAL_LOAD_D16_I8            : VGLOBAL_Real_AllAddr_gfx12<0x01f, "GLOBAL_LOAD_SBYTE_D16", "global_load_d16_i8">;
2624defm GLOBAL_LOAD_D16_B16           : VGLOBAL_Real_AllAddr_gfx12<0x020, "GLOBAL_LOAD_SHORT_D16", "global_load_d16_b16">;
2625defm GLOBAL_LOAD_D16_HI_U8         : VGLOBAL_Real_AllAddr_gfx12<0x021, "GLOBAL_LOAD_UBYTE_D16_HI", "global_load_d16_hi_u8">;
2626defm GLOBAL_LOAD_D16_HI_I8         : VGLOBAL_Real_AllAddr_gfx12<0x022, "GLOBAL_LOAD_SBYTE_D16_HI", "global_load_d16_hi_i8">;
2627defm GLOBAL_LOAD_D16_HI_B16        : VGLOBAL_Real_AllAddr_gfx12<0x023, "GLOBAL_LOAD_SHORT_D16_HI", "global_load_d16_hi_b16">;
2628defm GLOBAL_STORE_D16_HI_B8        : VGLOBAL_Real_AllAddr_gfx12<0x024, "GLOBAL_STORE_BYTE_D16_HI", "global_store_d16_hi_b8">;
2629defm GLOBAL_STORE_D16_HI_B16       : VGLOBAL_Real_AllAddr_gfx12<0x025, "GLOBAL_STORE_SHORT_D16_HI", "global_store_d16_hi_b16">;
2630defm GLOBAL_LOAD_ADDTID_B32        : VGLOBAL_Real_AllAddr_gfx12<0x028, "GLOBAL_LOAD_DWORD_ADDTID", "global_load_addtid_b32">;
2631defm GLOBAL_STORE_ADDTID_B32       : VGLOBAL_Real_AllAddr_gfx12<0x029, "GLOBAL_STORE_DWORD_ADDTID", "global_store_addtid_b32">;
2632
// Atomics: VGLOBAL_Real_Atomics_gfx12 defines the <NAME>_gfx12, _SADDR_gfx12,
// _RTN_gfx12 and _SADDR_RTN_gfx12 forms.
2633defm GLOBAL_ATOMIC_SWAP_B32        : VGLOBAL_Real_Atomics_gfx12<0x033, "GLOBAL_ATOMIC_SWAP", "global_atomic_swap_b32", true>;
2634defm GLOBAL_ATOMIC_CMPSWAP_B32     : VGLOBAL_Real_Atomics_gfx12<0x034, "GLOBAL_ATOMIC_CMPSWAP", "global_atomic_cmpswap_b32", true>;
2635defm GLOBAL_ATOMIC_ADD_U32         : VGLOBAL_Real_Atomics_gfx12<0x035, "GLOBAL_ATOMIC_ADD", "global_atomic_add_u32", true>;
2636defm GLOBAL_ATOMIC_SUB_U32         : VGLOBAL_Real_Atomics_gfx12<0x036, "GLOBAL_ATOMIC_SUB", "global_atomic_sub_u32", true>;
// Besides the alias from the pseudo's Mnemonic, this entry also accepts the
// "global_atomic_csub_u32" spelling through the `alias` argument.
2637defm GLOBAL_ATOMIC_SUB_CLAMP_U32   : VGLOBAL_Real_Atomics_gfx12<0x037, "GLOBAL_ATOMIC_CSUB", "global_atomic_sub_clamp_u32", true, "global_atomic_csub_u32">;
2638defm GLOBAL_ATOMIC_MIN_I32         : VGLOBAL_Real_Atomics_gfx12<0x038, "GLOBAL_ATOMIC_SMIN", "global_atomic_min_i32", true>;
2639defm GLOBAL_ATOMIC_MIN_U32         : VGLOBAL_Real_Atomics_gfx12<0x039, "GLOBAL_ATOMIC_UMIN", "global_atomic_min_u32", true>;
2640defm GLOBAL_ATOMIC_MAX_I32         : VGLOBAL_Real_Atomics_gfx12<0x03a, "GLOBAL_ATOMIC_SMAX", "global_atomic_max_i32", true>;
2641defm GLOBAL_ATOMIC_MAX_U32         : VGLOBAL_Real_Atomics_gfx12<0x03b, "GLOBAL_ATOMIC_UMAX", "global_atomic_max_u32", true>;
2642defm GLOBAL_ATOMIC_AND_B32         : VGLOBAL_Real_Atomics_gfx12<0x03c, "GLOBAL_ATOMIC_AND", "global_atomic_and_b32", true>;
2643defm GLOBAL_ATOMIC_OR_B32          : VGLOBAL_Real_Atomics_gfx12<0x03d, "GLOBAL_ATOMIC_OR", "global_atomic_or_b32", true>;
2644defm GLOBAL_ATOMIC_XOR_B32         : VGLOBAL_Real_Atomics_gfx12<0x03e, "GLOBAL_ATOMIC_XOR", "global_atomic_xor_b32", true>;
2645defm GLOBAL_ATOMIC_INC_U32         : VGLOBAL_Real_Atomics_gfx12<0x03f, "GLOBAL_ATOMIC_INC", "global_atomic_inc_u32", true>;
2646defm GLOBAL_ATOMIC_DEC_U32         : VGLOBAL_Real_Atomics_gfx12<0x040, "GLOBAL_ATOMIC_DEC", "global_atomic_dec_u32", true>;
// 64-bit atomics; the pseudo names carry an _X2 suffix.
2647defm GLOBAL_ATOMIC_SWAP_B64        : VGLOBAL_Real_Atomics_gfx12<0x041, "GLOBAL_ATOMIC_SWAP_X2", "global_atomic_swap_b64", true>;
2648defm GLOBAL_ATOMIC_CMPSWAP_B64     : VGLOBAL_Real_Atomics_gfx12<0x042, "GLOBAL_ATOMIC_CMPSWAP_X2", "global_atomic_cmpswap_b64", true>;
2649defm GLOBAL_ATOMIC_ADD_U64         : VGLOBAL_Real_Atomics_gfx12<0x043, "GLOBAL_ATOMIC_ADD_X2", "global_atomic_add_u64", true>;
2650defm GLOBAL_ATOMIC_SUB_U64         : VGLOBAL_Real_Atomics_gfx12<0x044, "GLOBAL_ATOMIC_SUB_X2", "global_atomic_sub_u64", true>;
2651defm GLOBAL_ATOMIC_MIN_I64         : VGLOBAL_Real_Atomics_gfx12<0x045, "GLOBAL_ATOMIC_SMIN_X2", "global_atomic_min_i64", true>;
2652defm GLOBAL_ATOMIC_MIN_U64         : VGLOBAL_Real_Atomics_gfx12<0x046, "GLOBAL_ATOMIC_UMIN_X2", "global_atomic_min_u64", true>;
2653defm GLOBAL_ATOMIC_MAX_I64         : VGLOBAL_Real_Atomics_gfx12<0x047, "GLOBAL_ATOMIC_SMAX_X2", "global_atomic_max_i64", true>;
2654defm GLOBAL_ATOMIC_MAX_U64         : VGLOBAL_Real_Atomics_gfx12<0x048, "GLOBAL_ATOMIC_UMAX_X2", "global_atomic_max_u64", true>;
2655defm GLOBAL_ATOMIC_AND_B64         : VGLOBAL_Real_Atomics_gfx12<0x049, "GLOBAL_ATOMIC_AND_X2", "global_atomic_and_b64", true>;
2656defm GLOBAL_ATOMIC_OR_B64          : VGLOBAL_Real_Atomics_gfx12<0x04a, "GLOBAL_ATOMIC_OR_X2", "global_atomic_or_b64", true>;
2657defm GLOBAL_ATOMIC_XOR_B64         : VGLOBAL_Real_Atomics_gfx12<0x04b, "GLOBAL_ATOMIC_XOR_X2", "global_atomic_xor_b64", true>;
2658defm GLOBAL_ATOMIC_INC_U64         : VGLOBAL_Real_Atomics_gfx12<0x04c, "GLOBAL_ATOMIC_INC_X2", "global_atomic_inc_u64", true>;
2659defm GLOBAL_ATOMIC_DEC_U64         : VGLOBAL_Real_Atomics_gfx12<0x04d, "GLOBAL_ATOMIC_DEC_X2", "global_atomic_dec_u64", true>;
// F32 atomics. min/max use the *_num_f32 mnemonic on GFX12 and additionally
// accept the *_min_f32 / *_max_f32 spelling through the `alias` argument.
2660defm GLOBAL_ATOMIC_MIN_NUM_F32     : VGLOBAL_Real_Atomics_gfx12<0x051, "GLOBAL_ATOMIC_FMIN", "global_atomic_min_num_f32", true, "global_atomic_min_f32">;
2661defm GLOBAL_ATOMIC_MAX_NUM_F32     : VGLOBAL_Real_Atomics_gfx12<0x052, "GLOBAL_ATOMIC_FMAX", "global_atomic_max_num_f32", true, "global_atomic_max_f32">;
2662defm GLOBAL_ATOMIC_ADD_F32         : VGLOBAL_Real_Atomics_gfx12<0x056, "GLOBAL_ATOMIC_ADD_F32", "global_atomic_add_f32">;
// Ordered add: pseudo name and record name are identical and no alias is
// requested. The opcode jumps from 0x056 to 0x073 here.
2663defm GLOBAL_ATOMIC_ORDERED_ADD_B64 : VGLOBAL_Real_Atomics_gfx12<0x073, "GLOBAL_ATOMIC_ORDERED_ADD_B64", "global_atomic_ordered_add_b64">;
2664
2665// ENC_VSCRATCH.
// GFX12 scratch loads and stores, opcodes 0x10 through 0x25 with no gaps.
// Arguments are <opcode, pseudo record name, GFX12 mnemonic[, renamed]>.
// VSCRATCH_Real_AllAddr_gfx12 defines the <NAME>_gfx12, _SADDR_gfx12,
// _ST_gfx12 and _SVS_gfx12 forms. With `renamed = true` a MnemonicAlias from
// the pseudo's Mnemonic to the GFX12 mnemonic is also emitted.
2666defm SCRATCH_LOAD_U8               : VSCRATCH_Real_AllAddr_gfx12<0x10, "SCRATCH_LOAD_UBYTE", "scratch_load_u8", true>;
2667defm SCRATCH_LOAD_I8               : VSCRATCH_Real_AllAddr_gfx12<0x11, "SCRATCH_LOAD_SBYTE", "scratch_load_i8", true>;
2668defm SCRATCH_LOAD_U16              : VSCRATCH_Real_AllAddr_gfx12<0x12, "SCRATCH_LOAD_USHORT", "scratch_load_u16", true>;
2669defm SCRATCH_LOAD_I16              : VSCRATCH_Real_AllAddr_gfx12<0x13, "SCRATCH_LOAD_SSHORT", "scratch_load_i16", true>;
2670defm SCRATCH_LOAD_B32              : VSCRATCH_Real_AllAddr_gfx12<0x14, "SCRATCH_LOAD_DWORD", "scratch_load_b32", true>;
2671defm SCRATCH_LOAD_B64              : VSCRATCH_Real_AllAddr_gfx12<0x15, "SCRATCH_LOAD_DWORDX2", "scratch_load_b64", true>;
2672defm SCRATCH_LOAD_B96              : VSCRATCH_Real_AllAddr_gfx12<0x16, "SCRATCH_LOAD_DWORDX3", "scratch_load_b96", true>;
2673defm SCRATCH_LOAD_B128             : VSCRATCH_Real_AllAddr_gfx12<0x17, "SCRATCH_LOAD_DWORDX4", "scratch_load_b128", true>;
2674defm SCRATCH_STORE_B8              : VSCRATCH_Real_AllAddr_gfx12<0x18, "SCRATCH_STORE_BYTE", "scratch_store_b8", true>;
2675defm SCRATCH_STORE_B16             : VSCRATCH_Real_AllAddr_gfx12<0x19, "SCRATCH_STORE_SHORT", "scratch_store_b16", true>;
2676defm SCRATCH_STORE_B32             : VSCRATCH_Real_AllAddr_gfx12<0x1a, "SCRATCH_STORE_DWORD", "scratch_store_b32", true>;
2677defm SCRATCH_STORE_B64             : VSCRATCH_Real_AllAddr_gfx12<0x1b, "SCRATCH_STORE_DWORDX2", "scratch_store_b64", true>;
2678defm SCRATCH_STORE_B96             : VSCRATCH_Real_AllAddr_gfx12<0x1c, "SCRATCH_STORE_DWORDX3", "scratch_store_b96", true>;
2679defm SCRATCH_STORE_B128            : VSCRATCH_Real_AllAddr_gfx12<0x1d, "SCRATCH_STORE_DWORDX4", "scratch_store_b128", true>;
// D16 / D16_HI loads and stores: `renamed` is left at its default (false), so
// no alias from the pseudo's Mnemonic is emitted for these.
2680defm SCRATCH_LOAD_D16_U8           : VSCRATCH_Real_AllAddr_gfx12<0x1e, "SCRATCH_LOAD_UBYTE_D16", "scratch_load_d16_u8">;
2681defm SCRATCH_LOAD_D16_I8           : VSCRATCH_Real_AllAddr_gfx12<0x1f, "SCRATCH_LOAD_SBYTE_D16", "scratch_load_d16_i8">;
2682defm SCRATCH_LOAD_D16_B16          : VSCRATCH_Real_AllAddr_gfx12<0x20, "SCRATCH_LOAD_SHORT_D16", "scratch_load_d16_b16">;
2683defm SCRATCH_LOAD_D16_HI_U8        : VSCRATCH_Real_AllAddr_gfx12<0x21, "SCRATCH_LOAD_UBYTE_D16_HI", "scratch_load_d16_hi_u8">;
2684defm SCRATCH_LOAD_D16_HI_I8        : VSCRATCH_Real_AllAddr_gfx12<0x22, "SCRATCH_LOAD_SBYTE_D16_HI", "scratch_load_d16_hi_i8">;
2685defm SCRATCH_LOAD_D16_HI_B16       : VSCRATCH_Real_AllAddr_gfx12<0x23, "SCRATCH_LOAD_SHORT_D16_HI", "scratch_load_d16_hi_b16">;
2686defm SCRATCH_STORE_D16_HI_B8       : VSCRATCH_Real_AllAddr_gfx12<0x24, "SCRATCH_STORE_BYTE_D16_HI", "scratch_store_d16_hi_b8">;
2687defm SCRATCH_STORE_D16_HI_B16      : VSCRATCH_Real_AllAddr_gfx12<0x25, "SCRATCH_STORE_SHORT_D16_HI", "scratch_store_d16_hi_b16">;
2688