xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td (revision b64c5a0ace59af62eff52bfe110a521dc73c937b)
1//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains instruction defs that are common to all hw codegen
10// targets.
11//
12//===----------------------------------------------------------------------===//
13
// Integer identifiers for the address spaces referred to by the address
// space lists and memory pattern fragments further down in this file.
class AddressSpacesImpl {
  int Flat          = 0;
  int Global        = 1;
  int Region        = 2;
  int Local         = 3;
  int Constant      = 4;
  int Private       = 5;
  int Constant32Bit = 6;
}

// Record through which the values above are read, e.g. AddrSpaces.Global.
def AddrSpaces : AddressSpacesImpl;
25
26
// Common base for AMDGPU instruction records. Forwards the operand lists,
// assembly string and selection pattern to Instruction, and exposes two
// marker bits that are copied into the top two TSFlags bits.
class AMDGPUInst <dag outs, dag ins, string asm = "",
                  list<dag> pattern = []> : Instruction {
  // Register load/store markers; both are off unless a subclass sets them.
  field bit isRegisterLoad = 0;
  field bit isRegisterStore = 0;

  // The disassembler can use SoftFail to let an instruction fail to match
  // without aborting the whole decode. It is mainly an ARM feature, but
  // TableGen cannot build the decode table unless the field exists.
  field bits<96> SoftFail = 0;

  let Namespace = "AMDGPU";
  let DecoderNamespace = Namespace;

  let OutOperandList = outs;
  let InOperandList = ins;
  let AsmString = asm;
  let Pattern = pattern;
  let Itinerary = NullALU;

  let TSFlags{63} = isRegisterLoad;
  let TSFlags{62} = isRegisterStore;
}
50
// AMDGPUInst variant that adds a 32-bit Inst encoding field, initialised to
// all ones.
class AMDGPUShaderInst <dag outs, dag ins, string asm = "",
                        list<dag> pattern = []>
    : AMDGPUInst<outs, ins, asm, pattern> {
  field bits<32> Inst = 0xffffffff;
}
56
57//===---------------------------------------------------------------------===//
58// Return instruction
59//===---------------------------------------------------------------------===//
60
// Pseudo, codegen-only instruction format. The assembly string always gets a
// trailing newline appended, and the record is marked as having no memory
// access and no side effects.
class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
    : Instruction {
  let Namespace = "AMDGPU";

  dag OutOperandList = outs;
  dag InOperandList = ins;
  let Pattern = pattern;
  let AsmString = !strconcat(asmstr, "\n");
  let Itinerary = NullALU;

  // Extra per-format flags, off by default.
  bit hasIEEEFlag = 0;
  bit hasZeroOpFlag = 0;

  let isPseudo = 1;
  let isCodeGenOnly = 1;

  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;
}
78
// Union of two Register lists: every element of lstA, followed by the
// elements of lstB that do not also occur in lstA.
class RegListUnion<list<Register> lstA, list<Register> lstB> {
  list<Register> ret =
    !listconcat(lstA, !listremove(lstB, lstA));
}
83
// Selection pattern that additionally inherits PredicateControl and
// GISelFlags.
class AMDGPUPat<dag pattern, dag result>
    : Pat<pattern, result>, PredicateControl, GISelFlags;

// AMDGPUPat with the GIIgnoreCopies flag set.
class AMDGPUPatIgnoreCopies<dag pattern, dag result>
    : AMDGPUPat<pattern, result> {
  let GIIgnoreCopies = 1;
}
89
// Predicates that are flagged RecomputePerFunction. The denormal predicates
// compare the function's mode against DenormalMode::getPreserveSign(); the
// f16 and f64 variants read the same FP64FP16Denormals field.
let RecomputePerFunction = 1 in {
  def FP16Denormals : Predicate<
    "MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals != DenormalMode::getPreserveSign()">;
  def FP32Denormals : Predicate<
    "MF->getInfo<SIMachineFunctionInfo>()->getMode().FP32Denormals != DenormalMode::getPreserveSign()">;
  def FP64Denormals : Predicate<
    "MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals != DenormalMode::getPreserveSign()">;

  def NoFP16Denormals : Predicate<
    "MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals == DenormalMode::getPreserveSign()">;
  def NoFP32Denormals : Predicate<
    "MF->getInfo<SIMachineFunctionInfo>()->getMode().FP32Denormals == DenormalMode::getPreserveSign()">;
  def NoFP64Denormals : Predicate<
    "MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals == DenormalMode::getPreserveSign()">;

  def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;
} // End let RecomputePerFunction = 1

// True when the subtarget reports FMA support.
def FMA : Predicate<"Subtarget->hasFMA()">;

// i32 operand whose default value is 0.
def InstFlag : OperandWithDefaultOps<i32, (ops (i32 0))>;

// i1 operand whose default value is 0.
def i1imm_0 : OperandWithDefaultOps<i1, (ops (i1 0))>;
105
// AsmOperandClass whose name and predicate/parser/default methods come from
// the template arguments. Operands are always rendered with
// "addImmOperands".
class CustomOperandClass<string name, bit optional, string predicateMethod,
                         string parserMethod, string defaultMethod>
    : AsmOperandClass {
  let Name = name;
  let IsOptional = optional;
  let PredicateMethod = predicateMethod;
  let ParserMethod = parserMethod;
  let DefaultMethod = defaultMethod;
  let RenderMethod = "addImmOperands";
}

// Properties shared by custom operands. Every method name is derived from
// `name` (which defaults to the defining record's NAME) and can be
// overridden with `let` in a subclass.
class CustomOperandProps<bit optional = 0, string name = NAME> {
  string ImmTy = !strconcat("ImmTy", name);
  string PredicateMethod = !strconcat("is", name);
  string ParserMethod = !strconcat("parse", name);
  string DefaultValue = "0";
  // Lambda source that creates an immediate operand holding DefaultValue and
  // tagged with ImmTy.
  string DefaultMethod =
    !strconcat("[this]() { return ",
               "AMDGPUOperand::CreateImm(this, ", DefaultValue,
               ", SMLoc(), ",
               "AMDGPUOperand::", ImmTy, "); }");
  string PrintMethod = !strconcat("print", name);
  AsmOperandClass ParserMatchClass =
    CustomOperandClass<name, optional, PredicateMethod, ParserMethod,
                       DefaultMethod>;
  string OperandType = "OPERAND_IMMEDIATE";
}

// Operand of the given value type that carries the custom operand
// properties.
class CustomOperand<ValueType type, bit optional = 0, string name = NAME>
    : Operand<type>, CustomOperandProps<optional, name>;

// Immediate operand: ImmTy is "ImmTyNone", the parser method is empty, and
// the print method is selectable (default "print" followed by the name).
class ImmOperand<ValueType type, string name = NAME, bit optional = 0,
                 string printer = !strconcat("print", name)>
    : CustomOperand<type, optional, name> {
  let ImmTy = "ImmTyNone";
  let ParserMethod = "";
  let PrintMethod = printer;
}

// 16-bit immediates; both the S16 and the U16 operand are printed with
// printU16ImmOperand.
class S16ImmOperand
    : ImmOperand<i16, "S16Imm", 0, "printU16ImmOperand">;

def s16imm : S16ImmOperand;
def u16imm : ImmOperand<i16, "U16Imm", 0, "printU16ImmOperand">;
147
// Wraps an existing custom operand `op` and narrows its predicate: the
// generated lambda requires both op's own predicate method and the extra
// `valuePredicate` expression to hold. Everything else (ImmTy, parser,
// default value/method, printer) is taken over from `op`.
class ValuePredicatedOperand<CustomOperand op, string valuePredicate,
                             bit optional = 0>
    : CustomOperand<op.Type, optional> {
  defvar BasePredicate = op.ParserMatchClass.PredicateMethod;

  let ImmTy = op.ImmTy;
  let PredicateMethod =
    !strconcat("getPredicate([](const AMDGPUOperand &Op) -> bool { ",
               "return Op.", BasePredicate, "() && ", valuePredicate,
               "; })");
  let ParserMethod = op.ParserMatchClass.ParserMethod;
  let DefaultValue = op.DefaultValue;
  let DefaultMethod = op.DefaultMethod;
  let PrintMethod = op.PrintMethod;
}
161
162//===--------------------------------------------------------------------===//
163// Custom Operands
164//===--------------------------------------------------------------------===//
// Operand of type OtherVT.
def brtarget : Operand<OtherVT>;
166
167//===----------------------------------------------------------------------===//
168// Misc. PatFrags
169//===----------------------------------------------------------------------===//
170
// Wrappers that match `op` with the HasOneUse flag set. One class per
// operand count.

// One-operand form.
class HasOneUseUnaryOp<SDPatternOperator op>
    : PatFrag<(ops node:$src0), (op $src0)> {
  let HasOneUse = 1;
}

// Two-operand form.
class HasOneUseBinOp<SDPatternOperator op>
    : PatFrag<(ops node:$src0, node:$src1), (op $src0, $src1)> {
  let HasOneUse = 1;
}

// Three-operand form.
class HasOneUseTernaryOp<SDPatternOperator op>
    : PatFrag<(ops node:$src0, node:$src1, node:$src2),
              (op $src0, $src1, $src2)> {
  let HasOneUse = 1;
}
188
// Matches a one-operand `op` whose operand SITargetLowering::isCanonicalized
// accepts. The SelectionDAG predicate checks node operand 0; the GlobalISel
// predicate checks the register of machine operand 1.
class is_canonicalized_1<SDPatternOperator op> : PatFrag<
  (ops node:$src0),
  (op $src0),
  [{
    const auto *TLI =
        static_cast<const SITargetLowering *>(getTargetLowering());
    return TLI->isCanonicalized(*CurDAG, N->getOperand(0));
  }]> {

  let GISelPredicateCode = [{
    const auto &Lowering = *static_cast<const SITargetLowering *>(
        MF.getSubtarget().getTargetLowering());
    return Lowering.isCanonicalized(MI.getOperand(1).getReg(), MF);
  }];
}

// Two-operand counterpart: both operands must be accepted by
// SITargetLowering::isCanonicalized.
class is_canonicalized_2<SDPatternOperator op> : PatFrag<
  (ops node:$src0, node:$src1),
  (op $src0, $src1),
  [{
    const auto *TLI =
        static_cast<const SITargetLowering *>(getTargetLowering());
    return TLI->isCanonicalized(*CurDAG, N->getOperand(0)) &&
           TLI->isCanonicalized(*CurDAG, N->getOperand(1));
  }]> {

  // TODO: Improve the Legalizer for g_build_vector in Global Isel to match
  // this class
  let GISelPredicateCode = [{
    const auto &Lowering = *static_cast<const SITargetLowering *>(
        MF.getSubtarget().getTargetLowering());
    return Lowering.isCanonicalized(MI.getOperand(1).getReg(), MF) &&
           Lowering.isCanonicalized(MI.getOperand(2).getReg(), MF);
  }];
}
227
// Three-operand fragment for a nested pair of binary operators:
// op2(op1(src0, src1), src2).
class FoldTernaryOpPat<SDPatternOperator op1, SDPatternOperator op2>
    : PatFrag<(ops node:$src0, node:$src1, node:$src2),
              (op2 (op1 node:$src0, node:$src1), node:$src2)>;

// Multiply followed by add: add(mul(src0, src1), src2).
def imad : FoldTernaryOpPat<mul, add>;
234
// Single-use wrappers that are additionally marked commutative and
// associative.
let Properties = [SDNPCommutative, SDNPAssociative] in {
  // Integer min/max.
  def smax_oneuse : HasOneUseBinOp<smax>;
  def smin_oneuse : HasOneUseBinOp<smin>;
  def umax_oneuse : HasOneUseBinOp<umax>;
  def umin_oneuse : HasOneUseBinOp<umin>;

  // Floating-point min/max.
  def fminnum_oneuse  : HasOneUseBinOp<fminnum>;
  def fmaxnum_oneuse  : HasOneUseBinOp<fmaxnum>;
  def fminimum_oneuse : HasOneUseBinOp<fminimum>;
  def fmaximum_oneuse : HasOneUseBinOp<fmaximum>;

  def fminnum_ieee_oneuse : HasOneUseBinOp<fminnum_ieee>;
  def fmaxnum_ieee_oneuse : HasOneUseBinOp<fmaxnum_ieee>;

  // Bitwise logic.
  def and_oneuse : HasOneUseBinOp<and>;
  def or_oneuse  : HasOneUseBinOp<or>;
  def xor_oneuse : HasOneUseBinOp<xor>;
} // Properties = [SDNPCommutative, SDNPAssociative]

// Single-use wrappers without the extra properties.
def not_oneuse : HasOneUseUnaryOp<not>;

def add_oneuse : HasOneUseBinOp<add>;
def sub_oneuse : HasOneUseBinOp<sub>;

def srl_oneuse : HasOneUseBinOp<srl>;
def shl_oneuse : HasOneUseBinOp<shl>;

def select_oneuse : HasOneUseTernaryOp<select>;

def AMDGPUmul_u24_oneuse : HasOneUseBinOp<AMDGPUmul_u24>;
def AMDGPUmul_i24_oneuse : HasOneUseBinOp<AMDGPUmul_i24>;
267
268//===----------------------------------------------------------------------===//
269// PatFrags for shifts
270//===----------------------------------------------------------------------===//
271
// Constrained shift PatFrags.
//
// Each csh_mask_<width> matches an `and` with an immediate that
// isUnneededShiftMask accepts for the given bit count (4, 5 or 6 for the
// 16-, 32- and 64-bit variants). The same helper is called for SelectionDAG
// (on N) and for GlobalISel (on MI).

def csh_mask_16 : PatFrag<
  (ops node:$src0), (and node:$src0, imm),
  [{ return isUnneededShiftMask(N, 4); }]> {
  let GISelPredicateCode = [{ return isUnneededShiftMask(MI, 4); }];
}

def csh_mask_32 : PatFrag<
  (ops node:$src0), (and node:$src0, imm),
  [{ return isUnneededShiftMask(N, 5); }]> {
  let GISelPredicateCode = [{ return isUnneededShiftMask(MI, 5); }];
}

def csh_mask_64 : PatFrag<
  (ops node:$src0), (and node:$src0, imm),
  [{ return isUnneededShiftMask(N, 6); }]> {
  let GISelPredicateCode = [{ return isUnneededShiftMask(MI, 6); }];
}
288
// For each width, define constrained shl/srl/sra fragments that match the
// shift either directly or with its amount wrapped in csh_mask_<width>,
// plus a single-use variant and a variant with the two operands swapped.
foreach width = [16, 32, 64] in {
  defvar amount_mask = !cast<SDPatternOperator>("csh_mask_"#width);

  // Shift left.
  def cshl_#width : PatFrags<(ops node:$src0, node:$src1),
    [(shl node:$src0, node:$src1),
     (shl node:$src0, (amount_mask node:$src1))]>;
  defvar shl_frag = !cast<SDPatternOperator>("cshl_"#width);
  def cshl_#width#_oneuse : HasOneUseBinOp<shl_frag>;
  def clshl_rev_#width : PatFrag<(ops node:$src0, node:$src1),
                                 (shl_frag $src1, $src0)>;

  // Logical shift right.
  def csrl_#width : PatFrags<(ops node:$src0, node:$src1),
    [(srl node:$src0, node:$src1),
     (srl node:$src0, (amount_mask node:$src1))]>;
  defvar srl_frag = !cast<SDPatternOperator>("csrl_"#width);
  def csrl_#width#_oneuse : HasOneUseBinOp<srl_frag>;
  def clshr_rev_#width : PatFrag<(ops node:$src0, node:$src1),
                                 (srl_frag $src1, $src0)>;

  // Arithmetic shift right.
  def csra_#width : PatFrags<(ops node:$src0, node:$src1),
    [(sra node:$src0, node:$src1),
     (sra node:$src0, (amount_mask node:$src1))]>;
  defvar sra_frag = !cast<SDPatternOperator>("csra_"#width);
  def csra_#width#_oneuse : HasOneUseBinOp<sra_frag>;
  def cashr_rev_#width : PatFrag<(ops node:$src0, node:$src1),
                                 (sra_frag $src1, $src0)>;
} // end foreach width
313
// Single-use logical shift right by the i32 constant 16.
def srl_16 : PatFrag<(ops node:$src0),
                     (srl_oneuse node:$src0, (i32 16))>;


// i16 truncation of an i32 value shifted right by 16 (via srl_16).
def hi_i16_elt : PatFrag<(ops node:$src0),
                         (i16 (trunc (i32 (srl_16 node:$src0))))>;
322
323
// Leaf predicate that accepts a BITCAST node whose operand is an SRL with a
// constant shift amount of exactly 16. Anything else (a different opcode or
// a non-constant shift amount) is rejected.
def hi_f16_elt : PatLeaf<
  (vt), [{
    if (N->getOpcode() != ISD::BITCAST)
      return false;

    SDValue Tmp = N->getOperand(0);
    if (Tmp.getOpcode() != ISD::SRL)
      return false;

    // The original condition was missing its closing parenthesis, which
    // would not compile if this fragment were ever emitted.
    if (const auto *RHS = dyn_cast<ConstantSDNode>(Tmp.getOperand(1)))
      return RHS->getZExtValue() == 16;
    return false;
  }]>;
336
337//===----------------------------------------------------------------------===//
338// PatLeafs for zero immediate
339//===----------------------------------------------------------------------===//
340
// Integer immediate for which isZero() holds.
def immzero   : PatLeaf<(imm),   [{ return N->isZero(); }]>;
// Floating-point immediate for which isZero() holds.
def fpimmzero : PatLeaf<(fpimm), [{ return N->isZero(); }]>;
343
344//===----------------------------------------------------------------------===//
345// PatLeafs for floating-point comparisons
346//===----------------------------------------------------------------------===//
347
// Each ordered fragment also accepts the condition code without an
// ordered/unordered prefix (e.g. COND_OEQ matches SETOEQ and SETEQ).
def COND_OEQ : PatFrags<(ops), [(OtherVT SETOEQ), (OtherVT SETEQ)]>;
def COND_ONE : PatFrags<(ops), [(OtherVT SETONE), (OtherVT SETNE)]>;
def COND_OGT : PatFrags<(ops), [(OtherVT SETOGT), (OtherVT SETGT)]>;
def COND_OGE : PatFrags<(ops), [(OtherVT SETOGE), (OtherVT SETGE)]>;
def COND_OLT : PatFrags<(ops), [(OtherVT SETOLT), (OtherVT SETLT)]>;
def COND_OLE : PatFrags<(ops), [(OtherVT SETOLE), (OtherVT SETLE)]>;
def COND_O   : PatFrags<(ops), [(OtherVT SETO)]>;
def COND_UO  : PatFrags<(ops), [(OtherVT SETUO)]>;

//===----------------------------------------------------------------------===//
// PatLeafs for unsigned / unordered comparisons
//===----------------------------------------------------------------------===//

def COND_UEQ : PatFrag<(ops), (OtherVT SETUEQ)>;
def COND_UNE : PatFrag<(ops), (OtherVT SETUNE)>;
def COND_UGT : PatFrag<(ops), (OtherVT SETUGT)>;
def COND_UGE : PatFrag<(ops), (OtherVT SETUGE)>;
def COND_ULT : PatFrag<(ops), (OtherVT SETULT)>;
def COND_ULE : PatFrag<(ops), (OtherVT SETULE)>;

// XXX - For some reason R600 version is preferring to use unordered
// for setne?
def COND_UNE_NE : PatFrags<(ops), [(OtherVT SETUNE), (OtherVT SETNE)]>;

//===----------------------------------------------------------------------===//
// PatLeafs for signed comparisons
//===----------------------------------------------------------------------===//

def COND_SGT : PatFrag<(ops), (OtherVT SETGT)>;
def COND_SGE : PatFrag<(ops), (OtherVT SETGE)>;
def COND_SLT : PatFrag<(ops), (OtherVT SETLT)>;
def COND_SLE : PatFrag<(ops), (OtherVT SETLE)>;

//===----------------------------------------------------------------------===//
// PatLeafs for integer equality
//===----------------------------------------------------------------------===//

def COND_EQ : PatFrags<(ops), [(OtherVT SETEQ), (OtherVT SETUEQ)]>;
def COND_NE : PatFrags<(ops), [(OtherVT SETNE), (OtherVT SETUNE)]>;

// Condition leaf whose predicate always returns false.
// FIXME: Should not need code predicate
//def COND_NULL : PatLeaf<(OtherVT null_frag)>;
def COND_NULL : PatLeaf<(cond), [{(void)N; return false;}]>;
394
395//===----------------------------------------------------------------------===//
396// PatLeafs for Texture Constants
397//===----------------------------------------------------------------------===//
398
// Each leaf below matches an immediate whose zero-extended value, truncated
// to 32 bits, is one of the listed texture type codes.

// Texture types 9, 10 and 16.
def TEX_ARRAY : PatLeaf<
  (imm),
  [{
    const uint32_t TexType = (uint32_t)N->getZExtValue();
    return TexType == 9 || TexType == 10 || TexType == 16;
  }]
>;

// Texture type 5.
def TEX_RECT : PatLeaf<
  (imm),
  [{
    const uint32_t TexType = (uint32_t)N->getZExtValue();
    return TexType == 5;
  }]
>;

// Texture types 6 through 8, and 13.
def TEX_SHADOW : PatLeaf<
  (imm),
  [{
    const uint32_t TexType = (uint32_t)N->getZExtValue();
    return (TexType >= 6 && TexType <= 8) || TexType == 13;
  }]
>;

// Texture types 11, 12 and 17.
def TEX_SHADOW_ARRAY : PatLeaf<
  (imm),
  [{
    const uint32_t TexType = (uint32_t)N->getZExtValue();
    return TexType == 11 || TexType == 12 || TexType == 17;
  }]
>;
426
427//===----------------------------------------------------------------------===//
428// Load/Store Pattern Fragments
429//===----------------------------------------------------------------------===//
430
// ISD::ATOMIC_CMP_SWAP node that additionally takes an input glue operand.
def atomic_cmp_swap_glue : SDNode<
  "ISD::ATOMIC_CMP_SWAP", SDTAtomic3,
  [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
>;

// Named holder for a list of address space numbers.
class AddressSpaceList<list<int> AS> {
  list<int> AddrSpaces = AS;
}

// Mix-in that sets MinAlignment to the given byte count.
class Aligned<int Bytes> {
  int MinAlignment = Bytes;
}

// Store fragment whose stored value is `$value` logically shifted right by
// 16; `vt` becomes the fragment's memory type.
class StoreHi16<SDPatternOperator op, ValueType vt>
    : PatFrag<(ops node:$value, node:$ptr),
              (op (srl node:$value, (i32 16)), node:$ptr)> {
  let IsStore = 1;
  let MemoryVT = vt;
}
448
// Address space lists, looked up by name ("LoadAddress_"/"StoreAddress_"
// followed by the address space name) from the fragment loops below.

// constant
def LoadAddress_constant : AddressSpaceList<[
  AddrSpaces.Constant,
  AddrSpaces.Constant32Bit
]>;

// global
def LoadAddress_global : AddressSpaceList<[
  AddrSpaces.Global,
  AddrSpaces.Constant,
  AddrSpaces.Constant32Bit
]>;
def StoreAddress_global : AddressSpaceList<[ AddrSpaces.Global ]>;

// flat
def LoadAddress_flat : AddressSpaceList<[
  AddrSpaces.Flat,
  AddrSpaces.Global,
  AddrSpaces.Constant,
  AddrSpaces.Constant32Bit
]>;
def StoreAddress_flat : AddressSpaceList<[
  AddrSpaces.Flat,
  AddrSpaces.Global
]>;

// private
def LoadAddress_private  : AddressSpaceList<[ AddrSpaces.Private ]>;
def StoreAddress_private : AddressSpaceList<[ AddrSpaces.Private ]>;

// local
def LoadAddress_local  : AddressSpaceList<[ AddrSpaces.Local ]>;
def StoreAddress_local : AddressSpaceList<[ AddrSpaces.Local ]>;

// region
def LoadAddress_region  : AddressSpaceList<[ AddrSpaces.Region ]>;
def StoreAddress_region : AddressSpaceList<[ AddrSpaces.Region ]>;
470
471
472
// For every address space name, define load fragments restricted to the
// address spaces listed in the matching LoadAddress_<as> record.
foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
defvar LoadAS = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces;

let AddressSpaces = LoadAS in {

// Plain, non-extending load.
def load_#as : PatFrag<(ops node:$ptr), (unindexedload node:$ptr)> {
  let IsLoad = 1;
  let IsNonExtLoad = 1;
}

// Any-, sign- and zero-extending loads from i8 / i16.
let IsLoad = 1 in {
def extloadi8_#as   : PatFrag<(ops node:$ptr), (extloadi8 node:$ptr)>;
def extloadi16_#as  : PatFrag<(ops node:$ptr), (extloadi16 node:$ptr)>;

def sextloadi8_#as  : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr)>;
def sextloadi16_#as : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr)>;

def zextloadi8_#as  : PatFrag<(ops node:$ptr), (zextloadi8 node:$ptr)>;
def zextloadi16_#as : PatFrag<(ops node:$ptr), (zextloadi16 node:$ptr)>;
} // End let IsLoad = 1

// Atomic loads, one per memory width.
let IsAtomic = 1 in {
def atomic_load_8_#as : PatFrag<(ops node:$ptr),
                                (atomic_load_8 node:$ptr)> {
  let MemoryVT = i8;
}

def atomic_load_16_#as : PatFrag<(ops node:$ptr),
                                 (atomic_load_16 node:$ptr)> {
  let MemoryVT = i16;
}

def atomic_load_32_#as : PatFrag<(ops node:$ptr),
                                 (atomic_load_32 node:$ptr)> {
  let MemoryVT = i32;
}

def atomic_load_64_#as : PatFrag<(ops node:$ptr),
                                 (atomic_load_64 node:$ptr)> {
  let MemoryVT = i64;
}
} // End let IsAtomic = 1

} // End let AddressSpaces
} // End foreach as
526
527
// For every address space name with a StoreAddress_<as> list, define store,
// truncating-store, high-half store and atomic-store fragments restricted
// to that list.
foreach as = [ "global", "flat", "local", "private", "region" ] in {
defvar StoreAS = !cast<AddressSpaceList>("StoreAddress_"#as).AddrSpaces;

let IsStore = 1, AddressSpaces = StoreAS in {

// Non-truncating store.
def store_#as : PatFrag<(ops node:$val, node:$ptr),
                        (unindexedstore node:$val, node:$ptr)> {
  let IsTruncStore = 0;
}

// truncstore fragments.
def truncstore_#as : PatFrag<(ops node:$val, node:$ptr),
                             (unindexedstore node:$val, node:$ptr)> {
  let IsTruncStore = 1;
}

// TODO: We don't really need the truncstore here. We can use
// unindexedstore with MemoryVT directly, which will save an
// unnecessary check that the memory size is less than the value type
// in the generated matcher table.
def truncstorei8_#as  : PatFrag<(ops node:$val, node:$ptr),
                                (truncstorei8 node:$val, node:$ptr)>;
def truncstorei16_#as : PatFrag<(ops node:$val, node:$ptr),
                                (truncstorei16 node:$val, node:$ptr)>;

// Stores of the value shifted right by 16 (see StoreHi16).
def store_hi16_#as         : StoreHi16<truncstorei16, i16>;
def truncstorei8_hi16_#as  : StoreHi16<truncstorei8, i8>;
def truncstorei16_hi16_#as : StoreHi16<truncstorei16, i16>;

} // End let IsStore = 1, AddressSpaces = ...

let IsAtomic = 1, AddressSpaces = StoreAS in {

def atomic_store_8_#as  : PatFrag<(ops node:$val, node:$ptr),
                                  (atomic_store_8 node:$val, node:$ptr)>;
def atomic_store_16_#as : PatFrag<(ops node:$val, node:$ptr),
                                  (atomic_store_16 node:$val, node:$ptr)>;
def atomic_store_32_#as : PatFrag<(ops node:$val, node:$ptr),
                                  (atomic_store_32 node:$val, node:$ptr)>;
def atomic_store_64_#as : PatFrag<(ops node:$val, node:$ptr),
                                  (atomic_store_64 node:$val, node:$ptr)>;

} // End let IsAtomic = 1, AddressSpaces = ...
} // End foreach as
566
// The multiclasses below are instantiated under the name of an existing
// pattern operator (looked up through NAME) and derive suffixed fragments
// from it.

// <NAME>_noret: the same (ptr, data) operation with HasNoUse set.
multiclass noret_op {
  def "_noret" : PatFrag<(ops node:$ptr, node:$data),
    (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)> {
    let HasNoUse = true;
  }
}

// <NAME>_noret_global_addrspace / <NAME>_global_addrspace: atomic fragments
// limited to the LoadAddress_global address spaces.
multiclass global_addr_space_atomic_op {
  let IsAtomic = 1, AddressSpaces = LoadAddress_global.AddrSpaces in {
    def "_noret_global_addrspace" :
      PatFrag<(ops node:$ptr, node:$data),
              (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)> {
        let HasNoUse = true;
      }
    def "_global_addrspace" :
      PatFrag<(ops node:$ptr, node:$data),
              (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>;
  }
}

// <NAME>_noret_flat_addrspace / <NAME>_flat_addrspace: atomic fragments
// limited to the LoadAddress_flat address spaces.
multiclass flat_addr_space_atomic_op {
  let IsAtomic = 1, AddressSpaces = LoadAddress_flat.AddrSpaces in {
    def "_noret_flat_addrspace" :
      PatFrag<(ops node:$ptr, node:$data),
              (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)> {
        let HasNoUse = true;
      }
    def "_flat_addrspace" :
      PatFrag<(ops node:$ptr, node:$data),
              (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>;
  }
}

// <NAME>_noret_local_addrspace / <NAME>_local_addrspace: atomic fragments
// limited to the LoadAddress_local address spaces.
multiclass local_addr_space_atomic_op {
  let IsAtomic = 1, AddressSpaces = LoadAddress_local.AddrSpaces in {
    def "_noret_local_addrspace" :
      PatFrag<(ops node:$ptr, node:$data),
              (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)> {
        let HasNoUse = true;
      }
    def "_local_addrspace" :
      PatFrag<(ops node:$ptr, node:$data),
              (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>;
  }
}
620
// Derived fragments for atomic intrinsics. Each defm reuses the intrinsic's
// own name, so the generated records are that name plus the suffix supplied
// by the multiclass.
defm int_amdgcn_flat_atomic_fadd                : noret_op;
defm int_amdgcn_flat_atomic_fadd                : flat_addr_space_atomic_op;
defm int_amdgcn_flat_atomic_fadd_v2bf16         : noret_op;
defm int_amdgcn_flat_atomic_fmin                : noret_op;
defm int_amdgcn_flat_atomic_fmax                : noret_op;
defm int_amdgcn_global_atomic_fadd              : global_addr_space_atomic_op;
defm int_amdgcn_flat_atomic_fadd                : global_addr_space_atomic_op;
defm int_amdgcn_global_atomic_fadd_v2bf16       : noret_op;
defm int_amdgcn_global_atomic_fmin              : noret_op;
defm int_amdgcn_global_atomic_fmax              : noret_op;
defm int_amdgcn_global_atomic_csub              : noret_op;
defm int_amdgcn_flat_atomic_fadd                : local_addr_space_atomic_op;
defm int_amdgcn_global_atomic_ordered_add_b64   : noret_op;
defm int_amdgcn_flat_atomic_fmin_num            : noret_op;
defm int_amdgcn_flat_atomic_fmax_num            : noret_op;
defm int_amdgcn_global_atomic_fmin_num          : noret_op;
defm int_amdgcn_global_atomic_fmax_num          : noret_op;
defm int_amdgcn_atomic_cond_sub_u32             : local_addr_space_atomic_op;
defm int_amdgcn_atomic_cond_sub_u32             : flat_addr_space_atomic_op;
defm int_amdgcn_atomic_cond_sub_u32             : global_addr_space_atomic_op;
641
// "_noret" flavours of the atomic multiclasses (binary_atomic_op,
// binary_atomic_op_fp and ternary_atomic_op are defined outside this file):
// the same expansion, with HasNoUse set on every generated record.

multiclass noret_binary_atomic_op<SDNode atomic_op> {
  let HasNoUse = true in {
    defm "_noret" : binary_atomic_op<atomic_op>;
  }
}

multiclass noret_binary_atomic_op_fp<SDNode atomic_op> {
  let HasNoUse = true in {
    defm "_noret" : binary_atomic_op_fp<atomic_op>;
  }
}

multiclass noret_ternary_atomic_op<SDNode atomic_op> {
  let HasNoUse = true in {
    defm "_noret" : ternary_atomic_op<atomic_op>;
  }
}
656
// Address space names the *_all_as multiclasses iterate over; each name must
// have a matching LoadAddress_<name> record.
defvar atomic_addrspace_names = [
  "global", "flat", "constant", "local", "private", "region"
];

// Expands binary_atomic_op and its "_noret" flavour once per address space
// name, restricted to that address space's LoadAddress_<as> list.
multiclass binary_atomic_op_all_as<SDNode atomic_op> {
  foreach as = atomic_addrspace_names in {
    defvar Spaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces;
    let AddressSpaces = Spaces in {
      defm "_"#as : binary_atomic_op<atomic_op>;
      defm "_"#as : noret_binary_atomic_op<atomic_op>;
    }
  }
}

// Same expansion for the floating-point atomic multiclass.
multiclass binary_atomic_op_fp_all_as<SDNode atomic_op> {
  foreach as = atomic_addrspace_names in {
    defvar Spaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces;
    let AddressSpaces = Spaces in {
      defm "_"#as : binary_atomic_op_fp<atomic_op>;
      defm "_"#as : noret_binary_atomic_op_fp<atomic_op>;
    }
  }
}
675
// Per-address-space fragments for each atomic operation. The defm name
// equals the operator name, so the generated records extend that name.

// Integer read-modify-write operations.
defm atomic_swap           : binary_atomic_op_all_as<atomic_swap>;
defm atomic_load_add       : binary_atomic_op_all_as<atomic_load_add>;
defm atomic_load_and       : binary_atomic_op_all_as<atomic_load_and>;
defm atomic_load_max       : binary_atomic_op_all_as<atomic_load_max>;
defm atomic_load_min       : binary_atomic_op_all_as<atomic_load_min>;
defm atomic_load_or        : binary_atomic_op_all_as<atomic_load_or>;
defm atomic_load_sub       : binary_atomic_op_all_as<atomic_load_sub>;
defm atomic_load_umax      : binary_atomic_op_all_as<atomic_load_umax>;
defm atomic_load_umin      : binary_atomic_op_all_as<atomic_load_umin>;
defm atomic_load_xor       : binary_atomic_op_all_as<atomic_load_xor>;

// Floating-point operations.
defm atomic_load_fadd      : binary_atomic_op_fp_all_as<atomic_load_fadd>;
defm atomic_load_fmin      : binary_atomic_op_fp_all_as<atomic_load_fmin>;
defm atomic_load_fmax      : binary_atomic_op_fp_all_as<atomic_load_fmax>;

// Wrapping increment/decrement and the AMDGPU compare-and-swap node.
defm atomic_load_uinc_wrap : binary_atomic_op_all_as<atomic_load_uinc_wrap>;
defm atomic_load_udec_wrap : binary_atomic_op_all_as<atomic_load_udec_wrap>;
defm AMDGPUatomic_cmp_swap : binary_atomic_op_all_as<AMDGPUatomic_cmp_swap>;
692
// load_local / store_local with a minimum alignment of 8 or 16 bytes.

def load_align8_local
    : PatFrag<(ops node:$ptr), (load_local node:$ptr)>, Aligned<8> {
  let IsLoad = 1;
}

def load_align16_local
    : PatFrag<(ops node:$ptr), (load_local node:$ptr)>, Aligned<16> {
  let IsLoad = 1;
}

def store_align8_local
    : PatFrag<(ops node:$val, node:$ptr),
              (store_local node:$val, node:$ptr)>, Aligned<8> {
  let IsStore = 1;
}

def store_align16_local
    : PatFrag<(ops node:$val, node:$ptr),
              (store_local node:$val, node:$ptr)>, Aligned<16> {
  let IsStore = 1;
}
712
// Compare-and-swap fragments for the local address space, in returning and
// "_noret" flavours, for both the plain node and the glued ("_m0") node.
let AddressSpaces = StoreAddress_local.AddrSpaces in {
  defm atomic_cmp_swap_local    : ternary_atomic_op<atomic_cmp_swap>;
  defm atomic_cmp_swap_local    : noret_ternary_atomic_op<atomic_cmp_swap>;
  defm atomic_cmp_swap_local_m0 : noret_ternary_atomic_op<atomic_cmp_swap_glue>;
  defm atomic_cmp_swap_local_m0 : ternary_atomic_op<atomic_cmp_swap_glue>;
}

// Compare-and-swap fragments for the region address space.
// NOTE(review): unlike the local group, there is no returning
// (ternary_atomic_op) expansion of the plain atomic_cmp_swap node here;
// confirm whether that omission is intentional.
let AddressSpaces = StoreAddress_region.AddrSpaces in {
  defm atomic_cmp_swap_region    : noret_ternary_atomic_op<atomic_cmp_swap>;
  defm atomic_cmp_swap_region_m0 : noret_ternary_atomic_op<atomic_cmp_swap_glue>;
  defm atomic_cmp_swap_region_m0 : ternary_atomic_op<atomic_cmp_swap_glue>;
}
725
726//===----------------------------------------------------------------------===//
727// Misc Pattern Fragments
728//===----------------------------------------------------------------------===//
729
// Named integer constants, read through the CONST record below.
class Constants {
  int TWO_PI        = 0x40c90fdb;
  int PI            = 0x40490fdb;
  int TWO_PI_INV    = 0x3e22f983;
  // 4294966784 = 4294967296 - 512 = 2^32 - 2^9
  int FP_4294966784 = 0x4f7ffffe;
  int FP16_ONE      = 0x3C00;
  int FP16_NEG_ONE  = 0xBC00;
  int FP32_ONE      = 0x3f800000;
  int FP32_NEG_ONE  = 0xbf800000;
  int FP64_ONE      = 0x3ff0000000000000;
  int FP64_NEG_ONE  = 0xbff0000000000000;
}
def CONST : Constants;

// Floating-point immediate whose APFloat value is zero.
def FP_ZERO : PatLeaf<(fpimm), [{return N->getValueAPF().isZero();}]>;

// Floating-point immediate that is exactly 1.0.
def FP_ONE : PatLeaf<(fpimm), [{return N->isExactlyValue(1.0);}]>;

// Floating-point immediate that is exactly 0.5.
def FP_HALF : PatLeaf<(fpimm), [{return N->isExactlyValue(0.5);}]>;
758
// Generic helper patterns for intrinsics
// --------------------------------------

// f32 fpow selected as exp_ieee(mul(src1, log_ieee(src0))).
class POW_Common<AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul>
    : AMDGPUPat<
  (fpow f32:$src0, f32:$src1),
  (exp_ieee (mul f32:$src1, (log_ieee f32:$src0)))
>;

// Other helper patterns
// ---------------------

// Extract element pattern: extractelt at sub_idx becomes EXTRACT_SUBREG of
// sub_reg.
class Extract_Element<ValueType sub_type, ValueType vec_type, int sub_idx,
                      SubRegIndex sub_reg>
    : AMDGPUPat<
  (sub_type (extractelt vec_type:$src, sub_idx)),
  (EXTRACT_SUBREG $src, sub_reg)
>;

// Insert element pattern: insertelt at sub_idx becomes INSERT_SUBREG into
// sub_reg.
class Insert_Element<ValueType elem_type, ValueType vec_type,
                     int sub_idx, SubRegIndex sub_reg>
    : AMDGPUPat<
  (insertelt vec_type:$vec, elem_type:$elem, sub_idx),
  (INSERT_SUBREG $vec, $elem, sub_reg)
>;

// bitconvert pattern: the source register is reused with the destination
// type.
// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
// can handle COPY instructions.
class BitConvert<ValueType dt, ValueType st, RegisterClass rc>
    : AMDGPUPat<
  (dt (bitconvert (st rc:$src0))),
  (dt rc:$src0)
>;

// AMDGPUdwordaddr pattern: the address register is passed through
// unchanged.
// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
// can handle COPY instructions.
class DwordAddrPat<ValueType vt, RegisterClass rc>
    : AMDGPUPat<
  (vt (AMDGPUdwordaddr (vt rc:$addr))),
  (vt rc:$addr)
>;

// rotr pattern: BIT_ALIGN receives src0 for its first two operands and src1
// for the third.
class ROTRPattern<Instruction BIT_ALIGN>
    : AMDGPUPat<
  (rotr i32:$src0, i32:$src1),
  (BIT_ALIGN $src0, $src0, $src1)
>;
807
808// Special conversion patterns
809
// fp_to_sint(ffloor(src + 0.5)); only matched when
// TM.Options.NoNaNsFPMath is set.
def cvt_rpi_i32_f32 : PatFrag<
  (ops node:$src),
  (fp_to_sint (ffloor (fadd $src, FP_HALF))),
  [{
    (void)N;
    return TM.Options.NoNaNsFPMath;
  }]
>;

// fp_to_sint(ffloor(src)); only matched when TM.Options.NoNaNsFPMath is
// set.
def cvt_flr_i32_f32 : PatFrag<
  (ops node:$src),
  (fp_to_sint (ffloor $src)),
  [{
    (void)N;
    return TM.Options.NoNaNsFPMath;
  }]
>;
821
// add(AMDGPUmul_i24(src0, src1), src2) selected to Inst. When HasClamp is
// set, Inst receives an extra (i1 0) operand. AddedComplexity is 2.
class IMad24Pat<Instruction Inst, bit HasClamp = 0> : AMDGPUPat<
  (add (AMDGPUmul_i24 i32:$src0, i32:$src1), i32:$src2),
  !if(HasClamp, (Inst $src0, $src1, $src2, (i1 0)),
                (Inst $src0, $src1, $src2))
> {
  let AddedComplexity = 2;
}

// Same as IMad24Pat, but for AMDGPUmul_u24.
class UMad24Pat<Instruction Inst, bit HasClamp = 0> : AMDGPUPat<
  (add (AMDGPUmul_u24 i32:$src0, i32:$src1), i32:$src2),
  !if(HasClamp, (Inst $src0, $src1, $src2, (i1 0)),
                (Inst $src0, $src1, $src2))
> {
  let AddedComplexity = 2;
}

// fdiv of the constant 1.0 by src selected to RcpInst.
class RcpPat<Instruction RcpInst, ValueType vt> : AMDGPUPat<
  (fdiv FP_ONE, vt:$src),
  (RcpInst $src)
>;
840
// Instructions which select to the same v_min_f*
def fminnum_like : PatFrags<
  (ops node:$src0, node:$src1),
  [(fminnum_ieee node:$src0, node:$src1),
   (fminnum node:$src0, node:$src1)]
>;

// Instructions which select to the same v_max_f*
def fmaxnum_like : PatFrags<
  (ops node:$src0, node:$src1),
  [(fmaxnum_ieee node:$src0, node:$src1),
   (fmaxnum node:$src0, node:$src1)]
>;

// PatFrags that additionally require the matched result to be known never
// NaN, in both the SelectionDAG and the GlobalISel predicate.
class NeverNaNPats<dag ops, list<dag> frags> : PatFrags<ops, frags> {
  let PredicateCode = [{
    return CurDAG->isKnownNeverNaN(SDValue(N,0));
  }];

  let GISelPredicateCode = [{
    return isKnownNeverNaN(MI.getOperand(0).getReg(), MRI);
  }];
}

// Known-never-NaN variants of fminnum_like / fmaxnum_like.
def fminnum_like_nnan : NeverNaNPats<
  (ops node:$src0, node:$src1),
  [(fminnum_ieee node:$src0, node:$src1),
   (fminnum node:$src0, node:$src1)]
>;

def fmaxnum_like_nnan : NeverNaNPats<
  (ops node:$src0, node:$src1),
  [(fmaxnum_ieee node:$src0, node:$src1),
   (fmaxnum node:$src0, node:$src1)]
>;

// Single-use variants of fminnum_like / fmaxnum_like.
def fminnum_like_oneuse : PatFrags<
  (ops node:$src0, node:$src1),
  [(fminnum_ieee_oneuse node:$src0, node:$src1),
   (fminnum_oneuse node:$src0, node:$src1)]
>;

def fmaxnum_like_oneuse : PatFrags<
  (ops node:$src0, node:$src1),
  [(fmaxnum_ieee_oneuse node:$src0, node:$src1),
   (fmaxnum_oneuse node:$src0, node:$src1)]
>;

// Either fmad or AMDGPUfmad_ftz.
def any_fmad : PatFrags<
  (ops node:$src0, node:$src1, node:$src2),
  [(fmad node:$src0, node:$src1, node:$src2),
   (AMDGPUfmad_ftz node:$src0, node:$src1, node:$src2)]
>;

// Either fsqrt or the int_amdgcn_sqrt intrinsic.
// FIXME: fsqrt should not select directly
def any_amdgcn_sqrt : PatFrags<
  (ops node:$src0),
  [(fsqrt node:$src0), (int_amdgcn_sqrt node:$src0)]
>;
891