//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains instruction defs that are common to all hw codegen
// targets.
//
//===----------------------------------------------------------------------===//

class AddressSpacesImpl {
  int Flat = 0;
  int Global = 1;
  int Region = 2;
  int Local = 3;
  int Constant = 4;
  int Private = 5;
  int Constant32Bit = 6;
}

def AddrSpaces : AddressSpacesImpl;


class AMDGPUInst <dag outs, dag ins, string asm = "",
  list<dag> pattern = []> : Instruction {
  field bit isRegisterLoad = 0;
  field bit isRegisterStore = 0;

  let Namespace = "AMDGPU";
  let OutOperandList = outs;
  let InOperandList = ins;
  let AsmString = asm;
  let Pattern = pattern;
  let Itinerary = NullALU;

  // SoftFail is a field the disassembler can use to provide a way for
  // instructions to not match without killing the whole decode process. It is
  // mainly used for ARM, but Tablegen expects this field to exist or it fails
  // to build the decode table.
  field bits<96> SoftFail = 0;

  let DecoderNamespace = Namespace;

  let TSFlags{63} = isRegisterLoad;
  let TSFlags{62} = isRegisterStore;
}

class AMDGPUShaderInst <dag outs, dag ins, string asm = "",
  list<dag> pattern = []> : AMDGPUInst<outs, ins, asm, pattern> {

  field bits<32> Inst = 0xffffffff;
}

//===---------------------------------------------------------------------===//
// Return instruction
//===---------------------------------------------------------------------===//

class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
: Instruction {

     let Namespace = "AMDGPU";
     dag OutOperandList = outs;
     dag InOperandList = ins;
     let Pattern = pattern;
     let AsmString = !strconcat(asmstr, "\n");
     let isPseudo = 1;
     let Itinerary = NullALU;
     bit hasIEEEFlag = 0;
     bit hasZeroOpFlag = 0;
     let mayLoad = 0;
     let mayStore = 0;
     let hasSideEffects = 0;
     let isCodeGenOnly = 1;
}

def TruePredicate : Predicate<"">;

// FIXME: Tablegen should specially support this
def FalsePredicate : Predicate<"false">;

// Add a predicate to the list if it does not already exist, to deduplicate it.
class PredConcat<list<Predicate> lst, Predicate pred> {
  list<Predicate> ret = !listconcat(lst, !listremove([pred], lst));
}
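
// For illustration (HasFoo is a hypothetical predicate, not defined in this
// file): given
//   def HasFoo : Predicate<"Subtarget->hasFoo()">;
// the bang operators above behave as
//   PredConcat<[HasFoo], TruePredicate>.ret  -->  [HasFoo, TruePredicate]
//   PredConcat<[HasFoo], HasFoo>.ret         -->  [HasFoo]
// because !listremove drops pred from [pred] when lst already contains it.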

// Get the union of two Register lists
class RegListUnion<list<Register> lstA, list<Register> lstB> {
  list<Register> ret = !listconcat(lstA, !listremove(lstB, lstA));
}

class PredicateControl {
  Predicate SubtargetPredicate = TruePredicate;
  Predicate AssemblerPredicate = TruePredicate;
  Predicate WaveSizePredicate = TruePredicate;
  list<Predicate> OtherPredicates = [];
  list<Predicate> Predicates = PredConcat<
                                 PredConcat<PredConcat<OtherPredicates,
                                                       SubtargetPredicate>.ret,
                                            AssemblerPredicate>.ret,
                                 WaveSizePredicate>.ret;
}
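
// With every field left at its default, the nested PredConcat chain above
// collapses to the one-element list [TruePredicate]: each later TruePredicate
// is already in the accumulated list, so !listremove discards it.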

class AMDGPUPat<dag pattern, dag result> : Pat<pattern, result>,
      PredicateControl, GISelFlags;

let GIIgnoreCopies = 1 in
class AMDGPUPatIgnoreCopies<dag pattern, dag result> : AMDGPUPat<pattern, result>;

let RecomputePerFunction = 1 in {
def FP16Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals != DenormalMode::getPreserveSign()">;
def FP32Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP32Denormals != DenormalMode::getPreserveSign()">;
def FP64Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals != DenormalMode::getPreserveSign()">;
def NoFP16Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals == DenormalMode::getPreserveSign()">;
def NoFP32Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP32Denormals == DenormalMode::getPreserveSign()">;
def NoFP64Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals == DenormalMode::getPreserveSign()">;
def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;
}

def FMA : Predicate<"Subtarget->hasFMA()">;

def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;

def i1imm_0 : OperandWithDefaultOps<i1, (ops (i1 0))>;

class CustomOperandClass<string name, bit optional, string parserMethod,
                         string defaultMethod>
    : AsmOperandClass {
  let Name = name;
  let PredicateMethod = "is"#name;
  let ParserMethod = parserMethod;
  let RenderMethod = "addImmOperands";
  let IsOptional = optional;
  let DefaultMethod = defaultMethod;
}

class CustomOperandProps<bit optional = 0, string name = NAME> {
  string ImmTy = "ImmTy"#name;
  string ParserMethod = "parse"#name;
  string DefaultValue = "0";
  string DefaultMethod = "[this]() { return "#
    "AMDGPUOperand::CreateImm(this, "#DefaultValue#", SMLoc(), "#
    "AMDGPUOperand::"#ImmTy#"); }";
  string PrintMethod = "print"#name;
  AsmOperandClass ParserMatchClass =
    CustomOperandClass<name, optional, ParserMethod, DefaultMethod>;
  string OperandType = "OPERAND_IMMEDIATE";
}

class CustomOperand<ValueType type, bit optional = 0, string name = NAME>
  : Operand<type>, CustomOperandProps<optional, name>;
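
// For illustration only (FooMod is a hypothetical operand, not defined in
// this file):
//   def FooMod : CustomOperand<i32, /*optional=*/1>;
// takes its name from NAME, so the assembler hooks it expects are
// AMDGPUOperand::ImmTyFooMod, parseFooMod, printFooMod and isFooMod.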

class ImmOperand<ValueType type, string name = NAME, bit optional = 0,
                 string printer = "print"#name>
    : CustomOperand<type, optional, name> {
  let ImmTy = "ImmTyNone";
  let ParserMethod = "";
  let PrintMethod = printer;
}

def s16imm : ImmOperand<i16, "S16Imm", 0, "printU16ImmOperand">;
def u16imm : ImmOperand<i16, "U16Imm", 0, "printU16ImmOperand">;

//===--------------------------------------------------------------------===//
// Custom Operands
//===--------------------------------------------------------------------===//
def brtarget   : Operand<OtherVT>;

//===----------------------------------------------------------------------===//
// Misc. PatFrags
//===----------------------------------------------------------------------===//

class HasOneUseUnaryOp<SDPatternOperator op> : PatFrag<
  (ops node:$src0),
  (op $src0),
  [{ return N->hasOneUse(); }]> {

  let GISelPredicateCode = [{
    return MRI.hasOneNonDBGUse(MI.getOperand(0).getReg());
  }];
}

class HasOneUseBinOp<SDPatternOperator op> : PatFrag<
  (ops node:$src0, node:$src1),
  (op $src0, $src1),
  [{ return N->hasOneUse(); }]> {
  let GISelPredicateCode = [{
    return MRI.hasOneNonDBGUse(MI.getOperand(0).getReg());
  }];
}

class HasOneUseTernaryOp<SDPatternOperator op> : PatFrag<
  (ops node:$src0, node:$src1, node:$src2),
  (op $src0, $src1, $src2),
  [{ return N->hasOneUse(); }]> {
  let GISelPredicateCode = [{
    return MRI.hasOneNonDBGUse(MI.getOperand(0).getReg());
  }];
}

class is_canonicalized<SDPatternOperator op> : PatFrag<
  (ops node:$src0, node:$src1),
  (op $src0, $src1),
  [{
    const SITargetLowering &Lowering =
              *static_cast<const SITargetLowering *>(getTargetLowering());

    return Lowering.isCanonicalized(*CurDAG, N->getOperand(0)) &&
      Lowering.isCanonicalized(*CurDAG, N->getOperand(1));
  }]> {

  // TODO: Improve the Legalizer for g_build_vector in Global Isel to match this class
  let GISelPredicateCode = [{
    const SITargetLowering *TLI = static_cast<const SITargetLowering *>(
      MF.getSubtarget().getTargetLowering());

    return TLI->isCanonicalized(MI.getOperand(1).getReg(), const_cast<MachineFunction&>(MF)) &&
      TLI->isCanonicalized(MI.getOperand(2).getReg(), const_cast<MachineFunction&>(MF));
  }];
}

class FoldTernaryOpPat<SDPatternOperator op1, SDPatternOperator op2> : PatFrag<
  (ops node:$src0, node:$src1, node:$src2),
  (op2 (op1 node:$src0, node:$src1), node:$src2)
>;

def imad : FoldTernaryOpPat<mul, add>;

let Properties = [SDNPCommutative, SDNPAssociative] in {
def smax_oneuse : HasOneUseBinOp<smax>;
def smin_oneuse : HasOneUseBinOp<smin>;
def umax_oneuse : HasOneUseBinOp<umax>;
def umin_oneuse : HasOneUseBinOp<umin>;

def fminnum_oneuse : HasOneUseBinOp<fminnum>;
def fmaxnum_oneuse : HasOneUseBinOp<fmaxnum>;

def fminnum_ieee_oneuse : HasOneUseBinOp<fminnum_ieee>;
def fmaxnum_ieee_oneuse : HasOneUseBinOp<fmaxnum_ieee>;


def and_oneuse : HasOneUseBinOp<and>;
def or_oneuse : HasOneUseBinOp<or>;
def xor_oneuse : HasOneUseBinOp<xor>;
} // Properties = [SDNPCommutative, SDNPAssociative]

def not_oneuse : HasOneUseUnaryOp<not>;

def add_oneuse : HasOneUseBinOp<add>;
def sub_oneuse : HasOneUseBinOp<sub>;

def srl_oneuse : HasOneUseBinOp<srl>;
def shl_oneuse : HasOneUseBinOp<shl>;

def select_oneuse : HasOneUseTernaryOp<select>;

def AMDGPUmul_u24_oneuse : HasOneUseBinOp<AMDGPUmul_u24>;
def AMDGPUmul_i24_oneuse : HasOneUseBinOp<AMDGPUmul_i24>;

//===----------------------------------------------------------------------===//
// PatFrags for shifts
//===----------------------------------------------------------------------===//

// Constrained shift PatFrags.

def csh_mask_16 : PatFrag<(ops node:$src0), (and node:$src0, imm),
  [{ return isUnneededShiftMask(N, 4); }]> {
    let GISelPredicateCode = [{ return isUnneededShiftMask(MI, 4); }];
  }

def csh_mask_32 : PatFrag<(ops node:$src0), (and node:$src0, imm),
  [{ return isUnneededShiftMask(N, 5); }]> {
    let GISelPredicateCode = [{ return isUnneededShiftMask(MI, 5); }];
  }

def csh_mask_64 : PatFrag<(ops node:$src0), (and node:$src0, imm),
  [{ return isUnneededShiftMask(N, 6); }]> {
    let GISelPredicateCode = [{ return isUnneededShiftMask(MI, 6); }];
  }

foreach width = [16, 32, 64] in {
defvar csh_mask = !cast<SDPatternOperator>("csh_mask_"#width);

def cshl_#width : PatFrags<(ops node:$src0, node:$src1),
  [(shl node:$src0, node:$src1), (shl node:$src0, (csh_mask node:$src1))]>;
defvar cshl = !cast<SDPatternOperator>("cshl_"#width);
def cshl_#width#_oneuse : HasOneUseBinOp<cshl>;
def clshl_rev_#width : PatFrag <(ops node:$src0, node:$src1),
  (cshl $src1, $src0)>;

def csrl_#width : PatFrags<(ops node:$src0, node:$src1),
  [(srl node:$src0, node:$src1), (srl node:$src0, (csh_mask node:$src1))]>;
defvar csrl = !cast<SDPatternOperator>("csrl_"#width);
def csrl_#width#_oneuse : HasOneUseBinOp<csrl>;
def clshr_rev_#width : PatFrag <(ops node:$src0, node:$src1),
  (csrl $src1, $src0)>;

def csra_#width : PatFrags<(ops node:$src0, node:$src1),
  [(sra node:$src0, node:$src1), (sra node:$src0, (csh_mask node:$src1))]>;
defvar csra = !cast<SDPatternOperator>("csra_"#width);
def csra_#width#_oneuse : HasOneUseBinOp<csra>;
def cashr_rev_#width : PatFrag <(ops node:$src0, node:$src1),
  (csra $src1, $src0)>;
} // end foreach width
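
// Reading one expansion of the loop above: cshl_32 matches either a plain
// (shl x, y) or (shl x, (and y, mask)), where csh_mask_32 proves the 5-bit
// shift-amount mask is a no-op for a 32-bit shift, and clshl_rev_32 is the
// same fragment with the two operands swapped.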

def srl_16 : PatFrag<
  (ops node:$src0), (srl_oneuse node:$src0, (i32 16))
>;


def hi_i16_elt : PatFrag<
  (ops node:$src0), (i16 (trunc (i32 (srl_16 node:$src0))))
>;


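// Matches the high half of a 32-bit value viewed as f16: a bitcast whose
// source is a right shift by exactly 16.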
def hi_f16_elt : PatLeaf<
  (vt), [{
  if (N->getOpcode() != ISD::BITCAST)
    return false;
  SDValue Tmp = N->getOperand(0);

  if (Tmp.getOpcode() != ISD::SRL)
    return false;
  if (const auto *RHS = dyn_cast<ConstantSDNode>(Tmp.getOperand(1)))
    return RHS->getZExtValue() == 16;
  return false;
}]>;

//===----------------------------------------------------------------------===//
// PatLeafs for zero immediate
//===----------------------------------------------------------------------===//

def immzero : PatLeaf<(imm), [{ return N->isZero(); }]>;
def fpimmzero : PatLeaf<(fpimm), [{ return N->isZero(); }]>;

//===----------------------------------------------------------------------===//
// PatLeafs for floating-point comparisons
//===----------------------------------------------------------------------===//

def COND_OEQ : PatFrags<(ops), [(OtherVT SETOEQ), (OtherVT SETEQ)]>;
def COND_ONE : PatFrags<(ops), [(OtherVT SETONE), (OtherVT SETNE)]>;
def COND_OGT : PatFrags<(ops), [(OtherVT SETOGT), (OtherVT SETGT)]>;
def COND_OGE : PatFrags<(ops), [(OtherVT SETOGE), (OtherVT SETGE)]>;
def COND_OLT : PatFrags<(ops), [(OtherVT SETOLT), (OtherVT SETLT)]>;
def COND_OLE : PatFrags<(ops), [(OtherVT SETOLE), (OtherVT SETLE)]>;
def COND_O   : PatFrags<(ops), [(OtherVT SETO)]>;
def COND_UO  : PatFrags<(ops), [(OtherVT SETUO)]>;

//===----------------------------------------------------------------------===//
// PatLeafs for unsigned / unordered comparisons
//===----------------------------------------------------------------------===//

def COND_UEQ : PatFrag<(ops), (OtherVT SETUEQ)>;
def COND_UNE : PatFrag<(ops), (OtherVT SETUNE)>;
def COND_UGT : PatFrag<(ops), (OtherVT SETUGT)>;
def COND_UGE : PatFrag<(ops), (OtherVT SETUGE)>;
def COND_ULT : PatFrag<(ops), (OtherVT SETULT)>;
def COND_ULE : PatFrag<(ops), (OtherVT SETULE)>;

// XXX - For some reason the R600 version prefers to use unordered
// for setne?
def COND_UNE_NE  : PatFrags<(ops), [(OtherVT SETUNE), (OtherVT SETNE)]>;

//===----------------------------------------------------------------------===//
// PatLeafs for signed comparisons
//===----------------------------------------------------------------------===//

def COND_SGT : PatFrag<(ops), (OtherVT SETGT)>;
def COND_SGE : PatFrag<(ops), (OtherVT SETGE)>;
def COND_SLT : PatFrag<(ops), (OtherVT SETLT)>;
def COND_SLE : PatFrag<(ops), (OtherVT SETLE)>;

//===----------------------------------------------------------------------===//
// PatLeafs for integer equality
//===----------------------------------------------------------------------===//

def COND_EQ : PatFrags<(ops), [(OtherVT SETEQ), (OtherVT SETUEQ)]>;
def COND_NE : PatFrags<(ops), [(OtherVT SETNE), (OtherVT SETUNE)]>;

// FIXME: Should not need code predicate
//def COND_NULL : PatLeaf<(OtherVT null_frag)>;
def COND_NULL : PatLeaf <
  (cond),
  [{(void)N; return false;}]
>;

//===----------------------------------------------------------------------===//
// PatLeafs for Texture Constants
//===----------------------------------------------------------------------===//

def TEX_ARRAY : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 9 || TType == 10 || TType == 16;
  }]
>;

def TEX_RECT : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 5;
  }]
>;

def TEX_SHADOW : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return (TType >= 6 && TType <= 8) || TType == 13;
  }]
>;

def TEX_SHADOW_ARRAY : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 11 || TType == 12 || TType == 17;
  }]
>;

//===----------------------------------------------------------------------===//
// Load/Store Pattern Fragments
//===----------------------------------------------------------------------===//

def atomic_cmp_swap_glue : SDNode <"ISD::ATOMIC_CMP_SWAP", SDTAtomic3,
  [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
>;

class AddressSpaceList<list<int> AS> {
  list<int> AddrSpaces = AS;
}

class Aligned<int Bytes> {
  int MinAlignment = Bytes;
}

class StoreHi16<SDPatternOperator op, ValueType vt> : PatFrag <
  (ops node:$value, node:$ptr), (op (srl node:$value, (i32 16)), node:$ptr)> {
  let IsStore = 1;
  let MemoryVT = vt;
}
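
// For example, StoreHi16<truncstorei16, i16> matches a 16-bit store of the
// upper half of a 32-bit value: the srl exposes bits 31..16 and MemoryVT
// narrows the store. The store_hi16_* fragments below are built from it.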

def LoadAddress_constant : AddressSpaceList<[ AddrSpaces.Constant,
                                              AddrSpaces.Constant32Bit ]>;
def LoadAddress_global : AddressSpaceList<[ AddrSpaces.Global,
                                            AddrSpaces.Constant,
                                            AddrSpaces.Constant32Bit ]>;
def StoreAddress_global : AddressSpaceList<[ AddrSpaces.Global ]>;

def LoadAddress_flat : AddressSpaceList<[ AddrSpaces.Flat,
                                          AddrSpaces.Global,
                                          AddrSpaces.Constant,
                                          AddrSpaces.Constant32Bit ]>;
def StoreAddress_flat : AddressSpaceList<[ AddrSpaces.Flat, AddrSpaces.Global ]>;

def LoadAddress_private : AddressSpaceList<[ AddrSpaces.Private ]>;
def StoreAddress_private : AddressSpaceList<[ AddrSpaces.Private ]>;

def LoadAddress_local : AddressSpaceList<[ AddrSpaces.Local ]>;
def StoreAddress_local : AddressSpaceList<[ AddrSpaces.Local ]>;

def LoadAddress_region : AddressSpaceList<[ AddrSpaces.Region ]>;
def StoreAddress_region : AddressSpaceList<[ AddrSpaces.Region ]>;



foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {

def load_#as : PatFrag<(ops node:$ptr), (unindexedload node:$ptr)> {
  let IsLoad = 1;
  let IsNonExtLoad = 1;
}

def extloadi8_#as  : PatFrag<(ops node:$ptr), (extloadi8 node:$ptr)> {
  let IsLoad = 1;
}

def extloadi16_#as : PatFrag<(ops node:$ptr), (extloadi16 node:$ptr)> {
  let IsLoad = 1;
}

def sextloadi8_#as  : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr)> {
  let IsLoad = 1;
}

def sextloadi16_#as : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr)> {
  let IsLoad = 1;
}

def zextloadi8_#as  : PatFrag<(ops node:$ptr), (zextloadi8 node:$ptr)> {
  let IsLoad = 1;
}

def zextloadi16_#as : PatFrag<(ops node:$ptr), (zextloadi16 node:$ptr)> {
  let IsLoad = 1;
}

def atomic_load_8_#as : PatFrag<(ops node:$ptr), (atomic_load_8 node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i8;
}

def atomic_load_16_#as : PatFrag<(ops node:$ptr), (atomic_load_16 node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i16;
}

def atomic_load_32_#as : PatFrag<(ops node:$ptr), (atomic_load_32 node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i32;
}

def atomic_load_64_#as : PatFrag<(ops node:$ptr), (atomic_load_64 node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i64;
}
} // End let AddressSpaces
} // End foreach as


foreach as = [ "global", "flat", "local", "private", "region" ] in {
let IsStore = 1, AddressSpaces = !cast<AddressSpaceList>("StoreAddress_"#as).AddrSpaces in {
def store_#as : PatFrag<(ops node:$val, node:$ptr),
                    (unindexedstore node:$val, node:$ptr)> {
  let IsTruncStore = 0;
}

// truncstore fragments.
def truncstore_#as : PatFrag<(ops node:$val, node:$ptr),
                             (unindexedstore node:$val, node:$ptr)> {
  let IsTruncStore = 1;
}

// TODO: We don't really need the truncstore here. We can use
// unindexedstore with MemoryVT directly, which will save an
// unnecessary check that the memory size is less than the value type
// in the generated matcher table.
def truncstorei8_#as : PatFrag<(ops node:$val, node:$ptr),
                               (truncstorei8 node:$val, node:$ptr)>;
def truncstorei16_#as : PatFrag<(ops node:$val, node:$ptr),
                                (truncstorei16 node:$val, node:$ptr)>;

def store_hi16_#as : StoreHi16 <truncstorei16, i16>;
def truncstorei8_hi16_#as : StoreHi16<truncstorei8, i8>;
def truncstorei16_hi16_#as : StoreHi16<truncstorei16, i16>;

} // End let IsStore = 1, AddressSpaces = ...

let IsAtomic = 1, AddressSpaces = !cast<AddressSpaceList>("StoreAddress_"#as).AddrSpaces in {
def atomic_store_8_#as : PatFrag<(ops node:$ptr, node:$val),
                                 (atomic_store_8 node:$ptr, node:$val)>;
def atomic_store_16_#as : PatFrag<(ops node:$ptr, node:$val),
                                  (atomic_store_16 node:$ptr, node:$val)>;
def atomic_store_32_#as : PatFrag<(ops node:$ptr, node:$val),
                                  (atomic_store_32 node:$ptr, node:$val)>;
def atomic_store_64_#as : PatFrag<(ops node:$ptr, node:$val),
                                  (atomic_store_64 node:$ptr, node:$val)>;
}
} // End foreach as

multiclass noret_op {
  let HasNoUse = true in
  def "_noret" : PatFrag<(ops node:$ptr, node:$data),
    (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>;
}
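
// For illustration (foo is a hypothetical fragment name): "defm foo :
// noret_op" defines foo_noret, which matches foo only when its result is
// unused (HasNoUse), the shape needed to select no-return atomic variants.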

multiclass global_addr_space_atomic_op {
  def "_noret_global_addrspace" :
    PatFrag<(ops node:$ptr, node:$data),
            (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>{
      let HasNoUse = true;
      let AddressSpaces = LoadAddress_global.AddrSpaces;
      let IsAtomic = 1;
    }
    def "_global_addrspace" :
    PatFrag<(ops node:$ptr, node:$data),
            (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>{
      let AddressSpaces = LoadAddress_global.AddrSpaces;
      let IsAtomic = 1;
    }
}

multiclass flat_addr_space_atomic_op {
  def "_noret_flat_addrspace" :
    PatFrag<(ops node:$ptr, node:$data),
            (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>{
      let HasNoUse = true;
      let AddressSpaces = LoadAddress_flat.AddrSpaces;
      let IsAtomic = 1;
    }
    def "_flat_addrspace" :
    PatFrag<(ops node:$ptr, node:$data),
            (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>{
      let AddressSpaces = LoadAddress_flat.AddrSpaces;
      let IsAtomic = 1;
    }
}

multiclass local_addr_space_atomic_op {
  def "_noret_local_addrspace" :
    PatFrag<(ops node:$ptr, node:$data),
            (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>{
      let HasNoUse = true;
      let AddressSpaces = LoadAddress_local.AddrSpaces;
      let IsAtomic = 1;
    }
    def "_local_addrspace" :
    PatFrag<(ops node:$ptr, node:$data),
            (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>{
      let AddressSpaces = LoadAddress_local.AddrSpaces;
      let IsAtomic = 1;
    }
}
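
// For example, the first defm below produces
// int_amdgcn_flat_atomic_fadd_flat_addrspace plus a matching
// _noret_flat_addrspace fragment, both limited to the LoadAddress_flat
// address-space list.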

defm int_amdgcn_flat_atomic_fadd : flat_addr_space_atomic_op;
defm int_amdgcn_flat_atomic_fadd_v2bf16 : noret_op;
defm int_amdgcn_flat_atomic_fmin : noret_op;
defm int_amdgcn_flat_atomic_fmax : noret_op;
defm int_amdgcn_global_atomic_fadd : global_addr_space_atomic_op;
defm int_amdgcn_flat_atomic_fadd : global_addr_space_atomic_op;
defm int_amdgcn_global_atomic_fadd_v2bf16 : noret_op;
defm int_amdgcn_global_atomic_fmin : noret_op;
defm int_amdgcn_global_atomic_fmax : noret_op;
defm int_amdgcn_flat_atomic_fadd : local_addr_space_atomic_op;
defm int_amdgcn_ds_fadd_v2bf16 : noret_op;

multiclass noret_binary_atomic_op<SDNode atomic_op, bit IsInt = 1> {
  let HasNoUse = true in
  defm "_noret" : binary_atomic_op<atomic_op, IsInt>;
}

multiclass noret_ternary_atomic_op<SDNode atomic_op> {
  let HasNoUse = true in
  defm "_noret" : ternary_atomic_op<atomic_op>;
}

multiclass binary_atomic_op_all_as<SDNode atomic_op, bit IsInt = 1> {
  foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
    let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {
      defm "_"#as : binary_atomic_op<atomic_op, IsInt>;
      defm "_"#as : noret_binary_atomic_op<atomic_op, IsInt>;
    }
  }
}
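
// So "defm atomic_swap : binary_atomic_op_all_as<atomic_swap>" below yields
// per-address-space families such as atomic_swap_global_* together with the
// corresponding HasNoUse _noret fragments (the per-width suffixes come from
// the generic binary_atomic_op multiclass).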

defm atomic_swap : binary_atomic_op_all_as<atomic_swap>;
defm atomic_load_add : binary_atomic_op_all_as<atomic_load_add>;
defm atomic_load_and : binary_atomic_op_all_as<atomic_load_and>;
defm atomic_load_max : binary_atomic_op_all_as<atomic_load_max>;
defm atomic_load_min : binary_atomic_op_all_as<atomic_load_min>;
defm atomic_load_or : binary_atomic_op_all_as<atomic_load_or>;
defm atomic_load_sub : binary_atomic_op_all_as<atomic_load_sub>;
defm atomic_load_umax : binary_atomic_op_all_as<atomic_load_umax>;
defm atomic_load_umin : binary_atomic_op_all_as<atomic_load_umin>;
defm atomic_load_xor : binary_atomic_op_all_as<atomic_load_xor>;
defm atomic_load_fadd : binary_atomic_op_all_as<atomic_load_fadd, 0>;
defm atomic_load_uinc_wrap : binary_atomic_op_all_as<atomic_load_uinc_wrap>;
defm atomic_load_udec_wrap : binary_atomic_op_all_as<atomic_load_udec_wrap>;
let MemoryVT = v2f16 in
defm atomic_load_fadd_v2f16 : binary_atomic_op_all_as<atomic_load_fadd, 0>;
defm AMDGPUatomic_cmp_swap : binary_atomic_op_all_as<AMDGPUatomic_cmp_swap>;

def load_align8_local : PatFrag<(ops node:$ptr), (load_local node:$ptr)>,
                       Aligned<8> {
  let IsLoad = 1;
}

def load_align16_local : PatFrag<(ops node:$ptr), (load_local node:$ptr)>,
                        Aligned<16> {
  let IsLoad = 1;
}

def store_align8_local: PatFrag<(ops node:$val, node:$ptr),
                                (store_local node:$val, node:$ptr)>, Aligned<8> {
  let IsStore = 1;
}

def store_align16_local: PatFrag<(ops node:$val, node:$ptr),
                                (store_local node:$val, node:$ptr)>, Aligned<16> {
  let IsStore = 1;
}

let AddressSpaces = StoreAddress_local.AddrSpaces in {
defm atomic_cmp_swap_local : ternary_atomic_op<atomic_cmp_swap>;
defm atomic_cmp_swap_local : noret_ternary_atomic_op<atomic_cmp_swap>;
defm atomic_cmp_swap_local_m0 : noret_ternary_atomic_op<atomic_cmp_swap_glue>;
defm atomic_cmp_swap_local_m0 : ternary_atomic_op<atomic_cmp_swap_glue>;
}

let AddressSpaces = StoreAddress_region.AddrSpaces in {
defm atomic_cmp_swap_region : noret_ternary_atomic_op<atomic_cmp_swap>;
defm atomic_cmp_swap_region_m0 : noret_ternary_atomic_op<atomic_cmp_swap_glue>;
defm atomic_cmp_swap_region_m0 : ternary_atomic_op<atomic_cmp_swap_glue>;
}

//===----------------------------------------------------------------------===//
// Misc Pattern Fragments
//===----------------------------------------------------------------------===//

class Constants {
int TWO_PI = 0x40c90fdb;
int PI = 0x40490fdb;
int TWO_PI_INV = 0x3e22f983;
int FP_4294966784 = 0x4f7ffffe; // 4294966784 = 4294967296 - 512 = 2^32 - 2^9
int FP16_ONE = 0x3C00;
int FP16_NEG_ONE = 0xBC00;
int FP32_ONE = 0x3f800000;
int FP32_NEG_ONE = 0xbf800000;
int FP64_ONE = 0x3ff0000000000000;
int FP64_NEG_ONE = 0xbff0000000000000;
}
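
// These are IEEE-754 bit patterns stored as integers, e.g. FP32_ONE
// (0x3f800000) is 1.0f and FP16_ONE (0x3C00) is 1.0 in half precision, so
// they can be used directly as literal instruction operands.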
def CONST : Constants;

def FP_ZERO : PatLeaf <
  (fpimm),
  [{return N->getValueAPF().isZero();}]
>;

def FP_ONE : PatLeaf <
  (fpimm),
  [{return N->isExactlyValue(1.0);}]
>;

def FP_HALF : PatLeaf <
  (fpimm),
  [{return N->isExactlyValue(0.5);}]
>;

/* Generic helper patterns for intrinsics */
/* -------------------------------------- */

class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul>
  : AMDGPUPat <
  (fpow f32:$src0, f32:$src1),
  (exp_ieee (mul f32:$src1, (log_ieee f32:$src0)))
>;

/* Other helper patterns */
/* --------------------- */

/* Extract element pattern */
class Extract_Element <ValueType sub_type, ValueType vec_type, int sub_idx,
                       SubRegIndex sub_reg>
  : AMDGPUPat<
  (sub_type (extractelt vec_type:$src, sub_idx)),
  (EXTRACT_SUBREG $src, sub_reg)
>;

/* Insert element pattern */
class Insert_Element <ValueType elem_type, ValueType vec_type,
                      int sub_idx, SubRegIndex sub_reg>
  : AMDGPUPat <
  (insertelt vec_type:$vec, elem_type:$elem, sub_idx),
  (INSERT_SUBREG $vec, $elem, sub_reg)
>;

// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
// can handle COPY instructions.
// bitconvert pattern
class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : AMDGPUPat <
  (dt (bitconvert (st rc:$src0))),
  (dt rc:$src0)
>;

// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
// can handle COPY instructions.
class DwordAddrPat<ValueType vt, RegisterClass rc> : AMDGPUPat <
  (vt (AMDGPUdwordaddr (vt rc:$addr))),
  (vt rc:$addr)
>;

// rotr pattern
class ROTRPattern <Instruction BIT_ALIGN> : AMDGPUPat <
  (rotr i32:$src0, i32:$src1),
  (BIT_ALIGN $src0, $src0, $src1)
>;

// Special conversion patterns

def cvt_rpi_i32_f32 : PatFrag <
  (ops node:$src),
  (fp_to_sint (ffloor (fadd $src, FP_HALF))),
  [{ (void) N; return TM.Options.NoNaNsFPMath; }]
>;

def cvt_flr_i32_f32 : PatFrag <
  (ops node:$src),
  (fp_to_sint (ffloor $src)),
  [{ (void)N; return TM.Options.NoNaNsFPMath; }]
>;

let AddedComplexity = 2 in {
class IMad24Pat<Instruction Inst, bit HasClamp = 0> : AMDGPUPat <
  (add (AMDGPUmul_i24 i32:$src0, i32:$src1), i32:$src2),
  !if(HasClamp, (Inst $src0, $src1, $src2, (i1 0)),
                (Inst $src0, $src1, $src2))
>;

class UMad24Pat<Instruction Inst, bit HasClamp = 0> : AMDGPUPat <
  (add (AMDGPUmul_u24 i32:$src0, i32:$src1), i32:$src2),
  !if(HasClamp, (Inst $src0, $src1, $src2, (i1 0)),
                (Inst $src0, $src1, $src2))
>;
} // AddedComplexity.

class RcpPat<Instruction RcpInst, ValueType vt> : AMDGPUPat <
  (fdiv FP_ONE, vt:$src),
  (RcpInst $src)
>;

// Instructions which select to the same v_min_f*
def fminnum_like : PatFrags<(ops node:$src0, node:$src1),
  [(fminnum_ieee node:$src0, node:$src1),
   (fminnum node:$src0, node:$src1)]
>;

// Instructions which select to the same v_max_f*
def fmaxnum_like : PatFrags<(ops node:$src0, node:$src1),
  [(fmaxnum_ieee node:$src0, node:$src1),
   (fmaxnum node:$src0, node:$src1)]
>;

class NeverNaNPats<dag ops, list<dag> frags> : PatFrags<ops, frags> {
  let PredicateCode = [{
    return CurDAG->isKnownNeverNaN(SDValue(N,0));
  }];
  let GISelPredicateCode = [{
    return isKnownNeverNaN(MI.getOperand(0).getReg(), MRI);
  }];
}

def fminnum_like_nnan : NeverNaNPats<(ops node:$src0, node:$src1),
  [(fminnum_ieee node:$src0, node:$src1),
   (fminnum node:$src0, node:$src1)]
>;

def fmaxnum_like_nnan : NeverNaNPats<(ops node:$src0, node:$src1),
  [(fmaxnum_ieee node:$src0, node:$src1),
   (fmaxnum node:$src0, node:$src1)]
>;

def fminnum_like_oneuse : PatFrags<(ops node:$src0, node:$src1),
  [(fminnum_ieee_oneuse node:$src0, node:$src1),
   (fminnum_oneuse node:$src0, node:$src1)]
>;

def fmaxnum_like_oneuse : PatFrags<(ops node:$src0, node:$src1),
  [(fmaxnum_ieee_oneuse node:$src0, node:$src1),
   (fmaxnum_oneuse node:$src0, node:$src1)]
>;

def any_fmad : PatFrags<(ops node:$src0, node:$src1, node:$src2),
  [(fmad node:$src0, node:$src1, node:$src2),
   (AMDGPUfmad_ftz node:$src0, node:$src1, node:$src2)]
>;

// FIXME: fsqrt should not select directly
def any_amdgcn_sqrt : PatFrags<(ops node:$src0),
  [(fsqrt node:$src0), (int_amdgcn_sqrt node:$src0)]
>;
862