xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td (revision 1ed2ef42e01771f5d8ca9be61e07dcf0fd47feba)
1//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains instruction defs that are common to all hw codegen
10// targets.
11//
12//===----------------------------------------------------------------------===//
13
// Named table of address-space numbers. Other records in this file read the
// values as AddrSpaces.<Name>.
def AddrSpaces {
  int Flat          = 0;
  int Global        = 1;
  int Region        = 2;
  int Local         = 3;
  int Constant      = 4;
  int Private       = 5;
  int Constant32Bit = 6;
}
23
24
// Common base for AMDGPU instruction records.
//
//   outs / ins - output and input operand lists.
//   asm        - assembly string (empty by default).
//   pattern    - selection patterns (empty by default).
//
// Two extra flag bits, isRegisterLoad and isRegisterStore, are declared here
// and exported through the top two bits of TSFlags.
class AMDGPUInst<dag outs, dag ins, string asm = "", list<dag> pattern = []>
    : Instruction {
  field bit isRegisterLoad = 0;
  field bit isRegisterStore = 0;

  let Namespace = "AMDGPU";
  let OutOperandList = outs;
  let InOperandList = ins;
  let AsmString = asm;
  let Pattern = pattern;
  let Itinerary = NullALU;

  // The disassembler can use SoftFail to let an instruction fail to match
  // without aborting the whole decode. It is mainly an ARM feature, but
  // TableGen requires the field to exist in order to build the decode table.
  // FIXME: If this is smaller than largest instruction, DecodeEmitter crashes
  field bits<128> SoftFail = 0;

  // Decoder tables are grouped under the same name as the namespace.
  let DecoderNamespace = Namespace;

  // Publish the register load/store markers in the target-specific flags.
  let TSFlags{63} = isRegisterLoad;
  let TSFlags{62} = isRegisterStore;
}
48
// AMDGPUInst variant that additionally carries a 32-bit Inst field, which
// defaults to all ones.
class AMDGPUShaderInst<dag outs, dag ins, string asm = "",
                       list<dag> pattern = []>
    : AMDGPUInst<outs, ins, asm, pattern> {
  field bits<32> Inst = 0xffffffff;
}
54
55//===---------------------------------------------------------------------===//
// Pseudo instruction format
57//===---------------------------------------------------------------------===//
58
// Format for code-gen-only pseudo instructions. The assembly string gets a
// trailing newline appended, and the record is marked as having no memory
// access and no side effects.
class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
    : Instruction {
  let Namespace = "AMDGPU";

  dag OutOperandList = outs;
  dag InOperandList = ins;
  let Pattern = pattern;
  let AsmString = asmstr # "\n";
  let Itinerary = NullALU;

  // Pseudo, never produced by the assembler/disassembler.
  let isPseudo = 1;
  let isCodeGenOnly = 1;

  bit hasIEEEFlag = 0;
  bit hasZeroOpFlag = 0;

  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;
}
76
// Union of two register lists, exposed as `ret`: every entry of lstA followed
// by the entries of lstB that do not already occur in lstA.
class RegListUnion<list<Register> lstA, list<Register> lstB> {
  list<Register> ret =
    !listconcat(lstA,
                !listremove(lstB, lstA));
}
81
// Selection pattern that also mixes in PredicateControl and GISelFlags.
class AMDGPUPat<dag pattern, dag result>
    : Pat<pattern, result>, PredicateControl, GISelFlags;

// Same as AMDGPUPat, with the GIIgnoreCopies flag switched on.
class AMDGPUPatIgnoreCopies<dag pattern, dag result>
    : AMDGPUPat<pattern, result> {
  let GIIgnoreCopies = 1;
}
87
// Predicates flagged RecomputePerFunction: most query the current
// MachineFunction's SIMachineFunctionInfo mode, UnsafeFPMath queries TM.
let RecomputePerFunction = 1 in {

// Denormal handling. FP16 and FP64 test the same FP64FP16Denormals mode
// field; FP32 has its own field.
def FP16Denormals
  : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals != DenormalMode::getPreserveSign()">;
def FP32Denormals
  : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP32Denormals != DenormalMode::getPreserveSign()">;
def FP64Denormals
  : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals != DenormalMode::getPreserveSign()">;

def NoFP16Denormals
  : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals == DenormalMode::getPreserveSign()">;
def NoFP32Denormals
  : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP32Denormals == DenormalMode::getPreserveSign()">;
def NoFP64Denormals
  : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals == DenormalMode::getPreserveSign()">;

// IEEE mode bit of the function's mode.
def IEEEModeEnabled
  : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().IEEE">;
def IEEEModeDisabled
  : Predicate<"!MF->getInfo<SIMachineFunctionInfo>()->getMode().IEEE">;

// Target-machine option rather than a function mode.
def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;

} // End let RecomputePerFunction = 1

// Subtarget feature check; not recomputed per function.
def FMA : Predicate<"Subtarget->hasFMA()">;
101
// i32 operand whose default value is 0.
def InstFlag : OperandWithDefaultOps<i32, (ops (i32 0))>;

// i1 operand whose default value is 0.
def i1imm_0 : OperandWithDefaultOps<i1, (ops (i1 0))>;
105
// AsmOperandClass whose name, optionality and predicate/parser/default
// methods are supplied by the caller. Operands are always rendered with
// "addImmOperands".
class CustomOperandClass<string name, bit optional, string predicateMethod,
                         string parserMethod, string defaultMethod>
    : AsmOperandClass {
  let Name = name;
  let IsOptional = optional;

  let PredicateMethod = predicateMethod;
  let ParserMethod = parserMethod;
  let DefaultMethod = defaultMethod;

  let RenderMethod = "addImmOperands";
}
116
// Property bundle for a custom assembler operand. Every method name is
// derived from `name` by prefixing it (ImmTy<name>, is<name>, parse<name>,
// print<name>); subclasses may override individual fields with `let`.
class CustomOperandProps<bit optional = 0, string name = NAME> {
  string ImmTy = "ImmTy" # name;
  string PredicateMethod = "is" # name;
  string ParserMethod = "parse" # name;
  string DefaultValue = "0";

  // C++ lambda text that builds an immediate operand holding DefaultValue,
  // tagged with this operand's ImmTy.
  string DefaultMethod =
    "[this]() { return AMDGPUOperand::CreateImm(this, " # DefaultValue #
    ", SMLoc(), AMDGPUOperand::" # ImmTy # "); }";

  string PrintMethod = "print" # name;

  AsmOperandClass ParserMatchClass =
    CustomOperandClass<name, optional, PredicateMethod, ParserMethod,
                       DefaultMethod>;

  string OperandType = "OPERAND_IMMEDIATE";
}
131
// Operand of value type `type` combined with the CustomOperandProps bundle.
class CustomOperand<ValueType type, bit optional = 0, string name = NAME>
    : Operand<type>,
      CustomOperandProps<optional, name>;

// Plain immediate operand: ImmTy is "ImmTyNone", there is no custom parser
// method, and the print method is chosen by the caller (print<name> by
// default).
class ImmOperand<ValueType type, string name = NAME, bit optional = 0,
                 string printer = "print" # name>
    : CustomOperand<type, optional, name> {
  let ImmTy = "ImmTyNone";
  let ParserMethod = "";
  let PrintMethod = printer;
}
142
// 16-bit immediate operands. Both the "S16Imm" and "U16Imm" flavours are
// printed with printU16ImmOperand.
class S16ImmOperand
    : ImmOperand<i16, "S16Imm", 0, "printU16ImmOperand">;

def s16imm : S16ImmOperand;
def u16imm : ImmOperand<i16, "U16Imm", 0, "printU16ImmOperand">;
147
// Wraps an existing CustomOperand `op` and tightens its match predicate: the
// generated predicate is a lambda that requires both op's own predicate
// method and the extra C++ expression `valuePredicate` to hold. ImmTy,
// parser, default value/method and printer are all inherited from `op`.
class ValuePredicatedOperand<CustomOperand op, string valuePredicate,
                             bit optional = 0>
    : CustomOperand<op.Type, optional> {
  defvar OpPredicate = op.ParserMatchClass.PredicateMethod;

  let ImmTy = op.ImmTy;
  let PredicateMethod =
    "getPredicate([](const AMDGPUOperand &Op) -> bool { return Op." #
    OpPredicate # "() && " # valuePredicate # "; })";
  let ParserMethod = op.ParserMatchClass.ParserMethod;
  let DefaultValue = op.DefaultValue;
  let DefaultMethod = op.DefaultMethod;
  let PrintMethod = op.PrintMethod;
}
161
162//===--------------------------------------------------------------------===//
163// Custom Operands
164//===--------------------------------------------------------------------===//
// Operand of value type OtherVT.
def brtarget : Operand<OtherVT>;
166
167//===----------------------------------------------------------------------===//
168// Misc. PatFrags
169//===----------------------------------------------------------------------===//
170
// Wrappers that match `op` applied to one, two or three operands and set the
// HasOneUse flag on the fragment.
let HasOneUse = 1 in {

class HasOneUseUnaryOp<SDPatternOperator op>
    : PatFrag<(ops node:$src0),
              (op $src0)>;

class HasOneUseBinOp<SDPatternOperator op>
    : PatFrag<(ops node:$src0, node:$src1),
              (op $src0, $src1)>;

class HasOneUseTernaryOp<SDPatternOperator op>
    : PatFrag<(ops node:$src0, node:$src1, node:$src2),
              (op $src0, $src1, $src2)>;

} // End let HasOneUse = 1
188
// Matches a unary `op` only when SITargetLowering::isCanonicalized accepts
// its source operand. The SelectionDAG predicate checks node operand 0; the
// GlobalISel predicate checks the register in machine operand 1.
class is_canonicalized_1<SDPatternOperator op>
    : PatFrag<(ops node:$src0), (op $src0), [{
    const SITargetLowering &Lowering =
              *static_cast<const SITargetLowering *>(getTargetLowering());

    return Lowering.isCanonicalized(*CurDAG, N->getOperand(0));
   }]> {
  let GISelPredicateCode = [{
    const SITargetLowering *TLI = static_cast<const SITargetLowering *>(
      MF.getSubtarget().getTargetLowering());

    return TLI->isCanonicalized(MI.getOperand(1).getReg(), MF);
  }];
}
206
// Two-operand counterpart of is_canonicalized_1: both sources must pass
// SITargetLowering::isCanonicalized (node operands 0 and 1 for SelectionDAG,
// machine operands 1 and 2 for GlobalISel).
class is_canonicalized_2<SDPatternOperator op>
    : PatFrag<(ops node:$src0, node:$src1), (op $src0, $src1), [{
    const SITargetLowering &Lowering =
              *static_cast<const SITargetLowering *>(getTargetLowering());

    return Lowering.isCanonicalized(*CurDAG, N->getOperand(0)) &&
      Lowering.isCanonicalized(*CurDAG, N->getOperand(1));
   }]> {
  // TODO: Improve the Legalizer for g_build_vector in Global Isel to match
  // this class
  let GISelPredicateCode = [{
    const SITargetLowering *TLI = static_cast<const SITargetLowering *>(
      MF.getSubtarget().getTargetLowering());

    return TLI->isCanonicalized(MI.getOperand(1).getReg(), MF) &&
      TLI->isCanonicalized(MI.getOperand(2).getReg(), MF);
  }];
}
227
// Three-operand fragment matching op2(op1(src0, src1), src2).
class FoldTernaryOpPat<SDPatternOperator op1, SDPatternOperator op2>
    : PatFrag<(ops node:$src0, node:$src1, node:$src2),
              (op2 (op1 node:$src0, node:$src1), node:$src2)>;

// Integer multiply-add: add(mul(src0, src1), src2).
def imad : FoldTernaryOpPat<mul, add>;
234
// One-use wrappers that are additionally marked commutative and associative.
let Properties = [SDNPCommutative, SDNPAssociative] in {

// Integer min/max.
def smax_oneuse : HasOneUseBinOp<smax>;
def smin_oneuse : HasOneUseBinOp<smin>;
def umax_oneuse : HasOneUseBinOp<umax>;
def umin_oneuse : HasOneUseBinOp<umin>;

// Floating-point min/max.
def fminnum_oneuse  : HasOneUseBinOp<fminnum>;
def fmaxnum_oneuse  : HasOneUseBinOp<fmaxnum>;
def fminimum_oneuse : HasOneUseBinOp<fminimum>;
def fmaximum_oneuse : HasOneUseBinOp<fmaximum>;

def fminnum_ieee_oneuse : HasOneUseBinOp<fminnum_ieee>;
def fmaxnum_ieee_oneuse : HasOneUseBinOp<fmaxnum_ieee>;

// Bitwise logic.
def and_oneuse : HasOneUseBinOp<and>;
def or_oneuse  : HasOneUseBinOp<or>;
def xor_oneuse : HasOneUseBinOp<xor>;

} // Properties = [SDNPCommutative, SDNPAssociative]

// One-use wrappers without the commutative/associative properties.
def not_oneuse : HasOneUseUnaryOp<not>;

def add_oneuse : HasOneUseBinOp<add>;
def sub_oneuse : HasOneUseBinOp<sub>;

def srl_oneuse : HasOneUseBinOp<srl>;
def shl_oneuse : HasOneUseBinOp<shl>;

def select_oneuse : HasOneUseTernaryOp<select>;

def AMDGPUmul_u24_oneuse : HasOneUseBinOp<AMDGPUmul_u24>;
def AMDGPUmul_i24_oneuse : HasOneUseBinOp<AMDGPUmul_i24>;
267
268//===----------------------------------------------------------------------===//
269// PatFrags for shifts
270//===----------------------------------------------------------------------===//
271
// Constrained shift PatFrags.
//
// csh_mask_<W> matches (and x, imm) when isUnneededShiftMask accepts the node
// for a shift amount that is log2(W) bits wide (4, 5 and 6 bits for widths
// 16, 32 and 64).

def csh_mask_16
  : PatFrag<(ops node:$src0), (and node:$src0, imm),
            [{ return isUnneededShiftMask(N, 4); }]> {
  let GISelPredicateCode = [{ return isUnneededShiftMask(MI, 4); }];
}

def csh_mask_32
  : PatFrag<(ops node:$src0), (and node:$src0, imm),
            [{ return isUnneededShiftMask(N, 5); }]> {
  let GISelPredicateCode = [{ return isUnneededShiftMask(MI, 5); }];
}

def csh_mask_64
  : PatFrag<(ops node:$src0), (and node:$src0, imm),
            [{ return isUnneededShiftMask(N, 6); }]> {
  let GISelPredicateCode = [{ return isUnneededShiftMask(MI, 6); }];
}
288
// For each width W this emits, for shl / srl / sra:
//   c<sh>_W         - the shift, with or without a csh_mask_W on the amount
//   c<sh>_W_oneuse  - one-use form of the above
//   c*_rev_W        - the same shift with its two operands swapped
foreach width = [16, 32, 64] in {
defvar csh_mask = !cast<SDPatternOperator>("csh_mask_"#width);

// Shift left.
def cshl_#width
  : PatFrags<(ops node:$src0, node:$src1),
             [(shl node:$src0, node:$src1),
              (shl node:$src0, (csh_mask node:$src1))]>;
defvar cshl = !cast<SDPatternOperator>("cshl_"#width);
def cshl_#width#_oneuse : HasOneUseBinOp<cshl>;
def clshl_rev_#width
  : PatFrag<(ops node:$src0, node:$src1), (cshl $src1, $src0)>;

// Logical shift right.
def csrl_#width
  : PatFrags<(ops node:$src0, node:$src1),
             [(srl node:$src0, node:$src1),
              (srl node:$src0, (csh_mask node:$src1))]>;
defvar csrl = !cast<SDPatternOperator>("csrl_"#width);
def csrl_#width#_oneuse : HasOneUseBinOp<csrl>;
def clshr_rev_#width
  : PatFrag<(ops node:$src0, node:$src1), (csrl $src1, $src0)>;

// Arithmetic shift right.
def csra_#width
  : PatFrags<(ops node:$src0, node:$src1),
             [(sra node:$src0, node:$src1),
              (sra node:$src0, (csh_mask node:$src1))]>;
defvar csra = !cast<SDPatternOperator>("csra_"#width);
def csra_#width#_oneuse : HasOneUseBinOp<csra>;
def cashr_rev_#width
  : PatFrag<(ops node:$src0, node:$src1), (csra $src1, $src0)>;
} // end foreach width
313
// One-use logical shift right by the i32 constant 16.
def srl_16
  : PatFrag<(ops node:$src0),
            (srl_oneuse node:$src0, (i32 16))>;

// i16 obtained by truncating an i32 that was shifted right by 16.
def hi_i16_elt
  : PatFrag<(ops node:$src0),
            (i16 (trunc (i32 (srl_16 node:$src0))))>;
322
323
// Matches a value of the form (bitcast (srl x, C)) where C is the constant
// 16, i.e. a bitcast of something that was logically shifted right by 16.
//
// The predicate rejects the node unless:
//   * the node itself is an ISD::BITCAST,
//   * its operand is an ISD::SRL, and
//   * the shift amount is a ConstantSDNode whose zero-extended value is 16.
def hi_f16_elt : PatLeaf<
  (vt), [{
  if (N->getOpcode() != ISD::BITCAST)
    return false;

  SDValue Tmp = N->getOperand(0);
  if (Tmp.getOpcode() != ISD::SRL)
    return false;

  // A non-constant shift amount can never be proven to be 16.
  if (const auto *RHS = dyn_cast<ConstantSDNode>(Tmp.getOperand(1)))
    return RHS->getZExtValue() == 16;
  return false;
}]>;
336
337//===----------------------------------------------------------------------===//
338// PatLeafs for zero immediate
339//===----------------------------------------------------------------------===//
340
// Integer and floating-point immediates for which isZero() holds.
def immzero   : PatLeaf<(imm),   [{ return N->isZero(); }]>;
def fpimmzero : PatLeaf<(fpimm), [{ return N->isZero(); }]>;
343
344//===----------------------------------------------------------------------===//
345// PatLeafs for floating-point comparisons
346//===----------------------------------------------------------------------===//
347
// Each ordered condition also accepts the plain (unprefixed) condition code.
def COND_OEQ : PatFrags<(ops), [(OtherVT SETOEQ),
                                (OtherVT SETEQ)]>;
def COND_ONE : PatFrags<(ops), [(OtherVT SETONE),
                                (OtherVT SETNE)]>;
def COND_OGT : PatFrags<(ops), [(OtherVT SETOGT),
                                (OtherVT SETGT)]>;
def COND_OGE : PatFrags<(ops), [(OtherVT SETOGE),
                                (OtherVT SETGE)]>;
def COND_OLT : PatFrags<(ops), [(OtherVT SETOLT),
                                (OtherVT SETLT)]>;
def COND_OLE : PatFrags<(ops), [(OtherVT SETOLE),
                                (OtherVT SETLE)]>;

// Ordered / unordered tests have a single spelling each.
def COND_O   : PatFrags<(ops), [(OtherVT SETO)]>;
def COND_UO  : PatFrags<(ops), [(OtherVT SETUO)]>;
356
357//===----------------------------------------------------------------------===//
358// PatLeafs for unsigned / unordered comparisons
359//===----------------------------------------------------------------------===//
360
// One fragment per SETU* condition code.
def COND_UEQ : PatFrag<(ops), (OtherVT SETUEQ)>;
def COND_UNE : PatFrag<(ops), (OtherVT SETUNE)>;
def COND_UGT : PatFrag<(ops), (OtherVT SETUGT)>;
def COND_UGE : PatFrag<(ops), (OtherVT SETUGE)>;
def COND_ULT : PatFrag<(ops), (OtherVT SETULT)>;
def COND_ULE : PatFrag<(ops), (OtherVT SETULE)>;

// XXX - For some reason R600 version is preferring to use unordered
// for setne?
// Accepts either SETUNE or SETNE.
def COND_UNE_NE : PatFrags<(ops), [(OtherVT SETUNE),
                                   (OtherVT SETNE)]>;
371
372//===----------------------------------------------------------------------===//
373// PatLeafs for signed comparisons
374//===----------------------------------------------------------------------===//
375
// Signed relational comparisons map onto the plain SETGT/GE/LT/LE codes.
def COND_SGT : PatFrag<(ops), (OtherVT SETGT)>;
def COND_SGE : PatFrag<(ops), (OtherVT SETGE)>;
def COND_SLT : PatFrag<(ops), (OtherVT SETLT)>;
def COND_SLE : PatFrag<(ops), (OtherVT SETLE)>;
380
381//===----------------------------------------------------------------------===//
382// PatLeafs for integer equality
383//===----------------------------------------------------------------------===//
384
// Equality / inequality accept both the plain and the SETU* condition code.
def COND_EQ : PatFrags<(ops), [(OtherVT SETEQ),
                               (OtherVT SETUEQ)]>;
def COND_NE : PatFrags<(ops), [(OtherVT SETNE),
                               (OtherVT SETUNE)]>;

// Leaf over `cond` whose predicate always returns false, so it never matches.
// FIXME: Should not need code predicate
//def COND_NULL : PatLeaf<(OtherVT null_frag)>;
def COND_NULL : PatLeaf<(cond), [{(void)N; return false;}]>;
394
395//===----------------------------------------------------------------------===//
396// PatLeafs for Texture Constants
397//===----------------------------------------------------------------------===//
398
// Each leaf matches an immediate whose value, read as an unsigned 32-bit
// texture-type number, falls in the listed set.

// Types 9, 10 and 16.
def TEX_ARRAY : PatLeaf<(imm), [{
    uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 9 || TType == 10 || TType == 16;
  }]>;

// Type 5.
def TEX_RECT : PatLeaf<(imm), [{
    uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 5;
  }]>;

// Types 6 through 8, and 13.
def TEX_SHADOW : PatLeaf<(imm), [{
    uint32_t TType = (uint32_t)N->getZExtValue();
    return (TType >= 6 && TType <= 8) || TType == 13;
  }]>;

// Types 11, 12 and 17.
def TEX_SHADOW_ARRAY : PatLeaf<(imm), [{
    uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 11 || TType == 12 || TType == 17;
  }]>;
426
427//===----------------------------------------------------------------------===//
428// Load/Store Pattern Fragments
429//===----------------------------------------------------------------------===//
430
// ISD::ATOMIC_CMP_SWAP node that additionally takes an input glue.
def atomic_cmp_swap_glue
  : SDNode<"ISD::ATOMIC_CMP_SWAP", SDTAtomic3,
           [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand,
            SDNPInGlue]>;

// Holder for a list of address-space numbers, read back via .AddrSpaces.
class AddressSpaceList<list<int> AS> {
  list<int> AddrSpaces = AS;
}

// Mix-in that sets MinAlignment to `Bytes`.
class Aligned<int Bytes> {
  int MinAlignment = Bytes;
}

// Store, through `op`, of $value logically shifted right by 16, with memory
// type `vt`.
class StoreHi16<SDPatternOperator op, ValueType vt>
    : PatFrag<(ops node:$value, node:$ptr),
              (op (srl node:$value, (i32 16)), node:$ptr)> {
  let IsStore = 1;
  let MemoryVT = vt;
}
448
// Address spaces accepted by the load_<as> / store_<as> fragment families.
// The load lists for global and flat also include the constant spaces; the
// store lists do not.

def LoadAddress_constant
  : AddressSpaceList<[AddrSpaces.Constant, AddrSpaces.Constant32Bit]>;

def LoadAddress_global
  : AddressSpaceList<[AddrSpaces.Global, AddrSpaces.Constant,
                      AddrSpaces.Constant32Bit]>;
def StoreAddress_global
  : AddressSpaceList<[AddrSpaces.Global]>;

def LoadAddress_flat
  : AddressSpaceList<[AddrSpaces.Flat, AddrSpaces.Global,
                      AddrSpaces.Constant, AddrSpaces.Constant32Bit]>;
def StoreAddress_flat
  : AddressSpaceList<[AddrSpaces.Flat, AddrSpaces.Global]>;

def LoadAddress_private  : AddressSpaceList<[AddrSpaces.Private]>;
def StoreAddress_private : AddressSpaceList<[AddrSpaces.Private]>;

def LoadAddress_local  : AddressSpaceList<[AddrSpaces.Local]>;
def StoreAddress_local : AddressSpaceList<[AddrSpaces.Local]>;

def LoadAddress_region  : AddressSpaceList<[AddrSpaces.Region]>;
def StoreAddress_region : AddressSpaceList<[AddrSpaces.Region]>;
470
471
472
// Per-address-space load fragments. Every fragment is restricted to the
// address spaces in LoadAddress_<as>.
foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {

// Ordinary loads: the non-extending form plus any/sign/zero-extending 8- and
// 16-bit forms.
let IsLoad = 1 in {
def load_#as : PatFrag<(ops node:$ptr), (unindexedload node:$ptr)> {
  let IsNonExtLoad = 1;
}

def extloadi8_#as   : PatFrag<(ops node:$ptr), (extloadi8 node:$ptr)>;
def extloadi16_#as  : PatFrag<(ops node:$ptr), (extloadi16 node:$ptr)>;

def sextloadi8_#as  : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr)>;
def sextloadi16_#as : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr)>;

def zextloadi8_#as  : PatFrag<(ops node:$ptr), (zextloadi8 node:$ptr)>;
def zextloadi16_#as : PatFrag<(ops node:$ptr), (zextloadi16 node:$ptr)>;
} // End let IsLoad = 1

// Atomic loads: non-extending 16/32/64-bit, then extending 8- and 16-bit.
let IsAtomic = 1 in {
def atomic_load_nonext_16_#as
  : PatFrag<(ops node:$ptr), (atomic_load_nonext_16 node:$ptr)>;
def atomic_load_nonext_32_#as
  : PatFrag<(ops node:$ptr), (atomic_load_nonext_32 node:$ptr)>;
def atomic_load_nonext_64_#as
  : PatFrag<(ops node:$ptr), (atomic_load_nonext_64 node:$ptr)>;

def atomic_load_zext_8_#as
  : PatFrag<(ops node:$ptr), (atomic_load_zext_8 node:$ptr)>;
def atomic_load_sext_8_#as
  : PatFrag<(ops node:$ptr), (atomic_load_sext_8 node:$ptr)>;
def atomic_load_aext_8_#as
  : PatFrag<(ops node:$ptr), (atomic_load_aext_8 node:$ptr)>;

def atomic_load_zext_16_#as
  : PatFrag<(ops node:$ptr), (atomic_load_zext_16 node:$ptr)>;
def atomic_load_sext_16_#as
  : PatFrag<(ops node:$ptr), (atomic_load_sext_16 node:$ptr)>;
def atomic_load_aext_16_#as
  : PatFrag<(ops node:$ptr), (atomic_load_aext_16 node:$ptr)>;
} // End let IsAtomic = 1

} // End let AddressSpaces
} // End foreach as
543
544
// Per-address-space store fragments. Every fragment is restricted to the
// address spaces in StoreAddress_<as>.
foreach as = [ "global", "flat", "local", "private", "region" ] in {
let AddressSpaces = !cast<AddressSpaceList>("StoreAddress_"#as).AddrSpaces in {

let IsStore = 1 in {
// Plain (non-truncating) store.
let IsTruncStore = 0 in
def store_#as : PatFrag<(ops node:$val, node:$ptr),
                        (unindexedstore node:$val, node:$ptr)>;

// truncstore fragments.
let IsTruncStore = 1 in
def truncstore_#as : PatFrag<(ops node:$val, node:$ptr),
                             (unindexedstore node:$val, node:$ptr)>;

// TODO: We don't really need the truncstore here. We can use
// unindexedstore with MemoryVT directly, which will save an
// unnecessary check that the memory size is less than the value type
// in the generated matcher table.
def truncstorei8_#as : PatFrag<(ops node:$val, node:$ptr),
                               (truncstorei8 node:$val, node:$ptr)>;
def truncstorei16_#as : PatFrag<(ops node:$val, node:$ptr),
                                (truncstorei16 node:$val, node:$ptr)>;

// Stores of the value shifted right by 16 (see StoreHi16).
def store_hi16_#as         : StoreHi16<truncstorei16, i16>;
def truncstorei8_hi16_#as  : StoreHi16<truncstorei8, i8>;
def truncstorei16_hi16_#as : StoreHi16<truncstorei16, i16>;
} // End let IsStore = 1

let IsAtomic = 1 in {
def atomic_store_8_#as : PatFrag<(ops node:$val, node:$ptr),
                                 (atomic_store_8 node:$val, node:$ptr)>;
def atomic_store_16_#as : PatFrag<(ops node:$val, node:$ptr),
                                  (atomic_store_16 node:$val, node:$ptr)>;
def atomic_store_32_#as : PatFrag<(ops node:$val, node:$ptr),
                                  (atomic_store_32 node:$val, node:$ptr)>;
def atomic_store_64_#as : PatFrag<(ops node:$val, node:$ptr),
                                  (atomic_store_64 node:$val, node:$ptr)>;
} // End let IsAtomic = 1

} // End let AddressSpaces
} // End foreach as
583
// Adds <NAME>_noret: the operator called NAME applied to (ptr, data), with
// the HasNoUse flag set.
multiclass noret_op {
  def "_noret" : PatFrag<(ops node:$ptr, node:$data),
    (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)> {
    let HasNoUse = true;
  }
}

// The three multiclasses below each add a returning and a no-return atomic
// fragment for the operator called NAME, restricted to the address spaces of
// the matching LoadAddress_<as> list.

multiclass global_addr_space_atomic_op {
  let IsAtomic = 1, AddressSpaces = LoadAddress_global.AddrSpaces in {
    let HasNoUse = true in
    def "_noret_global_addrspace" :
      PatFrag<(ops node:$ptr, node:$data),
              (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>;

    def "_global_addrspace" :
      PatFrag<(ops node:$ptr, node:$data),
              (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>;
  }
}

multiclass flat_addr_space_atomic_op {
  let IsAtomic = 1, AddressSpaces = LoadAddress_flat.AddrSpaces in {
    let HasNoUse = true in
    def "_noret_flat_addrspace" :
      PatFrag<(ops node:$ptr, node:$data),
              (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>;

    def "_flat_addrspace" :
      PatFrag<(ops node:$ptr, node:$data),
              (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>;
  }
}

multiclass local_addr_space_atomic_op {
  let IsAtomic = 1, AddressSpaces = LoadAddress_local.AddrSpaces in {
    let HasNoUse = true in
    def "_noret_local_addrspace" :
      PatFrag<(ops node:$ptr, node:$data),
              (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>;

    def "_local_addrspace" :
      PatFrag<(ops node:$ptr, node:$data),
              (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>;
  }
}

// No-return forms of individual atomic intrinsics.
defm int_amdgcn_global_atomic_csub            : noret_op;
defm int_amdgcn_global_atomic_ordered_add_b64 : noret_op;
defm int_amdgcn_flat_atomic_fmin_num          : noret_op;
defm int_amdgcn_flat_atomic_fmax_num          : noret_op;
defm int_amdgcn_global_atomic_fmin_num        : noret_op;
defm int_amdgcn_global_atomic_fmax_num        : noret_op;

// cond_sub_u32 gets local, flat and global address-space variants.
defm int_amdgcn_atomic_cond_sub_u32 : local_addr_space_atomic_op;
defm int_amdgcn_atomic_cond_sub_u32 : flat_addr_space_atomic_op;
defm int_amdgcn_atomic_cond_sub_u32 : global_addr_space_atomic_op;
647
// "_noret" flavours of the generic atomic multiclasses: the same
// instantiation, suffixed with _noret and with HasNoUse set.

multiclass noret_binary_atomic_op<SDNode atomic_op> {
  let HasNoUse = true in {
    defm "_noret" : binary_atomic_op<atomic_op>;
  }
}

multiclass noret_binary_atomic_op_fp<SDNode atomic_op> {
  let HasNoUse = true in {
    defm "_noret" : binary_atomic_op_fp<atomic_op>;
  }
}

multiclass noret_ternary_atomic_op<SDNode atomic_op> {
  let HasNoUse = true in {
    defm "_noret" : ternary_atomic_op<atomic_op>;
  }
}
662
// Address-space suffixes for which atomic fragments are generated.
defvar atomic_addrspace_names =
  [ "global", "flat", "constant", "local", "private", "region" ];

// For every address space, instantiate the returning and the no-return
// binary atomic fragments, restricted to LoadAddress_<as>.
multiclass binary_atomic_op_all_as<SDNode atomic_op> {
  foreach as = atomic_addrspace_names in {
    let AddressSpaces =
          !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {
      defm "_"#as : binary_atomic_op<atomic_op>;
      defm "_"#as : noret_binary_atomic_op<atomic_op>;
    }
  }
}

// Floating-point counterpart of binary_atomic_op_all_as.
multiclass binary_atomic_op_fp_all_as<SDNode atomic_op> {
  foreach as = atomic_addrspace_names in {
    let AddressSpaces =
          !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {
      defm "_"#as : binary_atomic_op_fp<atomic_op>;
      defm "_"#as : noret_binary_atomic_op_fp<atomic_op>;
    }
  }
}

defm atomic_swap           : binary_atomic_op_all_as<atomic_swap>;
defm atomic_load_add       : binary_atomic_op_all_as<atomic_load_add>;
defm atomic_load_and       : binary_atomic_op_all_as<atomic_load_and>;
defm atomic_load_max       : binary_atomic_op_all_as<atomic_load_max>;
defm atomic_load_min       : binary_atomic_op_all_as<atomic_load_min>;
defm atomic_load_or        : binary_atomic_op_all_as<atomic_load_or>;
defm atomic_load_sub       : binary_atomic_op_all_as<atomic_load_sub>;
defm atomic_load_umax      : binary_atomic_op_all_as<atomic_load_umax>;
defm atomic_load_umin      : binary_atomic_op_all_as<atomic_load_umin>;
defm atomic_load_xor       : binary_atomic_op_all_as<atomic_load_xor>;
defm atomic_load_fadd      : binary_atomic_op_fp_all_as<atomic_load_fadd>;
defm atomic_load_fmin      : binary_atomic_op_fp_all_as<atomic_load_fmin>;
defm atomic_load_fmax      : binary_atomic_op_fp_all_as<atomic_load_fmax>;
defm atomic_load_uinc_wrap : binary_atomic_op_all_as<atomic_load_uinc_wrap>;
defm atomic_load_udec_wrap : binary_atomic_op_all_as<atomic_load_udec_wrap>;
defm AMDGPUatomic_cmp_swap : binary_atomic_op_all_as<AMDGPUatomic_cmp_swap>;
698
// Local loads with MinAlignment set to 8 or 16.
let IsLoad = 1 in {
def load_align8_local
  : PatFrag<(ops node:$ptr), (load_local node:$ptr)>, Aligned<8>;

def load_align16_local
  : PatFrag<(ops node:$ptr), (load_local node:$ptr)>, Aligned<16>;
} // End let IsLoad = 1

// Local stores with MinAlignment set to 8 or 16.
let IsStore = 1 in {
def store_align8_local
  : PatFrag<(ops node:$val, node:$ptr),
            (store_local node:$val, node:$ptr)>, Aligned<8>;

def store_align16_local
  : PatFrag<(ops node:$val, node:$ptr),
            (store_local node:$val, node:$ptr)>, Aligned<16>;
} // End let IsStore = 1
718
// Compare-and-swap fragments for the local address space: returning and
// no-return forms, each with and without the glued (_m0) node.
let AddressSpaces = StoreAddress_local.AddrSpaces in {
  defm atomic_cmp_swap_local    : ternary_atomic_op<atomic_cmp_swap>;
  defm atomic_cmp_swap_local    : noret_ternary_atomic_op<atomic_cmp_swap>;
  defm atomic_cmp_swap_local_m0 : noret_ternary_atomic_op<atomic_cmp_swap_glue>;
  defm atomic_cmp_swap_local_m0 : ternary_atomic_op<atomic_cmp_swap_glue>;
}

// Compare-and-swap fragments for the region address space.
// NOTE(review): unlike the local group above, there is no returning
// `atomic_cmp_swap_region : ternary_atomic_op<atomic_cmp_swap>` here.
// Confirm whether that omission is intentional.
let AddressSpaces = StoreAddress_region.AddrSpaces in {
  defm atomic_cmp_swap_region    : noret_ternary_atomic_op<atomic_cmp_swap>;
  defm atomic_cmp_swap_region_m0 : noret_ternary_atomic_op<atomic_cmp_swap_glue>;
  defm atomic_cmp_swap_region_m0 : ternary_atomic_op<atomic_cmp_swap_glue>;
}
731
732//===----------------------------------------------------------------------===//
733// Misc Pattern Fragments
734//===----------------------------------------------------------------------===//
735
// Bit patterns of commonly used floating-point constants, stored as integers.
class Constants {
  // 32-bit patterns.
  int TWO_PI        = 0x40c90fdb;  // 2*pi
  int PI            = 0x40490fdb;  // pi
  int TWO_PI_INV    = 0x3e22f983;  // 1/(2*pi)
  int FP_4294966784 = 0x4f7ffffe; // 4294966784 = 4294967296 - 512 = 2^32 - 2^9

  // +1.0 / -1.0 in 16-, 32- and 64-bit encodings.
  int FP16_ONE      = 0x3C00;
  int FP16_NEG_ONE  = 0xBC00;
  int FP32_ONE      = 0x3f800000;
  int FP32_NEG_ONE  = 0xbf800000;
  int FP64_ONE      = 0x3ff0000000000000;
  int FP64_NEG_ONE  = 0xbff0000000000000;
}

def CONST : Constants;
749
// Floating-point immediate whose APFloat value is zero.
def FP_ZERO : PatLeaf<(fpimm), [{return N->getValueAPF().isZero();}]>;

// Floating-point immediate exactly equal to 1.0.
def FP_ONE : PatLeaf<(fpimm), [{return N->isExactlyValue(1.0);}]>;

// Floating-point immediate exactly equal to 0.5.
def FP_HALF : PatLeaf<(fpimm), [{return N->isExactlyValue(0.5);}]>;
764
765/* Generic helper patterns for intrinsics */
766/* -------------------------------------- */
767
// Selects f32 fpow(src0, src1) as exp_ieee(mul(src1, log_ieee(src0))), using
// the three instructions supplied by the caller.
class POW_Common<AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul>
    : AMDGPUPat<(fpow f32:$src0, f32:$src1),
                (exp_ieee (mul f32:$src1, (log_ieee f32:$src0)))>;
773
774/* Other helper patterns */
775/* --------------------- */
776
// Extract element pattern: extracting element `sub_idx` of a vec_type value
// selects to EXTRACT_SUBREG with subregister index `sub_reg`.
class Extract_Element<ValueType sub_type, ValueType vec_type, int sub_idx,
                      SubRegIndex sub_reg>
    : AMDGPUPat<(sub_type (extractelt vec_type:$src, sub_idx)),
                (EXTRACT_SUBREG $src, sub_reg)>;

// Insert element pattern: inserting an elem_type value at element `sub_idx`
// selects to INSERT_SUBREG with subregister index `sub_reg`.
class Insert_Element<ValueType elem_type, ValueType vec_type,
                     int sub_idx, SubRegIndex sub_reg>
    : AMDGPUPat<(insertelt vec_type:$vec, elem_type:$elem, sub_idx),
                (INSERT_SUBREG $vec, $elem, sub_reg)>;
792
// bitconvert pattern: a bitconvert from `st` to `dt` within register class
// `rc` selects to the source register itself.
// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
// can handle COPY instructions.
class BitConvert<ValueType dt, ValueType st, RegisterClass rc>
    : AMDGPUPat<(dt (bitconvert (st rc:$src0))),
                (dt rc:$src0)>;

// AMDGPUdwordaddr of a `vt` value in `rc` selects to the value itself.
// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
// can handle COPY instructions.
class DwordAddrPat<ValueType vt, RegisterClass rc>
    : AMDGPUPat<(vt (AMDGPUdwordaddr (vt rc:$addr))),
                (vt rc:$addr)>;

// rotr pattern: i32 rotr(src0, src1) selects to BIT_ALIGN with src0 passed
// as both of the first two operands.
class ROTRPattern<Instruction BIT_ALIGN>
    : AMDGPUPat<(rotr i32:$src0, i32:$src1),
                (BIT_ALIGN $src0, $src0, $src1)>;
813
814// Special conversion patterns
815
// fp_to_sint(ffloor(src + 0.5)); only matches when TM.Options.NoNaNsFPMath
// is set.
def cvt_rpi_i32_f32
  : PatFrag<(ops node:$src),
            (fp_to_sint (ffloor (fadd $src, FP_HALF))),
            [{ (void) N; return TM.Options.NoNaNsFPMath; }]>;

// fp_to_sint(ffloor(src)); only matches when TM.Options.NoNaNsFPMath is set.
def cvt_flr_i32_f32
  : PatFrag<(ops node:$src),
            (fp_to_sint (ffloor $src)),
            [{ (void)N; return TM.Options.NoNaNsFPMath; }]>;
827
// 24-bit multiply-add: add(mul24(src0, src1), src2) selects to `Inst`. When
// HasClamp is set, an extra (i1 0) operand is appended to the instruction.
let AddedComplexity = 2 in {

class IMad24Pat<Instruction Inst, bit HasClamp = 0>
    : AMDGPUPat<(add (AMDGPUmul_i24 i32:$src0, i32:$src1), i32:$src2),
                !if(HasClamp,
                    (Inst $src0, $src1, $src2, (i1 0)),
                    (Inst $src0, $src1, $src2))>;

class UMad24Pat<Instruction Inst, bit HasClamp = 0>
    : AMDGPUPat<(add (AMDGPUmul_u24 i32:$src0, i32:$src1), i32:$src2),
                !if(HasClamp,
                    (Inst $src0, $src1, $src2, (i1 0)),
                    (Inst $src0, $src1, $src2))>;

} // AddedComplexity.

// fdiv of the constant 1.0 by a `vt` value selects to RcpInst.
class RcpPat<Instruction RcpInst, ValueType vt>
    : AMDGPUPat<(fdiv FP_ONE, vt:$src),
                (RcpInst $src)>;
846
// Instructions which select to the same v_min_f*
def fminnum_like
  : PatFrags<(ops node:$src0, node:$src1),
             [(fminnum_ieee node:$src0, node:$src1),
              (fminnum node:$src0, node:$src1)]>;

// Instructions which select to the same v_max_f*
def fmaxnum_like
  : PatFrags<(ops node:$src0, node:$src1),
             [(fmaxnum_ieee node:$src0, node:$src1),
              (fmaxnum node:$src0, node:$src1)]>;

// PatFrags that only match when the matched node's result is known never to
// be NaN (SelectionDAG: value 0 of N; GlobalISel: register in operand 0).
class NeverNaNPats<dag ops, list<dag> frags> : PatFrags<ops, frags> {
  let PredicateCode = [{
    return CurDAG->isKnownNeverNaN(SDValue(N,0));
  }];
  let GISelPredicateCode = [{
    return isKnownNeverNaN(MI.getOperand(0).getReg(), MRI);
  }];
}

// Never-NaN variants of fminnum_like / fmaxnum_like.
def fminnum_like_nnan
  : NeverNaNPats<(ops node:$src0, node:$src1),
                 [(fminnum_ieee node:$src0, node:$src1),
                  (fminnum node:$src0, node:$src1)]>;

def fmaxnum_like_nnan
  : NeverNaNPats<(ops node:$src0, node:$src1),
                 [(fmaxnum_ieee node:$src0, node:$src1),
                  (fmaxnum node:$src0, node:$src1)]>;

// One-use variants of fminnum_like / fmaxnum_like.
def fminnum_like_oneuse
  : PatFrags<(ops node:$src0, node:$src1),
             [(fminnum_ieee_oneuse node:$src0, node:$src1),
              (fminnum_oneuse node:$src0, node:$src1)]>;

def fmaxnum_like_oneuse
  : PatFrags<(ops node:$src0, node:$src1),
             [(fmaxnum_ieee_oneuse node:$src0, node:$src1),
              (fmaxnum_oneuse node:$src0, node:$src1)]>;

// Either the generic fmad node or AMDGPUfmad_ftz.
def any_fmad
  : PatFrags<(ops node:$src0, node:$src1, node:$src2),
             [(fmad node:$src0, node:$src1, node:$src2),
              (AMDGPUfmad_ftz node:$src0, node:$src1, node:$src2)]>;

// FIXME: fsqrt should not select directly
def any_amdgcn_sqrt
  : PatFrags<(ops node:$src0),
             [(fsqrt node:$src0), (int_amdgcn_sqrt node:$src0)]>;
897