xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td (revision 07cc7ea7386c5428cef9e8f06d4ebd8144dec311)
1//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains instruction defs that are common to all hw codegen
10// targets.
11//
12//===----------------------------------------------------------------------===//
13
// Named integer IDs for the address spaces referenced throughout this file.
// Other records read them through the AddrSpaces def below.
class AddressSpacesImpl {
  int Flat          = 0;
  int Global        = 1;
  int Region        = 2;
  int Local         = 3;
  int Constant      = 4;
  int Private       = 5;
  int Constant32Bit = 6;
}

// Single instance of the table; use as AddrSpaces.<Name>.
def AddrSpaces : AddressSpacesImpl;
25
26
// Common base for instruction records in the AMDGPU namespace.
//   outs, ins - output / input operand lists.
//   asm       - assembly string (defaults to empty).
//   pattern   - selection patterns (defaults to none).
class AMDGPUInst<dag outs, dag ins, string asm = "", list<dag> pattern = []>
    : Instruction {
  // Per-instruction flags, copied into TSFlags bits 63 and 62 below.
  field bit isRegisterLoad = 0;
  field bit isRegisterStore = 0;

  // SoftFail is a field the disassembler can use to provide a way for
  // instructions to not match without killing the whole decode process. It is
  // mainly used for ARM, but Tablegen expects this field to exist or it fails
  // to build the decode table.
  field bits<96> SoftFail = 0;

  let Namespace = "AMDGPU";
  let DecoderNamespace = Namespace;

  let OutOperandList = outs;
  let InOperandList = ins;
  let AsmString = asm;
  let Pattern = pattern;
  let Itinerary = NullALU;

  let TSFlags{63} = isRegisterLoad;
  let TSFlags{62} = isRegisterStore;
}
50
// AMDGPUInst that additionally carries a 32-bit Inst encoding field, which is
// initialised to all ones.
class AMDGPUShaderInst<dag outs, dag ins, string asm = "",
                       list<dag> pattern = []>
    : AMDGPUInst<outs, ins, asm, pattern> {
  field bits<32> Inst = 0xffffffff;
}
56
57//===---------------------------------------------------------------------===//
58// Return instruction
59//===---------------------------------------------------------------------===//
60
// Pseudo-instruction format in the AMDGPU namespace. Records built from it are
// codegen-only pseudos that, by default, neither load, store, nor have side
// effects. A newline is appended to the supplied asm string.
class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
    : Instruction {
  let Namespace = "AMDGPU";

  dag OutOperandList = outs;
  dag InOperandList = ins;
  let AsmString = !strconcat(asmstr, "\n");
  let Pattern = pattern;
  let Itinerary = NullALU;

  let isPseudo = 1;
  let isCodeGenOnly = 1;

  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;

  bit hasIEEEFlag = 0;
  bit hasZeroOpFlag = 0;
}
78
// Predicate with an empty condition string.
def TruePredicate : Predicate<"">;

// FIXME: TableGen should support this specially.
def FalsePredicate : Predicate<"false">;
83
// Append `pred` to `lst` unless it is already present, so the resulting list
// stays free of duplicates. The result is read from the `ret` field.
class PredConcat<list<Predicate> lst, Predicate pred> {
  // [pred] if pred is not in lst, otherwise the empty list.
  defvar Missing = !listremove([pred], lst);
  list<Predicate> ret = !listconcat(lst, Missing);
}
88
// Union of two Register lists: all of lstA, followed by the entries of lstB
// that do not occur in lstA. The result is read from the `ret` field.
class RegListUnion<list<Register> lstA, list<Register> lstB> {
  defvar OnlyInB = !listremove(lstB, lstA);
  list<Register> ret = !listconcat(lstA, OnlyInB);
}
93
// Mixin that assembles the final Predicates list from a free-form list plus
// three dedicated predicate slots, each defaulting to TruePredicate.
class PredicateControl {
  Predicate SubtargetPredicate = TruePredicate;
  Predicate AssemblerPredicate = TruePredicate;
  Predicate WaveSizePredicate = TruePredicate;
  list<Predicate> OtherPredicates = [];

  // Start from OtherPredicates, then append the subtarget, assembler and
  // wave-size predicates in that order, skipping any already in the list.
  list<Predicate> Predicates =
    PredConcat<
      PredConcat<
        PredConcat<OtherPredicates, SubtargetPredicate>.ret,
        AssemblerPredicate>.ret,
      WaveSizePredicate>.ret;
}
105
// Selection pattern that also carries the PredicateControl and GISelFlags
// fields.
class AMDGPUPat<dag pattern, dag result>
    : Pat<pattern, result>, PredicateControl, GISelFlags;

// AMDGPUPat with GIIgnoreCopies set.
class AMDGPUPatIgnoreCopies<dag pattern, dag result>
    : AMDGPUPat<pattern, result> {
  let GIIgnoreCopies = 1;
}
111
// Predicates that are re-evaluated for every function.
// The FP16 and FP64 variants both test the shared FP64FP16Denormals mode
// field; the FP32 variants test FP32Denormals.
let RecomputePerFunction = 1 in {

// Denormals enabled (mode differs from preserve-sign).
def FP16Denormals : Predicate<
  "MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals != DenormalMode::getPreserveSign()">;
def FP32Denormals : Predicate<
  "MF->getInfo<SIMachineFunctionInfo>()->getMode().FP32Denormals != DenormalMode::getPreserveSign()">;
def FP64Denormals : Predicate<
  "MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals != DenormalMode::getPreserveSign()">;

// Denormals disabled (mode equals preserve-sign).
def NoFP16Denormals : Predicate<
  "MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals == DenormalMode::getPreserveSign()">;
def NoFP32Denormals : Predicate<
  "MF->getInfo<SIMachineFunctionInfo>()->getMode().FP32Denormals == DenormalMode::getPreserveSign()">;
def NoFP64Denormals : Predicate<
  "MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals == DenormalMode::getPreserveSign()">;

def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;

} // End let RecomputePerFunction = 1
121
// True when the subtarget reports FMA support.
def FMA : Predicate<"Subtarget->hasFMA()">;

// Operands whose default value is zero.
def InstFlag : OperandWithDefaultOps<i32, (ops (i32 0))>;
def i1imm_0  : OperandWithDefaultOps<i1, (ops (i1 0))>;
127
// AsmOperandClass whose name, optionality and predicate / parser / default
// methods are supplied by the caller; operands are always rendered with
// addImmOperands.
class CustomOperandClass<string name, bit optional, string predicateMethod,
                         string parserMethod, string defaultMethod>
    : AsmOperandClass {
  let Name = name;
  let IsOptional = optional;

  let PredicateMethod = predicateMethod;
  let ParserMethod = parserMethod;
  let DefaultMethod = defaultMethod;
  let RenderMethod = "addImmOperands";
}
138
// Operand properties derived from the operand name: the ImmTy enumerator and
// the is<Name> / parse<Name> / print<Name> method names. Subclasses may
// override any field; DefaultMethod and ParserMatchClass are built from the
// (possibly overridden) values.
class CustomOperandProps<bit optional = 0, string name = NAME> {
  string ImmTy = "ImmTy"#name;
  string PredicateMethod = "is"#name;
  string ParserMethod = "parse"#name;
  string PrintMethod = "print"#name;

  string DefaultValue = "0";
  // C++ lambda text that creates an immediate operand holding DefaultValue.
  string DefaultMethod =
    "[this]() { return AMDGPUOperand::CreateImm(this, "#DefaultValue#
    ", SMLoc(), AMDGPUOperand::"#ImmTy#"); }";

  AsmOperandClass ParserMatchClass =
    CustomOperandClass<name, optional, PredicateMethod, ParserMethod,
                       DefaultMethod>;
  string OperandType = "OPERAND_IMMEDIATE";
}
153
// Operand of the given value type combined with the name-derived properties
// from CustomOperandProps.
class CustomOperand<ValueType type, bit optional = 0, string name = NAME>
    : Operand<type>,
      CustomOperandProps<optional, name>;
156
// Plain immediate operand: ImmTy is ImmTyNone, there is no custom parser, and
// the print method can be chosen by the caller (print<Name> by default).
class ImmOperand<ValueType type, string name = NAME, bit optional = 0,
                 string printer = "print"#name>
    : CustomOperand<type, optional, name> {
  let ImmTy = "ImmTyNone";
  let ParserMethod = "";
  let PrintMethod = printer;
}

// 16-bit immediates; both are printed with printU16ImmOperand.
def s16imm : ImmOperand<i16, "S16Imm", 0, "printU16ImmOperand">;
def u16imm : ImmOperand<i16, "U16Imm", 0, "printU16ImmOperand">;
167
// Variant of an existing CustomOperand `op` that additionally requires the C++
// expression `valuePredicate` to hold. Type, ImmTy, parser, default and print
// settings are taken from `op`.
class ValuePredicatedOperand<CustomOperand op, string valuePredicate,
                             bit optional = 0>
    : CustomOperand<op.Type, optional> {
  defvar BasePredicate = op.ParserMatchClass.PredicateMethod;

  let ImmTy = op.ImmTy;
  // Lambda text: the base operand's predicate AND the extra value predicate.
  let PredicateMethod =
    "getPredicate([](const AMDGPUOperand &Op) -> bool { return Op."#
    BasePredicate#"() && "#valuePredicate#"; })";
  let ParserMethod = op.ParserMatchClass.ParserMethod;
  let DefaultValue = op.DefaultValue;
  let DefaultMethod = op.DefaultMethod;
  let PrintMethod = op.PrintMethod;
}
181
182//===--------------------------------------------------------------------===//
183// Custom Operands
184//===--------------------------------------------------------------------===//
// Operand of type OtherVT.
def brtarget : Operand<OtherVT>;
186
187//===----------------------------------------------------------------------===//
188// Misc. PatFrags
189//===----------------------------------------------------------------------===//
190
// Matches `op` on one operand, but only when the matched node has exactly one
// use (GlobalISel: one non-debug use of the result register).
class HasOneUseUnaryOp<SDPatternOperator op>
    : PatFrag<(ops node:$src0),
              (op $src0),
              [{ return N->hasOneUse(); }]> {
  let GISelPredicateCode = [{
    return MRI.hasOneNonDBGUse(MI.getOperand(0).getReg());
  }];
}
200
// Matches `op` on two operands, but only when the matched node has exactly one
// use (GlobalISel: one non-debug use of the result register).
class HasOneUseBinOp<SDPatternOperator op>
    : PatFrag<(ops node:$src0, node:$src1),
              (op $src0, $src1),
              [{ return N->hasOneUse(); }]> {
  let GISelPredicateCode = [{
    return MRI.hasOneNonDBGUse(MI.getOperand(0).getReg());
  }];
}
209
// Matches `op` on three operands, but only when the matched node has exactly
// one use (GlobalISel: one non-debug use of the result register).
class HasOneUseTernaryOp<SDPatternOperator op>
    : PatFrag<(ops node:$src0, node:$src1, node:$src2),
              (op $src0, $src1, $src2),
              [{ return N->hasOneUse(); }]> {
  let GISelPredicateCode = [{
    return MRI.hasOneNonDBGUse(MI.getOperand(0).getReg());
  }];
}
218
// Matches a two-operand `op` only when SITargetLowering::isCanonicalized
// accepts both source operands.
class is_canonicalized<SDPatternOperator op>
    : PatFrag<(ops node:$src0, node:$src1),
              (op $src0, $src1),
  [{
    const SITargetLowering &Lowering =
              *static_cast<const SITargetLowering *>(getTargetLowering());

    return Lowering.isCanonicalized(*CurDAG, N->getOperand(0)) &&
      Lowering.isCanonicalized(*CurDAG, N->getOperand(1));
   }]> {

  // GlobalISel form: operands 1 and 2 of the matched instruction are checked.
  // TODO: Improve the Legalizer for g_build_vector in Global Isel to match this class
  let GISelPredicateCode = [{
    const SITargetLowering *TLI = static_cast<const SITargetLowering *>(
      MF.getSubtarget().getTargetLowering());

    return TLI->isCanonicalized(MI.getOperand(1).getReg(), const_cast<MachineFunction&>(MF)) &&
      TLI->isCanonicalized(MI.getOperand(2).getReg(), const_cast<MachineFunction&>(MF));
  }];
}
239
// Three-operand fragment matching op2(op1(src0, src1), src2).
class FoldTernaryOpPat<SDPatternOperator op1, SDPatternOperator op2>
    : PatFrag<(ops node:$src0, node:$src1, node:$src2),
              (op2 (op1 node:$src0, node:$src1), node:$src2)>;

// Integer multiply-add: add(mul(src0, src1), src2).
def imad : FoldTernaryOpPat<mul, add>;
246
// Single-use variants of commutative, associative binary operators.
let Properties = [SDNPCommutative, SDNPAssociative] in {

// Integer min / max.
def smax_oneuse : HasOneUseBinOp<smax>;
def smin_oneuse : HasOneUseBinOp<smin>;
def umax_oneuse : HasOneUseBinOp<umax>;
def umin_oneuse : HasOneUseBinOp<umin>;

// Floating-point min / max.
def fminnum_oneuse      : HasOneUseBinOp<fminnum>;
def fmaxnum_oneuse      : HasOneUseBinOp<fmaxnum>;
def fminimum_oneuse     : HasOneUseBinOp<fminimum>;
def fmaximum_oneuse     : HasOneUseBinOp<fmaximum>;
def fminnum_ieee_oneuse : HasOneUseBinOp<fminnum_ieee>;
def fmaxnum_ieee_oneuse : HasOneUseBinOp<fmaxnum_ieee>;

// Bitwise logic.
def and_oneuse : HasOneUseBinOp<and>;
def or_oneuse  : HasOneUseBinOp<or>;
def xor_oneuse : HasOneUseBinOp<xor>;

} // Properties = [SDNPCommutative, SDNPAssociative]
266
// Single-use variants of the remaining operators (no Properties override).
def not_oneuse    : HasOneUseUnaryOp<not>;

def add_oneuse    : HasOneUseBinOp<add>;
def sub_oneuse    : HasOneUseBinOp<sub>;
def srl_oneuse    : HasOneUseBinOp<srl>;
def shl_oneuse    : HasOneUseBinOp<shl>;

def select_oneuse : HasOneUseTernaryOp<select>;

def AMDGPUmul_u24_oneuse : HasOneUseBinOp<AMDGPUmul_u24>;
def AMDGPUmul_i24_oneuse : HasOneUseBinOp<AMDGPUmul_i24>;
279
280//===----------------------------------------------------------------------===//
281// PatFrags for shifts
282//===----------------------------------------------------------------------===//
283
284// Constrained shift PatFrags.
285
// (and src0, imm) where isUnneededShiftMask reports the mask as redundant for
// the given shift-amount bit count (4, 5 or 6 for 16-, 32- and 64-bit shifts).
def csh_mask_16
    : PatFrag<(ops node:$src0), (and node:$src0, imm),
              [{ return isUnneededShiftMask(N, 4); }]> {
  let GISelPredicateCode = [{ return isUnneededShiftMask(MI, 4); }];
}

def csh_mask_32
    : PatFrag<(ops node:$src0), (and node:$src0, imm),
              [{ return isUnneededShiftMask(N, 5); }]> {
  let GISelPredicateCode = [{ return isUnneededShiftMask(MI, 5); }];
}

def csh_mask_64
    : PatFrag<(ops node:$src0), (and node:$src0, imm),
              [{ return isUnneededShiftMask(N, 6); }]> {
  let GISelPredicateCode = [{ return isUnneededShiftMask(MI, 6); }];
}
300
// For each width, define for shl / srl / sra:
//   c<op>_<width>          - the shift, with or without a redundant mask on
//                            the shift amount,
//   c<op>_<width>_oneuse   - the same, restricted to a single use,
//   c*_rev_<width>         - the same with the two operands swapped.
foreach width = [16, 32, 64] in {
defvar AmtMask = !cast<SDPatternOperator>("csh_mask_"#width);

// Shift left.
def cshl_#width
    : PatFrags<(ops node:$src0, node:$src1),
               [(shl node:$src0, node:$src1),
                (shl node:$src0, (AmtMask node:$src1))]>;
defvar ShlFrag = !cast<SDPatternOperator>("cshl_"#width);
def cshl_#width#_oneuse : HasOneUseBinOp<ShlFrag>;
def clshl_rev_#width
    : PatFrag<(ops node:$src0, node:$src1), (ShlFrag $src1, $src0)>;

// Logical shift right.
def csrl_#width
    : PatFrags<(ops node:$src0, node:$src1),
               [(srl node:$src0, node:$src1),
                (srl node:$src0, (AmtMask node:$src1))]>;
defvar SrlFrag = !cast<SDPatternOperator>("csrl_"#width);
def csrl_#width#_oneuse : HasOneUseBinOp<SrlFrag>;
def clshr_rev_#width
    : PatFrag<(ops node:$src0, node:$src1), (SrlFrag $src1, $src0)>;

// Arithmetic shift right.
def csra_#width
    : PatFrags<(ops node:$src0, node:$src1),
               [(sra node:$src0, node:$src1),
                (sra node:$src0, (AmtMask node:$src1))]>;
defvar SraFrag = !cast<SDPatternOperator>("csra_"#width);
def csra_#width#_oneuse : HasOneUseBinOp<SraFrag>;
def cashr_rev_#width
    : PatFrag<(ops node:$src0, node:$src1), (SraFrag $src1, $src0)>;
} // end foreach width
325
// Single-use logical shift right by the constant 16.
def srl_16
    : PatFrag<(ops node:$src0), (srl_oneuse node:$src0, (i32 16))>;

// High 16 bits of a 32-bit value as an i16: trunc(srl_16(src0)).
def hi_i16_elt
    : PatFrag<(ops node:$src0), (i16 (trunc (i32 (srl_16 node:$src0))))>;
334
335
// Leaf that matches a BITCAST whose operand is an SRL by the constant 16,
// i.e. the high 16-bit half of a wider value reinterpreted as another type.
// Any other shape (non-bitcast, non-SRL source, non-constant or different
// shift amount) is rejected.
def hi_f16_elt : PatLeaf<
  (vt), [{
  if (N->getOpcode() != ISD::BITCAST)
    return false;
  SDValue Tmp = N->getOperand(0);

  if (Tmp.getOpcode() != ISD::SRL)
    return false;
  // The closing parenthesis of this condition was missing, which left the
  // emitted C++ predicate syntactically invalid.
  if (const auto *RHS = dyn_cast<ConstantSDNode>(Tmp.getOperand(1)))
    return RHS->getZExtValue() == 16;
  return false;
}]>;
348
349//===----------------------------------------------------------------------===//
350// PatLeafs for zero immediate
351//===----------------------------------------------------------------------===//
352
// Integer and floating-point immediates for which isZero() holds.
def immzero   : PatLeaf<(imm),   [{ return N->isZero(); }]>;
def fpimmzero : PatLeaf<(fpimm), [{ return N->isZero(); }]>;
355
356//===----------------------------------------------------------------------===//
357// PatLeafs for floating-point comparisons
358//===----------------------------------------------------------------------===//
359
// Each ordered condition also accepts the corresponding SETxx code without an
// ordered / unordered qualifier.
def COND_OEQ : PatFrags<(ops), [(OtherVT SETOEQ), (OtherVT SETEQ)]>;
def COND_ONE : PatFrags<(ops), [(OtherVT SETONE), (OtherVT SETNE)]>;
def COND_OGT : PatFrags<(ops), [(OtherVT SETOGT), (OtherVT SETGT)]>;
def COND_OGE : PatFrags<(ops), [(OtherVT SETOGE), (OtherVT SETGE)]>;
def COND_OLT : PatFrags<(ops), [(OtherVT SETOLT), (OtherVT SETLT)]>;
def COND_OLE : PatFrags<(ops), [(OtherVT SETOLE), (OtherVT SETLE)]>;

// Ordered / unordered tests.
def COND_O  : PatFrags<(ops), [(OtherVT SETO)]>;
def COND_UO : PatFrags<(ops), [(OtherVT SETUO)]>;
368
369//===----------------------------------------------------------------------===//
370// PatLeafs for unsigned / unordered comparisons
371//===----------------------------------------------------------------------===//
372
// One fragment per SETUxx condition code.
def COND_UEQ : PatFrag<(ops), (OtherVT SETUEQ)>;
def COND_UNE : PatFrag<(ops), (OtherVT SETUNE)>;
def COND_UGT : PatFrag<(ops), (OtherVT SETUGT)>;
def COND_UGE : PatFrag<(ops), (OtherVT SETUGE)>;
def COND_ULT : PatFrag<(ops), (OtherVT SETULT)>;
def COND_ULE : PatFrag<(ops), (OtherVT SETULE)>;

// XXX - For some reason the R600 version prefers to use unordered
// for setne?
def COND_UNE_NE : PatFrags<(ops), [(OtherVT SETUNE), (OtherVT SETNE)]>;
383
384//===----------------------------------------------------------------------===//
385// PatLeafs for signed comparisons
386//===----------------------------------------------------------------------===//
387
// Signed comparisons map onto the unqualified SETxx condition codes.
def COND_SGT : PatFrag<(ops), (OtherVT SETGT)>;
def COND_SGE : PatFrag<(ops), (OtherVT SETGE)>;
def COND_SLT : PatFrag<(ops), (OtherVT SETLT)>;
def COND_SLE : PatFrag<(ops), (OtherVT SETLE)>;
392
393//===----------------------------------------------------------------------===//
394// PatLeafs for integer equality
395//===----------------------------------------------------------------------===//
396
// Integer (in)equality accepts both the plain and the SETUxx condition code.
def COND_EQ : PatFrags<(ops), [(OtherVT SETEQ), (OtherVT SETUEQ)]>;
def COND_NE : PatFrags<(ops), [(OtherVT SETNE), (OtherVT SETUNE)]>;

// Condition leaf whose predicate always returns false.
// FIXME: Should not need code predicate
//def COND_NULL : PatLeaf<(OtherVT null_frag)>;
def COND_NULL : PatLeaf<(cond), [{(void)N; return false;}]>;
406
407//===----------------------------------------------------------------------===//
408// PatLeafs for Texture Constants
409//===----------------------------------------------------------------------===//
410
// Immediate leaves that classify a texture-type constant by its numeric value.

// Types 9, 10 and 16.
def TEX_ARRAY : PatLeaf<(imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 9 || TType == 10 || TType == 16;
  }]
>;

// Type 5.
def TEX_RECT : PatLeaf<(imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 5;
  }]
>;

// Types 6 through 8, and 13.
def TEX_SHADOW : PatLeaf<(imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return (TType >= 6 && TType <= 8) || TType == 13;
  }]
>;

// Types 11, 12 and 17.
def TEX_SHADOW_ARRAY : PatLeaf<(imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 11 || TType == 12 || TType == 17;
  }]
>;
438
439//===----------------------------------------------------------------------===//
440// Load/Store Pattern Fragments
441//===----------------------------------------------------------------------===//
442
// ISD::ATOMIC_CMP_SWAP node that additionally takes an incoming glue operand.
def atomic_cmp_swap_glue
    : SDNode<"ISD::ATOMIC_CMP_SWAP", SDTAtomic3,
             [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand,
              SDNPInGlue]>;
446
// Wraps a list of address-space numbers in a record (field AddrSpaces).
class AddressSpaceList<list<int> AS> {
  list<int> AddrSpaces = AS;
}

// Mixin that records a minimum alignment, in bytes, in MinAlignment.
class Aligned<int Bytes> {
  int MinAlignment = Bytes;
}
454
// Store fragment that stores (value >> 16) through `op`, with memory type `vt`.
class StoreHi16<SDPatternOperator op, ValueType vt>
    : PatFrag<(ops node:$value, node:$ptr),
              (op (srl node:$value, (i32 16)), node:$ptr)> {
  let IsStore = 1;
  let MemoryVT = vt;
}
460
// Address spaces accepted by the load / store fragments of each kind. The
// records are looked up by name as "LoadAddress_<as>" / "StoreAddress_<as>".

def LoadAddress_constant
    : AddressSpaceList<[ AddrSpaces.Constant, AddrSpaces.Constant32Bit ]>;

def LoadAddress_global
    : AddressSpaceList<[ AddrSpaces.Global,
                         AddrSpaces.Constant,
                         AddrSpaces.Constant32Bit ]>;
def StoreAddress_global
    : AddressSpaceList<[ AddrSpaces.Global ]>;

def LoadAddress_flat
    : AddressSpaceList<[ AddrSpaces.Flat,
                         AddrSpaces.Global,
                         AddrSpaces.Constant,
                         AddrSpaces.Constant32Bit ]>;
def StoreAddress_flat
    : AddressSpaceList<[ AddrSpaces.Flat, AddrSpaces.Global ]>;

def LoadAddress_private  : AddressSpaceList<[ AddrSpaces.Private ]>;
def StoreAddress_private : AddressSpaceList<[ AddrSpaces.Private ]>;

def LoadAddress_local  : AddressSpaceList<[ AddrSpaces.Local ]>;
def StoreAddress_local : AddressSpaceList<[ AddrSpaces.Local ]>;

def LoadAddress_region  : AddressSpaceList<[ AddrSpaces.Region ]>;
def StoreAddress_region : AddressSpaceList<[ AddrSpaces.Region ]>;
482
483
484
// Per-address-space load fragments. Every fragment is restricted to the
// address spaces listed in the matching LoadAddress_<as> record.
foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
defvar LoadAS = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces;

let AddressSpaces = LoadAS in {

// Ordinary loads.
let IsLoad = 1 in {
def load_#as : PatFrag<(ops node:$ptr), (unindexedload node:$ptr)> {
  let IsNonExtLoad = 1;
}

def extloadi8_#as   : PatFrag<(ops node:$ptr), (extloadi8 node:$ptr)>;
def extloadi16_#as  : PatFrag<(ops node:$ptr), (extloadi16 node:$ptr)>;
def sextloadi8_#as  : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr)>;
def sextloadi16_#as : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr)>;
def zextloadi8_#as  : PatFrag<(ops node:$ptr), (zextloadi8 node:$ptr)>;
def zextloadi16_#as : PatFrag<(ops node:$ptr), (zextloadi16 node:$ptr)>;
} // End let IsLoad = 1

// Atomic loads, one per memory width.
let IsAtomic = 1 in {
def atomic_load_8_#as : PatFrag<(ops node:$ptr), (atomic_load_8 node:$ptr)> {
  let MemoryVT = i8;
}
def atomic_load_16_#as : PatFrag<(ops node:$ptr), (atomic_load_16 node:$ptr)> {
  let MemoryVT = i16;
}
def atomic_load_32_#as : PatFrag<(ops node:$ptr), (atomic_load_32 node:$ptr)> {
  let MemoryVT = i32;
}
def atomic_load_64_#as : PatFrag<(ops node:$ptr), (atomic_load_64 node:$ptr)> {
  let MemoryVT = i64;
}
} // End let IsAtomic = 1

} // End let AddressSpaces
} // End foreach as
538
539
// Per-address-space store fragments. Every fragment is restricted to the
// address spaces listed in the matching StoreAddress_<as> record.
foreach as = [ "global", "flat", "local", "private", "region" ] in {
defvar StoreAS = !cast<AddressSpaceList>("StoreAddress_"#as).AddrSpaces;

let IsStore = 1, AddressSpaces = StoreAS in {
// Non-truncating store.
def store_#as
    : PatFrag<(ops node:$val, node:$ptr),
              (unindexedstore node:$val, node:$ptr)> {
  let IsTruncStore = 0;
}

// truncstore fragments.
def truncstore_#as
    : PatFrag<(ops node:$val, node:$ptr),
              (unindexedstore node:$val, node:$ptr)> {
  let IsTruncStore = 1;
}

// TODO: We don't really need the truncstore here. We can use
// unindexedstore with MemoryVT directly, which will save an
// unnecessary check that the memory size is less than the value type
// in the generated matcher table.
def truncstorei8_#as
    : PatFrag<(ops node:$val, node:$ptr), (truncstorei8 node:$val, node:$ptr)>;
def truncstorei16_#as
    : PatFrag<(ops node:$val, node:$ptr), (truncstorei16 node:$val, node:$ptr)>;

// Stores of the upper 16 bits of the value.
def store_hi16_#as         : StoreHi16<truncstorei16, i16>;
def truncstorei8_hi16_#as  : StoreHi16<truncstorei8, i8>;
def truncstorei16_hi16_#as : StoreHi16<truncstorei16, i16>;
} // End let IsStore = 1, AddressSpaces = ...

let IsAtomic = 1, AddressSpaces = StoreAS in {
def atomic_store_8_#as
    : PatFrag<(ops node:$val, node:$ptr), (atomic_store_8 node:$val, node:$ptr)>;
def atomic_store_16_#as
    : PatFrag<(ops node:$val, node:$ptr), (atomic_store_16 node:$val, node:$ptr)>;
def atomic_store_32_#as
    : PatFrag<(ops node:$val, node:$ptr), (atomic_store_32 node:$val, node:$ptr)>;
def atomic_store_64_#as
    : PatFrag<(ops node:$val, node:$ptr), (atomic_store_64 node:$val, node:$ptr)>;
} // End let IsAtomic = 1, AddressSpaces = ...
} // End foreach as
578
// Defines <NAME>_noret: the (ptr, data) operator named NAME, matched only when
// its result has no use.
multiclass noret_op {
  def "_noret"
      : PatFrag<(ops node:$ptr, node:$data),
                (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)> {
    let HasNoUse = true;
  }
}
584
// Defines <NAME>_noret_global_addrspace and <NAME>_global_addrspace: atomic
// (ptr, data) fragments for the operator named NAME, restricted to the
// LoadAddress_global address spaces. The noret form also requires an unused
// result.
multiclass global_addr_space_atomic_op {
  let IsAtomic = 1, AddressSpaces = LoadAddress_global.AddrSpaces in {
    def "_noret_global_addrspace"
        : PatFrag<(ops node:$ptr, node:$data),
                  (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)> {
      let HasNoUse = true;
    }

    def "_global_addrspace"
        : PatFrag<(ops node:$ptr, node:$data),
                  (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>;
  }
}
600
// Defines <NAME>_noret_flat_addrspace and <NAME>_flat_addrspace: atomic
// (ptr, data) fragments for the operator named NAME, restricted to the
// LoadAddress_flat address spaces. The noret form also requires an unused
// result.
multiclass flat_addr_space_atomic_op {
  let IsAtomic = 1, AddressSpaces = LoadAddress_flat.AddrSpaces in {
    def "_noret_flat_addrspace"
        : PatFrag<(ops node:$ptr, node:$data),
                  (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)> {
      let HasNoUse = true;
    }

    def "_flat_addrspace"
        : PatFrag<(ops node:$ptr, node:$data),
                  (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>;
  }
}
616
// Defines <NAME>_noret_local_addrspace and <NAME>_local_addrspace: atomic
// (ptr, data) fragments for the operator named NAME, restricted to the
// LoadAddress_local address spaces. The noret form also requires an unused
// result.
multiclass local_addr_space_atomic_op {
  let IsAtomic = 1, AddressSpaces = LoadAddress_local.AddrSpaces in {
    def "_noret_local_addrspace"
        : PatFrag<(ops node:$ptr, node:$data),
                  (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)> {
      let HasNoUse = true;
    }

    def "_local_addrspace"
        : PatFrag<(ops node:$ptr, node:$data),
                  (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>;
  }
}
632
// Address-space-qualified fragments (returning and no-return forms).
defm int_amdgcn_flat_atomic_fadd    : flat_addr_space_atomic_op;
defm int_amdgcn_flat_atomic_fadd    : global_addr_space_atomic_op;
defm int_amdgcn_flat_atomic_fadd    : local_addr_space_atomic_op;
defm int_amdgcn_global_atomic_fadd  : global_addr_space_atomic_op;
defm int_amdgcn_atomic_cond_sub_u32 : local_addr_space_atomic_op;
defm int_amdgcn_atomic_cond_sub_u32 : flat_addr_space_atomic_op;
defm int_amdgcn_atomic_cond_sub_u32 : global_addr_space_atomic_op;

// No-return fragments for flat intrinsics.
defm int_amdgcn_flat_atomic_fadd_v2bf16 : noret_op;
defm int_amdgcn_flat_atomic_fmin        : noret_op;
defm int_amdgcn_flat_atomic_fmax        : noret_op;
defm int_amdgcn_flat_atomic_fmin_num    : noret_op;
defm int_amdgcn_flat_atomic_fmax_num    : noret_op;

// No-return fragments for global intrinsics.
defm int_amdgcn_global_atomic_fadd_v2bf16     : noret_op;
defm int_amdgcn_global_atomic_fmin            : noret_op;
defm int_amdgcn_global_atomic_fmax            : noret_op;
defm int_amdgcn_global_atomic_csub            : noret_op;
defm int_amdgcn_global_atomic_ordered_add_b64 : noret_op;
defm int_amdgcn_global_atomic_fmin_num        : noret_op;
defm int_amdgcn_global_atomic_fmax_num        : noret_op;

// No-return fragment for the DS intrinsic.
defm int_amdgcn_ds_fadd_v2bf16 : noret_op;
653
// "_noret" flavour of binary_atomic_op: same fragments, with HasNoUse set.
multiclass noret_binary_atomic_op<SDNode atomic_op, bit IsInt = 1> {
  let HasNoUse = true in {
    defm "_noret" : binary_atomic_op<atomic_op, IsInt>;
  }
}

// "_noret" flavour of ternary_atomic_op: same fragments, with HasNoUse set.
multiclass noret_ternary_atomic_op<SDNode atomic_op> {
  let HasNoUse = true in {
    defm "_noret" : ternary_atomic_op<atomic_op>;
  }
}
663
// Instantiates the returning and no-return binary atomic fragments once per
// address-space name, each restricted to the matching LoadAddress_<as> list.
multiclass binary_atomic_op_all_as<SDNode atomic_op, bit IsInt = 1> {
  foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
    let AddressSpaces =
        !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {
      defm "_"#as : binary_atomic_op<atomic_op, IsInt>;
      defm "_"#as : noret_binary_atomic_op<atomic_op, IsInt>;
    }
  }
}
672
// Integer read-modify-write atomics (IsInt left at its default of 1).
defm atomic_swap           : binary_atomic_op_all_as<atomic_swap>;
defm atomic_load_add       : binary_atomic_op_all_as<atomic_load_add>;
defm atomic_load_and       : binary_atomic_op_all_as<atomic_load_and>;
defm atomic_load_max       : binary_atomic_op_all_as<atomic_load_max>;
defm atomic_load_min       : binary_atomic_op_all_as<atomic_load_min>;
defm atomic_load_or        : binary_atomic_op_all_as<atomic_load_or>;
defm atomic_load_sub       : binary_atomic_op_all_as<atomic_load_sub>;
defm atomic_load_umax      : binary_atomic_op_all_as<atomic_load_umax>;
defm atomic_load_umin      : binary_atomic_op_all_as<atomic_load_umin>;
defm atomic_load_xor       : binary_atomic_op_all_as<atomic_load_xor>;

// Floating-point add (IsInt = 0).
defm atomic_load_fadd      : binary_atomic_op_all_as<atomic_load_fadd, 0>;

defm atomic_load_uinc_wrap : binary_atomic_op_all_as<atomic_load_uinc_wrap>;
defm atomic_load_udec_wrap : binary_atomic_op_all_as<atomic_load_udec_wrap>;

// Floating-point add with the memory type fixed to v2f16.
let MemoryVT = v2f16 in {
defm atomic_load_fadd_v2f16 : binary_atomic_op_all_as<atomic_load_fadd, 0>;
}

defm AMDGPUatomic_cmp_swap : binary_atomic_op_all_as<AMDGPUatomic_cmp_swap>;
689
// Local loads with a minimum alignment of 8 / 16 bytes.
let IsLoad = 1 in {
def load_align8_local
    : PatFrag<(ops node:$ptr), (load_local node:$ptr)>, Aligned<8>;

def load_align16_local
    : PatFrag<(ops node:$ptr), (load_local node:$ptr)>, Aligned<16>;
} // End let IsLoad = 1

// Local stores with a minimum alignment of 8 / 16 bytes.
let IsStore = 1 in {
def store_align8_local
    : PatFrag<(ops node:$val, node:$ptr),
              (store_local node:$val, node:$ptr)>, Aligned<8>;

def store_align16_local
    : PatFrag<(ops node:$val, node:$ptr),
              (store_local node:$val, node:$ptr)>, Aligned<16>;
} // End let IsStore = 1
709
// Compare-and-swap fragments for the local address space, with and without the
// glued (_m0) node, each in returning and no-return form.
let AddressSpaces = StoreAddress_local.AddrSpaces in {
defm atomic_cmp_swap_local    : ternary_atomic_op<atomic_cmp_swap>;
defm atomic_cmp_swap_local    : noret_ternary_atomic_op<atomic_cmp_swap>;
defm atomic_cmp_swap_local_m0 : ternary_atomic_op<atomic_cmp_swap_glue>;
defm atomic_cmp_swap_local_m0 : noret_ternary_atomic_op<atomic_cmp_swap_glue>;
} // End let AddressSpaces = StoreAddress_local.AddrSpaces

// Compare-and-swap fragments for the region address space.
// NOTE(review): unlike the local block, no returning (non-noret) form of
// atomic_cmp_swap_region is instantiated here - confirm this is intended.
let AddressSpaces = StoreAddress_region.AddrSpaces in {
defm atomic_cmp_swap_region    : noret_ternary_atomic_op<atomic_cmp_swap>;
defm atomic_cmp_swap_region_m0 : ternary_atomic_op<atomic_cmp_swap_glue>;
defm atomic_cmp_swap_region_m0 : noret_ternary_atomic_op<atomic_cmp_swap_glue>;
} // End let AddressSpaces = StoreAddress_region.AddrSpaces
722
723//===----------------------------------------------------------------------===//
724// Misc Pattern Fragments
725//===----------------------------------------------------------------------===//
726
// Bit patterns of frequently used floating-point constants, stored as
// integers and accessed through the CONST def below.
class Constants {
  // 32-bit float encodings.
  int TWO_PI        = 0x40c90fdb;
  int PI            = 0x40490fdb;
  int TWO_PI_INV    = 0x3e22f983;
  int FP_4294966784 = 0x4f7ffffe; // 4294966784 = 4294967296 - 512 = 2^32 - 2^9

  // +1.0 / -1.0 in half, single and double precision.
  int FP16_ONE     = 0x3C00;
  int FP16_NEG_ONE = 0xBC00;
  int FP32_ONE     = 0x3f800000;
  int FP32_NEG_ONE = 0xbf800000;
  int FP64_ONE     = 0x3ff0000000000000;
  int FP64_NEG_ONE = 0xbff0000000000000;
}
def CONST : Constants;
740
// Floating-point immediate leaves for specific values.

// Zero, as reported by APFloat::isZero().
def FP_ZERO : PatLeaf<(fpimm), [{return N->getValueAPF().isZero();}]>;

// Exactly 1.0.
def FP_ONE : PatLeaf<(fpimm), [{return N->isExactlyValue(1.0);}]>;

// Exactly 0.5.
def FP_HALF : PatLeaf<(fpimm), [{return N->isExactlyValue(0.5);}]>;
755
756/* Generic helper patterns for intrinsics */
757/* -------------------------------------- */
758
// Selects f32 fpow(src0, src1) as exp_ieee(mul(src1, log_ieee(src0))), using
// the instructions supplied by the caller.
class POW_Common<AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul>
    : AMDGPUPat<
        (fpow f32:$src0, f32:$src1),
        (exp_ieee (mul f32:$src1, (log_ieee f32:$src0)))>;
764
765/* Other helper patterns */
766/* --------------------- */
767
// Extract element pattern: reading element `sub_idx` of a vector becomes an
// EXTRACT_SUBREG of sub-register `sub_reg`.
class Extract_Element<ValueType sub_type, ValueType vec_type, int sub_idx,
                      SubRegIndex sub_reg>
    : AMDGPUPat<
        (sub_type (extractelt vec_type:$src, sub_idx)),
        (EXTRACT_SUBREG $src, sub_reg)>;

// Insert element pattern: writing element `sub_idx` of a vector becomes an
// INSERT_SUBREG into sub-register `sub_reg`.
class Insert_Element<ValueType elem_type, ValueType vec_type,
                     int sub_idx, SubRegIndex sub_reg>
    : AMDGPUPat<
        (insertelt vec_type:$vec, elem_type:$elem, sub_idx),
        (INSERT_SUBREG $vec, $elem, sub_reg)>;
783
// bitconvert pattern: a bitconvert between two types held in the same
// register class selects to the source register itself.
// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
// can handle COPY instructions.
class BitConvert<ValueType dt, ValueType st, RegisterClass rc>
    : AMDGPUPat<
        (dt (bitconvert (st rc:$src0))),
        (dt rc:$src0)>;

// AMDGPUdwordaddr of a register selects to the register itself.
// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
// can handle COPY instructions.
class DwordAddrPat<ValueType vt, RegisterClass rc>
    : AMDGPUPat<
        (vt (AMDGPUdwordaddr (vt rc:$addr))),
        (vt rc:$addr)>;
798
// rotr pattern: a 32-bit rotate right selects to BIT_ALIGN with the rotated
// value supplied as both data operands.
class ROTRPattern<Instruction BIT_ALIGN>
    : AMDGPUPat<
        (rotr i32:$src0, i32:$src1),
        (BIT_ALIGN $src0, $src0, $src1)>;
804
805// Special conversion patterns
806
// Both fragments match only when TM.Options.NoNaNsFPMath is set.

// fp_to_sint(ffloor(src + 0.5)).
def cvt_rpi_i32_f32
    : PatFrag<(ops node:$src),
              (fp_to_sint (ffloor (fadd $src, FP_HALF))),
              [{ (void) N; return TM.Options.NoNaNsFPMath; }]>;

// fp_to_sint(ffloor(src)).
def cvt_flr_i32_f32
    : PatFrag<(ops node:$src),
              (fp_to_sint (ffloor $src)),
              [{ (void)N; return TM.Options.NoNaNsFPMath; }]>;
818
// add(mul24(src0, src1), src2) selects to a 24-bit multiply-add instruction.
// When HasClamp is set, the instruction takes an extra i1 operand, passed as 0.
let AddedComplexity = 2 in {

// Signed variant (AMDGPUmul_i24).
class IMad24Pat<Instruction Inst, bit HasClamp = 0>
    : AMDGPUPat<
        (add (AMDGPUmul_i24 i32:$src0, i32:$src1), i32:$src2),
        !if(HasClamp,
            (Inst $src0, $src1, $src2, (i1 0)),
            (Inst $src0, $src1, $src2))>;

// Unsigned variant (AMDGPUmul_u24).
class UMad24Pat<Instruction Inst, bit HasClamp = 0>
    : AMDGPUPat<
        (add (AMDGPUmul_u24 i32:$src0, i32:$src1), i32:$src2),
        !if(HasClamp,
            (Inst $src0, $src1, $src2, (i1 0)),
            (Inst $src0, $src1, $src2))>;

} // AddedComplexity.
832
// fdiv(1.0, src) of type `vt` selects to the reciprocal instruction RcpInst.
class RcpPat<Instruction RcpInst, ValueType vt>
    : AMDGPUPat<
        (fdiv FP_ONE, vt:$src),
        (RcpInst $src)>;
837
// Instructions which select to the same v_min_f*
def fminnum_like
    : PatFrags<(ops node:$src0, node:$src1),
               [(fminnum_ieee node:$src0, node:$src1),
                (fminnum node:$src0, node:$src1)]>;

// Instructions which select to the same v_max_f*
def fmaxnum_like
    : PatFrags<(ops node:$src0, node:$src1),
               [(fmaxnum_ieee node:$src0, node:$src1),
                (fmaxnum node:$src0, node:$src1)]>;
849
// PatFrags that match only when the result of the matched node is known never
// to be NaN (SelectionDAG: value 0 of N; GlobalISel: the register in operand 0).
class NeverNaNPats<dag ops, list<dag> frags>
    : PatFrags<ops, frags> {
  let PredicateCode = [{
    return CurDAG->isKnownNeverNaN(SDValue(N,0));
  }];
  let GISelPredicateCode = [{
    return isKnownNeverNaN(MI.getOperand(0).getReg(), MRI);
  }];
}
858
// fminnum / fminnum_ieee whose result is known never to be NaN.
def fminnum_like_nnan
    : NeverNaNPats<(ops node:$src0, node:$src1),
                   [(fminnum_ieee node:$src0, node:$src1),
                    (fminnum node:$src0, node:$src1)]>;

// fmaxnum / fmaxnum_ieee whose result is known never to be NaN.
def fmaxnum_like_nnan
    : NeverNaNPats<(ops node:$src0, node:$src1),
                   [(fmaxnum_ieee node:$src0, node:$src1),
                    (fmaxnum node:$src0, node:$src1)]>;
868
// Single-use fminnum / fminnum_ieee.
def fminnum_like_oneuse
    : PatFrags<(ops node:$src0, node:$src1),
               [(fminnum_ieee_oneuse node:$src0, node:$src1),
                (fminnum_oneuse node:$src0, node:$src1)]>;

// Single-use fmaxnum / fmaxnum_ieee.
def fmaxnum_like_oneuse
    : PatFrags<(ops node:$src0, node:$src1),
               [(fmaxnum_ieee_oneuse node:$src0, node:$src1),
                (fmaxnum_oneuse node:$src0, node:$src1)]>;
878
// Either fmad or AMDGPUfmad_ftz.
def any_fmad
    : PatFrags<(ops node:$src0, node:$src1, node:$src2),
               [(fmad node:$src0, node:$src1, node:$src2),
                (AMDGPUfmad_ftz node:$src0, node:$src1, node:$src2)]>;

// Either fsqrt or the amdgcn sqrt intrinsic.
// FIXME: fsqrt should not select directly
def any_amdgcn_sqrt
    : PatFrags<(ops node:$src0),
               [(fsqrt node:$src0), (int_amdgcn_sqrt node:$src0)]>;
888