xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h (revision 5e801ac66d24704442eba426ed13c3effb8a34e7)
1 //===-- AMDGPUISelDAGToDAG.h - A dag to dag inst selector for AMDGPU ----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //==-----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Defines an instruction selector for the AMDGPU target.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUISELDAGTODAG_H
15 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUISELDAGTODAG_H
16 
17 #include "GCNSubtarget.h"
18 #include "SIMachineFunctionInfo.h"
19 #include "llvm/CodeGen/SelectionDAGISel.h"
20 #include "llvm/Target/TargetMachine.h"
21 
22 using namespace llvm;
23 
24 namespace {
25 
26 static inline bool isNullConstantOrUndef(SDValue V) {
27   if (V.isUndef())
28     return true;
29 
30   ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
31   return Const != nullptr && Const->isZero();
32 }
33 
34 static inline bool getConstantValue(SDValue N, uint32_t &Out) {
35   // This is only used for packed vectors, where using 0 for undef should
36   // always be good.
37   if (N.isUndef()) {
38     Out = 0;
39     return true;
40   }
41 
42   if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
43     Out = C->getAPIntValue().getSExtValue();
44     return true;
45   }
46 
47   if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
48     Out = C->getValueAPF().bitcastToAPInt().getSExtValue();
49     return true;
50   }
51 
52   return false;
53 }
54 
55 // TODO: Handle undef as zero
56 static inline SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG,
57                                         bool Negate = false) {
58   assert(N->getOpcode() == ISD::BUILD_VECTOR && N->getNumOperands() == 2);
59   uint32_t LHSVal, RHSVal;
60   if (getConstantValue(N->getOperand(0), LHSVal) &&
61       getConstantValue(N->getOperand(1), RHSVal)) {
62     SDLoc SL(N);
63     uint32_t K = Negate ? (-LHSVal & 0xffff) | (-RHSVal << 16)
64                         : (LHSVal & 0xffff) | (RHSVal << 16);
65     return DAG.getMachineNode(AMDGPU::S_MOV_B32, SL, N->getValueType(0),
66                               DAG.getTargetConstant(K, SL, MVT::i32));
67   }
68 
69   return nullptr;
70 }
71 
72 static inline SDNode *packNegConstantV2I16(const SDNode *N, SelectionDAG &DAG) {
73   return packConstantV2I16(N, DAG, true);
74 }
75 } // namespace
76 
77 /// AMDGPU specific code to select AMDGPU machine instructions for
78 /// SelectionDAG operations.
79 class AMDGPUDAGToDAGISel : public SelectionDAGISel {
80   // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
81   // make the right decision when generating code for different targets.
82   const GCNSubtarget *Subtarget;
83 
84   // Default FP mode for the current function.
85   AMDGPU::SIModeRegisterDefaults Mode;
86 
87   bool EnableLateStructurizeCFG;
88 
89   // Instructions that will be lowered with a final instruction that zeros the
90   // high result bits.
91   bool fp16SrcZerosHighBits(unsigned Opc) const;
92 
93 public:
94   explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
95                               CodeGenOpt::Level OptLevel = CodeGenOpt::Default);
96   ~AMDGPUDAGToDAGISel() override = default;
97 
98   void getAnalysisUsage(AnalysisUsage &AU) const override;
99 
100   bool matchLoadD16FromBuildVector(SDNode *N) const;
101 
102   bool runOnMachineFunction(MachineFunction &MF) override;
103   void PreprocessISelDAG() override;
104   void Select(SDNode *N) override;
105   StringRef getPassName() const override;
106   void PostprocessISelDAG() override;
107 
108 protected:
109   void SelectBuildVector(SDNode *N, unsigned RegClassID);
110 
111 private:
112   std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
113   bool isNoNanSrc(SDValue N) const;
114   bool isInlineImmediate(const SDNode *N, bool Negated = false) const;
115   bool isNegInlineImmediate(const SDNode *N) const {
116     return isInlineImmediate(N, true);
117   }
118 
119   bool isInlineImmediate16(int64_t Imm) const {
120     return AMDGPU::isInlinableLiteral16(Imm, Subtarget->hasInv2PiInlineImm());
121   }
122 
123   bool isInlineImmediate32(int64_t Imm) const {
124     return AMDGPU::isInlinableLiteral32(Imm, Subtarget->hasInv2PiInlineImm());
125   }
126 
127   bool isInlineImmediate64(int64_t Imm) const {
128     return AMDGPU::isInlinableLiteral64(Imm, Subtarget->hasInv2PiInlineImm());
129   }
130 
131   bool isInlineImmediate(const APFloat &Imm) const {
132     return Subtarget->getInstrInfo()->isInlineConstant(Imm);
133   }
134 
135   bool isVGPRImm(const SDNode *N) const;
136   bool isUniformLoad(const SDNode *N) const;
137   bool isUniformBr(const SDNode *N) const;
138 
139   bool isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS,
140                                   SDValue &RHS) const;
141 
142   MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;
143 
144   SDNode *glueCopyToOp(SDNode *N, SDValue NewChain, SDValue Glue) const;
145   SDNode *glueCopyToM0(SDNode *N, SDValue Val) const;
146   SDNode *glueCopyToM0LDSInit(SDNode *N) const;
147 
148   const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
149   virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
150   virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
151   bool isDSOffsetLegal(SDValue Base, unsigned Offset) const;
152   bool isDSOffset2Legal(SDValue Base, unsigned Offset0, unsigned Offset1,
153                         unsigned Size) const;
154   bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
155   bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
156                                  SDValue &Offset1) const;
157   bool SelectDS128Bit8ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
158                                   SDValue &Offset1) const;
159   bool SelectDSReadWrite2(SDValue Ptr, SDValue &Base, SDValue &Offset0,
160                           SDValue &Offset1, unsigned Size) const;
161   bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
162                    SDValue &SOffset, SDValue &Offset, SDValue &Offen,
163                    SDValue &Idxen, SDValue &Addr64) const;
164   bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
165                          SDValue &SOffset, SDValue &Offset) const;
166   bool SelectMUBUFScratchOffen(SDNode *Parent, SDValue Addr, SDValue &RSrc,
167                                SDValue &VAddr, SDValue &SOffset,
168                                SDValue &ImmOffset) const;
169   bool SelectMUBUFScratchOffset(SDNode *Parent, SDValue Addr, SDValue &SRsrc,
170                                 SDValue &Soffset, SDValue &Offset) const;
171 
172   bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
173                          SDValue &Offset) const;
174 
175   bool SelectFlatOffsetImpl(SDNode *N, SDValue Addr, SDValue &VAddr,
176                             SDValue &Offset, uint64_t FlatVariant) const;
177   bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
178                         SDValue &Offset) const;
179   bool SelectGlobalOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
180                           SDValue &Offset) const;
181   bool SelectScratchOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
182                            SDValue &Offset) const;
183   bool SelectGlobalSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
184                          SDValue &VOffset, SDValue &Offset) const;
185   bool SelectScratchSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
186                           SDValue &Offset) const;
187 
188   bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
189                         bool &Imm) const;
190   SDValue Expand32BitAddress(SDValue Addr) const;
191   bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
192                   bool &Imm) const;
193   bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
194   bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
195   bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
196   bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
197   bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
198   bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
199 
200   bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
201   bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods,
202                           bool AllowAbs = true) const;
203   bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
204   bool SelectVOP3BMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
205   bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
206   bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
207                        SDValue &Clamp, SDValue &Omod) const;
208   bool SelectVOP3BMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
209                         SDValue &Clamp, SDValue &Omod) const;
210   bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
211                          SDValue &Clamp, SDValue &Omod) const;
212 
213   bool SelectVOP3OMods(SDValue In, SDValue &Src, SDValue &Clamp,
214                        SDValue &Omod) const;
215 
216   bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
217 
218   bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
219 
220   bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
221   bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
222                                  unsigned &Mods) const;
223   bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
224 
225   SDValue getHi16Elt(SDValue In) const;
226 
227   SDValue getMaterializedScalarImm32(int64_t Val, const SDLoc &DL) const;
228 
229   void SelectADD_SUB_I64(SDNode *N);
230   void SelectAddcSubb(SDNode *N);
231   void SelectUADDO_USUBO(SDNode *N);
232   void SelectDIV_SCALE(SDNode *N);
233   void SelectMAD_64_32(SDNode *N);
234   void SelectFMA_W_CHAIN(SDNode *N);
235   void SelectFMUL_W_CHAIN(SDNode *N);
236   SDNode *getBFE32(bool IsSigned, const SDLoc &DL, SDValue Val, uint32_t Offset,
237                    uint32_t Width);
238   void SelectS_BFEFromShifts(SDNode *N);
239   void SelectS_BFE(SDNode *N);
240   bool isCBranchSCC(const SDNode *N) const;
241   void SelectBRCOND(SDNode *N);
242   void SelectFMAD_FMA(SDNode *N);
243   void SelectATOMIC_CMP_SWAP(SDNode *N);
244   void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
245   void SelectDS_GWS(SDNode *N, unsigned IntrID);
246   void SelectInterpP1F16(SDNode *N);
247   void SelectINTRINSIC_W_CHAIN(SDNode *N);
248   void SelectINTRINSIC_WO_CHAIN(SDNode *N);
249   void SelectINTRINSIC_VOID(SDNode *N);
250 
251 protected:
252   // Include the pieces autogenerated from the target description.
253 #include "AMDGPUGenDAGISel.inc"
254 };
255 
256 #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUISELDAGTODAG_H
257