xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h (revision b9128a37faafede823eb456aa65a11ac69997284)
1 //===-- AMDGPUISelDAGToDAG.h - A dag to dag inst selector for AMDGPU ----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //==-----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Defines an instruction selector for the AMDGPU target.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUISELDAGTODAG_H
15 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUISELDAGTODAG_H
16 
17 #include "GCNSubtarget.h"
18 #include "SIMachineFunctionInfo.h"
19 #include "SIModeRegisterDefaults.h"
20 #include "llvm/CodeGen/SelectionDAGISel.h"
21 #include "llvm/Target/TargetMachine.h"
22 
23 using namespace llvm;
24 
25 namespace {
26 
27 static inline bool isNullConstantOrUndef(SDValue V) {
28   return V.isUndef() || isNullConstant(V);
29 }
30 
31 static inline bool getConstantValue(SDValue N, uint32_t &Out) {
32   // This is only used for packed vectors, where using 0 for undef should
33   // always be good.
34   if (N.isUndef()) {
35     Out = 0;
36     return true;
37   }
38 
39   if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
40     Out = C->getAPIntValue().getSExtValue();
41     return true;
42   }
43 
44   if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
45     Out = C->getValueAPF().bitcastToAPInt().getSExtValue();
46     return true;
47   }
48 
49   return false;
50 }
51 
52 // TODO: Handle undef as zero
53 static inline SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG) {
54   assert(N->getOpcode() == ISD::BUILD_VECTOR && N->getNumOperands() == 2);
55   uint32_t LHSVal, RHSVal;
56   if (getConstantValue(N->getOperand(0), LHSVal) &&
57       getConstantValue(N->getOperand(1), RHSVal)) {
58     SDLoc SL(N);
59     uint32_t K = (LHSVal & 0xffff) | (RHSVal << 16);
60     return DAG.getMachineNode(AMDGPU::S_MOV_B32, SL, N->getValueType(0),
61                               DAG.getTargetConstant(K, SL, MVT::i32));
62   }
63 
64   return nullptr;
65 }
66 
67 } // namespace
68 
69 /// AMDGPU specific code to select AMDGPU machine instructions for
70 /// SelectionDAG operations.
71 class AMDGPUDAGToDAGISel : public SelectionDAGISel {
72   // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
73   // make the right decision when generating code for different targets.
74   const GCNSubtarget *Subtarget;
75 
76   // Default FP mode for the current function.
77   SIModeRegisterDefaults Mode;
78 
79   bool EnableLateStructurizeCFG;
80 
81   // Instructions that will be lowered with a final instruction that zeros the
82   // high result bits.
83   bool fp16SrcZerosHighBits(unsigned Opc) const;
84 
85 public:
86   static char ID;
87 
88   AMDGPUDAGToDAGISel() = delete;
89 
90   explicit AMDGPUDAGToDAGISel(TargetMachine &TM, CodeGenOptLevel OptLevel);
91   ~AMDGPUDAGToDAGISel() override = default;
92 
93   void getAnalysisUsage(AnalysisUsage &AU) const override;
94 
95   bool matchLoadD16FromBuildVector(SDNode *N) const;
96 
97   bool runOnMachineFunction(MachineFunction &MF) override;
98   void PreprocessISelDAG() override;
99   void Select(SDNode *N) override;
100   StringRef getPassName() const override;
101   void PostprocessISelDAG() override;
102 
103 protected:
104   void SelectBuildVector(SDNode *N, unsigned RegClassID);
105 
106 private:
107   std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
108   bool isInlineImmediate(const SDNode *N) const;
109 
110   bool isInlineImmediate16(int64_t Imm) const {
111     return AMDGPU::isInlinableLiteral16(Imm, Subtarget->hasInv2PiInlineImm());
112   }
113 
114   bool isInlineImmediate32(int64_t Imm) const {
115     return AMDGPU::isInlinableLiteral32(Imm, Subtarget->hasInv2PiInlineImm());
116   }
117 
118   bool isInlineImmediate64(int64_t Imm) const {
119     return AMDGPU::isInlinableLiteral64(Imm, Subtarget->hasInv2PiInlineImm());
120   }
121 
122   bool isInlineImmediate(const APFloat &Imm) const {
123     return Subtarget->getInstrInfo()->isInlineConstant(Imm);
124   }
125 
126   bool isVGPRImm(const SDNode *N) const;
127   bool isUniformLoad(const SDNode *N) const;
128   bool isUniformBr(const SDNode *N) const;
129 
130   // Returns true if ISD::AND SDNode `N`'s masking of the shift amount operand's
131   // `ShAmtBits` bits is unneeded.
132   bool isUnneededShiftMask(const SDNode *N, unsigned ShAmtBits) const;
133 
134   bool isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS,
135                                   SDValue &RHS) const;
136 
137   MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;
138 
139   SDNode *glueCopyToOp(SDNode *N, SDValue NewChain, SDValue Glue) const;
140   SDNode *glueCopyToM0(SDNode *N, SDValue Val) const;
141   SDNode *glueCopyToM0LDSInit(SDNode *N) const;
142 
143   const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
144   virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
145   virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
146   bool isDSOffsetLegal(SDValue Base, unsigned Offset) const;
147   bool isDSOffset2Legal(SDValue Base, unsigned Offset0, unsigned Offset1,
148                         unsigned Size) const;
149 
150   bool isFlatScratchBaseLegal(SDValue Addr) const;
151   bool isFlatScratchBaseLegalSV(SDValue Addr) const;
152   bool isFlatScratchBaseLegalSVImm(SDValue Addr) const;
153 
154   bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
155   bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
156                                  SDValue &Offset1) const;
157   bool SelectDS128Bit8ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
158                                   SDValue &Offset1) const;
159   bool SelectDSReadWrite2(SDValue Ptr, SDValue &Base, SDValue &Offset0,
160                           SDValue &Offset1, unsigned Size) const;
161   bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
162                    SDValue &SOffset, SDValue &Offset, SDValue &Offen,
163                    SDValue &Idxen, SDValue &Addr64) const;
164   bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
165                          SDValue &SOffset, SDValue &Offset) const;
166   bool SelectMUBUFScratchOffen(SDNode *Parent, SDValue Addr, SDValue &RSrc,
167                                SDValue &VAddr, SDValue &SOffset,
168                                SDValue &ImmOffset) const;
169   bool SelectMUBUFScratchOffset(SDNode *Parent, SDValue Addr, SDValue &SRsrc,
170                                 SDValue &Soffset, SDValue &Offset) const;
171 
172   bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
173                          SDValue &Offset) const;
174   bool SelectBUFSOffset(SDValue Addr, SDValue &SOffset) const;
175 
176   bool SelectFlatOffsetImpl(SDNode *N, SDValue Addr, SDValue &VAddr,
177                             SDValue &Offset, uint64_t FlatVariant) const;
178   bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
179                         SDValue &Offset) const;
180   bool SelectGlobalOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
181                           SDValue &Offset) const;
182   bool SelectScratchOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
183                            SDValue &Offset) const;
184   bool SelectGlobalSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
185                          SDValue &VOffset, SDValue &Offset) const;
186   bool SelectScratchSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
187                           SDValue &Offset) const;
188   bool checkFlatScratchSVSSwizzleBug(SDValue VAddr, SDValue SAddr,
189                                      uint64_t ImmOffset) const;
190   bool SelectScratchSVAddr(SDNode *N, SDValue Addr, SDValue &VAddr,
191                            SDValue &SAddr, SDValue &Offset) const;
192 
193   bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue *SOffset,
194                         SDValue *Offset, bool Imm32Only = false,
195                         bool IsBuffer = false) const;
196   SDValue Expand32BitAddress(SDValue Addr) const;
197   bool SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase, SDValue *SOffset,
198                             SDValue *Offset, bool Imm32Only = false,
199                             bool IsBuffer = false) const;
200   bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue *SOffset,
201                   SDValue *Offset, bool Imm32Only = false) const;
202   bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
203   bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
204   bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &SOffset) const;
205   bool SelectSMRDSgprImm(SDValue Addr, SDValue &SBase, SDValue &SOffset,
206                          SDValue &Offset) const;
207   bool SelectSMRDBufferImm(SDValue N, SDValue &Offset) const;
208   bool SelectSMRDBufferImm32(SDValue N, SDValue &Offset) const;
209   bool SelectSMRDBufferSgprImm(SDValue N, SDValue &SOffset,
210                                SDValue &Offset) const;
211   bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
212 
213   bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods,
214                           bool IsCanonicalizing = true,
215                           bool AllowAbs = true) const;
216   bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
217   bool SelectVOP3ModsNonCanonicalizing(SDValue In, SDValue &Src,
218                                        SDValue &SrcMods) const;
219   bool SelectVOP3BMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
220   bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
221   bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
222                        SDValue &Clamp, SDValue &Omod) const;
223   bool SelectVOP3BMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
224                         SDValue &Clamp, SDValue &Omod) const;
225   bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
226                          SDValue &Clamp, SDValue &Omod) const;
227 
228   bool SelectVINTERPModsImpl(SDValue In, SDValue &Src, SDValue &SrcMods,
229                              bool OpSel) const;
230   bool SelectVINTERPMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
231   bool SelectVINTERPModsHi(SDValue In, SDValue &Src, SDValue &SrcMods) const;
232 
233   bool SelectVOP3OMods(SDValue In, SDValue &Src, SDValue &Clamp,
234                        SDValue &Omod) const;
235 
236   bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods,
237                        bool IsDOT = false) const;
238   bool SelectVOP3PModsDOT(SDValue In, SDValue &Src, SDValue &SrcMods) const;
239 
240   bool SelectVOP3PModsNeg(SDValue In, SDValue &Src) const;
241   bool SelectWMMAOpSelVOP3PMods(SDValue In, SDValue &Src) const;
242 
243   bool SelectWMMAModsF32NegAbs(SDValue In, SDValue &Src,
244                                SDValue &SrcMods) const;
245   bool SelectWMMAModsF16Neg(SDValue In, SDValue &Src, SDValue &SrcMods) const;
246   bool SelectWMMAModsF16NegAbs(SDValue In, SDValue &Src,
247                                SDValue &SrcMods) const;
248   bool SelectWMMAVISrc(SDValue In, SDValue &Src) const;
249 
250   bool SelectSWMMACIndex8(SDValue In, SDValue &Src, SDValue &IndexKey) const;
251   bool SelectSWMMACIndex16(SDValue In, SDValue &Src, SDValue &IndexKey) const;
252 
253   bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
254 
255   bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
256   bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
257                                  unsigned &Mods) const;
258   bool SelectVOP3PMadMixModsExt(SDValue In, SDValue &Src,
259                                 SDValue &SrcMods) const;
260   bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
261 
262   SDValue getHi16Elt(SDValue In) const;
263 
264   SDValue getMaterializedScalarImm32(int64_t Val, const SDLoc &DL) const;
265 
266   void SelectADD_SUB_I64(SDNode *N);
267   void SelectAddcSubb(SDNode *N);
268   void SelectUADDO_USUBO(SDNode *N);
269   void SelectDIV_SCALE(SDNode *N);
270   void SelectMAD_64_32(SDNode *N);
271   void SelectMUL_LOHI(SDNode *N);
272   void SelectFMA_W_CHAIN(SDNode *N);
273   void SelectFMUL_W_CHAIN(SDNode *N);
274   SDNode *getBFE32(bool IsSigned, const SDLoc &DL, SDValue Val, uint32_t Offset,
275                    uint32_t Width);
276   void SelectS_BFEFromShifts(SDNode *N);
277   void SelectS_BFE(SDNode *N);
278   bool isCBranchSCC(const SDNode *N) const;
279   void SelectBRCOND(SDNode *N);
280   void SelectFMAD_FMA(SDNode *N);
281   void SelectFP_EXTEND(SDNode *N);
282   void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
283   void SelectDSBvhStackIntrinsic(SDNode *N);
284   void SelectDS_GWS(SDNode *N, unsigned IntrID);
285   void SelectInterpP1F16(SDNode *N);
286   void SelectINTRINSIC_W_CHAIN(SDNode *N);
287   void SelectINTRINSIC_WO_CHAIN(SDNode *N);
288   void SelectINTRINSIC_VOID(SDNode *N);
289   void SelectWAVE_ADDRESS(SDNode *N);
290   void SelectSTACKRESTORE(SDNode *N);
291 
292 protected:
293   // Include the pieces autogenerated from the target description.
294 #include "AMDGPUGenDAGISel.inc"
295 };
296 
297 #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUISELDAGTODAG_H
298