1 //===-- AMDGPUISelDAGToDAG.h - A dag to dag inst selector for AMDGPU ----===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //==-----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// Defines an instruction selector for the AMDGPU target. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUISELDAGTODAG_H 15 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUISELDAGTODAG_H 16 17 #include "GCNSubtarget.h" 18 #include "SIMachineFunctionInfo.h" 19 #include "llvm/CodeGen/SelectionDAGISel.h" 20 #include "llvm/Target/TargetMachine.h" 21 22 using namespace llvm; 23 24 namespace { 25 26 static inline bool isNullConstantOrUndef(SDValue V) { 27 if (V.isUndef()) 28 return true; 29 30 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); 31 return Const != nullptr && Const->isZero(); 32 } 33 34 static inline bool getConstantValue(SDValue N, uint32_t &Out) { 35 // This is only used for packed vectors, where using 0 for undef should 36 // always be good. 37 if (N.isUndef()) { 38 Out = 0; 39 return true; 40 } 41 42 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) { 43 Out = C->getAPIntValue().getSExtValue(); 44 return true; 45 } 46 47 if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) { 48 Out = C->getValueAPF().bitcastToAPInt().getSExtValue(); 49 return true; 50 } 51 52 return false; 53 } 54 55 // TODO: Handle undef as zero 56 static inline SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG, 57 bool Negate = false) { 58 assert(N->getOpcode() == ISD::BUILD_VECTOR && N->getNumOperands() == 2); 59 uint32_t LHSVal, RHSVal; 60 if (getConstantValue(N->getOperand(0), LHSVal) && 61 getConstantValue(N->getOperand(1), RHSVal)) { 62 SDLoc SL(N); 63 uint32_t K = Negate ? (-LHSVal & 0xffff) | (-RHSVal << 16) 64 : (LHSVal & 0xffff) | (RHSVal << 16); 65 return DAG.getMachineNode(AMDGPU::S_MOV_B32, SL, N->getValueType(0), 66 DAG.getTargetConstant(K, SL, MVT::i32)); 67 } 68 69 return nullptr; 70 } 71 72 static inline SDNode *packNegConstantV2I16(const SDNode *N, SelectionDAG &DAG) { 73 return packConstantV2I16(N, DAG, true); 74 } 75 } // namespace 76 77 /// AMDGPU specific code to select AMDGPU machine instructions for 78 /// SelectionDAG operations. 79 class AMDGPUDAGToDAGISel : public SelectionDAGISel { 80 // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can 81 // make the right decision when generating code for different targets. 82 const GCNSubtarget *Subtarget; 83 84 // Default FP mode for the current function. 85 AMDGPU::SIModeRegisterDefaults Mode; 86 87 bool EnableLateStructurizeCFG; 88 89 // Instructions that will be lowered with a final instruction that zeros the 90 // high result bits. 91 bool fp16SrcZerosHighBits(unsigned Opc) const; 92 93 public: 94 explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr, 95 CodeGenOpt::Level OptLevel = CodeGenOpt::Default); 96 ~AMDGPUDAGToDAGISel() override = default; 97 98 void getAnalysisUsage(AnalysisUsage &AU) const override; 99 100 bool matchLoadD16FromBuildVector(SDNode *N) const; 101 102 bool runOnMachineFunction(MachineFunction &MF) override; 103 void PreprocessISelDAG() override; 104 void Select(SDNode *N) override; 105 StringRef getPassName() const override; 106 void PostprocessISelDAG() override; 107 108 protected: 109 void SelectBuildVector(SDNode *N, unsigned RegClassID); 110 111 private: 112 std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const; 113 bool isNoNanSrc(SDValue N) const; 114 bool isInlineImmediate(const SDNode *N, bool Negated = false) const; 115 bool isNegInlineImmediate(const SDNode *N) const { 116 return isInlineImmediate(N, true); 117 } 118 119 bool isInlineImmediate16(int64_t Imm) const { 120 return AMDGPU::isInlinableLiteral16(Imm, Subtarget->hasInv2PiInlineImm()); 121 } 122 123 bool isInlineImmediate32(int64_t Imm) const { 124 return AMDGPU::isInlinableLiteral32(Imm, Subtarget->hasInv2PiInlineImm()); 125 } 126 127 bool isInlineImmediate64(int64_t Imm) const { 128 return AMDGPU::isInlinableLiteral64(Imm, Subtarget->hasInv2PiInlineImm()); 129 } 130 131 bool isInlineImmediate(const APFloat &Imm) const { 132 return Subtarget->getInstrInfo()->isInlineConstant(Imm); 133 } 134 135 bool isVGPRImm(const SDNode *N) const; 136 bool isUniformLoad(const SDNode *N) const; 137 bool isUniformBr(const SDNode *N) const; 138 139 bool isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS, 140 SDValue &RHS) const; 141 142 MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const; 143 144 SDNode *glueCopyToOp(SDNode *N, SDValue NewChain, SDValue Glue) const; 145 SDNode *glueCopyToM0(SDNode *N, SDValue Val) const; 146 SDNode *glueCopyToM0LDSInit(SDNode *N) const; 147 148 const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const; 149 virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset); 150 virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset); 151 bool isDSOffsetLegal(SDValue Base, unsigned Offset) const; 152 bool isDSOffset2Legal(SDValue Base, unsigned Offset0, unsigned Offset1, 153 unsigned Size) const; 154 bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const; 155 bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0, 156 SDValue &Offset1) const; 157 bool SelectDS128Bit8ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0, 158 SDValue &Offset1) const; 159 bool SelectDSReadWrite2(SDValue Ptr, SDValue &Base, SDValue &Offset0, 160 SDValue &Offset1, unsigned Size) const; 161 bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, 162 SDValue &SOffset, SDValue &Offset, SDValue &Offen, 163 SDValue &Idxen, SDValue &Addr64) const; 164 bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, 165 SDValue &SOffset, SDValue &Offset) const; 166 bool SelectMUBUFScratchOffen(SDNode *Parent, SDValue Addr, SDValue &RSrc, 167 SDValue &VAddr, SDValue &SOffset, 168 SDValue &ImmOffset) const; 169 bool SelectMUBUFScratchOffset(SDNode *Parent, SDValue Addr, SDValue &SRsrc, 170 SDValue &Soffset, SDValue &Offset) const; 171 172 bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset, 173 SDValue &Offset) const; 174 175 bool SelectFlatOffsetImpl(SDNode *N, SDValue Addr, SDValue &VAddr, 176 SDValue &Offset, uint64_t FlatVariant) const; 177 bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr, 178 SDValue &Offset) const; 179 bool SelectGlobalOffset(SDNode *N, SDValue Addr, SDValue &VAddr, 180 SDValue &Offset) const; 181 bool SelectScratchOffset(SDNode *N, SDValue Addr, SDValue &VAddr, 182 SDValue &Offset) const; 183 bool SelectGlobalSAddr(SDNode *N, SDValue Addr, SDValue &SAddr, 184 SDValue &VOffset, SDValue &Offset) const; 185 bool SelectScratchSAddr(SDNode *N, SDValue Addr, SDValue &SAddr, 186 SDValue &Offset) const; 187 188 bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset, 189 bool &Imm) const; 190 SDValue Expand32BitAddress(SDValue Addr) const; 191 bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset, 192 bool &Imm) const; 193 bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const; 194 bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const; 195 bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const; 196 bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const; 197 bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const; 198 bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const; 199 200 bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const; 201 bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods, 202 bool AllowAbs = true) const; 203 bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const; 204 bool SelectVOP3BMods(SDValue In, SDValue &Src, SDValue &SrcMods) const; 205 bool SelectVOP3NoMods(SDValue In, SDValue &Src) const; 206 bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods, 207 SDValue &Clamp, SDValue &Omod) const; 208 bool SelectVOP3BMods0(SDValue In, SDValue &Src, SDValue &SrcMods, 209 SDValue &Clamp, SDValue &Omod) const; 210 bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods, 211 SDValue &Clamp, SDValue &Omod) const; 212 213 bool SelectVOP3OMods(SDValue In, SDValue &Src, SDValue &Clamp, 214 SDValue &Omod) const; 215 216 bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const; 217 218 bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const; 219 220 bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const; 221 bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, 222 unsigned &Mods) const; 223 bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const; 224 225 SDValue getHi16Elt(SDValue In) const; 226 227 SDValue getMaterializedScalarImm32(int64_t Val, const SDLoc &DL) const; 228 229 void SelectADD_SUB_I64(SDNode *N); 230 void SelectAddcSubb(SDNode *N); 231 void SelectUADDO_USUBO(SDNode *N); 232 void SelectDIV_SCALE(SDNode *N); 233 void SelectMAD_64_32(SDNode *N); 234 void SelectFMA_W_CHAIN(SDNode *N); 235 void SelectFMUL_W_CHAIN(SDNode *N); 236 SDNode *getBFE32(bool IsSigned, const SDLoc &DL, SDValue Val, uint32_t Offset, 237 uint32_t Width); 238 void SelectS_BFEFromShifts(SDNode *N); 239 void SelectS_BFE(SDNode *N); 240 bool isCBranchSCC(const SDNode *N) const; 241 void SelectBRCOND(SDNode *N); 242 void SelectFMAD_FMA(SDNode *N); 243 void SelectATOMIC_CMP_SWAP(SDNode *N); 244 void SelectDSAppendConsume(SDNode *N, unsigned IntrID); 245 void SelectDS_GWS(SDNode *N, unsigned IntrID); 246 void SelectInterpP1F16(SDNode *N); 247 void SelectINTRINSIC_W_CHAIN(SDNode *N); 248 void SelectINTRINSIC_WO_CHAIN(SDNode *N); 249 void SelectINTRINSIC_VOID(SDNode *N); 250 251 protected: 252 // Include the pieces autogenerated from the target description. 253 #include "AMDGPUGenDAGISel.inc" 254 }; 255 256 #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUISELDAGTODAG_H 257