//===-- AMDGPUISelDAGToDAG.h - A dag to dag inst selector for AMDGPU ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// Defines an instruction selector for the AMDGPU target.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUISELDAGTODAG_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUISELDAGTODAG_H

#include "GCNSubtarget.h"
#include "SIMachineFunctionInfo.h"
#include "SIModeRegisterDefaults.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

namespace {

static inline bool isNullConstantOrUndef(SDValue V) {
  return V.isUndef() || isNullConstant(V);
}

static inline bool getConstantValue(SDValue N, uint32_t &Out) {
  // This is only used for packed vectors, where using 0 for undef should
  // always be good.
  if (N.isUndef()) {
    Out = 0;
    return true;
  }

  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
    Out = C->getAPIntValue().getSExtValue();
    return true;
  }

  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
    Out = C->getValueAPF().bitcastToAPInt().getSExtValue();
    return true;
  }

  return false;
}

// TODO: Handle undef as zero
static inline SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR && N->getNumOperands() == 2);
  uint32_t LHSVal, RHSVal;
  if (getConstantValue(N->getOperand(0), LHSVal) &&
      getConstantValue(N->getOperand(1), RHSVal)) {
    SDLoc SL(N);
    uint32_t K = (LHSVal & 0xffff) | (RHSVal << 16);
    return DAG.getMachineNode(AMDGPU::S_MOV_B32, SL, N->getValueType(0),
                              DAG.getTargetConstant(K, SL, MVT::i32));
  }

  return nullptr;
}
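
// For illustration: packConstantV2I16 folds a two-element BUILD_VECTOR of
// constant (or undef) i16 elements into a single S_MOV_B32. For example,
// (i16 1, i16 2) packs to the immediate 0x00020001, with element 0 in the
// low half and element 1 in the high half.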

} // namespace

/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const GCNSubtarget *Subtarget;

  // Default FP mode for the current function.
  SIModeRegisterDefaults Mode;

  bool EnableLateStructurizeCFG;

  // Instructions that will be lowered with a final instruction that zeros the
  // high result bits.
  bool fp16SrcZerosHighBits(unsigned Opc) const;

public:
  static char ID;

  AMDGPUDAGToDAGISel() = delete;

  explicit AMDGPUDAGToDAGISel(TargetMachine &TM, CodeGenOptLevel OptLevel);
  ~AMDGPUDAGToDAGISel() override = default;

  void getAnalysisUsage(AnalysisUsage &AU) const override;

  bool matchLoadD16FromBuildVector(SDNode *N) const;

  bool runOnMachineFunction(MachineFunction &MF) override;
  void PreprocessISelDAG() override;
  void Select(SDNode *N) override;
  StringRef getPassName() const override;
  void PostprocessISelDAG() override;

protected:
  void SelectBuildVector(SDNode *N, unsigned RegClassID);

private:
  std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
  bool isInlineImmediate(const SDNode *N) const;

  bool isInlineImmediate16(int64_t Imm) const {
    return AMDGPU::isInlinableLiteral16(Imm, Subtarget->hasInv2PiInlineImm());
  }

  bool isInlineImmediate32(int64_t Imm) const {
    return AMDGPU::isInlinableLiteral32(Imm, Subtarget->hasInv2PiInlineImm());
  }

  bool isInlineImmediate64(int64_t Imm) const {
    return AMDGPU::isInlinableLiteral64(Imm, Subtarget->hasInv2PiInlineImm());
  }

  bool isInlineImmediate(const APFloat &Imm) const {
    return Subtarget->getInstrInfo()->isInlineConstant(Imm);
  }

  bool isVGPRImm(const SDNode *N) const;
  bool isUniformLoad(const SDNode *N) const;
  bool isUniformBr(const SDNode *N) const;

  // Returns true if ISD::AND SDNode `N`'s masking of the shift amount operand's
  // `ShAmtBits` bits is unneeded.
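  // For example, a 32-bit shift uses only the low 5 bits of its amount, so an
  // (and x, 31) that produces the shift amount can typically be removed.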
  bool isUnneededShiftMask(const SDNode *N, unsigned ShAmtBits) const;

  bool isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS,
                                  SDValue &RHS) const;

  MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;

  SDNode *glueCopyToOp(SDNode *N, SDValue NewChain, SDValue Glue) const;
  SDNode *glueCopyToM0(SDNode *N, SDValue Val) const;
  SDNode *glueCopyToM0LDSInit(SDNode *N) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool isDSOffsetLegal(SDValue Base, unsigned Offset) const;
  bool isDSOffset2Legal(SDValue Base, unsigned Offset0, unsigned Offset1,
                        unsigned Size) const;

  bool isFlatScratchBaseLegal(SDValue Addr) const;
  bool isFlatScratchBaseLegalSV(SDValue Addr) const;
  bool isFlatScratchBaseLegalSVImm(SDValue Addr) const;

  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;
  bool SelectDS128Bit8ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                  SDValue &Offset1) const;
  bool SelectDSReadWrite2(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                          SDValue &Offset1, unsigned Size) const;
  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset) const;
  bool SelectMUBUFScratchOffen(SDNode *Parent, SDValue Addr, SDValue &RSrc,
                               SDValue &VAddr, SDValue &SOffset,
                               SDValue &ImmOffset) const;
  bool SelectMUBUFScratchOffset(SDNode *Parent, SDValue Addr, SDValue &SRsrc,
                                SDValue &Soffset, SDValue &Offset) const;

  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset) const;
  bool SelectBUFSOffset(SDValue Addr, SDValue &SOffset) const;

  bool SelectFlatOffsetImpl(SDNode *N, SDValue Addr, SDValue &VAddr,
                            SDValue &Offset, uint64_t FlatVariant) const;
  bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
                        SDValue &Offset) const;
  bool SelectGlobalOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
                          SDValue &Offset) const;
  bool SelectScratchOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
                           SDValue &Offset) const;
  bool SelectGlobalSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
                         SDValue &VOffset, SDValue &Offset) const;
  bool SelectScratchSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
                          SDValue &Offset) const;
  bool checkFlatScratchSVSSwizzleBug(SDValue VAddr, SDValue SAddr,
                                     uint64_t ImmOffset) const;
  bool SelectScratchSVAddr(SDNode *N, SDValue Addr, SDValue &VAddr,
                           SDValue &SAddr, SDValue &Offset) const;

  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue *SOffset,
                        SDValue *Offset, bool Imm32Only = false,
                        bool IsBuffer = false) const;
  SDValue Expand32BitAddress(SDValue Addr) const;
  bool SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase, SDValue *SOffset,
                            SDValue *Offset, bool Imm32Only = false,
                            bool IsBuffer = false) const;
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue *SOffset,
                  SDValue *Offset, bool Imm32Only = false) const;
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &SOffset) const;
  bool SelectSMRDSgprImm(SDValue Addr, SDValue &SBase, SDValue &SOffset,
                         SDValue &Offset) const;
  bool SelectSMRDBufferImm(SDValue N, SDValue &Offset) const;
  bool SelectSMRDBufferImm32(SDValue N, SDValue &Offset) const;
  bool SelectSMRDBufferSgprImm(SDValue N, SDValue &SOffset,
                               SDValue &Offset) const;
  bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;

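  // VOP3 / VOP3P source modifier matching. For example, a source wrapped in
  // fneg or fabs can typically be selected as the underlying value with the
  // corresponding neg/abs modifier bit encoded in SrcMods instead.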
  bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods,
                          bool IsCanonicalizing = true,
                          bool AllowAbs = true) const;
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3ModsNonCanonicalizing(SDValue In, SDValue &Src,
                                       SDValue &SrcMods) const;
  bool SelectVOP3BMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3BMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;

  bool SelectVINTERPModsImpl(SDValue In, SDValue &Src, SDValue &SrcMods,
                             bool OpSel) const;
  bool SelectVINTERPMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVINTERPModsHi(SDValue In, SDValue &Src, SDValue &SrcMods) const;

  bool SelectVOP3OMods(SDValue In, SDValue &Src, SDValue &Clamp,
                       SDValue &Omod) const;

  bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods,
                       bool IsDOT = false) const;
  bool SelectVOP3PModsDOT(SDValue In, SDValue &Src, SDValue &SrcMods) const;

  bool SelectVOP3PModsNeg(SDValue In, SDValue &Src) const;
  bool SelectWMMAOpSelVOP3PMods(SDValue In, SDValue &Src) const;

  bool SelectWMMAModsF32NegAbs(SDValue In, SDValue &Src,
                               SDValue &SrcMods) const;
  bool SelectWMMAModsF16Neg(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectWMMAModsF16NegAbs(SDValue In, SDValue &Src,
                               SDValue &SrcMods) const;
  bool SelectWMMAVISrc(SDValue In, SDValue &Src) const;

  bool SelectSWMMACIndex8(SDValue In, SDValue &Src, SDValue &IndexKey) const;
  bool SelectSWMMACIndex16(SDValue In, SDValue &Src, SDValue &IndexKey) const;

  bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;

  bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
                                 unsigned &Mods) const;
  bool SelectVOP3PMadMixModsExt(SDValue In, SDValue &Src,
                                SDValue &SrcMods) const;
  bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;

  SDValue getHi16Elt(SDValue In) const;

  SDValue getMaterializedScalarImm32(int64_t Val, const SDLoc &DL) const;

  void SelectADD_SUB_I64(SDNode *N);
  void SelectAddcSubb(SDNode *N);
  void SelectUADDO_USUBO(SDNode *N);
  void SelectDIV_SCALE(SDNode *N);
  void SelectMAD_64_32(SDNode *N);
  void SelectMUL_LOHI(SDNode *N);
  void SelectFMA_W_CHAIN(SDNode *N);
  void SelectFMUL_W_CHAIN(SDNode *N);
  SDNode *getBFE32(bool IsSigned, const SDLoc &DL, SDValue Val, uint32_t Offset,
                   uint32_t Width);
  void SelectS_BFEFromShifts(SDNode *N);
  void SelectS_BFE(SDNode *N);
  bool isCBranchSCC(const SDNode *N) const;
  void SelectBRCOND(SDNode *N);
  void SelectFMAD_FMA(SDNode *N);
  void SelectFP_EXTEND(SDNode *N);
  void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
  void SelectDSBvhStackIntrinsic(SDNode *N);
  void SelectDS_GWS(SDNode *N, unsigned IntrID);
  void SelectInterpP1F16(SDNode *N);
  void SelectINTRINSIC_W_CHAIN(SDNode *N);
  void SelectINTRINSIC_WO_CHAIN(SDNode *N);
  void SelectINTRINSIC_VOID(SDNode *N);
  void SelectWAVE_ADDRESS(SDNode *N);
  void SelectSTACKRESTORE(SDNode *N);

protected:
  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};

#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUISELDAGTODAG_H