10b57cec5SDimitry Andric //===-- AMDGPUISelLowering.h - AMDGPU Lowering Interface --------*- C++ -*-===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric /// \file 100b57cec5SDimitry Andric /// Interface definition of the TargetLowering class that is common 110b57cec5SDimitry Andric /// to all AMD GPUs. 120b57cec5SDimitry Andric // 130b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 140b57cec5SDimitry Andric 150b57cec5SDimitry Andric #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUISELLOWERING_H 160b57cec5SDimitry Andric #define LLVM_LIB_TARGET_AMDGPU_AMDGPUISELLOWERING_H 170b57cec5SDimitry Andric 180b57cec5SDimitry Andric #include "llvm/CodeGen/CallingConvLower.h" 190b57cec5SDimitry Andric #include "llvm/CodeGen/TargetLowering.h" 200b57cec5SDimitry Andric 210b57cec5SDimitry Andric namespace llvm { 220b57cec5SDimitry Andric 230b57cec5SDimitry Andric class AMDGPUMachineFunction; 240b57cec5SDimitry Andric class AMDGPUSubtarget; 250b57cec5SDimitry Andric struct ArgDescriptor; 260b57cec5SDimitry Andric 270b57cec5SDimitry Andric class AMDGPUTargetLowering : public TargetLowering { 280b57cec5SDimitry Andric private: 290b57cec5SDimitry Andric const AMDGPUSubtarget *Subtarget; 300b57cec5SDimitry Andric 310b57cec5SDimitry Andric /// \returns AMDGPUISD::FFBH_U32 node if the incoming \p Op may have been 320b57cec5SDimitry Andric /// legalized from a smaller type VT. Need to match pre-legalized type because 330b57cec5SDimitry Andric /// the generic legalization inserts the add/sub between the select and 340b57cec5SDimitry Andric /// compare. 350b57cec5SDimitry Andric SDValue getFFBX_U32(SelectionDAG &DAG, SDValue Op, const SDLoc &DL, unsigned Opc) const; 360b57cec5SDimitry Andric 370b57cec5SDimitry Andric public: 38349cc55cSDimitry Andric /// \returns The minimum number of bits needed to store the value of \Op as an 39349cc55cSDimitry Andric /// unsigned integer. Truncating to this size and then zero-extending to the 40349cc55cSDimitry Andric /// original size will not change the value. 410b57cec5SDimitry Andric static unsigned numBitsUnsigned(SDValue Op, SelectionDAG &DAG); 42349cc55cSDimitry Andric 43349cc55cSDimitry Andric /// \returns The minimum number of bits needed to store the value of \Op as a 44349cc55cSDimitry Andric /// signed integer. Truncating to this size and then sign-extending to the 45349cc55cSDimitry Andric /// original size will not change the value. 460b57cec5SDimitry Andric static unsigned numBitsSigned(SDValue Op, SelectionDAG &DAG); 470b57cec5SDimitry Andric 480b57cec5SDimitry Andric protected: 490b57cec5SDimitry Andric SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; 500b57cec5SDimitry Andric SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; 510b57cec5SDimitry Andric /// Split a vector store into multiple scalar stores. 520b57cec5SDimitry Andric /// \returns The resulting chain. 530b57cec5SDimitry Andric 540b57cec5SDimitry Andric SDValue LowerFREM(SDValue Op, SelectionDAG &DAG) const; 550b57cec5SDimitry Andric SDValue LowerFCEIL(SDValue Op, SelectionDAG &DAG) const; 560b57cec5SDimitry Andric SDValue LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const; 570b57cec5SDimitry Andric SDValue LowerFRINT(SDValue Op, SelectionDAG &DAG) const; 580b57cec5SDimitry Andric SDValue LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const; 590b57cec5SDimitry Andric 60bdd1243dSDimitry Andric SDValue LowerFROUNDEVEN(SDValue Op, SelectionDAG &DAG) const; 610b57cec5SDimitry Andric SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const; 620b57cec5SDimitry Andric SDValue LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const; 6306c3fb27SDimitry Andric 645f757f3fSDimitry Andric static bool allowApproxFunc(const SelectionDAG &DAG, SDNodeFlags Flags); 655f757f3fSDimitry Andric static bool needsDenormHandlingF32(const SelectionDAG &DAG, SDValue Src, 665f757f3fSDimitry Andric SDNodeFlags Flags); 6706c3fb27SDimitry Andric SDValue getIsLtSmallestNormal(SelectionDAG &DAG, SDValue Op, 6806c3fb27SDimitry Andric SDNodeFlags Flags) const; 6906c3fb27SDimitry Andric SDValue getIsFinite(SelectionDAG &DAG, SDValue Op, SDNodeFlags Flags) const; 7006c3fb27SDimitry Andric std::pair<SDValue, SDValue> getScaledLogInput(SelectionDAG &DAG, 7106c3fb27SDimitry Andric const SDLoc SL, SDValue Op, 7206c3fb27SDimitry Andric SDNodeFlags Flags) const; 7306c3fb27SDimitry Andric 7406c3fb27SDimitry Andric SDValue LowerFLOG2(SDValue Op, SelectionDAG &DAG) const; 7506c3fb27SDimitry Andric SDValue LowerFLOGCommon(SDValue Op, SelectionDAG &DAG) const; 7606c3fb27SDimitry Andric SDValue LowerFLOG10(SDValue Op, SelectionDAG &DAG) const; 7706c3fb27SDimitry Andric SDValue LowerFLOGUnsafe(SDValue Op, const SDLoc &SL, SelectionDAG &DAG, 788a4dda33SDimitry Andric bool IsLog10, SDNodeFlags Flags) const; 7906c3fb27SDimitry Andric SDValue lowerFEXP2(SDValue Op, SelectionDAG &DAG) const; 8006c3fb27SDimitry Andric 8106c3fb27SDimitry Andric SDValue lowerFEXPUnsafe(SDValue Op, const SDLoc &SL, SelectionDAG &DAG, 8206c3fb27SDimitry Andric SDNodeFlags Flags) const; 835f757f3fSDimitry Andric SDValue lowerFEXP10Unsafe(SDValue Op, const SDLoc &SL, SelectionDAG &DAG, 845f757f3fSDimitry Andric SDNodeFlags Flags) const; 850b57cec5SDimitry Andric SDValue lowerFEXP(SDValue Op, SelectionDAG &DAG) const; 860b57cec5SDimitry Andric 877a6dacacSDimitry Andric SDValue lowerCTLZResults(SDValue Op, SelectionDAG &DAG) const; 887a6dacacSDimitry Andric 890b57cec5SDimitry Andric SDValue LowerCTLZ_CTTZ(SDValue Op, SelectionDAG &DAG) const; 900b57cec5SDimitry Andric 910b57cec5SDimitry Andric SDValue LowerINT_TO_FP32(SDValue Op, SelectionDAG &DAG, bool Signed) const; 920b57cec5SDimitry Andric SDValue LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG, bool Signed) const; 930b57cec5SDimitry Andric SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; 940b57cec5SDimitry Andric SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; 950b57cec5SDimitry Andric 96fe6060f1SDimitry Andric SDValue LowerFP_TO_INT64(SDValue Op, SelectionDAG &DAG, bool Signed) const; 970b57cec5SDimitry Andric SDValue LowerFP_TO_FP16(SDValue Op, SelectionDAG &DAG) const; 98fe6060f1SDimitry Andric SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; 990b57cec5SDimitry Andric 1000b57cec5SDimitry Andric SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; 1010b57cec5SDimitry Andric 1020b57cec5SDimitry Andric protected: 1030b57cec5SDimitry Andric bool shouldCombineMemoryType(EVT VT) const; 1040b57cec5SDimitry Andric SDValue performLoadCombine(SDNode *N, DAGCombinerInfo &DCI) const; 1050b57cec5SDimitry Andric SDValue performStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const; 1060b57cec5SDimitry Andric SDValue performAssertSZExtCombine(SDNode *N, DAGCombinerInfo &DCI) const; 1078bcb0991SDimitry Andric SDValue performIntrinsicWOChainCombine(SDNode *N, DAGCombinerInfo &DCI) const; 1080b57cec5SDimitry Andric 1090b57cec5SDimitry Andric SDValue splitBinaryBitConstantOpImpl(DAGCombinerInfo &DCI, const SDLoc &SL, 1100b57cec5SDimitry Andric unsigned Opc, SDValue LHS, 1110b57cec5SDimitry Andric uint32_t ValLo, uint32_t ValHi) const; 1120b57cec5SDimitry Andric SDValue performShlCombine(SDNode *N, DAGCombinerInfo &DCI) const; 1130b57cec5SDimitry Andric SDValue performSraCombine(SDNode *N, DAGCombinerInfo &DCI) const; 1140b57cec5SDimitry Andric SDValue performSrlCombine(SDNode *N, DAGCombinerInfo &DCI) const; 1150b57cec5SDimitry Andric SDValue performTruncateCombine(SDNode *N, DAGCombinerInfo &DCI) const; 1160b57cec5SDimitry Andric SDValue performMulCombine(SDNode *N, DAGCombinerInfo &DCI) const; 1174824e7fdSDimitry Andric SDValue performMulLoHiCombine(SDNode *N, DAGCombinerInfo &DCI) const; 1180b57cec5SDimitry Andric SDValue performMulhsCombine(SDNode *N, DAGCombinerInfo &DCI) const; 1190b57cec5SDimitry Andric SDValue performMulhuCombine(SDNode *N, DAGCombinerInfo &DCI) const; 1200b57cec5SDimitry Andric SDValue performCtlz_CttzCombine(const SDLoc &SL, SDValue Cond, SDValue LHS, 1210b57cec5SDimitry Andric SDValue RHS, DAGCombinerInfo &DCI) const; 12206c3fb27SDimitry Andric 12306c3fb27SDimitry Andric SDValue foldFreeOpFromSelect(TargetLowering::DAGCombinerInfo &DCI, 12406c3fb27SDimitry Andric SDValue N) const; 1250b57cec5SDimitry Andric SDValue performSelectCombine(SDNode *N, DAGCombinerInfo &DCI) const; 1260b57cec5SDimitry Andric 12706c3fb27SDimitry Andric TargetLowering::NegatibleCost 12806c3fb27SDimitry Andric getConstantNegateCost(const ConstantFPSDNode *C) const; 12906c3fb27SDimitry Andric 1300b57cec5SDimitry Andric bool isConstantCostlierToNegate(SDValue N) const; 13106c3fb27SDimitry Andric bool isConstantCheaperToNegate(SDValue N) const; 1320b57cec5SDimitry Andric SDValue performFNegCombine(SDNode *N, DAGCombinerInfo &DCI) const; 1330b57cec5SDimitry Andric SDValue performFAbsCombine(SDNode *N, DAGCombinerInfo &DCI) const; 1340b57cec5SDimitry Andric SDValue performRcpCombine(SDNode *N, DAGCombinerInfo &DCI) const; 1350b57cec5SDimitry Andric 1360b57cec5SDimitry Andric static EVT getEquivalentMemType(LLVMContext &Context, EVT VT); 1370b57cec5SDimitry Andric 1380b57cec5SDimitry Andric virtual SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op, 1390b57cec5SDimitry Andric SelectionDAG &DAG) const; 1400b57cec5SDimitry Andric 1410b57cec5SDimitry Andric /// Return 64-bit value Op as two 32-bit integers. 1420b57cec5SDimitry Andric std::pair<SDValue, SDValue> split64BitValue(SDValue Op, 1430b57cec5SDimitry Andric SelectionDAG &DAG) const; 1440b57cec5SDimitry Andric SDValue getLoHalf64(SDValue Op, SelectionDAG &DAG) const; 1450b57cec5SDimitry Andric SDValue getHiHalf64(SDValue Op, SelectionDAG &DAG) const; 1460b57cec5SDimitry Andric 1470b57cec5SDimitry Andric /// Split a vector type into two parts. The first part is a power of two 1480b57cec5SDimitry Andric /// vector. The second part is whatever is left over, and is a scalar if it 1490b57cec5SDimitry Andric /// would otherwise be a 1-vector. 1500b57cec5SDimitry Andric std::pair<EVT, EVT> getSplitDestVTs(const EVT &VT, SelectionDAG &DAG) const; 1510b57cec5SDimitry Andric 1520b57cec5SDimitry Andric /// Split a vector value into two parts of types LoVT and HiVT. HiVT could be 1530b57cec5SDimitry Andric /// scalar. 1540b57cec5SDimitry Andric std::pair<SDValue, SDValue> splitVector(const SDValue &N, const SDLoc &DL, 1550b57cec5SDimitry Andric const EVT &LoVT, const EVT &HighVT, 1560b57cec5SDimitry Andric SelectionDAG &DAG) const; 1570b57cec5SDimitry Andric 1580b57cec5SDimitry Andric /// Split a vector load into 2 loads of half the vector. 1590b57cec5SDimitry Andric SDValue SplitVectorLoad(SDValue Op, SelectionDAG &DAG) const; 1600b57cec5SDimitry Andric 161e8d8bef9SDimitry Andric /// Widen a suitably aligned v3 load. For all other cases, split the input 162e8d8bef9SDimitry Andric /// vector load. 163e8d8bef9SDimitry Andric SDValue WidenOrSplitVectorLoad(SDValue Op, SelectionDAG &DAG) const; 1640b57cec5SDimitry Andric 1650b57cec5SDimitry Andric /// Split a vector store into 2 stores of half the vector. 1660b57cec5SDimitry Andric SDValue SplitVectorStore(SDValue Op, SelectionDAG &DAG) const; 1670b57cec5SDimitry Andric 1680b57cec5SDimitry Andric SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; 1690b57cec5SDimitry Andric SDValue LowerSDIVREM(SDValue Op, SelectionDAG &DAG) const; 1700b57cec5SDimitry Andric SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const; 1710b57cec5SDimitry Andric SDValue LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool sign) const; 1720b57cec5SDimitry Andric void LowerUDIVREM64(SDValue Op, SelectionDAG &DAG, 1730b57cec5SDimitry Andric SmallVectorImpl<SDValue> &Results) const; 1740b57cec5SDimitry Andric 1750b57cec5SDimitry Andric void analyzeFormalArgumentsCompute( 1760b57cec5SDimitry Andric CCState &State, 1770b57cec5SDimitry Andric const SmallVectorImpl<ISD::InputArg> &Ins) const; 1780b57cec5SDimitry Andric 1790b57cec5SDimitry Andric public: 1800b57cec5SDimitry Andric AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUSubtarget &STI); 1810b57cec5SDimitry Andric 182e8d8bef9SDimitry Andric bool mayIgnoreSignedZero(SDValue Op) const; 1830b57cec5SDimitry Andric stripBitcast(SDValue Val)1840b57cec5SDimitry Andric static inline SDValue stripBitcast(SDValue Val) { 1850b57cec5SDimitry Andric return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val; 1860b57cec5SDimitry Andric } 1870b57cec5SDimitry Andric 18806c3fb27SDimitry Andric static bool shouldFoldFNegIntoSrc(SDNode *FNeg, SDValue FNegSrc); 1890b57cec5SDimitry Andric static bool allUsesHaveSourceMods(const SDNode *N, 1900b57cec5SDimitry Andric unsigned CostThreshold = 4); 1910b57cec5SDimitry Andric bool isFAbsFree(EVT VT) const override; 1920b57cec5SDimitry Andric bool isFNegFree(EVT VT) const override; 1930b57cec5SDimitry Andric bool isTruncateFree(EVT Src, EVT Dest) const override; 1940b57cec5SDimitry Andric bool isTruncateFree(Type *Src, Type *Dest) const override; 1950b57cec5SDimitry Andric 1960b57cec5SDimitry Andric bool isZExtFree(Type *Src, Type *Dest) const override; 1970b57cec5SDimitry Andric bool isZExtFree(EVT Src, EVT Dest) const override; 1980b57cec5SDimitry Andric 1995ffd83dbSDimitry Andric SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, 2005ffd83dbSDimitry Andric bool LegalOperations, bool ForCodeSize, 2015ffd83dbSDimitry Andric NegatibleCost &Cost, 2025ffd83dbSDimitry Andric unsigned Depth) const override; 2035ffd83dbSDimitry Andric 20406c3fb27SDimitry Andric bool isNarrowingProfitable(EVT SrcVT, EVT DestVT) const override; 2050b57cec5SDimitry Andric 206bdd1243dSDimitry Andric bool isDesirableToCommuteWithShift(const SDNode *N, 207bdd1243dSDimitry Andric CombineLevel Level) const override; 208bdd1243dSDimitry Andric 2095ffd83dbSDimitry Andric EVT getTypeForExtReturn(LLVMContext &Context, EVT VT, 2105ffd83dbSDimitry Andric ISD::NodeType ExtendKind) const override; 2115ffd83dbSDimitry Andric 2120b57cec5SDimitry Andric MVT getVectorIdxTy(const DataLayout &) const override; 2130b57cec5SDimitry Andric bool isSelectSupported(SelectSupportKind) const override; 2140b57cec5SDimitry Andric 2150b57cec5SDimitry Andric bool isFPImmLegal(const APFloat &Imm, EVT VT, 2160b57cec5SDimitry Andric bool ForCodeSize) const override; 2170b57cec5SDimitry Andric bool ShouldShrinkFPConstant(EVT VT) const override; 2180b57cec5SDimitry Andric bool shouldReduceLoadWidth(SDNode *Load, 2190b57cec5SDimitry Andric ISD::LoadExtType ExtType, 2200b57cec5SDimitry Andric EVT ExtVT) const override; 2210b57cec5SDimitry Andric 2220b57cec5SDimitry Andric bool isLoadBitCastBeneficial(EVT, EVT, const SelectionDAG &DAG, 2230b57cec5SDimitry Andric const MachineMemOperand &MMO) const final; 2240b57cec5SDimitry Andric 22506c3fb27SDimitry Andric bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, 2260b57cec5SDimitry Andric unsigned NumElem, 2270b57cec5SDimitry Andric unsigned AS) const override; 2280b57cec5SDimitry Andric bool aggressivelyPreferBuildVectorSources(EVT VecVT) const override; 229bdd1243dSDimitry Andric bool isCheapToSpeculateCttz(Type *Ty) const override; 230bdd1243dSDimitry Andric bool isCheapToSpeculateCtlz(Type *Ty) const override; 2310b57cec5SDimitry Andric 2320b57cec5SDimitry Andric bool isSDNodeAlwaysUniform(const SDNode *N) const override; 233*0fca6ea1SDimitry Andric 234*0fca6ea1SDimitry Andric // FIXME: This hook should not exist shouldCastAtomicLoadInIR(LoadInst * LI)235*0fca6ea1SDimitry Andric AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const override { 236*0fca6ea1SDimitry Andric return AtomicExpansionKind::None; 237*0fca6ea1SDimitry Andric } 238*0fca6ea1SDimitry Andric shouldCastAtomicStoreInIR(StoreInst * SI)239*0fca6ea1SDimitry Andric AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const override { 240*0fca6ea1SDimitry Andric return AtomicExpansionKind::None; 241*0fca6ea1SDimitry Andric } 242*0fca6ea1SDimitry Andric shouldCastAtomicRMWIInIR(AtomicRMWInst *)243*0fca6ea1SDimitry Andric AtomicExpansionKind shouldCastAtomicRMWIInIR(AtomicRMWInst *) const override { 244*0fca6ea1SDimitry Andric return AtomicExpansionKind::None; 245*0fca6ea1SDimitry Andric } 246*0fca6ea1SDimitry Andric 2470b57cec5SDimitry Andric static CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg); 2480b57cec5SDimitry Andric static CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg); 2490b57cec5SDimitry Andric 2500b57cec5SDimitry Andric SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, 2510b57cec5SDimitry Andric const SmallVectorImpl<ISD::OutputArg> &Outs, 2520b57cec5SDimitry Andric const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL, 2530b57cec5SDimitry Andric SelectionDAG &DAG) const override; 2540b57cec5SDimitry Andric 2550b57cec5SDimitry Andric SDValue addTokenForArgument(SDValue Chain, 2560b57cec5SDimitry Andric SelectionDAG &DAG, 2570b57cec5SDimitry Andric MachineFrameInfo &MFI, 2580b57cec5SDimitry Andric int ClobberedFI) const; 2590b57cec5SDimitry Andric 2600b57cec5SDimitry Andric SDValue lowerUnhandledCall(CallLoweringInfo &CLI, 2610b57cec5SDimitry Andric SmallVectorImpl<SDValue> &InVals, 2620b57cec5SDimitry Andric StringRef Reason) const; 2630b57cec5SDimitry Andric SDValue LowerCall(CallLoweringInfo &CLI, 2640b57cec5SDimitry Andric SmallVectorImpl<SDValue> &InVals) const override; 2650b57cec5SDimitry Andric 2665f757f3fSDimitry Andric SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; 2670b57cec5SDimitry Andric SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; 2680b57cec5SDimitry Andric SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; 2690b57cec5SDimitry Andric void ReplaceNodeResults(SDNode * N, 2700b57cec5SDimitry Andric SmallVectorImpl<SDValue> &Results, 2710b57cec5SDimitry Andric SelectionDAG &DAG) const override; 2720b57cec5SDimitry Andric 27306c3fb27SDimitry Andric SDValue combineFMinMaxLegacyImpl(const SDLoc &DL, EVT VT, SDValue LHS, 27406c3fb27SDimitry Andric SDValue RHS, SDValue True, SDValue False, 27506c3fb27SDimitry Andric SDValue CC, DAGCombinerInfo &DCI) const; 27606c3fb27SDimitry Andric 2770b57cec5SDimitry Andric SDValue combineFMinMaxLegacy(const SDLoc &DL, EVT VT, SDValue LHS, 2780b57cec5SDimitry Andric SDValue RHS, SDValue True, SDValue False, 2790b57cec5SDimitry Andric SDValue CC, DAGCombinerInfo &DCI) const; 2800b57cec5SDimitry Andric 2810b57cec5SDimitry Andric const char* getTargetNodeName(unsigned Opcode) const override; 2820b57cec5SDimitry Andric 2830b57cec5SDimitry Andric // FIXME: Turn off MergeConsecutiveStores() before Instruction Selection for 2840b57cec5SDimitry Andric // AMDGPU. Commit r319036, 2850b57cec5SDimitry Andric // (https://github.com/llvm/llvm-project/commit/db77e57ea86d941a4262ef60261692f4cb6893e6) 2860b57cec5SDimitry Andric // turned on MergeConsecutiveStores() before Instruction Selection for all 2870b57cec5SDimitry Andric // targets. Enough AMDGPU compiles go into an infinite loop ( 2880b57cec5SDimitry Andric // MergeConsecutiveStores() merges two stores; LegalizeStoreOps() un-merges; 2890b57cec5SDimitry Andric // MergeConsecutiveStores() re-merges, etc. ) to warrant turning it off for 2900b57cec5SDimitry Andric // now. mergeStoresAfterLegalization(EVT)2910b57cec5SDimitry Andric bool mergeStoresAfterLegalization(EVT) const override { return false; } 2920b57cec5SDimitry Andric isFsqrtCheap(SDValue Operand,SelectionDAG & DAG)2930b57cec5SDimitry Andric bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override { 2940b57cec5SDimitry Andric return true; 2950b57cec5SDimitry Andric } 2960b57cec5SDimitry Andric SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, 2970b57cec5SDimitry Andric int &RefinementSteps, bool &UseOneConstNR, 2980b57cec5SDimitry Andric bool Reciprocal) const override; 2990b57cec5SDimitry Andric SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, 3000b57cec5SDimitry Andric int &RefinementSteps) const override; 3010b57cec5SDimitry Andric 3020b57cec5SDimitry Andric virtual SDNode *PostISelFolding(MachineSDNode *N, 3030b57cec5SDimitry Andric SelectionDAG &DAG) const = 0; 3040b57cec5SDimitry Andric 3050b57cec5SDimitry Andric /// Determine which of the bits specified in \p Mask are known to be 3060b57cec5SDimitry Andric /// either zero or one and return them in the \p KnownZero and \p KnownOne 3070b57cec5SDimitry Andric /// bitsets. 3080b57cec5SDimitry Andric void computeKnownBitsForTargetNode(const SDValue Op, 3090b57cec5SDimitry Andric KnownBits &Known, 3100b57cec5SDimitry Andric const APInt &DemandedElts, 3110b57cec5SDimitry Andric const SelectionDAG &DAG, 3120b57cec5SDimitry Andric unsigned Depth = 0) const override; 3130b57cec5SDimitry Andric 3140b57cec5SDimitry Andric unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, 3150b57cec5SDimitry Andric const SelectionDAG &DAG, 3160b57cec5SDimitry Andric unsigned Depth = 0) const override; 3170b57cec5SDimitry Andric 3185ffd83dbSDimitry Andric unsigned computeNumSignBitsForTargetInstr(GISelKnownBits &Analysis, 3195ffd83dbSDimitry Andric Register R, 3205ffd83dbSDimitry Andric const APInt &DemandedElts, 3215ffd83dbSDimitry Andric const MachineRegisterInfo &MRI, 3225ffd83dbSDimitry Andric unsigned Depth = 0) const override; 3235ffd83dbSDimitry Andric 3240b57cec5SDimitry Andric bool isKnownNeverNaNForTargetNode(SDValue Op, 3250b57cec5SDimitry Andric const SelectionDAG &DAG, 3260b57cec5SDimitry Andric bool SNaN = false, 3270b57cec5SDimitry Andric unsigned Depth = 0) const override; 3280b57cec5SDimitry Andric 32906c3fb27SDimitry Andric bool isReassocProfitable(MachineRegisterInfo &MRI, Register N0, 33006c3fb27SDimitry Andric Register N1) const override; 33106c3fb27SDimitry Andric 3320b57cec5SDimitry Andric /// Helper function that adds Reg to the LiveIn list of the DAG's 3330b57cec5SDimitry Andric /// MachineFunction. 3340b57cec5SDimitry Andric /// 3350b57cec5SDimitry Andric /// \returns a RegisterSDNode representing Reg if \p RawReg is true, otherwise 3360b57cec5SDimitry Andric /// a copy from the register. 3370b57cec5SDimitry Andric SDValue CreateLiveInRegister(SelectionDAG &DAG, 3380b57cec5SDimitry Andric const TargetRegisterClass *RC, 3395ffd83dbSDimitry Andric Register Reg, EVT VT, 3400b57cec5SDimitry Andric const SDLoc &SL, 3410b57cec5SDimitry Andric bool RawReg = false) const; CreateLiveInRegister(SelectionDAG & DAG,const TargetRegisterClass * RC,Register Reg,EVT VT)3420b57cec5SDimitry Andric SDValue CreateLiveInRegister(SelectionDAG &DAG, 3430b57cec5SDimitry Andric const TargetRegisterClass *RC, 3445ffd83dbSDimitry Andric Register Reg, EVT VT) const { 3450b57cec5SDimitry Andric return CreateLiveInRegister(DAG, RC, Reg, VT, SDLoc(DAG.getEntryNode())); 3460b57cec5SDimitry Andric } 3470b57cec5SDimitry Andric 3480b57cec5SDimitry Andric // Returns the raw live in register rather than a copy from it. CreateLiveInRegisterRaw(SelectionDAG & DAG,const TargetRegisterClass * RC,Register Reg,EVT VT)3490b57cec5SDimitry Andric SDValue CreateLiveInRegisterRaw(SelectionDAG &DAG, 3500b57cec5SDimitry Andric const TargetRegisterClass *RC, 3515ffd83dbSDimitry Andric Register Reg, EVT VT) const { 3520b57cec5SDimitry Andric return CreateLiveInRegister(DAG, RC, Reg, VT, SDLoc(DAG.getEntryNode()), true); 3530b57cec5SDimitry Andric } 3540b57cec5SDimitry Andric 3550b57cec5SDimitry Andric /// Similar to CreateLiveInRegister, except value maybe loaded from a stack 3560b57cec5SDimitry Andric /// slot rather than passed in a register. 3570b57cec5SDimitry Andric SDValue loadStackInputValue(SelectionDAG &DAG, 3580b57cec5SDimitry Andric EVT VT, 3590b57cec5SDimitry Andric const SDLoc &SL, 3600b57cec5SDimitry Andric int64_t Offset) const; 3610b57cec5SDimitry Andric 3620b57cec5SDimitry Andric SDValue storeStackInputValue(SelectionDAG &DAG, 3630b57cec5SDimitry Andric const SDLoc &SL, 3640b57cec5SDimitry Andric SDValue Chain, 3650b57cec5SDimitry Andric SDValue ArgVal, 3660b57cec5SDimitry Andric int64_t Offset) const; 3670b57cec5SDimitry Andric 3680b57cec5SDimitry Andric SDValue loadInputValue(SelectionDAG &DAG, 3690b57cec5SDimitry Andric const TargetRegisterClass *RC, 3700b57cec5SDimitry Andric EVT VT, const SDLoc &SL, 3710b57cec5SDimitry Andric const ArgDescriptor &Arg) const; 3720b57cec5SDimitry Andric 3730b57cec5SDimitry Andric enum ImplicitParameter { 3740b57cec5SDimitry Andric FIRST_IMPLICIT, 37581ad6265SDimitry Andric PRIVATE_BASE, 37681ad6265SDimitry Andric SHARED_BASE, 37781ad6265SDimitry Andric QUEUE_PTR, 3780b57cec5SDimitry Andric }; 3790b57cec5SDimitry Andric 3800b57cec5SDimitry Andric /// Helper function that returns the byte offset of the given 3810b57cec5SDimitry Andric /// type of implicit parameter. 3820b57cec5SDimitry Andric uint32_t getImplicitParameterOffset(const MachineFunction &MF, 3830b57cec5SDimitry Andric const ImplicitParameter Param) const; 38406c3fb27SDimitry Andric uint32_t getImplicitParameterOffset(const uint64_t ExplicitKernArgSize, 38506c3fb27SDimitry Andric const ImplicitParameter Param) const; 3860b57cec5SDimitry Andric getFenceOperandTy(const DataLayout & DL)3870b57cec5SDimitry Andric MVT getFenceOperandTy(const DataLayout &DL) const override { 3880b57cec5SDimitry Andric return MVT::i32; 3890b57cec5SDimitry Andric } 3900b57cec5SDimitry Andric 3910b57cec5SDimitry Andric AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override; 392fe6060f1SDimitry Andric 39306c3fb27SDimitry Andric bool shouldSinkOperands(Instruction *I, 39406c3fb27SDimitry Andric SmallVectorImpl<Use *> &Ops) const override; 3950b57cec5SDimitry Andric }; 3960b57cec5SDimitry Andric 3970b57cec5SDimitry Andric namespace AMDGPUISD { 3980b57cec5SDimitry Andric 3990b57cec5SDimitry Andric enum NodeType : unsigned { 4000b57cec5SDimitry Andric // AMDIL ISD Opcodes 4010b57cec5SDimitry Andric FIRST_NUMBER = ISD::BUILTIN_OP_END, 4020b57cec5SDimitry Andric UMUL, // 32bit unsigned multiplication 4030b57cec5SDimitry Andric BRANCH_COND, 4040b57cec5SDimitry Andric // End AMDIL ISD Opcodes 4050b57cec5SDimitry Andric 4060b57cec5SDimitry Andric // Function call. 4070b57cec5SDimitry Andric CALL, 4080b57cec5SDimitry Andric TC_RETURN, 40906c3fb27SDimitry Andric TC_RETURN_GFX, 4105f757f3fSDimitry Andric TC_RETURN_CHAIN, 4110b57cec5SDimitry Andric TRAP, 4120b57cec5SDimitry Andric 4130b57cec5SDimitry Andric // Masked control flow nodes. 4140b57cec5SDimitry Andric IF, 4150b57cec5SDimitry Andric ELSE, 4160b57cec5SDimitry Andric LOOP, 4170b57cec5SDimitry Andric 4180b57cec5SDimitry Andric // A uniform kernel return that terminates the wavefront. 4190b57cec5SDimitry Andric ENDPGM, 4200b57cec5SDimitry Andric 42106c3fb27SDimitry Andric // s_endpgm, but we may want to insert it in the middle of the block. 42206c3fb27SDimitry Andric ENDPGM_TRAP, 42306c3fb27SDimitry Andric 424*0fca6ea1SDimitry Andric // "s_trap 2" equivalent on hardware that does not support it. 425*0fca6ea1SDimitry Andric SIMULATED_TRAP, 426*0fca6ea1SDimitry Andric 4270b57cec5SDimitry Andric // Return to a shader part's epilog code. 4280b57cec5SDimitry Andric RETURN_TO_EPILOG, 4290b57cec5SDimitry Andric 4300b57cec5SDimitry Andric // Return with values from a non-entry function. 43106c3fb27SDimitry Andric RET_GLUE, 4320b57cec5SDimitry Andric 4335f757f3fSDimitry Andric // Convert a unswizzled wave uniform stack address to an address compatible 4345f757f3fSDimitry Andric // with a vector offset for use in stack access. 4355f757f3fSDimitry Andric WAVE_ADDRESS, 4365f757f3fSDimitry Andric 4370b57cec5SDimitry Andric DWORDADDR, 4380b57cec5SDimitry Andric FRACT, 4390b57cec5SDimitry Andric 4400b57cec5SDimitry Andric /// CLAMP value between 0.0 and 1.0. NaN clamped to 0, following clamp output 4410b57cec5SDimitry Andric /// modifier behavior with dx10_enable. 4420b57cec5SDimitry Andric CLAMP, 4430b57cec5SDimitry Andric 4440b57cec5SDimitry Andric // This is SETCC with the full mask result which is used for a compare with a 4450b57cec5SDimitry Andric // result bit per item in the wavefront. 4460b57cec5SDimitry Andric SETCC, 4470b57cec5SDimitry Andric SETREG, 4488bcb0991SDimitry Andric 4498bcb0991SDimitry Andric DENORM_MODE, 4508bcb0991SDimitry Andric 4510b57cec5SDimitry Andric // FP ops with input and output chain. 4520b57cec5SDimitry Andric FMA_W_CHAIN, 4530b57cec5SDimitry Andric FMUL_W_CHAIN, 4540b57cec5SDimitry Andric 4550b57cec5SDimitry Andric // SIN_HW, COS_HW - f32 for SI, 1 ULP max error, valid from -100 pi to 100 pi. 4560b57cec5SDimitry Andric // Denormals handled on some parts. 4570b57cec5SDimitry Andric COS_HW, 4580b57cec5SDimitry Andric SIN_HW, 4590b57cec5SDimitry Andric FMAX_LEGACY, 4600b57cec5SDimitry Andric FMIN_LEGACY, 4610b57cec5SDimitry Andric 4620b57cec5SDimitry Andric FMAX3, 4630b57cec5SDimitry Andric SMAX3, 4640b57cec5SDimitry Andric UMAX3, 4650b57cec5SDimitry Andric FMIN3, 4660b57cec5SDimitry Andric SMIN3, 4670b57cec5SDimitry Andric UMIN3, 4680b57cec5SDimitry Andric FMED3, 4690b57cec5SDimitry Andric SMED3, 4700b57cec5SDimitry Andric UMED3, 4715f757f3fSDimitry Andric FMAXIMUM3, 4725f757f3fSDimitry Andric FMINIMUM3, 4730b57cec5SDimitry Andric FDOT2, 4740b57cec5SDimitry Andric URECIP, 4750b57cec5SDimitry Andric DIV_SCALE, 4760b57cec5SDimitry Andric DIV_FMAS, 4770b57cec5SDimitry Andric DIV_FIXUP, 4780b57cec5SDimitry Andric // For emitting ISD::FMAD when f32 denormals are enabled because mac/mad is 4790b57cec5SDimitry Andric // treated as an illegal operation. 4800b57cec5SDimitry Andric FMAD_FTZ, 4810b57cec5SDimitry Andric 4820b57cec5SDimitry Andric // RCP, RSQ - For f32, 1 ULP max error, no denormal handling. 4830b57cec5SDimitry Andric // For f64, max error 2^29 ULP, handles denormals. 4840b57cec5SDimitry Andric RCP, 4850b57cec5SDimitry Andric RSQ, 4860b57cec5SDimitry Andric RCP_LEGACY, 4870b57cec5SDimitry Andric RCP_IFLAG, 48806c3fb27SDimitry Andric 48906c3fb27SDimitry Andric // log2, no denormal handling for f32. 49006c3fb27SDimitry Andric LOG, 49106c3fb27SDimitry Andric 49206c3fb27SDimitry Andric // exp2, no denormal handling for f32. 49306c3fb27SDimitry Andric EXP, 49406c3fb27SDimitry Andric 4950b57cec5SDimitry Andric FMUL_LEGACY, 4960b57cec5SDimitry Andric RSQ_CLAMP, 4970b57cec5SDimitry Andric FP_CLASS, 4980b57cec5SDimitry Andric DOT4, 4990b57cec5SDimitry Andric CARRY, 5000b57cec5SDimitry Andric BORROW, 5010b57cec5SDimitry Andric BFE_U32, // Extract range of bits with zero extension to 32-bits. 5020b57cec5SDimitry Andric BFE_I32, // Extract range of bits with sign extension to 32-bits. 5030b57cec5SDimitry Andric BFI, // (src0 & src1) | (~src0 & src2) 5040b57cec5SDimitry Andric BFM, // Insert a range of bits into a 32-bit word. 5050b57cec5SDimitry Andric FFBH_U32, // ctlz with -1 if input is zero. 5060b57cec5SDimitry Andric FFBH_I32, 5070b57cec5SDimitry Andric FFBL_B32, // cttz with -1 if input is zero. 5080b57cec5SDimitry Andric MUL_U24, 5090b57cec5SDimitry Andric MUL_I24, 5100b57cec5SDimitry Andric MULHI_U24, 5110b57cec5SDimitry Andric MULHI_I24, 5120b57cec5SDimitry Andric MAD_U24, 5130b57cec5SDimitry Andric MAD_I24, 5140b57cec5SDimitry Andric MAD_U64_U32, 5150b57cec5SDimitry Andric MAD_I64_I32, 5160b57cec5SDimitry Andric PERM, 5170b57cec5SDimitry Andric TEXTURE_FETCH, 5180b57cec5SDimitry Andric R600_EXPORT, 5190b57cec5SDimitry Andric CONST_ADDRESS, 5200b57cec5SDimitry Andric REGISTER_LOAD, 5210b57cec5SDimitry Andric REGISTER_STORE, 5220b57cec5SDimitry Andric SAMPLE, 5230b57cec5SDimitry Andric SAMPLEB, 5240b57cec5SDimitry Andric SAMPLED, 5250b57cec5SDimitry Andric SAMPLEL, 5260b57cec5SDimitry Andric 5270b57cec5SDimitry Andric // These cvt_f32_ubyte* nodes need to remain consecutive and in order. 5280b57cec5SDimitry Andric CVT_F32_UBYTE0, 5290b57cec5SDimitry Andric CVT_F32_UBYTE1, 5300b57cec5SDimitry Andric CVT_F32_UBYTE2, 5310b57cec5SDimitry Andric CVT_F32_UBYTE3, 5320b57cec5SDimitry Andric 5330b57cec5SDimitry Andric // Convert two float 32 numbers into a single register holding two packed f16 5340b57cec5SDimitry Andric // with round to zero. 5350b57cec5SDimitry Andric CVT_PKRTZ_F16_F32, 5360b57cec5SDimitry Andric CVT_PKNORM_I16_F32, 5370b57cec5SDimitry Andric CVT_PKNORM_U16_F32, 5380b57cec5SDimitry Andric CVT_PK_I16_I32, 5390b57cec5SDimitry Andric CVT_PK_U16_U32, 5400b57cec5SDimitry Andric 5410b57cec5SDimitry Andric // Same as the standard node, except the high bits of the resulting integer 5420b57cec5SDimitry Andric // are known 0. 5430b57cec5SDimitry Andric FP_TO_FP16, 5440b57cec5SDimitry Andric 5450b57cec5SDimitry Andric /// This node is for VLIW targets and it is used to represent a vector 5460b57cec5SDimitry Andric /// that is stored in consecutive registers with the same channel. 5470b57cec5SDimitry Andric /// For example: 5480b57cec5SDimitry Andric /// |X |Y|Z|W| 5490b57cec5SDimitry Andric /// T0|v.x| | | | 5500b57cec5SDimitry Andric /// T1|v.y| | | | 5510b57cec5SDimitry Andric /// T2|v.z| | | | 5520b57cec5SDimitry Andric /// T3|v.w| | | | 5530b57cec5SDimitry Andric BUILD_VERTICAL_VECTOR, 5540b57cec5SDimitry Andric /// Pointer to the start of the shader's constant data. 5550b57cec5SDimitry Andric CONST_DATA_PTR, 5560b57cec5SDimitry Andric PC_ADD_REL_OFFSET, 5570b57cec5SDimitry Andric LDS, 55881ad6265SDimitry Andric FPTRUNC_ROUND_UPWARD, 55981ad6265SDimitry Andric FPTRUNC_ROUND_DOWNWARD, 56081ad6265SDimitry Andric 5610b57cec5SDimitry Andric DUMMY_CHAIN, 5620b57cec5SDimitry Andric FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE, 5630b57cec5SDimitry Andric LOAD_D16_HI, 5640b57cec5SDimitry Andric LOAD_D16_LO, 5650b57cec5SDimitry Andric LOAD_D16_HI_I8, 5660b57cec5SDimitry Andric LOAD_D16_HI_U8, 5670b57cec5SDimitry Andric LOAD_D16_LO_I8, 5680b57cec5SDimitry Andric LOAD_D16_LO_U8, 5690b57cec5SDimitry Andric 5700b57cec5SDimitry Andric STORE_MSKOR, 5710b57cec5SDimitry Andric LOAD_CONSTANT, 5720b57cec5SDimitry Andric TBUFFER_STORE_FORMAT, 5730b57cec5SDimitry Andric TBUFFER_STORE_FORMAT_D16, 5740b57cec5SDimitry Andric TBUFFER_LOAD_FORMAT, 5750b57cec5SDimitry Andric TBUFFER_LOAD_FORMAT_D16, 5760b57cec5SDimitry Andric DS_ORDERED_COUNT, 5770b57cec5SDimitry Andric ATOMIC_CMP_SWAP, 5780b57cec5SDimitry Andric BUFFER_LOAD, 5790b57cec5SDimitry Andric BUFFER_LOAD_UBYTE, 5800b57cec5SDimitry Andric BUFFER_LOAD_USHORT, 5810b57cec5SDimitry Andric BUFFER_LOAD_BYTE, 5820b57cec5SDimitry Andric BUFFER_LOAD_SHORT, 583*0fca6ea1SDimitry Andric BUFFER_LOAD_TFE, 584*0fca6ea1SDimitry Andric BUFFER_LOAD_UBYTE_TFE, 585*0fca6ea1SDimitry Andric BUFFER_LOAD_USHORT_TFE, 586*0fca6ea1SDimitry Andric BUFFER_LOAD_BYTE_TFE, 587*0fca6ea1SDimitry Andric BUFFER_LOAD_SHORT_TFE, 5880b57cec5SDimitry Andric BUFFER_LOAD_FORMAT, 589bdd1243dSDimitry Andric BUFFER_LOAD_FORMAT_TFE, 5900b57cec5SDimitry Andric BUFFER_LOAD_FORMAT_D16, 5910b57cec5SDimitry Andric SBUFFER_LOAD, 5927a6dacacSDimitry Andric SBUFFER_LOAD_BYTE, 5937a6dacacSDimitry Andric SBUFFER_LOAD_UBYTE, 5947a6dacacSDimitry Andric SBUFFER_LOAD_SHORT, 5957a6dacacSDimitry Andric SBUFFER_LOAD_USHORT, 5960b57cec5SDimitry Andric BUFFER_STORE, 5970b57cec5SDimitry Andric BUFFER_STORE_BYTE, 5980b57cec5SDimitry Andric BUFFER_STORE_SHORT, 5990b57cec5SDimitry Andric BUFFER_STORE_FORMAT, 6000b57cec5SDimitry Andric BUFFER_STORE_FORMAT_D16, 6010b57cec5SDimitry Andric BUFFER_ATOMIC_SWAP, 6020b57cec5SDimitry Andric BUFFER_ATOMIC_ADD, 6030b57cec5SDimitry Andric BUFFER_ATOMIC_SUB, 6040b57cec5SDimitry Andric BUFFER_ATOMIC_SMIN, 6050b57cec5SDimitry Andric BUFFER_ATOMIC_UMIN, 6060b57cec5SDimitry Andric BUFFER_ATOMIC_SMAX, 6070b57cec5SDimitry Andric BUFFER_ATOMIC_UMAX, 6080b57cec5SDimitry Andric BUFFER_ATOMIC_AND, 6090b57cec5SDimitry Andric BUFFER_ATOMIC_OR, 6100b57cec5SDimitry Andric BUFFER_ATOMIC_XOR, 6118bcb0991SDimitry Andric BUFFER_ATOMIC_INC, 6128bcb0991SDimitry Andric BUFFER_ATOMIC_DEC, 6130b57cec5SDimitry Andric BUFFER_ATOMIC_CMPSWAP, 6145ffd83dbSDimitry Andric BUFFER_ATOMIC_CSUB, 6150b57cec5SDimitry Andric BUFFER_ATOMIC_FADD, 616fe6060f1SDimitry Andric BUFFER_ATOMIC_FMIN, 617fe6060f1SDimitry Andric BUFFER_ATOMIC_FMAX, 6187a6dacacSDimitry Andric BUFFER_ATOMIC_COND_SUB_U32, 6190b57cec5SDimitry Andric 6200b57cec5SDimitry Andric LAST_AMDGPU_ISD_NUMBER 6210b57cec5SDimitry Andric }; 6220b57cec5SDimitry Andric 6230b57cec5SDimitry Andric } // End namespace AMDGPUISD 6240b57cec5SDimitry Andric 6250b57cec5SDimitry Andric } // End namespace llvm 6260b57cec5SDimitry Andric 6270b57cec5SDimitry Andric #endif 628