1e8d8bef9SDimitry Andric //=====-- GCNSubtarget.h - Define GCN Subtarget for AMDGPU ------*- C++ -*-===// 2e8d8bef9SDimitry Andric // 3e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6e8d8bef9SDimitry Andric // 7e8d8bef9SDimitry Andric //==-----------------------------------------------------------------------===// 8e8d8bef9SDimitry Andric // 9e8d8bef9SDimitry Andric /// \file 10e8d8bef9SDimitry Andric /// AMD GCN specific subclass of TargetSubtarget. 11e8d8bef9SDimitry Andric // 12e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===// 13e8d8bef9SDimitry Andric 14e8d8bef9SDimitry Andric #ifndef LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H 15e8d8bef9SDimitry Andric #define LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H 16e8d8bef9SDimitry Andric 17e8d8bef9SDimitry Andric #include "AMDGPUCallLowering.h" 18e8d8bef9SDimitry Andric #include "AMDGPUSubtarget.h" 19e8d8bef9SDimitry Andric #include "SIFrameLowering.h" 20e8d8bef9SDimitry Andric #include "SIISelLowering.h" 21e8d8bef9SDimitry Andric #include "SIInstrInfo.h" 22e8d8bef9SDimitry Andric #include "llvm/CodeGen/SelectionDAGTargetInfo.h" 23e8d8bef9SDimitry Andric 24e8d8bef9SDimitry Andric namespace llvm { 25e8d8bef9SDimitry Andric 26e8d8bef9SDimitry Andric class MCInst; 27e8d8bef9SDimitry Andric class MCInstrInfo; 28e8d8bef9SDimitry Andric 29e8d8bef9SDimitry Andric } // namespace llvm 30e8d8bef9SDimitry Andric 31e8d8bef9SDimitry Andric #define GET_SUBTARGETINFO_HEADER 32e8d8bef9SDimitry Andric #include "AMDGPUGenSubtargetInfo.inc" 33e8d8bef9SDimitry Andric 34e8d8bef9SDimitry Andric namespace llvm { 35e8d8bef9SDimitry Andric 36e8d8bef9SDimitry Andric class GCNTargetMachine; 37e8d8bef9SDimitry Andric 38e8d8bef9SDimitry Andric class GCNSubtarget final : public 
AMDGPUGenSubtargetInfo, 39e8d8bef9SDimitry Andric public AMDGPUSubtarget { 40e8d8bef9SDimitry Andric 41e8d8bef9SDimitry Andric using AMDGPUSubtarget::getMaxWavesPerEU; 42e8d8bef9SDimitry Andric 43e8d8bef9SDimitry Andric public: 44*fe6060f1SDimitry Andric // Following 2 enums are documented at: 45*fe6060f1SDimitry Andric // - https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi 46*fe6060f1SDimitry Andric enum class TrapHandlerAbi { 47*fe6060f1SDimitry Andric NONE = 0x00, 48*fe6060f1SDimitry Andric AMDHSA = 0x01, 49e8d8bef9SDimitry Andric }; 50e8d8bef9SDimitry Andric 51*fe6060f1SDimitry Andric enum class TrapID { 52*fe6060f1SDimitry Andric LLVMAMDHSATrap = 0x02, 53*fe6060f1SDimitry Andric LLVMAMDHSADebugTrap = 0x03, 54e8d8bef9SDimitry Andric }; 55e8d8bef9SDimitry Andric 56e8d8bef9SDimitry Andric private: 57e8d8bef9SDimitry Andric /// GlobalISel related APIs. 58e8d8bef9SDimitry Andric std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo; 59e8d8bef9SDimitry Andric std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo; 60e8d8bef9SDimitry Andric std::unique_ptr<InstructionSelector> InstSelector; 61e8d8bef9SDimitry Andric std::unique_ptr<LegalizerInfo> Legalizer; 62e8d8bef9SDimitry Andric std::unique_ptr<RegisterBankInfo> RegBankInfo; 63e8d8bef9SDimitry Andric 64e8d8bef9SDimitry Andric protected: 65e8d8bef9SDimitry Andric // Basic subtarget description. 66e8d8bef9SDimitry Andric Triple TargetTriple; 67e8d8bef9SDimitry Andric AMDGPU::IsaInfo::AMDGPUTargetID TargetID; 68e8d8bef9SDimitry Andric unsigned Gen; 69e8d8bef9SDimitry Andric InstrItineraryData InstrItins; 70e8d8bef9SDimitry Andric int LDSBankCount; 71e8d8bef9SDimitry Andric unsigned MaxPrivateElementSize; 72e8d8bef9SDimitry Andric 73e8d8bef9SDimitry Andric // Possibly statically set by tablegen, but may want to be overridden. 
bool FastFMAF32;
bool FastDenormalF32;
bool HalfRate64Ops;
bool FullRate64Ops;

// Dynamically set bits that enable features.
bool FlatForGlobal;
bool AutoWaitcntBeforeBarrier;
bool UnalignedScratchAccess;
bool UnalignedAccessMode;
bool HasApertureRegs;
bool SupportsXNACK;

// This should not be used directly. 'TargetID' tracks the dynamic settings
// for XNACK.
bool EnableXNACK;

bool EnableTgSplit;
bool EnableCuMode;
bool TrapHandler;

// Used as options.
bool EnableLoadStoreOpt;
bool EnableUnsafeDSOffsetFolding;
bool EnableSIScheduler;
bool EnableDS128;
bool EnablePRTStrictNull;
bool DumpCode;

// Subtarget properties statically set by tablegen.
bool FP64;
bool FMA;
bool MIMG_R128;
bool IsGCN;
bool CIInsts;
bool GFX8Insts;
bool GFX9Insts;
bool GFX90AInsts;
bool GFX10Insts;
bool GFX10_3Insts;
bool GFX7GFX8GFX9Insts;
bool SGPRInitBug;
bool NegativeScratchOffsetBug;
bool NegativeUnalignedScratchOffsetBug;
bool HasSMemRealTime;
bool HasIntClamp;
bool HasFmaMixInsts;
bool HasMovrel;
bool HasVGPRIndexMode;
bool HasScalarStores;
bool HasScalarAtomics;
bool HasSDWAOmod;
bool HasSDWAScalar;
bool HasSDWASdst;
bool HasSDWAMac;
bool HasSDWAOutModsVOPC;
bool HasDPP;
bool HasDPP8;
bool Has64BitDPP;
bool HasPackedFP32Ops;
bool HasExtendedImageInsts;
bool HasR128A16;
bool HasGFX10A16;
bool HasG16;
bool HasNSAEncoding;
unsigned NSAMaxSize;
bool GFX10_AEncoding;
bool GFX10_BEncoding;
bool HasDLInsts;
bool HasDot1Insts;
bool HasDot2Insts;
bool HasDot3Insts;
bool HasDot4Insts;
bool HasDot5Insts;
bool HasDot6Insts;
bool HasDot7Insts;
bool HasMAIInsts;
bool HasPkFmacF16Inst;
bool HasAtomicFaddInsts;
bool SupportsSRAMECC;

// This should not be used directly. 'TargetID' tracks the dynamic settings
// for SRAMECC.
bool EnableSRAMECC;

bool HasNoSdstCMPX;
bool HasVscnt;
bool HasGetWaveIdInst;
bool HasSMemTimeInst;
bool HasShaderCyclesRegister;
bool HasRegisterBanking;
bool HasVOP3Literal;
bool HasNoDataDepHazard;
bool FlatAddressSpace;
bool FlatInstOffsets;
bool FlatGlobalInsts;
bool FlatScratchInsts;
bool ScalarFlatScratchInsts;
bool HasArchitectedFlatScratch;
bool AddNoCarryInsts;
bool HasUnpackedD16VMem;
bool R600ALUInst;
bool CaymanISA;
bool CFALUBug;
bool LDSMisalignedBug;
179e8d8bef9SDimitry Andric bool HasMFMAInlineLiteralBug; 180*fe6060f1SDimitry Andric bool HasVertexCache; 181*fe6060f1SDimitry Andric short TexVTXClauseSize; 182e8d8bef9SDimitry Andric bool UnalignedBufferAccess; 183e8d8bef9SDimitry Andric bool UnalignedDSAccess; 184*fe6060f1SDimitry Andric bool HasPackedTID; 185e8d8bef9SDimitry Andric bool ScalarizeGlobal; 186e8d8bef9SDimitry Andric 187e8d8bef9SDimitry Andric bool HasVcmpxPermlaneHazard; 188e8d8bef9SDimitry Andric bool HasVMEMtoScalarWriteHazard; 189e8d8bef9SDimitry Andric bool HasSMEMtoVectorWriteHazard; 190e8d8bef9SDimitry Andric bool HasInstFwdPrefetchBug; 191e8d8bef9SDimitry Andric bool HasVcmpxExecWARHazard; 192e8d8bef9SDimitry Andric bool HasLdsBranchVmemWARHazard; 193e8d8bef9SDimitry Andric bool HasNSAtoVMEMBug; 194*fe6060f1SDimitry Andric bool HasNSAClauseBug; 195e8d8bef9SDimitry Andric bool HasOffset3fBug; 196e8d8bef9SDimitry Andric bool HasFlatSegmentOffsetBug; 197e8d8bef9SDimitry Andric bool HasImageStoreD16Bug; 198e8d8bef9SDimitry Andric bool HasImageGather4D16Bug; 199e8d8bef9SDimitry Andric 200e8d8bef9SDimitry Andric // Dummy feature to use for assembler in tablegen. 201e8d8bef9SDimitry Andric bool FeatureDisable; 202e8d8bef9SDimitry Andric 203e8d8bef9SDimitry Andric SelectionDAGTargetInfo TSInfo; 204e8d8bef9SDimitry Andric private: 205e8d8bef9SDimitry Andric SIInstrInfo InstrInfo; 206e8d8bef9SDimitry Andric SITargetLowering TLInfo; 207e8d8bef9SDimitry Andric SIFrameLowering FrameLowering; 208e8d8bef9SDimitry Andric 209e8d8bef9SDimitry Andric public: 210e8d8bef9SDimitry Andric // See COMPUTE_TMPRING_SIZE.WAVESIZE, 13-bit field in units of 256-dword. 
211e8d8bef9SDimitry Andric static const unsigned MaxWaveScratchSize = (256 * 4) * ((1 << 13) - 1); 212e8d8bef9SDimitry Andric 213e8d8bef9SDimitry Andric GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, 214e8d8bef9SDimitry Andric const GCNTargetMachine &TM); 215e8d8bef9SDimitry Andric ~GCNSubtarget() override; 216e8d8bef9SDimitry Andric 217e8d8bef9SDimitry Andric GCNSubtarget &initializeSubtargetDependencies(const Triple &TT, 218e8d8bef9SDimitry Andric StringRef GPU, StringRef FS); 219e8d8bef9SDimitry Andric 220e8d8bef9SDimitry Andric const SIInstrInfo *getInstrInfo() const override { 221e8d8bef9SDimitry Andric return &InstrInfo; 222e8d8bef9SDimitry Andric } 223e8d8bef9SDimitry Andric 224e8d8bef9SDimitry Andric const SIFrameLowering *getFrameLowering() const override { 225e8d8bef9SDimitry Andric return &FrameLowering; 226e8d8bef9SDimitry Andric } 227e8d8bef9SDimitry Andric 228e8d8bef9SDimitry Andric const SITargetLowering *getTargetLowering() const override { 229e8d8bef9SDimitry Andric return &TLInfo; 230e8d8bef9SDimitry Andric } 231e8d8bef9SDimitry Andric 232e8d8bef9SDimitry Andric const SIRegisterInfo *getRegisterInfo() const override { 233e8d8bef9SDimitry Andric return &InstrInfo.getRegisterInfo(); 234e8d8bef9SDimitry Andric } 235e8d8bef9SDimitry Andric 236e8d8bef9SDimitry Andric const CallLowering *getCallLowering() const override { 237e8d8bef9SDimitry Andric return CallLoweringInfo.get(); 238e8d8bef9SDimitry Andric } 239e8d8bef9SDimitry Andric 240e8d8bef9SDimitry Andric const InlineAsmLowering *getInlineAsmLowering() const override { 241e8d8bef9SDimitry Andric return InlineAsmLoweringInfo.get(); 242e8d8bef9SDimitry Andric } 243e8d8bef9SDimitry Andric 244e8d8bef9SDimitry Andric InstructionSelector *getInstructionSelector() const override { 245e8d8bef9SDimitry Andric return InstSelector.get(); 246e8d8bef9SDimitry Andric } 247e8d8bef9SDimitry Andric 248e8d8bef9SDimitry Andric const LegalizerInfo *getLegalizerInfo() const override { 249e8d8bef9SDimitry 
Andric return Legalizer.get(); 250e8d8bef9SDimitry Andric } 251e8d8bef9SDimitry Andric 252e8d8bef9SDimitry Andric const RegisterBankInfo *getRegBankInfo() const override { 253e8d8bef9SDimitry Andric return RegBankInfo.get(); 254e8d8bef9SDimitry Andric } 255e8d8bef9SDimitry Andric 256*fe6060f1SDimitry Andric const AMDGPU::IsaInfo::AMDGPUTargetID &getTargetID() const { 257*fe6060f1SDimitry Andric return TargetID; 258*fe6060f1SDimitry Andric } 259*fe6060f1SDimitry Andric 260e8d8bef9SDimitry Andric // Nothing implemented, just prevent crashes on use. 261e8d8bef9SDimitry Andric const SelectionDAGTargetInfo *getSelectionDAGInfo() const override { 262e8d8bef9SDimitry Andric return &TSInfo; 263e8d8bef9SDimitry Andric } 264e8d8bef9SDimitry Andric 265e8d8bef9SDimitry Andric const InstrItineraryData *getInstrItineraryData() const override { 266e8d8bef9SDimitry Andric return &InstrItins; 267e8d8bef9SDimitry Andric } 268e8d8bef9SDimitry Andric 269e8d8bef9SDimitry Andric void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS); 270e8d8bef9SDimitry Andric 271e8d8bef9SDimitry Andric Generation getGeneration() const { 272e8d8bef9SDimitry Andric return (Generation)Gen; 273e8d8bef9SDimitry Andric } 274e8d8bef9SDimitry Andric 275e8d8bef9SDimitry Andric /// Return the number of high bits known to be zero fror a frame index. 276e8d8bef9SDimitry Andric unsigned getKnownHighZeroBitsForFrameIndex() const { 277e8d8bef9SDimitry Andric return countLeadingZeros(MaxWaveScratchSize) + getWavefrontSizeLog2(); 278e8d8bef9SDimitry Andric } 279e8d8bef9SDimitry Andric 280e8d8bef9SDimitry Andric int getLDSBankCount() const { 281e8d8bef9SDimitry Andric return LDSBankCount; 282e8d8bef9SDimitry Andric } 283e8d8bef9SDimitry Andric 284e8d8bef9SDimitry Andric unsigned getMaxPrivateElementSize(bool ForBufferRSrc = false) const { 285e8d8bef9SDimitry Andric return (ForBufferRSrc || !enableFlatScratch()) ? 
MaxPrivateElementSize : 16; 286e8d8bef9SDimitry Andric } 287e8d8bef9SDimitry Andric 288e8d8bef9SDimitry Andric unsigned getConstantBusLimit(unsigned Opcode) const; 289e8d8bef9SDimitry Andric 290*fe6060f1SDimitry Andric /// Returns if the result of this instruction with a 16-bit result returned in 291*fe6060f1SDimitry Andric /// a 32-bit register implicitly zeroes the high 16-bits, rather than preserve 292*fe6060f1SDimitry Andric /// the original value. 293*fe6060f1SDimitry Andric bool zeroesHigh16BitsOfDest(unsigned Opcode) const; 294*fe6060f1SDimitry Andric 295e8d8bef9SDimitry Andric bool hasIntClamp() const { 296e8d8bef9SDimitry Andric return HasIntClamp; 297e8d8bef9SDimitry Andric } 298e8d8bef9SDimitry Andric 299e8d8bef9SDimitry Andric bool hasFP64() const { 300e8d8bef9SDimitry Andric return FP64; 301e8d8bef9SDimitry Andric } 302e8d8bef9SDimitry Andric 303e8d8bef9SDimitry Andric bool hasMIMG_R128() const { 304e8d8bef9SDimitry Andric return MIMG_R128; 305e8d8bef9SDimitry Andric } 306e8d8bef9SDimitry Andric 307e8d8bef9SDimitry Andric bool hasHWFP64() const { 308e8d8bef9SDimitry Andric return FP64; 309e8d8bef9SDimitry Andric } 310e8d8bef9SDimitry Andric 311e8d8bef9SDimitry Andric bool hasFastFMAF32() const { 312e8d8bef9SDimitry Andric return FastFMAF32; 313e8d8bef9SDimitry Andric } 314e8d8bef9SDimitry Andric 315e8d8bef9SDimitry Andric bool hasHalfRate64Ops() const { 316e8d8bef9SDimitry Andric return HalfRate64Ops; 317e8d8bef9SDimitry Andric } 318e8d8bef9SDimitry Andric 319*fe6060f1SDimitry Andric bool hasFullRate64Ops() const { 320*fe6060f1SDimitry Andric return FullRate64Ops; 321*fe6060f1SDimitry Andric } 322*fe6060f1SDimitry Andric 323e8d8bef9SDimitry Andric bool hasAddr64() const { 324e8d8bef9SDimitry Andric return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS); 325e8d8bef9SDimitry Andric } 326e8d8bef9SDimitry Andric 327e8d8bef9SDimitry Andric bool hasFlat() const { 328e8d8bef9SDimitry Andric return (getGeneration() > AMDGPUSubtarget::SOUTHERN_ISLANDS); 
329e8d8bef9SDimitry Andric } 330e8d8bef9SDimitry Andric 331e8d8bef9SDimitry Andric // Return true if the target only has the reverse operand versions of VALU 332e8d8bef9SDimitry Andric // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32). 333e8d8bef9SDimitry Andric bool hasOnlyRevVALUShifts() const { 334e8d8bef9SDimitry Andric return getGeneration() >= VOLCANIC_ISLANDS; 335e8d8bef9SDimitry Andric } 336e8d8bef9SDimitry Andric 337e8d8bef9SDimitry Andric bool hasFractBug() const { 338e8d8bef9SDimitry Andric return getGeneration() == SOUTHERN_ISLANDS; 339e8d8bef9SDimitry Andric } 340e8d8bef9SDimitry Andric 341e8d8bef9SDimitry Andric bool hasBFE() const { 342e8d8bef9SDimitry Andric return true; 343e8d8bef9SDimitry Andric } 344e8d8bef9SDimitry Andric 345e8d8bef9SDimitry Andric bool hasBFI() const { 346e8d8bef9SDimitry Andric return true; 347e8d8bef9SDimitry Andric } 348e8d8bef9SDimitry Andric 349e8d8bef9SDimitry Andric bool hasBFM() const { 350e8d8bef9SDimitry Andric return hasBFE(); 351e8d8bef9SDimitry Andric } 352e8d8bef9SDimitry Andric 353e8d8bef9SDimitry Andric bool hasBCNT(unsigned Size) const { 354e8d8bef9SDimitry Andric return true; 355e8d8bef9SDimitry Andric } 356e8d8bef9SDimitry Andric 357e8d8bef9SDimitry Andric bool hasFFBL() const { 358e8d8bef9SDimitry Andric return true; 359e8d8bef9SDimitry Andric } 360e8d8bef9SDimitry Andric 361e8d8bef9SDimitry Andric bool hasFFBH() const { 362e8d8bef9SDimitry Andric return true; 363e8d8bef9SDimitry Andric } 364e8d8bef9SDimitry Andric 365e8d8bef9SDimitry Andric bool hasMed3_16() const { 366e8d8bef9SDimitry Andric return getGeneration() >= AMDGPUSubtarget::GFX9; 367e8d8bef9SDimitry Andric } 368e8d8bef9SDimitry Andric 369e8d8bef9SDimitry Andric bool hasMin3Max3_16() const { 370e8d8bef9SDimitry Andric return getGeneration() >= AMDGPUSubtarget::GFX9; 371e8d8bef9SDimitry Andric } 372e8d8bef9SDimitry Andric 373e8d8bef9SDimitry Andric bool hasFmaMixInsts() const { 374e8d8bef9SDimitry Andric return HasFmaMixInsts; 
375e8d8bef9SDimitry Andric } 376e8d8bef9SDimitry Andric 377e8d8bef9SDimitry Andric bool hasCARRY() const { 378e8d8bef9SDimitry Andric return true; 379e8d8bef9SDimitry Andric } 380e8d8bef9SDimitry Andric 381e8d8bef9SDimitry Andric bool hasFMA() const { 382e8d8bef9SDimitry Andric return FMA; 383e8d8bef9SDimitry Andric } 384e8d8bef9SDimitry Andric 385e8d8bef9SDimitry Andric bool hasSwap() const { 386e8d8bef9SDimitry Andric return GFX9Insts; 387e8d8bef9SDimitry Andric } 388e8d8bef9SDimitry Andric 389e8d8bef9SDimitry Andric bool hasScalarPackInsts() const { 390e8d8bef9SDimitry Andric return GFX9Insts; 391e8d8bef9SDimitry Andric } 392e8d8bef9SDimitry Andric 393e8d8bef9SDimitry Andric bool hasScalarMulHiInsts() const { 394e8d8bef9SDimitry Andric return GFX9Insts; 395e8d8bef9SDimitry Andric } 396e8d8bef9SDimitry Andric 397e8d8bef9SDimitry Andric TrapHandlerAbi getTrapHandlerAbi() const { 398*fe6060f1SDimitry Andric return isAmdHsaOS() ? TrapHandlerAbi::AMDHSA : TrapHandlerAbi::NONE; 399*fe6060f1SDimitry Andric } 400*fe6060f1SDimitry Andric 401*fe6060f1SDimitry Andric bool supportsGetDoorbellID() const { 402*fe6060f1SDimitry Andric // The S_GETREG DOORBELL_ID is supported by all GFX9 onward targets. 403*fe6060f1SDimitry Andric return getGeneration() >= GFX9; 404e8d8bef9SDimitry Andric } 405e8d8bef9SDimitry Andric 406e8d8bef9SDimitry Andric /// True if the offset field of DS instructions works as expected. On SI, the 407e8d8bef9SDimitry Andric /// offset uses a 16-bit adder and does not always wrap properly. 408e8d8bef9SDimitry Andric bool hasUsableDSOffset() const { 409e8d8bef9SDimitry Andric return getGeneration() >= SEA_ISLANDS; 410e8d8bef9SDimitry Andric } 411e8d8bef9SDimitry Andric 412e8d8bef9SDimitry Andric bool unsafeDSOffsetFoldingEnabled() const { 413e8d8bef9SDimitry Andric return EnableUnsafeDSOffsetFolding; 414e8d8bef9SDimitry Andric } 415e8d8bef9SDimitry Andric 416e8d8bef9SDimitry Andric /// Condition output from div_scale is usable. 
417e8d8bef9SDimitry Andric bool hasUsableDivScaleConditionOutput() const { 418e8d8bef9SDimitry Andric return getGeneration() != SOUTHERN_ISLANDS; 419e8d8bef9SDimitry Andric } 420e8d8bef9SDimitry Andric 421e8d8bef9SDimitry Andric /// Extra wait hazard is needed in some cases before 422e8d8bef9SDimitry Andric /// s_cbranch_vccnz/s_cbranch_vccz. 423e8d8bef9SDimitry Andric bool hasReadVCCZBug() const { 424e8d8bef9SDimitry Andric return getGeneration() <= SEA_ISLANDS; 425e8d8bef9SDimitry Andric } 426e8d8bef9SDimitry Andric 427e8d8bef9SDimitry Andric /// Writes to VCC_LO/VCC_HI update the VCCZ flag. 428e8d8bef9SDimitry Andric bool partialVCCWritesUpdateVCCZ() const { 429e8d8bef9SDimitry Andric return getGeneration() >= GFX10; 430e8d8bef9SDimitry Andric } 431e8d8bef9SDimitry Andric 432e8d8bef9SDimitry Andric /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR 433e8d8bef9SDimitry Andric /// was written by a VALU instruction. 434e8d8bef9SDimitry Andric bool hasSMRDReadVALUDefHazard() const { 435e8d8bef9SDimitry Andric return getGeneration() == SOUTHERN_ISLANDS; 436e8d8bef9SDimitry Andric } 437e8d8bef9SDimitry Andric 438e8d8bef9SDimitry Andric /// A read of an SGPR by a VMEM instruction requires 5 wait states when the 439e8d8bef9SDimitry Andric /// SGPR was written by a VALU Instruction. 440e8d8bef9SDimitry Andric bool hasVMEMReadSGPRVALUDefHazard() const { 441e8d8bef9SDimitry Andric return getGeneration() >= VOLCANIC_ISLANDS; 442e8d8bef9SDimitry Andric } 443e8d8bef9SDimitry Andric 444e8d8bef9SDimitry Andric bool hasRFEHazards() const { 445e8d8bef9SDimitry Andric return getGeneration() >= VOLCANIC_ISLANDS; 446e8d8bef9SDimitry Andric } 447e8d8bef9SDimitry Andric 448e8d8bef9SDimitry Andric /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32. 449e8d8bef9SDimitry Andric unsigned getSetRegWaitStates() const { 450e8d8bef9SDimitry Andric return getGeneration() <= SEA_ISLANDS ? 
1 : 2; 451e8d8bef9SDimitry Andric } 452e8d8bef9SDimitry Andric 453e8d8bef9SDimitry Andric bool dumpCode() const { 454e8d8bef9SDimitry Andric return DumpCode; 455e8d8bef9SDimitry Andric } 456e8d8bef9SDimitry Andric 457e8d8bef9SDimitry Andric /// Return the amount of LDS that can be used that will not restrict the 458e8d8bef9SDimitry Andric /// occupancy lower than WaveCount. 459e8d8bef9SDimitry Andric unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, 460e8d8bef9SDimitry Andric const Function &) const; 461e8d8bef9SDimitry Andric 462e8d8bef9SDimitry Andric bool supportsMinMaxDenormModes() const { 463e8d8bef9SDimitry Andric return getGeneration() >= AMDGPUSubtarget::GFX9; 464e8d8bef9SDimitry Andric } 465e8d8bef9SDimitry Andric 466e8d8bef9SDimitry Andric /// \returns If target supports S_DENORM_MODE. 467e8d8bef9SDimitry Andric bool hasDenormModeInst() const { 468e8d8bef9SDimitry Andric return getGeneration() >= AMDGPUSubtarget::GFX10; 469e8d8bef9SDimitry Andric } 470e8d8bef9SDimitry Andric 471e8d8bef9SDimitry Andric bool useFlatForGlobal() const { 472e8d8bef9SDimitry Andric return FlatForGlobal; 473e8d8bef9SDimitry Andric } 474e8d8bef9SDimitry Andric 475e8d8bef9SDimitry Andric /// \returns If target supports ds_read/write_b128 and user enables generation 476e8d8bef9SDimitry Andric /// of ds_read/write_b128. 477e8d8bef9SDimitry Andric bool useDS128() const { 478e8d8bef9SDimitry Andric return CIInsts && EnableDS128; 479e8d8bef9SDimitry Andric } 480e8d8bef9SDimitry Andric 481e8d8bef9SDimitry Andric /// \return If target supports ds_read/write_b96/128. 
482e8d8bef9SDimitry Andric bool hasDS96AndDS128() const { 483e8d8bef9SDimitry Andric return CIInsts; 484e8d8bef9SDimitry Andric } 485e8d8bef9SDimitry Andric 486e8d8bef9SDimitry Andric /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64 487e8d8bef9SDimitry Andric bool haveRoundOpsF64() const { 488e8d8bef9SDimitry Andric return CIInsts; 489e8d8bef9SDimitry Andric } 490e8d8bef9SDimitry Andric 491e8d8bef9SDimitry Andric /// \returns If MUBUF instructions always perform range checking, even for 492e8d8bef9SDimitry Andric /// buffer resources used for private memory access. 493e8d8bef9SDimitry Andric bool privateMemoryResourceIsRangeChecked() const { 494e8d8bef9SDimitry Andric return getGeneration() < AMDGPUSubtarget::GFX9; 495e8d8bef9SDimitry Andric } 496e8d8bef9SDimitry Andric 497e8d8bef9SDimitry Andric /// \returns If target requires PRT Struct NULL support (zero result registers 498e8d8bef9SDimitry Andric /// for sparse texture support). 499e8d8bef9SDimitry Andric bool usePRTStrictNull() const { 500e8d8bef9SDimitry Andric return EnablePRTStrictNull; 501e8d8bef9SDimitry Andric } 502e8d8bef9SDimitry Andric 503e8d8bef9SDimitry Andric bool hasAutoWaitcntBeforeBarrier() const { 504e8d8bef9SDimitry Andric return AutoWaitcntBeforeBarrier; 505e8d8bef9SDimitry Andric } 506e8d8bef9SDimitry Andric 507e8d8bef9SDimitry Andric bool hasUnalignedBufferAccess() const { 508e8d8bef9SDimitry Andric return UnalignedBufferAccess; 509e8d8bef9SDimitry Andric } 510e8d8bef9SDimitry Andric 511e8d8bef9SDimitry Andric bool hasUnalignedBufferAccessEnabled() const { 512e8d8bef9SDimitry Andric return UnalignedBufferAccess && UnalignedAccessMode; 513e8d8bef9SDimitry Andric } 514e8d8bef9SDimitry Andric 515e8d8bef9SDimitry Andric bool hasUnalignedDSAccess() const { 516e8d8bef9SDimitry Andric return UnalignedDSAccess; 517e8d8bef9SDimitry Andric } 518e8d8bef9SDimitry Andric 519e8d8bef9SDimitry Andric bool hasUnalignedDSAccessEnabled() const { 520e8d8bef9SDimitry Andric return UnalignedDSAccess && 
UnalignedAccessMode; 521e8d8bef9SDimitry Andric } 522e8d8bef9SDimitry Andric 523e8d8bef9SDimitry Andric bool hasUnalignedScratchAccess() const { 524e8d8bef9SDimitry Andric return UnalignedScratchAccess; 525e8d8bef9SDimitry Andric } 526e8d8bef9SDimitry Andric 527e8d8bef9SDimitry Andric bool hasUnalignedAccessMode() const { 528e8d8bef9SDimitry Andric return UnalignedAccessMode; 529e8d8bef9SDimitry Andric } 530e8d8bef9SDimitry Andric 531e8d8bef9SDimitry Andric bool hasApertureRegs() const { 532e8d8bef9SDimitry Andric return HasApertureRegs; 533e8d8bef9SDimitry Andric } 534e8d8bef9SDimitry Andric 535e8d8bef9SDimitry Andric bool isTrapHandlerEnabled() const { 536e8d8bef9SDimitry Andric return TrapHandler; 537e8d8bef9SDimitry Andric } 538e8d8bef9SDimitry Andric 539e8d8bef9SDimitry Andric bool isXNACKEnabled() const { 540e8d8bef9SDimitry Andric return TargetID.isXnackOnOrAny(); 541e8d8bef9SDimitry Andric } 542e8d8bef9SDimitry Andric 543*fe6060f1SDimitry Andric bool isTgSplitEnabled() const { 544*fe6060f1SDimitry Andric return EnableTgSplit; 545*fe6060f1SDimitry Andric } 546*fe6060f1SDimitry Andric 547e8d8bef9SDimitry Andric bool isCuModeEnabled() const { 548e8d8bef9SDimitry Andric return EnableCuMode; 549e8d8bef9SDimitry Andric } 550e8d8bef9SDimitry Andric 551e8d8bef9SDimitry Andric bool hasFlatAddressSpace() const { 552e8d8bef9SDimitry Andric return FlatAddressSpace; 553e8d8bef9SDimitry Andric } 554e8d8bef9SDimitry Andric 555e8d8bef9SDimitry Andric bool hasFlatScrRegister() const { 556e8d8bef9SDimitry Andric return hasFlatAddressSpace(); 557e8d8bef9SDimitry Andric } 558e8d8bef9SDimitry Andric 559e8d8bef9SDimitry Andric bool hasFlatInstOffsets() const { 560e8d8bef9SDimitry Andric return FlatInstOffsets; 561e8d8bef9SDimitry Andric } 562e8d8bef9SDimitry Andric 563e8d8bef9SDimitry Andric bool hasFlatGlobalInsts() const { 564e8d8bef9SDimitry Andric return FlatGlobalInsts; 565e8d8bef9SDimitry Andric } 566e8d8bef9SDimitry Andric 567e8d8bef9SDimitry Andric bool 
hasFlatScratchInsts() const { 568e8d8bef9SDimitry Andric return FlatScratchInsts; 569e8d8bef9SDimitry Andric } 570e8d8bef9SDimitry Andric 571e8d8bef9SDimitry Andric // Check if target supports ST addressing mode with FLAT scratch instructions. 572e8d8bef9SDimitry Andric // The ST addressing mode means no registers are used, either VGPR or SGPR, 573e8d8bef9SDimitry Andric // but only immediate offset is swizzled and added to the FLAT scratch base. 574e8d8bef9SDimitry Andric bool hasFlatScratchSTMode() const { 575e8d8bef9SDimitry Andric return hasFlatScratchInsts() && hasGFX10_3Insts(); 576e8d8bef9SDimitry Andric } 577e8d8bef9SDimitry Andric 578e8d8bef9SDimitry Andric bool hasScalarFlatScratchInsts() const { 579e8d8bef9SDimitry Andric return ScalarFlatScratchInsts; 580e8d8bef9SDimitry Andric } 581e8d8bef9SDimitry Andric 582e8d8bef9SDimitry Andric bool hasGlobalAddTidInsts() const { 583e8d8bef9SDimitry Andric return GFX10_BEncoding; 584e8d8bef9SDimitry Andric } 585e8d8bef9SDimitry Andric 586e8d8bef9SDimitry Andric bool hasAtomicCSub() const { 587e8d8bef9SDimitry Andric return GFX10_BEncoding; 588e8d8bef9SDimitry Andric } 589e8d8bef9SDimitry Andric 590e8d8bef9SDimitry Andric bool hasMultiDwordFlatScratchAddressing() const { 591e8d8bef9SDimitry Andric return getGeneration() >= GFX9; 592e8d8bef9SDimitry Andric } 593e8d8bef9SDimitry Andric 594e8d8bef9SDimitry Andric bool hasFlatSegmentOffsetBug() const { 595e8d8bef9SDimitry Andric return HasFlatSegmentOffsetBug; 596e8d8bef9SDimitry Andric } 597e8d8bef9SDimitry Andric 598e8d8bef9SDimitry Andric bool hasFlatLgkmVMemCountInOrder() const { 599e8d8bef9SDimitry Andric return getGeneration() > GFX9; 600e8d8bef9SDimitry Andric } 601e8d8bef9SDimitry Andric 602e8d8bef9SDimitry Andric bool hasD16LoadStore() const { 603e8d8bef9SDimitry Andric return getGeneration() >= GFX9; 604e8d8bef9SDimitry Andric } 605e8d8bef9SDimitry Andric 606e8d8bef9SDimitry Andric bool d16PreservesUnusedBits() const { 607e8d8bef9SDimitry Andric return 
hasD16LoadStore() && !TargetID.isSramEccOnOrAny(); 608e8d8bef9SDimitry Andric } 609e8d8bef9SDimitry Andric 610e8d8bef9SDimitry Andric bool hasD16Images() const { 611e8d8bef9SDimitry Andric return getGeneration() >= VOLCANIC_ISLANDS; 612e8d8bef9SDimitry Andric } 613e8d8bef9SDimitry Andric 614e8d8bef9SDimitry Andric /// Return if most LDS instructions have an m0 use that require m0 to be 615e8d8bef9SDimitry Andric /// iniitalized. 616e8d8bef9SDimitry Andric bool ldsRequiresM0Init() const { 617e8d8bef9SDimitry Andric return getGeneration() < GFX9; 618e8d8bef9SDimitry Andric } 619e8d8bef9SDimitry Andric 620e8d8bef9SDimitry Andric // True if the hardware rewinds and replays GWS operations if a wave is 621e8d8bef9SDimitry Andric // preempted. 622e8d8bef9SDimitry Andric // 623e8d8bef9SDimitry Andric // If this is false, a GWS operation requires testing if a nack set the 624e8d8bef9SDimitry Andric // MEM_VIOL bit, and repeating if so. 625e8d8bef9SDimitry Andric bool hasGWSAutoReplay() const { 626e8d8bef9SDimitry Andric return getGeneration() >= GFX9; 627e8d8bef9SDimitry Andric } 628e8d8bef9SDimitry Andric 629e8d8bef9SDimitry Andric /// \returns if target has ds_gws_sema_release_all instruction. 630e8d8bef9SDimitry Andric bool hasGWSSemaReleaseAll() const { 631e8d8bef9SDimitry Andric return CIInsts; 632e8d8bef9SDimitry Andric } 633e8d8bef9SDimitry Andric 634e8d8bef9SDimitry Andric /// \returns true if the target has integer add/sub instructions that do not 635e8d8bef9SDimitry Andric /// produce a carry-out. This includes v_add_[iu]32, v_sub_[iu]32, 636e8d8bef9SDimitry Andric /// v_add_[iu]16, and v_sub_[iu]16, all of which support the clamp modifier 637e8d8bef9SDimitry Andric /// for saturation. 
638e8d8bef9SDimitry Andric bool hasAddNoCarry() const { 639e8d8bef9SDimitry Andric return AddNoCarryInsts; 640e8d8bef9SDimitry Andric } 641e8d8bef9SDimitry Andric 642e8d8bef9SDimitry Andric bool hasUnpackedD16VMem() const { 643e8d8bef9SDimitry Andric return HasUnpackedD16VMem; 644e8d8bef9SDimitry Andric } 645e8d8bef9SDimitry Andric 646e8d8bef9SDimitry Andric // Covers VS/PS/CS graphics shaders 647e8d8bef9SDimitry Andric bool isMesaGfxShader(const Function &F) const { 648e8d8bef9SDimitry Andric return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv()); 649e8d8bef9SDimitry Andric } 650e8d8bef9SDimitry Andric 651e8d8bef9SDimitry Andric bool hasMad64_32() const { 652e8d8bef9SDimitry Andric return getGeneration() >= SEA_ISLANDS; 653e8d8bef9SDimitry Andric } 654e8d8bef9SDimitry Andric 655e8d8bef9SDimitry Andric bool hasSDWAOmod() const { 656e8d8bef9SDimitry Andric return HasSDWAOmod; 657e8d8bef9SDimitry Andric } 658e8d8bef9SDimitry Andric 659e8d8bef9SDimitry Andric bool hasSDWAScalar() const { 660e8d8bef9SDimitry Andric return HasSDWAScalar; 661e8d8bef9SDimitry Andric } 662e8d8bef9SDimitry Andric 663e8d8bef9SDimitry Andric bool hasSDWASdst() const { 664e8d8bef9SDimitry Andric return HasSDWASdst; 665e8d8bef9SDimitry Andric } 666e8d8bef9SDimitry Andric 667e8d8bef9SDimitry Andric bool hasSDWAMac() const { 668e8d8bef9SDimitry Andric return HasSDWAMac; 669e8d8bef9SDimitry Andric } 670e8d8bef9SDimitry Andric 671e8d8bef9SDimitry Andric bool hasSDWAOutModsVOPC() const { 672e8d8bef9SDimitry Andric return HasSDWAOutModsVOPC; 673e8d8bef9SDimitry Andric } 674e8d8bef9SDimitry Andric 675e8d8bef9SDimitry Andric bool hasDLInsts() const { 676e8d8bef9SDimitry Andric return HasDLInsts; 677e8d8bef9SDimitry Andric } 678e8d8bef9SDimitry Andric 679e8d8bef9SDimitry Andric bool hasDot1Insts() const { 680e8d8bef9SDimitry Andric return HasDot1Insts; 681e8d8bef9SDimitry Andric } 682e8d8bef9SDimitry Andric 683e8d8bef9SDimitry Andric bool hasDot2Insts() const { 684e8d8bef9SDimitry Andric return 
HasDot2Insts; 685e8d8bef9SDimitry Andric } 686e8d8bef9SDimitry Andric 687e8d8bef9SDimitry Andric bool hasDot3Insts() const { 688e8d8bef9SDimitry Andric return HasDot3Insts; 689e8d8bef9SDimitry Andric } 690e8d8bef9SDimitry Andric 691e8d8bef9SDimitry Andric bool hasDot4Insts() const { 692e8d8bef9SDimitry Andric return HasDot4Insts; 693e8d8bef9SDimitry Andric } 694e8d8bef9SDimitry Andric 695e8d8bef9SDimitry Andric bool hasDot5Insts() const { 696e8d8bef9SDimitry Andric return HasDot5Insts; 697e8d8bef9SDimitry Andric } 698e8d8bef9SDimitry Andric 699e8d8bef9SDimitry Andric bool hasDot6Insts() const { 700e8d8bef9SDimitry Andric return HasDot6Insts; 701e8d8bef9SDimitry Andric } 702e8d8bef9SDimitry Andric 703*fe6060f1SDimitry Andric bool hasDot7Insts() const { 704*fe6060f1SDimitry Andric return HasDot7Insts; 705*fe6060f1SDimitry Andric } 706*fe6060f1SDimitry Andric 707e8d8bef9SDimitry Andric bool hasMAIInsts() const { 708e8d8bef9SDimitry Andric return HasMAIInsts; 709e8d8bef9SDimitry Andric } 710e8d8bef9SDimitry Andric 711e8d8bef9SDimitry Andric bool hasPkFmacF16Inst() const { 712e8d8bef9SDimitry Andric return HasPkFmacF16Inst; 713e8d8bef9SDimitry Andric } 714e8d8bef9SDimitry Andric 715e8d8bef9SDimitry Andric bool hasAtomicFaddInsts() const { 716e8d8bef9SDimitry Andric return HasAtomicFaddInsts; 717e8d8bef9SDimitry Andric } 718e8d8bef9SDimitry Andric 719e8d8bef9SDimitry Andric bool hasNoSdstCMPX() const { 720e8d8bef9SDimitry Andric return HasNoSdstCMPX; 721e8d8bef9SDimitry Andric } 722e8d8bef9SDimitry Andric 723e8d8bef9SDimitry Andric bool hasVscnt() const { 724e8d8bef9SDimitry Andric return HasVscnt; 725e8d8bef9SDimitry Andric } 726e8d8bef9SDimitry Andric 727e8d8bef9SDimitry Andric bool hasGetWaveIdInst() const { 728e8d8bef9SDimitry Andric return HasGetWaveIdInst; 729e8d8bef9SDimitry Andric } 730e8d8bef9SDimitry Andric 731e8d8bef9SDimitry Andric bool hasSMemTimeInst() const { 732e8d8bef9SDimitry Andric return HasSMemTimeInst; 733e8d8bef9SDimitry Andric } 
734e8d8bef9SDimitry Andric 735*fe6060f1SDimitry Andric bool hasShaderCyclesRegister() const { 736*fe6060f1SDimitry Andric return HasShaderCyclesRegister; 737*fe6060f1SDimitry Andric } 738*fe6060f1SDimitry Andric 739e8d8bef9SDimitry Andric bool hasRegisterBanking() const { 740e8d8bef9SDimitry Andric return HasRegisterBanking; 741e8d8bef9SDimitry Andric } 742e8d8bef9SDimitry Andric 743e8d8bef9SDimitry Andric bool hasVOP3Literal() const { 744e8d8bef9SDimitry Andric return HasVOP3Literal; 745e8d8bef9SDimitry Andric } 746e8d8bef9SDimitry Andric 747e8d8bef9SDimitry Andric bool hasNoDataDepHazard() const { 748e8d8bef9SDimitry Andric return HasNoDataDepHazard; 749e8d8bef9SDimitry Andric } 750e8d8bef9SDimitry Andric 751e8d8bef9SDimitry Andric bool vmemWriteNeedsExpWaitcnt() const { 752e8d8bef9SDimitry Andric return getGeneration() < SEA_ISLANDS; 753e8d8bef9SDimitry Andric } 754e8d8bef9SDimitry Andric 755e8d8bef9SDimitry Andric // Scratch is allocated in 256 dword per wave blocks for the entire 756e8d8bef9SDimitry Andric // wavefront. When viewed from the perspecive of an arbitrary workitem, this 757e8d8bef9SDimitry Andric // is 4-byte aligned. 758e8d8bef9SDimitry Andric // 759e8d8bef9SDimitry Andric // Only 4-byte alignment is really needed to access anything. Transformations 760e8d8bef9SDimitry Andric // on the pointer value itself may rely on the alignment / known low bits of 761e8d8bef9SDimitry Andric // the pointer. Set this to something above the minimum to avoid needing 762e8d8bef9SDimitry Andric // dynamic realignment in common cases. 
763e8d8bef9SDimitry Andric Align getStackAlignment() const { return Align(16); } 764e8d8bef9SDimitry Andric 765e8d8bef9SDimitry Andric bool enableMachineScheduler() const override { 766e8d8bef9SDimitry Andric return true; 767e8d8bef9SDimitry Andric } 768e8d8bef9SDimitry Andric 769e8d8bef9SDimitry Andric bool useAA() const override; 770e8d8bef9SDimitry Andric 771e8d8bef9SDimitry Andric bool enableSubRegLiveness() const override { 772e8d8bef9SDimitry Andric return true; 773e8d8bef9SDimitry Andric } 774e8d8bef9SDimitry Andric 775e8d8bef9SDimitry Andric void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b; } 776e8d8bef9SDimitry Andric bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal; } 777e8d8bef9SDimitry Andric 778e8d8bef9SDimitry Andric // static wrappers 779e8d8bef9SDimitry Andric static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI); 780e8d8bef9SDimitry Andric 781e8d8bef9SDimitry Andric // XXX - Why is this here if it isn't in the default pass set? 782e8d8bef9SDimitry Andric bool enableEarlyIfConversion() const override { 783e8d8bef9SDimitry Andric return true; 784e8d8bef9SDimitry Andric } 785e8d8bef9SDimitry Andric 786e8d8bef9SDimitry Andric bool enableFlatScratch() const; 787e8d8bef9SDimitry Andric 788e8d8bef9SDimitry Andric void overrideSchedPolicy(MachineSchedPolicy &Policy, 789e8d8bef9SDimitry Andric unsigned NumRegionInstrs) const override; 790e8d8bef9SDimitry Andric 791e8d8bef9SDimitry Andric unsigned getMaxNumUserSGPRs() const { 792e8d8bef9SDimitry Andric return 16; 793e8d8bef9SDimitry Andric } 794e8d8bef9SDimitry Andric 795e8d8bef9SDimitry Andric bool hasSMemRealTime() const { 796e8d8bef9SDimitry Andric return HasSMemRealTime; 797e8d8bef9SDimitry Andric } 798e8d8bef9SDimitry Andric 799e8d8bef9SDimitry Andric bool hasMovrel() const { 800e8d8bef9SDimitry Andric return HasMovrel; 801e8d8bef9SDimitry Andric } 802e8d8bef9SDimitry Andric 803e8d8bef9SDimitry Andric bool hasVGPRIndexMode() const { 804e8d8bef9SDimitry Andric return 
HasVGPRIndexMode; 805e8d8bef9SDimitry Andric } 806e8d8bef9SDimitry Andric 807e8d8bef9SDimitry Andric bool useVGPRIndexMode() const; 808e8d8bef9SDimitry Andric 809e8d8bef9SDimitry Andric bool hasScalarCompareEq64() const { 810e8d8bef9SDimitry Andric return getGeneration() >= VOLCANIC_ISLANDS; 811e8d8bef9SDimitry Andric } 812e8d8bef9SDimitry Andric 813e8d8bef9SDimitry Andric bool hasScalarStores() const { 814e8d8bef9SDimitry Andric return HasScalarStores; 815e8d8bef9SDimitry Andric } 816e8d8bef9SDimitry Andric 817e8d8bef9SDimitry Andric bool hasScalarAtomics() const { 818e8d8bef9SDimitry Andric return HasScalarAtomics; 819e8d8bef9SDimitry Andric } 820e8d8bef9SDimitry Andric 821e8d8bef9SDimitry Andric bool hasLDSFPAtomics() const { 822e8d8bef9SDimitry Andric return GFX8Insts; 823e8d8bef9SDimitry Andric } 824e8d8bef9SDimitry Andric 825*fe6060f1SDimitry Andric /// \returns true if the subtarget has the v_permlanex16_b32 instruction. 826*fe6060f1SDimitry Andric bool hasPermLaneX16() const { return getGeneration() >= GFX10; } 827*fe6060f1SDimitry Andric 828e8d8bef9SDimitry Andric bool hasDPP() const { 829e8d8bef9SDimitry Andric return HasDPP; 830e8d8bef9SDimitry Andric } 831e8d8bef9SDimitry Andric 832e8d8bef9SDimitry Andric bool hasDPPBroadcasts() const { 833e8d8bef9SDimitry Andric return HasDPP && getGeneration() < GFX10; 834e8d8bef9SDimitry Andric } 835e8d8bef9SDimitry Andric 836e8d8bef9SDimitry Andric bool hasDPPWavefrontShifts() const { 837e8d8bef9SDimitry Andric return HasDPP && getGeneration() < GFX10; 838e8d8bef9SDimitry Andric } 839e8d8bef9SDimitry Andric 840e8d8bef9SDimitry Andric bool hasDPP8() const { 841e8d8bef9SDimitry Andric return HasDPP8; 842e8d8bef9SDimitry Andric } 843e8d8bef9SDimitry Andric 844*fe6060f1SDimitry Andric bool has64BitDPP() const { 845*fe6060f1SDimitry Andric return Has64BitDPP; 846*fe6060f1SDimitry Andric } 847*fe6060f1SDimitry Andric 848*fe6060f1SDimitry Andric bool hasPackedFP32Ops() const { 849*fe6060f1SDimitry Andric return 
HasPackedFP32Ops; 850*fe6060f1SDimitry Andric } 851*fe6060f1SDimitry Andric 852*fe6060f1SDimitry Andric bool hasFmaakFmamkF32Insts() const { 853*fe6060f1SDimitry Andric return getGeneration() >= GFX10; 854*fe6060f1SDimitry Andric } 855*fe6060f1SDimitry Andric 856*fe6060f1SDimitry Andric bool hasExtendedImageInsts() const { 857*fe6060f1SDimitry Andric return HasExtendedImageInsts; 858*fe6060f1SDimitry Andric } 859*fe6060f1SDimitry Andric 860e8d8bef9SDimitry Andric bool hasR128A16() const { 861e8d8bef9SDimitry Andric return HasR128A16; 862e8d8bef9SDimitry Andric } 863e8d8bef9SDimitry Andric 864e8d8bef9SDimitry Andric bool hasGFX10A16() const { 865e8d8bef9SDimitry Andric return HasGFX10A16; 866e8d8bef9SDimitry Andric } 867e8d8bef9SDimitry Andric 868e8d8bef9SDimitry Andric bool hasA16() const { return hasR128A16() || hasGFX10A16(); } 869e8d8bef9SDimitry Andric 870e8d8bef9SDimitry Andric bool hasG16() const { return HasG16; } 871e8d8bef9SDimitry Andric 872e8d8bef9SDimitry Andric bool hasOffset3fBug() const { 873e8d8bef9SDimitry Andric return HasOffset3fBug; 874e8d8bef9SDimitry Andric } 875e8d8bef9SDimitry Andric 876e8d8bef9SDimitry Andric bool hasImageStoreD16Bug() const { return HasImageStoreD16Bug; } 877e8d8bef9SDimitry Andric 878e8d8bef9SDimitry Andric bool hasImageGather4D16Bug() const { return HasImageGather4D16Bug; } 879e8d8bef9SDimitry Andric 880e8d8bef9SDimitry Andric bool hasNSAEncoding() const { return HasNSAEncoding; } 881e8d8bef9SDimitry Andric 882*fe6060f1SDimitry Andric unsigned getNSAMaxSize() const { return NSAMaxSize; } 883*fe6060f1SDimitry Andric 884*fe6060f1SDimitry Andric bool hasGFX10_AEncoding() const { 885*fe6060f1SDimitry Andric return GFX10_AEncoding; 886*fe6060f1SDimitry Andric } 887*fe6060f1SDimitry Andric 888e8d8bef9SDimitry Andric bool hasGFX10_BEncoding() const { 889e8d8bef9SDimitry Andric return GFX10_BEncoding; 890e8d8bef9SDimitry Andric } 891e8d8bef9SDimitry Andric 892e8d8bef9SDimitry Andric bool hasGFX10_3Insts() const { 
893e8d8bef9SDimitry Andric return GFX10_3Insts; 894e8d8bef9SDimitry Andric } 895e8d8bef9SDimitry Andric 896e8d8bef9SDimitry Andric bool hasMadF16() const; 897e8d8bef9SDimitry Andric 898e8d8bef9SDimitry Andric bool enableSIScheduler() const { 899e8d8bef9SDimitry Andric return EnableSIScheduler; 900e8d8bef9SDimitry Andric } 901e8d8bef9SDimitry Andric 902e8d8bef9SDimitry Andric bool loadStoreOptEnabled() const { 903e8d8bef9SDimitry Andric return EnableLoadStoreOpt; 904e8d8bef9SDimitry Andric } 905e8d8bef9SDimitry Andric 906e8d8bef9SDimitry Andric bool hasSGPRInitBug() const { 907e8d8bef9SDimitry Andric return SGPRInitBug; 908e8d8bef9SDimitry Andric } 909e8d8bef9SDimitry Andric 910*fe6060f1SDimitry Andric bool hasNegativeScratchOffsetBug() const { return NegativeScratchOffsetBug; } 911*fe6060f1SDimitry Andric 912*fe6060f1SDimitry Andric bool hasNegativeUnalignedScratchOffsetBug() const { 913*fe6060f1SDimitry Andric return NegativeUnalignedScratchOffsetBug; 914*fe6060f1SDimitry Andric } 915*fe6060f1SDimitry Andric 916e8d8bef9SDimitry Andric bool hasMFMAInlineLiteralBug() const { 917e8d8bef9SDimitry Andric return HasMFMAInlineLiteralBug; 918e8d8bef9SDimitry Andric } 919e8d8bef9SDimitry Andric 920e8d8bef9SDimitry Andric bool has12DWordStoreHazard() const { 921e8d8bef9SDimitry Andric return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS; 922e8d8bef9SDimitry Andric } 923e8d8bef9SDimitry Andric 924e8d8bef9SDimitry Andric // \returns true if the subtarget supports DWORDX3 load/store instructions. 
925e8d8bef9SDimitry Andric bool hasDwordx3LoadStores() const { 926e8d8bef9SDimitry Andric return CIInsts; 927e8d8bef9SDimitry Andric } 928e8d8bef9SDimitry Andric 929e8d8bef9SDimitry Andric bool hasReadM0MovRelInterpHazard() const { 930e8d8bef9SDimitry Andric return getGeneration() == AMDGPUSubtarget::GFX9; 931e8d8bef9SDimitry Andric } 932e8d8bef9SDimitry Andric 933e8d8bef9SDimitry Andric bool hasReadM0SendMsgHazard() const { 934e8d8bef9SDimitry Andric return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS && 935e8d8bef9SDimitry Andric getGeneration() <= AMDGPUSubtarget::GFX9; 936e8d8bef9SDimitry Andric } 937e8d8bef9SDimitry Andric 938e8d8bef9SDimitry Andric bool hasVcmpxPermlaneHazard() const { 939e8d8bef9SDimitry Andric return HasVcmpxPermlaneHazard; 940e8d8bef9SDimitry Andric } 941e8d8bef9SDimitry Andric 942e8d8bef9SDimitry Andric bool hasVMEMtoScalarWriteHazard() const { 943e8d8bef9SDimitry Andric return HasVMEMtoScalarWriteHazard; 944e8d8bef9SDimitry Andric } 945e8d8bef9SDimitry Andric 946e8d8bef9SDimitry Andric bool hasSMEMtoVectorWriteHazard() const { 947e8d8bef9SDimitry Andric return HasSMEMtoVectorWriteHazard; 948e8d8bef9SDimitry Andric } 949e8d8bef9SDimitry Andric 950e8d8bef9SDimitry Andric bool hasLDSMisalignedBug() const { 951e8d8bef9SDimitry Andric return LDSMisalignedBug && !EnableCuMode; 952e8d8bef9SDimitry Andric } 953e8d8bef9SDimitry Andric 954e8d8bef9SDimitry Andric bool hasInstFwdPrefetchBug() const { 955e8d8bef9SDimitry Andric return HasInstFwdPrefetchBug; 956e8d8bef9SDimitry Andric } 957e8d8bef9SDimitry Andric 958e8d8bef9SDimitry Andric bool hasVcmpxExecWARHazard() const { 959e8d8bef9SDimitry Andric return HasVcmpxExecWARHazard; 960e8d8bef9SDimitry Andric } 961e8d8bef9SDimitry Andric 962e8d8bef9SDimitry Andric bool hasLdsBranchVmemWARHazard() const { 963e8d8bef9SDimitry Andric return HasLdsBranchVmemWARHazard; 964e8d8bef9SDimitry Andric } 965e8d8bef9SDimitry Andric 966e8d8bef9SDimitry Andric bool hasNSAtoVMEMBug() const { 
967e8d8bef9SDimitry Andric return HasNSAtoVMEMBug; 968e8d8bef9SDimitry Andric } 969e8d8bef9SDimitry Andric 970*fe6060f1SDimitry Andric bool hasNSAClauseBug() const { return HasNSAClauseBug; } 971*fe6060f1SDimitry Andric 972e8d8bef9SDimitry Andric bool hasHardClauses() const { return getGeneration() >= GFX10; } 973e8d8bef9SDimitry Andric 974*fe6060f1SDimitry Andric bool hasGFX90AInsts() const { return GFX90AInsts; } 975*fe6060f1SDimitry Andric 976*fe6060f1SDimitry Andric /// Return if operations acting on VGPR tuples require even alignment. 977*fe6060f1SDimitry Andric bool needsAlignedVGPRs() const { return GFX90AInsts; } 978*fe6060f1SDimitry Andric 979*fe6060f1SDimitry Andric bool hasPackedTID() const { return HasPackedTID; } 980*fe6060f1SDimitry Andric 981e8d8bef9SDimitry Andric /// Return the maximum number of waves per SIMD for kernels using \p SGPRs 982e8d8bef9SDimitry Andric /// SGPRs 983e8d8bef9SDimitry Andric unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const; 984e8d8bef9SDimitry Andric 985e8d8bef9SDimitry Andric /// Return the maximum number of waves per SIMD for kernels using \p VGPRs 986e8d8bef9SDimitry Andric /// VGPRs 987e8d8bef9SDimitry Andric unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const; 988e8d8bef9SDimitry Andric 989e8d8bef9SDimitry Andric /// Return occupancy for the given function. Used LDS and a number of 990e8d8bef9SDimitry Andric /// registers if provided. 991e8d8bef9SDimitry Andric /// Note, occupancy can be affected by the scratch allocation as well, but 992e8d8bef9SDimitry Andric /// we do not have enough information to compute it. 
993e8d8bef9SDimitry Andric unsigned computeOccupancy(const Function &F, unsigned LDSSize = 0, 994e8d8bef9SDimitry Andric unsigned NumSGPRs = 0, unsigned NumVGPRs = 0) const; 995e8d8bef9SDimitry Andric 996e8d8bef9SDimitry Andric /// \returns true if the flat_scratch register should be initialized with the 997e8d8bef9SDimitry Andric /// pointer to the wave's scratch memory rather than a size and offset. 998e8d8bef9SDimitry Andric bool flatScratchIsPointer() const { 999e8d8bef9SDimitry Andric return getGeneration() >= AMDGPUSubtarget::GFX9; 1000e8d8bef9SDimitry Andric } 1001e8d8bef9SDimitry Andric 1002*fe6060f1SDimitry Andric /// \returns true if the flat_scratch register is initialized by the HW. 1003*fe6060f1SDimitry Andric /// In this case it is readonly. 1004*fe6060f1SDimitry Andric bool flatScratchIsArchitected() const { return HasArchitectedFlatScratch; } 1005*fe6060f1SDimitry Andric 1006e8d8bef9SDimitry Andric /// \returns true if the machine has merged shaders in which s0-s7 are 1007e8d8bef9SDimitry Andric /// reserved by the hardware and user SGPRs start at s8 1008e8d8bef9SDimitry Andric bool hasMergedShaders() const { 1009e8d8bef9SDimitry Andric return getGeneration() >= GFX9; 1010e8d8bef9SDimitry Andric } 1011e8d8bef9SDimitry Andric 1012e8d8bef9SDimitry Andric /// \returns SGPR allocation granularity supported by the subtarget. 1013e8d8bef9SDimitry Andric unsigned getSGPRAllocGranule() const { 1014e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getSGPRAllocGranule(this); 1015e8d8bef9SDimitry Andric } 1016e8d8bef9SDimitry Andric 1017e8d8bef9SDimitry Andric /// \returns SGPR encoding granularity supported by the subtarget. 1018e8d8bef9SDimitry Andric unsigned getSGPREncodingGranule() const { 1019e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getSGPREncodingGranule(this); 1020e8d8bef9SDimitry Andric } 1021e8d8bef9SDimitry Andric 1022e8d8bef9SDimitry Andric /// \returns Total number of SGPRs supported by the subtarget. 
1023e8d8bef9SDimitry Andric unsigned getTotalNumSGPRs() const { 1024e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getTotalNumSGPRs(this); 1025e8d8bef9SDimitry Andric } 1026e8d8bef9SDimitry Andric 1027e8d8bef9SDimitry Andric /// \returns Addressable number of SGPRs supported by the subtarget. 1028e8d8bef9SDimitry Andric unsigned getAddressableNumSGPRs() const { 1029e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getAddressableNumSGPRs(this); 1030e8d8bef9SDimitry Andric } 1031e8d8bef9SDimitry Andric 1032e8d8bef9SDimitry Andric /// \returns Minimum number of SGPRs that meets the given number of waves per 1033e8d8bef9SDimitry Andric /// execution unit requirement supported by the subtarget. 1034e8d8bef9SDimitry Andric unsigned getMinNumSGPRs(unsigned WavesPerEU) const { 1035e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU); 1036e8d8bef9SDimitry Andric } 1037e8d8bef9SDimitry Andric 1038e8d8bef9SDimitry Andric /// \returns Maximum number of SGPRs that meets the given number of waves per 1039e8d8bef9SDimitry Andric /// execution unit requirement supported by the subtarget. 1040e8d8bef9SDimitry Andric unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const { 1041e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable); 1042e8d8bef9SDimitry Andric } 1043e8d8bef9SDimitry Andric 1044*fe6060f1SDimitry Andric /// \returns Reserved number of SGPRs. This is common 1045*fe6060f1SDimitry Andric /// utility function called by MachineFunction and 1046*fe6060f1SDimitry Andric /// Function variants of getReservedNumSGPRs. 1047*fe6060f1SDimitry Andric unsigned getBaseReservedNumSGPRs(const bool HasFlatScratchInit) const; 1048*fe6060f1SDimitry Andric /// \returns Reserved number of SGPRs for given machine function \p MF. 
1049e8d8bef9SDimitry Andric unsigned getReservedNumSGPRs(const MachineFunction &MF) const; 1050e8d8bef9SDimitry Andric 1051*fe6060f1SDimitry Andric /// \returns Reserved number of SGPRs for given function \p F. 1052*fe6060f1SDimitry Andric unsigned getReservedNumSGPRs(const Function &F) const; 1053*fe6060f1SDimitry Andric 1054*fe6060f1SDimitry Andric /// \returns max num SGPRs. This is the common utility 1055*fe6060f1SDimitry Andric /// function called by MachineFunction and Function 1056*fe6060f1SDimitry Andric /// variants of getMaxNumSGPRs. 1057*fe6060f1SDimitry Andric unsigned getBaseMaxNumSGPRs(const Function &F, 1058*fe6060f1SDimitry Andric std::pair<unsigned, unsigned> WavesPerEU, 1059*fe6060f1SDimitry Andric unsigned PreloadedSGPRs, 1060*fe6060f1SDimitry Andric unsigned ReservedNumSGPRs) const; 1061*fe6060f1SDimitry Andric 1062e8d8bef9SDimitry Andric /// \returns Maximum number of SGPRs that meets number of waves per execution 1063e8d8bef9SDimitry Andric /// unit requirement for function \p MF, or number of SGPRs explicitly 1064e8d8bef9SDimitry Andric /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF. 1065e8d8bef9SDimitry Andric /// 1066e8d8bef9SDimitry Andric /// \returns Value that meets number of waves per execution unit requirement 1067e8d8bef9SDimitry Andric /// if explicitly requested value cannot be converted to integer, violates 1068e8d8bef9SDimitry Andric /// subtarget's specifications, or does not meet number of waves per execution 1069e8d8bef9SDimitry Andric /// unit requirement. 1070e8d8bef9SDimitry Andric unsigned getMaxNumSGPRs(const MachineFunction &MF) const; 1071e8d8bef9SDimitry Andric 1072*fe6060f1SDimitry Andric /// \returns Maximum number of SGPRs that meets number of waves per execution 1073*fe6060f1SDimitry Andric /// unit requirement for function \p F, or number of SGPRs explicitly 1074*fe6060f1SDimitry Andric /// requested using "amdgpu-num-sgpr" attribute attached to function \p F. 
1075*fe6060f1SDimitry Andric /// 1076*fe6060f1SDimitry Andric /// \returns Value that meets number of waves per execution unit requirement 1077*fe6060f1SDimitry Andric /// if explicitly requested value cannot be converted to integer, violates 1078*fe6060f1SDimitry Andric /// subtarget's specifications, or does not meet number of waves per execution 1079*fe6060f1SDimitry Andric /// unit requirement. 1080*fe6060f1SDimitry Andric unsigned getMaxNumSGPRs(const Function &F) const; 1081*fe6060f1SDimitry Andric 1082e8d8bef9SDimitry Andric /// \returns VGPR allocation granularity supported by the subtarget. 1083e8d8bef9SDimitry Andric unsigned getVGPRAllocGranule() const { 1084e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getVGPRAllocGranule(this); 1085e8d8bef9SDimitry Andric } 1086e8d8bef9SDimitry Andric 1087e8d8bef9SDimitry Andric /// \returns VGPR encoding granularity supported by the subtarget. 1088e8d8bef9SDimitry Andric unsigned getVGPREncodingGranule() const { 1089e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getVGPREncodingGranule(this); 1090e8d8bef9SDimitry Andric } 1091e8d8bef9SDimitry Andric 1092e8d8bef9SDimitry Andric /// \returns Total number of VGPRs supported by the subtarget. 1093e8d8bef9SDimitry Andric unsigned getTotalNumVGPRs() const { 1094e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getTotalNumVGPRs(this); 1095e8d8bef9SDimitry Andric } 1096e8d8bef9SDimitry Andric 1097e8d8bef9SDimitry Andric /// \returns Addressable number of VGPRs supported by the subtarget. 1098e8d8bef9SDimitry Andric unsigned getAddressableNumVGPRs() const { 1099e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getAddressableNumVGPRs(this); 1100e8d8bef9SDimitry Andric } 1101e8d8bef9SDimitry Andric 1102e8d8bef9SDimitry Andric /// \returns Minimum number of VGPRs that meets given number of waves per 1103e8d8bef9SDimitry Andric /// execution unit requirement supported by the subtarget. 
1104e8d8bef9SDimitry Andric unsigned getMinNumVGPRs(unsigned WavesPerEU) const { 1105e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU); 1106e8d8bef9SDimitry Andric } 1107e8d8bef9SDimitry Andric 1108e8d8bef9SDimitry Andric /// \returns Maximum number of VGPRs that meets given number of waves per 1109e8d8bef9SDimitry Andric /// execution unit requirement supported by the subtarget. 1110e8d8bef9SDimitry Andric unsigned getMaxNumVGPRs(unsigned WavesPerEU) const { 1111e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU); 1112e8d8bef9SDimitry Andric } 1113e8d8bef9SDimitry Andric 1114*fe6060f1SDimitry Andric /// \returns max num VGPRs. This is the common utility function 1115*fe6060f1SDimitry Andric /// called by MachineFunction and Function variants of getMaxNumVGPRs. 1116*fe6060f1SDimitry Andric unsigned getBaseMaxNumVGPRs(const Function &F, 1117*fe6060f1SDimitry Andric std::pair<unsigned, unsigned> WavesPerEU) const; 1118*fe6060f1SDimitry Andric /// \returns Maximum number of VGPRs that meets number of waves per execution 1119*fe6060f1SDimitry Andric /// unit requirement for function \p F, or number of VGPRs explicitly 1120*fe6060f1SDimitry Andric /// requested using "amdgpu-num-vgpr" attribute attached to function \p F. 1121*fe6060f1SDimitry Andric /// 1122*fe6060f1SDimitry Andric /// \returns Value that meets number of waves per execution unit requirement 1123*fe6060f1SDimitry Andric /// if explicitly requested value cannot be converted to integer, violates 1124*fe6060f1SDimitry Andric /// subtarget's specifications, or does not meet number of waves per execution 1125*fe6060f1SDimitry Andric /// unit requirement. 
1126*fe6060f1SDimitry Andric unsigned getMaxNumVGPRs(const Function &F) const; 1127*fe6060f1SDimitry Andric 1128e8d8bef9SDimitry Andric /// \returns Maximum number of VGPRs that meets number of waves per execution 1129e8d8bef9SDimitry Andric /// unit requirement for function \p MF, or number of VGPRs explicitly 1130e8d8bef9SDimitry Andric /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF. 1131e8d8bef9SDimitry Andric /// 1132e8d8bef9SDimitry Andric /// \returns Value that meets number of waves per execution unit requirement 1133e8d8bef9SDimitry Andric /// if explicitly requested value cannot be converted to integer, violates 1134e8d8bef9SDimitry Andric /// subtarget's specifications, or does not meet number of waves per execution 1135e8d8bef9SDimitry Andric /// unit requirement. 1136e8d8bef9SDimitry Andric unsigned getMaxNumVGPRs(const MachineFunction &MF) const; 1137e8d8bef9SDimitry Andric 1138e8d8bef9SDimitry Andric void getPostRAMutations( 1139e8d8bef9SDimitry Andric std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) 1140e8d8bef9SDimitry Andric const override; 1141e8d8bef9SDimitry Andric 1142e8d8bef9SDimitry Andric bool isWave32() const { 1143e8d8bef9SDimitry Andric return getWavefrontSize() == 32; 1144e8d8bef9SDimitry Andric } 1145e8d8bef9SDimitry Andric 1146e8d8bef9SDimitry Andric bool isWave64() const { 1147e8d8bef9SDimitry Andric return getWavefrontSize() == 64; 1148e8d8bef9SDimitry Andric } 1149e8d8bef9SDimitry Andric 1150e8d8bef9SDimitry Andric const TargetRegisterClass *getBoolRC() const { 1151e8d8bef9SDimitry Andric return getRegisterInfo()->getBoolRC(); 1152e8d8bef9SDimitry Andric } 1153e8d8bef9SDimitry Andric 1154e8d8bef9SDimitry Andric /// \returns Maximum number of work groups per compute unit supported by the 1155e8d8bef9SDimitry Andric /// subtarget and limited by given \p FlatWorkGroupSize. 
1156e8d8bef9SDimitry Andric unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override { 1157e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize); 1158e8d8bef9SDimitry Andric } 1159e8d8bef9SDimitry Andric 1160e8d8bef9SDimitry Andric /// \returns Minimum flat work group size supported by the subtarget. 1161e8d8bef9SDimitry Andric unsigned getMinFlatWorkGroupSize() const override { 1162e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this); 1163e8d8bef9SDimitry Andric } 1164e8d8bef9SDimitry Andric 1165e8d8bef9SDimitry Andric /// \returns Maximum flat work group size supported by the subtarget. 1166e8d8bef9SDimitry Andric unsigned getMaxFlatWorkGroupSize() const override { 1167e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this); 1168e8d8bef9SDimitry Andric } 1169e8d8bef9SDimitry Andric 1170e8d8bef9SDimitry Andric /// \returns Number of waves per execution unit required to support the given 1171e8d8bef9SDimitry Andric /// \p FlatWorkGroupSize. 1172e8d8bef9SDimitry Andric unsigned 1173e8d8bef9SDimitry Andric getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override { 1174e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize); 1175e8d8bef9SDimitry Andric } 1176e8d8bef9SDimitry Andric 1177e8d8bef9SDimitry Andric /// \returns Minimum number of waves per execution unit supported by the 1178e8d8bef9SDimitry Andric /// subtarget. 
1179e8d8bef9SDimitry Andric unsigned getMinWavesPerEU() const override { 1180e8d8bef9SDimitry Andric return AMDGPU::IsaInfo::getMinWavesPerEU(this); 1181e8d8bef9SDimitry Andric } 1182e8d8bef9SDimitry Andric 1183e8d8bef9SDimitry Andric void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, 1184e8d8bef9SDimitry Andric SDep &Dep) const override; 1185e8d8bef9SDimitry Andric }; 1186e8d8bef9SDimitry Andric 1187e8d8bef9SDimitry Andric } // end namespace llvm 1188e8d8bef9SDimitry Andric 1189e8d8bef9SDimitry Andric #endif // LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H 1190