1 //===-- AMDGPUMachineFunctionInfo.h -------------------------------*- C++ -*-=// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEFUNCTION_H 10 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEFUNCTION_H 11 12 #include "Utils/AMDGPUBaseInfo.h" 13 #include "llvm/ADT/DenseMap.h" 14 #include "llvm/CodeGen/MachineFunction.h" 15 #include "llvm/IR/DataLayout.h" 16 #include "llvm/IR/Function.h" 17 #include "llvm/IR/GlobalValue.h" 18 #include "llvm/IR/GlobalVariable.h" 19 20 namespace llvm { 21 22 class AMDGPUSubtarget; 23 24 class AMDGPUMachineFunction : public MachineFunctionInfo { 25 /// A map to keep track of local memory objects and their offsets within the 26 /// local memory space. 27 SmallDenseMap<const GlobalValue *, unsigned, 4> LocalMemoryObjects; 28 29 protected: 30 uint64_t ExplicitKernArgSize = 0; // Cache for this. 31 Align MaxKernArgAlign; // Cache for this. 32 33 /// Number of bytes in the LDS that are being used. 34 uint32_t LDSSize = 0; 35 uint32_t GDSSize = 0; 36 37 /// Number of bytes in the LDS allocated statically. This field is only used 38 /// in the instruction selector and not part of the machine function info. 39 uint32_t StaticLDSSize = 0; 40 uint32_t StaticGDSSize = 0; 41 42 /// Align for dynamic shared memory if any. Dynamic shared memory is 43 /// allocated directly after the static one, i.e., LDSSize. Need to pad 44 /// LDSSize to ensure that dynamic one is aligned accordingly. 45 /// The maximal alignment is updated during IR translation or lowering 46 /// stages. 47 Align DynLDSAlign; 48 49 // Flag to check dynamic LDS usage by kernel. 50 bool UsesDynamicLDS = false; 51 52 // Kernels + shaders. i.e. functions called by the hardware and not called 53 // by other functions. 54 bool IsEntryFunction = false; 55 56 // Entry points called by other functions instead of directly by the hardware. 57 bool IsModuleEntryFunction = false; 58 59 // Functions with the amdgpu_cs_chain or amdgpu_cs_chain_preserve CC. 60 bool IsChainFunction = false; 61 62 bool NoSignedZerosFPMath = false; 63 64 // Function may be memory bound. 65 bool MemoryBound = false; 66 67 // Kernel may need limited waves per EU for better performance. 68 bool WaveLimiter = false; 69 70 public: 71 AMDGPUMachineFunction(const Function &F, const AMDGPUSubtarget &ST); 72 73 uint64_t getExplicitKernArgSize() const { 74 return ExplicitKernArgSize; 75 } 76 77 Align getMaxKernArgAlign() const { return MaxKernArgAlign; } 78 79 uint32_t getLDSSize() const { 80 return LDSSize; 81 } 82 83 uint32_t getGDSSize() const { 84 return GDSSize; 85 } 86 87 bool isEntryFunction() const { 88 return IsEntryFunction; 89 } 90 91 bool isModuleEntryFunction() const { return IsModuleEntryFunction; } 92 93 bool isChainFunction() const { return IsChainFunction; } 94 95 // The stack is empty upon entry to this function. 96 bool isBottomOfStack() const { 97 return isEntryFunction() || isChainFunction(); 98 } 99 100 bool hasNoSignedZerosFPMath() const { 101 return NoSignedZerosFPMath; 102 } 103 104 bool isMemoryBound() const { 105 return MemoryBound; 106 } 107 108 bool needsWaveLimiter() const { 109 return WaveLimiter; 110 } 111 112 unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalVariable &GV) { 113 return allocateLDSGlobal(DL, GV, DynLDSAlign); 114 } 115 116 unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalVariable &GV, 117 Align Trailing); 118 119 static std::optional<uint32_t> getLDSKernelIdMetadata(const Function &F); 120 static std::optional<uint32_t> getLDSAbsoluteAddress(const GlobalValue &GV); 121 122 Align getDynLDSAlign() const { return DynLDSAlign; } 123 124 void setDynLDSAlign(const Function &F, const GlobalVariable &GV); 125 126 void setUsesDynamicLDS(bool DynLDS); 127 128 bool isDynamicLDSUsed() const; 129 }; 130 131 } 132 #endif 133