xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h (revision 5fb307d29b364982acbde82cbf77db3cae486f8c)
1 //===-- AMDGPUMachineFunction.h -----------------------------------*- C++ -*-=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEFUNCTION_H
10 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEFUNCTION_H
11 
12 #include "Utils/AMDGPUBaseInfo.h"
13 #include "llvm/ADT/DenseMap.h"
14 #include "llvm/CodeGen/MachineFunction.h"
15 #include "llvm/IR/DataLayout.h"
16 #include "llvm/IR/Function.h"
17 #include "llvm/IR/GlobalValue.h"
18 #include "llvm/IR/GlobalVariable.h"
19 
20 namespace llvm {
21 
22 class AMDGPUSubtarget;
23 class GCNSubtarget;
24 
25 class AMDGPUMachineFunction : public MachineFunctionInfo {
26   /// A map to keep track of local memory objects and their offsets within the
27   /// local memory space.
28   SmallDenseMap<const GlobalValue *, unsigned, 4> LocalMemoryObjects;
29 
30 protected:
31   uint64_t ExplicitKernArgSize = 0; // Cache for this.
32   Align MaxKernArgAlign;        // Cache for this.
33 
34   /// Number of bytes in the LDS that are being used.
35   uint32_t LDSSize = 0;
36   uint32_t GDSSize = 0;
37 
38   /// Number of bytes in the LDS allocated statically. This field is only used
39   /// in the instruction selector and not part of the machine function info.
40   uint32_t StaticLDSSize = 0;
41   uint32_t StaticGDSSize = 0;
42 
43   /// Align for dynamic shared memory if any. Dynamic shared memory is
44   /// allocated directly after the static one, i.e., LDSSize. Need to pad
45   /// LDSSize to ensure that dynamic one is aligned accordingly.
46   /// The maximal alignment is updated during IR translation or lowering
47   /// stages.
48   Align DynLDSAlign;
49 
50   // Kernels + shaders. i.e. functions called by the hardware and not called
51   // by other functions.
52   bool IsEntryFunction = false;
53 
54   // Entry points called by other functions instead of directly by the hardware.
55   bool IsModuleEntryFunction = false;
56 
57   bool NoSignedZerosFPMath = false;
58 
59   // Function may be memory bound.
60   bool MemoryBound = false;
61 
62   // Kernel may need limited waves per EU for better performance.
63   bool WaveLimiter = false;
64 
65 public:
66   AMDGPUMachineFunction(const Function &F, const AMDGPUSubtarget &ST);
67 
68   uint64_t getExplicitKernArgSize() const {
69     return ExplicitKernArgSize;
70   }
71 
72   Align getMaxKernArgAlign() const { return MaxKernArgAlign; }
73 
74   uint32_t getLDSSize() const {
75     return LDSSize;
76   }
77 
78   uint32_t getGDSSize() const {
79     return GDSSize;
80   }
81 
82   bool isEntryFunction() const {
83     return IsEntryFunction;
84   }
85 
86   bool isModuleEntryFunction() const { return IsModuleEntryFunction; }
87 
88   bool hasNoSignedZerosFPMath() const {
89     return NoSignedZerosFPMath;
90   }
91 
92   bool isMemoryBound() const {
93     return MemoryBound;
94   }
95 
96   bool needsWaveLimiter() const {
97     return WaveLimiter;
98   }
99 
100   unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalVariable &GV) {
101     return allocateLDSGlobal(DL, GV, DynLDSAlign);
102   }
103 
104   unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalVariable &GV,
105                              Align Trailing);
106 
107   static std::optional<uint32_t> getLDSKernelIdMetadata(const Function &F);
108   static std::optional<uint32_t> getLDSAbsoluteAddress(const GlobalValue &GV);
109 
110   Align getDynLDSAlign() const { return DynLDSAlign; }
111 
112   void setDynLDSAlign(const Function &F, const GlobalVariable &GV);
113 };
114 
115 }
116 #endif
117