xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.h (revision 1f1e2261e341e6ca6862f82261066ef1705f0a7a)
1 //===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
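/// \file
/// Declares the AMDGPU backend's pass factory functions, PassRegistry
/// initialization hooks, pass ID references, PassInfoMixin-based pass structs,
/// and the AMDGPUAS address-space enumeration.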
8 //===----------------------------------------------------------------------===//
9 
10 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPU_H
11 #define LLVM_LIB_TARGET_AMDGPU_AMDGPU_H
12 
13 #include "llvm/IR/PassManager.h"
14 #include "llvm/Pass.h"
15 #include "llvm/Support/CodeGen.h"
16 
17 namespace llvm {
18 
19 class TargetMachine;
20 
21 // GlobalISel passes
22 void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &);
23 FunctionPass *createAMDGPUPreLegalizeCombiner(bool IsOptNone);
24 void initializeAMDGPUPostLegalizerCombinerPass(PassRegistry &);
25 FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone);
26 FunctionPass *createAMDGPURegBankCombiner(bool IsOptNone);
27 void initializeAMDGPURegBankCombinerPass(PassRegistry &);
28 
29 // SI Passes
30 FunctionPass *createGCNDPPCombinePass();
31 FunctionPass *createSIAnnotateControlFlowPass();
32 FunctionPass *createSIFoldOperandsPass();
33 FunctionPass *createSIPeepholeSDWAPass();
34 FunctionPass *createSILowerI1CopiesPass();
35 FunctionPass *createSIShrinkInstructionsPass();
36 FunctionPass *createSILoadStoreOptimizerPass();
37 FunctionPass *createSIWholeQuadModePass();
38 FunctionPass *createSIFixControlFlowLiveIntervalsPass();
39 FunctionPass *createSIOptimizeExecMaskingPreRAPass();
40 FunctionPass *createSIOptimizeVGPRLiveRangePass();
41 FunctionPass *createSIFixSGPRCopiesPass();
42 FunctionPass *createSIMemoryLegalizerPass();
43 FunctionPass *createSIInsertWaitcntsPass();
44 FunctionPass *createSIPreAllocateWWMRegsPass();
45 FunctionPass *createSIFormMemoryClausesPass();
46 
47 FunctionPass *createSIPostRABundlerPass();
48 FunctionPass *createAMDGPUSimplifyLibCallsPass(const TargetMachine *);
49 FunctionPass *createAMDGPUUseNativeCallsPass();
50 FunctionPass *createAMDGPUCodeGenPreparePass();
51 FunctionPass *createAMDGPULateCodeGenPreparePass();
52 FunctionPass *createAMDGPUMachineCFGStructurizerPass();
53 FunctionPass *createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *);
54 ModulePass *createAMDGPUPropagateAttributesLatePass(const TargetMachine *);
55 FunctionPass *createAMDGPURewriteOutArgumentsPass();
56 ModulePass *createAMDGPUReplaceLDSUseWithPointerPass();
57 ModulePass *createAMDGPULowerModuleLDSPass();
58 FunctionPass *createSIModeRegisterPass();
59 FunctionPass *createGCNPreRAOptimizationsPass();
60 
61 struct AMDGPUSimplifyLibCallsPass : PassInfoMixin<AMDGPUSimplifyLibCallsPass> {
62   AMDGPUSimplifyLibCallsPass(TargetMachine &TM) : TM(TM) {}
63   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
64 
65 private:
66   TargetMachine &TM;
67 };
68 
69 struct AMDGPUUseNativeCallsPass : PassInfoMixin<AMDGPUUseNativeCallsPass> {
70   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
71 };
72 
73 void initializeAMDGPUDAGToDAGISelPass(PassRegistry&);
74 
75 void initializeAMDGPUMachineCFGStructurizerPass(PassRegistry&);
76 extern char &AMDGPUMachineCFGStructurizerID;
77 
78 void initializeAMDGPUAlwaysInlinePass(PassRegistry&);
79 
80 Pass *createAMDGPUAnnotateKernelFeaturesPass();
81 Pass *createAMDGPUAttributorPass();
82 void initializeAMDGPUAttributorPass(PassRegistry &);
83 void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &);
84 extern char &AMDGPUAnnotateKernelFeaturesID;
85 
86 FunctionPass *createAMDGPUAtomicOptimizerPass();
87 void initializeAMDGPUAtomicOptimizerPass(PassRegistry &);
88 extern char &AMDGPUAtomicOptimizerID;
89 
90 ModulePass *createAMDGPULowerIntrinsicsPass();
91 void initializeAMDGPULowerIntrinsicsPass(PassRegistry &);
92 extern char &AMDGPULowerIntrinsicsID;
93 
94 ModulePass *createAMDGPUFixFunctionBitcastsPass();
95 void initializeAMDGPUFixFunctionBitcastsPass(PassRegistry &);
96 extern char &AMDGPUFixFunctionBitcastsID;
97 
98 ModulePass *createAMDGPUCtorDtorLoweringPass();
99 void initializeAMDGPUCtorDtorLoweringPass(PassRegistry &);
100 extern char &AMDGPUCtorDtorLoweringID;
101 
102 FunctionPass *createAMDGPULowerKernelArgumentsPass();
103 void initializeAMDGPULowerKernelArgumentsPass(PassRegistry &);
104 extern char &AMDGPULowerKernelArgumentsID;
105 
106 FunctionPass *createAMDGPUPromoteKernelArgumentsPass();
107 void initializeAMDGPUPromoteKernelArgumentsPass(PassRegistry &);
108 extern char &AMDGPUPromoteKernelArgumentsID;
109 
110 struct AMDGPUPromoteKernelArgumentsPass
111     : PassInfoMixin<AMDGPUPromoteKernelArgumentsPass> {
112   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
113 };
114 
115 ModulePass *createAMDGPULowerKernelAttributesPass();
116 void initializeAMDGPULowerKernelAttributesPass(PassRegistry &);
117 extern char &AMDGPULowerKernelAttributesID;
118 
119 struct AMDGPULowerKernelAttributesPass
120     : PassInfoMixin<AMDGPULowerKernelAttributesPass> {
121   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
122 };
123 
124 void initializeAMDGPUPropagateAttributesEarlyPass(PassRegistry &);
125 extern char &AMDGPUPropagateAttributesEarlyID;
126 
127 struct AMDGPUPropagateAttributesEarlyPass
128     : PassInfoMixin<AMDGPUPropagateAttributesEarlyPass> {
129   AMDGPUPropagateAttributesEarlyPass(TargetMachine &TM) : TM(TM) {}
130   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
131 
132 private:
133   TargetMachine &TM;
134 };
135 
136 void initializeAMDGPUPropagateAttributesLatePass(PassRegistry &);
137 extern char &AMDGPUPropagateAttributesLateID;
138 
139 struct AMDGPUPropagateAttributesLatePass
140     : PassInfoMixin<AMDGPUPropagateAttributesLatePass> {
141   AMDGPUPropagateAttributesLatePass(TargetMachine &TM) : TM(TM) {}
142   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
143 
144 private:
145   TargetMachine &TM;
146 };
147 
148 void initializeAMDGPUReplaceLDSUseWithPointerPass(PassRegistry &);
149 extern char &AMDGPUReplaceLDSUseWithPointerID;
150 
151 struct AMDGPUReplaceLDSUseWithPointerPass
152     : PassInfoMixin<AMDGPUReplaceLDSUseWithPointerPass> {
153   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
154 };
155 
156 void initializeAMDGPULowerModuleLDSPass(PassRegistry &);
157 extern char &AMDGPULowerModuleLDSID;
158 
159 struct AMDGPULowerModuleLDSPass : PassInfoMixin<AMDGPULowerModuleLDSPass> {
160   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
161 };
162 
163 void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &);
164 extern char &AMDGPURewriteOutArgumentsID;
165 
166 void initializeGCNDPPCombinePass(PassRegistry &);
167 extern char &GCNDPPCombineID;
168 
169 void initializeSIFoldOperandsPass(PassRegistry &);
170 extern char &SIFoldOperandsID;
171 
172 void initializeSIPeepholeSDWAPass(PassRegistry &);
173 extern char &SIPeepholeSDWAID;
174 
175 void initializeSIShrinkInstructionsPass(PassRegistry&);
176 extern char &SIShrinkInstructionsID;
177 
178 void initializeSIFixSGPRCopiesPass(PassRegistry &);
179 extern char &SIFixSGPRCopiesID;
180 
181 void initializeSIFixVGPRCopiesPass(PassRegistry &);
182 extern char &SIFixVGPRCopiesID;
183 
184 void initializeSILowerI1CopiesPass(PassRegistry &);
185 extern char &SILowerI1CopiesID;
186 
187 void initializeSILowerSGPRSpillsPass(PassRegistry &);
188 extern char &SILowerSGPRSpillsID;
189 
190 void initializeSILoadStoreOptimizerPass(PassRegistry &);
191 extern char &SILoadStoreOptimizerID;
192 
193 void initializeSIWholeQuadModePass(PassRegistry &);
194 extern char &SIWholeQuadModeID;
195 
196 void initializeSILowerControlFlowPass(PassRegistry &);
197 extern char &SILowerControlFlowID;
198 
199 void initializeSIPreEmitPeepholePass(PassRegistry &);
200 extern char &SIPreEmitPeepholeID;
201 
202 void initializeSILateBranchLoweringPass(PassRegistry &);
203 extern char &SILateBranchLoweringPassID;
204 
205 void initializeSIOptimizeExecMaskingPass(PassRegistry &);
206 extern char &SIOptimizeExecMaskingID;
207 
208 void initializeSIPreAllocateWWMRegsPass(PassRegistry &);
209 extern char &SIPreAllocateWWMRegsID;
210 
211 void initializeAMDGPUSimplifyLibCallsPass(PassRegistry &);
212 extern char &AMDGPUSimplifyLibCallsID;
213 
214 void initializeAMDGPUUseNativeCallsPass(PassRegistry &);
215 extern char &AMDGPUUseNativeCallsID;
216 
217 void initializeAMDGPUPerfHintAnalysisPass(PassRegistry &);
218 extern char &AMDGPUPerfHintAnalysisID;
219 
220 // Passes common to R600 and SI
221 FunctionPass *createAMDGPUPromoteAlloca();
222 void initializeAMDGPUPromoteAllocaPass(PassRegistry&);
223 extern char &AMDGPUPromoteAllocaID;
224 
225 FunctionPass *createAMDGPUPromoteAllocaToVector();
226 void initializeAMDGPUPromoteAllocaToVectorPass(PassRegistry&);
227 extern char &AMDGPUPromoteAllocaToVectorID;
228 
229 struct AMDGPUPromoteAllocaPass : PassInfoMixin<AMDGPUPromoteAllocaPass> {
230   AMDGPUPromoteAllocaPass(TargetMachine &TM) : TM(TM) {}
231   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
232 
233 private:
234   TargetMachine &TM;
235 };
236 
237 struct AMDGPUPromoteAllocaToVectorPass
238     : PassInfoMixin<AMDGPUPromoteAllocaToVectorPass> {
239   AMDGPUPromoteAllocaToVectorPass(TargetMachine &TM) : TM(TM) {}
240   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
241 
242 private:
243   TargetMachine &TM;
244 };
245 
246 Pass *createAMDGPUStructurizeCFGPass();
247 FunctionPass *createAMDGPUISelDag(
248   TargetMachine *TM = nullptr,
249   CodeGenOpt::Level OptLevel = CodeGenOpt::Default);
250 ModulePass *createAMDGPUAlwaysInlinePass(bool GlobalOpt = true);
251 
252 struct AMDGPUAlwaysInlinePass : PassInfoMixin<AMDGPUAlwaysInlinePass> {
253   AMDGPUAlwaysInlinePass(bool GlobalOpt = true) : GlobalOpt(GlobalOpt) {}
254   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
255 
256 private:
257   bool GlobalOpt;
258 };
259 
260 FunctionPass *createAMDGPUAnnotateUniformValues();
261 
262 ModulePass *createAMDGPUPrintfRuntimeBinding();
263 void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry&);
264 extern char &AMDGPUPrintfRuntimeBindingID;
265 
266 void initializeAMDGPUResourceUsageAnalysisPass(PassRegistry &);
267 extern char &AMDGPUResourceUsageAnalysisID;
268 
269 struct AMDGPUPrintfRuntimeBindingPass
270     : PassInfoMixin<AMDGPUPrintfRuntimeBindingPass> {
271   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
272 };
273 
274 ModulePass* createAMDGPUUnifyMetadataPass();
275 void initializeAMDGPUUnifyMetadataPass(PassRegistry&);
276 extern char &AMDGPUUnifyMetadataID;
277 
278 struct AMDGPUUnifyMetadataPass : PassInfoMixin<AMDGPUUnifyMetadataPass> {
279   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
280 };
281 
282 void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry&);
283 extern char &SIOptimizeExecMaskingPreRAID;
284 
285 void initializeSIOptimizeVGPRLiveRangePass(PassRegistry &);
286 extern char &SIOptimizeVGPRLiveRangeID;
287 
288 void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry&);
289 extern char &AMDGPUAnnotateUniformValuesPassID;
290 
291 void initializeAMDGPUCodeGenPreparePass(PassRegistry&);
292 extern char &AMDGPUCodeGenPrepareID;
293 
294 void initializeAMDGPULateCodeGenPreparePass(PassRegistry &);
295 extern char &AMDGPULateCodeGenPrepareID;
296 
297 void initializeSIAnnotateControlFlowPass(PassRegistry&);
298 extern char &SIAnnotateControlFlowPassID;
299 
300 void initializeSIMemoryLegalizerPass(PassRegistry&);
301 extern char &SIMemoryLegalizerID;
302 
303 void initializeSIModeRegisterPass(PassRegistry&);
304 extern char &SIModeRegisterID;
305 
306 void initializeSIInsertHardClausesPass(PassRegistry &);
307 extern char &SIInsertHardClausesID;
308 
309 void initializeSIInsertWaitcntsPass(PassRegistry&);
310 extern char &SIInsertWaitcntsID;
311 
312 void initializeSIFormMemoryClausesPass(PassRegistry&);
313 extern char &SIFormMemoryClausesID;
314 
315 void initializeSIPostRABundlerPass(PassRegistry&);
316 extern char &SIPostRABundlerID;
317 
318 void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry&);
319 extern char &AMDGPUUnifyDivergentExitNodesID;
320 
321 ImmutablePass *createAMDGPUAAWrapperPass();
322 void initializeAMDGPUAAWrapperPassPass(PassRegistry&);
323 ImmutablePass *createAMDGPUExternalAAWrapperPass();
324 void initializeAMDGPUExternalAAWrapperPass(PassRegistry&);
325 
326 void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &);
327 
328 ModulePass *createAMDGPUOpenCLEnqueuedBlockLoweringPass();
329 void initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(PassRegistry &);
330 extern char &AMDGPUOpenCLEnqueuedBlockLoweringID;
331 
332 void initializeGCNNSAReassignPass(PassRegistry &);
333 extern char &GCNNSAReassignID;
334 
335 void initializeGCNPreRAOptimizationsPass(PassRegistry &);
336 extern char &GCNPreRAOptimizationsID;
337 
namespace AMDGPU {
/// Target index identifiers. The values are consecutive, starting at zero
/// with TI_CONSTDATA_START, followed by the four scratch resource dwords.
enum TargetIndex {
  TI_CONSTDATA_START = 0,
  TI_SCRATCH_RSRC_DWORD0 = 1,
  TI_SCRATCH_RSRC_DWORD1 = 2,
  TI_SCRATCH_RSRC_DWORD2 = 3,
  TI_SCRATCH_RSRC_DWORD3 = 4
};
} // namespace AMDGPU
347 
/// Address-space numbering.
///
/// OpenCL relies on address spaces to tell apart the different memory regions
/// of the hardware. On the CPU every address space refers to the same memory,
/// whereas on the GPU each address space refers to its own piece of memory,
/// distinct from the other memory locations.
namespace AMDGPUAS {
enum : unsigned {
  FLAT_ADDRESS = 0,     ///< Address space for flat memory.
  GLOBAL_ADDRESS = 1,   ///< Address space for global memory (RAT0, VTX0).
  REGION_ADDRESS = 2,   ///< Address space for region memory. (GDS)
  LOCAL_ADDRESS = 3,    ///< Address space for local memory.
  CONSTANT_ADDRESS = 4, ///< Address space for constant memory (VTX2).
  PRIVATE_ADDRESS = 5,  ///< Address space for private memory.

  /// Address space for 32-bit constant memory.
  CONSTANT_ADDRESS_32BIT = 6,

  /// Address space for 160-bit buffer fat pointers.
  BUFFER_FAT_POINTER = 7,

  // Largest value among the flat, generic, local, private, constant and
  // region address spaces.
  MAX_AMDGPU_ADDRESS = 7,

  /// Address space for direct addressable parameter memory (CONST0).
  PARAM_D_ADDRESS = 6,
  /// Address space for indirect addressable parameter memory (VTX1).
  PARAM_I_ADDRESS = 7,

  // The CONSTANT_BUFFER_* values must stay in this order. Several places
  // rely on it to index a constant buffer dynamically, for example:
  //
  // ConstantBufferAS = CONSTANT_BUFFER_0 + CBIdx
  CONSTANT_BUFFER_0 = 8,
  CONSTANT_BUFFER_1 = 9,
  CONSTANT_BUFFER_2 = 10,
  CONSTANT_BUFFER_3 = 11,
  CONSTANT_BUFFER_4 = 12,
  CONSTANT_BUFFER_5 = 13,
  CONSTANT_BUFFER_6 = 14,
  CONSTANT_BUFFER_7 = 15,
  CONSTANT_BUFFER_8 = 16,
  CONSTANT_BUFFER_9 = 17,
  CONSTANT_BUFFER_10 = 18,
  CONSTANT_BUFFER_11 = 19,
  CONSTANT_BUFFER_12 = 20,
  CONSTANT_BUFFER_13 = 21,
  CONSTANT_BUFFER_14 = 22,
  CONSTANT_BUFFER_15 = 23,

  // Used in some places when the address space cannot be determined.
  UNKNOWN_ADDRESS_SPACE = ~0u,
};
} // namespace AMDGPUAS
403 
404 namespace AMDGPU {
405 
406 // FIXME: Missing constant_32bit
407 inline bool isFlatGlobalAddrSpace(unsigned AS) {
408   return AS == AMDGPUAS::GLOBAL_ADDRESS ||
409          AS == AMDGPUAS::FLAT_ADDRESS ||
410          AS == AMDGPUAS::CONSTANT_ADDRESS ||
411          AS > AMDGPUAS::MAX_AMDGPU_ADDRESS;
412 }
413 }
414 
415 } // End namespace llvm
416 
417 #endif
418