xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.h (revision 43a5ec4eb41567cc92586503212743d89686d78f)
1 //===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 /// \file
8 //===----------------------------------------------------------------------===//
9 
10 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPU_H
11 #define LLVM_LIB_TARGET_AMDGPU_AMDGPU_H
12 
13 #include "llvm/IR/PassManager.h"
14 #include "llvm/Support/CodeGen.h"
15 
16 namespace llvm {
17 
// Forward declarations (alphabetical) for types that this header only names
// through pointers or references.
class FunctionPass;
class GCNTargetMachine;
class ImmutablePass;
class MachineFunctionPass;
class Module;
class ModulePass;
class Pass;
class PassRegistry;
class Target;
class TargetMachine;
class TargetOptions;
29 
30 // GlobalISel passes
31 void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &);
32 FunctionPass *createAMDGPUPreLegalizeCombiner(bool IsOptNone);
33 void initializeAMDGPUPostLegalizerCombinerPass(PassRegistry &);
34 FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone);
35 FunctionPass *createAMDGPURegBankCombiner(bool IsOptNone);
36 void initializeAMDGPURegBankCombinerPass(PassRegistry &);
37 
38 // R600 Passes
39 FunctionPass *createR600VectorRegMerger();
40 FunctionPass *createR600ExpandSpecialInstrsPass();
41 FunctionPass *createR600EmitClauseMarkers();
42 FunctionPass *createR600ClauseMergePass();
43 FunctionPass *createR600Packetizer();
44 FunctionPass *createR600ControlFlowFinalizer();
45 FunctionPass *createAMDGPUCFGStructurizerPass();
46 FunctionPass *createR600ISelDag(TargetMachine *TM, CodeGenOpt::Level OptLevel);
47 
48 // SI Passes
49 FunctionPass *createGCNDPPCombinePass();
50 FunctionPass *createSIAnnotateControlFlowPass();
51 FunctionPass *createSIFoldOperandsPass();
52 FunctionPass *createSIPeepholeSDWAPass();
53 FunctionPass *createSILowerI1CopiesPass();
54 FunctionPass *createSIShrinkInstructionsPass();
55 FunctionPass *createSILoadStoreOptimizerPass();
56 FunctionPass *createSIWholeQuadModePass();
57 FunctionPass *createSIFixControlFlowLiveIntervalsPass();
58 FunctionPass *createSIOptimizeExecMaskingPreRAPass();
59 FunctionPass *createSIOptimizeVGPRLiveRangePass();
60 FunctionPass *createSIFixSGPRCopiesPass();
61 FunctionPass *createSIMemoryLegalizerPass();
62 FunctionPass *createSIInsertWaitcntsPass();
63 FunctionPass *createSIPreAllocateWWMRegsPass();
64 FunctionPass *createSIFormMemoryClausesPass();
65 
66 FunctionPass *createSIPostRABundlerPass();
67 FunctionPass *createAMDGPUSimplifyLibCallsPass(const TargetMachine *);
68 FunctionPass *createAMDGPUUseNativeCallsPass();
69 FunctionPass *createAMDGPUCodeGenPreparePass();
70 FunctionPass *createAMDGPULateCodeGenPreparePass();
71 FunctionPass *createAMDGPUMachineCFGStructurizerPass();
72 FunctionPass *createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *);
73 ModulePass *createAMDGPUPropagateAttributesLatePass(const TargetMachine *);
74 FunctionPass *createAMDGPURewriteOutArgumentsPass();
75 ModulePass *createAMDGPUReplaceLDSUseWithPointerPass();
76 ModulePass *createAMDGPULowerModuleLDSPass();
77 FunctionPass *createSIModeRegisterPass();
78 FunctionPass *createGCNPreRAOptimizationsPass();
79 
80 struct AMDGPUSimplifyLibCallsPass : PassInfoMixin<AMDGPUSimplifyLibCallsPass> {
81   AMDGPUSimplifyLibCallsPass(TargetMachine &TM) : TM(TM) {}
82   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
83 
84 private:
85   TargetMachine &TM;
86 };
87 
88 struct AMDGPUUseNativeCallsPass : PassInfoMixin<AMDGPUUseNativeCallsPass> {
89   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
90 };
91 
92 void initializeAMDGPUDAGToDAGISelPass(PassRegistry&);
93 
94 void initializeAMDGPUMachineCFGStructurizerPass(PassRegistry&);
95 extern char &AMDGPUMachineCFGStructurizerID;
96 
97 void initializeAMDGPUAlwaysInlinePass(PassRegistry&);
98 
99 Pass *createAMDGPUAnnotateKernelFeaturesPass();
100 Pass *createAMDGPUAttributorPass();
101 void initializeAMDGPUAttributorPass(PassRegistry &);
102 void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &);
103 extern char &AMDGPUAnnotateKernelFeaturesID;
104 
105 FunctionPass *createAMDGPUAtomicOptimizerPass();
106 void initializeAMDGPUAtomicOptimizerPass(PassRegistry &);
107 extern char &AMDGPUAtomicOptimizerID;
108 
109 ModulePass *createAMDGPULowerIntrinsicsPass();
110 void initializeAMDGPULowerIntrinsicsPass(PassRegistry &);
111 extern char &AMDGPULowerIntrinsicsID;
112 
113 ModulePass *createAMDGPUFixFunctionBitcastsPass();
114 void initializeAMDGPUFixFunctionBitcastsPass(PassRegistry &);
115 extern char &AMDGPUFixFunctionBitcastsID;
116 
117 FunctionPass *createAMDGPULowerKernelArgumentsPass();
118 void initializeAMDGPULowerKernelArgumentsPass(PassRegistry &);
119 extern char &AMDGPULowerKernelArgumentsID;
120 
121 ModulePass *createAMDGPULowerKernelAttributesPass();
122 void initializeAMDGPULowerKernelAttributesPass(PassRegistry &);
123 extern char &AMDGPULowerKernelAttributesID;
124 
125 struct AMDGPULowerKernelAttributesPass
126     : PassInfoMixin<AMDGPULowerKernelAttributesPass> {
127   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
128 };
129 
130 void initializeAMDGPUPropagateAttributesEarlyPass(PassRegistry &);
131 extern char &AMDGPUPropagateAttributesEarlyID;
132 
133 struct AMDGPUPropagateAttributesEarlyPass
134     : PassInfoMixin<AMDGPUPropagateAttributesEarlyPass> {
135   AMDGPUPropagateAttributesEarlyPass(TargetMachine &TM) : TM(TM) {}
136   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
137 
138 private:
139   TargetMachine &TM;
140 };
141 
142 void initializeAMDGPUPropagateAttributesLatePass(PassRegistry &);
143 extern char &AMDGPUPropagateAttributesLateID;
144 
145 struct AMDGPUPropagateAttributesLatePass
146     : PassInfoMixin<AMDGPUPropagateAttributesLatePass> {
147   AMDGPUPropagateAttributesLatePass(TargetMachine &TM) : TM(TM) {}
148   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
149 
150 private:
151   TargetMachine &TM;
152 };
153 
154 void initializeAMDGPUReplaceLDSUseWithPointerPass(PassRegistry &);
155 extern char &AMDGPUReplaceLDSUseWithPointerID;
156 
157 struct AMDGPUReplaceLDSUseWithPointerPass
158     : PassInfoMixin<AMDGPUReplaceLDSUseWithPointerPass> {
159   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
160 };
161 
162 void initializeAMDGPULowerModuleLDSPass(PassRegistry &);
163 extern char &AMDGPULowerModuleLDSID;
164 
165 struct AMDGPULowerModuleLDSPass : PassInfoMixin<AMDGPULowerModuleLDSPass> {
166   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
167 };
168 
169 void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &);
170 extern char &AMDGPURewriteOutArgumentsID;
171 
172 void initializeGCNDPPCombinePass(PassRegistry &);
173 extern char &GCNDPPCombineID;
174 
175 void initializeR600ClauseMergePassPass(PassRegistry &);
176 extern char &R600ClauseMergePassID;
177 
178 void initializeR600ControlFlowFinalizerPass(PassRegistry &);
179 extern char &R600ControlFlowFinalizerID;
180 
181 void initializeR600ExpandSpecialInstrsPassPass(PassRegistry &);
182 extern char &R600ExpandSpecialInstrsPassID;
183 
184 void initializeR600VectorRegMergerPass(PassRegistry &);
185 extern char &R600VectorRegMergerID;
186 
187 void initializeR600PacketizerPass(PassRegistry &);
188 extern char &R600PacketizerID;
189 
190 void initializeSIFoldOperandsPass(PassRegistry &);
191 extern char &SIFoldOperandsID;
192 
193 void initializeSIPeepholeSDWAPass(PassRegistry &);
194 extern char &SIPeepholeSDWAID;
195 
196 void initializeSIShrinkInstructionsPass(PassRegistry&);
197 extern char &SIShrinkInstructionsID;
198 
199 void initializeSIFixSGPRCopiesPass(PassRegistry &);
200 extern char &SIFixSGPRCopiesID;
201 
202 void initializeSIFixVGPRCopiesPass(PassRegistry &);
203 extern char &SIFixVGPRCopiesID;
204 
205 void initializeSILowerI1CopiesPass(PassRegistry &);
206 extern char &SILowerI1CopiesID;
207 
208 void initializeSILowerSGPRSpillsPass(PassRegistry &);
209 extern char &SILowerSGPRSpillsID;
210 
211 void initializeSILoadStoreOptimizerPass(PassRegistry &);
212 extern char &SILoadStoreOptimizerID;
213 
214 void initializeSIWholeQuadModePass(PassRegistry &);
215 extern char &SIWholeQuadModeID;
216 
217 void initializeSILowerControlFlowPass(PassRegistry &);
218 extern char &SILowerControlFlowID;
219 
220 void initializeSIPreEmitPeepholePass(PassRegistry &);
221 extern char &SIPreEmitPeepholeID;
222 
223 void initializeSILateBranchLoweringPass(PassRegistry &);
224 extern char &SILateBranchLoweringPassID;
225 
226 void initializeSIOptimizeExecMaskingPass(PassRegistry &);
227 extern char &SIOptimizeExecMaskingID;
228 
229 void initializeSIPreAllocateWWMRegsPass(PassRegistry &);
230 extern char &SIPreAllocateWWMRegsID;
231 
232 void initializeAMDGPUSimplifyLibCallsPass(PassRegistry &);
233 extern char &AMDGPUSimplifyLibCallsID;
234 
235 void initializeAMDGPUUseNativeCallsPass(PassRegistry &);
236 extern char &AMDGPUUseNativeCallsID;
237 
238 void initializeAMDGPUPerfHintAnalysisPass(PassRegistry &);
239 extern char &AMDGPUPerfHintAnalysisID;
240 
241 // Passes common to R600 and SI
242 FunctionPass *createAMDGPUPromoteAlloca();
243 void initializeAMDGPUPromoteAllocaPass(PassRegistry&);
244 extern char &AMDGPUPromoteAllocaID;
245 
246 FunctionPass *createAMDGPUPromoteAllocaToVector();
247 void initializeAMDGPUPromoteAllocaToVectorPass(PassRegistry&);
248 extern char &AMDGPUPromoteAllocaToVectorID;
249 
250 struct AMDGPUPromoteAllocaPass : PassInfoMixin<AMDGPUPromoteAllocaPass> {
251   AMDGPUPromoteAllocaPass(TargetMachine &TM) : TM(TM) {}
252   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
253 
254 private:
255   TargetMachine &TM;
256 };
257 
258 struct AMDGPUPromoteAllocaToVectorPass
259     : PassInfoMixin<AMDGPUPromoteAllocaToVectorPass> {
260   AMDGPUPromoteAllocaToVectorPass(TargetMachine &TM) : TM(TM) {}
261   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
262 
263 private:
264   TargetMachine &TM;
265 };
266 
267 Pass *createAMDGPUStructurizeCFGPass();
268 FunctionPass *createAMDGPUISelDag(
269   TargetMachine *TM = nullptr,
270   CodeGenOpt::Level OptLevel = CodeGenOpt::Default);
271 ModulePass *createAMDGPUAlwaysInlinePass(bool GlobalOpt = true);
272 
273 struct AMDGPUAlwaysInlinePass : PassInfoMixin<AMDGPUAlwaysInlinePass> {
274   AMDGPUAlwaysInlinePass(bool GlobalOpt = true) : GlobalOpt(GlobalOpt) {}
275   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
276 
277 private:
278   bool GlobalOpt;
279 };
280 
281 ModulePass *createR600OpenCLImageTypeLoweringPass();
282 FunctionPass *createAMDGPUAnnotateUniformValues();
283 
284 ModulePass *createAMDGPUPrintfRuntimeBinding();
285 void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry&);
286 extern char &AMDGPUPrintfRuntimeBindingID;
287 
288 void initializeAMDGPUResourceUsageAnalysisPass(PassRegistry &);
289 extern char &AMDGPUResourceUsageAnalysisID;
290 
291 struct AMDGPUPrintfRuntimeBindingPass
292     : PassInfoMixin<AMDGPUPrintfRuntimeBindingPass> {
293   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
294 };
295 
296 ModulePass* createAMDGPUUnifyMetadataPass();
297 void initializeAMDGPUUnifyMetadataPass(PassRegistry&);
298 extern char &AMDGPUUnifyMetadataID;
299 
300 struct AMDGPUUnifyMetadataPass : PassInfoMixin<AMDGPUUnifyMetadataPass> {
301   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
302 };
303 
304 void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry&);
305 extern char &SIOptimizeExecMaskingPreRAID;
306 
307 void initializeSIOptimizeVGPRLiveRangePass(PassRegistry &);
308 extern char &SIOptimizeVGPRLiveRangeID;
309 
310 void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry&);
311 extern char &AMDGPUAnnotateUniformValuesPassID;
312 
313 void initializeAMDGPUCodeGenPreparePass(PassRegistry&);
314 extern char &AMDGPUCodeGenPrepareID;
315 
316 void initializeAMDGPULateCodeGenPreparePass(PassRegistry &);
317 extern char &AMDGPULateCodeGenPrepareID;
318 
319 void initializeSIAnnotateControlFlowPass(PassRegistry&);
320 extern char &SIAnnotateControlFlowPassID;
321 
322 void initializeSIMemoryLegalizerPass(PassRegistry&);
323 extern char &SIMemoryLegalizerID;
324 
325 void initializeSIModeRegisterPass(PassRegistry&);
326 extern char &SIModeRegisterID;
327 
328 void initializeSIInsertHardClausesPass(PassRegistry &);
329 extern char &SIInsertHardClausesID;
330 
331 void initializeSIInsertWaitcntsPass(PassRegistry&);
332 extern char &SIInsertWaitcntsID;
333 
334 void initializeSIFormMemoryClausesPass(PassRegistry&);
335 extern char &SIFormMemoryClausesID;
336 
337 void initializeSIPostRABundlerPass(PassRegistry&);
338 extern char &SIPostRABundlerID;
339 
340 void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry&);
341 extern char &AMDGPUUnifyDivergentExitNodesID;
342 
343 ImmutablePass *createAMDGPUAAWrapperPass();
344 void initializeAMDGPUAAWrapperPassPass(PassRegistry&);
345 ImmutablePass *createAMDGPUExternalAAWrapperPass();
346 void initializeAMDGPUExternalAAWrapperPass(PassRegistry&);
347 
348 void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &);
349 
350 ModulePass *createAMDGPUOpenCLEnqueuedBlockLoweringPass();
351 void initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(PassRegistry &);
352 extern char &AMDGPUOpenCLEnqueuedBlockLoweringID;
353 
354 void initializeGCNNSAReassignPass(PassRegistry &);
355 extern char &GCNNSAReassignID;
356 
357 void initializeGCNPreRAOptimizationsPass(PassRegistry &);
358 extern char &GCNPreRAOptimizationsID;
359 
namespace AMDGPU {
/// Target index identifiers. The values are consecutive, starting at zero;
/// they are spelled out here so the numbering is visible at a glance.
enum TargetIndex {
  TI_CONSTDATA_START = 0,
  TI_SCRATCH_RSRC_DWORD0 = 1,
  TI_SCRATCH_RSRC_DWORD1 = 2,
  TI_SCRATCH_RSRC_DWORD2 = 3,
  TI_SCRATCH_RSRC_DWORD3 = 4
};
} // End namespace AMDGPU
369 
/// OpenCL uses address spaces to differentiate between various memory
/// regions on the hardware. On the CPU all of the address spaces point to
/// the same memory, however on the GPU, each address space points to a
/// separate piece of memory that is unique from other memory locations.
namespace AMDGPUAS {
  enum : unsigned {
    // The maximum value for flat, generic, local, private, constant and region.
    MAX_AMDGPU_ADDRESS = 7,

    // Enumerators below are listed in increasing numeric order.
    FLAT_ADDRESS = 0,     ///< Address space for flat memory.
    GLOBAL_ADDRESS = 1,   ///< Address space for global memory (RAT0, VTX0).
    REGION_ADDRESS = 2,   ///< Address space for region memory. (GDS)
    LOCAL_ADDRESS = 3,    ///< Address space for local memory.
    CONSTANT_ADDRESS = 4, ///< Address space for constant memory (VTX2).
    PRIVATE_ADDRESS = 5,  ///< Address space for private memory.

    CONSTANT_ADDRESS_32BIT = 6, ///< Address space for 32-bit constant memory.

    BUFFER_FAT_POINTER = 7, ///< Address space for 160-bit buffer fat pointers.

    // The two PARAM_* enumerators share the values 6 and 7 with the two
    // enumerators directly above.

    /// Address space for direct addressable parameter memory (CONST0).
    PARAM_D_ADDRESS = 6,
    /// Address space for indirect addressable parameter memory (VTX1).
    PARAM_I_ADDRESS = 7,

    // Do not re-order the CONSTANT_BUFFER_* enums.  Several places depend on
    // this order to be able to dynamically index a constant buffer, for
    // example:
    //
    // ConstantBufferAS = CONSTANT_BUFFER_0 + CBIdx

    CONSTANT_BUFFER_0 = 8,
    CONSTANT_BUFFER_1 = 9,
    CONSTANT_BUFFER_2 = 10,
    CONSTANT_BUFFER_3 = 11,
    CONSTANT_BUFFER_4 = 12,
    CONSTANT_BUFFER_5 = 13,
    CONSTANT_BUFFER_6 = 14,
    CONSTANT_BUFFER_7 = 15,
    CONSTANT_BUFFER_8 = 16,
    CONSTANT_BUFFER_9 = 17,
    CONSTANT_BUFFER_10 = 18,
    CONSTANT_BUFFER_11 = 19,
    CONSTANT_BUFFER_12 = 20,
    CONSTANT_BUFFER_13 = 21,
    CONSTANT_BUFFER_14 = 22,
    CONSTANT_BUFFER_15 = 23,

    // Some places use this if the address space can't be determined.
    UNKNOWN_ADDRESS_SPACE = ~0u,
  };
} // End namespace AMDGPUAS
425 
426 namespace AMDGPU {
427 
428 // FIXME: Missing constant_32bit
429 inline bool isFlatGlobalAddrSpace(unsigned AS) {
430   return AS == AMDGPUAS::GLOBAL_ADDRESS ||
431          AS == AMDGPUAS::FLAT_ADDRESS ||
432          AS == AMDGPUAS::CONSTANT_ADDRESS ||
433          AS > AMDGPUAS::MAX_AMDGPU_ADDRESS;
434 }
435 }
436 
437 } // End namespace llvm
438 
439 #endif
440