xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.h (revision 1db9f3b21e39176dd5b67cf8ac378633b172463e)
1 //===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 /// \file
8 //===----------------------------------------------------------------------===//
9 
10 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPU_H
11 #define LLVM_LIB_TARGET_AMDGPU_AMDGPU_H
12 
13 #include "llvm/IR/PassManager.h"
14 #include "llvm/Pass.h"
15 #include "llvm/Support/AMDGPUAddrSpace.h"
16 #include "llvm/Support/CodeGen.h"
17 
18 namespace llvm {
19 
20 class AMDGPUTargetMachine;
21 class TargetMachine;
22 
23 // GlobalISel passes
24 void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &);
25 FunctionPass *createAMDGPUPreLegalizeCombiner(bool IsOptNone);
26 void initializeAMDGPUPostLegalizerCombinerPass(PassRegistry &);
27 FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone);
28 FunctionPass *createAMDGPURegBankCombiner(bool IsOptNone);
29 void initializeAMDGPURegBankCombinerPass(PassRegistry &);
30 
31 void initializeAMDGPURegBankSelectPass(PassRegistry &);
32 
33 // SI Passes
34 FunctionPass *createGCNDPPCombinePass();
35 FunctionPass *createSIAnnotateControlFlowPass();
36 FunctionPass *createSIFoldOperandsPass();
37 FunctionPass *createSIPeepholeSDWAPass();
38 FunctionPass *createSILowerI1CopiesPass();
39 FunctionPass *createAMDGPUGlobalISelDivergenceLoweringPass();
40 FunctionPass *createSIShrinkInstructionsPass();
41 FunctionPass *createSILoadStoreOptimizerPass();
42 FunctionPass *createSIWholeQuadModePass();
43 FunctionPass *createSIFixControlFlowLiveIntervalsPass();
44 FunctionPass *createSIOptimizeExecMaskingPreRAPass();
45 FunctionPass *createSIOptimizeVGPRLiveRangePass();
46 FunctionPass *createSIFixSGPRCopiesPass();
47 FunctionPass *createLowerWWMCopiesPass();
48 FunctionPass *createSIMemoryLegalizerPass();
49 FunctionPass *createSIInsertWaitcntsPass();
50 FunctionPass *createSIPreAllocateWWMRegsPass();
51 FunctionPass *createSIFormMemoryClausesPass();
52 
53 FunctionPass *createSIPostRABundlerPass();
54 FunctionPass *createAMDGPUImageIntrinsicOptimizerPass(const TargetMachine *);
55 ModulePass *createAMDGPURemoveIncompatibleFunctionsPass(const TargetMachine *);
56 FunctionPass *createAMDGPUCodeGenPreparePass();
57 FunctionPass *createAMDGPULateCodeGenPreparePass();
58 FunctionPass *createAMDGPUMachineCFGStructurizerPass();
59 FunctionPass *createAMDGPURewriteOutArgumentsPass();
60 ModulePass *
61 createAMDGPULowerModuleLDSLegacyPass(const AMDGPUTargetMachine *TM = nullptr);
62 FunctionPass *createSIModeRegisterPass();
63 FunctionPass *createGCNPreRAOptimizationsPass();
64 
65 struct AMDGPUSimplifyLibCallsPass : PassInfoMixin<AMDGPUSimplifyLibCallsPass> {
66   AMDGPUSimplifyLibCallsPass() {}
67   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
68 };
69 
70 struct AMDGPUImageIntrinsicOptimizerPass
71     : PassInfoMixin<AMDGPUImageIntrinsicOptimizerPass> {
72   AMDGPUImageIntrinsicOptimizerPass(TargetMachine &TM) : TM(TM) {}
73   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
74 
75 private:
76   TargetMachine &TM;
77 };
78 
79 struct AMDGPUUseNativeCallsPass : PassInfoMixin<AMDGPUUseNativeCallsPass> {
80   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
81 };
82 
83 void initializeAMDGPUDAGToDAGISelPass(PassRegistry&);
84 
85 void initializeAMDGPUMachineCFGStructurizerPass(PassRegistry&);
86 extern char &AMDGPUMachineCFGStructurizerID;
87 
88 void initializeAMDGPUAlwaysInlinePass(PassRegistry&);
89 
90 Pass *createAMDGPUAnnotateKernelFeaturesPass();
91 Pass *createAMDGPUAttributorLegacyPass();
92 void initializeAMDGPUAttributorLegacyPass(PassRegistry &);
93 void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &);
94 extern char &AMDGPUAnnotateKernelFeaturesID;
95 
96 // DPP/Iterative option enables the atomic optimizer with given strategy
97 // whereas None disables the atomic optimizer.
98 enum class ScanOptions { DPP, Iterative, None };
99 FunctionPass *createAMDGPUAtomicOptimizerPass(ScanOptions ScanStrategy);
100 void initializeAMDGPUAtomicOptimizerPass(PassRegistry &);
101 extern char &AMDGPUAtomicOptimizerID;
102 
103 ModulePass *createAMDGPUCtorDtorLoweringLegacyPass();
104 void initializeAMDGPUCtorDtorLoweringLegacyPass(PassRegistry &);
105 extern char &AMDGPUCtorDtorLoweringLegacyPassID;
106 
107 FunctionPass *createAMDGPULowerKernelArgumentsPass();
108 void initializeAMDGPULowerKernelArgumentsPass(PassRegistry &);
109 extern char &AMDGPULowerKernelArgumentsID;
110 
111 FunctionPass *createAMDGPUPromoteKernelArgumentsPass();
112 void initializeAMDGPUPromoteKernelArgumentsPass(PassRegistry &);
113 extern char &AMDGPUPromoteKernelArgumentsID;
114 
115 struct AMDGPUPromoteKernelArgumentsPass
116     : PassInfoMixin<AMDGPUPromoteKernelArgumentsPass> {
117   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
118 };
119 
120 ModulePass *createAMDGPULowerKernelAttributesPass();
121 void initializeAMDGPULowerKernelAttributesPass(PassRegistry &);
122 extern char &AMDGPULowerKernelAttributesID;
123 
124 struct AMDGPULowerKernelAttributesPass
125     : PassInfoMixin<AMDGPULowerKernelAttributesPass> {
126   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
127 };
128 
129 void initializeAMDGPULowerModuleLDSLegacyPass(PassRegistry &);
130 extern char &AMDGPULowerModuleLDSLegacyPassID;
131 
132 struct AMDGPULowerModuleLDSPass : PassInfoMixin<AMDGPULowerModuleLDSPass> {
133   const AMDGPUTargetMachine &TM;
134   AMDGPULowerModuleLDSPass(const AMDGPUTargetMachine &TM_) : TM(TM_) {}
135 
136   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
137 };
138 
139 void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &);
140 extern char &AMDGPURewriteOutArgumentsID;
141 
142 void initializeGCNDPPCombinePass(PassRegistry &);
143 extern char &GCNDPPCombineID;
144 
145 void initializeSIFoldOperandsPass(PassRegistry &);
146 extern char &SIFoldOperandsID;
147 
148 void initializeSIPeepholeSDWAPass(PassRegistry &);
149 extern char &SIPeepholeSDWAID;
150 
151 void initializeSIShrinkInstructionsPass(PassRegistry&);
152 extern char &SIShrinkInstructionsID;
153 
154 void initializeSIFixSGPRCopiesPass(PassRegistry &);
155 extern char &SIFixSGPRCopiesID;
156 
157 void initializeSIFixVGPRCopiesPass(PassRegistry &);
158 extern char &SIFixVGPRCopiesID;
159 
160 void initializeSILowerWWMCopiesPass(PassRegistry &);
161 extern char &SILowerWWMCopiesID;
162 
163 void initializeSILowerI1CopiesPass(PassRegistry &);
164 extern char &SILowerI1CopiesID;
165 
166 void initializeAMDGPUGlobalISelDivergenceLoweringPass(PassRegistry &);
167 extern char &AMDGPUGlobalISelDivergenceLoweringID;
168 
169 void initializeSILowerSGPRSpillsPass(PassRegistry &);
170 extern char &SILowerSGPRSpillsID;
171 
172 void initializeSILoadStoreOptimizerPass(PassRegistry &);
173 extern char &SILoadStoreOptimizerID;
174 
175 void initializeSIWholeQuadModePass(PassRegistry &);
176 extern char &SIWholeQuadModeID;
177 
178 void initializeSILowerControlFlowPass(PassRegistry &);
179 extern char &SILowerControlFlowID;
180 
181 void initializeSIPreEmitPeepholePass(PassRegistry &);
182 extern char &SIPreEmitPeepholeID;
183 
184 void initializeSILateBranchLoweringPass(PassRegistry &);
185 extern char &SILateBranchLoweringPassID;
186 
187 void initializeSIOptimizeExecMaskingPass(PassRegistry &);
188 extern char &SIOptimizeExecMaskingID;
189 
190 void initializeSIPreAllocateWWMRegsPass(PassRegistry &);
191 extern char &SIPreAllocateWWMRegsID;
192 
193 void initializeAMDGPUImageIntrinsicOptimizerPass(PassRegistry &);
194 extern char &AMDGPUImageIntrinsicOptimizerID;
195 
196 void initializeAMDGPUPerfHintAnalysisPass(PassRegistry &);
197 extern char &AMDGPUPerfHintAnalysisID;
198 
199 void initializeGCNRegPressurePrinterPass(PassRegistry &);
200 extern char &GCNRegPressurePrinterID;
201 
202 // Passes common to R600 and SI
203 FunctionPass *createAMDGPUPromoteAlloca();
204 void initializeAMDGPUPromoteAllocaPass(PassRegistry&);
205 extern char &AMDGPUPromoteAllocaID;
206 
207 FunctionPass *createAMDGPUPromoteAllocaToVector();
208 void initializeAMDGPUPromoteAllocaToVectorPass(PassRegistry&);
209 extern char &AMDGPUPromoteAllocaToVectorID;
210 
211 struct AMDGPUPromoteAllocaPass : PassInfoMixin<AMDGPUPromoteAllocaPass> {
212   AMDGPUPromoteAllocaPass(TargetMachine &TM) : TM(TM) {}
213   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
214 
215 private:
216   TargetMachine &TM;
217 };
218 
219 struct AMDGPUPromoteAllocaToVectorPass
220     : PassInfoMixin<AMDGPUPromoteAllocaToVectorPass> {
221   AMDGPUPromoteAllocaToVectorPass(TargetMachine &TM) : TM(TM) {}
222   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
223 
224 private:
225   TargetMachine &TM;
226 };
227 
228 struct AMDGPUAtomicOptimizerPass : PassInfoMixin<AMDGPUAtomicOptimizerPass> {
229   AMDGPUAtomicOptimizerPass(TargetMachine &TM, ScanOptions ScanImpl)
230       : TM(TM), ScanImpl(ScanImpl) {}
231   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
232 
233 private:
234   TargetMachine &TM;
235   ScanOptions ScanImpl;
236 };
237 
238 Pass *createAMDGPUStructurizeCFGPass();
239 FunctionPass *createAMDGPUISelDag(TargetMachine &TM, CodeGenOptLevel OptLevel);
240 ModulePass *createAMDGPUAlwaysInlinePass(bool GlobalOpt = true);
241 
242 struct AMDGPUAlwaysInlinePass : PassInfoMixin<AMDGPUAlwaysInlinePass> {
243   AMDGPUAlwaysInlinePass(bool GlobalOpt = true) : GlobalOpt(GlobalOpt) {}
244   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
245 
246 private:
247   bool GlobalOpt;
248 };
249 
250 class AMDGPUCodeGenPreparePass
251     : public PassInfoMixin<AMDGPUCodeGenPreparePass> {
252 private:
253   TargetMachine &TM;
254 
255 public:
256   AMDGPUCodeGenPreparePass(TargetMachine &TM) : TM(TM){};
257   PreservedAnalyses run(Function &, FunctionAnalysisManager &);
258 };
259 
260 class AMDGPULowerKernelArgumentsPass
261     : public PassInfoMixin<AMDGPULowerKernelArgumentsPass> {
262 private:
263   TargetMachine &TM;
264 
265 public:
266   AMDGPULowerKernelArgumentsPass(TargetMachine &TM) : TM(TM){};
267   PreservedAnalyses run(Function &, FunctionAnalysisManager &);
268 };
269 
270 class AMDGPUAttributorPass : public PassInfoMixin<AMDGPUAttributorPass> {
271 private:
272   TargetMachine &TM;
273 
274 public:
275   AMDGPUAttributorPass(TargetMachine &TM) : TM(TM){};
276   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
277 };
278 
279 FunctionPass *createAMDGPUAnnotateUniformValues();
280 
281 ModulePass *createAMDGPUPrintfRuntimeBinding();
282 void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry&);
283 extern char &AMDGPUPrintfRuntimeBindingID;
284 
285 void initializeAMDGPUResourceUsageAnalysisPass(PassRegistry &);
286 extern char &AMDGPUResourceUsageAnalysisID;
287 
288 struct AMDGPUPrintfRuntimeBindingPass
289     : PassInfoMixin<AMDGPUPrintfRuntimeBindingPass> {
290   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
291 };
292 
293 ModulePass* createAMDGPUUnifyMetadataPass();
294 void initializeAMDGPUUnifyMetadataPass(PassRegistry&);
295 extern char &AMDGPUUnifyMetadataID;
296 
297 struct AMDGPUUnifyMetadataPass : PassInfoMixin<AMDGPUUnifyMetadataPass> {
298   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
299 };
300 
301 void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry&);
302 extern char &SIOptimizeExecMaskingPreRAID;
303 
304 void initializeSIOptimizeVGPRLiveRangePass(PassRegistry &);
305 extern char &SIOptimizeVGPRLiveRangeID;
306 
307 void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry&);
308 extern char &AMDGPUAnnotateUniformValuesPassID;
309 
310 void initializeAMDGPUCodeGenPreparePass(PassRegistry&);
311 extern char &AMDGPUCodeGenPrepareID;
312 
313 void initializeAMDGPURemoveIncompatibleFunctionsPass(PassRegistry &);
314 extern char &AMDGPURemoveIncompatibleFunctionsID;
315 
316 void initializeAMDGPULateCodeGenPreparePass(PassRegistry &);
317 extern char &AMDGPULateCodeGenPrepareID;
318 
319 FunctionPass *createAMDGPURewriteUndefForPHILegacyPass();
320 void initializeAMDGPURewriteUndefForPHILegacyPass(PassRegistry &);
321 extern char &AMDGPURewriteUndefForPHILegacyPassID;
322 
323 class AMDGPURewriteUndefForPHIPass
324     : public PassInfoMixin<AMDGPURewriteUndefForPHIPass> {
325 public:
326   AMDGPURewriteUndefForPHIPass() = default;
327   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
328 };
329 
330 void initializeSIAnnotateControlFlowPass(PassRegistry&);
331 extern char &SIAnnotateControlFlowPassID;
332 
333 void initializeSIMemoryLegalizerPass(PassRegistry&);
334 extern char &SIMemoryLegalizerID;
335 
336 void initializeSIModeRegisterPass(PassRegistry&);
337 extern char &SIModeRegisterID;
338 
339 void initializeAMDGPUInsertDelayAluPass(PassRegistry &);
340 extern char &AMDGPUInsertDelayAluID;
341 
342 void initializeAMDGPUInsertSingleUseVDSTPass(PassRegistry &);
343 extern char &AMDGPUInsertSingleUseVDSTID;
344 
345 void initializeSIInsertHardClausesPass(PassRegistry &);
346 extern char &SIInsertHardClausesID;
347 
348 void initializeSIInsertWaitcntsPass(PassRegistry&);
349 extern char &SIInsertWaitcntsID;
350 
351 void initializeSIFormMemoryClausesPass(PassRegistry&);
352 extern char &SIFormMemoryClausesID;
353 
354 void initializeSIPostRABundlerPass(PassRegistry&);
355 extern char &SIPostRABundlerID;
356 
357 void initializeGCNCreateVOPDPass(PassRegistry &);
358 extern char &GCNCreateVOPDID;
359 
360 void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry&);
361 extern char &AMDGPUUnifyDivergentExitNodesID;
362 
363 ImmutablePass *createAMDGPUAAWrapperPass();
364 void initializeAMDGPUAAWrapperPassPass(PassRegistry&);
365 ImmutablePass *createAMDGPUExternalAAWrapperPass();
366 void initializeAMDGPUExternalAAWrapperPass(PassRegistry&);
367 
368 void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &);
369 
370 ModulePass *createAMDGPUOpenCLEnqueuedBlockLoweringPass();
371 void initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(PassRegistry &);
372 extern char &AMDGPUOpenCLEnqueuedBlockLoweringID;
373 
374 void initializeGCNNSAReassignPass(PassRegistry &);
375 extern char &GCNNSAReassignID;
376 
377 void initializeGCNPreRALongBranchRegPass(PassRegistry &);
378 extern char &GCNPreRALongBranchRegID;
379 
380 void initializeGCNPreRAOptimizationsPass(PassRegistry &);
381 extern char &GCNPreRAOptimizationsID;
382 
383 FunctionPass *createAMDGPUSetWavePriorityPass();
384 void initializeAMDGPUSetWavePriorityPass(PassRegistry &);
385 
386 void initializeGCNRewritePartialRegUsesPass(llvm::PassRegistry &);
387 extern char &GCNRewritePartialRegUsesID;
388 
389 namespace AMDGPU {
/// Target index values. The enumerators are numbered consecutively from
/// zero; the values are spelled out so the numbering is visible at a glance.
enum TargetIndex {
  TI_CONSTDATA_START = 0,
  TI_SCRATCH_RSRC_DWORD0 = 1,
  TI_SCRATCH_RSRC_DWORD1 = 2,
  TI_SCRATCH_RSRC_DWORD2 = 3,
  TI_SCRATCH_RSRC_DWORD3 = 4
};
397 
398 // FIXME: Missing constant_32bit
399 inline bool isFlatGlobalAddrSpace(unsigned AS) {
400   return AS == AMDGPUAS::GLOBAL_ADDRESS ||
401          AS == AMDGPUAS::FLAT_ADDRESS ||
402          AS == AMDGPUAS::CONSTANT_ADDRESS ||
403          AS > AMDGPUAS::MAX_AMDGPU_ADDRESS;
404 }
405 
406 inline bool isExtendedGlobalAddrSpace(unsigned AS) {
407   return AS == AMDGPUAS::GLOBAL_ADDRESS || AS == AMDGPUAS::CONSTANT_ADDRESS ||
408          AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
409          AS > AMDGPUAS::MAX_AMDGPU_ADDRESS;
410 }
411 
412 static inline bool addrspacesMayAlias(unsigned AS1, unsigned AS2) {
413   static_assert(AMDGPUAS::MAX_AMDGPU_ADDRESS <= 9, "Addr space out of range");
414 
415   if (AS1 > AMDGPUAS::MAX_AMDGPU_ADDRESS || AS2 > AMDGPUAS::MAX_AMDGPU_ADDRESS)
416     return true;
417 
418   // This array is indexed by address space value enum elements 0 ... to 9
419   // clang-format off
420   static const bool ASAliasRules[10][10] = {
421     /*                       Flat   Global Region  Group Constant Private Const32 BufFatPtr BufRsrc BufStrdPtr */
422     /* Flat     */            {true,  true,  false, true,  true,  true,  true,  true,  true,  true},
423     /* Global   */            {true,  true,  false, false, true,  false, true,  true,  true,  true},
424     /* Region   */            {false, false, true,  false, false, false, false, false, false, false},
425     /* Group    */            {true,  false, false, true,  false, false, false, false, false, false},
426     /* Constant */            {true,  true,  false, false, false, false, true,  true,  true,  true},
427     /* Private  */            {true,  false, false, false, false, true,  false, false, false, false},
428     /* Constant 32-bit */     {true,  true,  false, false, true,  false, false, true,  true,  true},
429     /* Buffer Fat Ptr  */     {true,  true,  false, false, true,  false, true,  true,  true,  true},
430     /* Buffer Resource */     {true,  true,  false, false, true,  false, true,  true,  true,  true},
431     /* Buffer Strided Ptr  */ {true,  true,  false, false, true,  false, true,  true,  true,  true},
432   };
433   // clang-format on
434 
435   return ASAliasRules[AS1][AS2];
436 }
437 
438 }
439 
440 } // End namespace llvm
441 
442 #endif
443