xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.h (revision 7fdf597e96a02165cfe22ff357b857d5fa15ed8a)
1 //===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
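/// \file
/// Declares the AMDGPU target's pass factory functions, PassRegistry
/// initializers, pass IDs, PassInfoMixin-based pass classes, and a few
/// address-space helper functions.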
8 //===----------------------------------------------------------------------===//
9 
10 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPU_H
11 #define LLVM_LIB_TARGET_AMDGPU_AMDGPU_H
12 
13 #include "llvm/IR/PassManager.h"
14 #include "llvm/Pass.h"
15 #include "llvm/Support/AMDGPUAddrSpace.h"
16 #include "llvm/Support/CodeGen.h"
17 
18 namespace llvm {
19 
// Forward declarations; this header only refers to these types through
// pointers and references.
class TargetMachine;
class AMDGPUTargetMachine;
22 
23 // GlobalISel passes
24 void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &);
25 FunctionPass *createAMDGPUPreLegalizeCombiner(bool IsOptNone);
26 void initializeAMDGPUPostLegalizerCombinerPass(PassRegistry &);
27 FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone);
28 FunctionPass *createAMDGPURegBankCombiner(bool IsOptNone);
29 void initializeAMDGPURegBankCombinerPass(PassRegistry &);
30 
31 void initializeAMDGPURegBankSelectPass(PassRegistry &);
32 
33 // SI Passes
34 FunctionPass *createGCNDPPCombinePass();
35 FunctionPass *createSIAnnotateControlFlowPass();
36 FunctionPass *createSIFoldOperandsPass();
37 FunctionPass *createSIPeepholeSDWAPass();
38 FunctionPass *createSILowerI1CopiesPass();
39 FunctionPass *createAMDGPUGlobalISelDivergenceLoweringPass();
40 FunctionPass *createSIShrinkInstructionsPass();
41 FunctionPass *createSILoadStoreOptimizerPass();
42 FunctionPass *createSIWholeQuadModePass();
43 FunctionPass *createSIFixControlFlowLiveIntervalsPass();
44 FunctionPass *createSIOptimizeExecMaskingPreRAPass();
45 FunctionPass *createSIOptimizeVGPRLiveRangePass();
46 FunctionPass *createSIFixSGPRCopiesPass();
47 FunctionPass *createLowerWWMCopiesPass();
48 FunctionPass *createSIMemoryLegalizerPass();
49 FunctionPass *createSIInsertWaitcntsPass();
50 FunctionPass *createSIPreAllocateWWMRegsPass();
51 FunctionPass *createSIFormMemoryClausesPass();
52 
53 FunctionPass *createSIPostRABundlerPass();
54 FunctionPass *createAMDGPUImageIntrinsicOptimizerPass(const TargetMachine *);
55 ModulePass *createAMDGPURemoveIncompatibleFunctionsPass(const TargetMachine *);
56 FunctionPass *createAMDGPUCodeGenPreparePass();
57 FunctionPass *createAMDGPULateCodeGenPreparePass();
58 FunctionPass *createAMDGPUMachineCFGStructurizerPass();
59 FunctionPass *createAMDGPURewriteOutArgumentsPass();
60 ModulePass *
61 createAMDGPULowerModuleLDSLegacyPass(const AMDGPUTargetMachine *TM = nullptr);
62 ModulePass *createAMDGPULowerBufferFatPointersPass();
63 FunctionPass *createSIModeRegisterPass();
64 FunctionPass *createGCNPreRAOptimizationsPass();
65 
66 struct AMDGPUSimplifyLibCallsPass : PassInfoMixin<AMDGPUSimplifyLibCallsPass> {
67   AMDGPUSimplifyLibCallsPass() {}
68   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
69 };
70 
71 struct AMDGPUImageIntrinsicOptimizerPass
72     : PassInfoMixin<AMDGPUImageIntrinsicOptimizerPass> {
73   AMDGPUImageIntrinsicOptimizerPass(TargetMachine &TM) : TM(TM) {}
74   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
75 
76 private:
77   TargetMachine &TM;
78 };
79 
80 struct AMDGPUUseNativeCallsPass : PassInfoMixin<AMDGPUUseNativeCallsPass> {
81   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
82 };
83 
84 void initializeAMDGPUDAGToDAGISelLegacyPass(PassRegistry &);
85 
86 void initializeAMDGPUMachineCFGStructurizerPass(PassRegistry&);
87 extern char &AMDGPUMachineCFGStructurizerID;
88 
89 void initializeAMDGPUAlwaysInlinePass(PassRegistry&);
90 
91 Pass *createAMDGPUAnnotateKernelFeaturesPass();
92 Pass *createAMDGPUAttributorLegacyPass();
93 void initializeAMDGPUAttributorLegacyPass(PassRegistry &);
94 void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &);
95 extern char &AMDGPUAnnotateKernelFeaturesID;
96 
97 // DPP/Iterative option enables the atomic optimizer with given strategy
98 // whereas None disables the atomic optimizer.
99 enum class ScanOptions { DPP, Iterative, None };
100 FunctionPass *createAMDGPUAtomicOptimizerPass(ScanOptions ScanStrategy);
101 void initializeAMDGPUAtomicOptimizerPass(PassRegistry &);
102 extern char &AMDGPUAtomicOptimizerID;
103 
104 ModulePass *createAMDGPUCtorDtorLoweringLegacyPass();
105 void initializeAMDGPUCtorDtorLoweringLegacyPass(PassRegistry &);
106 extern char &AMDGPUCtorDtorLoweringLegacyPassID;
107 
108 FunctionPass *createAMDGPULowerKernelArgumentsPass();
109 void initializeAMDGPULowerKernelArgumentsPass(PassRegistry &);
110 extern char &AMDGPULowerKernelArgumentsID;
111 
112 FunctionPass *createAMDGPUPromoteKernelArgumentsPass();
113 void initializeAMDGPUPromoteKernelArgumentsPass(PassRegistry &);
114 extern char &AMDGPUPromoteKernelArgumentsID;
115 
116 struct AMDGPUPromoteKernelArgumentsPass
117     : PassInfoMixin<AMDGPUPromoteKernelArgumentsPass> {
118   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
119 };
120 
121 ModulePass *createAMDGPULowerKernelAttributesPass();
122 void initializeAMDGPULowerKernelAttributesPass(PassRegistry &);
123 extern char &AMDGPULowerKernelAttributesID;
124 
125 struct AMDGPULowerKernelAttributesPass
126     : PassInfoMixin<AMDGPULowerKernelAttributesPass> {
127   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
128 };
129 
130 void initializeAMDGPULowerModuleLDSLegacyPass(PassRegistry &);
131 extern char &AMDGPULowerModuleLDSLegacyPassID;
132 
133 struct AMDGPULowerModuleLDSPass : PassInfoMixin<AMDGPULowerModuleLDSPass> {
134   const AMDGPUTargetMachine &TM;
135   AMDGPULowerModuleLDSPass(const AMDGPUTargetMachine &TM_) : TM(TM_) {}
136 
137   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
138 };
139 
140 void initializeAMDGPULowerBufferFatPointersPass(PassRegistry &);
141 extern char &AMDGPULowerBufferFatPointersID;
142 
143 struct AMDGPULowerBufferFatPointersPass
144     : PassInfoMixin<AMDGPULowerBufferFatPointersPass> {
145   AMDGPULowerBufferFatPointersPass(const TargetMachine &TM) : TM(TM) {}
146   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
147 
148 private:
149   const TargetMachine &TM;
150 };
151 
152 void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &);
153 extern char &AMDGPURewriteOutArgumentsID;
154 
155 void initializeGCNDPPCombinePass(PassRegistry &);
156 extern char &GCNDPPCombineID;
157 
158 void initializeSIFoldOperandsPass(PassRegistry &);
159 extern char &SIFoldOperandsID;
160 
161 void initializeSIPeepholeSDWAPass(PassRegistry &);
162 extern char &SIPeepholeSDWAID;
163 
164 void initializeSIShrinkInstructionsPass(PassRegistry&);
165 extern char &SIShrinkInstructionsID;
166 
167 void initializeSIFixSGPRCopiesPass(PassRegistry &);
168 extern char &SIFixSGPRCopiesID;
169 
170 void initializeSIFixVGPRCopiesPass(PassRegistry &);
171 extern char &SIFixVGPRCopiesID;
172 
173 void initializeSILowerWWMCopiesPass(PassRegistry &);
174 extern char &SILowerWWMCopiesID;
175 
176 void initializeSILowerI1CopiesPass(PassRegistry &);
177 extern char &SILowerI1CopiesID;
178 
179 void initializeAMDGPUGlobalISelDivergenceLoweringPass(PassRegistry &);
180 extern char &AMDGPUGlobalISelDivergenceLoweringID;
181 
182 void initializeAMDGPUMarkLastScratchLoadPass(PassRegistry &);
183 extern char &AMDGPUMarkLastScratchLoadID;
184 
185 void initializeSILowerSGPRSpillsPass(PassRegistry &);
186 extern char &SILowerSGPRSpillsID;
187 
188 void initializeSILoadStoreOptimizerPass(PassRegistry &);
189 extern char &SILoadStoreOptimizerID;
190 
191 void initializeSIWholeQuadModePass(PassRegistry &);
192 extern char &SIWholeQuadModeID;
193 
194 void initializeSILowerControlFlowPass(PassRegistry &);
195 extern char &SILowerControlFlowID;
196 
197 void initializeSIPreEmitPeepholePass(PassRegistry &);
198 extern char &SIPreEmitPeepholeID;
199 
200 void initializeSILateBranchLoweringPass(PassRegistry &);
201 extern char &SILateBranchLoweringPassID;
202 
203 void initializeSIOptimizeExecMaskingPass(PassRegistry &);
204 extern char &SIOptimizeExecMaskingID;
205 
206 void initializeSIPreAllocateWWMRegsPass(PassRegistry &);
207 extern char &SIPreAllocateWWMRegsID;
208 
209 void initializeAMDGPUImageIntrinsicOptimizerPass(PassRegistry &);
210 extern char &AMDGPUImageIntrinsicOptimizerID;
211 
212 void initializeAMDGPUPerfHintAnalysisPass(PassRegistry &);
213 extern char &AMDGPUPerfHintAnalysisID;
214 
215 void initializeGCNRegPressurePrinterPass(PassRegistry &);
216 extern char &GCNRegPressurePrinterID;
217 
218 // Passes common to R600 and SI
219 FunctionPass *createAMDGPUPromoteAlloca();
220 void initializeAMDGPUPromoteAllocaPass(PassRegistry&);
221 extern char &AMDGPUPromoteAllocaID;
222 
223 FunctionPass *createAMDGPUPromoteAllocaToVector();
224 void initializeAMDGPUPromoteAllocaToVectorPass(PassRegistry&);
225 extern char &AMDGPUPromoteAllocaToVectorID;
226 
227 struct AMDGPUPromoteAllocaPass : PassInfoMixin<AMDGPUPromoteAllocaPass> {
228   AMDGPUPromoteAllocaPass(TargetMachine &TM) : TM(TM) {}
229   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
230 
231 private:
232   TargetMachine &TM;
233 };
234 
235 struct AMDGPUPromoteAllocaToVectorPass
236     : PassInfoMixin<AMDGPUPromoteAllocaToVectorPass> {
237   AMDGPUPromoteAllocaToVectorPass(TargetMachine &TM) : TM(TM) {}
238   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
239 
240 private:
241   TargetMachine &TM;
242 };
243 
244 struct AMDGPUAtomicOptimizerPass : PassInfoMixin<AMDGPUAtomicOptimizerPass> {
245   AMDGPUAtomicOptimizerPass(TargetMachine &TM, ScanOptions ScanImpl)
246       : TM(TM), ScanImpl(ScanImpl) {}
247   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
248 
249 private:
250   TargetMachine &TM;
251   ScanOptions ScanImpl;
252 };
253 
254 Pass *createAMDGPUStructurizeCFGPass();
255 FunctionPass *createAMDGPUISelDag(TargetMachine &TM, CodeGenOptLevel OptLevel);
256 ModulePass *createAMDGPUAlwaysInlinePass(bool GlobalOpt = true);
257 
258 struct AMDGPUAlwaysInlinePass : PassInfoMixin<AMDGPUAlwaysInlinePass> {
259   AMDGPUAlwaysInlinePass(bool GlobalOpt = true) : GlobalOpt(GlobalOpt) {}
260   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
261 
262 private:
263   bool GlobalOpt;
264 };
265 
266 class AMDGPUCodeGenPreparePass
267     : public PassInfoMixin<AMDGPUCodeGenPreparePass> {
268 private:
269   TargetMachine &TM;
270 
271 public:
272   AMDGPUCodeGenPreparePass(TargetMachine &TM) : TM(TM){};
273   PreservedAnalyses run(Function &, FunctionAnalysisManager &);
274 };
275 
276 class AMDGPULowerKernelArgumentsPass
277     : public PassInfoMixin<AMDGPULowerKernelArgumentsPass> {
278 private:
279   TargetMachine &TM;
280 
281 public:
282   AMDGPULowerKernelArgumentsPass(TargetMachine &TM) : TM(TM){};
283   PreservedAnalyses run(Function &, FunctionAnalysisManager &);
284 };
285 
286 class AMDGPUAttributorPass : public PassInfoMixin<AMDGPUAttributorPass> {
287 private:
288   TargetMachine &TM;
289 
290 public:
291   AMDGPUAttributorPass(TargetMachine &TM) : TM(TM){};
292   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
293 };
294 
295 FunctionPass *createAMDGPUAnnotateUniformValues();
296 
297 ModulePass *createAMDGPUPrintfRuntimeBinding();
298 void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry&);
299 extern char &AMDGPUPrintfRuntimeBindingID;
300 
301 void initializeAMDGPUResourceUsageAnalysisPass(PassRegistry &);
302 extern char &AMDGPUResourceUsageAnalysisID;
303 
304 struct AMDGPUPrintfRuntimeBindingPass
305     : PassInfoMixin<AMDGPUPrintfRuntimeBindingPass> {
306   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
307 };
308 
309 ModulePass* createAMDGPUUnifyMetadataPass();
310 void initializeAMDGPUUnifyMetadataPass(PassRegistry&);
311 extern char &AMDGPUUnifyMetadataID;
312 
313 struct AMDGPUUnifyMetadataPass : PassInfoMixin<AMDGPUUnifyMetadataPass> {
314   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
315 };
316 
317 void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry&);
318 extern char &SIOptimizeExecMaskingPreRAID;
319 
320 void initializeSIOptimizeVGPRLiveRangePass(PassRegistry &);
321 extern char &SIOptimizeVGPRLiveRangeID;
322 
323 void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry&);
324 extern char &AMDGPUAnnotateUniformValuesPassID;
325 
326 void initializeAMDGPUCodeGenPreparePass(PassRegistry&);
327 extern char &AMDGPUCodeGenPrepareID;
328 
329 void initializeAMDGPURemoveIncompatibleFunctionsPass(PassRegistry &);
330 extern char &AMDGPURemoveIncompatibleFunctionsID;
331 
332 void initializeAMDGPULateCodeGenPreparePass(PassRegistry &);
333 extern char &AMDGPULateCodeGenPrepareID;
334 
335 FunctionPass *createAMDGPURewriteUndefForPHILegacyPass();
336 void initializeAMDGPURewriteUndefForPHILegacyPass(PassRegistry &);
337 extern char &AMDGPURewriteUndefForPHILegacyPassID;
338 
339 class AMDGPURewriteUndefForPHIPass
340     : public PassInfoMixin<AMDGPURewriteUndefForPHIPass> {
341 public:
342   AMDGPURewriteUndefForPHIPass() = default;
343   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
344 };
345 
346 void initializeSIAnnotateControlFlowPass(PassRegistry&);
347 extern char &SIAnnotateControlFlowPassID;
348 
349 void initializeSIMemoryLegalizerPass(PassRegistry&);
350 extern char &SIMemoryLegalizerID;
351 
352 void initializeSIModeRegisterPass(PassRegistry&);
353 extern char &SIModeRegisterID;
354 
355 void initializeAMDGPUInsertDelayAluPass(PassRegistry &);
356 extern char &AMDGPUInsertDelayAluID;
357 
358 void initializeAMDGPUInsertSingleUseVDSTPass(PassRegistry &);
359 extern char &AMDGPUInsertSingleUseVDSTID;
360 
361 void initializeSIInsertHardClausesPass(PassRegistry &);
362 extern char &SIInsertHardClausesID;
363 
364 void initializeSIInsertWaitcntsPass(PassRegistry&);
365 extern char &SIInsertWaitcntsID;
366 
367 void initializeSIFormMemoryClausesPass(PassRegistry&);
368 extern char &SIFormMemoryClausesID;
369 
370 void initializeSIPostRABundlerPass(PassRegistry&);
371 extern char &SIPostRABundlerID;
372 
373 void initializeGCNCreateVOPDPass(PassRegistry &);
374 extern char &GCNCreateVOPDID;
375 
376 void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry&);
377 extern char &AMDGPUUnifyDivergentExitNodesID;
378 
379 ImmutablePass *createAMDGPUAAWrapperPass();
380 void initializeAMDGPUAAWrapperPassPass(PassRegistry&);
381 ImmutablePass *createAMDGPUExternalAAWrapperPass();
382 void initializeAMDGPUExternalAAWrapperPass(PassRegistry&);
383 
384 void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &);
385 
386 ModulePass *createAMDGPUOpenCLEnqueuedBlockLoweringPass();
387 void initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(PassRegistry &);
388 extern char &AMDGPUOpenCLEnqueuedBlockLoweringID;
389 
390 void initializeGCNNSAReassignPass(PassRegistry &);
391 extern char &GCNNSAReassignID;
392 
393 void initializeGCNPreRALongBranchRegPass(PassRegistry &);
394 extern char &GCNPreRALongBranchRegID;
395 
396 void initializeGCNPreRAOptimizationsPass(PassRegistry &);
397 extern char &GCNPreRAOptimizationsID;
398 
399 FunctionPass *createAMDGPUSetWavePriorityPass();
400 void initializeAMDGPUSetWavePriorityPass(PassRegistry &);
401 
402 void initializeGCNRewritePartialRegUsesPass(llvm::PassRegistry &);
403 extern char &GCNRewritePartialRegUsesID;
404 
405 namespace AMDGPU {
// Target index values. The enumerators take consecutive values starting at 0,
// so their order must not change.
enum TargetIndex {
  TI_CONSTDATA_START,     // 0
  TI_SCRATCH_RSRC_DWORD0, // 1
  TI_SCRATCH_RSRC_DWORD1, // 2
  TI_SCRATCH_RSRC_DWORD2, // 3
  TI_SCRATCH_RSRC_DWORD3  // 4
};
413 
414 // FIXME: Missing constant_32bit
415 inline bool isFlatGlobalAddrSpace(unsigned AS) {
416   return AS == AMDGPUAS::GLOBAL_ADDRESS ||
417          AS == AMDGPUAS::FLAT_ADDRESS ||
418          AS == AMDGPUAS::CONSTANT_ADDRESS ||
419          AS > AMDGPUAS::MAX_AMDGPU_ADDRESS;
420 }
421 
422 inline bool isExtendedGlobalAddrSpace(unsigned AS) {
423   return AS == AMDGPUAS::GLOBAL_ADDRESS || AS == AMDGPUAS::CONSTANT_ADDRESS ||
424          AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
425          AS > AMDGPUAS::MAX_AMDGPU_ADDRESS;
426 }
427 
428 static inline bool addrspacesMayAlias(unsigned AS1, unsigned AS2) {
429   static_assert(AMDGPUAS::MAX_AMDGPU_ADDRESS <= 9, "Addr space out of range");
430 
431   if (AS1 > AMDGPUAS::MAX_AMDGPU_ADDRESS || AS2 > AMDGPUAS::MAX_AMDGPU_ADDRESS)
432     return true;
433 
434   // This array is indexed by address space value enum elements 0 ... to 9
435   // clang-format off
436   static const bool ASAliasRules[10][10] = {
437     /*                       Flat   Global Region  Group Constant Private Const32 BufFatPtr BufRsrc BufStrdPtr */
438     /* Flat     */            {true,  true,  false, true,  true,  true,  true,  true,  true,  true},
439     /* Global   */            {true,  true,  false, false, true,  false, true,  true,  true,  true},
440     /* Region   */            {false, false, true,  false, false, false, false, false, false, false},
441     /* Group    */            {true,  false, false, true,  false, false, false, false, false, false},
442     /* Constant */            {true,  true,  false, false, false, false, true,  true,  true,  true},
443     /* Private  */            {true,  false, false, false, false, true,  false, false, false, false},
444     /* Constant 32-bit */     {true,  true,  false, false, true,  false, false, true,  true,  true},
445     /* Buffer Fat Ptr  */     {true,  true,  false, false, true,  false, true,  true,  true,  true},
446     /* Buffer Resource */     {true,  true,  false, false, true,  false, true,  true,  true,  true},
447     /* Buffer Strided Ptr  */ {true,  true,  false, false, true,  false, true,  true,  true,  true},
448   };
449   // clang-format on
450 
451   return ASAliasRules[AS1][AS2];
452 }
453 
454 }
455 
456 } // End namespace llvm
457 
458 #endif
459