xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPU.h (revision 357378bbdedf24ce2b90e9bd831af4a9db3ec70a)
1 //===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 /// \file
8 //===----------------------------------------------------------------------===//
9 
10 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPU_H
11 #define LLVM_LIB_TARGET_AMDGPU_AMDGPU_H
12 
13 #include "llvm/IR/PassManager.h"
14 #include "llvm/Pass.h"
15 #include "llvm/Support/AMDGPUAddrSpace.h"
16 #include "llvm/Support/CodeGen.h"
17 
18 namespace llvm {
19 
/// Forward declarations: within this header both types are only used through
/// pointers or references, so the full definitions are not needed here.
20 class AMDGPUTargetMachine;
21 class TargetMachine;
22 
23 // GlobalISel passes
/// Each combiner below is declared with a registry initializer taking the
/// PassRegistry and a factory that takes an IsOptNone flag and returns a
/// FunctionPass pointer. The definitions are not part of this header.
24 void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &);
25 FunctionPass *createAMDGPUPreLegalizeCombiner(bool IsOptNone);
26 void initializeAMDGPUPostLegalizerCombinerPass(PassRegistry &);
27 FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone);
28 FunctionPass *createAMDGPURegBankCombiner(bool IsOptNone);
29 void initializeAMDGPURegBankCombinerPass(PassRegistry &);
30 
/// Only the registry initializer is declared for this pass in the visible part
/// of the header.
31 void initializeAMDGPURegBankSelectPass(PassRegistry &);
32 
33 // SI Passes
/// Factory declarations. Every function returns a pointer to a pass object
/// (FunctionPass unless noted otherwise); the definitions live outside this
/// header.
34 FunctionPass *createGCNDPPCombinePass();
35 FunctionPass *createSIAnnotateControlFlowPass();
36 FunctionPass *createSIFoldOperandsPass();
37 FunctionPass *createSIPeepholeSDWAPass();
38 FunctionPass *createSILowerI1CopiesPass();
39 FunctionPass *createAMDGPUGlobalISelDivergenceLoweringPass();
40 FunctionPass *createSIShrinkInstructionsPass();
41 FunctionPass *createSILoadStoreOptimizerPass();
42 FunctionPass *createSIWholeQuadModePass();
43 FunctionPass *createSIFixControlFlowLiveIntervalsPass();
44 FunctionPass *createSIOptimizeExecMaskingPreRAPass();
45 FunctionPass *createSIOptimizeVGPRLiveRangePass();
46 FunctionPass *createSIFixSGPRCopiesPass();
// NOTE(review): the matching initializer declared in this header is named
// initializeSILowerWWMCopiesPass, while this factory has no "SI" prefix --
// confirm that a definition with exactly this name exists.
47 FunctionPass *createLowerWWMCopiesPass();
48 FunctionPass *createSIMemoryLegalizerPass();
49 FunctionPass *createSIInsertWaitcntsPass();
50 FunctionPass *createSIPreAllocateWWMRegsPass();
51 FunctionPass *createSIFormMemoryClausesPass();
52 
53 FunctionPass *createSIPostRABundlerPass();
/// The next two factories take a pointer to the (const) TargetMachine.
54 FunctionPass *createAMDGPUImageIntrinsicOptimizerPass(const TargetMachine *);
55 ModulePass *createAMDGPURemoveIncompatibleFunctionsPass(const TargetMachine *);
56 FunctionPass *createAMDGPUCodeGenPreparePass();
57 FunctionPass *createAMDGPULateCodeGenPreparePass();
58 FunctionPass *createAMDGPUMachineCFGStructurizerPass();
59 FunctionPass *createAMDGPURewriteOutArgumentsPass();
/// Returns a ModulePass. The target machine argument is optional and defaults
/// to nullptr.
60 ModulePass *
61 createAMDGPULowerModuleLDSLegacyPass(const AMDGPUTargetMachine *TM = nullptr);
62 FunctionPass *createSIModeRegisterPass();
63 FunctionPass *createGCNPreRAOptimizationsPass();
64 
/// PassInfoMixin-based pass that runs on a single Function.
/// It carries no state: the constructor takes no arguments and has an empty
/// body. run() is only declared here; its definition is outside this header.
65 struct AMDGPUSimplifyLibCallsPass : PassInfoMixin<AMDGPUSimplifyLibCallsPass> {
66   AMDGPUSimplifyLibCallsPass() {}
67   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
68 };
69 
/// PassInfoMixin-based pass that runs on a single Function.
/// The pass keeps only a reference to the TargetMachine handed to the
/// constructor (no copy is made). run() is defined outside this header.
70 struct AMDGPUImageIntrinsicOptimizerPass
71     : PassInfoMixin<AMDGPUImageIntrinsicOptimizerPass> {
72   AMDGPUImageIntrinsicOptimizerPass(TargetMachine &TM) : TM(TM) {}
73   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
74 
75 private:
76   TargetMachine &TM;
77 };
78 
/// Stateless PassInfoMixin-based pass that runs on a single Function.
/// Only the run() declaration is visible here; it is defined elsewhere.
79 struct AMDGPUUseNativeCallsPass : PassInfoMixin<AMDGPUUseNativeCallsPass> {
80   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
81 };
82 
/// Registration hooks, factories and pass identifiers. Every initialize*
/// function takes the PassRegistry; every `extern char &` declares a reference
/// to a pass-identifier object that is defined outside this header.
83 void initializeAMDGPUDAGToDAGISelPass(PassRegistry&);
84 
85 void initializeAMDGPUMachineCFGStructurizerPass(PassRegistry&);
86 extern char &AMDGPUMachineCFGStructurizerID;
87 
88 void initializeAMDGPUAlwaysInlinePass(PassRegistry&);
89 
/// These two factories return the generic Pass base type.
90 Pass *createAMDGPUAnnotateKernelFeaturesPass();
91 Pass *createAMDGPUAttributorLegacyPass();
92 void initializeAMDGPUAttributorLegacyPass(PassRegistry &);
93 void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &);
94 extern char &AMDGPUAnnotateKernelFeaturesID;
95 
96 // The DPP and Iterative options enable the atomic optimizer with the given
97 // scan strategy, whereas None disables the atomic optimizer.
98 enum class ScanOptions { DPP, Iterative, None };
/// The factory receives the scan strategy to use.
99 FunctionPass *createAMDGPUAtomicOptimizerPass(ScanOptions ScanStrategy);
100 void initializeAMDGPUAtomicOptimizerPass(PassRegistry &);
101 extern char &AMDGPUAtomicOptimizerID;
102 
103 ModulePass *createAMDGPUCtorDtorLoweringLegacyPass();
104 void initializeAMDGPUCtorDtorLoweringLegacyPass(PassRegistry &);
105 extern char &AMDGPUCtorDtorLoweringLegacyPassID;
106 
107 FunctionPass *createAMDGPULowerKernelArgumentsPass();
108 void initializeAMDGPULowerKernelArgumentsPass(PassRegistry &);
109 extern char &AMDGPULowerKernelArgumentsID;
110 
/// FunctionPass factory, registry initializer and pass identifier. All three
/// are defined outside this header.
111 FunctionPass *createAMDGPUPromoteKernelArgumentsPass();
112 void initializeAMDGPUPromoteKernelArgumentsPass(PassRegistry &);
113 extern char &AMDGPUPromoteKernelArgumentsID;
114 
/// Stateless PassInfoMixin-based counterpart that runs on a single Function.
/// run() is only declared here.
115 struct AMDGPUPromoteKernelArgumentsPass
116     : PassInfoMixin<AMDGPUPromoteKernelArgumentsPass> {
117   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
118 };
119 
/// ModulePass factory, registry initializer and pass identifier. All three are
/// defined outside this header.
120 ModulePass *createAMDGPULowerKernelAttributesPass();
121 void initializeAMDGPULowerKernelAttributesPass(PassRegistry &);
122 extern char &AMDGPULowerKernelAttributesID;
123 
/// Stateless PassInfoMixin-based counterpart. Note that, unlike the factory
/// above (which returns a ModulePass), run() here operates on a Function.
124 struct AMDGPULowerKernelAttributesPass
125     : PassInfoMixin<AMDGPULowerKernelAttributesPass> {
126   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
127 };
128 
/// Registry initializer and pass identifier, both defined outside this header.
129 void initializeAMDGPULowerModuleLDSLegacyPass(PassRegistry &);
130 extern char &AMDGPULowerModuleLDSLegacyPassID;
131 
/// PassInfoMixin-based pass that runs on a whole Module.
/// TM is a public const reference bound to the constructor argument; the pass
/// does not copy the target machine. run() is defined outside this header.
132 struct AMDGPULowerModuleLDSPass : PassInfoMixin<AMDGPULowerModuleLDSPass> {
133   const AMDGPUTargetMachine &TM;
134   AMDGPULowerModuleLDSPass(const AMDGPUTargetMachine &TM_) : TM(TM_) {}
135 
136   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
137 };
138 
/// Registration hooks and pass identifiers, one pair per pass. Each
/// initialize<Name>Pass function takes the PassRegistry, and the accompanying
/// `extern char &` declares a reference to an identifier object for that pass.
/// Everything in this run is declared only; the definitions are elsewhere.
139 void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &);
140 extern char &AMDGPURewriteOutArgumentsID;
141 
142 void initializeGCNDPPCombinePass(PassRegistry &);
143 extern char &GCNDPPCombineID;
144 
145 void initializeSIFoldOperandsPass(PassRegistry &);
146 extern char &SIFoldOperandsID;
147 
148 void initializeSIPeepholeSDWAPass(PassRegistry &);
149 extern char &SIPeepholeSDWAID;
150 
151 void initializeSIShrinkInstructionsPass(PassRegistry&);
152 extern char &SIShrinkInstructionsID;
153 
154 void initializeSIFixSGPRCopiesPass(PassRegistry &);
155 extern char &SIFixSGPRCopiesID;
156 
157 void initializeSIFixVGPRCopiesPass(PassRegistry &);
158 extern char &SIFixVGPRCopiesID;
159 
160 void initializeSILowerWWMCopiesPass(PassRegistry &);
161 extern char &SILowerWWMCopiesID;
162 
163 void initializeSILowerI1CopiesPass(PassRegistry &);
164 extern char &SILowerI1CopiesID;
165 
166 void initializeAMDGPUGlobalISelDivergenceLoweringPass(PassRegistry &);
167 extern char &AMDGPUGlobalISelDivergenceLoweringID;
168 
169 void initializeAMDGPUMarkLastScratchLoadPass(PassRegistry &);
170 extern char &AMDGPUMarkLastScratchLoadID;
171 
172 void initializeSILowerSGPRSpillsPass(PassRegistry &);
173 extern char &SILowerSGPRSpillsID;
174 
175 void initializeSILoadStoreOptimizerPass(PassRegistry &);
176 extern char &SILoadStoreOptimizerID;
177 
178 void initializeSIWholeQuadModePass(PassRegistry &);
179 extern char &SIWholeQuadModeID;
180 
181 void initializeSILowerControlFlowPass(PassRegistry &);
182 extern char &SILowerControlFlowID;
183 
184 void initializeSIPreEmitPeepholePass(PassRegistry &);
185 extern char &SIPreEmitPeepholeID;
186 
187 void initializeSILateBranchLoweringPass(PassRegistry &);
188 extern char &SILateBranchLoweringPassID;
189 
190 void initializeSIOptimizeExecMaskingPass(PassRegistry &);
191 extern char &SIOptimizeExecMaskingID;
192 
193 void initializeSIPreAllocateWWMRegsPass(PassRegistry &);
194 extern char &SIPreAllocateWWMRegsID;
195 
196 void initializeAMDGPUImageIntrinsicOptimizerPass(PassRegistry &);
197 extern char &AMDGPUImageIntrinsicOptimizerID;
198 
199 void initializeAMDGPUPerfHintAnalysisPass(PassRegistry &);
200 extern char &AMDGPUPerfHintAnalysisID;
201 
202 void initializeGCNRegPressurePrinterPass(PassRegistry &);
203 extern char &GCNRegPressurePrinterID;
204 
205 // Passes common to R600 and SI
/// For each of the two passes below: a FunctionPass factory, a registry
/// initializer and a pass identifier. All are defined outside this header.
206 FunctionPass *createAMDGPUPromoteAlloca();
207 void initializeAMDGPUPromoteAllocaPass(PassRegistry&);
208 extern char &AMDGPUPromoteAllocaID;
209 
210 FunctionPass *createAMDGPUPromoteAllocaToVector();
211 void initializeAMDGPUPromoteAllocaToVectorPass(PassRegistry&);
212 extern char &AMDGPUPromoteAllocaToVectorID;
213 
/// PassInfoMixin-based pass that runs on a single Function.
/// Only a reference to the TargetMachine given to the constructor is stored.
/// run() is defined outside this header.
214 struct AMDGPUPromoteAllocaPass : PassInfoMixin<AMDGPUPromoteAllocaPass> {
215   AMDGPUPromoteAllocaPass(TargetMachine &TM) : TM(TM) {}
216   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
217 
218 private:
219   TargetMachine &TM;
220 };
221 
/// PassInfoMixin-based pass that runs on a single Function.
/// Only a reference to the TargetMachine given to the constructor is stored.
/// run() is defined outside this header.
222 struct AMDGPUPromoteAllocaToVectorPass
223     : PassInfoMixin<AMDGPUPromoteAllocaToVectorPass> {
224   AMDGPUPromoteAllocaToVectorPass(TargetMachine &TM) : TM(TM) {}
225   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
226 
227 private:
228   TargetMachine &TM;
229 };
230 
/// PassInfoMixin-based pass that runs on a single Function.
/// The constructor records a reference to the TargetMachine and a ScanOptions
/// value; how run() uses them is not visible in this header.
231 struct AMDGPUAtomicOptimizerPass : PassInfoMixin<AMDGPUAtomicOptimizerPass> {
232   AMDGPUAtomicOptimizerPass(TargetMachine &TM, ScanOptions ScanImpl)
233       : TM(TM), ScanImpl(ScanImpl) {}
234   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
235 
236 private:
237   TargetMachine &TM;
238   ScanOptions ScanImpl;
239 };
240 
/// Factory declarations; the definitions are outside this header.
241 Pass *createAMDGPUStructurizeCFGPass();
/// Takes the target machine and the code generation optimization level.
242 FunctionPass *createAMDGPUISelDag(TargetMachine &TM, CodeGenOptLevel OptLevel);
/// GlobalOpt defaults to true.
243 ModulePass *createAMDGPUAlwaysInlinePass(bool GlobalOpt = true);
244 
/// PassInfoMixin-based pass that runs on a whole Module.
/// Stores the GlobalOpt flag given to the constructor (default: true); the
/// effect of the flag is determined by run(), which is defined elsewhere.
245 struct AMDGPUAlwaysInlinePass : PassInfoMixin<AMDGPUAlwaysInlinePass> {
246   AMDGPUAlwaysInlinePass(bool GlobalOpt = true) : GlobalOpt(GlobalOpt) {}
247   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
248 
249 private:
250   bool GlobalOpt;
251 };
252 
253 class AMDGPUCodeGenPreparePass
254     : public PassInfoMixin<AMDGPUCodeGenPreparePass> {
255 private:
256   TargetMachine &TM;
257 
258 public:
259   AMDGPUCodeGenPreparePass(TargetMachine &TM) : TM(TM){};
260   PreservedAnalyses run(Function &, FunctionAnalysisManager &);
261 };
262 
263 class AMDGPULowerKernelArgumentsPass
264     : public PassInfoMixin<AMDGPULowerKernelArgumentsPass> {
265 private:
266   TargetMachine &TM;
267 
268 public:
269   AMDGPULowerKernelArgumentsPass(TargetMachine &TM) : TM(TM){};
270   PreservedAnalyses run(Function &, FunctionAnalysisManager &);
271 };
272 
273 class AMDGPUAttributorPass : public PassInfoMixin<AMDGPUAttributorPass> {
274 private:
275   TargetMachine &TM;
276 
277 public:
278   AMDGPUAttributorPass(TargetMachine &TM) : TM(TM){};
279   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
280 };
281 
/// Factories, registry initializers and pass identifiers; all of them are
/// defined outside this header.
282 FunctionPass *createAMDGPUAnnotateUniformValues();
283 
284 ModulePass *createAMDGPUPrintfRuntimeBinding();
285 void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry&);
286 extern char &AMDGPUPrintfRuntimeBindingID;
287 
288 void initializeAMDGPUResourceUsageAnalysisPass(PassRegistry &);
289 extern char &AMDGPUResourceUsageAnalysisID;
290 
/// Stateless PassInfoMixin-based pass that runs on a whole Module. run() is
/// only declared here.
291 struct AMDGPUPrintfRuntimeBindingPass
292     : PassInfoMixin<AMDGPUPrintfRuntimeBindingPass> {
293   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
294 };
295 
/// ModulePass factory, registry initializer and pass identifier. All three are
/// defined outside this header.
296 ModulePass* createAMDGPUUnifyMetadataPass();
297 void initializeAMDGPUUnifyMetadataPass(PassRegistry&);
298 extern char &AMDGPUUnifyMetadataID;
299 
/// Stateless PassInfoMixin-based counterpart that runs on a whole Module.
/// run() is only declared here.
300 struct AMDGPUUnifyMetadataPass : PassInfoMixin<AMDGPUUnifyMetadataPass> {
301   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
302 };
303 
/// Registration hooks and pass identifiers, one pair per pass. Each
/// initializer takes the PassRegistry; each `extern char &` declares a
/// reference to an identifier object defined outside this header.
304 void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry&);
305 extern char &SIOptimizeExecMaskingPreRAID;
306 
307 void initializeSIOptimizeVGPRLiveRangePass(PassRegistry &);
308 extern char &SIOptimizeVGPRLiveRangeID;
309 
310 void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry&);
311 extern char &AMDGPUAnnotateUniformValuesPassID;
312 
313 void initializeAMDGPUCodeGenPreparePass(PassRegistry&);
314 extern char &AMDGPUCodeGenPrepareID;
315 
316 void initializeAMDGPURemoveIncompatibleFunctionsPass(PassRegistry &);
317 extern char &AMDGPURemoveIncompatibleFunctionsID;
318 
319 void initializeAMDGPULateCodeGenPreparePass(PassRegistry &);
320 extern char &AMDGPULateCodeGenPrepareID;
321 
/// FunctionPass factory, registry initializer and pass identifier. All three
/// are defined outside this header.
322 FunctionPass *createAMDGPURewriteUndefForPHILegacyPass();
323 void initializeAMDGPURewriteUndefForPHILegacyPass(PassRegistry &);
324 extern char &AMDGPURewriteUndefForPHILegacyPassID;
325 
/// Stateless PassInfoMixin-based counterpart that runs on a single Function.
/// The default constructor is explicitly defaulted; run() is only declared
/// here.
326 class AMDGPURewriteUndefForPHIPass
327     : public PassInfoMixin<AMDGPURewriteUndefForPHIPass> {
328 public:
329   AMDGPURewriteUndefForPHIPass() = default;
330   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
331 };
332 
/// Registration hooks, pass identifiers and a few factories. Each initializer
/// takes the PassRegistry; each `extern char &` declares a reference to an
/// identifier object. Nothing in this run is defined in this header.
333 void initializeSIAnnotateControlFlowPass(PassRegistry&);
334 extern char &SIAnnotateControlFlowPassID;
335 
336 void initializeSIMemoryLegalizerPass(PassRegistry&);
337 extern char &SIMemoryLegalizerID;
338 
339 void initializeSIModeRegisterPass(PassRegistry&);
340 extern char &SIModeRegisterID;
341 
342 void initializeAMDGPUInsertDelayAluPass(PassRegistry &);
343 extern char &AMDGPUInsertDelayAluID;
344 
345 void initializeAMDGPUInsertSingleUseVDSTPass(PassRegistry &);
346 extern char &AMDGPUInsertSingleUseVDSTID;
347 
348 void initializeSIInsertHardClausesPass(PassRegistry &);
349 extern char &SIInsertHardClausesID;
350 
351 void initializeSIInsertWaitcntsPass(PassRegistry&);
352 extern char &SIInsertWaitcntsID;
353 
354 void initializeSIFormMemoryClausesPass(PassRegistry&);
355 extern char &SIFormMemoryClausesID;
356 
357 void initializeSIPostRABundlerPass(PassRegistry&);
358 extern char &SIPostRABundlerID;
359 
360 void initializeGCNCreateVOPDPass(PassRegistry &);
361 extern char &GCNCreateVOPDID;
362 
363 void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry&);
364 extern char &AMDGPUUnifyDivergentExitNodesID;
365 
/// The two wrapper-pass factories return ImmutablePass pointers. Note the
/// doubled "PassPass" suffix in the first initializer's name.
366 ImmutablePass *createAMDGPUAAWrapperPass();
367 void initializeAMDGPUAAWrapperPassPass(PassRegistry&);
368 ImmutablePass *createAMDGPUExternalAAWrapperPass();
369 void initializeAMDGPUExternalAAWrapperPass(PassRegistry&);
370 
371 void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &);
372 
373 ModulePass *createAMDGPUOpenCLEnqueuedBlockLoweringPass();
374 void initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(PassRegistry &);
375 extern char &AMDGPUOpenCLEnqueuedBlockLoweringID;
376 
377 void initializeGCNNSAReassignPass(PassRegistry &);
378 extern char &GCNNSAReassignID;
379 
380 void initializeGCNPreRALongBranchRegPass(PassRegistry &);
381 extern char &GCNPreRALongBranchRegID;
382 
383 void initializeGCNPreRAOptimizationsPass(PassRegistry &);
384 extern char &GCNPreRAOptimizationsID;
385 
386 FunctionPass *createAMDGPUSetWavePriorityPass();
387 void initializeAMDGPUSetWavePriorityPass(PassRegistry &);
388 
/// The parameter type is spelled with an explicit llvm:: qualifier here even
/// though the declaration already sits inside namespace llvm.
389 void initializeGCNRewritePartialRegUsesPass(llvm::PassRegistry &);
390 extern char &GCNRewritePartialRegUsesID;
391 
392 namespace AMDGPU {
/// Target index identifiers. No enumerator has an explicit initializer, so the
/// values are consecutive starting at 0 (TI_CONSTDATA_START == 0 through
/// TI_SCRATCH_RSRC_DWORD3 == 4).
393 enum TargetIndex {
394   TI_CONSTDATA_START,
395   TI_SCRATCH_RSRC_DWORD0,
396   TI_SCRATCH_RSRC_DWORD1,
397   TI_SCRATCH_RSRC_DWORD2,
398   TI_SCRATCH_RSRC_DWORD3
399 };
400 
401 // FIXME: Missing constant_32bit
402 inline bool isFlatGlobalAddrSpace(unsigned AS) {
403   return AS == AMDGPUAS::GLOBAL_ADDRESS ||
404          AS == AMDGPUAS::FLAT_ADDRESS ||
405          AS == AMDGPUAS::CONSTANT_ADDRESS ||
406          AS > AMDGPUAS::MAX_AMDGPU_ADDRESS;
407 }
408 
409 inline bool isExtendedGlobalAddrSpace(unsigned AS) {
410   return AS == AMDGPUAS::GLOBAL_ADDRESS || AS == AMDGPUAS::CONSTANT_ADDRESS ||
411          AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
412          AS > AMDGPUAS::MAX_AMDGPU_ADDRESS;
413 }
414 
415 static inline bool addrspacesMayAlias(unsigned AS1, unsigned AS2) {
416   static_assert(AMDGPUAS::MAX_AMDGPU_ADDRESS <= 9, "Addr space out of range");
417 
418   if (AS1 > AMDGPUAS::MAX_AMDGPU_ADDRESS || AS2 > AMDGPUAS::MAX_AMDGPU_ADDRESS)
419     return true;
420 
421   // This array is indexed by address space value enum elements 0 ... to 9
422   // clang-format off
423   static const bool ASAliasRules[10][10] = {
424     /*                       Flat   Global Region  Group Constant Private Const32 BufFatPtr BufRsrc BufStrdPtr */
425     /* Flat     */            {true,  true,  false, true,  true,  true,  true,  true,  true,  true},
426     /* Global   */            {true,  true,  false, false, true,  false, true,  true,  true,  true},
427     /* Region   */            {false, false, true,  false, false, false, false, false, false, false},
428     /* Group    */            {true,  false, false, true,  false, false, false, false, false, false},
429     /* Constant */            {true,  true,  false, false, false, false, true,  true,  true,  true},
430     /* Private  */            {true,  false, false, false, false, true,  false, false, false, false},
431     /* Constant 32-bit */     {true,  true,  false, false, true,  false, false, true,  true,  true},
432     /* Buffer Fat Ptr  */     {true,  true,  false, false, true,  false, true,  true,  true,  true},
433     /* Buffer Resource */     {true,  true,  false, false, true,  false, true,  true,  true,  true},
434     /* Buffer Strided Ptr  */ {true,  true,  false, false, true,  false, true,  true,  true,  true},
435   };
436   // clang-format on
437 
438   return ASAliasRules[AS1][AS2];
439 }
440 
441 }
442 
443 } // End namespace llvm
444 
445 #endif
446