1 //===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 /// \file
8 //===----------------------------------------------------------------------===//
9
10 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPU_H
11 #define LLVM_LIB_TARGET_AMDGPU_AMDGPU_H
12
13 #include "llvm/IR/PassManager.h"
14 #include "llvm/Pass.h"
15 #include "llvm/Support/AMDGPUAddrSpace.h"
16 #include "llvm/Support/CodeGen.h"
17
18 namespace llvm {
19
20 class AMDGPUTargetMachine;
21 class TargetMachine;
22
23 // GlobalISel passes
24 void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &);
25 FunctionPass *createAMDGPUPreLegalizeCombiner(bool IsOptNone);
26 void initializeAMDGPUPostLegalizerCombinerPass(PassRegistry &);
27 FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone);
28 FunctionPass *createAMDGPURegBankCombiner(bool IsOptNone);
29 void initializeAMDGPURegBankCombinerPass(PassRegistry &);
30
31 void initializeAMDGPURegBankSelectPass(PassRegistry &);
32
33 // SI Passes
34 FunctionPass *createGCNDPPCombinePass();
35 FunctionPass *createSIAnnotateControlFlowPass();
36 FunctionPass *createSIFoldOperandsPass();
37 FunctionPass *createSIPeepholeSDWAPass();
38 FunctionPass *createSILowerI1CopiesPass();
39 FunctionPass *createAMDGPUGlobalISelDivergenceLoweringPass();
40 FunctionPass *createSIShrinkInstructionsPass();
41 FunctionPass *createSILoadStoreOptimizerPass();
42 FunctionPass *createSIWholeQuadModePass();
43 FunctionPass *createSIFixControlFlowLiveIntervalsPass();
44 FunctionPass *createSIOptimizeExecMaskingPreRAPass();
45 FunctionPass *createSIOptimizeVGPRLiveRangePass();
46 FunctionPass *createSIFixSGPRCopiesPass();
47 FunctionPass *createLowerWWMCopiesPass();
48 FunctionPass *createSIMemoryLegalizerPass();
49 FunctionPass *createSIInsertWaitcntsPass();
50 FunctionPass *createSIPreAllocateWWMRegsPass();
51 FunctionPass *createSIFormMemoryClausesPass();
52
53 FunctionPass *createSIPostRABundlerPass();
54 FunctionPass *createAMDGPUImageIntrinsicOptimizerPass(const TargetMachine *);
55 ModulePass *createAMDGPURemoveIncompatibleFunctionsPass(const TargetMachine *);
56 FunctionPass *createAMDGPUCodeGenPreparePass();
57 FunctionPass *createAMDGPULateCodeGenPreparePass();
58 FunctionPass *createAMDGPUMachineCFGStructurizerPass();
59 FunctionPass *createAMDGPURewriteOutArgumentsPass();
60 ModulePass *
61 createAMDGPULowerModuleLDSLegacyPass(const AMDGPUTargetMachine *TM = nullptr);
62 ModulePass *createAMDGPULowerBufferFatPointersPass();
63 FunctionPass *createSIModeRegisterPass();
64 FunctionPass *createGCNPreRAOptimizationsPass();
65
66 struct AMDGPUSimplifyLibCallsPass : PassInfoMixin<AMDGPUSimplifyLibCallsPass> {
AMDGPUSimplifyLibCallsPassAMDGPUSimplifyLibCallsPass67 AMDGPUSimplifyLibCallsPass() {}
68 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
69 };
70
71 struct AMDGPUImageIntrinsicOptimizerPass
72 : PassInfoMixin<AMDGPUImageIntrinsicOptimizerPass> {
AMDGPUImageIntrinsicOptimizerPassAMDGPUImageIntrinsicOptimizerPass73 AMDGPUImageIntrinsicOptimizerPass(TargetMachine &TM) : TM(TM) {}
74 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
75
76 private:
77 TargetMachine &TM;
78 };
79
80 struct AMDGPUUseNativeCallsPass : PassInfoMixin<AMDGPUUseNativeCallsPass> {
81 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
82 };
83
84 void initializeAMDGPUDAGToDAGISelLegacyPass(PassRegistry &);
85
86 void initializeAMDGPUMachineCFGStructurizerPass(PassRegistry&);
87 extern char &AMDGPUMachineCFGStructurizerID;
88
89 void initializeAMDGPUAlwaysInlinePass(PassRegistry&);
90
91 Pass *createAMDGPUAnnotateKernelFeaturesPass();
92 Pass *createAMDGPUAttributorLegacyPass();
93 void initializeAMDGPUAttributorLegacyPass(PassRegistry &);
94 void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &);
95 extern char &AMDGPUAnnotateKernelFeaturesID;
96
// Strategy selector for the atomic optimizer: DPP and Iterative each enable
// the optimizer with that strategy, whereas None disables it.
enum class ScanOptions {
  DPP,       // Enable the atomic optimizer with the DPP strategy.
  Iterative, // Enable the atomic optimizer with the Iterative strategy.
  None       // Disable the atomic optimizer.
};
100 FunctionPass *createAMDGPUAtomicOptimizerPass(ScanOptions ScanStrategy);
101 void initializeAMDGPUAtomicOptimizerPass(PassRegistry &);
102 extern char &AMDGPUAtomicOptimizerID;
103
104 ModulePass *createAMDGPUCtorDtorLoweringLegacyPass();
105 void initializeAMDGPUCtorDtorLoweringLegacyPass(PassRegistry &);
106 extern char &AMDGPUCtorDtorLoweringLegacyPassID;
107
108 FunctionPass *createAMDGPULowerKernelArgumentsPass();
109 void initializeAMDGPULowerKernelArgumentsPass(PassRegistry &);
110 extern char &AMDGPULowerKernelArgumentsID;
111
112 FunctionPass *createAMDGPUPromoteKernelArgumentsPass();
113 void initializeAMDGPUPromoteKernelArgumentsPass(PassRegistry &);
114 extern char &AMDGPUPromoteKernelArgumentsID;
115
116 struct AMDGPUPromoteKernelArgumentsPass
117 : PassInfoMixin<AMDGPUPromoteKernelArgumentsPass> {
118 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
119 };
120
121 ModulePass *createAMDGPULowerKernelAttributesPass();
122 void initializeAMDGPULowerKernelAttributesPass(PassRegistry &);
123 extern char &AMDGPULowerKernelAttributesID;
124
125 struct AMDGPULowerKernelAttributesPass
126 : PassInfoMixin<AMDGPULowerKernelAttributesPass> {
127 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
128 };
129
130 void initializeAMDGPULowerModuleLDSLegacyPass(PassRegistry &);
131 extern char &AMDGPULowerModuleLDSLegacyPassID;
132
133 struct AMDGPULowerModuleLDSPass : PassInfoMixin<AMDGPULowerModuleLDSPass> {
134 const AMDGPUTargetMachine &TM;
AMDGPULowerModuleLDSPassAMDGPULowerModuleLDSPass135 AMDGPULowerModuleLDSPass(const AMDGPUTargetMachine &TM_) : TM(TM_) {}
136
137 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
138 };
139
140 void initializeAMDGPULowerBufferFatPointersPass(PassRegistry &);
141 extern char &AMDGPULowerBufferFatPointersID;
142
143 struct AMDGPULowerBufferFatPointersPass
144 : PassInfoMixin<AMDGPULowerBufferFatPointersPass> {
AMDGPULowerBufferFatPointersPassAMDGPULowerBufferFatPointersPass145 AMDGPULowerBufferFatPointersPass(const TargetMachine &TM) : TM(TM) {}
146 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
147
148 private:
149 const TargetMachine &TM;
150 };
151
152 void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &);
153 extern char &AMDGPURewriteOutArgumentsID;
154
155 void initializeGCNDPPCombinePass(PassRegistry &);
156 extern char &GCNDPPCombineID;
157
158 void initializeSIFoldOperandsPass(PassRegistry &);
159 extern char &SIFoldOperandsID;
160
161 void initializeSIPeepholeSDWAPass(PassRegistry &);
162 extern char &SIPeepholeSDWAID;
163
164 void initializeSIShrinkInstructionsPass(PassRegistry&);
165 extern char &SIShrinkInstructionsID;
166
167 void initializeSIFixSGPRCopiesPass(PassRegistry &);
168 extern char &SIFixSGPRCopiesID;
169
170 void initializeSIFixVGPRCopiesPass(PassRegistry &);
171 extern char &SIFixVGPRCopiesID;
172
173 void initializeSILowerWWMCopiesPass(PassRegistry &);
174 extern char &SILowerWWMCopiesID;
175
176 void initializeSILowerI1CopiesPass(PassRegistry &);
177 extern char &SILowerI1CopiesID;
178
179 void initializeAMDGPUGlobalISelDivergenceLoweringPass(PassRegistry &);
180 extern char &AMDGPUGlobalISelDivergenceLoweringID;
181
182 void initializeAMDGPUMarkLastScratchLoadPass(PassRegistry &);
183 extern char &AMDGPUMarkLastScratchLoadID;
184
185 void initializeSILowerSGPRSpillsPass(PassRegistry &);
186 extern char &SILowerSGPRSpillsID;
187
188 void initializeSILoadStoreOptimizerPass(PassRegistry &);
189 extern char &SILoadStoreOptimizerID;
190
191 void initializeSIWholeQuadModePass(PassRegistry &);
192 extern char &SIWholeQuadModeID;
193
194 void initializeSILowerControlFlowPass(PassRegistry &);
195 extern char &SILowerControlFlowID;
196
197 void initializeSIPreEmitPeepholePass(PassRegistry &);
198 extern char &SIPreEmitPeepholeID;
199
200 void initializeSILateBranchLoweringPass(PassRegistry &);
201 extern char &SILateBranchLoweringPassID;
202
203 void initializeSIOptimizeExecMaskingPass(PassRegistry &);
204 extern char &SIOptimizeExecMaskingID;
205
206 void initializeSIPreAllocateWWMRegsPass(PassRegistry &);
207 extern char &SIPreAllocateWWMRegsID;
208
209 void initializeAMDGPUImageIntrinsicOptimizerPass(PassRegistry &);
210 extern char &AMDGPUImageIntrinsicOptimizerID;
211
212 void initializeAMDGPUPerfHintAnalysisPass(PassRegistry &);
213 extern char &AMDGPUPerfHintAnalysisID;
214
215 void initializeGCNRegPressurePrinterPass(PassRegistry &);
216 extern char &GCNRegPressurePrinterID;
217
218 // Passes common to R600 and SI
219 FunctionPass *createAMDGPUPromoteAlloca();
220 void initializeAMDGPUPromoteAllocaPass(PassRegistry&);
221 extern char &AMDGPUPromoteAllocaID;
222
223 FunctionPass *createAMDGPUPromoteAllocaToVector();
224 void initializeAMDGPUPromoteAllocaToVectorPass(PassRegistry&);
225 extern char &AMDGPUPromoteAllocaToVectorID;
226
227 struct AMDGPUPromoteAllocaPass : PassInfoMixin<AMDGPUPromoteAllocaPass> {
AMDGPUPromoteAllocaPassAMDGPUPromoteAllocaPass228 AMDGPUPromoteAllocaPass(TargetMachine &TM) : TM(TM) {}
229 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
230
231 private:
232 TargetMachine &TM;
233 };
234
235 struct AMDGPUPromoteAllocaToVectorPass
236 : PassInfoMixin<AMDGPUPromoteAllocaToVectorPass> {
AMDGPUPromoteAllocaToVectorPassAMDGPUPromoteAllocaToVectorPass237 AMDGPUPromoteAllocaToVectorPass(TargetMachine &TM) : TM(TM) {}
238 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
239
240 private:
241 TargetMachine &TM;
242 };
243
244 struct AMDGPUAtomicOptimizerPass : PassInfoMixin<AMDGPUAtomicOptimizerPass> {
AMDGPUAtomicOptimizerPassAMDGPUAtomicOptimizerPass245 AMDGPUAtomicOptimizerPass(TargetMachine &TM, ScanOptions ScanImpl)
246 : TM(TM), ScanImpl(ScanImpl) {}
247 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
248
249 private:
250 TargetMachine &TM;
251 ScanOptions ScanImpl;
252 };
253
254 Pass *createAMDGPUStructurizeCFGPass();
255 FunctionPass *createAMDGPUISelDag(TargetMachine &TM, CodeGenOptLevel OptLevel);
256 ModulePass *createAMDGPUAlwaysInlinePass(bool GlobalOpt = true);
257
258 struct AMDGPUAlwaysInlinePass : PassInfoMixin<AMDGPUAlwaysInlinePass> {
GlobalOptAMDGPUAlwaysInlinePass259 AMDGPUAlwaysInlinePass(bool GlobalOpt = true) : GlobalOpt(GlobalOpt) {}
260 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
261
262 private:
263 bool GlobalOpt;
264 };
265
266 class AMDGPUCodeGenPreparePass
267 : public PassInfoMixin<AMDGPUCodeGenPreparePass> {
268 private:
269 TargetMachine &TM;
270
271 public:
AMDGPUCodeGenPreparePass(TargetMachine & TM)272 AMDGPUCodeGenPreparePass(TargetMachine &TM) : TM(TM){};
273 PreservedAnalyses run(Function &, FunctionAnalysisManager &);
274 };
275
276 class AMDGPULowerKernelArgumentsPass
277 : public PassInfoMixin<AMDGPULowerKernelArgumentsPass> {
278 private:
279 TargetMachine &TM;
280
281 public:
AMDGPULowerKernelArgumentsPass(TargetMachine & TM)282 AMDGPULowerKernelArgumentsPass(TargetMachine &TM) : TM(TM){};
283 PreservedAnalyses run(Function &, FunctionAnalysisManager &);
284 };
285
286 class AMDGPUAttributorPass : public PassInfoMixin<AMDGPUAttributorPass> {
287 private:
288 TargetMachine &TM;
289
290 public:
AMDGPUAttributorPass(TargetMachine & TM)291 AMDGPUAttributorPass(TargetMachine &TM) : TM(TM){};
292 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
293 };
294
295 FunctionPass *createAMDGPUAnnotateUniformValues();
296
297 ModulePass *createAMDGPUPrintfRuntimeBinding();
298 void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry&);
299 extern char &AMDGPUPrintfRuntimeBindingID;
300
301 void initializeAMDGPUResourceUsageAnalysisPass(PassRegistry &);
302 extern char &AMDGPUResourceUsageAnalysisID;
303
304 struct AMDGPUPrintfRuntimeBindingPass
305 : PassInfoMixin<AMDGPUPrintfRuntimeBindingPass> {
306 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
307 };
308
309 ModulePass* createAMDGPUUnifyMetadataPass();
310 void initializeAMDGPUUnifyMetadataPass(PassRegistry&);
311 extern char &AMDGPUUnifyMetadataID;
312
313 struct AMDGPUUnifyMetadataPass : PassInfoMixin<AMDGPUUnifyMetadataPass> {
314 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
315 };
316
317 void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry&);
318 extern char &SIOptimizeExecMaskingPreRAID;
319
320 void initializeSIOptimizeVGPRLiveRangePass(PassRegistry &);
321 extern char &SIOptimizeVGPRLiveRangeID;
322
323 void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry&);
324 extern char &AMDGPUAnnotateUniformValuesPassID;
325
326 void initializeAMDGPUCodeGenPreparePass(PassRegistry&);
327 extern char &AMDGPUCodeGenPrepareID;
328
329 void initializeAMDGPURemoveIncompatibleFunctionsPass(PassRegistry &);
330 extern char &AMDGPURemoveIncompatibleFunctionsID;
331
332 void initializeAMDGPULateCodeGenPreparePass(PassRegistry &);
333 extern char &AMDGPULateCodeGenPrepareID;
334
335 FunctionPass *createAMDGPURewriteUndefForPHILegacyPass();
336 void initializeAMDGPURewriteUndefForPHILegacyPass(PassRegistry &);
337 extern char &AMDGPURewriteUndefForPHILegacyPassID;
338
339 class AMDGPURewriteUndefForPHIPass
340 : public PassInfoMixin<AMDGPURewriteUndefForPHIPass> {
341 public:
342 AMDGPURewriteUndefForPHIPass() = default;
343 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
344 };
345
346 void initializeSIAnnotateControlFlowPass(PassRegistry&);
347 extern char &SIAnnotateControlFlowPassID;
348
349 void initializeSIMemoryLegalizerPass(PassRegistry&);
350 extern char &SIMemoryLegalizerID;
351
352 void initializeSIModeRegisterPass(PassRegistry&);
353 extern char &SIModeRegisterID;
354
355 void initializeAMDGPUInsertDelayAluPass(PassRegistry &);
356 extern char &AMDGPUInsertDelayAluID;
357
358 void initializeAMDGPUInsertSingleUseVDSTPass(PassRegistry &);
359 extern char &AMDGPUInsertSingleUseVDSTID;
360
361 void initializeSIInsertHardClausesPass(PassRegistry &);
362 extern char &SIInsertHardClausesID;
363
364 void initializeSIInsertWaitcntsPass(PassRegistry&);
365 extern char &SIInsertWaitcntsID;
366
367 void initializeSIFormMemoryClausesPass(PassRegistry&);
368 extern char &SIFormMemoryClausesID;
369
370 void initializeSIPostRABundlerPass(PassRegistry&);
371 extern char &SIPostRABundlerID;
372
373 void initializeGCNCreateVOPDPass(PassRegistry &);
374 extern char &GCNCreateVOPDID;
375
376 void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry&);
377 extern char &AMDGPUUnifyDivergentExitNodesID;
378
379 ImmutablePass *createAMDGPUAAWrapperPass();
380 void initializeAMDGPUAAWrapperPassPass(PassRegistry&);
381 ImmutablePass *createAMDGPUExternalAAWrapperPass();
382 void initializeAMDGPUExternalAAWrapperPass(PassRegistry&);
383
384 void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &);
385
386 ModulePass *createAMDGPUOpenCLEnqueuedBlockLoweringPass();
387 void initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(PassRegistry &);
388 extern char &AMDGPUOpenCLEnqueuedBlockLoweringID;
389
390 void initializeGCNNSAReassignPass(PassRegistry &);
391 extern char &GCNNSAReassignID;
392
393 void initializeGCNPreRALongBranchRegPass(PassRegistry &);
394 extern char &GCNPreRALongBranchRegID;
395
396 void initializeGCNPreRAOptimizationsPass(PassRegistry &);
397 extern char &GCNPreRAOptimizationsID;
398
399 FunctionPass *createAMDGPUSetWavePriorityPass();
400 void initializeAMDGPUSetWavePriorityPass(PassRegistry &);
401
402 void initializeGCNRewritePartialRegUsesPass(llvm::PassRegistry &);
403 extern char &GCNRewritePartialRegUsesID;
404
405 namespace AMDGPU {
/// Target index identifiers. The numeric values are spelled out explicitly;
/// they are the same consecutive values (starting at zero) that implicit
/// numbering would assign.
enum TargetIndex {
  TI_CONSTDATA_START = 0,
  TI_SCRATCH_RSRC_DWORD0 = 1,
  TI_SCRATCH_RSRC_DWORD1 = 2,
  TI_SCRATCH_RSRC_DWORD2 = 3,
  TI_SCRATCH_RSRC_DWORD3 = 4
};
413
414 // FIXME: Missing constant_32bit
isFlatGlobalAddrSpace(unsigned AS)415 inline bool isFlatGlobalAddrSpace(unsigned AS) {
416 return AS == AMDGPUAS::GLOBAL_ADDRESS ||
417 AS == AMDGPUAS::FLAT_ADDRESS ||
418 AS == AMDGPUAS::CONSTANT_ADDRESS ||
419 AS > AMDGPUAS::MAX_AMDGPU_ADDRESS;
420 }
421
isExtendedGlobalAddrSpace(unsigned AS)422 inline bool isExtendedGlobalAddrSpace(unsigned AS) {
423 return AS == AMDGPUAS::GLOBAL_ADDRESS || AS == AMDGPUAS::CONSTANT_ADDRESS ||
424 AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
425 AS > AMDGPUAS::MAX_AMDGPU_ADDRESS;
426 }
427
addrspacesMayAlias(unsigned AS1,unsigned AS2)428 static inline bool addrspacesMayAlias(unsigned AS1, unsigned AS2) {
429 static_assert(AMDGPUAS::MAX_AMDGPU_ADDRESS <= 9, "Addr space out of range");
430
431 if (AS1 > AMDGPUAS::MAX_AMDGPU_ADDRESS || AS2 > AMDGPUAS::MAX_AMDGPU_ADDRESS)
432 return true;
433
434 // This array is indexed by address space value enum elements 0 ... to 9
435 // clang-format off
436 static const bool ASAliasRules[10][10] = {
437 /* Flat Global Region Group Constant Private Const32 BufFatPtr BufRsrc BufStrdPtr */
438 /* Flat */ {true, true, false, true, true, true, true, true, true, true},
439 /* Global */ {true, true, false, false, true, false, true, true, true, true},
440 /* Region */ {false, false, true, false, false, false, false, false, false, false},
441 /* Group */ {true, false, false, true, false, false, false, false, false, false},
442 /* Constant */ {true, true, false, false, false, false, true, true, true, true},
443 /* Private */ {true, false, false, false, false, true, false, false, false, false},
444 /* Constant 32-bit */ {true, true, false, false, true, false, false, true, true, true},
445 /* Buffer Fat Ptr */ {true, true, false, false, true, false, true, true, true, true},
446 /* Buffer Resource */ {true, true, false, false, true, false, true, true, true, true},
447 /* Buffer Strided Ptr */ {true, true, false, false, true, false, true, true, true, true},
448 };
449 // clang-format on
450
451 return ASAliasRules[AS1][AS2];
452 }
453
454 }
455
456 } // End namespace llvm
457
458 #endif
459