1 //===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 /// \file 8 //===----------------------------------------------------------------------===// 9 10 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPU_H 11 #define LLVM_LIB_TARGET_AMDGPU_AMDGPU_H 12 13 #include "llvm/IR/PassManager.h" 14 #include "llvm/Pass.h" 15 #include "llvm/Support/AMDGPUAddrSpace.h" 16 #include "llvm/Support/CodeGen.h" 17 18 namespace llvm { 19 20 class AMDGPUTargetMachine; 21 class TargetMachine; 22 23 // GlobalISel passes 24 void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &); 25 FunctionPass *createAMDGPUPreLegalizeCombiner(bool IsOptNone); 26 void initializeAMDGPUPostLegalizerCombinerPass(PassRegistry &); 27 FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone); 28 FunctionPass *createAMDGPURegBankCombiner(bool IsOptNone); 29 void initializeAMDGPURegBankCombinerPass(PassRegistry &); 30 31 void initializeAMDGPURegBankSelectPass(PassRegistry &); 32 33 // SI Passes 34 FunctionPass *createGCNDPPCombinePass(); 35 FunctionPass *createSIAnnotateControlFlowPass(); 36 FunctionPass *createSIFoldOperandsPass(); 37 FunctionPass *createSIPeepholeSDWAPass(); 38 FunctionPass *createSILowerI1CopiesPass(); 39 FunctionPass *createAMDGPUGlobalISelDivergenceLoweringPass(); 40 FunctionPass *createSIShrinkInstructionsPass(); 41 FunctionPass *createSILoadStoreOptimizerPass(); 42 FunctionPass *createSIWholeQuadModePass(); 43 FunctionPass *createSIFixControlFlowLiveIntervalsPass(); 44 FunctionPass *createSIOptimizeExecMaskingPreRAPass(); 45 FunctionPass *createSIOptimizeVGPRLiveRangePass(); 46 FunctionPass *createSIFixSGPRCopiesPass(); 47 FunctionPass *createLowerWWMCopiesPass(); 48 FunctionPass *createSIMemoryLegalizerPass(); 49 FunctionPass *createSIInsertWaitcntsPass(); 50 FunctionPass *createSIPreAllocateWWMRegsPass(); 51 FunctionPass *createSIFormMemoryClausesPass(); 52 53 FunctionPass *createSIPostRABundlerPass(); 54 FunctionPass *createAMDGPUImageIntrinsicOptimizerPass(const TargetMachine *); 55 ModulePass *createAMDGPURemoveIncompatibleFunctionsPass(const TargetMachine *); 56 FunctionPass *createAMDGPUCodeGenPreparePass(); 57 FunctionPass *createAMDGPULateCodeGenPreparePass(); 58 FunctionPass *createAMDGPUMachineCFGStructurizerPass(); 59 FunctionPass *createAMDGPURewriteOutArgumentsPass(); 60 ModulePass * 61 createAMDGPULowerModuleLDSLegacyPass(const AMDGPUTargetMachine *TM = nullptr); 62 FunctionPass *createSIModeRegisterPass(); 63 FunctionPass *createGCNPreRAOptimizationsPass(); 64 65 struct AMDGPUSimplifyLibCallsPass : PassInfoMixin<AMDGPUSimplifyLibCallsPass> { 66 AMDGPUSimplifyLibCallsPass() {} 67 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 68 }; 69 70 struct AMDGPUImageIntrinsicOptimizerPass 71 : PassInfoMixin<AMDGPUImageIntrinsicOptimizerPass> { 72 AMDGPUImageIntrinsicOptimizerPass(TargetMachine &TM) : TM(TM) {} 73 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 74 75 private: 76 TargetMachine &TM; 77 }; 78 79 struct AMDGPUUseNativeCallsPass : PassInfoMixin<AMDGPUUseNativeCallsPass> { 80 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 81 }; 82 83 void initializeAMDGPUDAGToDAGISelPass(PassRegistry&); 84 85 void initializeAMDGPUMachineCFGStructurizerPass(PassRegistry&); 86 extern char &AMDGPUMachineCFGStructurizerID; 87 88 void initializeAMDGPUAlwaysInlinePass(PassRegistry&); 89 90 Pass *createAMDGPUAnnotateKernelFeaturesPass(); 91 Pass *createAMDGPUAttributorLegacyPass(); 92 void initializeAMDGPUAttributorLegacyPass(PassRegistry &); 93 void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &); 94 extern char &AMDGPUAnnotateKernelFeaturesID; 95 96 // DPP/Iterative option enables the atomic optimizer with given strategy 97 // whereas None disables the atomic optimizer. 98 enum class ScanOptions { DPP, Iterative, None }; 99 FunctionPass *createAMDGPUAtomicOptimizerPass(ScanOptions ScanStrategy); 100 void initializeAMDGPUAtomicOptimizerPass(PassRegistry &); 101 extern char &AMDGPUAtomicOptimizerID; 102 103 ModulePass *createAMDGPUCtorDtorLoweringLegacyPass(); 104 void initializeAMDGPUCtorDtorLoweringLegacyPass(PassRegistry &); 105 extern char &AMDGPUCtorDtorLoweringLegacyPassID; 106 107 FunctionPass *createAMDGPULowerKernelArgumentsPass(); 108 void initializeAMDGPULowerKernelArgumentsPass(PassRegistry &); 109 extern char &AMDGPULowerKernelArgumentsID; 110 111 FunctionPass *createAMDGPUPromoteKernelArgumentsPass(); 112 void initializeAMDGPUPromoteKernelArgumentsPass(PassRegistry &); 113 extern char &AMDGPUPromoteKernelArgumentsID; 114 115 struct AMDGPUPromoteKernelArgumentsPass 116 : PassInfoMixin<AMDGPUPromoteKernelArgumentsPass> { 117 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 118 }; 119 120 ModulePass *createAMDGPULowerKernelAttributesPass(); 121 void initializeAMDGPULowerKernelAttributesPass(PassRegistry &); 122 extern char &AMDGPULowerKernelAttributesID; 123 124 struct AMDGPULowerKernelAttributesPass 125 : PassInfoMixin<AMDGPULowerKernelAttributesPass> { 126 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 127 }; 128 129 void initializeAMDGPULowerModuleLDSLegacyPass(PassRegistry &); 130 extern char &AMDGPULowerModuleLDSLegacyPassID; 131 132 struct AMDGPULowerModuleLDSPass : PassInfoMixin<AMDGPULowerModuleLDSPass> { 133 const AMDGPUTargetMachine &TM; 134 AMDGPULowerModuleLDSPass(const AMDGPUTargetMachine &TM_) : TM(TM_) {} 135 136 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 137 }; 138 139 void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &); 140 extern char &AMDGPURewriteOutArgumentsID; 141 142 void initializeGCNDPPCombinePass(PassRegistry &); 143 extern char &GCNDPPCombineID; 144 145 void initializeSIFoldOperandsPass(PassRegistry &); 146 extern char &SIFoldOperandsID; 147 148 void initializeSIPeepholeSDWAPass(PassRegistry &); 149 extern char &SIPeepholeSDWAID; 150 151 void initializeSIShrinkInstructionsPass(PassRegistry&); 152 extern char &SIShrinkInstructionsID; 153 154 void initializeSIFixSGPRCopiesPass(PassRegistry &); 155 extern char &SIFixSGPRCopiesID; 156 157 void initializeSIFixVGPRCopiesPass(PassRegistry &); 158 extern char &SIFixVGPRCopiesID; 159 160 void initializeSILowerWWMCopiesPass(PassRegistry &); 161 extern char &SILowerWWMCopiesID; 162 163 void initializeSILowerI1CopiesPass(PassRegistry &); 164 extern char &SILowerI1CopiesID; 165 166 void initializeAMDGPUGlobalISelDivergenceLoweringPass(PassRegistry &); 167 extern char &AMDGPUGlobalISelDivergenceLoweringID; 168 169 void initializeAMDGPUMarkLastScratchLoadPass(PassRegistry &); 170 extern char &AMDGPUMarkLastScratchLoadID; 171 172 void initializeSILowerSGPRSpillsPass(PassRegistry &); 173 extern char &SILowerSGPRSpillsID; 174 175 void initializeSILoadStoreOptimizerPass(PassRegistry &); 176 extern char &SILoadStoreOptimizerID; 177 178 void initializeSIWholeQuadModePass(PassRegistry &); 179 extern char &SIWholeQuadModeID; 180 181 void initializeSILowerControlFlowPass(PassRegistry &); 182 extern char &SILowerControlFlowID; 183 184 void initializeSIPreEmitPeepholePass(PassRegistry &); 185 extern char &SIPreEmitPeepholeID; 186 187 void initializeSILateBranchLoweringPass(PassRegistry &); 188 extern char &SILateBranchLoweringPassID; 189 190 void initializeSIOptimizeExecMaskingPass(PassRegistry &); 191 extern char &SIOptimizeExecMaskingID; 192 193 void initializeSIPreAllocateWWMRegsPass(PassRegistry &); 194 extern char &SIPreAllocateWWMRegsID; 195 196 void initializeAMDGPUImageIntrinsicOptimizerPass(PassRegistry &); 197 extern char &AMDGPUImageIntrinsicOptimizerID; 198 199 void initializeAMDGPUPerfHintAnalysisPass(PassRegistry &); 200 extern char &AMDGPUPerfHintAnalysisID; 201 202 void initializeGCNRegPressurePrinterPass(PassRegistry &); 203 extern char &GCNRegPressurePrinterID; 204 205 // Passes common to R600 and SI 206 FunctionPass *createAMDGPUPromoteAlloca(); 207 void initializeAMDGPUPromoteAllocaPass(PassRegistry&); 208 extern char &AMDGPUPromoteAllocaID; 209 210 FunctionPass *createAMDGPUPromoteAllocaToVector(); 211 void initializeAMDGPUPromoteAllocaToVectorPass(PassRegistry&); 212 extern char &AMDGPUPromoteAllocaToVectorID; 213 214 struct AMDGPUPromoteAllocaPass : PassInfoMixin<AMDGPUPromoteAllocaPass> { 215 AMDGPUPromoteAllocaPass(TargetMachine &TM) : TM(TM) {} 216 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 217 218 private: 219 TargetMachine &TM; 220 }; 221 222 struct AMDGPUPromoteAllocaToVectorPass 223 : PassInfoMixin<AMDGPUPromoteAllocaToVectorPass> { 224 AMDGPUPromoteAllocaToVectorPass(TargetMachine &TM) : TM(TM) {} 225 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 226 227 private: 228 TargetMachine &TM; 229 }; 230 231 struct AMDGPUAtomicOptimizerPass : PassInfoMixin<AMDGPUAtomicOptimizerPass> { 232 AMDGPUAtomicOptimizerPass(TargetMachine &TM, ScanOptions ScanImpl) 233 : TM(TM), ScanImpl(ScanImpl) {} 234 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 235 236 private: 237 TargetMachine &TM; 238 ScanOptions ScanImpl; 239 }; 240 241 Pass *createAMDGPUStructurizeCFGPass(); 242 FunctionPass *createAMDGPUISelDag(TargetMachine &TM, CodeGenOptLevel OptLevel); 243 ModulePass *createAMDGPUAlwaysInlinePass(bool GlobalOpt = true); 244 245 struct AMDGPUAlwaysInlinePass : PassInfoMixin<AMDGPUAlwaysInlinePass> { 246 AMDGPUAlwaysInlinePass(bool GlobalOpt = true) : GlobalOpt(GlobalOpt) {} 247 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 248 249 private: 250 bool GlobalOpt; 251 }; 252 253 class AMDGPUCodeGenPreparePass 254 : public PassInfoMixin<AMDGPUCodeGenPreparePass> { 255 private: 256 TargetMachine &TM; 257 258 public: 259 AMDGPUCodeGenPreparePass(TargetMachine &TM) : TM(TM){}; 260 PreservedAnalyses run(Function &, FunctionAnalysisManager &); 261 }; 262 263 class AMDGPULowerKernelArgumentsPass 264 : public PassInfoMixin<AMDGPULowerKernelArgumentsPass> { 265 private: 266 TargetMachine &TM; 267 268 public: 269 AMDGPULowerKernelArgumentsPass(TargetMachine &TM) : TM(TM){}; 270 PreservedAnalyses run(Function &, FunctionAnalysisManager &); 271 }; 272 273 class AMDGPUAttributorPass : public PassInfoMixin<AMDGPUAttributorPass> { 274 private: 275 TargetMachine &TM; 276 277 public: 278 AMDGPUAttributorPass(TargetMachine &TM) : TM(TM){}; 279 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 280 }; 281 282 FunctionPass *createAMDGPUAnnotateUniformValues(); 283 284 ModulePass *createAMDGPUPrintfRuntimeBinding(); 285 void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry&); 286 extern char &AMDGPUPrintfRuntimeBindingID; 287 288 void initializeAMDGPUResourceUsageAnalysisPass(PassRegistry &); 289 extern char &AMDGPUResourceUsageAnalysisID; 290 291 struct AMDGPUPrintfRuntimeBindingPass 292 : PassInfoMixin<AMDGPUPrintfRuntimeBindingPass> { 293 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 294 }; 295 296 ModulePass* createAMDGPUUnifyMetadataPass(); 297 void initializeAMDGPUUnifyMetadataPass(PassRegistry&); 298 extern char &AMDGPUUnifyMetadataID; 299 300 struct AMDGPUUnifyMetadataPass : PassInfoMixin<AMDGPUUnifyMetadataPass> { 301 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 302 }; 303 304 void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry&); 305 extern char &SIOptimizeExecMaskingPreRAID; 306 307 void initializeSIOptimizeVGPRLiveRangePass(PassRegistry &); 308 extern char &SIOptimizeVGPRLiveRangeID; 309 310 void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry&); 311 extern char &AMDGPUAnnotateUniformValuesPassID; 312 313 void initializeAMDGPUCodeGenPreparePass(PassRegistry&); 314 extern char &AMDGPUCodeGenPrepareID; 315 316 void initializeAMDGPURemoveIncompatibleFunctionsPass(PassRegistry &); 317 extern char &AMDGPURemoveIncompatibleFunctionsID; 318 319 void initializeAMDGPULateCodeGenPreparePass(PassRegistry &); 320 extern char &AMDGPULateCodeGenPrepareID; 321 322 FunctionPass *createAMDGPURewriteUndefForPHILegacyPass(); 323 void initializeAMDGPURewriteUndefForPHILegacyPass(PassRegistry &); 324 extern char &AMDGPURewriteUndefForPHILegacyPassID; 325 326 class AMDGPURewriteUndefForPHIPass 327 : public PassInfoMixin<AMDGPURewriteUndefForPHIPass> { 328 public: 329 AMDGPURewriteUndefForPHIPass() = default; 330 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 331 }; 332 333 void initializeSIAnnotateControlFlowPass(PassRegistry&); 334 extern char &SIAnnotateControlFlowPassID; 335 336 void initializeSIMemoryLegalizerPass(PassRegistry&); 337 extern char &SIMemoryLegalizerID; 338 339 void initializeSIModeRegisterPass(PassRegistry&); 340 extern char &SIModeRegisterID; 341 342 void initializeAMDGPUInsertDelayAluPass(PassRegistry &); 343 extern char &AMDGPUInsertDelayAluID; 344 345 void initializeAMDGPUInsertSingleUseVDSTPass(PassRegistry &); 346 extern char &AMDGPUInsertSingleUseVDSTID; 347 348 void initializeSIInsertHardClausesPass(PassRegistry &); 349 extern char &SIInsertHardClausesID; 350 351 void initializeSIInsertWaitcntsPass(PassRegistry&); 352 extern char &SIInsertWaitcntsID; 353 354 void initializeSIFormMemoryClausesPass(PassRegistry&); 355 extern char &SIFormMemoryClausesID; 356 357 void initializeSIPostRABundlerPass(PassRegistry&); 358 extern char &SIPostRABundlerID; 359 360 void initializeGCNCreateVOPDPass(PassRegistry &); 361 extern char &GCNCreateVOPDID; 362 363 void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry&); 364 extern char &AMDGPUUnifyDivergentExitNodesID; 365 366 ImmutablePass *createAMDGPUAAWrapperPass(); 367 void initializeAMDGPUAAWrapperPassPass(PassRegistry&); 368 ImmutablePass *createAMDGPUExternalAAWrapperPass(); 369 void initializeAMDGPUExternalAAWrapperPass(PassRegistry&); 370 371 void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &); 372 373 ModulePass *createAMDGPUOpenCLEnqueuedBlockLoweringPass(); 374 void initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(PassRegistry &); 375 extern char &AMDGPUOpenCLEnqueuedBlockLoweringID; 376 377 void initializeGCNNSAReassignPass(PassRegistry &); 378 extern char &GCNNSAReassignID; 379 380 void initializeGCNPreRALongBranchRegPass(PassRegistry &); 381 extern char &GCNPreRALongBranchRegID; 382 383 void initializeGCNPreRAOptimizationsPass(PassRegistry &); 384 extern char &GCNPreRAOptimizationsID; 385 386 FunctionPass *createAMDGPUSetWavePriorityPass(); 387 void initializeAMDGPUSetWavePriorityPass(PassRegistry &); 388 389 void initializeGCNRewritePartialRegUsesPass(llvm::PassRegistry &); 390 extern char &GCNRewritePartialRegUsesID; 391 392 namespace AMDGPU { 393 enum TargetIndex { 394 TI_CONSTDATA_START, 395 TI_SCRATCH_RSRC_DWORD0, 396 TI_SCRATCH_RSRC_DWORD1, 397 TI_SCRATCH_RSRC_DWORD2, 398 TI_SCRATCH_RSRC_DWORD3 399 }; 400 401 // FIXME: Missing constant_32bit 402 inline bool isFlatGlobalAddrSpace(unsigned AS) { 403 return AS == AMDGPUAS::GLOBAL_ADDRESS || 404 AS == AMDGPUAS::FLAT_ADDRESS || 405 AS == AMDGPUAS::CONSTANT_ADDRESS || 406 AS > AMDGPUAS::MAX_AMDGPU_ADDRESS; 407 } 408 409 inline bool isExtendedGlobalAddrSpace(unsigned AS) { 410 return AS == AMDGPUAS::GLOBAL_ADDRESS || AS == AMDGPUAS::CONSTANT_ADDRESS || 411 AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT || 412 AS > AMDGPUAS::MAX_AMDGPU_ADDRESS; 413 } 414 415 static inline bool addrspacesMayAlias(unsigned AS1, unsigned AS2) { 416 static_assert(AMDGPUAS::MAX_AMDGPU_ADDRESS <= 9, "Addr space out of range"); 417 418 if (AS1 > AMDGPUAS::MAX_AMDGPU_ADDRESS || AS2 > AMDGPUAS::MAX_AMDGPU_ADDRESS) 419 return true; 420 421 // This array is indexed by address space value enum elements 0 ... to 9 422 // clang-format off 423 static const bool ASAliasRules[10][10] = { 424 /* Flat Global Region Group Constant Private Const32 BufFatPtr BufRsrc BufStrdPtr */ 425 /* Flat */ {true, true, false, true, true, true, true, true, true, true}, 426 /* Global */ {true, true, false, false, true, false, true, true, true, true}, 427 /* Region */ {false, false, true, false, false, false, false, false, false, false}, 428 /* Group */ {true, false, false, true, false, false, false, false, false, false}, 429 /* Constant */ {true, true, false, false, false, false, true, true, true, true}, 430 /* Private */ {true, false, false, false, false, true, false, false, false, false}, 431 /* Constant 32-bit */ {true, true, false, false, true, false, false, true, true, true}, 432 /* Buffer Fat Ptr */ {true, true, false, false, true, false, true, true, true, true}, 433 /* Buffer Resource */ {true, true, false, false, true, false, true, true, true, true}, 434 /* Buffer Strided Ptr */ {true, true, false, false, true, false, true, true, true, true}, 435 }; 436 // clang-format on 437 438 return ASAliasRules[AS1][AS2]; 439 } 440 441 } 442 443 } // End namespace llvm 444 445 #endif 446