1 //===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 /// \file 8 //===----------------------------------------------------------------------===// 9 10 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPU_H 11 #define LLVM_LIB_TARGET_AMDGPU_AMDGPU_H 12 13 #include "llvm/IR/PassManager.h" 14 #include "llvm/Pass.h" 15 #include "llvm/Support/AMDGPUAddrSpace.h" 16 #include "llvm/Support/CodeGen.h" 17 18 namespace llvm { 19 20 class AMDGPUTargetMachine; 21 class TargetMachine; 22 23 // GlobalISel passes 24 void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &); 25 FunctionPass *createAMDGPUPreLegalizeCombiner(bool IsOptNone); 26 void initializeAMDGPUPostLegalizerCombinerPass(PassRegistry &); 27 FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone); 28 FunctionPass *createAMDGPURegBankCombiner(bool IsOptNone); 29 void initializeAMDGPURegBankCombinerPass(PassRegistry &); 30 31 void initializeAMDGPURegBankSelectPass(PassRegistry &); 32 33 // SI Passes 34 FunctionPass *createGCNDPPCombinePass(); 35 FunctionPass *createSIAnnotateControlFlowPass(); 36 FunctionPass *createSIFoldOperandsPass(); 37 FunctionPass *createSIPeepholeSDWAPass(); 38 FunctionPass *createSILowerI1CopiesPass(); 39 FunctionPass *createAMDGPUGlobalISelDivergenceLoweringPass(); 40 FunctionPass *createSIShrinkInstructionsPass(); 41 FunctionPass *createSILoadStoreOptimizerPass(); 42 FunctionPass *createSIWholeQuadModePass(); 43 FunctionPass *createSIFixControlFlowLiveIntervalsPass(); 44 FunctionPass *createSIOptimizeExecMaskingPreRAPass(); 45 FunctionPass *createSIOptimizeVGPRLiveRangePass(); 46 FunctionPass *createSIFixSGPRCopiesPass(); 47 FunctionPass *createLowerWWMCopiesPass(); 48 FunctionPass *createSIMemoryLegalizerPass(); 49 FunctionPass *createSIInsertWaitcntsPass(); 50 FunctionPass *createSIPreAllocateWWMRegsPass(); 51 FunctionPass *createSIFormMemoryClausesPass(); 52 53 FunctionPass *createSIPostRABundlerPass(); 54 FunctionPass *createAMDGPUImageIntrinsicOptimizerPass(const TargetMachine *); 55 ModulePass *createAMDGPURemoveIncompatibleFunctionsPass(const TargetMachine *); 56 FunctionPass *createAMDGPUCodeGenPreparePass(); 57 FunctionPass *createAMDGPULateCodeGenPreparePass(); 58 FunctionPass *createAMDGPUMachineCFGStructurizerPass(); 59 FunctionPass *createAMDGPURewriteOutArgumentsPass(); 60 ModulePass * 61 createAMDGPULowerModuleLDSLegacyPass(const AMDGPUTargetMachine *TM = nullptr); 62 FunctionPass *createSIModeRegisterPass(); 63 FunctionPass *createGCNPreRAOptimizationsPass(); 64 65 struct AMDGPUSimplifyLibCallsPass : PassInfoMixin<AMDGPUSimplifyLibCallsPass> { 66 AMDGPUSimplifyLibCallsPass() {} 67 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 68 }; 69 70 struct AMDGPUImageIntrinsicOptimizerPass 71 : PassInfoMixin<AMDGPUImageIntrinsicOptimizerPass> { 72 AMDGPUImageIntrinsicOptimizerPass(TargetMachine &TM) : TM(TM) {} 73 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 74 75 private: 76 TargetMachine &TM; 77 }; 78 79 struct AMDGPUUseNativeCallsPass : PassInfoMixin<AMDGPUUseNativeCallsPass> { 80 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 81 }; 82 83 void initializeAMDGPUDAGToDAGISelPass(PassRegistry&); 84 85 void initializeAMDGPUMachineCFGStructurizerPass(PassRegistry&); 86 extern char &AMDGPUMachineCFGStructurizerID; 87 88 void initializeAMDGPUAlwaysInlinePass(PassRegistry&); 89 90 Pass *createAMDGPUAnnotateKernelFeaturesPass(); 91 Pass *createAMDGPUAttributorLegacyPass(); 92 void initializeAMDGPUAttributorLegacyPass(PassRegistry &); 93 void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &); 94 extern char &AMDGPUAnnotateKernelFeaturesID; 95 96 // DPP/Iterative option enables the atomic optimizer with given strategy 97 // whereas None disables the atomic optimizer. 98 enum class ScanOptions { DPP, Iterative, None }; 99 FunctionPass *createAMDGPUAtomicOptimizerPass(ScanOptions ScanStrategy); 100 void initializeAMDGPUAtomicOptimizerPass(PassRegistry &); 101 extern char &AMDGPUAtomicOptimizerID; 102 103 ModulePass *createAMDGPUCtorDtorLoweringLegacyPass(); 104 void initializeAMDGPUCtorDtorLoweringLegacyPass(PassRegistry &); 105 extern char &AMDGPUCtorDtorLoweringLegacyPassID; 106 107 FunctionPass *createAMDGPULowerKernelArgumentsPass(); 108 void initializeAMDGPULowerKernelArgumentsPass(PassRegistry &); 109 extern char &AMDGPULowerKernelArgumentsID; 110 111 FunctionPass *createAMDGPUPromoteKernelArgumentsPass(); 112 void initializeAMDGPUPromoteKernelArgumentsPass(PassRegistry &); 113 extern char &AMDGPUPromoteKernelArgumentsID; 114 115 struct AMDGPUPromoteKernelArgumentsPass 116 : PassInfoMixin<AMDGPUPromoteKernelArgumentsPass> { 117 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 118 }; 119 120 ModulePass *createAMDGPULowerKernelAttributesPass(); 121 void initializeAMDGPULowerKernelAttributesPass(PassRegistry &); 122 extern char &AMDGPULowerKernelAttributesID; 123 124 struct AMDGPULowerKernelAttributesPass 125 : PassInfoMixin<AMDGPULowerKernelAttributesPass> { 126 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 127 }; 128 129 void initializeAMDGPULowerModuleLDSLegacyPass(PassRegistry &); 130 extern char &AMDGPULowerModuleLDSLegacyPassID; 131 132 struct AMDGPULowerModuleLDSPass : PassInfoMixin<AMDGPULowerModuleLDSPass> { 133 const AMDGPUTargetMachine &TM; 134 AMDGPULowerModuleLDSPass(const AMDGPUTargetMachine &TM_) : TM(TM_) {} 135 136 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 137 }; 138 139 void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &); 140 extern char &AMDGPURewriteOutArgumentsID; 141 142 void initializeGCNDPPCombinePass(PassRegistry &); 143 extern char &GCNDPPCombineID; 144 145 void initializeSIFoldOperandsPass(PassRegistry &); 146 extern char &SIFoldOperandsID; 147 148 void initializeSIPeepholeSDWAPass(PassRegistry &); 149 extern char &SIPeepholeSDWAID; 150 151 void initializeSIShrinkInstructionsPass(PassRegistry&); 152 extern char &SIShrinkInstructionsID; 153 154 void initializeSIFixSGPRCopiesPass(PassRegistry &); 155 extern char &SIFixSGPRCopiesID; 156 157 void initializeSIFixVGPRCopiesPass(PassRegistry &); 158 extern char &SIFixVGPRCopiesID; 159 160 void initializeSILowerWWMCopiesPass(PassRegistry &); 161 extern char &SILowerWWMCopiesID; 162 163 void initializeSILowerI1CopiesPass(PassRegistry &); 164 extern char &SILowerI1CopiesID; 165 166 void initializeAMDGPUGlobalISelDivergenceLoweringPass(PassRegistry &); 167 extern char &AMDGPUGlobalISelDivergenceLoweringID; 168 169 void initializeSILowerSGPRSpillsPass(PassRegistry &); 170 extern char &SILowerSGPRSpillsID; 171 172 void initializeSILoadStoreOptimizerPass(PassRegistry &); 173 extern char &SILoadStoreOptimizerID; 174 175 void initializeSIWholeQuadModePass(PassRegistry &); 176 extern char &SIWholeQuadModeID; 177 178 void initializeSILowerControlFlowPass(PassRegistry &); 179 extern char &SILowerControlFlowID; 180 181 void initializeSIPreEmitPeepholePass(PassRegistry &); 182 extern char &SIPreEmitPeepholeID; 183 184 void initializeSILateBranchLoweringPass(PassRegistry &); 185 extern char &SILateBranchLoweringPassID; 186 187 void initializeSIOptimizeExecMaskingPass(PassRegistry &); 188 extern char &SIOptimizeExecMaskingID; 189 190 void initializeSIPreAllocateWWMRegsPass(PassRegistry &); 191 extern char &SIPreAllocateWWMRegsID; 192 193 void initializeAMDGPUImageIntrinsicOptimizerPass(PassRegistry &); 194 extern char &AMDGPUImageIntrinsicOptimizerID; 195 196 void initializeAMDGPUPerfHintAnalysisPass(PassRegistry &); 197 extern char &AMDGPUPerfHintAnalysisID; 198 199 void initializeGCNRegPressurePrinterPass(PassRegistry &); 200 extern char &GCNRegPressurePrinterID; 201 202 // Passes common to R600 and SI 203 FunctionPass *createAMDGPUPromoteAlloca(); 204 void initializeAMDGPUPromoteAllocaPass(PassRegistry&); 205 extern char &AMDGPUPromoteAllocaID; 206 207 FunctionPass *createAMDGPUPromoteAllocaToVector(); 208 void initializeAMDGPUPromoteAllocaToVectorPass(PassRegistry&); 209 extern char &AMDGPUPromoteAllocaToVectorID; 210 211 struct AMDGPUPromoteAllocaPass : PassInfoMixin<AMDGPUPromoteAllocaPass> { 212 AMDGPUPromoteAllocaPass(TargetMachine &TM) : TM(TM) {} 213 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 214 215 private: 216 TargetMachine &TM; 217 }; 218 219 struct AMDGPUPromoteAllocaToVectorPass 220 : PassInfoMixin<AMDGPUPromoteAllocaToVectorPass> { 221 AMDGPUPromoteAllocaToVectorPass(TargetMachine &TM) : TM(TM) {} 222 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 223 224 private: 225 TargetMachine &TM; 226 }; 227 228 struct AMDGPUAtomicOptimizerPass : PassInfoMixin<AMDGPUAtomicOptimizerPass> { 229 AMDGPUAtomicOptimizerPass(TargetMachine &TM, ScanOptions ScanImpl) 230 : TM(TM), ScanImpl(ScanImpl) {} 231 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 232 233 private: 234 TargetMachine &TM; 235 ScanOptions ScanImpl; 236 }; 237 238 Pass *createAMDGPUStructurizeCFGPass(); 239 FunctionPass *createAMDGPUISelDag(TargetMachine &TM, CodeGenOptLevel OptLevel); 240 ModulePass *createAMDGPUAlwaysInlinePass(bool GlobalOpt = true); 241 242 struct AMDGPUAlwaysInlinePass : PassInfoMixin<AMDGPUAlwaysInlinePass> { 243 AMDGPUAlwaysInlinePass(bool GlobalOpt = true) : GlobalOpt(GlobalOpt) {} 244 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 245 246 private: 247 bool GlobalOpt; 248 }; 249 250 class AMDGPUCodeGenPreparePass 251 : public PassInfoMixin<AMDGPUCodeGenPreparePass> { 252 private: 253 TargetMachine &TM; 254 255 public: 256 AMDGPUCodeGenPreparePass(TargetMachine &TM) : TM(TM){}; 257 PreservedAnalyses run(Function &, FunctionAnalysisManager &); 258 }; 259 260 class AMDGPULowerKernelArgumentsPass 261 : public PassInfoMixin<AMDGPULowerKernelArgumentsPass> { 262 private: 263 TargetMachine &TM; 264 265 public: 266 AMDGPULowerKernelArgumentsPass(TargetMachine &TM) : TM(TM){}; 267 PreservedAnalyses run(Function &, FunctionAnalysisManager &); 268 }; 269 270 class AMDGPUAttributorPass : public PassInfoMixin<AMDGPUAttributorPass> { 271 private: 272 TargetMachine &TM; 273 274 public: 275 AMDGPUAttributorPass(TargetMachine &TM) : TM(TM){}; 276 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 277 }; 278 279 FunctionPass *createAMDGPUAnnotateUniformValues(); 280 281 ModulePass *createAMDGPUPrintfRuntimeBinding(); 282 void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry&); 283 extern char &AMDGPUPrintfRuntimeBindingID; 284 285 void initializeAMDGPUResourceUsageAnalysisPass(PassRegistry &); 286 extern char &AMDGPUResourceUsageAnalysisID; 287 288 struct AMDGPUPrintfRuntimeBindingPass 289 : PassInfoMixin<AMDGPUPrintfRuntimeBindingPass> { 290 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 291 }; 292 293 ModulePass* createAMDGPUUnifyMetadataPass(); 294 void initializeAMDGPUUnifyMetadataPass(PassRegistry&); 295 extern char &AMDGPUUnifyMetadataID; 296 297 struct AMDGPUUnifyMetadataPass : PassInfoMixin<AMDGPUUnifyMetadataPass> { 298 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 299 }; 300 301 void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry&); 302 extern char &SIOptimizeExecMaskingPreRAID; 303 304 void initializeSIOptimizeVGPRLiveRangePass(PassRegistry &); 305 extern char &SIOptimizeVGPRLiveRangeID; 306 307 void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry&); 308 extern char &AMDGPUAnnotateUniformValuesPassID; 309 310 void initializeAMDGPUCodeGenPreparePass(PassRegistry&); 311 extern char &AMDGPUCodeGenPrepareID; 312 313 void initializeAMDGPURemoveIncompatibleFunctionsPass(PassRegistry &); 314 extern char &AMDGPURemoveIncompatibleFunctionsID; 315 316 void initializeAMDGPULateCodeGenPreparePass(PassRegistry &); 317 extern char &AMDGPULateCodeGenPrepareID; 318 319 FunctionPass *createAMDGPURewriteUndefForPHILegacyPass(); 320 void initializeAMDGPURewriteUndefForPHILegacyPass(PassRegistry &); 321 extern char &AMDGPURewriteUndefForPHILegacyPassID; 322 323 class AMDGPURewriteUndefForPHIPass 324 : public PassInfoMixin<AMDGPURewriteUndefForPHIPass> { 325 public: 326 AMDGPURewriteUndefForPHIPass() = default; 327 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 328 }; 329 330 void initializeSIAnnotateControlFlowPass(PassRegistry&); 331 extern char &SIAnnotateControlFlowPassID; 332 333 void initializeSIMemoryLegalizerPass(PassRegistry&); 334 extern char &SIMemoryLegalizerID; 335 336 void initializeSIModeRegisterPass(PassRegistry&); 337 extern char &SIModeRegisterID; 338 339 void initializeAMDGPUInsertDelayAluPass(PassRegistry &); 340 extern char &AMDGPUInsertDelayAluID; 341 342 void initializeAMDGPUInsertSingleUseVDSTPass(PassRegistry &); 343 extern char &AMDGPUInsertSingleUseVDSTID; 344 345 void initializeSIInsertHardClausesPass(PassRegistry &); 346 extern char &SIInsertHardClausesID; 347 348 void initializeSIInsertWaitcntsPass(PassRegistry&); 349 extern char &SIInsertWaitcntsID; 350 351 void initializeSIFormMemoryClausesPass(PassRegistry&); 352 extern char &SIFormMemoryClausesID; 353 354 void initializeSIPostRABundlerPass(PassRegistry&); 355 extern char &SIPostRABundlerID; 356 357 void initializeGCNCreateVOPDPass(PassRegistry &); 358 extern char &GCNCreateVOPDID; 359 360 void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry&); 361 extern char &AMDGPUUnifyDivergentExitNodesID; 362 363 ImmutablePass *createAMDGPUAAWrapperPass(); 364 void initializeAMDGPUAAWrapperPassPass(PassRegistry&); 365 ImmutablePass *createAMDGPUExternalAAWrapperPass(); 366 void initializeAMDGPUExternalAAWrapperPass(PassRegistry&); 367 368 void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &); 369 370 ModulePass *createAMDGPUOpenCLEnqueuedBlockLoweringPass(); 371 void initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(PassRegistry &); 372 extern char &AMDGPUOpenCLEnqueuedBlockLoweringID; 373 374 void initializeGCNNSAReassignPass(PassRegistry &); 375 extern char &GCNNSAReassignID; 376 377 void initializeGCNPreRALongBranchRegPass(PassRegistry &); 378 extern char &GCNPreRALongBranchRegID; 379 380 void initializeGCNPreRAOptimizationsPass(PassRegistry &); 381 extern char &GCNPreRAOptimizationsID; 382 383 FunctionPass *createAMDGPUSetWavePriorityPass(); 384 void initializeAMDGPUSetWavePriorityPass(PassRegistry &); 385 386 void initializeGCNRewritePartialRegUsesPass(llvm::PassRegistry &); 387 extern char &GCNRewritePartialRegUsesID; 388 389 namespace AMDGPU { 390 enum TargetIndex { 391 TI_CONSTDATA_START, 392 TI_SCRATCH_RSRC_DWORD0, 393 TI_SCRATCH_RSRC_DWORD1, 394 TI_SCRATCH_RSRC_DWORD2, 395 TI_SCRATCH_RSRC_DWORD3 396 }; 397 398 // FIXME: Missing constant_32bit 399 inline bool isFlatGlobalAddrSpace(unsigned AS) { 400 return AS == AMDGPUAS::GLOBAL_ADDRESS || 401 AS == AMDGPUAS::FLAT_ADDRESS || 402 AS == AMDGPUAS::CONSTANT_ADDRESS || 403 AS > AMDGPUAS::MAX_AMDGPU_ADDRESS; 404 } 405 406 inline bool isExtendedGlobalAddrSpace(unsigned AS) { 407 return AS == AMDGPUAS::GLOBAL_ADDRESS || AS == AMDGPUAS::CONSTANT_ADDRESS || 408 AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT || 409 AS > AMDGPUAS::MAX_AMDGPU_ADDRESS; 410 } 411 412 static inline bool addrspacesMayAlias(unsigned AS1, unsigned AS2) { 413 static_assert(AMDGPUAS::MAX_AMDGPU_ADDRESS <= 9, "Addr space out of range"); 414 415 if (AS1 > AMDGPUAS::MAX_AMDGPU_ADDRESS || AS2 > AMDGPUAS::MAX_AMDGPU_ADDRESS) 416 return true; 417 418 // This array is indexed by address space value enum elements 0 ... to 9 419 // clang-format off 420 static const bool ASAliasRules[10][10] = { 421 /* Flat Global Region Group Constant Private Const32 BufFatPtr BufRsrc BufStrdPtr */ 422 /* Flat */ {true, true, false, true, true, true, true, true, true, true}, 423 /* Global */ {true, true, false, false, true, false, true, true, true, true}, 424 /* Region */ {false, false, true, false, false, false, false, false, false, false}, 425 /* Group */ {true, false, false, true, false, false, false, false, false, false}, 426 /* Constant */ {true, true, false, false, false, false, true, true, true, true}, 427 /* Private */ {true, false, false, false, false, true, false, false, false, false}, 428 /* Constant 32-bit */ {true, true, false, false, true, false, false, true, true, true}, 429 /* Buffer Fat Ptr */ {true, true, false, false, true, false, true, true, true, true}, 430 /* Buffer Resource */ {true, true, false, false, true, false, true, true, true, true}, 431 /* Buffer Strided Ptr */ {true, true, false, false, true, false, true, true, true, true}, 432 }; 433 // clang-format on 434 435 return ASAliasRules[AS1][AS2]; 436 } 437 438 } 439 440 } // End namespace llvm 441 442 #endif 443