1 //===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 /// \file 8 //===----------------------------------------------------------------------===// 9 10 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPU_H 11 #define LLVM_LIB_TARGET_AMDGPU_AMDGPU_H 12 13 #include "llvm/IR/PassManager.h" 14 #include "llvm/Pass.h" 15 #include "llvm/Support/AMDGPUAddrSpace.h" 16 #include "llvm/Support/CodeGen.h" 17 18 namespace llvm { 19 20 class AMDGPUTargetMachine; 21 class TargetMachine; 22 23 // GlobalISel passes 24 void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &); 25 FunctionPass *createAMDGPUPreLegalizeCombiner(bool IsOptNone); 26 void initializeAMDGPUPostLegalizerCombinerPass(PassRegistry &); 27 FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone); 28 FunctionPass *createAMDGPURegBankCombiner(bool IsOptNone); 29 void initializeAMDGPURegBankCombinerPass(PassRegistry &); 30 31 void initializeAMDGPURegBankSelectPass(PassRegistry &); 32 33 // SI Passes 34 FunctionPass *createGCNDPPCombinePass(); 35 FunctionPass *createSIAnnotateControlFlowPass(); 36 FunctionPass *createSIFoldOperandsPass(); 37 FunctionPass *createSIPeepholeSDWAPass(); 38 FunctionPass *createSILowerI1CopiesPass(); 39 FunctionPass *createAMDGPUGlobalISelDivergenceLoweringPass(); 40 FunctionPass *createSIShrinkInstructionsPass(); 41 FunctionPass *createSILoadStoreOptimizerPass(); 42 FunctionPass *createSIWholeQuadModePass(); 43 FunctionPass *createSIFixControlFlowLiveIntervalsPass(); 44 FunctionPass *createSIOptimizeExecMaskingPreRAPass(); 45 FunctionPass *createSIOptimizeVGPRLiveRangePass(); 46 FunctionPass *createSIFixSGPRCopiesPass(); 47 FunctionPass *createLowerWWMCopiesPass(); 48 FunctionPass *createSIMemoryLegalizerPass(); 49 FunctionPass *createSIInsertWaitcntsPass(); 50 FunctionPass *createSIPreAllocateWWMRegsPass(); 51 FunctionPass *createSIFormMemoryClausesPass(); 52 53 FunctionPass *createSIPostRABundlerPass(); 54 FunctionPass *createAMDGPUImageIntrinsicOptimizerPass(const TargetMachine *); 55 ModulePass *createAMDGPURemoveIncompatibleFunctionsPass(const TargetMachine *); 56 FunctionPass *createAMDGPUCodeGenPreparePass(); 57 FunctionPass *createAMDGPULateCodeGenPreparePass(); 58 FunctionPass *createAMDGPUMachineCFGStructurizerPass(); 59 FunctionPass *createAMDGPURewriteOutArgumentsPass(); 60 ModulePass * 61 createAMDGPULowerModuleLDSLegacyPass(const AMDGPUTargetMachine *TM = nullptr); 62 ModulePass *createAMDGPULowerBufferFatPointersPass(); 63 FunctionPass *createSIModeRegisterPass(); 64 FunctionPass *createGCNPreRAOptimizationsPass(); 65 66 struct AMDGPUSimplifyLibCallsPass : PassInfoMixin<AMDGPUSimplifyLibCallsPass> { 67 AMDGPUSimplifyLibCallsPass() {} 68 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 69 }; 70 71 struct AMDGPUImageIntrinsicOptimizerPass 72 : PassInfoMixin<AMDGPUImageIntrinsicOptimizerPass> { 73 AMDGPUImageIntrinsicOptimizerPass(TargetMachine &TM) : TM(TM) {} 74 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 75 76 private: 77 TargetMachine &TM; 78 }; 79 80 struct AMDGPUUseNativeCallsPass : PassInfoMixin<AMDGPUUseNativeCallsPass> { 81 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 82 }; 83 84 void initializeAMDGPUDAGToDAGISelLegacyPass(PassRegistry &); 85 86 void initializeAMDGPUMachineCFGStructurizerPass(PassRegistry&); 87 extern char &AMDGPUMachineCFGStructurizerID; 88 89 void initializeAMDGPUAlwaysInlinePass(PassRegistry&); 90 91 Pass *createAMDGPUAnnotateKernelFeaturesPass(); 92 Pass *createAMDGPUAttributorLegacyPass(); 93 void initializeAMDGPUAttributorLegacyPass(PassRegistry &); 94 void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &); 95 extern char &AMDGPUAnnotateKernelFeaturesID; 96 97 // DPP/Iterative option enables the atomic optimizer with given strategy 98 // whereas None disables the atomic optimizer. 99 enum class ScanOptions { DPP, Iterative, None }; 100 FunctionPass *createAMDGPUAtomicOptimizerPass(ScanOptions ScanStrategy); 101 void initializeAMDGPUAtomicOptimizerPass(PassRegistry &); 102 extern char &AMDGPUAtomicOptimizerID; 103 104 ModulePass *createAMDGPUCtorDtorLoweringLegacyPass(); 105 void initializeAMDGPUCtorDtorLoweringLegacyPass(PassRegistry &); 106 extern char &AMDGPUCtorDtorLoweringLegacyPassID; 107 108 FunctionPass *createAMDGPULowerKernelArgumentsPass(); 109 void initializeAMDGPULowerKernelArgumentsPass(PassRegistry &); 110 extern char &AMDGPULowerKernelArgumentsID; 111 112 FunctionPass *createAMDGPUPromoteKernelArgumentsPass(); 113 void initializeAMDGPUPromoteKernelArgumentsPass(PassRegistry &); 114 extern char &AMDGPUPromoteKernelArgumentsID; 115 116 struct AMDGPUPromoteKernelArgumentsPass 117 : PassInfoMixin<AMDGPUPromoteKernelArgumentsPass> { 118 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 119 }; 120 121 ModulePass *createAMDGPULowerKernelAttributesPass(); 122 void initializeAMDGPULowerKernelAttributesPass(PassRegistry &); 123 extern char &AMDGPULowerKernelAttributesID; 124 125 struct AMDGPULowerKernelAttributesPass 126 : PassInfoMixin<AMDGPULowerKernelAttributesPass> { 127 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 128 }; 129 130 void initializeAMDGPULowerModuleLDSLegacyPass(PassRegistry &); 131 extern char &AMDGPULowerModuleLDSLegacyPassID; 132 133 struct AMDGPULowerModuleLDSPass : PassInfoMixin<AMDGPULowerModuleLDSPass> { 134 const AMDGPUTargetMachine &TM; 135 AMDGPULowerModuleLDSPass(const AMDGPUTargetMachine &TM_) : TM(TM_) {} 136 137 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 138 }; 139 140 void initializeAMDGPULowerBufferFatPointersPass(PassRegistry &); 141 extern char &AMDGPULowerBufferFatPointersID; 142 143 struct AMDGPULowerBufferFatPointersPass 144 : PassInfoMixin<AMDGPULowerBufferFatPointersPass> { 145 AMDGPULowerBufferFatPointersPass(const TargetMachine &TM) : TM(TM) {} 146 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 147 148 private: 149 const TargetMachine &TM; 150 }; 151 152 void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &); 153 extern char &AMDGPURewriteOutArgumentsID; 154 155 void initializeGCNDPPCombinePass(PassRegistry &); 156 extern char &GCNDPPCombineID; 157 158 void initializeSIFoldOperandsPass(PassRegistry &); 159 extern char &SIFoldOperandsID; 160 161 void initializeSIPeepholeSDWAPass(PassRegistry &); 162 extern char &SIPeepholeSDWAID; 163 164 void initializeSIShrinkInstructionsPass(PassRegistry&); 165 extern char &SIShrinkInstructionsID; 166 167 void initializeSIFixSGPRCopiesPass(PassRegistry &); 168 extern char &SIFixSGPRCopiesID; 169 170 void initializeSIFixVGPRCopiesPass(PassRegistry &); 171 extern char &SIFixVGPRCopiesID; 172 173 void initializeSILowerWWMCopiesPass(PassRegistry &); 174 extern char &SILowerWWMCopiesID; 175 176 void initializeSILowerI1CopiesPass(PassRegistry &); 177 extern char &SILowerI1CopiesID; 178 179 void initializeAMDGPUGlobalISelDivergenceLoweringPass(PassRegistry &); 180 extern char &AMDGPUGlobalISelDivergenceLoweringID; 181 182 void initializeAMDGPUMarkLastScratchLoadPass(PassRegistry &); 183 extern char &AMDGPUMarkLastScratchLoadID; 184 185 void initializeSILowerSGPRSpillsPass(PassRegistry &); 186 extern char &SILowerSGPRSpillsID; 187 188 void initializeSILoadStoreOptimizerPass(PassRegistry &); 189 extern char &SILoadStoreOptimizerID; 190 191 void initializeSIWholeQuadModePass(PassRegistry &); 192 extern char &SIWholeQuadModeID; 193 194 void initializeSILowerControlFlowPass(PassRegistry &); 195 extern char &SILowerControlFlowID; 196 197 void initializeSIPreEmitPeepholePass(PassRegistry &); 198 extern char &SIPreEmitPeepholeID; 199 200 void initializeSILateBranchLoweringPass(PassRegistry &); 201 extern char &SILateBranchLoweringPassID; 202 203 void initializeSIOptimizeExecMaskingPass(PassRegistry &); 204 extern char &SIOptimizeExecMaskingID; 205 206 void initializeSIPreAllocateWWMRegsPass(PassRegistry &); 207 extern char &SIPreAllocateWWMRegsID; 208 209 void initializeAMDGPUImageIntrinsicOptimizerPass(PassRegistry &); 210 extern char &AMDGPUImageIntrinsicOptimizerID; 211 212 void initializeAMDGPUPerfHintAnalysisPass(PassRegistry &); 213 extern char &AMDGPUPerfHintAnalysisID; 214 215 void initializeGCNRegPressurePrinterPass(PassRegistry &); 216 extern char &GCNRegPressurePrinterID; 217 218 // Passes common to R600 and SI 219 FunctionPass *createAMDGPUPromoteAlloca(); 220 void initializeAMDGPUPromoteAllocaPass(PassRegistry&); 221 extern char &AMDGPUPromoteAllocaID; 222 223 FunctionPass *createAMDGPUPromoteAllocaToVector(); 224 void initializeAMDGPUPromoteAllocaToVectorPass(PassRegistry&); 225 extern char &AMDGPUPromoteAllocaToVectorID; 226 227 struct AMDGPUPromoteAllocaPass : PassInfoMixin<AMDGPUPromoteAllocaPass> { 228 AMDGPUPromoteAllocaPass(TargetMachine &TM) : TM(TM) {} 229 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 230 231 private: 232 TargetMachine &TM; 233 }; 234 235 struct AMDGPUPromoteAllocaToVectorPass 236 : PassInfoMixin<AMDGPUPromoteAllocaToVectorPass> { 237 AMDGPUPromoteAllocaToVectorPass(TargetMachine &TM) : TM(TM) {} 238 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 239 240 private: 241 TargetMachine &TM; 242 }; 243 244 struct AMDGPUAtomicOptimizerPass : PassInfoMixin<AMDGPUAtomicOptimizerPass> { 245 AMDGPUAtomicOptimizerPass(TargetMachine &TM, ScanOptions ScanImpl) 246 : TM(TM), ScanImpl(ScanImpl) {} 247 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 248 249 private: 250 TargetMachine &TM; 251 ScanOptions ScanImpl; 252 }; 253 254 Pass *createAMDGPUStructurizeCFGPass(); 255 FunctionPass *createAMDGPUISelDag(TargetMachine &TM, CodeGenOptLevel OptLevel); 256 ModulePass *createAMDGPUAlwaysInlinePass(bool GlobalOpt = true); 257 258 struct AMDGPUAlwaysInlinePass : PassInfoMixin<AMDGPUAlwaysInlinePass> { 259 AMDGPUAlwaysInlinePass(bool GlobalOpt = true) : GlobalOpt(GlobalOpt) {} 260 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 261 262 private: 263 bool GlobalOpt; 264 }; 265 266 class AMDGPUCodeGenPreparePass 267 : public PassInfoMixin<AMDGPUCodeGenPreparePass> { 268 private: 269 TargetMachine &TM; 270 271 public: 272 AMDGPUCodeGenPreparePass(TargetMachine &TM) : TM(TM){}; 273 PreservedAnalyses run(Function &, FunctionAnalysisManager &); 274 }; 275 276 class AMDGPULowerKernelArgumentsPass 277 : public PassInfoMixin<AMDGPULowerKernelArgumentsPass> { 278 private: 279 TargetMachine &TM; 280 281 public: 282 AMDGPULowerKernelArgumentsPass(TargetMachine &TM) : TM(TM){}; 283 PreservedAnalyses run(Function &, FunctionAnalysisManager &); 284 }; 285 286 class AMDGPUAttributorPass : public PassInfoMixin<AMDGPUAttributorPass> { 287 private: 288 TargetMachine &TM; 289 290 public: 291 AMDGPUAttributorPass(TargetMachine &TM) : TM(TM){}; 292 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 293 }; 294 295 FunctionPass *createAMDGPUAnnotateUniformValues(); 296 297 ModulePass *createAMDGPUPrintfRuntimeBinding(); 298 void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry&); 299 extern char &AMDGPUPrintfRuntimeBindingID; 300 301 void initializeAMDGPUResourceUsageAnalysisPass(PassRegistry &); 302 extern char &AMDGPUResourceUsageAnalysisID; 303 304 struct AMDGPUPrintfRuntimeBindingPass 305 : PassInfoMixin<AMDGPUPrintfRuntimeBindingPass> { 306 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 307 }; 308 309 ModulePass* createAMDGPUUnifyMetadataPass(); 310 void initializeAMDGPUUnifyMetadataPass(PassRegistry&); 311 extern char &AMDGPUUnifyMetadataID; 312 313 struct AMDGPUUnifyMetadataPass : PassInfoMixin<AMDGPUUnifyMetadataPass> { 314 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 315 }; 316 317 void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry&); 318 extern char &SIOptimizeExecMaskingPreRAID; 319 320 void initializeSIOptimizeVGPRLiveRangePass(PassRegistry &); 321 extern char &SIOptimizeVGPRLiveRangeID; 322 323 void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry&); 324 extern char &AMDGPUAnnotateUniformValuesPassID; 325 326 void initializeAMDGPUCodeGenPreparePass(PassRegistry&); 327 extern char &AMDGPUCodeGenPrepareID; 328 329 void initializeAMDGPURemoveIncompatibleFunctionsPass(PassRegistry &); 330 extern char &AMDGPURemoveIncompatibleFunctionsID; 331 332 void initializeAMDGPULateCodeGenPreparePass(PassRegistry &); 333 extern char &AMDGPULateCodeGenPrepareID; 334 335 FunctionPass *createAMDGPURewriteUndefForPHILegacyPass(); 336 void initializeAMDGPURewriteUndefForPHILegacyPass(PassRegistry &); 337 extern char &AMDGPURewriteUndefForPHILegacyPassID; 338 339 class AMDGPURewriteUndefForPHIPass 340 : public PassInfoMixin<AMDGPURewriteUndefForPHIPass> { 341 public: 342 AMDGPURewriteUndefForPHIPass() = default; 343 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 344 }; 345 346 void initializeSIAnnotateControlFlowPass(PassRegistry&); 347 extern char &SIAnnotateControlFlowPassID; 348 349 void initializeSIMemoryLegalizerPass(PassRegistry&); 350 extern char &SIMemoryLegalizerID; 351 352 void initializeSIModeRegisterPass(PassRegistry&); 353 extern char &SIModeRegisterID; 354 355 void initializeAMDGPUInsertDelayAluPass(PassRegistry &); 356 extern char &AMDGPUInsertDelayAluID; 357 358 void initializeAMDGPUInsertSingleUseVDSTPass(PassRegistry &); 359 extern char &AMDGPUInsertSingleUseVDSTID; 360 361 void initializeSIInsertHardClausesPass(PassRegistry &); 362 extern char &SIInsertHardClausesID; 363 364 void initializeSIInsertWaitcntsPass(PassRegistry&); 365 extern char &SIInsertWaitcntsID; 366 367 void initializeSIFormMemoryClausesPass(PassRegistry&); 368 extern char &SIFormMemoryClausesID; 369 370 void initializeSIPostRABundlerPass(PassRegistry&); 371 extern char &SIPostRABundlerID; 372 373 void initializeGCNCreateVOPDPass(PassRegistry &); 374 extern char &GCNCreateVOPDID; 375 376 void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry&); 377 extern char &AMDGPUUnifyDivergentExitNodesID; 378 379 ImmutablePass *createAMDGPUAAWrapperPass(); 380 void initializeAMDGPUAAWrapperPassPass(PassRegistry&); 381 ImmutablePass *createAMDGPUExternalAAWrapperPass(); 382 void initializeAMDGPUExternalAAWrapperPass(PassRegistry&); 383 384 void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &); 385 386 ModulePass *createAMDGPUOpenCLEnqueuedBlockLoweringPass(); 387 void initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(PassRegistry &); 388 extern char &AMDGPUOpenCLEnqueuedBlockLoweringID; 389 390 void initializeGCNNSAReassignPass(PassRegistry &); 391 extern char &GCNNSAReassignID; 392 393 void initializeGCNPreRALongBranchRegPass(PassRegistry &); 394 extern char &GCNPreRALongBranchRegID; 395 396 void initializeGCNPreRAOptimizationsPass(PassRegistry &); 397 extern char &GCNPreRAOptimizationsID; 398 399 FunctionPass *createAMDGPUSetWavePriorityPass(); 400 void initializeAMDGPUSetWavePriorityPass(PassRegistry &); 401 402 void initializeGCNRewritePartialRegUsesPass(llvm::PassRegistry &); 403 extern char &GCNRewritePartialRegUsesID; 404 405 namespace AMDGPU { 406 enum TargetIndex { 407 TI_CONSTDATA_START, 408 TI_SCRATCH_RSRC_DWORD0, 409 TI_SCRATCH_RSRC_DWORD1, 410 TI_SCRATCH_RSRC_DWORD2, 411 TI_SCRATCH_RSRC_DWORD3 412 }; 413 414 // FIXME: Missing constant_32bit 415 inline bool isFlatGlobalAddrSpace(unsigned AS) { 416 return AS == AMDGPUAS::GLOBAL_ADDRESS || 417 AS == AMDGPUAS::FLAT_ADDRESS || 418 AS == AMDGPUAS::CONSTANT_ADDRESS || 419 AS > AMDGPUAS::MAX_AMDGPU_ADDRESS; 420 } 421 422 inline bool isExtendedGlobalAddrSpace(unsigned AS) { 423 return AS == AMDGPUAS::GLOBAL_ADDRESS || AS == AMDGPUAS::CONSTANT_ADDRESS || 424 AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT || 425 AS > AMDGPUAS::MAX_AMDGPU_ADDRESS; 426 } 427 428 static inline bool addrspacesMayAlias(unsigned AS1, unsigned AS2) { 429 static_assert(AMDGPUAS::MAX_AMDGPU_ADDRESS <= 9, "Addr space out of range"); 430 431 if (AS1 > AMDGPUAS::MAX_AMDGPU_ADDRESS || AS2 > AMDGPUAS::MAX_AMDGPU_ADDRESS) 432 return true; 433 434 // This array is indexed by address space value enum elements 0 ... to 9 435 // clang-format off 436 static const bool ASAliasRules[10][10] = { 437 /* Flat Global Region Group Constant Private Const32 BufFatPtr BufRsrc BufStrdPtr */ 438 /* Flat */ {true, true, false, true, true, true, true, true, true, true}, 439 /* Global */ {true, true, false, false, true, false, true, true, true, true}, 440 /* Region */ {false, false, true, false, false, false, false, false, false, false}, 441 /* Group */ {true, false, false, true, false, false, false, false, false, false}, 442 /* Constant */ {true, true, false, false, false, false, true, true, true, true}, 443 /* Private */ {true, false, false, false, false, true, false, false, false, false}, 444 /* Constant 32-bit */ {true, true, false, false, true, false, false, true, true, true}, 445 /* Buffer Fat Ptr */ {true, true, false, false, true, false, true, true, true, true}, 446 /* Buffer Resource */ {true, true, false, false, true, false, true, true, true, true}, 447 /* Buffer Strided Ptr */ {true, true, false, false, true, false, true, true, true, true}, 448 }; 449 // clang-format on 450 451 return ASAliasRules[AS1][AS2]; 452 } 453 454 } 455 456 } // End namespace llvm 457 458 #endif 459