1 //===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 /// \file 8 //===----------------------------------------------------------------------===// 9 10 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPU_H 11 #define LLVM_LIB_TARGET_AMDGPU_AMDGPU_H 12 13 #include "llvm/IR/PassManager.h" 14 #include "llvm/Pass.h" 15 #include "llvm/Support/CodeGen.h" 16 17 namespace llvm { 18 19 class TargetMachine; 20 21 // GlobalISel passes 22 void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &); 23 FunctionPass *createAMDGPUPreLegalizeCombiner(bool IsOptNone); 24 void initializeAMDGPUPostLegalizerCombinerPass(PassRegistry &); 25 FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone); 26 FunctionPass *createAMDGPURegBankCombiner(bool IsOptNone); 27 void initializeAMDGPURegBankCombinerPass(PassRegistry &); 28 29 void initializeAMDGPURegBankSelectPass(PassRegistry &); 30 31 // SI Passes 32 FunctionPass *createGCNDPPCombinePass(); 33 FunctionPass *createSIAnnotateControlFlowPass(); 34 FunctionPass *createSIFoldOperandsPass(); 35 FunctionPass *createSIPeepholeSDWAPass(); 36 FunctionPass *createSILowerI1CopiesPass(); 37 FunctionPass *createSIShrinkInstructionsPass(); 38 FunctionPass *createSILoadStoreOptimizerPass(); 39 FunctionPass *createSIWholeQuadModePass(); 40 FunctionPass *createSIFixControlFlowLiveIntervalsPass(); 41 FunctionPass *createSIOptimizeExecMaskingPreRAPass(); 42 FunctionPass *createSIOptimizeVGPRLiveRangePass(); 43 FunctionPass *createSIFixSGPRCopiesPass(); 44 FunctionPass *createLowerWWMCopiesPass(); 45 FunctionPass *createSIMemoryLegalizerPass(); 46 FunctionPass *createSIInsertWaitcntsPass(); 47 FunctionPass *createSIPreAllocateWWMRegsPass(); 48 FunctionPass *createSIFormMemoryClausesPass(); 49 50 FunctionPass *createSIPostRABundlerPass(); 51 FunctionPass *createAMDGPUSimplifyLibCallsPass(const TargetMachine *); 52 FunctionPass *createAMDGPUUseNativeCallsPass(); 53 ModulePass *createAMDGPURemoveIncompatibleFunctionsPass(const TargetMachine *); 54 FunctionPass *createAMDGPUCodeGenPreparePass(); 55 FunctionPass *createAMDGPULateCodeGenPreparePass(); 56 FunctionPass *createAMDGPUMachineCFGStructurizerPass(); 57 FunctionPass *createAMDGPURewriteOutArgumentsPass(); 58 ModulePass *createAMDGPULowerModuleLDSPass(); 59 FunctionPass *createSIModeRegisterPass(); 60 FunctionPass *createGCNPreRAOptimizationsPass(); 61 62 struct AMDGPUSimplifyLibCallsPass : PassInfoMixin<AMDGPUSimplifyLibCallsPass> { 63 AMDGPUSimplifyLibCallsPass(TargetMachine &TM) : TM(TM) {} 64 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 65 66 private: 67 TargetMachine &TM; 68 }; 69 70 struct AMDGPUUseNativeCallsPass : PassInfoMixin<AMDGPUUseNativeCallsPass> { 71 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 72 }; 73 74 void initializeAMDGPUDAGToDAGISelPass(PassRegistry&); 75 76 void initializeAMDGPUMachineCFGStructurizerPass(PassRegistry&); 77 extern char &AMDGPUMachineCFGStructurizerID; 78 79 void initializeAMDGPUAlwaysInlinePass(PassRegistry&); 80 81 Pass *createAMDGPUAnnotateKernelFeaturesPass(); 82 Pass *createAMDGPUAttributorPass(); 83 void initializeAMDGPUAttributorPass(PassRegistry &); 84 void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &); 85 extern char &AMDGPUAnnotateKernelFeaturesID; 86 87 // DPP/Iterative option enables the atomic optimizer with given strategy 88 // whereas None disables the atomic optimizer. 89 enum class ScanOptions { DPP, Iterative, None }; 90 FunctionPass *createAMDGPUAtomicOptimizerPass(ScanOptions ScanStrategy); 91 void initializeAMDGPUAtomicOptimizerPass(PassRegistry &); 92 extern char &AMDGPUAtomicOptimizerID; 93 94 ModulePass *createAMDGPUCtorDtorLoweringLegacyPass(); 95 void initializeAMDGPUCtorDtorLoweringLegacyPass(PassRegistry &); 96 extern char &AMDGPUCtorDtorLoweringLegacyPassID; 97 98 FunctionPass *createAMDGPULowerKernelArgumentsPass(); 99 void initializeAMDGPULowerKernelArgumentsPass(PassRegistry &); 100 extern char &AMDGPULowerKernelArgumentsID; 101 102 FunctionPass *createAMDGPUPromoteKernelArgumentsPass(); 103 void initializeAMDGPUPromoteKernelArgumentsPass(PassRegistry &); 104 extern char &AMDGPUPromoteKernelArgumentsID; 105 106 struct AMDGPUPromoteKernelArgumentsPass 107 : PassInfoMixin<AMDGPUPromoteKernelArgumentsPass> { 108 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 109 }; 110 111 ModulePass *createAMDGPULowerKernelAttributesPass(); 112 void initializeAMDGPULowerKernelAttributesPass(PassRegistry &); 113 extern char &AMDGPULowerKernelAttributesID; 114 115 struct AMDGPULowerKernelAttributesPass 116 : PassInfoMixin<AMDGPULowerKernelAttributesPass> { 117 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 118 }; 119 120 void initializeAMDGPULowerModuleLDSPass(PassRegistry &); 121 extern char &AMDGPULowerModuleLDSID; 122 123 struct AMDGPULowerModuleLDSPass : PassInfoMixin<AMDGPULowerModuleLDSPass> { 124 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 125 }; 126 127 void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &); 128 extern char &AMDGPURewriteOutArgumentsID; 129 130 void initializeGCNDPPCombinePass(PassRegistry &); 131 extern char &GCNDPPCombineID; 132 133 void initializeSIFoldOperandsPass(PassRegistry &); 134 extern char &SIFoldOperandsID; 135 136 void initializeSIPeepholeSDWAPass(PassRegistry &); 137 extern char &SIPeepholeSDWAID; 138 139 void initializeSIShrinkInstructionsPass(PassRegistry&); 140 extern char &SIShrinkInstructionsID; 141 142 void initializeSIFixSGPRCopiesPass(PassRegistry &); 143 extern char &SIFixSGPRCopiesID; 144 145 void initializeSIFixVGPRCopiesPass(PassRegistry &); 146 extern char &SIFixVGPRCopiesID; 147 148 void initializeSILowerWWMCopiesPass(PassRegistry &); 149 extern char &SILowerWWMCopiesID; 150 151 void initializeSILowerI1CopiesPass(PassRegistry &); 152 extern char &SILowerI1CopiesID; 153 154 void initializeSILowerSGPRSpillsPass(PassRegistry &); 155 extern char &SILowerSGPRSpillsID; 156 157 void initializeSILoadStoreOptimizerPass(PassRegistry &); 158 extern char &SILoadStoreOptimizerID; 159 160 void initializeSIWholeQuadModePass(PassRegistry &); 161 extern char &SIWholeQuadModeID; 162 163 void initializeSILowerControlFlowPass(PassRegistry &); 164 extern char &SILowerControlFlowID; 165 166 void initializeSIPreEmitPeepholePass(PassRegistry &); 167 extern char &SIPreEmitPeepholeID; 168 169 void initializeSILateBranchLoweringPass(PassRegistry &); 170 extern char &SILateBranchLoweringPassID; 171 172 void initializeSIOptimizeExecMaskingPass(PassRegistry &); 173 extern char &SIOptimizeExecMaskingID; 174 175 void initializeSIPreAllocateWWMRegsPass(PassRegistry &); 176 extern char &SIPreAllocateWWMRegsID; 177 178 void initializeAMDGPUSimplifyLibCallsPass(PassRegistry &); 179 extern char &AMDGPUSimplifyLibCallsID; 180 181 void initializeAMDGPUUseNativeCallsPass(PassRegistry &); 182 extern char &AMDGPUUseNativeCallsID; 183 184 void initializeAMDGPUPerfHintAnalysisPass(PassRegistry &); 185 extern char &AMDGPUPerfHintAnalysisID; 186 187 // Passes common to R600 and SI 188 FunctionPass *createAMDGPUPromoteAlloca(); 189 void initializeAMDGPUPromoteAllocaPass(PassRegistry&); 190 extern char &AMDGPUPromoteAllocaID; 191 192 FunctionPass *createAMDGPUPromoteAllocaToVector(); 193 void initializeAMDGPUPromoteAllocaToVectorPass(PassRegistry&); 194 extern char &AMDGPUPromoteAllocaToVectorID; 195 196 struct AMDGPUPromoteAllocaPass : PassInfoMixin<AMDGPUPromoteAllocaPass> { 197 AMDGPUPromoteAllocaPass(TargetMachine &TM) : TM(TM) {} 198 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 199 200 private: 201 TargetMachine &TM; 202 }; 203 204 struct AMDGPUPromoteAllocaToVectorPass 205 : PassInfoMixin<AMDGPUPromoteAllocaToVectorPass> { 206 AMDGPUPromoteAllocaToVectorPass(TargetMachine &TM) : TM(TM) {} 207 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 208 209 private: 210 TargetMachine &TM; 211 }; 212 213 struct AMDGPUAtomicOptimizerPass : PassInfoMixin<AMDGPUAtomicOptimizerPass> { 214 AMDGPUAtomicOptimizerPass(TargetMachine &TM, ScanOptions ScanImpl) 215 : TM(TM), ScanImpl(ScanImpl) {} 216 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 217 218 private: 219 TargetMachine &TM; 220 ScanOptions ScanImpl; 221 }; 222 223 Pass *createAMDGPUStructurizeCFGPass(); 224 FunctionPass *createAMDGPUISelDag(TargetMachine &TM, 225 CodeGenOpt::Level OptLevel); 226 ModulePass *createAMDGPUAlwaysInlinePass(bool GlobalOpt = true); 227 228 struct AMDGPUAlwaysInlinePass : PassInfoMixin<AMDGPUAlwaysInlinePass> { 229 AMDGPUAlwaysInlinePass(bool GlobalOpt = true) : GlobalOpt(GlobalOpt) {} 230 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 231 232 private: 233 bool GlobalOpt; 234 }; 235 236 class AMDGPUCodeGenPreparePass 237 : public PassInfoMixin<AMDGPUCodeGenPreparePass> { 238 private: 239 TargetMachine &TM; 240 241 public: 242 AMDGPUCodeGenPreparePass(TargetMachine &TM) : TM(TM){}; 243 PreservedAnalyses run(Function &, FunctionAnalysisManager &); 244 }; 245 246 FunctionPass *createAMDGPUAnnotateUniformValues(); 247 248 ModulePass *createAMDGPUPrintfRuntimeBinding(); 249 void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry&); 250 extern char &AMDGPUPrintfRuntimeBindingID; 251 252 void initializeAMDGPUResourceUsageAnalysisPass(PassRegistry &); 253 extern char &AMDGPUResourceUsageAnalysisID; 254 255 struct AMDGPUPrintfRuntimeBindingPass 256 : PassInfoMixin<AMDGPUPrintfRuntimeBindingPass> { 257 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 258 }; 259 260 ModulePass* createAMDGPUUnifyMetadataPass(); 261 void initializeAMDGPUUnifyMetadataPass(PassRegistry&); 262 extern char &AMDGPUUnifyMetadataID; 263 264 struct AMDGPUUnifyMetadataPass : PassInfoMixin<AMDGPUUnifyMetadataPass> { 265 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 266 }; 267 268 void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry&); 269 extern char &SIOptimizeExecMaskingPreRAID; 270 271 void initializeSIOptimizeVGPRLiveRangePass(PassRegistry &); 272 extern char &SIOptimizeVGPRLiveRangeID; 273 274 void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry&); 275 extern char &AMDGPUAnnotateUniformValuesPassID; 276 277 void initializeAMDGPUCodeGenPreparePass(PassRegistry&); 278 extern char &AMDGPUCodeGenPrepareID; 279 280 void initializeAMDGPURemoveIncompatibleFunctionsPass(PassRegistry &); 281 extern char &AMDGPURemoveIncompatibleFunctionsID; 282 283 void initializeAMDGPULateCodeGenPreparePass(PassRegistry &); 284 extern char &AMDGPULateCodeGenPrepareID; 285 286 FunctionPass *createAMDGPURewriteUndefForPHIPass(); 287 void initializeAMDGPURewriteUndefForPHIPass(PassRegistry &); 288 extern char &AMDGPURewriteUndefForPHIPassID; 289 290 void initializeSIAnnotateControlFlowPass(PassRegistry&); 291 extern char &SIAnnotateControlFlowPassID; 292 293 void initializeSIMemoryLegalizerPass(PassRegistry&); 294 extern char &SIMemoryLegalizerID; 295 296 void initializeSIModeRegisterPass(PassRegistry&); 297 extern char &SIModeRegisterID; 298 299 void initializeAMDGPUInsertDelayAluPass(PassRegistry &); 300 extern char &AMDGPUInsertDelayAluID; 301 302 void initializeSIInsertHardClausesPass(PassRegistry &); 303 extern char &SIInsertHardClausesID; 304 305 void initializeSIInsertWaitcntsPass(PassRegistry&); 306 extern char &SIInsertWaitcntsID; 307 308 void initializeSIFormMemoryClausesPass(PassRegistry&); 309 extern char &SIFormMemoryClausesID; 310 311 void initializeSIPostRABundlerPass(PassRegistry&); 312 extern char &SIPostRABundlerID; 313 314 void initializeGCNCreateVOPDPass(PassRegistry &); 315 extern char &GCNCreateVOPDID; 316 317 void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry&); 318 extern char &AMDGPUUnifyDivergentExitNodesID; 319 320 ImmutablePass *createAMDGPUAAWrapperPass(); 321 void initializeAMDGPUAAWrapperPassPass(PassRegistry&); 322 ImmutablePass *createAMDGPUExternalAAWrapperPass(); 323 void initializeAMDGPUExternalAAWrapperPass(PassRegistry&); 324 325 void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &); 326 327 ModulePass *createAMDGPUOpenCLEnqueuedBlockLoweringPass(); 328 void initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(PassRegistry &); 329 extern char &AMDGPUOpenCLEnqueuedBlockLoweringID; 330 331 void initializeGCNNSAReassignPass(PassRegistry &); 332 extern char &GCNNSAReassignID; 333 334 void initializeGCNPreRALongBranchRegPass(PassRegistry &); 335 extern char &GCNPreRALongBranchRegID; 336 337 void initializeGCNPreRAOptimizationsPass(PassRegistry &); 338 extern char &GCNPreRAOptimizationsID; 339 340 FunctionPass *createAMDGPUSetWavePriorityPass(); 341 void initializeAMDGPUSetWavePriorityPass(PassRegistry &); 342 343 void initializeGCNRewritePartialRegUsesPass(llvm::PassRegistry &); 344 extern char &GCNRewritePartialRegUsesID; 345 346 namespace AMDGPU { 347 enum TargetIndex { 348 TI_CONSTDATA_START, 349 TI_SCRATCH_RSRC_DWORD0, 350 TI_SCRATCH_RSRC_DWORD1, 351 TI_SCRATCH_RSRC_DWORD2, 352 TI_SCRATCH_RSRC_DWORD3 353 }; 354 } 355 356 /// OpenCL uses address spaces to differentiate between 357 /// various memory regions on the hardware. On the CPU 358 /// all of the address spaces point to the same memory, 359 /// however on the GPU, each address space points to 360 /// a separate piece of memory that is unique from other 361 /// memory locations. 362 namespace AMDGPUAS { 363 enum : unsigned { 364 // The maximum value for flat, generic, local, private, constant and region. 365 MAX_AMDGPU_ADDRESS = 8, 366 367 FLAT_ADDRESS = 0, ///< Address space for flat memory. 368 GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0). 369 REGION_ADDRESS = 2, ///< Address space for region memory. (GDS) 370 371 CONSTANT_ADDRESS = 4, ///< Address space for constant memory (VTX2). 372 LOCAL_ADDRESS = 3, ///< Address space for local memory. 373 PRIVATE_ADDRESS = 5, ///< Address space for private memory. 374 375 CONSTANT_ADDRESS_32BIT = 6, ///< Address space for 32-bit constant memory. 376 377 BUFFER_FAT_POINTER = 7, ///< Address space for 160-bit buffer fat pointers. 378 ///< Not used in backend. 379 380 BUFFER_RESOURCE = 8, ///< Address space for 128-bit buffer resources. 381 382 /// Internal address spaces. Can be freely renumbered. 383 STREAMOUT_REGISTER = 128, ///< Address space for GS NGG Streamout registers. 384 /// end Internal address spaces. 385 386 /// Address space for direct addressable parameter memory (CONST0). 387 PARAM_D_ADDRESS = 6, 388 /// Address space for indirect addressable parameter memory (VTX1). 389 PARAM_I_ADDRESS = 7, 390 391 // Do not re-order the CONSTANT_BUFFER_* enums. Several places depend on 392 // this order to be able to dynamically index a constant buffer, for 393 // example: 394 // 395 // ConstantBufferAS = CONSTANT_BUFFER_0 + CBIdx 396 397 CONSTANT_BUFFER_0 = 8, 398 CONSTANT_BUFFER_1 = 9, 399 CONSTANT_BUFFER_2 = 10, 400 CONSTANT_BUFFER_3 = 11, 401 CONSTANT_BUFFER_4 = 12, 402 CONSTANT_BUFFER_5 = 13, 403 CONSTANT_BUFFER_6 = 14, 404 CONSTANT_BUFFER_7 = 15, 405 CONSTANT_BUFFER_8 = 16, 406 CONSTANT_BUFFER_9 = 17, 407 CONSTANT_BUFFER_10 = 18, 408 CONSTANT_BUFFER_11 = 19, 409 CONSTANT_BUFFER_12 = 20, 410 CONSTANT_BUFFER_13 = 21, 411 CONSTANT_BUFFER_14 = 22, 412 CONSTANT_BUFFER_15 = 23, 413 414 // Some places use this if the address space can't be determined. 415 UNKNOWN_ADDRESS_SPACE = ~0u, 416 }; 417 } 418 419 namespace AMDGPU { 420 421 // FIXME: Missing constant_32bit 422 inline bool isFlatGlobalAddrSpace(unsigned AS) { 423 return AS == AMDGPUAS::GLOBAL_ADDRESS || 424 AS == AMDGPUAS::FLAT_ADDRESS || 425 AS == AMDGPUAS::CONSTANT_ADDRESS || 426 AS > AMDGPUAS::MAX_AMDGPU_ADDRESS; 427 } 428 429 inline bool isExtendedGlobalAddrSpace(unsigned AS) { 430 return AS == AMDGPUAS::GLOBAL_ADDRESS || AS == AMDGPUAS::CONSTANT_ADDRESS || 431 AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT || 432 AS > AMDGPUAS::MAX_AMDGPU_ADDRESS; 433 } 434 435 static inline bool addrspacesMayAlias(unsigned AS1, unsigned AS2) { 436 static_assert(AMDGPUAS::MAX_AMDGPU_ADDRESS <= 8, "Addr space out of range"); 437 438 if (AS1 > AMDGPUAS::MAX_AMDGPU_ADDRESS || AS2 > AMDGPUAS::MAX_AMDGPU_ADDRESS) 439 return true; 440 441 // This array is indexed by address space value enum elements 0 ... to 8 442 // clang-format off 443 static const bool ASAliasRules[9][9] = { 444 /* Flat Global Region Group Constant Private Const32 BufFatPtr BufRsrc */ 445 /* Flat */ {true, true, false, true, true, true, true, true, true}, 446 /* Global */ {true, true, false, false, true, false, true, true, true}, 447 /* Region */ {false, false, true, false, false, false, false, false, false}, 448 /* Group */ {true, false, false, true, false, false, false, false, false}, 449 /* Constant */ {true, true, false, false, false, false, true, true, true}, 450 /* Private */ {true, false, false, false, false, true, false, false, false}, 451 /* Constant 32-bit */ {true, true, false, false, true, false, false, true, true}, 452 /* Buffer Fat Ptr */ {true, true, false, false, true, false, true, true, true}, 453 /* Buffer Resource */ {true, true, false, false, true, false, true, true, true}, 454 }; 455 // clang-format on 456 457 return ASAliasRules[AS1][AS2]; 458 } 459 460 } 461 462 } // End namespace llvm 463 464 #endif 465