1 //===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 /// \file 8 //===----------------------------------------------------------------------===// 9 10 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPU_H 11 #define LLVM_LIB_TARGET_AMDGPU_AMDGPU_H 12 13 #include "llvm/IR/PassManager.h" 14 #include "llvm/Pass.h" 15 #include "llvm/Support/CodeGen.h" 16 17 namespace llvm { 18 19 class TargetMachine; 20 21 // GlobalISel passes 22 void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &); 23 FunctionPass *createAMDGPUPreLegalizeCombiner(bool IsOptNone); 24 void initializeAMDGPUPostLegalizerCombinerPass(PassRegistry &); 25 FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone); 26 FunctionPass *createAMDGPURegBankCombiner(bool IsOptNone); 27 void initializeAMDGPURegBankCombinerPass(PassRegistry &); 28 29 void initializeAMDGPURegBankSelectPass(PassRegistry &); 30 31 // SI Passes 32 FunctionPass *createGCNDPPCombinePass(); 33 FunctionPass *createSIAnnotateControlFlowPass(); 34 FunctionPass *createSIFoldOperandsPass(); 35 FunctionPass *createSIPeepholeSDWAPass(); 36 FunctionPass *createSILowerI1CopiesPass(); 37 FunctionPass *createSIShrinkInstructionsPass(); 38 FunctionPass *createSILoadStoreOptimizerPass(); 39 FunctionPass *createSIWholeQuadModePass(); 40 FunctionPass *createSIFixControlFlowLiveIntervalsPass(); 41 FunctionPass *createSIOptimizeExecMaskingPreRAPass(); 42 FunctionPass *createSIOptimizeVGPRLiveRangePass(); 43 FunctionPass *createSIFixSGPRCopiesPass(); 44 FunctionPass *createSIMemoryLegalizerPass(); 45 FunctionPass *createSIInsertWaitcntsPass(); 46 FunctionPass *createSIPreAllocateWWMRegsPass(); 47 FunctionPass *createSIFormMemoryClausesPass(); 48 49 FunctionPass *createSIPostRABundlerPass(); 50 FunctionPass *createAMDGPUSimplifyLibCallsPass(const TargetMachine *); 51 FunctionPass *createAMDGPUUseNativeCallsPass(); 52 ModulePass *createAMDGPURemoveIncompatibleFunctionsPass(const TargetMachine *); 53 FunctionPass *createAMDGPUCodeGenPreparePass(); 54 FunctionPass *createAMDGPULateCodeGenPreparePass(); 55 FunctionPass *createAMDGPUMachineCFGStructurizerPass(); 56 FunctionPass *createAMDGPURewriteOutArgumentsPass(); 57 ModulePass *createAMDGPULowerModuleLDSPass(); 58 FunctionPass *createSIModeRegisterPass(); 59 FunctionPass *createGCNPreRAOptimizationsPass(); 60 61 struct AMDGPUSimplifyLibCallsPass : PassInfoMixin<AMDGPUSimplifyLibCallsPass> { 62 AMDGPUSimplifyLibCallsPass(TargetMachine &TM) : TM(TM) {} 63 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 64 65 private: 66 TargetMachine &TM; 67 }; 68 69 struct AMDGPUUseNativeCallsPass : PassInfoMixin<AMDGPUUseNativeCallsPass> { 70 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 71 }; 72 73 void initializeAMDGPUDAGToDAGISelPass(PassRegistry&); 74 75 void initializeAMDGPUMachineCFGStructurizerPass(PassRegistry&); 76 extern char &AMDGPUMachineCFGStructurizerID; 77 78 void initializeAMDGPUAlwaysInlinePass(PassRegistry&); 79 80 Pass *createAMDGPUAnnotateKernelFeaturesPass(); 81 Pass *createAMDGPUAttributorPass(); 82 void initializeAMDGPUAttributorPass(PassRegistry &); 83 void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &); 84 extern char &AMDGPUAnnotateKernelFeaturesID; 85 86 // DPP/Iterative option enables the atomic optimizer with given strategy 87 // whereas None disables the atomic optimizer. 88 enum class ScanOptions { DPP, Iterative, None }; 89 FunctionPass *createAMDGPUAtomicOptimizerPass(ScanOptions ScanStrategy); 90 void initializeAMDGPUAtomicOptimizerPass(PassRegistry &); 91 extern char &AMDGPUAtomicOptimizerID; 92 93 ModulePass *createAMDGPUCtorDtorLoweringLegacyPass(); 94 void initializeAMDGPUCtorDtorLoweringLegacyPass(PassRegistry &); 95 extern char &AMDGPUCtorDtorLoweringLegacyPassID; 96 97 FunctionPass *createAMDGPULowerKernelArgumentsPass(); 98 void initializeAMDGPULowerKernelArgumentsPass(PassRegistry &); 99 extern char &AMDGPULowerKernelArgumentsID; 100 101 FunctionPass *createAMDGPUPromoteKernelArgumentsPass(); 102 void initializeAMDGPUPromoteKernelArgumentsPass(PassRegistry &); 103 extern char &AMDGPUPromoteKernelArgumentsID; 104 105 struct AMDGPUPromoteKernelArgumentsPass 106 : PassInfoMixin<AMDGPUPromoteKernelArgumentsPass> { 107 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 108 }; 109 110 ModulePass *createAMDGPULowerKernelAttributesPass(); 111 void initializeAMDGPULowerKernelAttributesPass(PassRegistry &); 112 extern char &AMDGPULowerKernelAttributesID; 113 114 struct AMDGPULowerKernelAttributesPass 115 : PassInfoMixin<AMDGPULowerKernelAttributesPass> { 116 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 117 }; 118 119 void initializeAMDGPULowerModuleLDSPass(PassRegistry &); 120 extern char &AMDGPULowerModuleLDSID; 121 122 struct AMDGPULowerModuleLDSPass : PassInfoMixin<AMDGPULowerModuleLDSPass> { 123 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 124 }; 125 126 void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &); 127 extern char &AMDGPURewriteOutArgumentsID; 128 129 void initializeGCNDPPCombinePass(PassRegistry &); 130 extern char &GCNDPPCombineID; 131 132 void initializeSIFoldOperandsPass(PassRegistry &); 133 extern char &SIFoldOperandsID; 134 135 void initializeSIPeepholeSDWAPass(PassRegistry &); 136 extern char &SIPeepholeSDWAID; 137 138 void initializeSIShrinkInstructionsPass(PassRegistry&); 139 extern char &SIShrinkInstructionsID; 140 141 void initializeSIFixSGPRCopiesPass(PassRegistry &); 142 extern char &SIFixSGPRCopiesID; 143 144 void initializeSIFixVGPRCopiesPass(PassRegistry &); 145 extern char &SIFixVGPRCopiesID; 146 147 void initializeSILowerI1CopiesPass(PassRegistry &); 148 extern char &SILowerI1CopiesID; 149 150 void initializeSILowerSGPRSpillsPass(PassRegistry &); 151 extern char &SILowerSGPRSpillsID; 152 153 void initializeSILoadStoreOptimizerPass(PassRegistry &); 154 extern char &SILoadStoreOptimizerID; 155 156 void initializeSIWholeQuadModePass(PassRegistry &); 157 extern char &SIWholeQuadModeID; 158 159 void initializeSILowerControlFlowPass(PassRegistry &); 160 extern char &SILowerControlFlowID; 161 162 void initializeSIPreEmitPeepholePass(PassRegistry &); 163 extern char &SIPreEmitPeepholeID; 164 165 void initializeSILateBranchLoweringPass(PassRegistry &); 166 extern char &SILateBranchLoweringPassID; 167 168 void initializeSIOptimizeExecMaskingPass(PassRegistry &); 169 extern char &SIOptimizeExecMaskingID; 170 171 void initializeSIPreAllocateWWMRegsPass(PassRegistry &); 172 extern char &SIPreAllocateWWMRegsID; 173 174 void initializeAMDGPUSimplifyLibCallsPass(PassRegistry &); 175 extern char &AMDGPUSimplifyLibCallsID; 176 177 void initializeAMDGPUUseNativeCallsPass(PassRegistry &); 178 extern char &AMDGPUUseNativeCallsID; 179 180 void initializeAMDGPUPerfHintAnalysisPass(PassRegistry &); 181 extern char &AMDGPUPerfHintAnalysisID; 182 183 // Passes common to R600 and SI 184 FunctionPass *createAMDGPUPromoteAlloca(); 185 void initializeAMDGPUPromoteAllocaPass(PassRegistry&); 186 extern char &AMDGPUPromoteAllocaID; 187 188 FunctionPass *createAMDGPUPromoteAllocaToVector(); 189 void initializeAMDGPUPromoteAllocaToVectorPass(PassRegistry&); 190 extern char &AMDGPUPromoteAllocaToVectorID; 191 192 struct AMDGPUPromoteAllocaPass : PassInfoMixin<AMDGPUPromoteAllocaPass> { 193 AMDGPUPromoteAllocaPass(TargetMachine &TM) : TM(TM) {} 194 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 195 196 private: 197 TargetMachine &TM; 198 }; 199 200 struct AMDGPUPromoteAllocaToVectorPass 201 : PassInfoMixin<AMDGPUPromoteAllocaToVectorPass> { 202 AMDGPUPromoteAllocaToVectorPass(TargetMachine &TM) : TM(TM) {} 203 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 204 205 private: 206 TargetMachine &TM; 207 }; 208 209 struct AMDGPUAtomicOptimizerPass : PassInfoMixin<AMDGPUAtomicOptimizerPass> { 210 AMDGPUAtomicOptimizerPass(TargetMachine &TM, ScanOptions ScanImpl) 211 : TM(TM), ScanImpl(ScanImpl) {} 212 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 213 214 private: 215 TargetMachine &TM; 216 ScanOptions ScanImpl; 217 }; 218 219 Pass *createAMDGPUStructurizeCFGPass(); 220 FunctionPass *createAMDGPUISelDag(TargetMachine &TM, 221 CodeGenOpt::Level OptLevel); 222 ModulePass *createAMDGPUAlwaysInlinePass(bool GlobalOpt = true); 223 224 struct AMDGPUAlwaysInlinePass : PassInfoMixin<AMDGPUAlwaysInlinePass> { 225 AMDGPUAlwaysInlinePass(bool GlobalOpt = true) : GlobalOpt(GlobalOpt) {} 226 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 227 228 private: 229 bool GlobalOpt; 230 }; 231 232 class AMDGPUCodeGenPreparePass 233 : public PassInfoMixin<AMDGPUCodeGenPreparePass> { 234 private: 235 TargetMachine &TM; 236 237 public: 238 AMDGPUCodeGenPreparePass(TargetMachine &TM) : TM(TM){}; 239 PreservedAnalyses run(Function &, FunctionAnalysisManager &); 240 }; 241 242 FunctionPass *createAMDGPUAnnotateUniformValues(); 243 244 ModulePass *createAMDGPUPrintfRuntimeBinding(); 245 void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry&); 246 extern char &AMDGPUPrintfRuntimeBindingID; 247 248 void initializeAMDGPUResourceUsageAnalysisPass(PassRegistry &); 249 extern char &AMDGPUResourceUsageAnalysisID; 250 251 struct AMDGPUPrintfRuntimeBindingPass 252 : PassInfoMixin<AMDGPUPrintfRuntimeBindingPass> { 253 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 254 }; 255 256 ModulePass* createAMDGPUUnifyMetadataPass(); 257 void initializeAMDGPUUnifyMetadataPass(PassRegistry&); 258 extern char &AMDGPUUnifyMetadataID; 259 260 struct AMDGPUUnifyMetadataPass : PassInfoMixin<AMDGPUUnifyMetadataPass> { 261 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 262 }; 263 264 void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry&); 265 extern char &SIOptimizeExecMaskingPreRAID; 266 267 void initializeSIOptimizeVGPRLiveRangePass(PassRegistry &); 268 extern char &SIOptimizeVGPRLiveRangeID; 269 270 void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry&); 271 extern char &AMDGPUAnnotateUniformValuesPassID; 272 273 void initializeAMDGPUCodeGenPreparePass(PassRegistry&); 274 extern char &AMDGPUCodeGenPrepareID; 275 276 void initializeAMDGPURemoveIncompatibleFunctionsPass(PassRegistry &); 277 extern char &AMDGPURemoveIncompatibleFunctionsID; 278 279 void initializeAMDGPULateCodeGenPreparePass(PassRegistry &); 280 extern char &AMDGPULateCodeGenPrepareID; 281 282 FunctionPass *createAMDGPURewriteUndefForPHIPass(); 283 void initializeAMDGPURewriteUndefForPHIPass(PassRegistry &); 284 extern char &AMDGPURewriteUndefForPHIPassID; 285 286 void initializeSIAnnotateControlFlowPass(PassRegistry&); 287 extern char &SIAnnotateControlFlowPassID; 288 289 void initializeSIMemoryLegalizerPass(PassRegistry&); 290 extern char &SIMemoryLegalizerID; 291 292 void initializeSIModeRegisterPass(PassRegistry&); 293 extern char &SIModeRegisterID; 294 295 void initializeAMDGPUInsertDelayAluPass(PassRegistry &); 296 extern char &AMDGPUInsertDelayAluID; 297 298 void initializeSIInsertHardClausesPass(PassRegistry &); 299 extern char &SIInsertHardClausesID; 300 301 void initializeSIInsertWaitcntsPass(PassRegistry&); 302 extern char &SIInsertWaitcntsID; 303 304 void initializeSIFormMemoryClausesPass(PassRegistry&); 305 extern char &SIFormMemoryClausesID; 306 307 void initializeSIPostRABundlerPass(PassRegistry&); 308 extern char &SIPostRABundlerID; 309 310 void initializeGCNCreateVOPDPass(PassRegistry &); 311 extern char &GCNCreateVOPDID; 312 313 void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry&); 314 extern char &AMDGPUUnifyDivergentExitNodesID; 315 316 ImmutablePass *createAMDGPUAAWrapperPass(); 317 void initializeAMDGPUAAWrapperPassPass(PassRegistry&); 318 ImmutablePass *createAMDGPUExternalAAWrapperPass(); 319 void initializeAMDGPUExternalAAWrapperPass(PassRegistry&); 320 321 void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &); 322 323 ModulePass *createAMDGPUOpenCLEnqueuedBlockLoweringPass(); 324 void initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(PassRegistry &); 325 extern char &AMDGPUOpenCLEnqueuedBlockLoweringID; 326 327 void initializeGCNNSAReassignPass(PassRegistry &); 328 extern char &GCNNSAReassignID; 329 330 void initializeGCNPreRALongBranchRegPass(PassRegistry &); 331 extern char &GCNPreRALongBranchRegID; 332 333 void initializeGCNPreRAOptimizationsPass(PassRegistry &); 334 extern char &GCNPreRAOptimizationsID; 335 336 FunctionPass *createAMDGPUSetWavePriorityPass(); 337 void initializeAMDGPUSetWavePriorityPass(PassRegistry &); 338 339 void initializeGCNRewritePartialRegUsesPass(llvm::PassRegistry &); 340 extern char &GCNRewritePartialRegUsesID; 341 342 namespace AMDGPU { 343 enum TargetIndex { 344 TI_CONSTDATA_START, 345 TI_SCRATCH_RSRC_DWORD0, 346 TI_SCRATCH_RSRC_DWORD1, 347 TI_SCRATCH_RSRC_DWORD2, 348 TI_SCRATCH_RSRC_DWORD3 349 }; 350 } 351 352 /// OpenCL uses address spaces to differentiate between 353 /// various memory regions on the hardware. On the CPU 354 /// all of the address spaces point to the same memory, 355 /// however on the GPU, each address space points to 356 /// a separate piece of memory that is unique from other 357 /// memory locations. 358 namespace AMDGPUAS { 359 enum : unsigned { 360 // The maximum value for flat, generic, local, private, constant and region. 361 MAX_AMDGPU_ADDRESS = 8, 362 363 FLAT_ADDRESS = 0, ///< Address space for flat memory. 364 GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0). 365 REGION_ADDRESS = 2, ///< Address space for region memory. (GDS) 366 367 CONSTANT_ADDRESS = 4, ///< Address space for constant memory (VTX2). 368 LOCAL_ADDRESS = 3, ///< Address space for local memory. 369 PRIVATE_ADDRESS = 5, ///< Address space for private memory. 370 371 CONSTANT_ADDRESS_32BIT = 6, ///< Address space for 32-bit constant memory. 372 373 BUFFER_FAT_POINTER = 7, ///< Address space for 160-bit buffer fat pointers. 374 ///< Not used in backend. 375 376 BUFFER_RESOURCE = 8, ///< Address space for 128-bit buffer resources. 377 378 /// Internal address spaces. Can be freely renumbered. 379 STREAMOUT_REGISTER = 128, ///< Address space for GS NGG Streamout registers. 380 /// end Internal address spaces. 381 382 /// Address space for direct addressable parameter memory (CONST0). 383 PARAM_D_ADDRESS = 6, 384 /// Address space for indirect addressable parameter memory (VTX1). 385 PARAM_I_ADDRESS = 7, 386 387 // Do not re-order the CONSTANT_BUFFER_* enums. Several places depend on 388 // this order to be able to dynamically index a constant buffer, for 389 // example: 390 // 391 // ConstantBufferAS = CONSTANT_BUFFER_0 + CBIdx 392 393 CONSTANT_BUFFER_0 = 8, 394 CONSTANT_BUFFER_1 = 9, 395 CONSTANT_BUFFER_2 = 10, 396 CONSTANT_BUFFER_3 = 11, 397 CONSTANT_BUFFER_4 = 12, 398 CONSTANT_BUFFER_5 = 13, 399 CONSTANT_BUFFER_6 = 14, 400 CONSTANT_BUFFER_7 = 15, 401 CONSTANT_BUFFER_8 = 16, 402 CONSTANT_BUFFER_9 = 17, 403 CONSTANT_BUFFER_10 = 18, 404 CONSTANT_BUFFER_11 = 19, 405 CONSTANT_BUFFER_12 = 20, 406 CONSTANT_BUFFER_13 = 21, 407 CONSTANT_BUFFER_14 = 22, 408 CONSTANT_BUFFER_15 = 23, 409 410 // Some places use this if the address space can't be determined. 411 UNKNOWN_ADDRESS_SPACE = ~0u, 412 }; 413 } 414 415 namespace AMDGPU { 416 417 // FIXME: Missing constant_32bit 418 inline bool isFlatGlobalAddrSpace(unsigned AS) { 419 return AS == AMDGPUAS::GLOBAL_ADDRESS || 420 AS == AMDGPUAS::FLAT_ADDRESS || 421 AS == AMDGPUAS::CONSTANT_ADDRESS || 422 AS > AMDGPUAS::MAX_AMDGPU_ADDRESS; 423 } 424 425 inline bool isExtendedGlobalAddrSpace(unsigned AS) { 426 return AS == AMDGPUAS::GLOBAL_ADDRESS || AS == AMDGPUAS::CONSTANT_ADDRESS || 427 AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT || 428 AS > AMDGPUAS::MAX_AMDGPU_ADDRESS; 429 } 430 431 static inline bool addrspacesMayAlias(unsigned AS1, unsigned AS2) { 432 static_assert(AMDGPUAS::MAX_AMDGPU_ADDRESS <= 8, "Addr space out of range"); 433 434 if (AS1 > AMDGPUAS::MAX_AMDGPU_ADDRESS || AS2 > AMDGPUAS::MAX_AMDGPU_ADDRESS) 435 return true; 436 437 // This array is indexed by address space value enum elements 0 ... to 8 438 // clang-format off 439 static const bool ASAliasRules[9][9] = { 440 /* Flat Global Region Group Constant Private Const32 BufFatPtr BufRsrc */ 441 /* Flat */ {true, true, false, true, true, true, true, true, true}, 442 /* Global */ {true, true, false, false, true, false, true, true, true}, 443 /* Region */ {false, false, true, false, false, false, false, false, false}, 444 /* Group */ {true, false, false, true, false, false, false, false, false}, 445 /* Constant */ {true, true, false, false, false, false, true, true, true}, 446 /* Private */ {true, false, false, false, false, true, false, false, false}, 447 /* Constant 32-bit */ {true, true, false, false, true, false, false, true, true}, 448 /* Buffer Fat Ptr */ {true, true, false, false, true, false, true, true, true}, 449 /* Buffer Resource */ {true, true, false, false, true, false, true, true, true}, 450 }; 451 // clang-format on 452 453 return ASAliasRules[AS1][AS2]; 454 } 455 456 } 457 458 } // End namespace llvm 459 460 #endif 461