//===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
/// \file
/// Declarations shared across the AMDGPU target: pass factory functions
/// (create*), PassRegistry initializer hooks (initialize*Pass), references to
/// pass IDs, PassInfoMixin-based pass structs, and the AMDGPU::TargetIndex and
/// AMDGPUAS enumerations.
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPU_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPU_H

#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"

namespace llvm {

// Forward declaration: this header only names TargetMachine through pointers
// and references, so the full definition is not required here.
class TargetMachine;

// GlobalISel passes
// Each combiner has a factory returning a FunctionPass* and an initializer
// taking the PassRegistry. The meaning of IsOptNone is defined with the pass
// implementations (presumably it selects the unoptimized configuration --
// confirm at the definitions).
void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &);
FunctionPass *createAMDGPUPreLegalizeCombiner(bool IsOptNone);
void initializeAMDGPUPostLegalizerCombinerPass(PassRegistry &);
FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone);
FunctionPass *createAMDGPURegBankCombiner(bool IsOptNone);
void initializeAMDGPURegBankCombinerPass(PassRegistry &);

// SI Passes
// Parameterless factories; each returns a FunctionPass* for the named pass.
FunctionPass *createGCNDPPCombinePass();
FunctionPass *createSIAnnotateControlFlowPass();
FunctionPass *createSIFoldOperandsPass();
FunctionPass *createSIPeepholeSDWAPass();
FunctionPass *createSILowerI1CopiesPass();
FunctionPass *createSIShrinkInstructionsPass();
FunctionPass *createSILoadStoreOptimizerPass();
FunctionPass *createSIWholeQuadModePass();
FunctionPass *createSIFixControlFlowLiveIntervalsPass();
FunctionPass *createSIOptimizeExecMaskingPreRAPass();
FunctionPass *createSIOptimizeVGPRLiveRangePass();
FunctionPass *createSIFixSGPRCopiesPass();
FunctionPass *createSIMemoryLegalizerPass();
FunctionPass *createSIInsertWaitcntsPass();
FunctionPass *createSIPreAllocateWWMRegsPass();
FunctionPass *createSIFormMemoryClausesPass();

FunctionPass *createSIPostRABundlerPass();
// Takes the TargetMachine by pointer-to-const.
FunctionPass *createAMDGPUSimplifyLibCallsPass(const TargetMachine *);
// Further pass factories. The return type (FunctionPass* / ModulePass*) tells
// which kind of pass each one produces; the two PropagateAttributes factories
// additionally take the TargetMachine by pointer-to-const.
FunctionPass *createAMDGPUUseNativeCallsPass();
FunctionPass *createAMDGPUCodeGenPreparePass();
FunctionPass *createAMDGPULateCodeGenPreparePass();
FunctionPass *createAMDGPUMachineCFGStructurizerPass();
FunctionPass *createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *);
ModulePass *createAMDGPUPropagateAttributesLatePass(const TargetMachine *);
FunctionPass *createAMDGPURewriteOutArgumentsPass();
ModulePass *createAMDGPUReplaceLDSUseWithPointerPass();
ModulePass *createAMDGPULowerModuleLDSPass();
FunctionPass *createSIModeRegisterPass();
FunctionPass *createGCNPreRAOptimizationsPass();

/// Function-level pass struct built on the PassInfoMixin CRTP base.
///
/// Stores a reference to the TargetMachine passed at construction, so that
/// TargetMachine must outlive this object. run() takes a Function and its
/// FunctionAnalysisManager and returns the PreservedAnalyses set; it is only
/// declared here.
struct AMDGPUSimplifyLibCallsPass : PassInfoMixin<AMDGPUSimplifyLibCallsPass> {
  AMDGPUSimplifyLibCallsPass(TargetMachine &TM) : TM(TM) {}
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);

private:
  TargetMachine &TM;
};

/// Function-level pass struct built on the PassInfoMixin CRTP base; it carries
/// no state of its own. run() is only declared here.
struct AMDGPUUseNativeCallsPass : PassInfoMixin<AMDGPUUseNativeCallsPass> {
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};

// From here on most passes follow the same pattern: an initialize*Pass hook
// taking the PassRegistry, optionally a create* factory, and optionally an
// `extern char &...ID` reference defined in the pass's own source file
// (presumably bound to the pass's static ID -- confirm at the definition).
void initializeAMDGPUDAGToDAGISelPass(PassRegistry&);

void initializeAMDGPUMachineCFGStructurizerPass(PassRegistry&);
extern char &AMDGPUMachineCFGStructurizerID;

void initializeAMDGPUAlwaysInlinePass(PassRegistry&);

// These two factories return the generic Pass* rather than a more specific
// FunctionPass*/ModulePass*.
Pass *createAMDGPUAnnotateKernelFeaturesPass();
Pass *createAMDGPUAttributorPass();
void initializeAMDGPUAttributorPass(PassRegistry &);
void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &);
extern char &AMDGPUAnnotateKernelFeaturesID;

FunctionPass *createAMDGPUAtomicOptimizerPass();
void initializeAMDGPUAtomicOptimizerPass(PassRegistry &);
extern char &AMDGPUAtomicOptimizerID;

ModulePass *createAMDGPULowerIntrinsicsPass();
void initializeAMDGPULowerIntrinsicsPass(PassRegistry &);
extern char &AMDGPULowerIntrinsicsID;

ModulePass *createAMDGPUCtorDtorLoweringLegacyPass();
void initializeAMDGPUCtorDtorLoweringLegacyPass(PassRegistry &);
extern char &AMDGPUCtorDtorLoweringLegacyPassID; 97 98 FunctionPass *createAMDGPULowerKernelArgumentsPass(); 99 void initializeAMDGPULowerKernelArgumentsPass(PassRegistry &); 100 extern char &AMDGPULowerKernelArgumentsID; 101 102 FunctionPass *createAMDGPUPromoteKernelArgumentsPass(); 103 void initializeAMDGPUPromoteKernelArgumentsPass(PassRegistry &); 104 extern char &AMDGPUPromoteKernelArgumentsID; 105 106 struct AMDGPUPromoteKernelArgumentsPass 107 : PassInfoMixin<AMDGPUPromoteKernelArgumentsPass> { 108 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 109 }; 110 111 ModulePass *createAMDGPULowerKernelAttributesPass(); 112 void initializeAMDGPULowerKernelAttributesPass(PassRegistry &); 113 extern char &AMDGPULowerKernelAttributesID; 114 115 struct AMDGPULowerKernelAttributesPass 116 : PassInfoMixin<AMDGPULowerKernelAttributesPass> { 117 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 118 }; 119 120 void initializeAMDGPUPropagateAttributesEarlyPass(PassRegistry &); 121 extern char &AMDGPUPropagateAttributesEarlyID; 122 123 struct AMDGPUPropagateAttributesEarlyPass 124 : PassInfoMixin<AMDGPUPropagateAttributesEarlyPass> { 125 AMDGPUPropagateAttributesEarlyPass(TargetMachine &TM) : TM(TM) {} 126 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 127 128 private: 129 TargetMachine &TM; 130 }; 131 132 void initializeAMDGPUPropagateAttributesLatePass(PassRegistry &); 133 extern char &AMDGPUPropagateAttributesLateID; 134 135 struct AMDGPUPropagateAttributesLatePass 136 : PassInfoMixin<AMDGPUPropagateAttributesLatePass> { 137 AMDGPUPropagateAttributesLatePass(TargetMachine &TM) : TM(TM) {} 138 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 139 140 private: 141 TargetMachine &TM; 142 }; 143 144 void initializeAMDGPUReplaceLDSUseWithPointerPass(PassRegistry &); 145 extern char &AMDGPUReplaceLDSUseWithPointerID; 146 147 struct AMDGPUReplaceLDSUseWithPointerPass 148 : 
PassInfoMixin<AMDGPUReplaceLDSUseWithPointerPass> { 149 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 150 }; 151 152 void initializeAMDGPULowerModuleLDSPass(PassRegistry &); 153 extern char &AMDGPULowerModuleLDSID; 154 155 struct AMDGPULowerModuleLDSPass : PassInfoMixin<AMDGPULowerModuleLDSPass> { 156 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 157 }; 158 159 void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &); 160 extern char &AMDGPURewriteOutArgumentsID; 161 162 void initializeGCNDPPCombinePass(PassRegistry &); 163 extern char &GCNDPPCombineID; 164 165 void initializeSIFoldOperandsPass(PassRegistry &); 166 extern char &SIFoldOperandsID; 167 168 void initializeSIPeepholeSDWAPass(PassRegistry &); 169 extern char &SIPeepholeSDWAID; 170 171 void initializeSIShrinkInstructionsPass(PassRegistry&); 172 extern char &SIShrinkInstructionsID; 173 174 void initializeSIFixSGPRCopiesPass(PassRegistry &); 175 extern char &SIFixSGPRCopiesID; 176 177 void initializeSIFixVGPRCopiesPass(PassRegistry &); 178 extern char &SIFixVGPRCopiesID; 179 180 void initializeSILowerI1CopiesPass(PassRegistry &); 181 extern char &SILowerI1CopiesID; 182 183 void initializeSILowerSGPRSpillsPass(PassRegistry &); 184 extern char &SILowerSGPRSpillsID; 185 186 void initializeSILoadStoreOptimizerPass(PassRegistry &); 187 extern char &SILoadStoreOptimizerID; 188 189 void initializeSIWholeQuadModePass(PassRegistry &); 190 extern char &SIWholeQuadModeID; 191 192 void initializeSILowerControlFlowPass(PassRegistry &); 193 extern char &SILowerControlFlowID; 194 195 void initializeSIPreEmitPeepholePass(PassRegistry &); 196 extern char &SIPreEmitPeepholeID; 197 198 void initializeSILateBranchLoweringPass(PassRegistry &); 199 extern char &SILateBranchLoweringPassID; 200 201 void initializeSIOptimizeExecMaskingPass(PassRegistry &); 202 extern char &SIOptimizeExecMaskingID; 203 204 void initializeSIPreAllocateWWMRegsPass(PassRegistry &); 205 extern char 
&SIPreAllocateWWMRegsID; 206 207 void initializeAMDGPUSimplifyLibCallsPass(PassRegistry &); 208 extern char &AMDGPUSimplifyLibCallsID; 209 210 void initializeAMDGPUUseNativeCallsPass(PassRegistry &); 211 extern char &AMDGPUUseNativeCallsID; 212 213 void initializeAMDGPUPerfHintAnalysisPass(PassRegistry &); 214 extern char &AMDGPUPerfHintAnalysisID; 215 216 // Passes common to R600 and SI 217 FunctionPass *createAMDGPUPromoteAlloca(); 218 void initializeAMDGPUPromoteAllocaPass(PassRegistry&); 219 extern char &AMDGPUPromoteAllocaID; 220 221 FunctionPass *createAMDGPUPromoteAllocaToVector(); 222 void initializeAMDGPUPromoteAllocaToVectorPass(PassRegistry&); 223 extern char &AMDGPUPromoteAllocaToVectorID; 224 225 struct AMDGPUPromoteAllocaPass : PassInfoMixin<AMDGPUPromoteAllocaPass> { 226 AMDGPUPromoteAllocaPass(TargetMachine &TM) : TM(TM) {} 227 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 228 229 private: 230 TargetMachine &TM; 231 }; 232 233 struct AMDGPUPromoteAllocaToVectorPass 234 : PassInfoMixin<AMDGPUPromoteAllocaToVectorPass> { 235 AMDGPUPromoteAllocaToVectorPass(TargetMachine &TM) : TM(TM) {} 236 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 237 238 private: 239 TargetMachine &TM; 240 }; 241 242 Pass *createAMDGPUStructurizeCFGPass(); 243 FunctionPass *createAMDGPUISelDag(TargetMachine &TM, 244 CodeGenOpt::Level OptLevel); 245 ModulePass *createAMDGPUAlwaysInlinePass(bool GlobalOpt = true); 246 247 struct AMDGPUAlwaysInlinePass : PassInfoMixin<AMDGPUAlwaysInlinePass> { 248 AMDGPUAlwaysInlinePass(bool GlobalOpt = true) : GlobalOpt(GlobalOpt) {} 249 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 250 251 private: 252 bool GlobalOpt; 253 }; 254 255 FunctionPass *createAMDGPUAnnotateUniformValues(); 256 257 ModulePass *createAMDGPUPrintfRuntimeBinding(); 258 void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry&); 259 extern char &AMDGPUPrintfRuntimeBindingID; 260 261 void 
initializeAMDGPUResourceUsageAnalysisPass(PassRegistry &); 262 extern char &AMDGPUResourceUsageAnalysisID; 263 264 struct AMDGPUPrintfRuntimeBindingPass 265 : PassInfoMixin<AMDGPUPrintfRuntimeBindingPass> { 266 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 267 }; 268 269 ModulePass* createAMDGPUUnifyMetadataPass(); 270 void initializeAMDGPUUnifyMetadataPass(PassRegistry&); 271 extern char &AMDGPUUnifyMetadataID; 272 273 struct AMDGPUUnifyMetadataPass : PassInfoMixin<AMDGPUUnifyMetadataPass> { 274 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 275 }; 276 277 void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry&); 278 extern char &SIOptimizeExecMaskingPreRAID; 279 280 void initializeSIOptimizeVGPRLiveRangePass(PassRegistry &); 281 extern char &SIOptimizeVGPRLiveRangeID; 282 283 void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry&); 284 extern char &AMDGPUAnnotateUniformValuesPassID; 285 286 void initializeAMDGPUCodeGenPreparePass(PassRegistry&); 287 extern char &AMDGPUCodeGenPrepareID; 288 289 void initializeAMDGPULateCodeGenPreparePass(PassRegistry &); 290 extern char &AMDGPULateCodeGenPrepareID; 291 292 FunctionPass *createAMDGPURewriteUndefForPHIPass(); 293 void initializeAMDGPURewriteUndefForPHIPass(PassRegistry &); 294 extern char &AMDGPURewriteUndefForPHIPassID; 295 296 void initializeSIAnnotateControlFlowPass(PassRegistry&); 297 extern char &SIAnnotateControlFlowPassID; 298 299 void initializeSIMemoryLegalizerPass(PassRegistry&); 300 extern char &SIMemoryLegalizerID; 301 302 void initializeSIModeRegisterPass(PassRegistry&); 303 extern char &SIModeRegisterID; 304 305 void initializeAMDGPUReleaseVGPRsPass(PassRegistry &); 306 extern char &AMDGPUReleaseVGPRsID; 307 308 void initializeAMDGPUInsertDelayAluPass(PassRegistry &); 309 extern char &AMDGPUInsertDelayAluID; 310 311 void initializeSIInsertHardClausesPass(PassRegistry &); 312 extern char &SIInsertHardClausesID; 313 314 void 
initializeSIInsertWaitcntsPass(PassRegistry&); 315 extern char &SIInsertWaitcntsID; 316 317 void initializeSIFormMemoryClausesPass(PassRegistry&); 318 extern char &SIFormMemoryClausesID; 319 320 void initializeSIPostRABundlerPass(PassRegistry&); 321 extern char &SIPostRABundlerID; 322 323 void initializeGCNCreateVOPDPass(PassRegistry &); 324 extern char &GCNCreateVOPDID; 325 326 void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry&); 327 extern char &AMDGPUUnifyDivergentExitNodesID; 328 329 ImmutablePass *createAMDGPUAAWrapperPass(); 330 void initializeAMDGPUAAWrapperPassPass(PassRegistry&); 331 ImmutablePass *createAMDGPUExternalAAWrapperPass(); 332 void initializeAMDGPUExternalAAWrapperPass(PassRegistry&); 333 334 void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &); 335 336 ModulePass *createAMDGPUOpenCLEnqueuedBlockLoweringPass(); 337 void initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(PassRegistry &); 338 extern char &AMDGPUOpenCLEnqueuedBlockLoweringID; 339 340 void initializeGCNNSAReassignPass(PassRegistry &); 341 extern char &GCNNSAReassignID; 342 343 void initializeGCNPreRAOptimizationsPass(PassRegistry &); 344 extern char &GCNPreRAOptimizationsID; 345 346 FunctionPass *createAMDGPUSetWavePriorityPass(); 347 void initializeAMDGPUSetWavePriorityPass(PassRegistry &); 348 349 namespace AMDGPU { 350 enum TargetIndex { 351 TI_CONSTDATA_START, 352 TI_SCRATCH_RSRC_DWORD0, 353 TI_SCRATCH_RSRC_DWORD1, 354 TI_SCRATCH_RSRC_DWORD2, 355 TI_SCRATCH_RSRC_DWORD3 356 }; 357 } 358 359 /// OpenCL uses address spaces to differentiate between 360 /// various memory regions on the hardware. On the CPU 361 /// all of the address spaces point to the same memory, 362 /// however on the GPU, each address space points to 363 /// a separate piece of memory that is unique from other 364 /// memory locations. 365 namespace AMDGPUAS { 366 enum : unsigned { 367 // The maximum value for flat, generic, local, private, constant and region. 
368 MAX_AMDGPU_ADDRESS = 7, 369 370 FLAT_ADDRESS = 0, ///< Address space for flat memory. 371 GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0). 372 REGION_ADDRESS = 2, ///< Address space for region memory. (GDS) 373 374 CONSTANT_ADDRESS = 4, ///< Address space for constant memory (VTX2). 375 LOCAL_ADDRESS = 3, ///< Address space for local memory. 376 PRIVATE_ADDRESS = 5, ///< Address space for private memory. 377 378 CONSTANT_ADDRESS_32BIT = 6, ///< Address space for 32-bit constant memory. 379 380 BUFFER_FAT_POINTER = 7, ///< Address space for 160-bit buffer fat pointers. 381 382 /// Address space for direct addressable parameter memory (CONST0). 383 PARAM_D_ADDRESS = 6, 384 /// Address space for indirect addressable parameter memory (VTX1). 385 PARAM_I_ADDRESS = 7, 386 387 // Do not re-order the CONSTANT_BUFFER_* enums. Several places depend on 388 // this order to be able to dynamically index a constant buffer, for 389 // example: 390 // 391 // ConstantBufferAS = CONSTANT_BUFFER_0 + CBIdx 392 393 CONSTANT_BUFFER_0 = 8, 394 CONSTANT_BUFFER_1 = 9, 395 CONSTANT_BUFFER_2 = 10, 396 CONSTANT_BUFFER_3 = 11, 397 CONSTANT_BUFFER_4 = 12, 398 CONSTANT_BUFFER_5 = 13, 399 CONSTANT_BUFFER_6 = 14, 400 CONSTANT_BUFFER_7 = 15, 401 CONSTANT_BUFFER_8 = 16, 402 CONSTANT_BUFFER_9 = 17, 403 CONSTANT_BUFFER_10 = 18, 404 CONSTANT_BUFFER_11 = 19, 405 CONSTANT_BUFFER_12 = 20, 406 CONSTANT_BUFFER_13 = 21, 407 CONSTANT_BUFFER_14 = 22, 408 CONSTANT_BUFFER_15 = 23, 409 410 // Some places use this if the address space can't be determined. 411 UNKNOWN_ADDRESS_SPACE = ~0u, 412 }; 413 } 414 415 namespace AMDGPU { 416 417 // FIXME: Missing constant_32bit 418 inline bool isFlatGlobalAddrSpace(unsigned AS) { 419 return AS == AMDGPUAS::GLOBAL_ADDRESS || 420 AS == AMDGPUAS::FLAT_ADDRESS || 421 AS == AMDGPUAS::CONSTANT_ADDRESS || 422 AS > AMDGPUAS::MAX_AMDGPU_ADDRESS; 423 } 424 } 425 426 } // End namespace llvm 427 428 #endif 429