1 //===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 /// \file 8 //===----------------------------------------------------------------------===// 9 10 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPU_H 11 #define LLVM_LIB_TARGET_AMDGPU_AMDGPU_H 12 13 #include "llvm/IR/PassManager.h" 14 #include "llvm/Pass.h" 15 #include "llvm/Support/CodeGen.h" 16 17 namespace llvm { 18 19 class TargetMachine; 20 21 // GlobalISel passes 22 void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &); 23 FunctionPass *createAMDGPUPreLegalizeCombiner(bool IsOptNone); 24 void initializeAMDGPUPostLegalizerCombinerPass(PassRegistry &); 25 FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone); 26 FunctionPass *createAMDGPURegBankCombiner(bool IsOptNone); 27 void initializeAMDGPURegBankCombinerPass(PassRegistry &); 28 29 // SI Passes 30 FunctionPass *createGCNDPPCombinePass(); 31 FunctionPass *createSIAnnotateControlFlowPass(); 32 FunctionPass *createSIFoldOperandsPass(); 33 FunctionPass *createSIPeepholeSDWAPass(); 34 FunctionPass *createSILowerI1CopiesPass(); 35 FunctionPass *createSIShrinkInstructionsPass(); 36 FunctionPass *createSILoadStoreOptimizerPass(); 37 FunctionPass *createSIWholeQuadModePass(); 38 FunctionPass *createSIFixControlFlowLiveIntervalsPass(); 39 FunctionPass *createSIOptimizeExecMaskingPreRAPass(); 40 FunctionPass *createSIOptimizeVGPRLiveRangePass(); 41 FunctionPass *createSIFixSGPRCopiesPass(); 42 FunctionPass *createSIMemoryLegalizerPass(); 43 FunctionPass *createSIInsertWaitcntsPass(); 44 FunctionPass *createSIPreAllocateWWMRegsPass(); 45 FunctionPass *createSIFormMemoryClausesPass(); 46 47 FunctionPass *createSIPostRABundlerPass(); 48 FunctionPass *createAMDGPUSimplifyLibCallsPass(const TargetMachine *); 49 FunctionPass *createAMDGPUUseNativeCallsPass(); 50 FunctionPass *createAMDGPUCodeGenPreparePass(); 51 FunctionPass *createAMDGPULateCodeGenPreparePass(); 52 FunctionPass *createAMDGPUMachineCFGStructurizerPass(); 53 FunctionPass *createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *); 54 ModulePass *createAMDGPUPropagateAttributesLatePass(const TargetMachine *); 55 FunctionPass *createAMDGPURewriteOutArgumentsPass(); 56 ModulePass *createAMDGPUReplaceLDSUseWithPointerPass(); 57 ModulePass *createAMDGPULowerModuleLDSPass(); 58 FunctionPass *createSIModeRegisterPass(); 59 FunctionPass *createGCNPreRAOptimizationsPass(); 60 61 struct AMDGPUSimplifyLibCallsPass : PassInfoMixin<AMDGPUSimplifyLibCallsPass> { 62 AMDGPUSimplifyLibCallsPass(TargetMachine &TM) : TM(TM) {} 63 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 64 65 private: 66 TargetMachine &TM; 67 }; 68 69 struct AMDGPUUseNativeCallsPass : PassInfoMixin<AMDGPUUseNativeCallsPass> { 70 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 71 }; 72 73 void initializeAMDGPUDAGToDAGISelPass(PassRegistry&); 74 75 void initializeAMDGPUMachineCFGStructurizerPass(PassRegistry&); 76 extern char &AMDGPUMachineCFGStructurizerID; 77 78 void initializeAMDGPUAlwaysInlinePass(PassRegistry&); 79 80 Pass *createAMDGPUAnnotateKernelFeaturesPass(); 81 Pass *createAMDGPUAttributorPass(); 82 void initializeAMDGPUAttributorPass(PassRegistry &); 83 void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &); 84 extern char &AMDGPUAnnotateKernelFeaturesID; 85 86 FunctionPass *createAMDGPUAtomicOptimizerPass(); 87 void initializeAMDGPUAtomicOptimizerPass(PassRegistry &); 88 extern char &AMDGPUAtomicOptimizerID; 89 90 ModulePass *createAMDGPULowerIntrinsicsPass(); 91 void initializeAMDGPULowerIntrinsicsPass(PassRegistry &); 92 extern char &AMDGPULowerIntrinsicsID; 93 94 ModulePass *createAMDGPUCtorDtorLoweringPass(); 95 void initializeAMDGPUCtorDtorLoweringPass(PassRegistry &); 96 extern char &AMDGPUCtorDtorLoweringID; 97 98 FunctionPass *createAMDGPULowerKernelArgumentsPass(); 99 void initializeAMDGPULowerKernelArgumentsPass(PassRegistry &); 100 extern char &AMDGPULowerKernelArgumentsID; 101 102 FunctionPass *createAMDGPUPromoteKernelArgumentsPass(); 103 void initializeAMDGPUPromoteKernelArgumentsPass(PassRegistry &); 104 extern char &AMDGPUPromoteKernelArgumentsID; 105 106 struct AMDGPUPromoteKernelArgumentsPass 107 : PassInfoMixin<AMDGPUPromoteKernelArgumentsPass> { 108 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 109 }; 110 111 ModulePass *createAMDGPULowerKernelAttributesPass(); 112 void initializeAMDGPULowerKernelAttributesPass(PassRegistry &); 113 extern char &AMDGPULowerKernelAttributesID; 114 115 struct AMDGPULowerKernelAttributesPass 116 : PassInfoMixin<AMDGPULowerKernelAttributesPass> { 117 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 118 }; 119 120 void initializeAMDGPUPropagateAttributesEarlyPass(PassRegistry &); 121 extern char &AMDGPUPropagateAttributesEarlyID; 122 123 struct AMDGPUPropagateAttributesEarlyPass 124 : PassInfoMixin<AMDGPUPropagateAttributesEarlyPass> { 125 AMDGPUPropagateAttributesEarlyPass(TargetMachine &TM) : TM(TM) {} 126 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 127 128 private: 129 TargetMachine &TM; 130 }; 131 132 void initializeAMDGPUPropagateAttributesLatePass(PassRegistry &); 133 extern char &AMDGPUPropagateAttributesLateID; 134 135 struct AMDGPUPropagateAttributesLatePass 136 : PassInfoMixin<AMDGPUPropagateAttributesLatePass> { 137 AMDGPUPropagateAttributesLatePass(TargetMachine &TM) : TM(TM) {} 138 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 139 140 private: 141 TargetMachine &TM; 142 }; 143 144 void initializeAMDGPUReplaceLDSUseWithPointerPass(PassRegistry &); 145 extern char &AMDGPUReplaceLDSUseWithPointerID; 146 147 struct AMDGPUReplaceLDSUseWithPointerPass 148 : PassInfoMixin<AMDGPUReplaceLDSUseWithPointerPass> { 149 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 150 }; 151 152 void initializeAMDGPULowerModuleLDSPass(PassRegistry &); 153 extern char &AMDGPULowerModuleLDSID; 154 155 struct AMDGPULowerModuleLDSPass : PassInfoMixin<AMDGPULowerModuleLDSPass> { 156 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 157 }; 158 159 void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &); 160 extern char &AMDGPURewriteOutArgumentsID; 161 162 void initializeGCNDPPCombinePass(PassRegistry &); 163 extern char &GCNDPPCombineID; 164 165 void initializeSIFoldOperandsPass(PassRegistry &); 166 extern char &SIFoldOperandsID; 167 168 void initializeSIPeepholeSDWAPass(PassRegistry &); 169 extern char &SIPeepholeSDWAID; 170 171 void initializeSIShrinkInstructionsPass(PassRegistry&); 172 extern char &SIShrinkInstructionsID; 173 174 void initializeSIFixSGPRCopiesPass(PassRegistry &); 175 extern char &SIFixSGPRCopiesID; 176 177 void initializeSIFixVGPRCopiesPass(PassRegistry &); 178 extern char &SIFixVGPRCopiesID; 179 180 void initializeSILowerI1CopiesPass(PassRegistry &); 181 extern char &SILowerI1CopiesID; 182 183 void initializeSILowerSGPRSpillsPass(PassRegistry &); 184 extern char &SILowerSGPRSpillsID; 185 186 void initializeSILoadStoreOptimizerPass(PassRegistry &); 187 extern char &SILoadStoreOptimizerID; 188 189 void initializeSIWholeQuadModePass(PassRegistry &); 190 extern char &SIWholeQuadModeID; 191 192 void initializeSILowerControlFlowPass(PassRegistry &); 193 extern char &SILowerControlFlowID; 194 195 void initializeSIPreEmitPeepholePass(PassRegistry &); 196 extern char &SIPreEmitPeepholeID; 197 198 void initializeSILateBranchLoweringPass(PassRegistry &); 199 extern char &SILateBranchLoweringPassID; 200 201 void initializeSIOptimizeExecMaskingPass(PassRegistry &); 202 extern char &SIOptimizeExecMaskingID; 203 204 void initializeSIPreAllocateWWMRegsPass(PassRegistry &); 205 extern char &SIPreAllocateWWMRegsID; 206 207 void initializeAMDGPUSimplifyLibCallsPass(PassRegistry &); 208 extern char &AMDGPUSimplifyLibCallsID; 209 210 void initializeAMDGPUUseNativeCallsPass(PassRegistry &); 211 extern char &AMDGPUUseNativeCallsID; 212 213 void initializeAMDGPUPerfHintAnalysisPass(PassRegistry &); 214 extern char &AMDGPUPerfHintAnalysisID; 215 216 // Passes common to R600 and SI 217 FunctionPass *createAMDGPUPromoteAlloca(); 218 void initializeAMDGPUPromoteAllocaPass(PassRegistry&); 219 extern char &AMDGPUPromoteAllocaID; 220 221 FunctionPass *createAMDGPUPromoteAllocaToVector(); 222 void initializeAMDGPUPromoteAllocaToVectorPass(PassRegistry&); 223 extern char &AMDGPUPromoteAllocaToVectorID; 224 225 struct AMDGPUPromoteAllocaPass : PassInfoMixin<AMDGPUPromoteAllocaPass> { 226 AMDGPUPromoteAllocaPass(TargetMachine &TM) : TM(TM) {} 227 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 228 229 private: 230 TargetMachine &TM; 231 }; 232 233 struct AMDGPUPromoteAllocaToVectorPass 234 : PassInfoMixin<AMDGPUPromoteAllocaToVectorPass> { 235 AMDGPUPromoteAllocaToVectorPass(TargetMachine &TM) : TM(TM) {} 236 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 237 238 private: 239 TargetMachine &TM; 240 }; 241 242 Pass *createAMDGPUStructurizeCFGPass(); 243 FunctionPass *createAMDGPUISelDag( 244 TargetMachine *TM = nullptr, 245 CodeGenOpt::Level OptLevel = CodeGenOpt::Default); 246 ModulePass *createAMDGPUAlwaysInlinePass(bool GlobalOpt = true); 247 248 struct AMDGPUAlwaysInlinePass : PassInfoMixin<AMDGPUAlwaysInlinePass> { 249 AMDGPUAlwaysInlinePass(bool GlobalOpt = true) : GlobalOpt(GlobalOpt) {} 250 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 251 252 private: 253 bool GlobalOpt; 254 }; 255 256 FunctionPass *createAMDGPUAnnotateUniformValues(); 257 258 ModulePass *createAMDGPUPrintfRuntimeBinding(); 259 void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry&); 260 extern char &AMDGPUPrintfRuntimeBindingID; 261 262 void initializeAMDGPUResourceUsageAnalysisPass(PassRegistry &); 263 extern char &AMDGPUResourceUsageAnalysisID; 264 265 struct AMDGPUPrintfRuntimeBindingPass 266 : PassInfoMixin<AMDGPUPrintfRuntimeBindingPass> { 267 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 268 }; 269 270 ModulePass* createAMDGPUUnifyMetadataPass(); 271 void initializeAMDGPUUnifyMetadataPass(PassRegistry&); 272 extern char &AMDGPUUnifyMetadataID; 273 274 struct AMDGPUUnifyMetadataPass : PassInfoMixin<AMDGPUUnifyMetadataPass> { 275 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 276 }; 277 278 void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry&); 279 extern char &SIOptimizeExecMaskingPreRAID; 280 281 void initializeSIOptimizeVGPRLiveRangePass(PassRegistry &); 282 extern char &SIOptimizeVGPRLiveRangeID; 283 284 void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry&); 285 extern char &AMDGPUAnnotateUniformValuesPassID; 286 287 void initializeAMDGPUCodeGenPreparePass(PassRegistry&); 288 extern char &AMDGPUCodeGenPrepareID; 289 290 void initializeAMDGPULateCodeGenPreparePass(PassRegistry &); 291 extern char &AMDGPULateCodeGenPrepareID; 292 293 void initializeSIAnnotateControlFlowPass(PassRegistry&); 294 extern char &SIAnnotateControlFlowPassID; 295 296 void initializeSIMemoryLegalizerPass(PassRegistry&); 297 extern char &SIMemoryLegalizerID; 298 299 void initializeSIModeRegisterPass(PassRegistry&); 300 extern char &SIModeRegisterID; 301 302 void initializeAMDGPUReleaseVGPRsPass(PassRegistry &); 303 extern char &AMDGPUReleaseVGPRsID; 304 305 void initializeAMDGPUInsertDelayAluPass(PassRegistry &); 306 extern char &AMDGPUInsertDelayAluID; 307 308 void initializeSIInsertHardClausesPass(PassRegistry &); 309 extern char &SIInsertHardClausesID; 310 311 void initializeSIInsertWaitcntsPass(PassRegistry&); 312 extern char &SIInsertWaitcntsID; 313 314 void initializeSIFormMemoryClausesPass(PassRegistry&); 315 extern char &SIFormMemoryClausesID; 316 317 void initializeSIPostRABundlerPass(PassRegistry&); 318 extern char &SIPostRABundlerID; 319 320 void initializeGCNCreateVOPDPass(PassRegistry &); 321 extern char &GCNCreateVOPDID; 322 323 void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry&); 324 extern char &AMDGPUUnifyDivergentExitNodesID; 325 326 ImmutablePass *createAMDGPUAAWrapperPass(); 327 void initializeAMDGPUAAWrapperPassPass(PassRegistry&); 328 ImmutablePass *createAMDGPUExternalAAWrapperPass(); 329 void initializeAMDGPUExternalAAWrapperPass(PassRegistry&); 330 331 void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &); 332 333 ModulePass *createAMDGPUOpenCLEnqueuedBlockLoweringPass(); 334 void initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(PassRegistry &); 335 extern char &AMDGPUOpenCLEnqueuedBlockLoweringID; 336 337 void initializeGCNNSAReassignPass(PassRegistry &); 338 extern char &GCNNSAReassignID; 339 340 void initializeGCNPreRAOptimizationsPass(PassRegistry &); 341 extern char &GCNPreRAOptimizationsID; 342 343 FunctionPass *createAMDGPUSetWavePriorityPass(); 344 void initializeAMDGPUSetWavePriorityPass(PassRegistry &); 345 346 namespace AMDGPU { 347 enum TargetIndex { 348 TI_CONSTDATA_START, 349 TI_SCRATCH_RSRC_DWORD0, 350 TI_SCRATCH_RSRC_DWORD1, 351 TI_SCRATCH_RSRC_DWORD2, 352 TI_SCRATCH_RSRC_DWORD3 353 }; 354 } 355 356 /// OpenCL uses address spaces to differentiate between 357 /// various memory regions on the hardware. On the CPU 358 /// all of the address spaces point to the same memory, 359 /// however on the GPU, each address space points to 360 /// a separate piece of memory that is unique from other 361 /// memory locations. 362 namespace AMDGPUAS { 363 enum : unsigned { 364 // The maximum value for flat, generic, local, private, constant and region. 365 MAX_AMDGPU_ADDRESS = 7, 366 367 FLAT_ADDRESS = 0, ///< Address space for flat memory. 368 GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0). 369 REGION_ADDRESS = 2, ///< Address space for region memory. (GDS) 370 371 CONSTANT_ADDRESS = 4, ///< Address space for constant memory (VTX2). 372 LOCAL_ADDRESS = 3, ///< Address space for local memory. 373 PRIVATE_ADDRESS = 5, ///< Address space for private memory. 374 375 CONSTANT_ADDRESS_32BIT = 6, ///< Address space for 32-bit constant memory. 376 377 BUFFER_FAT_POINTER = 7, ///< Address space for 160-bit buffer fat pointers. 378 379 /// Address space for direct addressable parameter memory (CONST0). 380 PARAM_D_ADDRESS = 6, 381 /// Address space for indirect addressable parameter memory (VTX1). 382 PARAM_I_ADDRESS = 7, 383 384 // Do not re-order the CONSTANT_BUFFER_* enums. Several places depend on 385 // this order to be able to dynamically index a constant buffer, for 386 // example: 387 // 388 // ConstantBufferAS = CONSTANT_BUFFER_0 + CBIdx 389 390 CONSTANT_BUFFER_0 = 8, 391 CONSTANT_BUFFER_1 = 9, 392 CONSTANT_BUFFER_2 = 10, 393 CONSTANT_BUFFER_3 = 11, 394 CONSTANT_BUFFER_4 = 12, 395 CONSTANT_BUFFER_5 = 13, 396 CONSTANT_BUFFER_6 = 14, 397 CONSTANT_BUFFER_7 = 15, 398 CONSTANT_BUFFER_8 = 16, 399 CONSTANT_BUFFER_9 = 17, 400 CONSTANT_BUFFER_10 = 18, 401 CONSTANT_BUFFER_11 = 19, 402 CONSTANT_BUFFER_12 = 20, 403 CONSTANT_BUFFER_13 = 21, 404 CONSTANT_BUFFER_14 = 22, 405 CONSTANT_BUFFER_15 = 23, 406 407 // Some places use this if the address space can't be determined. 408 UNKNOWN_ADDRESS_SPACE = ~0u, 409 }; 410 } 411 412 namespace AMDGPU { 413 414 // FIXME: Missing constant_32bit 415 inline bool isFlatGlobalAddrSpace(unsigned AS) { 416 return AS == AMDGPUAS::GLOBAL_ADDRESS || 417 AS == AMDGPUAS::FLAT_ADDRESS || 418 AS == AMDGPUAS::CONSTANT_ADDRESS || 419 AS > AMDGPUAS::MAX_AMDGPU_ADDRESS; 420 } 421 } 422 423 } // End namespace llvm 424 425 #endif 426