1 //===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 /// \file 8 //===----------------------------------------------------------------------===// 9 10 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPU_H 11 #define LLVM_LIB_TARGET_AMDGPU_AMDGPU_H 12 13 #include "llvm/IR/PassManager.h" 14 #include "llvm/Pass.h" 15 #include "llvm/Support/CodeGen.h" 16 17 namespace llvm { 18 19 class TargetMachine; 20 21 // GlobalISel passes 22 void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &); 23 FunctionPass *createAMDGPUPreLegalizeCombiner(bool IsOptNone); 24 void initializeAMDGPUPostLegalizerCombinerPass(PassRegistry &); 25 FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone); 26 FunctionPass *createAMDGPURegBankCombiner(bool IsOptNone); 27 void initializeAMDGPURegBankCombinerPass(PassRegistry &); 28 29 // SI Passes 30 FunctionPass *createGCNDPPCombinePass(); 31 FunctionPass *createSIAnnotateControlFlowPass(); 32 FunctionPass *createSIFoldOperandsPass(); 33 FunctionPass *createSIPeepholeSDWAPass(); 34 FunctionPass *createSILowerI1CopiesPass(); 35 FunctionPass *createSIShrinkInstructionsPass(); 36 FunctionPass *createSILoadStoreOptimizerPass(); 37 FunctionPass *createSIWholeQuadModePass(); 38 FunctionPass *createSIFixControlFlowLiveIntervalsPass(); 39 FunctionPass *createSIOptimizeExecMaskingPreRAPass(); 40 FunctionPass *createSIOptimizeVGPRLiveRangePass(); 41 FunctionPass *createSIFixSGPRCopiesPass(); 42 FunctionPass *createSIMemoryLegalizerPass(); 43 FunctionPass *createSIInsertWaitcntsPass(); 44 FunctionPass *createSIPreAllocateWWMRegsPass(); 45 FunctionPass *createSIFormMemoryClausesPass(); 46 47 FunctionPass *createSIPostRABundlerPass(); 48 FunctionPass *createAMDGPUSimplifyLibCallsPass(const TargetMachine *); 49 FunctionPass *createAMDGPUUseNativeCallsPass(); 50 FunctionPass *createAMDGPUCodeGenPreparePass(); 51 FunctionPass *createAMDGPULateCodeGenPreparePass(); 52 FunctionPass *createAMDGPUMachineCFGStructurizerPass(); 53 FunctionPass *createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *); 54 ModulePass *createAMDGPUPropagateAttributesLatePass(const TargetMachine *); 55 FunctionPass *createAMDGPURewriteOutArgumentsPass(); 56 ModulePass *createAMDGPUReplaceLDSUseWithPointerPass(); 57 ModulePass *createAMDGPULowerModuleLDSPass(); 58 FunctionPass *createSIModeRegisterPass(); 59 FunctionPass *createGCNPreRAOptimizationsPass(); 60 61 struct AMDGPUSimplifyLibCallsPass : PassInfoMixin<AMDGPUSimplifyLibCallsPass> { 62 AMDGPUSimplifyLibCallsPass(TargetMachine &TM) : TM(TM) {} 63 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 64 65 private: 66 TargetMachine &TM; 67 }; 68 69 struct AMDGPUUseNativeCallsPass : PassInfoMixin<AMDGPUUseNativeCallsPass> { 70 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 71 }; 72 73 void initializeAMDGPUDAGToDAGISelPass(PassRegistry&); 74 75 void initializeAMDGPUMachineCFGStructurizerPass(PassRegistry&); 76 extern char &AMDGPUMachineCFGStructurizerID; 77 78 void initializeAMDGPUAlwaysInlinePass(PassRegistry&); 79 80 Pass *createAMDGPUAnnotateKernelFeaturesPass(); 81 Pass *createAMDGPUAttributorPass(); 82 void initializeAMDGPUAttributorPass(PassRegistry &); 83 void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &); 84 extern char &AMDGPUAnnotateKernelFeaturesID; 85 86 FunctionPass *createAMDGPUAtomicOptimizerPass(); 87 void initializeAMDGPUAtomicOptimizerPass(PassRegistry &); 88 extern char &AMDGPUAtomicOptimizerID; 89 90 ModulePass *createAMDGPULowerIntrinsicsPass(); 91 void initializeAMDGPULowerIntrinsicsPass(PassRegistry &); 92 extern char &AMDGPULowerIntrinsicsID; 93 94 ModulePass *createAMDGPUFixFunctionBitcastsPass(); 95 void initializeAMDGPUFixFunctionBitcastsPass(PassRegistry &); 96 extern char &AMDGPUFixFunctionBitcastsID; 97 98 ModulePass *createAMDGPUCtorDtorLoweringPass(); 99 void initializeAMDGPUCtorDtorLoweringPass(PassRegistry &); 100 extern char &AMDGPUCtorDtorLoweringID; 101 102 FunctionPass *createAMDGPULowerKernelArgumentsPass(); 103 void initializeAMDGPULowerKernelArgumentsPass(PassRegistry &); 104 extern char &AMDGPULowerKernelArgumentsID; 105 106 FunctionPass *createAMDGPUPromoteKernelArgumentsPass(); 107 void initializeAMDGPUPromoteKernelArgumentsPass(PassRegistry &); 108 extern char &AMDGPUPromoteKernelArgumentsID; 109 110 struct AMDGPUPromoteKernelArgumentsPass 111 : PassInfoMixin<AMDGPUPromoteKernelArgumentsPass> { 112 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 113 }; 114 115 ModulePass *createAMDGPULowerKernelAttributesPass(); 116 void initializeAMDGPULowerKernelAttributesPass(PassRegistry &); 117 extern char &AMDGPULowerKernelAttributesID; 118 119 struct AMDGPULowerKernelAttributesPass 120 : PassInfoMixin<AMDGPULowerKernelAttributesPass> { 121 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 122 }; 123 124 void initializeAMDGPUPropagateAttributesEarlyPass(PassRegistry &); 125 extern char &AMDGPUPropagateAttributesEarlyID; 126 127 struct AMDGPUPropagateAttributesEarlyPass 128 : PassInfoMixin<AMDGPUPropagateAttributesEarlyPass> { 129 AMDGPUPropagateAttributesEarlyPass(TargetMachine &TM) : TM(TM) {} 130 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 131 132 private: 133 TargetMachine &TM; 134 }; 135 136 void initializeAMDGPUPropagateAttributesLatePass(PassRegistry &); 137 extern char &AMDGPUPropagateAttributesLateID; 138 139 struct AMDGPUPropagateAttributesLatePass 140 : PassInfoMixin<AMDGPUPropagateAttributesLatePass> { 141 AMDGPUPropagateAttributesLatePass(TargetMachine &TM) : TM(TM) {} 142 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 143 144 private: 145 TargetMachine &TM; 146 }; 147 148 void initializeAMDGPUReplaceLDSUseWithPointerPass(PassRegistry &); 149 extern char &AMDGPUReplaceLDSUseWithPointerID; 150 151 struct AMDGPUReplaceLDSUseWithPointerPass 152 : PassInfoMixin<AMDGPUReplaceLDSUseWithPointerPass> { 153 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 154 }; 155 156 void initializeAMDGPULowerModuleLDSPass(PassRegistry &); 157 extern char &AMDGPULowerModuleLDSID; 158 159 struct AMDGPULowerModuleLDSPass : PassInfoMixin<AMDGPULowerModuleLDSPass> { 160 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 161 }; 162 163 void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &); 164 extern char &AMDGPURewriteOutArgumentsID; 165 166 void initializeGCNDPPCombinePass(PassRegistry &); 167 extern char &GCNDPPCombineID; 168 169 void initializeSIFoldOperandsPass(PassRegistry &); 170 extern char &SIFoldOperandsID; 171 172 void initializeSIPeepholeSDWAPass(PassRegistry &); 173 extern char &SIPeepholeSDWAID; 174 175 void initializeSIShrinkInstructionsPass(PassRegistry&); 176 extern char &SIShrinkInstructionsID; 177 178 void initializeSIFixSGPRCopiesPass(PassRegistry &); 179 extern char &SIFixSGPRCopiesID; 180 181 void initializeSIFixVGPRCopiesPass(PassRegistry &); 182 extern char &SIFixVGPRCopiesID; 183 184 void initializeSILowerI1CopiesPass(PassRegistry &); 185 extern char &SILowerI1CopiesID; 186 187 void initializeSILowerSGPRSpillsPass(PassRegistry &); 188 extern char &SILowerSGPRSpillsID; 189 190 void initializeSILoadStoreOptimizerPass(PassRegistry &); 191 extern char &SILoadStoreOptimizerID; 192 193 void initializeSIWholeQuadModePass(PassRegistry &); 194 extern char &SIWholeQuadModeID; 195 196 void initializeSILowerControlFlowPass(PassRegistry &); 197 extern char &SILowerControlFlowID; 198 199 void initializeSIPreEmitPeepholePass(PassRegistry &); 200 extern char &SIPreEmitPeepholeID; 201 202 void initializeSILateBranchLoweringPass(PassRegistry &); 203 extern char &SILateBranchLoweringPassID; 204 205 void initializeSIOptimizeExecMaskingPass(PassRegistry &); 206 extern char &SIOptimizeExecMaskingID; 207 208 void initializeSIPreAllocateWWMRegsPass(PassRegistry &); 209 extern char &SIPreAllocateWWMRegsID; 210 211 void initializeAMDGPUSimplifyLibCallsPass(PassRegistry &); 212 extern char &AMDGPUSimplifyLibCallsID; 213 214 void initializeAMDGPUUseNativeCallsPass(PassRegistry &); 215 extern char &AMDGPUUseNativeCallsID; 216 217 void initializeAMDGPUPerfHintAnalysisPass(PassRegistry &); 218 extern char &AMDGPUPerfHintAnalysisID; 219 220 // Passes common to R600 and SI 221 FunctionPass *createAMDGPUPromoteAlloca(); 222 void initializeAMDGPUPromoteAllocaPass(PassRegistry&); 223 extern char &AMDGPUPromoteAllocaID; 224 225 FunctionPass *createAMDGPUPromoteAllocaToVector(); 226 void initializeAMDGPUPromoteAllocaToVectorPass(PassRegistry&); 227 extern char &AMDGPUPromoteAllocaToVectorID; 228 229 struct AMDGPUPromoteAllocaPass : PassInfoMixin<AMDGPUPromoteAllocaPass> { 230 AMDGPUPromoteAllocaPass(TargetMachine &TM) : TM(TM) {} 231 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 232 233 private: 234 TargetMachine &TM; 235 }; 236 237 struct AMDGPUPromoteAllocaToVectorPass 238 : PassInfoMixin<AMDGPUPromoteAllocaToVectorPass> { 239 AMDGPUPromoteAllocaToVectorPass(TargetMachine &TM) : TM(TM) {} 240 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 241 242 private: 243 TargetMachine &TM; 244 }; 245 246 Pass *createAMDGPUStructurizeCFGPass(); 247 FunctionPass *createAMDGPUISelDag( 248 TargetMachine *TM = nullptr, 249 CodeGenOpt::Level OptLevel = CodeGenOpt::Default); 250 ModulePass *createAMDGPUAlwaysInlinePass(bool GlobalOpt = true); 251 252 struct AMDGPUAlwaysInlinePass : PassInfoMixin<AMDGPUAlwaysInlinePass> { 253 AMDGPUAlwaysInlinePass(bool GlobalOpt = true) : GlobalOpt(GlobalOpt) {} 254 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 255 256 private: 257 bool GlobalOpt; 258 }; 259 260 FunctionPass *createAMDGPUAnnotateUniformValues(); 261 262 ModulePass *createAMDGPUPrintfRuntimeBinding(); 263 void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry&); 264 extern char &AMDGPUPrintfRuntimeBindingID; 265 266 void initializeAMDGPUResourceUsageAnalysisPass(PassRegistry &); 267 extern char &AMDGPUResourceUsageAnalysisID; 268 269 struct AMDGPUPrintfRuntimeBindingPass 270 : PassInfoMixin<AMDGPUPrintfRuntimeBindingPass> { 271 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 272 }; 273 274 ModulePass* createAMDGPUUnifyMetadataPass(); 275 void initializeAMDGPUUnifyMetadataPass(PassRegistry&); 276 extern char &AMDGPUUnifyMetadataID; 277 278 struct AMDGPUUnifyMetadataPass : PassInfoMixin<AMDGPUUnifyMetadataPass> { 279 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 280 }; 281 282 void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry&); 283 extern char &SIOptimizeExecMaskingPreRAID; 284 285 void initializeSIOptimizeVGPRLiveRangePass(PassRegistry &); 286 extern char &SIOptimizeVGPRLiveRangeID; 287 288 void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry&); 289 extern char &AMDGPUAnnotateUniformValuesPassID; 290 291 void initializeAMDGPUCodeGenPreparePass(PassRegistry&); 292 extern char &AMDGPUCodeGenPrepareID; 293 294 void initializeAMDGPULateCodeGenPreparePass(PassRegistry &); 295 extern char &AMDGPULateCodeGenPrepareID; 296 297 void initializeSIAnnotateControlFlowPass(PassRegistry&); 298 extern char &SIAnnotateControlFlowPassID; 299 300 void initializeSIMemoryLegalizerPass(PassRegistry&); 301 extern char &SIMemoryLegalizerID; 302 303 void initializeSIModeRegisterPass(PassRegistry&); 304 extern char &SIModeRegisterID; 305 306 void initializeSIInsertHardClausesPass(PassRegistry &); 307 extern char &SIInsertHardClausesID; 308 309 void initializeSIInsertWaitcntsPass(PassRegistry&); 310 extern char &SIInsertWaitcntsID; 311 312 void initializeSIFormMemoryClausesPass(PassRegistry&); 313 extern char &SIFormMemoryClausesID; 314 315 void initializeSIPostRABundlerPass(PassRegistry&); 316 extern char &SIPostRABundlerID; 317 318 void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry&); 319 extern char &AMDGPUUnifyDivergentExitNodesID; 320 321 ImmutablePass *createAMDGPUAAWrapperPass(); 322 void initializeAMDGPUAAWrapperPassPass(PassRegistry&); 323 ImmutablePass *createAMDGPUExternalAAWrapperPass(); 324 void initializeAMDGPUExternalAAWrapperPass(PassRegistry&); 325 326 void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &); 327 328 ModulePass *createAMDGPUOpenCLEnqueuedBlockLoweringPass(); 329 void initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(PassRegistry &); 330 extern char &AMDGPUOpenCLEnqueuedBlockLoweringID; 331 332 void initializeGCNNSAReassignPass(PassRegistry &); 333 extern char &GCNNSAReassignID; 334 335 void initializeGCNPreRAOptimizationsPass(PassRegistry &); 336 extern char &GCNPreRAOptimizationsID; 337 338 namespace AMDGPU { 339 enum TargetIndex { 340 TI_CONSTDATA_START, 341 TI_SCRATCH_RSRC_DWORD0, 342 TI_SCRATCH_RSRC_DWORD1, 343 TI_SCRATCH_RSRC_DWORD2, 344 TI_SCRATCH_RSRC_DWORD3 345 }; 346 } 347 348 /// OpenCL uses address spaces to differentiate between 349 /// various memory regions on the hardware. On the CPU 350 /// all of the address spaces point to the same memory, 351 /// however on the GPU, each address space points to 352 /// a separate piece of memory that is unique from other 353 /// memory locations. 354 namespace AMDGPUAS { 355 enum : unsigned { 356 // The maximum value for flat, generic, local, private, constant and region. 357 MAX_AMDGPU_ADDRESS = 7, 358 359 FLAT_ADDRESS = 0, ///< Address space for flat memory. 360 GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0). 361 REGION_ADDRESS = 2, ///< Address space for region memory. (GDS) 362 363 CONSTANT_ADDRESS = 4, ///< Address space for constant memory (VTX2). 364 LOCAL_ADDRESS = 3, ///< Address space for local memory. 365 PRIVATE_ADDRESS = 5, ///< Address space for private memory. 366 367 CONSTANT_ADDRESS_32BIT = 6, ///< Address space for 32-bit constant memory. 368 369 BUFFER_FAT_POINTER = 7, ///< Address space for 160-bit buffer fat pointers. 370 371 /// Address space for direct addressable parameter memory (CONST0). 372 PARAM_D_ADDRESS = 6, 373 /// Address space for indirect addressable parameter memory (VTX1). 374 PARAM_I_ADDRESS = 7, 375 376 // Do not re-order the CONSTANT_BUFFER_* enums. Several places depend on 377 // this order to be able to dynamically index a constant buffer, for 378 // example: 379 // 380 // ConstantBufferAS = CONSTANT_BUFFER_0 + CBIdx 381 382 CONSTANT_BUFFER_0 = 8, 383 CONSTANT_BUFFER_1 = 9, 384 CONSTANT_BUFFER_2 = 10, 385 CONSTANT_BUFFER_3 = 11, 386 CONSTANT_BUFFER_4 = 12, 387 CONSTANT_BUFFER_5 = 13, 388 CONSTANT_BUFFER_6 = 14, 389 CONSTANT_BUFFER_7 = 15, 390 CONSTANT_BUFFER_8 = 16, 391 CONSTANT_BUFFER_9 = 17, 392 CONSTANT_BUFFER_10 = 18, 393 CONSTANT_BUFFER_11 = 19, 394 CONSTANT_BUFFER_12 = 20, 395 CONSTANT_BUFFER_13 = 21, 396 CONSTANT_BUFFER_14 = 22, 397 CONSTANT_BUFFER_15 = 23, 398 399 // Some places use this if the address space can't be determined. 400 UNKNOWN_ADDRESS_SPACE = ~0u, 401 }; 402 } 403 404 namespace AMDGPU { 405 406 // FIXME: Missing constant_32bit 407 inline bool isFlatGlobalAddrSpace(unsigned AS) { 408 return AS == AMDGPUAS::GLOBAL_ADDRESS || 409 AS == AMDGPUAS::FLAT_ADDRESS || 410 AS == AMDGPUAS::CONSTANT_ADDRESS || 411 AS > AMDGPUAS::MAX_AMDGPU_ADDRESS; 412 } 413 } 414 415 } // End namespace llvm 416 417 #endif 418