1 //===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 /// \file 8 //===----------------------------------------------------------------------===// 9 10 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPU_H 11 #define LLVM_LIB_TARGET_AMDGPU_AMDGPU_H 12 13 #include "llvm/IR/PassManager.h" 14 #include "llvm/Support/CodeGen.h" 15 16 namespace llvm { 17 18 class FunctionPass; 19 class GCNTargetMachine; 20 class ImmutablePass; 21 class MachineFunctionPass; 22 class ModulePass; 23 class Pass; 24 class Target; 25 class TargetMachine; 26 class TargetOptions; 27 class PassRegistry; 28 class Module; 29 30 // GlobalISel passes 31 void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &); 32 FunctionPass *createAMDGPUPreLegalizeCombiner(bool IsOptNone); 33 void initializeAMDGPUPostLegalizerCombinerPass(PassRegistry &); 34 FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone); 35 FunctionPass *createAMDGPURegBankCombiner(bool IsOptNone); 36 void initializeAMDGPURegBankCombinerPass(PassRegistry &); 37 38 // R600 Passes 39 FunctionPass *createR600VectorRegMerger(); 40 FunctionPass *createR600ExpandSpecialInstrsPass(); 41 FunctionPass *createR600EmitClauseMarkers(); 42 FunctionPass *createR600ClauseMergePass(); 43 FunctionPass *createR600Packetizer(); 44 FunctionPass *createR600ControlFlowFinalizer(); 45 FunctionPass *createAMDGPUCFGStructurizerPass(); 46 FunctionPass *createR600ISelDag(TargetMachine *TM, CodeGenOpt::Level OptLevel); 47 48 // SI Passes 49 FunctionPass *createGCNDPPCombinePass(); 50 FunctionPass *createSIAnnotateControlFlowPass(); 51 FunctionPass *createSIFoldOperandsPass(); 52 FunctionPass *createSIPeepholeSDWAPass(); 53 FunctionPass *createSILowerI1CopiesPass(); 54 FunctionPass *createSIShrinkInstructionsPass(); 55 FunctionPass *createSILoadStoreOptimizerPass(); 56 FunctionPass *createSIWholeQuadModePass(); 57 FunctionPass *createSIFixControlFlowLiveIntervalsPass(); 58 FunctionPass *createSIOptimizeExecMaskingPreRAPass(); 59 FunctionPass *createSIOptimizeVGPRLiveRangePass(); 60 FunctionPass *createSIFixSGPRCopiesPass(); 61 FunctionPass *createSIMemoryLegalizerPass(); 62 FunctionPass *createSIInsertWaitcntsPass(); 63 FunctionPass *createSIPreAllocateWWMRegsPass(); 64 FunctionPass *createSIFormMemoryClausesPass(); 65 66 FunctionPass *createSIPostRABundlerPass(); 67 FunctionPass *createAMDGPUSimplifyLibCallsPass(const TargetMachine *); 68 FunctionPass *createAMDGPUUseNativeCallsPass(); 69 FunctionPass *createAMDGPUCodeGenPreparePass(); 70 FunctionPass *createAMDGPULateCodeGenPreparePass(); 71 FunctionPass *createAMDGPUMachineCFGStructurizerPass(); 72 FunctionPass *createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *); 73 ModulePass *createAMDGPUPropagateAttributesLatePass(const TargetMachine *); 74 FunctionPass *createAMDGPURewriteOutArgumentsPass(); 75 ModulePass *createAMDGPUReplaceLDSUseWithPointerPass(); 76 ModulePass *createAMDGPULowerModuleLDSPass(); 77 FunctionPass *createSIModeRegisterPass(); 78 FunctionPass *createGCNPreRAOptimizationsPass(); 79 80 struct AMDGPUSimplifyLibCallsPass : PassInfoMixin<AMDGPUSimplifyLibCallsPass> { 81 AMDGPUSimplifyLibCallsPass(TargetMachine &TM) : TM(TM) {} 82 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 83 84 private: 85 TargetMachine &TM; 86 }; 87 88 struct AMDGPUUseNativeCallsPass : PassInfoMixin<AMDGPUUseNativeCallsPass> { 89 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 90 }; 91 92 void initializeAMDGPUDAGToDAGISelPass(PassRegistry&); 93 94 void initializeAMDGPUMachineCFGStructurizerPass(PassRegistry&); 95 extern char &AMDGPUMachineCFGStructurizerID; 96 97 void initializeAMDGPUAlwaysInlinePass(PassRegistry&); 98 99 Pass *createAMDGPUAnnotateKernelFeaturesPass(); 100 Pass *createAMDGPUAttributorPass(); 101 void initializeAMDGPUAttributorPass(PassRegistry &); 102 void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &); 103 extern char &AMDGPUAnnotateKernelFeaturesID; 104 105 FunctionPass *createAMDGPUAtomicOptimizerPass(); 106 void initializeAMDGPUAtomicOptimizerPass(PassRegistry &); 107 extern char &AMDGPUAtomicOptimizerID; 108 109 ModulePass *createAMDGPULowerIntrinsicsPass(); 110 void initializeAMDGPULowerIntrinsicsPass(PassRegistry &); 111 extern char &AMDGPULowerIntrinsicsID; 112 113 ModulePass *createAMDGPUFixFunctionBitcastsPass(); 114 void initializeAMDGPUFixFunctionBitcastsPass(PassRegistry &); 115 extern char &AMDGPUFixFunctionBitcastsID; 116 117 FunctionPass *createAMDGPULowerKernelArgumentsPass(); 118 void initializeAMDGPULowerKernelArgumentsPass(PassRegistry &); 119 extern char &AMDGPULowerKernelArgumentsID; 120 121 ModulePass *createAMDGPULowerKernelAttributesPass(); 122 void initializeAMDGPULowerKernelAttributesPass(PassRegistry &); 123 extern char &AMDGPULowerKernelAttributesID; 124 125 struct AMDGPULowerKernelAttributesPass 126 : PassInfoMixin<AMDGPULowerKernelAttributesPass> { 127 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 128 }; 129 130 void initializeAMDGPUPropagateAttributesEarlyPass(PassRegistry &); 131 extern char &AMDGPUPropagateAttributesEarlyID; 132 133 struct AMDGPUPropagateAttributesEarlyPass 134 : PassInfoMixin<AMDGPUPropagateAttributesEarlyPass> { 135 AMDGPUPropagateAttributesEarlyPass(TargetMachine &TM) : TM(TM) {} 136 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 137 138 private: 139 TargetMachine &TM; 140 }; 141 142 void initializeAMDGPUPropagateAttributesLatePass(PassRegistry &); 143 extern char &AMDGPUPropagateAttributesLateID; 144 145 struct AMDGPUPropagateAttributesLatePass 146 : PassInfoMixin<AMDGPUPropagateAttributesLatePass> { 147 AMDGPUPropagateAttributesLatePass(TargetMachine &TM) : TM(TM) {} 148 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 149 150 private: 151 TargetMachine &TM; 152 }; 153 154 void initializeAMDGPUReplaceLDSUseWithPointerPass(PassRegistry &); 155 extern char &AMDGPUReplaceLDSUseWithPointerID; 156 157 struct AMDGPUReplaceLDSUseWithPointerPass 158 : PassInfoMixin<AMDGPUReplaceLDSUseWithPointerPass> { 159 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 160 }; 161 162 void initializeAMDGPULowerModuleLDSPass(PassRegistry &); 163 extern char &AMDGPULowerModuleLDSID; 164 165 struct AMDGPULowerModuleLDSPass : PassInfoMixin<AMDGPULowerModuleLDSPass> { 166 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 167 }; 168 169 void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &); 170 extern char &AMDGPURewriteOutArgumentsID; 171 172 void initializeGCNDPPCombinePass(PassRegistry &); 173 extern char &GCNDPPCombineID; 174 175 void initializeR600ClauseMergePassPass(PassRegistry &); 176 extern char &R600ClauseMergePassID; 177 178 void initializeR600ControlFlowFinalizerPass(PassRegistry &); 179 extern char &R600ControlFlowFinalizerID; 180 181 void initializeR600ExpandSpecialInstrsPassPass(PassRegistry &); 182 extern char &R600ExpandSpecialInstrsPassID; 183 184 void initializeR600VectorRegMergerPass(PassRegistry &); 185 extern char &R600VectorRegMergerID; 186 187 void initializeR600PacketizerPass(PassRegistry &); 188 extern char &R600PacketizerID; 189 190 void initializeSIFoldOperandsPass(PassRegistry &); 191 extern char &SIFoldOperandsID; 192 193 void initializeSIPeepholeSDWAPass(PassRegistry &); 194 extern char &SIPeepholeSDWAID; 195 196 void initializeSIShrinkInstructionsPass(PassRegistry&); 197 extern char &SIShrinkInstructionsID; 198 199 void initializeSIFixSGPRCopiesPass(PassRegistry &); 200 extern char &SIFixSGPRCopiesID; 201 202 void initializeSIFixVGPRCopiesPass(PassRegistry &); 203 extern char &SIFixVGPRCopiesID; 204 205 void initializeSILowerI1CopiesPass(PassRegistry &); 206 extern char &SILowerI1CopiesID; 207 208 void initializeSILowerSGPRSpillsPass(PassRegistry &); 209 extern char &SILowerSGPRSpillsID; 210 211 void initializeSILoadStoreOptimizerPass(PassRegistry &); 212 extern char &SILoadStoreOptimizerID; 213 214 void initializeSIWholeQuadModePass(PassRegistry &); 215 extern char &SIWholeQuadModeID; 216 217 void initializeSILowerControlFlowPass(PassRegistry &); 218 extern char &SILowerControlFlowID; 219 220 void initializeSIPreEmitPeepholePass(PassRegistry &); 221 extern char &SIPreEmitPeepholeID; 222 223 void initializeSILateBranchLoweringPass(PassRegistry &); 224 extern char &SILateBranchLoweringPassID; 225 226 void initializeSIOptimizeExecMaskingPass(PassRegistry &); 227 extern char &SIOptimizeExecMaskingID; 228 229 void initializeSIPreAllocateWWMRegsPass(PassRegistry &); 230 extern char &SIPreAllocateWWMRegsID; 231 232 void initializeAMDGPUSimplifyLibCallsPass(PassRegistry &); 233 extern char &AMDGPUSimplifyLibCallsID; 234 235 void initializeAMDGPUUseNativeCallsPass(PassRegistry &); 236 extern char &AMDGPUUseNativeCallsID; 237 238 void initializeAMDGPUPerfHintAnalysisPass(PassRegistry &); 239 extern char &AMDGPUPerfHintAnalysisID; 240 241 // Passes common to R600 and SI 242 FunctionPass *createAMDGPUPromoteAlloca(); 243 void initializeAMDGPUPromoteAllocaPass(PassRegistry&); 244 extern char &AMDGPUPromoteAllocaID; 245 246 FunctionPass *createAMDGPUPromoteAllocaToVector(); 247 void initializeAMDGPUPromoteAllocaToVectorPass(PassRegistry&); 248 extern char &AMDGPUPromoteAllocaToVectorID; 249 250 struct AMDGPUPromoteAllocaPass : PassInfoMixin<AMDGPUPromoteAllocaPass> { 251 AMDGPUPromoteAllocaPass(TargetMachine &TM) : TM(TM) {} 252 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 253 254 private: 255 TargetMachine &TM; 256 }; 257 258 struct AMDGPUPromoteAllocaToVectorPass 259 : PassInfoMixin<AMDGPUPromoteAllocaToVectorPass> { 260 AMDGPUPromoteAllocaToVectorPass(TargetMachine &TM) : TM(TM) {} 261 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 262 263 private: 264 TargetMachine &TM; 265 }; 266 267 Pass *createAMDGPUStructurizeCFGPass(); 268 FunctionPass *createAMDGPUISelDag( 269 TargetMachine *TM = nullptr, 270 CodeGenOpt::Level OptLevel = CodeGenOpt::Default); 271 ModulePass *createAMDGPUAlwaysInlinePass(bool GlobalOpt = true); 272 273 struct AMDGPUAlwaysInlinePass : PassInfoMixin<AMDGPUAlwaysInlinePass> { 274 AMDGPUAlwaysInlinePass(bool GlobalOpt = true) : GlobalOpt(GlobalOpt) {} 275 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 276 277 private: 278 bool GlobalOpt; 279 }; 280 281 ModulePass *createR600OpenCLImageTypeLoweringPass(); 282 FunctionPass *createAMDGPUAnnotateUniformValues(); 283 284 ModulePass *createAMDGPUPrintfRuntimeBinding(); 285 void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry&); 286 extern char &AMDGPUPrintfRuntimeBindingID; 287 288 void initializeAMDGPUResourceUsageAnalysisPass(PassRegistry &); 289 extern char &AMDGPUResourceUsageAnalysisID; 290 291 struct AMDGPUPrintfRuntimeBindingPass 292 : PassInfoMixin<AMDGPUPrintfRuntimeBindingPass> { 293 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 294 }; 295 296 ModulePass* createAMDGPUUnifyMetadataPass(); 297 void initializeAMDGPUUnifyMetadataPass(PassRegistry&); 298 extern char &AMDGPUUnifyMetadataID; 299 300 struct AMDGPUUnifyMetadataPass : PassInfoMixin<AMDGPUUnifyMetadataPass> { 301 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 302 }; 303 304 void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry&); 305 extern char &SIOptimizeExecMaskingPreRAID; 306 307 void initializeSIOptimizeVGPRLiveRangePass(PassRegistry &); 308 extern char &SIOptimizeVGPRLiveRangeID; 309 310 void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry&); 311 extern char &AMDGPUAnnotateUniformValuesPassID; 312 313 void initializeAMDGPUCodeGenPreparePass(PassRegistry&); 314 extern char &AMDGPUCodeGenPrepareID; 315 316 void initializeAMDGPULateCodeGenPreparePass(PassRegistry &); 317 extern char &AMDGPULateCodeGenPrepareID; 318 319 void initializeSIAnnotateControlFlowPass(PassRegistry&); 320 extern char &SIAnnotateControlFlowPassID; 321 322 void initializeSIMemoryLegalizerPass(PassRegistry&); 323 extern char &SIMemoryLegalizerID; 324 325 void initializeSIModeRegisterPass(PassRegistry&); 326 extern char &SIModeRegisterID; 327 328 void initializeSIInsertHardClausesPass(PassRegistry &); 329 extern char &SIInsertHardClausesID; 330 331 void initializeSIInsertWaitcntsPass(PassRegistry&); 332 extern char &SIInsertWaitcntsID; 333 334 void initializeSIFormMemoryClausesPass(PassRegistry&); 335 extern char &SIFormMemoryClausesID; 336 337 void initializeSIPostRABundlerPass(PassRegistry&); 338 extern char &SIPostRABundlerID; 339 340 void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry&); 341 extern char &AMDGPUUnifyDivergentExitNodesID; 342 343 ImmutablePass *createAMDGPUAAWrapperPass(); 344 void initializeAMDGPUAAWrapperPassPass(PassRegistry&); 345 ImmutablePass *createAMDGPUExternalAAWrapperPass(); 346 void initializeAMDGPUExternalAAWrapperPass(PassRegistry&); 347 348 void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &); 349 350 ModulePass *createAMDGPUOpenCLEnqueuedBlockLoweringPass(); 351 void initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(PassRegistry &); 352 extern char &AMDGPUOpenCLEnqueuedBlockLoweringID; 353 354 void initializeGCNNSAReassignPass(PassRegistry &); 355 extern char &GCNNSAReassignID; 356 357 void initializeGCNPreRAOptimizationsPass(PassRegistry &); 358 extern char &GCNPreRAOptimizationsID; 359 360 namespace AMDGPU { 361 enum TargetIndex { 362 TI_CONSTDATA_START, 363 TI_SCRATCH_RSRC_DWORD0, 364 TI_SCRATCH_RSRC_DWORD1, 365 TI_SCRATCH_RSRC_DWORD2, 366 TI_SCRATCH_RSRC_DWORD3 367 }; 368 } 369 370 /// OpenCL uses address spaces to differentiate between 371 /// various memory regions on the hardware. On the CPU 372 /// all of the address spaces point to the same memory, 373 /// however on the GPU, each address space points to 374 /// a separate piece of memory that is unique from other 375 /// memory locations. 376 namespace AMDGPUAS { 377 enum : unsigned { 378 // The maximum value for flat, generic, local, private, constant and region. 379 MAX_AMDGPU_ADDRESS = 7, 380 381 FLAT_ADDRESS = 0, ///< Address space for flat memory. 382 GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0). 383 REGION_ADDRESS = 2, ///< Address space for region memory. (GDS) 384 385 CONSTANT_ADDRESS = 4, ///< Address space for constant memory (VTX2). 386 LOCAL_ADDRESS = 3, ///< Address space for local memory. 387 PRIVATE_ADDRESS = 5, ///< Address space for private memory. 388 389 CONSTANT_ADDRESS_32BIT = 6, ///< Address space for 32-bit constant memory. 390 391 BUFFER_FAT_POINTER = 7, ///< Address space for 160-bit buffer fat pointers. 392 393 /// Address space for direct addressible parameter memory (CONST0). 394 PARAM_D_ADDRESS = 6, 395 /// Address space for indirect addressible parameter memory (VTX1). 396 PARAM_I_ADDRESS = 7, 397 398 // Do not re-order the CONSTANT_BUFFER_* enums. Several places depend on 399 // this order to be able to dynamically index a constant buffer, for 400 // example: 401 // 402 // ConstantBufferAS = CONSTANT_BUFFER_0 + CBIdx 403 404 CONSTANT_BUFFER_0 = 8, 405 CONSTANT_BUFFER_1 = 9, 406 CONSTANT_BUFFER_2 = 10, 407 CONSTANT_BUFFER_3 = 11, 408 CONSTANT_BUFFER_4 = 12, 409 CONSTANT_BUFFER_5 = 13, 410 CONSTANT_BUFFER_6 = 14, 411 CONSTANT_BUFFER_7 = 15, 412 CONSTANT_BUFFER_8 = 16, 413 CONSTANT_BUFFER_9 = 17, 414 CONSTANT_BUFFER_10 = 18, 415 CONSTANT_BUFFER_11 = 19, 416 CONSTANT_BUFFER_12 = 20, 417 CONSTANT_BUFFER_13 = 21, 418 CONSTANT_BUFFER_14 = 22, 419 CONSTANT_BUFFER_15 = 23, 420 421 // Some places use this if the address space can't be determined. 422 UNKNOWN_ADDRESS_SPACE = ~0u, 423 }; 424 } 425 426 namespace AMDGPU { 427 428 // FIXME: Missing constant_32bit 429 inline bool isFlatGlobalAddrSpace(unsigned AS) { 430 return AS == AMDGPUAS::GLOBAL_ADDRESS || 431 AS == AMDGPUAS::FLAT_ADDRESS || 432 AS == AMDGPUAS::CONSTANT_ADDRESS || 433 AS > AMDGPUAS::MAX_AMDGPU_ADDRESS; 434 } 435 } 436 437 } // End namespace llvm 438 439 #endif 440