1 //===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 /// \file 8 //===----------------------------------------------------------------------===// 9 10 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPU_H 11 #define LLVM_LIB_TARGET_AMDGPU_AMDGPU_H 12 13 #include "llvm/IR/PassManager.h" 14 #include "llvm/Support/CodeGen.h" 15 16 namespace llvm { 17 18 class AMDGPUTargetMachine; 19 class FunctionPass; 20 class GCNTargetMachine; 21 class ImmutablePass; 22 class ModulePass; 23 class Pass; 24 class Target; 25 class TargetMachine; 26 class TargetOptions; 27 class PassRegistry; 28 class Module; 29 30 // GlobalISel passes 31 void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &); 32 FunctionPass *createAMDGPUPreLegalizeCombiner(bool IsOptNone); 33 void initializeAMDGPUPostLegalizerCombinerPass(PassRegistry &); 34 FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone); 35 FunctionPass *createAMDGPURegBankCombiner(bool IsOptNone); 36 void initializeAMDGPURegBankCombinerPass(PassRegistry &); 37 38 // R600 Passes 39 FunctionPass *createR600VectorRegMerger(); 40 FunctionPass *createR600ExpandSpecialInstrsPass(); 41 FunctionPass *createR600EmitClauseMarkers(); 42 FunctionPass *createR600ClauseMergePass(); 43 FunctionPass *createR600Packetizer(); 44 FunctionPass *createR600ControlFlowFinalizer(); 45 FunctionPass *createAMDGPUCFGStructurizerPass(); 46 FunctionPass *createR600ISelDag(TargetMachine *TM, CodeGenOpt::Level OptLevel); 47 48 // SI Passes 49 FunctionPass *createGCNDPPCombinePass(); 50 FunctionPass *createSIAnnotateControlFlowPass(); 51 FunctionPass *createSIFoldOperandsPass(); 52 FunctionPass *createSIPeepholeSDWAPass(); 53 FunctionPass *createSILowerI1CopiesPass(); 54 FunctionPass *createSIAddIMGInitPass(); 55 FunctionPass *createSIShrinkInstructionsPass(); 56 FunctionPass *createSILoadStoreOptimizerPass(); 57 FunctionPass *createSIWholeQuadModePass(); 58 FunctionPass *createSIFixControlFlowLiveIntervalsPass(); 59 FunctionPass *createSIOptimizeExecMaskingPreRAPass(); 60 FunctionPass *createSIFixSGPRCopiesPass(); 61 FunctionPass *createSIMemoryLegalizerPass(); 62 FunctionPass *createSIInsertWaitcntsPass(); 63 FunctionPass *createSIPreAllocateWWMRegsPass(); 64 FunctionPass *createSIFormMemoryClausesPass(); 65 66 FunctionPass *createSIPostRABundlerPass(); 67 FunctionPass *createAMDGPUSimplifyLibCallsPass(const TargetMachine *); 68 FunctionPass *createAMDGPUUseNativeCallsPass(); 69 FunctionPass *createAMDGPUCodeGenPreparePass(); 70 FunctionPass *createAMDGPULateCodeGenPreparePass(); 71 FunctionPass *createAMDGPUMachineCFGStructurizerPass(); 72 FunctionPass *createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *); 73 ModulePass *createAMDGPUPropagateAttributesLatePass(const TargetMachine *); 74 FunctionPass *createAMDGPURewriteOutArgumentsPass(); 75 FunctionPass *createSIModeRegisterPass(); 76 77 struct AMDGPUSimplifyLibCallsPass : PassInfoMixin<AMDGPUSimplifyLibCallsPass> { 78 AMDGPUSimplifyLibCallsPass(TargetMachine &TM) : TM(TM) {} 79 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 80 81 private: 82 TargetMachine &TM; 83 }; 84 85 struct AMDGPUUseNativeCallsPass : PassInfoMixin<AMDGPUUseNativeCallsPass> { 86 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 87 }; 88 89 void initializeAMDGPUDAGToDAGISelPass(PassRegistry&); 90 91 void initializeAMDGPUMachineCFGStructurizerPass(PassRegistry&); 92 extern char &AMDGPUMachineCFGStructurizerID; 93 94 void initializeAMDGPUAlwaysInlinePass(PassRegistry&); 95 96 Pass *createAMDGPUAnnotateKernelFeaturesPass(); 97 void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &); 98 extern char &AMDGPUAnnotateKernelFeaturesID; 99 100 FunctionPass *createAMDGPUAtomicOptimizerPass(); 101 void initializeAMDGPUAtomicOptimizerPass(PassRegistry &); 102 extern char &AMDGPUAtomicOptimizerID; 103 104 ModulePass *createAMDGPULowerIntrinsicsPass(); 105 void initializeAMDGPULowerIntrinsicsPass(PassRegistry &); 106 extern char &AMDGPULowerIntrinsicsID; 107 108 ModulePass *createAMDGPUFixFunctionBitcastsPass(); 109 void initializeAMDGPUFixFunctionBitcastsPass(PassRegistry &); 110 extern char &AMDGPUFixFunctionBitcastsID; 111 112 FunctionPass *createAMDGPULowerKernelArgumentsPass(); 113 void initializeAMDGPULowerKernelArgumentsPass(PassRegistry &); 114 extern char &AMDGPULowerKernelArgumentsID; 115 116 ModulePass *createAMDGPULowerKernelAttributesPass(); 117 void initializeAMDGPULowerKernelAttributesPass(PassRegistry &); 118 extern char &AMDGPULowerKernelAttributesID; 119 120 struct AMDGPULowerKernelAttributesPass 121 : PassInfoMixin<AMDGPULowerKernelAttributesPass> { 122 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 123 }; 124 125 void initializeAMDGPUPropagateAttributesEarlyPass(PassRegistry &); 126 extern char &AMDGPUPropagateAttributesEarlyID; 127 128 struct AMDGPUPropagateAttributesEarlyPass 129 : PassInfoMixin<AMDGPUPropagateAttributesEarlyPass> { 130 AMDGPUPropagateAttributesEarlyPass(TargetMachine &TM) : TM(TM) {} 131 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 132 133 private: 134 TargetMachine &TM; 135 }; 136 137 void initializeAMDGPUPropagateAttributesLatePass(PassRegistry &); 138 extern char &AMDGPUPropagateAttributesLateID; 139 140 struct AMDGPUPropagateAttributesLatePass 141 : PassInfoMixin<AMDGPUPropagateAttributesLatePass> { 142 AMDGPUPropagateAttributesLatePass(TargetMachine &TM) : TM(TM) {} 143 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 144 145 private: 146 TargetMachine &TM; 147 }; 148 149 void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &); 150 extern char &AMDGPURewriteOutArgumentsID; 151 152 void initializeGCNDPPCombinePass(PassRegistry &); 153 extern char &GCNDPPCombineID; 154 155 void initializeR600ClauseMergePassPass(PassRegistry &); 156 extern char &R600ClauseMergePassID; 157 158 void initializeR600ControlFlowFinalizerPass(PassRegistry &); 159 extern char &R600ControlFlowFinalizerID; 160 161 void initializeR600ExpandSpecialInstrsPassPass(PassRegistry &); 162 extern char &R600ExpandSpecialInstrsPassID; 163 164 void initializeR600VectorRegMergerPass(PassRegistry &); 165 extern char &R600VectorRegMergerID; 166 167 void initializeR600PacketizerPass(PassRegistry &); 168 extern char &R600PacketizerID; 169 170 void initializeSIFoldOperandsPass(PassRegistry &); 171 extern char &SIFoldOperandsID; 172 173 void initializeSIPeepholeSDWAPass(PassRegistry &); 174 extern char &SIPeepholeSDWAID; 175 176 void initializeSIShrinkInstructionsPass(PassRegistry&); 177 extern char &SIShrinkInstructionsID; 178 179 void initializeSIFixSGPRCopiesPass(PassRegistry &); 180 extern char &SIFixSGPRCopiesID; 181 182 void initializeSIFixVGPRCopiesPass(PassRegistry &); 183 extern char &SIFixVGPRCopiesID; 184 185 void initializeSILowerI1CopiesPass(PassRegistry &); 186 extern char &SILowerI1CopiesID; 187 188 void initializeSILowerSGPRSpillsPass(PassRegistry &); 189 extern char &SILowerSGPRSpillsID; 190 191 void initializeSILoadStoreOptimizerPass(PassRegistry &); 192 extern char &SILoadStoreOptimizerID; 193 194 void initializeSIWholeQuadModePass(PassRegistry &); 195 extern char &SIWholeQuadModeID; 196 197 void initializeSILowerControlFlowPass(PassRegistry &); 198 extern char &SILowerControlFlowID; 199 200 void initializeSIRemoveShortExecBranchesPass(PassRegistry &); 201 extern char &SIRemoveShortExecBranchesID; 202 203 void initializeSIPreEmitPeepholePass(PassRegistry &); 204 extern char &SIPreEmitPeepholeID; 205 206 void initializeSIInsertSkipsPass(PassRegistry &); 207 extern char &SIInsertSkipsPassID; 208 209 void initializeSIOptimizeExecMaskingPass(PassRegistry &); 210 extern char &SIOptimizeExecMaskingID; 211 212 void initializeSIPreAllocateWWMRegsPass(PassRegistry &); 213 extern char &SIPreAllocateWWMRegsID; 214 215 void initializeAMDGPUSimplifyLibCallsPass(PassRegistry &); 216 extern char &AMDGPUSimplifyLibCallsID; 217 218 void initializeAMDGPUUseNativeCallsPass(PassRegistry &); 219 extern char &AMDGPUUseNativeCallsID; 220 221 void initializeSIAddIMGInitPass(PassRegistry &); 222 extern char &SIAddIMGInitID; 223 224 void initializeAMDGPUPerfHintAnalysisPass(PassRegistry &); 225 extern char &AMDGPUPerfHintAnalysisID; 226 227 // Passes common to R600 and SI 228 FunctionPass *createAMDGPUPromoteAlloca(); 229 void initializeAMDGPUPromoteAllocaPass(PassRegistry&); 230 extern char &AMDGPUPromoteAllocaID; 231 232 FunctionPass *createAMDGPUPromoteAllocaToVector(); 233 void initializeAMDGPUPromoteAllocaToVectorPass(PassRegistry&); 234 extern char &AMDGPUPromoteAllocaToVectorID; 235 236 struct AMDGPUPromoteAllocaPass : PassInfoMixin<AMDGPUPromoteAllocaPass> { 237 AMDGPUPromoteAllocaPass(TargetMachine &TM) : TM(TM) {} 238 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 239 240 private: 241 TargetMachine &TM; 242 }; 243 244 struct AMDGPUPromoteAllocaToVectorPass 245 : PassInfoMixin<AMDGPUPromoteAllocaToVectorPass> { 246 AMDGPUPromoteAllocaToVectorPass(TargetMachine &TM) : TM(TM) {} 247 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 248 249 private: 250 TargetMachine &TM; 251 }; 252 253 Pass *createAMDGPUStructurizeCFGPass(); 254 FunctionPass *createAMDGPUISelDag( 255 TargetMachine *TM = nullptr, 256 CodeGenOpt::Level OptLevel = CodeGenOpt::Default); 257 ModulePass *createAMDGPUAlwaysInlinePass(bool GlobalOpt = true); 258 259 struct AMDGPUAlwaysInlinePass : PassInfoMixin<AMDGPUAlwaysInlinePass> { 260 AMDGPUAlwaysInlinePass(bool GlobalOpt = true) : GlobalOpt(GlobalOpt) {} 261 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 262 263 private: 264 bool GlobalOpt; 265 }; 266 267 ModulePass *createR600OpenCLImageTypeLoweringPass(); 268 FunctionPass *createAMDGPUAnnotateUniformValues(); 269 270 ModulePass *createAMDGPUPrintfRuntimeBinding(); 271 void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry&); 272 extern char &AMDGPUPrintfRuntimeBindingID; 273 274 struct AMDGPUPrintfRuntimeBindingPass 275 : PassInfoMixin<AMDGPUPrintfRuntimeBindingPass> { 276 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 277 }; 278 279 ModulePass* createAMDGPUUnifyMetadataPass(); 280 void initializeAMDGPUUnifyMetadataPass(PassRegistry&); 281 extern char &AMDGPUUnifyMetadataID; 282 283 struct AMDGPUUnifyMetadataPass : PassInfoMixin<AMDGPUUnifyMetadataPass> { 284 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 285 }; 286 287 void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry&); 288 extern char &SIOptimizeExecMaskingPreRAID; 289 290 void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry&); 291 extern char &AMDGPUAnnotateUniformValuesPassID; 292 293 void initializeAMDGPUCodeGenPreparePass(PassRegistry&); 294 extern char &AMDGPUCodeGenPrepareID; 295 296 void initializeAMDGPULateCodeGenPreparePass(PassRegistry &); 297 extern char &AMDGPULateCodeGenPrepareID; 298 299 void initializeSIAnnotateControlFlowPass(PassRegistry&); 300 extern char &SIAnnotateControlFlowPassID; 301 302 void initializeSIMemoryLegalizerPass(PassRegistry&); 303 extern char &SIMemoryLegalizerID; 304 305 void initializeSIModeRegisterPass(PassRegistry&); 306 extern char &SIModeRegisterID; 307 308 void initializeSIInsertHardClausesPass(PassRegistry &); 309 extern char &SIInsertHardClausesID; 310 311 void initializeSIInsertWaitcntsPass(PassRegistry&); 312 extern char &SIInsertWaitcntsID; 313 314 void initializeSIFormMemoryClausesPass(PassRegistry&); 315 extern char &SIFormMemoryClausesID; 316 317 void initializeSIPostRABundlerPass(PassRegistry&); 318 extern char &SIPostRABundlerID; 319 320 void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry&); 321 extern char &AMDGPUUnifyDivergentExitNodesID; 322 323 ImmutablePass *createAMDGPUAAWrapperPass(); 324 void initializeAMDGPUAAWrapperPassPass(PassRegistry&); 325 ImmutablePass *createAMDGPUExternalAAWrapperPass(); 326 void initializeAMDGPUExternalAAWrapperPass(PassRegistry&); 327 328 void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &); 329 330 ModulePass *createAMDGPUOpenCLEnqueuedBlockLoweringPass(); 331 void initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(PassRegistry &); 332 extern char &AMDGPUOpenCLEnqueuedBlockLoweringID; 333 334 void initializeGCNRegBankReassignPass(PassRegistry &); 335 extern char &GCNRegBankReassignID; 336 337 void initializeGCNNSAReassignPass(PassRegistry &); 338 extern char &GCNNSAReassignID; 339 340 namespace AMDGPU { 341 enum TargetIndex { 342 TI_CONSTDATA_START, 343 TI_SCRATCH_RSRC_DWORD0, 344 TI_SCRATCH_RSRC_DWORD1, 345 TI_SCRATCH_RSRC_DWORD2, 346 TI_SCRATCH_RSRC_DWORD3 347 }; 348 } 349 350 /// OpenCL uses address spaces to differentiate between 351 /// various memory regions on the hardware. On the CPU 352 /// all of the address spaces point to the same memory, 353 /// however on the GPU, each address space points to 354 /// a separate piece of memory that is unique from other 355 /// memory locations. 356 namespace AMDGPUAS { 357 enum : unsigned { 358 // The maximum value for flat, generic, local, private, constant and region. 359 MAX_AMDGPU_ADDRESS = 7, 360 361 FLAT_ADDRESS = 0, ///< Address space for flat memory. 362 GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0). 363 REGION_ADDRESS = 2, ///< Address space for region memory. (GDS) 364 365 CONSTANT_ADDRESS = 4, ///< Address space for constant memory (VTX2). 366 LOCAL_ADDRESS = 3, ///< Address space for local memory. 367 PRIVATE_ADDRESS = 5, ///< Address space for private memory. 368 369 CONSTANT_ADDRESS_32BIT = 6, ///< Address space for 32-bit constant memory. 370 371 BUFFER_FAT_POINTER = 7, ///< Address space for 160-bit buffer fat pointers. 372 373 /// Address space for direct addressible parameter memory (CONST0). 374 PARAM_D_ADDRESS = 6, 375 /// Address space for indirect addressible parameter memory (VTX1). 376 PARAM_I_ADDRESS = 7, 377 378 // Do not re-order the CONSTANT_BUFFER_* enums. Several places depend on 379 // this order to be able to dynamically index a constant buffer, for 380 // example: 381 // 382 // ConstantBufferAS = CONSTANT_BUFFER_0 + CBIdx 383 384 CONSTANT_BUFFER_0 = 8, 385 CONSTANT_BUFFER_1 = 9, 386 CONSTANT_BUFFER_2 = 10, 387 CONSTANT_BUFFER_3 = 11, 388 CONSTANT_BUFFER_4 = 12, 389 CONSTANT_BUFFER_5 = 13, 390 CONSTANT_BUFFER_6 = 14, 391 CONSTANT_BUFFER_7 = 15, 392 CONSTANT_BUFFER_8 = 16, 393 CONSTANT_BUFFER_9 = 17, 394 CONSTANT_BUFFER_10 = 18, 395 CONSTANT_BUFFER_11 = 19, 396 CONSTANT_BUFFER_12 = 20, 397 CONSTANT_BUFFER_13 = 21, 398 CONSTANT_BUFFER_14 = 22, 399 CONSTANT_BUFFER_15 = 23, 400 401 // Some places use this if the address space can't be determined. 402 UNKNOWN_ADDRESS_SPACE = ~0u, 403 }; 404 } 405 406 namespace AMDGPU { 407 408 // FIXME: Missing constant_32bit 409 inline bool isFlatGlobalAddrSpace(unsigned AS) { 410 return AS == AMDGPUAS::GLOBAL_ADDRESS || 411 AS == AMDGPUAS::FLAT_ADDRESS || 412 AS == AMDGPUAS::CONSTANT_ADDRESS || 413 AS > AMDGPUAS::MAX_AMDGPU_ADDRESS; 414 } 415 } 416 417 } // End namespace llvm 418 419 #endif 420