//===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// The AMDGPU target machine contains all of the hardware specific
/// information needed to emit code for SI+ GPUs.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUTargetMachine.h"
#include "AMDGPU.h"
#include "AMDGPUAliasAnalysis.h"
#include "AMDGPUCodeGenPassBuilder.h"
#include "AMDGPUCtorDtorLowering.h"
#include "AMDGPUExportClustering.h"
#include "AMDGPUIGroupLP.h"
#include "AMDGPUISelDAGToDAG.h"
#include "AMDGPUMacroFusion.h"
#include "AMDGPURegBankSelect.h"
#include "AMDGPUSplitModule.h"
#include "AMDGPUTargetObjectFile.h"
#include "AMDGPUTargetTransformInfo.h"
#include "AMDGPUUnifyDivergentExitNodes.h"
#include "GCNIterativeScheduler.h"
#include "GCNSchedStrategy.h"
#include "GCNVOPDUtils.h"
#include "R600.h"
#include "R600MachineFunctionInfo.h"
#include "R600TargetMachine.h"
#include "SIMachineFunctionInfo.h"
#include "SIMachineScheduler.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/CodeGen/GlobalISel/Localizer.h"
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/CodeGen/MIRParser/MIParser.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Transforms/HipStdPar/HipStdPar.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/Transforms/IPO/ExpandVariadics.h"
#include "llvm/Transforms/IPO/GlobalDCE.h"
#include "llvm/Transforms/IPO/Internalize.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Scalar/InferAddressSpaces.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
#include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h"
#include <optional>

using namespace llvm;
using namespace llvm::PatternMatch;

namespace {
class SGPRRegisterRegAlloc : public RegisterRegAllocBase<SGPRRegisterRegAlloc> {
public:
  SGPRRegisterRegAlloc(const char *N, const char *D, FunctionPassCtor C)
      : RegisterRegAllocBase(N, D, C) {}
};

class VGPRRegisterRegAlloc : public RegisterRegAllocBase<VGPRRegisterRegAlloc> {
public:
  VGPRRegisterRegAlloc(const char *N, const char *D, FunctionPassCtor C)
      : RegisterRegAllocBase(N, D, C) {}
};

static bool onlyAllocateSGPRs(const TargetRegisterInfo &TRI,
                              const MachineRegisterInfo &MRI,
                              const Register Reg) {
  const TargetRegisterClass *RC = MRI.getRegClass(Reg);
  return static_cast<const SIRegisterInfo &>(TRI).isSGPRClass(RC);
}

static bool onlyAllocateVGPRs(const TargetRegisterInfo &TRI,
                              const MachineRegisterInfo &MRI,
                              const Register Reg) {
  const TargetRegisterClass *RC = MRI.getRegClass(Reg);
  return !static_cast<const SIRegisterInfo &>(TRI).isSGPRClass(RC);
}

/// -{sgpr|vgpr}-regalloc=... command line option.
static FunctionPass *useDefaultRegisterAllocator() { return nullptr; }

/// A dummy default pass factory indicates whether the register allocator is
/// overridden on the command line.
static llvm::once_flag InitializeDefaultSGPRRegisterAllocatorFlag;
static llvm::once_flag InitializeDefaultVGPRRegisterAllocatorFlag;

static SGPRRegisterRegAlloc
    defaultSGPRRegAlloc("default",
                        "pick SGPR register allocator based on -O option",
                        useDefaultRegisterAllocator);

static cl::opt<SGPRRegisterRegAlloc::FunctionPassCtor, false,
               RegisterPassParser<SGPRRegisterRegAlloc>>
    SGPRRegAlloc("sgpr-regalloc", cl::Hidden,
                 cl::init(&useDefaultRegisterAllocator),
                 cl::desc("Register allocator to use for SGPRs"));

static cl::opt<VGPRRegisterRegAlloc::FunctionPassCtor, false,
               RegisterPassParser<VGPRRegisterRegAlloc>>
    VGPRRegAlloc("vgpr-regalloc", cl::Hidden,
                 cl::init(&useDefaultRegisterAllocator),
                 cl::desc("Register allocator to use for VGPRs"));
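
// Example (hypothetical invocation, for illustration only): the two options
// above can be mixed independently, e.g.
//   llc -mtriple=amdgcn -sgpr-regalloc=greedy -vgpr-regalloc=fast kernel.ll
// runs greedy allocation for SGPRs and fast allocation for VGPRs.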

static void initializeDefaultSGPRRegisterAllocatorOnce() {
  RegisterRegAlloc::FunctionPassCtor Ctor = SGPRRegisterRegAlloc::getDefault();

  if (!Ctor) {
    Ctor = SGPRRegAlloc;
    SGPRRegisterRegAlloc::setDefault(SGPRRegAlloc);
  }
}

static void initializeDefaultVGPRRegisterAllocatorOnce() {
  RegisterRegAlloc::FunctionPassCtor Ctor = VGPRRegisterRegAlloc::getDefault();

  if (!Ctor) {
    Ctor = VGPRRegAlloc;
    VGPRRegisterRegAlloc::setDefault(VGPRRegAlloc);
  }
}

static FunctionPass *createBasicSGPRRegisterAllocator() {
  return createBasicRegisterAllocator(onlyAllocateSGPRs);
}

static FunctionPass *createGreedySGPRRegisterAllocator() {
  return createGreedyRegisterAllocator(onlyAllocateSGPRs);
}

static FunctionPass *createFastSGPRRegisterAllocator() {
  return createFastRegisterAllocator(onlyAllocateSGPRs, false);
}

static FunctionPass *createBasicVGPRRegisterAllocator() {
  return createBasicRegisterAllocator(onlyAllocateVGPRs);
}

static FunctionPass *createGreedyVGPRRegisterAllocator() {
  return createGreedyRegisterAllocator(onlyAllocateVGPRs);
}

static FunctionPass *createFastVGPRRegisterAllocator() {
  return createFastRegisterAllocator(onlyAllocateVGPRs, true);
}

static SGPRRegisterRegAlloc basicRegAllocSGPR(
    "basic", "basic register allocator", createBasicSGPRRegisterAllocator);
static SGPRRegisterRegAlloc greedyRegAllocSGPR(
    "greedy", "greedy register allocator", createGreedySGPRRegisterAllocator);

static SGPRRegisterRegAlloc fastRegAllocSGPR(
    "fast", "fast register allocator", createFastSGPRRegisterAllocator);

static VGPRRegisterRegAlloc basicRegAllocVGPR(
    "basic", "basic register allocator", createBasicVGPRRegisterAllocator);
static VGPRRegisterRegAlloc greedyRegAllocVGPR(
    "greedy", "greedy register allocator", createGreedyVGPRRegisterAllocator);

static VGPRRegisterRegAlloc fastRegAllocVGPR(
    "fast", "fast register allocator", createFastVGPRRegisterAllocator);
} // anonymous namespace

static cl::opt<bool>
    EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden,
                            cl::desc("Run early if-conversion"),
                            cl::init(false));

static cl::opt<bool>
    OptExecMaskPreRA("amdgpu-opt-exec-mask-pre-ra", cl::Hidden,
                     cl::desc("Run pre-RA exec mask optimizations"),
                     cl::init(true));

static cl::opt<bool>
    LowerCtorDtor("amdgpu-lower-global-ctor-dtor",
                  cl::desc("Lower GPU ctor / dtors to globals on the device."),
                  cl::init(true), cl::Hidden);

// Option to disable vectorizer for tests.
static cl::opt<bool> EnableLoadStoreVectorizer(
    "amdgpu-load-store-vectorizer",
    cl::desc("Enable load store vectorizer"),
    cl::init(true),
    cl::Hidden);

// Option to control global loads scalarization
static cl::opt<bool> ScalarizeGlobal(
    "amdgpu-scalarize-global-loads",
    cl::desc("Enable global load scalarization"),
    cl::init(true),
    cl::Hidden);

// Option to run internalize pass.
static cl::opt<bool> InternalizeSymbols(
    "amdgpu-internalize-symbols",
    cl::desc("Enable elimination of non-kernel functions and unused globals"),
    cl::init(false),
    cl::Hidden);

// Option to inline all early.
static cl::opt<bool> EarlyInlineAll(
    "amdgpu-early-inline-all",
    cl::desc("Inline all functions early"),
    cl::init(false),
    cl::Hidden);

static cl::opt<bool> RemoveIncompatibleFunctions(
    "amdgpu-enable-remove-incompatible-functions", cl::Hidden,
    cl::desc("Enable removal of functions when they "
             "use features not supported by the target GPU"),
    cl::init(true));

static cl::opt<bool> EnableSDWAPeephole(
    "amdgpu-sdwa-peephole",
    cl::desc("Enable SDWA peepholer"),
    cl::init(true));

static cl::opt<bool> EnableDPPCombine(
    "amdgpu-dpp-combine",
    cl::desc("Enable DPP combiner"),
    cl::init(true));

// Enable address space based alias analysis
static cl::opt<bool> EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden,
    cl::desc("Enable AMDGPU Alias Analysis"),
    cl::init(true));

// Option to run late CFG structurizer
static cl::opt<bool, true> LateCFGStructurize(
    "amdgpu-late-structurize",
    cl::desc("Enable late CFG structurization"),
    cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG),
    cl::Hidden);

// Disable structurizer-based control-flow lowering in order to test
// convergence control tokens. This should eventually be replaced by the
// wave-transform.
static cl::opt<bool, true> DisableStructurizer(
    "amdgpu-disable-structurizer",
    cl::desc("Disable structurizer for experiments; produces unusable code"),
    cl::location(AMDGPUTargetMachine::DisableStructurizer), cl::ReallyHidden);

// Enable lib calls simplifications
static cl::opt<bool> EnableLibCallSimplify(
    "amdgpu-simplify-libcall",
    cl::desc("Enable amdgpu library simplifications"),
    cl::init(true),
    cl::Hidden);

static cl::opt<bool> EnableLowerKernelArguments(
    "amdgpu-ir-lower-kernel-arguments",
    cl::desc("Lower kernel argument loads in IR pass"),
    cl::init(true),
    cl::Hidden);

static cl::opt<bool> EnableRegReassign(
    "amdgpu-reassign-regs",
    cl::desc("Enable register reassign optimizations on gfx10+"),
    cl::init(true),
    cl::Hidden);

static cl::opt<bool> OptVGPRLiveRange(
    "amdgpu-opt-vgpr-liverange",
    cl::desc("Enable VGPR liverange optimizations for if-else structure"),
    cl::init(true), cl::Hidden);

static cl::opt<ScanOptions> AMDGPUAtomicOptimizerStrategy(
    "amdgpu-atomic-optimizer-strategy",
    cl::desc("Select DPP or Iterative strategy for scan"),
    cl::init(ScanOptions::Iterative),
    cl::values(
        clEnumValN(ScanOptions::DPP, "DPP", "Use DPP operations for scan"),
        clEnumValN(ScanOptions::Iterative, "Iterative",
                   "Use Iterative approach for scan"),
        clEnumValN(ScanOptions::None, "None", "Disable atomic optimizer")));
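
// For instance, a run that forces the DPP lowering (illustrative command):
//   llc -mtriple=amdgcn -amdgpu-atomic-optimizer-strategy=DPP kernel.ll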

// Enable Mode register optimization
static cl::opt<bool> EnableSIModeRegisterPass(
    "amdgpu-mode-register",
    cl::desc("Enable mode register pass"),
    cl::init(true),
    cl::Hidden);

// Enable GFX11.5+ s_singleuse_vdst insertion
static cl::opt<bool>
    EnableInsertSingleUseVDST("amdgpu-enable-single-use-vdst",
                              cl::desc("Enable s_singleuse_vdst insertion"),
                              cl::init(false), cl::Hidden);

// Enable GFX11+ s_delay_alu insertion
static cl::opt<bool>
    EnableInsertDelayAlu("amdgpu-enable-delay-alu",
                         cl::desc("Enable s_delay_alu insertion"),
                         cl::init(true), cl::Hidden);

// Enable GFX11+ VOPD
static cl::opt<bool>
    EnableVOPD("amdgpu-enable-vopd",
               cl::desc("Enable VOPD, dual issue of VALU in wave32"),
               cl::init(true), cl::Hidden);

// Option is used in lit tests to prevent deadcoding of patterns inspected.
static cl::opt<bool>
    EnableDCEInRA("amdgpu-dce-in-ra",
                  cl::init(true), cl::Hidden,
                  cl::desc("Enable machine DCE inside regalloc"));

static cl::opt<bool> EnableSetWavePriority("amdgpu-set-wave-priority",
                                           cl::desc("Adjust wave priority"),
                                           cl::init(false), cl::Hidden);

static cl::opt<bool> EnableScalarIRPasses(
    "amdgpu-scalar-ir-passes",
    cl::desc("Enable scalar IR passes"),
    cl::init(true),
    cl::Hidden);

static cl::opt<bool> EnableStructurizerWorkarounds(
    "amdgpu-enable-structurizer-workarounds",
    cl::desc("Enable workarounds for the StructurizeCFG pass"), cl::init(true),
    cl::Hidden);

static cl::opt<bool, true> EnableLowerModuleLDS(
    "amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"),
    cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS), cl::init(true),
    cl::Hidden);

static cl::opt<bool> EnablePreRAOptimizations(
    "amdgpu-enable-pre-ra-optimizations",
    cl::desc("Enable Pre-RA optimizations pass"), cl::init(true),
    cl::Hidden);

static cl::opt<bool> EnablePromoteKernelArguments(
    "amdgpu-enable-promote-kernel-arguments",
    cl::desc("Enable promotion of flat kernel pointer arguments to global"),
    cl::Hidden, cl::init(true));

static cl::opt<bool> EnableImageIntrinsicOptimizer(
    "amdgpu-enable-image-intrinsic-optimizer",
    cl::desc("Enable image intrinsic optimizer pass"), cl::init(true),
    cl::Hidden);

static cl::opt<bool>
    EnableLoopPrefetch("amdgpu-loop-prefetch",
                       cl::desc("Enable loop data prefetch on AMDGPU"),
                       cl::Hidden, cl::init(false));

static cl::opt<bool> EnableMaxIlpSchedStrategy(
    "amdgpu-enable-max-ilp-scheduling-strategy",
    cl::desc("Enable scheduling strategy to maximize ILP for a single wave."),
    cl::Hidden, cl::init(false));

static cl::opt<bool> EnableRewritePartialRegUses(
    "amdgpu-enable-rewrite-partial-reg-uses",
    cl::desc("Enable rewrite partial reg uses pass"), cl::init(true),
    cl::Hidden);

static cl::opt<bool> EnableHipStdPar(
    "amdgpu-enable-hipstdpar",
    cl::desc("Enable HIP Standard Parallelism Offload support"),
    cl::init(false), cl::Hidden);

extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
  // Register the target
  RegisterTargetMachine<R600TargetMachine> X(getTheR600Target());
  RegisterTargetMachine<GCNTargetMachine> Y(getTheGCNTarget());

  PassRegistry *PR = PassRegistry::getPassRegistry();
  initializeR600ClauseMergePassPass(*PR);
  initializeR600ControlFlowFinalizerPass(*PR);
  initializeR600PacketizerPass(*PR);
  initializeR600ExpandSpecialInstrsPassPass(*PR);
  initializeR600VectorRegMergerPass(*PR);
  initializeGlobalISel(*PR);
  initializeAMDGPUDAGToDAGISelLegacyPass(*PR);
  initializeGCNDPPCombinePass(*PR);
  initializeSILowerI1CopiesPass(*PR);
  initializeAMDGPUGlobalISelDivergenceLoweringPass(*PR);
  initializeSILowerWWMCopiesPass(*PR);
  initializeAMDGPUMarkLastScratchLoadPass(*PR);
  initializeSILowerSGPRSpillsPass(*PR);
  initializeSIFixSGPRCopiesPass(*PR);
  initializeSIFixVGPRCopiesPass(*PR);
  initializeSIFoldOperandsPass(*PR);
  initializeSIPeepholeSDWAPass(*PR);
  initializeSIShrinkInstructionsPass(*PR);
  initializeSIOptimizeExecMaskingPreRAPass(*PR);
  initializeSIOptimizeVGPRLiveRangePass(*PR);
  initializeSILoadStoreOptimizerPass(*PR);
  initializeAMDGPUCtorDtorLoweringLegacyPass(*PR);
  initializeAMDGPUAlwaysInlinePass(*PR);
  initializeAMDGPUAttributorLegacyPass(*PR);
  initializeAMDGPUAnnotateKernelFeaturesPass(*PR);
  initializeAMDGPUAnnotateUniformValuesPass(*PR);
  initializeAMDGPUArgumentUsageInfoPass(*PR);
  initializeAMDGPUAtomicOptimizerPass(*PR);
  initializeAMDGPULowerKernelArgumentsPass(*PR);
  initializeAMDGPUPromoteKernelArgumentsPass(*PR);
  initializeAMDGPULowerKernelAttributesPass(*PR);
  initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(*PR);
  initializeAMDGPUPostLegalizerCombinerPass(*PR);
  initializeAMDGPUPreLegalizerCombinerPass(*PR);
  initializeAMDGPURegBankCombinerPass(*PR);
  initializeAMDGPURegBankSelectPass(*PR);
  initializeAMDGPUPromoteAllocaPass(*PR);
  initializeAMDGPUPromoteAllocaToVectorPass(*PR);
  initializeAMDGPUCodeGenPreparePass(*PR);
  initializeAMDGPULateCodeGenPreparePass(*PR);
  initializeAMDGPURemoveIncompatibleFunctionsPass(*PR);
  initializeAMDGPULowerModuleLDSLegacyPass(*PR);
  initializeAMDGPULowerBufferFatPointersPass(*PR);
  initializeAMDGPURewriteOutArgumentsPass(*PR);
  initializeAMDGPURewriteUndefForPHILegacyPass(*PR);
  initializeAMDGPUUnifyMetadataPass(*PR);
  initializeSIAnnotateControlFlowPass(*PR);
  initializeAMDGPUInsertSingleUseVDSTPass(*PR);
  initializeAMDGPUInsertDelayAluPass(*PR);
  initializeSIInsertHardClausesPass(*PR);
  initializeSIInsertWaitcntsPass(*PR);
  initializeSIModeRegisterPass(*PR);
  initializeSIWholeQuadModePass(*PR);
  initializeSILowerControlFlowPass(*PR);
  initializeSIPreEmitPeepholePass(*PR);
  initializeSILateBranchLoweringPass(*PR);
  initializeSIMemoryLegalizerPass(*PR);
  initializeSIOptimizeExecMaskingPass(*PR);
  initializeSIPreAllocateWWMRegsPass(*PR);
  initializeSIFormMemoryClausesPass(*PR);
  initializeSIPostRABundlerPass(*PR);
  initializeGCNCreateVOPDPass(*PR);
  initializeAMDGPUUnifyDivergentExitNodesPass(*PR);
  initializeAMDGPUAAWrapperPassPass(*PR);
  initializeAMDGPUExternalAAWrapperPass(*PR);
  initializeAMDGPUImageIntrinsicOptimizerPass(*PR);
  initializeAMDGPUPrintfRuntimeBindingPass(*PR);
  initializeAMDGPUResourceUsageAnalysisPass(*PR);
  initializeGCNNSAReassignPass(*PR);
  initializeGCNPreRAOptimizationsPass(*PR);
  initializeGCNPreRALongBranchRegPass(*PR);
  initializeGCNRewritePartialRegUsesPass(*PR);
  initializeGCNRegPressurePrinterPass(*PR);
}

static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
  return std::make_unique<AMDGPUTargetObjectFile>();
}

static ScheduleDAGInstrs *createSIMachineScheduler(MachineSchedContext *C) {
  return new SIScheduleDAGMI(C);
}

static ScheduleDAGInstrs *
createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
  const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
  ScheduleDAGMILive *DAG = new GCNScheduleDAGMILive(
      C, std::make_unique<GCNMaxOccupancySchedStrategy>(C));
  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
  if (ST.shouldClusterStores())
    DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
  DAG->addMutation(createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::Initial));
  DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
  DAG->addMutation(createAMDGPUExportClusteringDAGMutation());
  return DAG;
}

static ScheduleDAGInstrs *
createGCNMaxILPMachineScheduler(MachineSchedContext *C) {
  ScheduleDAGMILive *DAG =
      new GCNScheduleDAGMILive(C, std::make_unique<GCNMaxILPSchedStrategy>(C));
  DAG->addMutation(createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::Initial));
  return DAG;
}

static ScheduleDAGInstrs *
createIterativeGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
  const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
  auto DAG = new GCNIterativeScheduler(
      C, GCNIterativeScheduler::SCHEDULE_LEGACYMAXOCCUPANCY);
  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
  if (ST.shouldClusterStores())
    DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
  return DAG;
}

static ScheduleDAGInstrs *createMinRegScheduler(MachineSchedContext *C) {
  return new GCNIterativeScheduler(
      C, GCNIterativeScheduler::SCHEDULE_MINREGFORCED);
}

static ScheduleDAGInstrs *
createIterativeILPMachineScheduler(MachineSchedContext *C) {
  const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
  auto DAG = new GCNIterativeScheduler(C, GCNIterativeScheduler::SCHEDULE_ILP);
  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
  if (ST.shouldClusterStores())
    DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
  DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
  return DAG;
}

static MachineSchedRegistry
    SISchedRegistry("si", "Run SI's custom scheduler",
                    createSIMachineScheduler);

static MachineSchedRegistry
    GCNMaxOccupancySchedRegistry("gcn-max-occupancy",
                                 "Run GCN scheduler to maximize occupancy",
                                 createGCNMaxOccupancyMachineScheduler);

static MachineSchedRegistry
    GCNMaxILPSchedRegistry("gcn-max-ilp", "Run GCN scheduler to maximize ilp",
                           createGCNMaxILPMachineScheduler);

static MachineSchedRegistry IterativeGCNMaxOccupancySchedRegistry(
    "gcn-iterative-max-occupancy-experimental",
    "Run GCN scheduler to maximize occupancy (experimental)",
    createIterativeGCNMaxOccupancyMachineScheduler);

static MachineSchedRegistry GCNMinRegSchedRegistry(
    "gcn-iterative-minreg",
    "Run GCN iterative scheduler for minimal register usage (experimental)",
    createMinRegScheduler);

static MachineSchedRegistry GCNILPSchedRegistry(
    "gcn-iterative-ilp",
    "Run GCN iterative scheduler for ILP scheduling (experimental)",
    createIterativeILPMachineScheduler);
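
// These registries plug into the generic -misched selection mechanism, so an
// alternative scheduler can be chosen per run (illustrative command):
//   llc -mtriple=amdgcn -misched=gcn-max-ilp kernel.ll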

static StringRef computeDataLayout(const Triple &TT) {
  if (TT.getArch() == Triple::r600) {
    // 32-bit pointers.
    return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
           "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1";
  }

  // 32-bit private, local, and region pointers. 64-bit global, constant and
  // flat. 160-bit non-integral fat buffer pointers that include a 128-bit
  // buffer descriptor and a 32-bit offset, which are indexed by 32-bit values
  // (address space 7), and 128-bit non-integral buffer resources (address
  // space 8) which cannot be non-trivially accessed by LLVM memory operations
  // like getelementptr.
  return "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
         "-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-"
         "v32:32-v48:64-v96:"
         "128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-"
         "G1-ni:7:8:9";
}
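
// Reading one entry as an example: "p7:160:256:256:32" declares address
// space 7 pointers as 160 bits wide with 256-bit ABI and preferred
// alignment and a 32-bit index width, i.e. IR like
//   %v = load i32, ptr addrspace(7) %buf
// operates on a fat pointer whose offset arithmetic is done in 32 bits.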

LLVM_READNONE
static StringRef getGPUOrDefault(const Triple &TT, StringRef GPU) {
  if (!GPU.empty())
    return GPU;

  // Need to default to a target with flat support for HSA.
  if (TT.getArch() == Triple::amdgcn)
    return TT.getOS() == Triple::AMDHSA ? "generic-hsa" : "generic";

  return "r600";
}

static Reloc::Model getEffectiveRelocModel(std::optional<Reloc::Model> RM) {
  // The AMDGPU toolchain only supports generating shared objects, so we
  // must always use PIC.
  return Reloc::PIC_;
}

AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
                                         StringRef CPU, StringRef FS,
                                         const TargetOptions &Options,
                                         std::optional<Reloc::Model> RM,
                                         std::optional<CodeModel::Model> CM,
                                         CodeGenOptLevel OptLevel)
    : LLVMTargetMachine(T, computeDataLayout(TT), TT, getGPUOrDefault(TT, CPU),
                        FS, Options, getEffectiveRelocModel(RM),
                        getEffectiveCodeModel(CM, CodeModel::Small), OptLevel),
      TLOF(createTLOF(getTargetTriple())) {
  initAsmInfo();
  if (TT.getArch() == Triple::amdgcn) {
    if (getMCSubtargetInfo()->checkFeatures("+wavefrontsize64"))
      MRI.reset(llvm::createGCNMCRegisterInfo(AMDGPUDwarfFlavour::Wave64));
    else if (getMCSubtargetInfo()->checkFeatures("+wavefrontsize32"))
      MRI.reset(llvm::createGCNMCRegisterInfo(AMDGPUDwarfFlavour::Wave32));
  }
}

bool AMDGPUTargetMachine::EnableLateStructurizeCFG = false;
bool AMDGPUTargetMachine::EnableFunctionCalls = false;
bool AMDGPUTargetMachine::EnableLowerModuleLDS = true;
bool AMDGPUTargetMachine::DisableStructurizer = false;

AMDGPUTargetMachine::~AMDGPUTargetMachine() = default;

StringRef AMDGPUTargetMachine::getGPUName(const Function &F) const {
  Attribute GPUAttr = F.getFnAttribute("target-cpu");
  return GPUAttr.isValid() ? GPUAttr.getValueAsString() : getTargetCPU();
}

StringRef AMDGPUTargetMachine::getFeatureString(const Function &F) const {
  Attribute FSAttr = F.getFnAttribute("target-features");

  return FSAttr.isValid() ? FSAttr.getValueAsString()
                          : getTargetFeatureString();
}

/// Predicate for Internalize pass.
static bool mustPreserveGV(const GlobalValue &GV) {
  if (const Function *F = dyn_cast<Function>(&GV))
    return F->isDeclaration() || F->getName().starts_with("__asan_") ||
           F->getName().starts_with("__sanitizer_") ||
           AMDGPU::isEntryFunctionCC(F->getCallingConv());

  GV.removeDeadConstantUsers();
  return !GV.use_empty();
}

void AMDGPUTargetMachine::registerDefaultAliasAnalyses(AAManager &AAM) {
  AAM.registerFunctionAnalysis<AMDGPUAA>();
}

static Expected<ScanOptions>
parseAMDGPUAtomicOptimizerStrategy(StringRef Params) {
  if (Params.empty())
    return ScanOptions::Iterative;
  Params.consume_front("strategy=");
  auto Result = StringSwitch<std::optional<ScanOptions>>(Params)
                    .Case("dpp", ScanOptions::DPP)
                    .Cases("iterative", "", ScanOptions::Iterative)
                    .Case("none", ScanOptions::None)
                    .Default(std::nullopt);
  if (Result)
    return *Result;
  return make_error<StringError>("invalid parameter", inconvertibleErrorCode());
}
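
// This parser backs the new-PM textual pipeline syntax; assuming the pass is
// exposed via the pass registry, an illustrative invocation would be:
//   opt -passes='amdgpu-atomic-optimizer<strategy=dpp>' -S kernel.ll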

Error AMDGPUTargetMachine::buildCodeGenPipeline(
    ModulePassManager &MPM, raw_pwrite_stream &Out, raw_pwrite_stream *DwoOut,
    CodeGenFileType FileType, const CGPassBuilderOption &Opts,
    PassInstrumentationCallbacks *PIC) {
  AMDGPUCodeGenPassBuilder CGPB(*this, Opts, PIC);
  return CGPB.buildPipeline(MPM, Out, DwoOut, FileType);
}

void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {

#define GET_PASS_REGISTRY "AMDGPUPassRegistry.def"
#include "llvm/Passes/TargetPassRegistry.inc"

  PB.registerPipelineStartEPCallback(
      [](ModulePassManager &PM, OptimizationLevel Level) {
        FunctionPassManager FPM;
        PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
        if (EnableHipStdPar)
          PM.addPass(HipStdParAcceleratorCodeSelectionPass());
      });

  PB.registerPipelineEarlySimplificationEPCallback(
      [](ModulePassManager &PM, OptimizationLevel Level) {
        PM.addPass(AMDGPUPrintfRuntimeBindingPass());

        if (Level == OptimizationLevel::O0)
          return;

        PM.addPass(AMDGPUUnifyMetadataPass());

        if (InternalizeSymbols) {
          PM.addPass(InternalizePass(mustPreserveGV));
          PM.addPass(GlobalDCEPass());
        }

        if (EarlyInlineAll && !EnableFunctionCalls)
          PM.addPass(AMDGPUAlwaysInlinePass());
      });

  PB.registerPeepholeEPCallback(
      [](FunctionPassManager &FPM, OptimizationLevel Level) {
        if (Level == OptimizationLevel::O0)
          return;

        FPM.addPass(AMDGPUUseNativeCallsPass());
        if (EnableLibCallSimplify)
          FPM.addPass(AMDGPUSimplifyLibCallsPass());
      });

  PB.registerCGSCCOptimizerLateEPCallback(
      [this](CGSCCPassManager &PM, OptimizationLevel Level) {
        if (Level == OptimizationLevel::O0)
          return;

        FunctionPassManager FPM;

        // Add promote kernel arguments pass to the opt pipeline right before
        // infer address spaces which is needed to do actual address space
        // rewriting.
        if (Level.getSpeedupLevel() > OptimizationLevel::O1.getSpeedupLevel() &&
            EnablePromoteKernelArguments)
          FPM.addPass(AMDGPUPromoteKernelArgumentsPass());

        // Add infer address spaces pass to the opt pipeline after inlining
        // but before SROA to increase SROA opportunities.
        FPM.addPass(InferAddressSpacesPass());

        // This should run after inlining to have any chance of doing
        // anything, and before other cleanup optimizations.
        FPM.addPass(AMDGPULowerKernelAttributesPass());

        if (Level != OptimizationLevel::O0) {
          // Promote alloca to vector before SROA and loop unroll. If we
          // manage to eliminate allocas before unroll we may choose to unroll
          // less.
          FPM.addPass(AMDGPUPromoteAllocaToVectorPass(*this));
        }

        PM.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM)));
      });

  // FIXME: Why is AMDGPUAttributor not in CGSCC?
  PB.registerOptimizerLastEPCallback(
      [this](ModulePassManager &MPM, OptimizationLevel Level) {
        if (Level != OptimizationLevel::O0) {
          MPM.addPass(AMDGPUAttributorPass(*this));
        }
      });

  PB.registerFullLinkTimeOptimizationLastEPCallback(
      [this](ModulePassManager &PM, OptimizationLevel Level) {
        // We want to support the -lto-partitions=N option as "best effort".
        // For that, we need to lower LDS earlier in the pipeline before the
        // module is partitioned for codegen.
        if (EnableLowerModuleLDS)
          PM.addPass(AMDGPULowerModuleLDSPass(*this));
      });

  PB.registerRegClassFilterParsingCallback(
      [](StringRef FilterName) -> RegAllocFilterFunc {
        if (FilterName == "sgpr")
          return onlyAllocateSGPRs;
        if (FilterName == "vgpr")
          return onlyAllocateVGPRs;
        return nullptr;
      });
}

int64_t AMDGPUTargetMachine::getNullPointerValue(unsigned AddrSpace) {
  return (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
          AddrSpace == AMDGPUAS::PRIVATE_ADDRESS ||
          AddrSpace == AMDGPUAS::REGION_ADDRESS)
             ? -1
             : 0;
}
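
// Address 0 is a valid location in LDS and scratch, so "null" for those
// spaces is the all-ones pattern; e.g. casting a null flat pointer to
// ptr addrspace(3) materializes -1 rather than 0.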

bool AMDGPUTargetMachine::isNoopAddrSpaceCast(unsigned SrcAS,
                                              unsigned DestAS) const {
  return AMDGPU::isFlatGlobalAddrSpace(SrcAS) &&
         AMDGPU::isFlatGlobalAddrSpace(DestAS);
}
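
// Flat and global pointers share the same 64-bit representation, so e.g.
//   addrspacecast ptr addrspace(1) %p to ptr
// is free; only casts involving the 32-bit local/private apertures need
// actual code.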

unsigned AMDGPUTargetMachine::getAssumedAddrSpace(const Value *V) const {
  const auto *LD = dyn_cast<LoadInst>(V);
  if (!LD)
    return AMDGPUAS::UNKNOWN_ADDRESS_SPACE;

  // It must be a generic pointer loaded.
  assert(V->getType()->isPointerTy() &&
         V->getType()->getPointerAddressSpace() == AMDGPUAS::FLAT_ADDRESS);

  const auto *Ptr = LD->getPointerOperand();
  if (Ptr->getType()->getPointerAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
    return AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
  // For a generic pointer loaded from the constant memory, it could be assumed
  // as a global pointer since the constant memory is only populated on the
  // host side. As implied by the offload programming model, only global
  // pointers could be referenced on the host side.
  return AMDGPUAS::GLOBAL_ADDRESS;
}

std::pair<const Value *, unsigned>
AMDGPUTargetMachine::getPredicatedAddrSpace(const Value *V) const {
  if (auto *II = dyn_cast<IntrinsicInst>(V)) {
    switch (II->getIntrinsicID()) {
    case Intrinsic::amdgcn_is_shared:
      return std::pair(II->getArgOperand(0), AMDGPUAS::LOCAL_ADDRESS);
    case Intrinsic::amdgcn_is_private:
      return std::pair(II->getArgOperand(0), AMDGPUAS::PRIVATE_ADDRESS);
    default:
      break;
    }
    return std::pair(nullptr, -1);
  }
  // Check the global pointer predication based on
  // (!is_shared(p) && !is_private(p)). Note that logic 'and' is commutative
  // and the order of 'is_shared' and 'is_private' is not significant.
  Value *Ptr;
  if (match(
          const_cast<Value *>(V),
          m_c_And(m_Not(m_Intrinsic<Intrinsic::amdgcn_is_shared>(m_Value(Ptr))),
                  m_Not(m_Intrinsic<Intrinsic::amdgcn_is_private>(
                      m_Deferred(Ptr))))))
    return std::pair(Ptr, AMDGPUAS::GLOBAL_ADDRESS);

  return std::pair(nullptr, -1);
}
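
// The fold above fires on IR shaped like this (illustrative only):
//   %s  = call i1 @llvm.amdgcn.is.shared(ptr %p)
//   %pr = call i1 @llvm.amdgcn.is.private(ptr %p)
//   %ns = xor i1 %s, true
//   %np = xor i1 %pr, true
//   %g  = and i1 %ns, %np   ; under %g, %p can be treated as global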

unsigned
AMDGPUTargetMachine::getAddressSpaceForPseudoSourceKind(unsigned Kind) const {
  switch (Kind) {
  case PseudoSourceValue::Stack:
  case PseudoSourceValue::FixedStack:
    return AMDGPUAS::PRIVATE_ADDRESS;
  case PseudoSourceValue::ConstantPool:
  case PseudoSourceValue::GOT:
  case PseudoSourceValue::JumpTable:
  case PseudoSourceValue::GlobalValueCallEntry:
  case PseudoSourceValue::ExternalSymbolCallEntry:
    return AMDGPUAS::CONSTANT_ADDRESS;
  }
  return AMDGPUAS::FLAT_ADDRESS;
}

bool AMDGPUTargetMachine::splitModule(
    Module &M, unsigned NumParts,
    function_ref<void(std::unique_ptr<Module> MPart)> ModuleCallback) {
  // FIXME(?): Would be better to use an already existing Analysis/PassManager,
  // but all current users of this API don't have one ready and would need to
  // create one anyway. Let's hide the boilerplate for now to keep it simple.

  LoopAnalysisManager LAM;
  FunctionAnalysisManager FAM;
  CGSCCAnalysisManager CGAM;
  ModuleAnalysisManager MAM;

  PassBuilder PB(this);
  PB.registerModuleAnalyses(MAM);
  PB.registerFunctionAnalyses(FAM);
  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

  ModulePassManager MPM;
  MPM.addPass(AMDGPUSplitModulePass(NumParts, ModuleCallback));
  MPM.run(M, MAM);
  return true;
}
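
// Caller-side sketch (hypothetical driver code; the thread pool and helper
// are invented for illustration):
//   TM->splitModule(M, /*NumParts=*/4, [&](std::unique_ptr<Module> Part) {
//     Pool.async(codegenPartition, std::move(Part));
//   });
// Each callback invocation receives one self-contained partition of M.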
862*0fca6ea1SDimitry Andric
8630b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
8640b57cec5SDimitry Andric // GCN Target Machine (SI+)
8650b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
8660b57cec5SDimitry Andric
GCNTargetMachine(const Target & T,const Triple & TT,StringRef CPU,StringRef FS,const TargetOptions & Options,std::optional<Reloc::Model> RM,std::optional<CodeModel::Model> CM,CodeGenOptLevel OL,bool JIT)8670b57cec5SDimitry Andric GCNTargetMachine::GCNTargetMachine(const Target &T, const Triple &TT,
8680b57cec5SDimitry Andric StringRef CPU, StringRef FS,
869*0fca6ea1SDimitry Andric const TargetOptions &Options,
870bdd1243dSDimitry Andric std::optional<Reloc::Model> RM,
871bdd1243dSDimitry Andric std::optional<CodeModel::Model> CM,
8725f757f3fSDimitry Andric CodeGenOptLevel OL, bool JIT)
8730b57cec5SDimitry Andric : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
8740b57cec5SDimitry Andric
875349cc55cSDimitry Andric const TargetSubtargetInfo *
getSubtargetImpl(const Function & F) const876349cc55cSDimitry Andric GCNTargetMachine::getSubtargetImpl(const Function &F) const {
8770b57cec5SDimitry Andric StringRef GPU = getGPUName(F);
8780b57cec5SDimitry Andric StringRef FS = getFeatureString(F);
8790b57cec5SDimitry Andric
8800b57cec5SDimitry Andric SmallString<128> SubtargetKey(GPU);
8810b57cec5SDimitry Andric SubtargetKey.append(FS);
8820b57cec5SDimitry Andric
8830b57cec5SDimitry Andric auto &I = SubtargetMap[SubtargetKey];
8840b57cec5SDimitry Andric if (!I) {
8850b57cec5SDimitry Andric // This needs to be done before we create a new subtarget since any
8860b57cec5SDimitry Andric // creation will depend on the TM and the code generation flags on the
8870b57cec5SDimitry Andric // function that reside in TargetOptions.
8880b57cec5SDimitry Andric resetTargetOptions(F);
8898bcb0991SDimitry Andric I = std::make_unique<GCNSubtarget>(TargetTriple, GPU, FS, *this);
8900b57cec5SDimitry Andric }
8910b57cec5SDimitry Andric
8920b57cec5SDimitry Andric I->setScalarizeGlobalBehavior(ScalarizeGlobal);
8930b57cec5SDimitry Andric
8940b57cec5SDimitry Andric return I.get();
8950b57cec5SDimitry Andric }
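// Note: the SubtargetMap key above is simply the GPU name concatenated with
// the feature string, so functions whose target-cpu/target-features
// attributes match exactly share one cached GCNSubtarget, while any
// difference in either string allocates a fresh subtarget.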
8960b57cec5SDimitry Andric
8970b57cec5SDimitry Andric TargetTransformInfo
89881ad6265SDimitry Andric GCNTargetMachine::getTargetTransformInfo(const Function &F) const {
8990b57cec5SDimitry Andric return TargetTransformInfo(GCNTTIImpl(this, F));
9000b57cec5SDimitry Andric }
9010b57cec5SDimitry Andric
9020b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
9030b57cec5SDimitry Andric // AMDGPU Pass Setup
9040b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
9050b57cec5SDimitry Andric
906349cc55cSDimitry Andric std::unique_ptr<CSEConfigBase> llvm::AMDGPUPassConfig::getCSEConfig() const {
9070b57cec5SDimitry Andric return getStandardCSEConfigForOpt(TM->getOptLevel());
9080b57cec5SDimitry Andric }
9090b57cec5SDimitry Andric
910349cc55cSDimitry Andric namespace {
9110b57cec5SDimitry Andric
9120b57cec5SDimitry Andric class GCNPassConfig final : public AMDGPUPassConfig {
9130b57cec5SDimitry Andric public:
9140b57cec5SDimitry Andric GCNPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
9150b57cec5SDimitry Andric : AMDGPUPassConfig(TM, PM) {
9160b57cec5SDimitry Andric // It is necessary to know the register usage of the entire call graph. We
9170b57cec5SDimitry Andric // allow calls without EnableAMDGPUFunctionCalls if they are marked
9180b57cec5SDimitry Andric // noinline, so this is always required.
9190b57cec5SDimitry Andric setRequiresCodeGenSCCOrder(true);
920349cc55cSDimitry Andric substitutePass(&PostRASchedulerID, &PostMachineSchedulerID);
9210b57cec5SDimitry Andric }
9220b57cec5SDimitry Andric
9230b57cec5SDimitry Andric GCNTargetMachine &getGCNTargetMachine() const {
9240b57cec5SDimitry Andric return getTM<GCNTargetMachine>();
9250b57cec5SDimitry Andric }
9260b57cec5SDimitry Andric
9270b57cec5SDimitry Andric ScheduleDAGInstrs *
9280b57cec5SDimitry Andric createMachineScheduler(MachineSchedContext *C) const override;
9290b57cec5SDimitry Andric
930349cc55cSDimitry Andric ScheduleDAGInstrs *
931349cc55cSDimitry Andric createPostMachineScheduler(MachineSchedContext *C) const override {
932bdd1243dSDimitry Andric ScheduleDAGMI *DAG = new GCNPostScheduleDAGMILive(
933bdd1243dSDimitry Andric C, std::make_unique<PostGenericScheduler>(C),
934bdd1243dSDimitry Andric /*RemoveKillFlags=*/true);
935349cc55cSDimitry Andric const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
936349cc55cSDimitry Andric DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
93781ad6265SDimitry Andric if (ST.shouldClusterStores())
93881ad6265SDimitry Andric DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
939349cc55cSDimitry Andric DAG->addMutation(ST.createFillMFMAShadowMutation(DAG->TII));
940*0fca6ea1SDimitry Andric DAG->addMutation(
941*0fca6ea1SDimitry Andric createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::PostRA));
9425f757f3fSDimitry Andric if (isPassEnabled(EnableVOPD, CodeGenOptLevel::Less))
943753f127fSDimitry Andric DAG->addMutation(createVOPDPairingMutation());
944349cc55cSDimitry Andric return DAG;
945349cc55cSDimitry Andric }
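// The mutations registered above shape the post-RA schedule: load (and,
// where the subtarget profits, store) clustering keeps adjacent memory
// operations together, the MFMA shadow mutation fills latency shadows of
// MFMA instructions, IGroupLP enforces scheduling-group constraints for the
// post-RA phase, and VOPD pairing is attempted only when enabled.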
946349cc55cSDimitry Andric
9470b57cec5SDimitry Andric bool addPreISel() override;
9480b57cec5SDimitry Andric void addMachineSSAOptimization() override;
9490b57cec5SDimitry Andric bool addILPOpts() override;
9500b57cec5SDimitry Andric bool addInstSelector() override;
9510b57cec5SDimitry Andric bool addIRTranslator() override;
9525ffd83dbSDimitry Andric void addPreLegalizeMachineIR() override;
9530b57cec5SDimitry Andric bool addLegalizeMachineIR() override;
9545ffd83dbSDimitry Andric void addPreRegBankSelect() override;
9550b57cec5SDimitry Andric bool addRegBankSelect() override;
956fe6060f1SDimitry Andric void addPreGlobalInstructionSelect() override;
9570b57cec5SDimitry Andric bool addGlobalInstructionSelect() override;
9580b57cec5SDimitry Andric void addFastRegAlloc() override;
9590b57cec5SDimitry Andric void addOptimizedRegAlloc() override;
960fe6060f1SDimitry Andric
961fe6060f1SDimitry Andric FunctionPass *createSGPRAllocPass(bool Optimized);
962fe6060f1SDimitry Andric FunctionPass *createVGPRAllocPass(bool Optimized);
963fe6060f1SDimitry Andric FunctionPass *createRegAllocPass(bool Optimized) override;
964fe6060f1SDimitry Andric
965fe6060f1SDimitry Andric bool addRegAssignAndRewriteFast() override;
966fe6060f1SDimitry Andric bool addRegAssignAndRewriteOptimized() override;
967fe6060f1SDimitry Andric
9680b57cec5SDimitry Andric void addPreRegAlloc() override;
9690b57cec5SDimitry Andric bool addPreRewrite() override;
9700b57cec5SDimitry Andric void addPostRegAlloc() override;
9710b57cec5SDimitry Andric void addPreSched2() override;
9720b57cec5SDimitry Andric void addPreEmitPass() override;
9730b57cec5SDimitry Andric };
9740b57cec5SDimitry Andric
9750b57cec5SDimitry Andric } // end anonymous namespace
9760b57cec5SDimitry Andric
977349cc55cSDimitry Andric AMDGPUPassConfig::AMDGPUPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
978349cc55cSDimitry Andric : TargetPassConfig(TM, PM) {
979349cc55cSDimitry Andric // Exceptions and StackMaps are not supported, so these passes will never do
980349cc55cSDimitry Andric // anything.
981349cc55cSDimitry Andric disablePass(&StackMapLivenessID);
982349cc55cSDimitry Andric disablePass(&FuncletLayoutID);
983349cc55cSDimitry Andric // Garbage collection is not supported.
984349cc55cSDimitry Andric disablePass(&GCLoweringID);
985349cc55cSDimitry Andric disablePass(&ShadowStackGCLoweringID);
986349cc55cSDimitry Andric }
987349cc55cSDimitry Andric
9880b57cec5SDimitry Andric void AMDGPUPassConfig::addEarlyCSEOrGVNPass() {
9895f757f3fSDimitry Andric if (getOptLevel() == CodeGenOptLevel::Aggressive)
9900b57cec5SDimitry Andric addPass(createGVNPass());
9910b57cec5SDimitry Andric else
9920b57cec5SDimitry Andric addPass(createEarlyCSEPass());
9930b57cec5SDimitry Andric }
9940b57cec5SDimitry Andric
9950b57cec5SDimitry Andric void AMDGPUPassConfig::addStraightLineScalarOptimizationPasses() {
996cb14a3feSDimitry Andric if (isPassEnabled(EnableLoopPrefetch, CodeGenOptLevel::Aggressive))
997cb14a3feSDimitry Andric addPass(createLoopDataPrefetchPass());
9980b57cec5SDimitry Andric addPass(createSeparateConstOffsetFromGEPPass());
999349cc55cSDimitry Andric // ReassociateGEPs exposes more opportunities for SLSR. See
10000b57cec5SDimitry Andric // the example in reassociate-geps-and-slsr.ll.
10010b57cec5SDimitry Andric addPass(createStraightLineStrengthReducePass());
10020b57cec5SDimitry Andric // SeparateConstOffsetFromGEP and SLSR create common expressions which GVN or
10030b57cec5SDimitry Andric // EarlyCSE can reuse.
10040b57cec5SDimitry Andric addEarlyCSEOrGVNPass();
10050b57cec5SDimitry Andric // Run NaryReassociate after EarlyCSE/GVN to be more effective.
10060b57cec5SDimitry Andric addPass(createNaryReassociatePass());
10070b57cec5SDimitry Andric // NaryReassociate on GEPs creates redundant common expressions, so run
10080b57cec5SDimitry Andric // EarlyCSE after it.
10090b57cec5SDimitry Andric addPass(createEarlyCSEPass());
10100b57cec5SDimitry Andric }
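// Illustrative IR (assumed, for exposition) of what this sequence enables:
//   %gep0 = getelementptr i32, ptr %p, i64 %i
//   %gep1 = getelementptr i32, ptr %p, i64 %j   ; where %j = add i64 %i, 1
// SeparateConstOffsetFromGEP/SLSR rewrite %gep1 as %gep0 plus a constant
// offset, and the EarlyCSE/GVN runs above then clean up the now-redundant
// address computation.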
10110b57cec5SDimitry Andric
10120b57cec5SDimitry Andric void AMDGPUPassConfig::addIRPasses() {
10130b57cec5SDimitry Andric const AMDGPUTargetMachine &TM = getAMDGPUTargetMachine();
10140b57cec5SDimitry Andric
10155f757f3fSDimitry Andric Triple::ArchType Arch = TM.getTargetTriple().getArch();
10165f757f3fSDimitry Andric if (RemoveIncompatibleFunctions && Arch == Triple::amdgcn)
10175f757f3fSDimitry Andric addPass(createAMDGPURemoveIncompatibleFunctionsPass(&TM));
10185f757f3fSDimitry Andric
10190b57cec5SDimitry Andric // There is no reason to run these.
10200b57cec5SDimitry Andric disablePass(&StackMapLivenessID);
10210b57cec5SDimitry Andric disablePass(&FuncletLayoutID);
10220b57cec5SDimitry Andric disablePass(&PatchableFunctionID);
10230b57cec5SDimitry Andric
10248bcb0991SDimitry Andric addPass(createAMDGPUPrintfRuntimeBinding());
102506c3fb27SDimitry Andric if (LowerCtorDtor)
1026bdd1243dSDimitry Andric addPass(createAMDGPUCtorDtorLoweringLegacyPass());
10278bcb0991SDimitry Andric
10285f757f3fSDimitry Andric if (isPassEnabled(EnableImageIntrinsicOptimizer))
10295f757f3fSDimitry Andric addPass(createAMDGPUImageIntrinsicOptimizerPass(&TM));
10305f757f3fSDimitry Andric
1031*0fca6ea1SDimitry Andric // This can be disabled by passing ::Disable here or on the command line
1032*0fca6ea1SDimitry Andric // with --expand-variadics-override=disable.
1033*0fca6ea1SDimitry Andric addPass(createExpandVariadicsPass(ExpandVariadicsMode::Lowering));
1034*0fca6ea1SDimitry Andric
10350b57cec5SDimitry Andric // Function calls are not supported, so make sure we inline everything.
10360b57cec5SDimitry Andric addPass(createAMDGPUAlwaysInlinePass());
10370b57cec5SDimitry Andric addPass(createAlwaysInlinerLegacyPass());
10380b57cec5SDimitry Andric
10390b57cec5SDimitry Andric // Handle uses of OpenCL image2d_t, image3d_t and sampler_t arguments.
10405f757f3fSDimitry Andric if (Arch == Triple::r600)
10410b57cec5SDimitry Andric addPass(createR600OpenCLImageTypeLoweringPass());
10420b57cec5SDimitry Andric
10430b57cec5SDimitry Andric // Replace OpenCL enqueued block function pointers with global variables.
10440b57cec5SDimitry Andric addPass(createAMDGPUOpenCLEnqueuedBlockLoweringPass());
10450b57cec5SDimitry Andric
104606c3fb27SDimitry Andric // Runs before PromoteAlloca so the latter can account for function uses
1047fe6060f1SDimitry Andric if (EnableLowerModuleLDS) {
10485f757f3fSDimitry Andric addPass(createAMDGPULowerModuleLDSLegacyPass(&TM));
1049fe6060f1SDimitry Andric }
1050fe6060f1SDimitry Andric
10515f757f3fSDimitry Andric if (TM.getOptLevel() > CodeGenOptLevel::None)
10520b57cec5SDimitry Andric addPass(createInferAddressSpacesPass());
1053fe6060f1SDimitry Andric
10545f757f3fSDimitry Andric // Run the atomic optimizer before AtomicExpand.
10555f757f3fSDimitry Andric if ((TM.getTargetTriple().getArch() == Triple::amdgcn) &&
10565f757f3fSDimitry Andric (TM.getOptLevel() >= CodeGenOptLevel::Less) &&
10575f757f3fSDimitry Andric (AMDGPUAtomicOptimizerStrategy != ScanOptions::None)) {
10585f757f3fSDimitry Andric addPass(createAMDGPUAtomicOptimizerPass(AMDGPUAtomicOptimizerStrategy));
10595f757f3fSDimitry Andric }
10605f757f3fSDimitry Andric
1061*0fca6ea1SDimitry Andric addPass(createAtomicExpandLegacyPass());
1062fe6060f1SDimitry Andric
10635f757f3fSDimitry Andric if (TM.getOptLevel() > CodeGenOptLevel::None) {
10640b57cec5SDimitry Andric addPass(createAMDGPUPromoteAlloca());
10650b57cec5SDimitry Andric
1066fe6060f1SDimitry Andric if (isPassEnabled(EnableScalarIRPasses))
10670b57cec5SDimitry Andric addStraightLineScalarOptimizationPasses();
10680b57cec5SDimitry Andric
10690b57cec5SDimitry Andric if (EnableAMDGPUAliasAnalysis) {
10700b57cec5SDimitry Andric addPass(createAMDGPUAAWrapperPass());
10710b57cec5SDimitry Andric addPass(createExternalAAWrapperPass([](Pass &P, Function &,
10720b57cec5SDimitry Andric AAResults &AAR) {
10730b57cec5SDimitry Andric if (auto *WrapperPass = P.getAnalysisIfAvailable<AMDGPUAAWrapperPass>())
10740b57cec5SDimitry Andric AAR.addAAResult(WrapperPass->getResult());
10750b57cec5SDimitry Andric }));
10760b57cec5SDimitry Andric }
10770b57cec5SDimitry Andric
10788bcb0991SDimitry Andric if (TM.getTargetTriple().getArch() == Triple::amdgcn) {
10798bcb0991SDimitry Andric // TODO: May want to move later or split into an early and late one.
10808bcb0991SDimitry Andric addPass(createAMDGPUCodeGenPreparePass());
10818bcb0991SDimitry Andric }
108206c3fb27SDimitry Andric
108306c3fb27SDimitry Andric // Try to hoist loop-invariant parts of divisions that AMDGPUCodeGenPrepare
108406c3fb27SDimitry Andric // may have expanded.
10855f757f3fSDimitry Andric if (TM.getOptLevel() > CodeGenOptLevel::Less)
108606c3fb27SDimitry Andric addPass(createLICMPass());
1087fe6060f1SDimitry Andric }
10888bcb0991SDimitry Andric
10890b57cec5SDimitry Andric TargetPassConfig::addIRPasses();
10900b57cec5SDimitry Andric
10910b57cec5SDimitry Andric // EarlyCSE is not always strong enough to clean up what LSR produces. For
10920b57cec5SDimitry Andric // example, GVN can combine
10930b57cec5SDimitry Andric //
10940b57cec5SDimitry Andric // %0 = add %a, %b
10950b57cec5SDimitry Andric // %1 = add %b, %a
10960b57cec5SDimitry Andric //
10970b57cec5SDimitry Andric // and
10980b57cec5SDimitry Andric //
10990b57cec5SDimitry Andric // %0 = shl nsw %a, 2
11000b57cec5SDimitry Andric // %1 = shl %a, 2
11010b57cec5SDimitry Andric //
11020b57cec5SDimitry Andric // but EarlyCSE can do neither of them.
1103fe6060f1SDimitry Andric if (isPassEnabled(EnableScalarIRPasses))
11040b57cec5SDimitry Andric addEarlyCSEOrGVNPass();
11050b57cec5SDimitry Andric }
11060b57cec5SDimitry Andric
11070b57cec5SDimitry Andric void AMDGPUPassConfig::addCodeGenPrepare() {
1108349cc55cSDimitry Andric if (TM->getTargetTriple().getArch() == Triple::amdgcn) {
1109349cc55cSDimitry Andric // FIXME: This pass adds 2 hacky attributes that can be replaced with an
1110349cc55cSDimitry Andric // analysis, and should be removed.
11110b57cec5SDimitry Andric addPass(createAMDGPUAnnotateKernelFeaturesPass());
1112349cc55cSDimitry Andric }
11130b57cec5SDimitry Andric
11140b57cec5SDimitry Andric if (TM->getTargetTriple().getArch() == Triple::amdgcn &&
11150b57cec5SDimitry Andric EnableLowerKernelArguments)
11160b57cec5SDimitry Andric addPass(createAMDGPULowerKernelArgumentsPass());
11170b57cec5SDimitry Andric
1118*0fca6ea1SDimitry Andric if (TM->getTargetTriple().getArch() == Triple::amdgcn) {
1119*0fca6ea1SDimitry Andric // This lowering has been placed after codegenprepare to take advantage of
1120*0fca6ea1SDimitry Andric // address mode matching (which is why it isn't put with the LDS lowerings).
1121*0fca6ea1SDimitry Andric // It could be placed anywhere before uniformity annotations (an analysis
1122*0fca6ea1SDimitry Andric // that it changes by splitting up fat pointers into their components)
1123*0fca6ea1SDimitry Andric // but has been put before switch lowering and CFG flattening so that those
1124*0fca6ea1SDimitry Andric // passes can run on the more optimized control flow this pass creates in
1125*0fca6ea1SDimitry Andric // many cases.
1126*0fca6ea1SDimitry Andric //
1127*0fca6ea1SDimitry Andric // FIXME: This should ideally be put after the LoadStoreVectorizer.
1128*0fca6ea1SDimitry Andric // However, due to some annoying facts about ResourceUsageAnalysis,
1129*0fca6ea1SDimitry Andric // (especially as exercised in the resource-usage-dead-function test),
1130*0fca6ea1SDimitry Andric // we need all the function passes from codegenprepare all the way through
1131*0fca6ea1SDimitry Andric // said resource usage analysis to run on the call graph produced
1132*0fca6ea1SDimitry Andric // before codegenprepare runs (because codegenprepare will knock some
1133*0fca6ea1SDimitry Andric // nodes out of the graph, which leads to function-level passes not
1134*0fca6ea1SDimitry Andric // being run on them, which causes crashes in the resource usage analysis).
1135*0fca6ea1SDimitry Andric addPass(createAMDGPULowerBufferFatPointersPass());
1136*0fca6ea1SDimitry Andric // In accordance with the above FIXME, manually force all the
1137*0fca6ea1SDimitry Andric // function-level passes into a CGSCCPassManager.
1138*0fca6ea1SDimitry Andric addPass(new DummyCGSCCPass());
1139*0fca6ea1SDimitry Andric }
1140*0fca6ea1SDimitry Andric
11410b57cec5SDimitry Andric TargetPassConfig::addCodeGenPrepare();
11420b57cec5SDimitry Andric
1143fe6060f1SDimitry Andric if (isPassEnabled(EnableLoadStoreVectorizer))
11440b57cec5SDimitry Andric addPass(createLoadStoreVectorizerPass());
11455ffd83dbSDimitry Andric
11465ffd83dbSDimitry Andric // The LowerSwitch pass may introduce unreachable blocks that can cause
11475ffd83dbSDimitry Andric // unexpected behavior for subsequent passes. Placing it here ensures those
11485ffd83dbSDimitry Andric // blocks are cleaned up by UnreachableBlockElim, which is inserted next in
11495ffd83dbSDimitry Andric // the pass flow.
11505ffd83dbSDimitry Andric addPass(createLowerSwitchPass());
11510b57cec5SDimitry Andric }
11520b57cec5SDimitry Andric
11530b57cec5SDimitry Andric bool AMDGPUPassConfig::addPreISel() {
11545f757f3fSDimitry Andric if (TM->getOptLevel() > CodeGenOptLevel::None)
11550b57cec5SDimitry Andric addPass(createFlattenCFGPass());
11560b57cec5SDimitry Andric return false;
11570b57cec5SDimitry Andric }
11580b57cec5SDimitry Andric
11590b57cec5SDimitry Andric bool AMDGPUPassConfig::addInstSelector() {
1160bdd1243dSDimitry Andric addPass(createAMDGPUISelDag(getAMDGPUTargetMachine(), getOptLevel()));
11610b57cec5SDimitry Andric return false;
11620b57cec5SDimitry Andric }
11630b57cec5SDimitry Andric
11640b57cec5SDimitry Andric bool AMDGPUPassConfig::addGCPasses() {
11650b57cec5SDimitry Andric // Do nothing. GC is not supported.
11660b57cec5SDimitry Andric return false;
11670b57cec5SDimitry Andric }
11680b57cec5SDimitry Andric
1169349cc55cSDimitry Andric llvm::ScheduleDAGInstrs *
1170349cc55cSDimitry Andric AMDGPUPassConfig::createMachineScheduler(MachineSchedContext *C) const {
117181ad6265SDimitry Andric const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
1172349cc55cSDimitry Andric ScheduleDAGMILive *DAG = createGenericSchedLive(C);
1173349cc55cSDimitry Andric DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
117481ad6265SDimitry Andric if (ST.shouldClusterStores())
117581ad6265SDimitry Andric DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
1176349cc55cSDimitry Andric return DAG;
11770b57cec5SDimitry Andric }
11780b57cec5SDimitry Andric
1179bdd1243dSDimitry Andric MachineFunctionInfo *R600TargetMachine::createMachineFunctionInfo(
1180bdd1243dSDimitry Andric BumpPtrAllocator &Allocator, const Function &F,
1181bdd1243dSDimitry Andric const TargetSubtargetInfo *STI) const {
1182bdd1243dSDimitry Andric return R600MachineFunctionInfo::create<R600MachineFunctionInfo>(
1183bdd1243dSDimitry Andric Allocator, F, static_cast<const R600Subtarget *>(STI));
1184bdd1243dSDimitry Andric }
1185bdd1243dSDimitry Andric
11860b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
11870b57cec5SDimitry Andric // GCN Pass Setup
11880b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
11890b57cec5SDimitry Andric
11900b57cec5SDimitry Andric ScheduleDAGInstrs *GCNPassConfig::createMachineScheduler(
11910b57cec5SDimitry Andric MachineSchedContext *C) const {
11920b57cec5SDimitry Andric const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
11930b57cec5SDimitry Andric if (ST.enableSIScheduler())
11940b57cec5SDimitry Andric return createSIMachineScheduler(C);
1195bdd1243dSDimitry Andric
1196bdd1243dSDimitry Andric if (EnableMaxIlpSchedStrategy)
1197bdd1243dSDimitry Andric return createGCNMaxILPMachineScheduler(C);
1198bdd1243dSDimitry Andric
11990b57cec5SDimitry Andric return createGCNMaxOccupancyMachineScheduler(C);
12000b57cec5SDimitry Andric }
12010b57cec5SDimitry Andric
12020b57cec5SDimitry Andric bool GCNPassConfig::addPreISel() {
12030b57cec5SDimitry Andric AMDGPUPassConfig::addPreISel();
12040b57cec5SDimitry Andric
12055f757f3fSDimitry Andric if (TM->getOptLevel() > CodeGenOptLevel::None)
1206*0fca6ea1SDimitry Andric addPass(createSinkingPass());
1207fe6060f1SDimitry Andric
12085f757f3fSDimitry Andric if (TM->getOptLevel() > CodeGenOptLevel::None)
1209*0fca6ea1SDimitry Andric addPass(createAMDGPULateCodeGenPreparePass());
12100b57cec5SDimitry Andric
12110b57cec5SDimitry Andric // Merge divergent exit nodes. StructurizeCFG won't recognize the multi-exit
12120b57cec5SDimitry Andric // regions formed by them.
12130b57cec5SDimitry Andric addPass(&AMDGPUUnifyDivergentExitNodesID);
1214*0fca6ea1SDimitry Andric if (!LateCFGStructurize && !DisableStructurizer) {
12155ffd83dbSDimitry Andric if (EnableStructurizerWorkarounds) {
12165ffd83dbSDimitry Andric addPass(createFixIrreduciblePass());
12175ffd83dbSDimitry Andric addPass(createUnifyLoopExitsPass());
12185ffd83dbSDimitry Andric }
12195ffd83dbSDimitry Andric addPass(createStructurizeCFGPass(false)); // SkipUniformRegions = false
12200b57cec5SDimitry Andric }
12210b57cec5SDimitry Andric addPass(createAMDGPUAnnotateUniformValues());
1222*0fca6ea1SDimitry Andric if (!LateCFGStructurize && !DisableStructurizer) {
12230b57cec5SDimitry Andric addPass(createSIAnnotateControlFlowPass());
1224bdd1243dSDimitry Andric // TODO: Move this right after structurizeCFG to avoid extra divergence
1225bdd1243dSDimitry Andric // analysis. This depends on stopping SIAnnotateControlFlow from making
1226bdd1243dSDimitry Andric // control flow modifications.
12275f757f3fSDimitry Andric addPass(createAMDGPURewriteUndefForPHILegacyPass());
12280b57cec5SDimitry Andric }
12290b57cec5SDimitry Andric addPass(createLCSSAPass());
12300b57cec5SDimitry Andric
12315f757f3fSDimitry Andric if (TM->getOptLevel() > CodeGenOptLevel::Less)
1232fe6060f1SDimitry Andric addPass(&AMDGPUPerfHintAnalysisID);
1233fe6060f1SDimitry Andric
12340b57cec5SDimitry Andric return false;
12350b57cec5SDimitry Andric }
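// Note on the structurizer pipeline above: when enabled, the workaround
// passes (FixIrreducible, UnifyLoopExits) first convert irreducible control
// flow and multi-exit loops into forms StructurizeCFG can handle, which is
// why they are gated on the same conditions as the structurizer itself.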
12360b57cec5SDimitry Andric
12370b57cec5SDimitry Andric void GCNPassConfig::addMachineSSAOptimization() {
12380b57cec5SDimitry Andric TargetPassConfig::addMachineSSAOptimization();
12390b57cec5SDimitry Andric
12400b57cec5SDimitry Andric // We want to fold operands after PeepholeOptimizer has run (or as part of
12410b57cec5SDimitry Andric // it), because it will eliminate extra copies making it easier to fold the
12420b57cec5SDimitry Andric // real source operand. We want to eliminate dead instructions after, so that
12430b57cec5SDimitry Andric // we see fewer uses of the copies. We then need to clean up the dead
12440b57cec5SDimitry Andric // instructions leftover after the operands are folded as well.
12450b57cec5SDimitry Andric //
12460b57cec5SDimitry Andric // XXX - Can we get away without running DeadMachineInstructionElim again?
12470b57cec5SDimitry Andric addPass(&SIFoldOperandsID);
12480b57cec5SDimitry Andric if (EnableDPPCombine)
12490b57cec5SDimitry Andric addPass(&GCNDPPCombineID);
12500b57cec5SDimitry Andric addPass(&SILoadStoreOptimizerID);
1251fe6060f1SDimitry Andric if (isPassEnabled(EnableSDWAPeephole)) {
12520b57cec5SDimitry Andric addPass(&SIPeepholeSDWAID);
12530b57cec5SDimitry Andric addPass(&EarlyMachineLICMID);
12540b57cec5SDimitry Andric addPass(&MachineCSEID);
12550b57cec5SDimitry Andric addPass(&SIFoldOperandsID);
12560b57cec5SDimitry Andric }
1257fe6060f1SDimitry Andric addPass(&DeadMachineInstructionElimID);
12580b57cec5SDimitry Andric addPass(createSIShrinkInstructionsPass());
12590b57cec5SDimitry Andric }
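// Illustrative MIR (assumed, for exposition) of the fold/DCE interplay
// described above:
//   %1:vgpr_32 = COPY %0:vgpr_32
//   %2:vgpr_32 = V_ADD_F32_e32 %1, %3, implicit $mode, implicit $exec
// SIFoldOperands folds %0 directly into the V_ADD_F32_e32, leaving the COPY
// dead for DeadMachineInstructionElim to remove.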
12600b57cec5SDimitry Andric
12610b57cec5SDimitry Andric bool GCNPassConfig::addILPOpts() {
12620b57cec5SDimitry Andric if (EnableEarlyIfConversion)
12630b57cec5SDimitry Andric addPass(&EarlyIfConverterID);
12640b57cec5SDimitry Andric
12650b57cec5SDimitry Andric TargetPassConfig::addILPOpts();
12660b57cec5SDimitry Andric return false;
12670b57cec5SDimitry Andric }
12680b57cec5SDimitry Andric
12690b57cec5SDimitry Andric bool GCNPassConfig::addInstSelector() {
12700b57cec5SDimitry Andric AMDGPUPassConfig::addInstSelector();
12710b57cec5SDimitry Andric addPass(&SIFixSGPRCopiesID);
12720b57cec5SDimitry Andric addPass(createSILowerI1CopiesPass());
12730b57cec5SDimitry Andric return false;
12740b57cec5SDimitry Andric }
12750b57cec5SDimitry Andric
12760b57cec5SDimitry Andric bool GCNPassConfig::addIRTranslator() {
1277e8d8bef9SDimitry Andric addPass(new IRTranslator(getOptLevel()));
12780b57cec5SDimitry Andric return false;
12790b57cec5SDimitry Andric }
12800b57cec5SDimitry Andric
12815ffd83dbSDimitry Andric void GCNPassConfig::addPreLegalizeMachineIR() {
12825f757f3fSDimitry Andric bool IsOptNone = getOptLevel() == CodeGenOptLevel::None;
12835ffd83dbSDimitry Andric addPass(createAMDGPUPreLegalizeCombiner(IsOptNone));
12845ffd83dbSDimitry Andric addPass(new Localizer());
12855ffd83dbSDimitry Andric }
12865ffd83dbSDimitry Andric
12870b57cec5SDimitry Andric bool GCNPassConfig::addLegalizeMachineIR() {
12880b57cec5SDimitry Andric addPass(new Legalizer());
12890b57cec5SDimitry Andric return false;
12900b57cec5SDimitry Andric }
12910b57cec5SDimitry Andric
12925ffd83dbSDimitry Andric void GCNPassConfig::addPreRegBankSelect() {
12935f757f3fSDimitry Andric bool IsOptNone = getOptLevel() == CodeGenOptLevel::None;
12945ffd83dbSDimitry Andric addPass(createAMDGPUPostLegalizeCombiner(IsOptNone));
12955f757f3fSDimitry Andric addPass(createAMDGPUGlobalISelDivergenceLoweringPass());
12965ffd83dbSDimitry Andric }
12975ffd83dbSDimitry Andric
12980b57cec5SDimitry Andric bool GCNPassConfig::addRegBankSelect() {
129906c3fb27SDimitry Andric addPass(new AMDGPURegBankSelect());
13000b57cec5SDimitry Andric return false;
13010b57cec5SDimitry Andric }
13020b57cec5SDimitry Andric
1303fe6060f1SDimitry Andric void GCNPassConfig::addPreGlobalInstructionSelect() {
13045f757f3fSDimitry Andric bool IsOptNone = getOptLevel() == CodeGenOptLevel::None;
1305fe6060f1SDimitry Andric addPass(createAMDGPURegBankCombiner(IsOptNone));
1306fe6060f1SDimitry Andric }
1307fe6060f1SDimitry Andric
13080b57cec5SDimitry Andric bool GCNPassConfig::addGlobalInstructionSelect() {
1309fe6060f1SDimitry Andric addPass(new InstructionSelect(getOptLevel()));
13100b57cec5SDimitry Andric return false;
13110b57cec5SDimitry Andric }
13120b57cec5SDimitry Andric
13130b57cec5SDimitry Andric void GCNPassConfig::addPreRegAlloc() {
13140b57cec5SDimitry Andric if (LateCFGStructurize) {
13150b57cec5SDimitry Andric addPass(createAMDGPUMachineCFGStructurizerPass());
13160b57cec5SDimitry Andric }
13170b57cec5SDimitry Andric }
13180b57cec5SDimitry Andric
13190b57cec5SDimitry Andric void GCNPassConfig::addFastRegAlloc() {
13200b57cec5SDimitry Andric // FIXME: We have to disable the verifier here because of PHIElimination +
13210b57cec5SDimitry Andric // TwoAddressInstructions disabling it.
13220b57cec5SDimitry Andric
13230b57cec5SDimitry Andric // This must be run immediately after phi elimination and before
13240b57cec5SDimitry Andric // TwoAddressInstructions, otherwise the processing of the tied operand of
13250b57cec5SDimitry Andric // SI_ELSE will introduce a copy of the tied operand source after the else.
1326349cc55cSDimitry Andric insertPass(&PHIEliminationID, &SILowerControlFlowID);
13270b57cec5SDimitry Andric
1328e8d8bef9SDimitry Andric insertPass(&TwoAddressInstructionPassID, &SIWholeQuadModeID);
13290b57cec5SDimitry Andric
13300b57cec5SDimitry Andric TargetPassConfig::addFastRegAlloc();
13310b57cec5SDimitry Andric }
13320b57cec5SDimitry Andric
13330b57cec5SDimitry Andric void GCNPassConfig::addOptimizedRegAlloc() {
1334e8d8bef9SDimitry Andric // Allow the scheduler to run before SIWholeQuadMode inserts exec manipulation
1335e8d8bef9SDimitry Andric // instructions that cause scheduling barriers.
1336e8d8bef9SDimitry Andric insertPass(&MachineSchedulerID, &SIWholeQuadModeID);
1337e8d8bef9SDimitry Andric
13385ffd83dbSDimitry Andric if (OptExecMaskPreRA)
13390b57cec5SDimitry Andric insertPass(&MachineSchedulerID, &SIOptimizeExecMaskingPreRAID);
1340fe6060f1SDimitry Andric
134106c3fb27SDimitry Andric if (EnableRewritePartialRegUses)
134206c3fb27SDimitry Andric insertPass(&RenameIndependentSubregsID, &GCNRewritePartialRegUsesID);
134306c3fb27SDimitry Andric
1344fe6060f1SDimitry Andric if (isPassEnabled(EnablePreRAOptimizations))
1345fe6060f1SDimitry Andric insertPass(&RenameIndependentSubregsID, &GCNPreRAOptimizationsID);
1346fe6060f1SDimitry Andric
1347fe6060f1SDimitry Andric // This is not an essential optimization and it has a noticeable impact on
1348fe6060f1SDimitry Andric // compilation time, so we only enable it from O2.
13495f757f3fSDimitry Andric if (TM->getOptLevel() > CodeGenOptLevel::Less)
13500b57cec5SDimitry Andric insertPass(&MachineSchedulerID, &SIFormMemoryClausesID);
13510b57cec5SDimitry Andric
1352fe6060f1SDimitry Andric // FIXME: When an instruction has a killed operand and is inside a bundle,
1353fe6060f1SDimitry Andric // it seems only the BUNDLE instruction appears as the kill of the register
1354fe6060f1SDimitry Andric // in LiveVariables. This triggers a verifier failure, so we should fix it
1355fe6060f1SDimitry Andric // and enable the verifier.
1356fe6060f1SDimitry Andric if (OptVGPRLiveRange)
1357349cc55cSDimitry Andric insertPass(&LiveVariablesID, &SIOptimizeVGPRLiveRangeID);
13580b57cec5SDimitry Andric // This must be run immediately after phi elimination and before
13590b57cec5SDimitry Andric // TwoAddressInstructions, otherwise the processing of the tied operand of
13600b57cec5SDimitry Andric // SI_ELSE will introduce a copy of the tied operand source after the else.
1361349cc55cSDimitry Andric insertPass(&PHIEliminationID, &SILowerControlFlowID);
13620b57cec5SDimitry Andric
13630b57cec5SDimitry Andric if (EnableDCEInRA)
1364480093f4SDimitry Andric insertPass(&DetectDeadLanesID, &DeadMachineInstructionElimID);
13650b57cec5SDimitry Andric
13660b57cec5SDimitry Andric TargetPassConfig::addOptimizedRegAlloc();
13670b57cec5SDimitry Andric }
13680b57cec5SDimitry Andric
13690b57cec5SDimitry Andric bool GCNPassConfig::addPreRewrite() {
13705f757f3fSDimitry Andric addPass(&SILowerWWMCopiesID);
1371fe6060f1SDimitry Andric if (EnableRegReassign)
13720b57cec5SDimitry Andric addPass(&GCNNSAReassignID);
1373fe6060f1SDimitry Andric return true;
13740b57cec5SDimitry Andric }
1375fe6060f1SDimitry Andric
1376fe6060f1SDimitry Andric FunctionPass *GCNPassConfig::createSGPRAllocPass(bool Optimized) {
1377fe6060f1SDimitry Andric // Initialize the global default.
1378fe6060f1SDimitry Andric llvm::call_once(InitializeDefaultSGPRRegisterAllocatorFlag,
1379fe6060f1SDimitry Andric initializeDefaultSGPRRegisterAllocatorOnce);
1380fe6060f1SDimitry Andric
1381fe6060f1SDimitry Andric RegisterRegAlloc::FunctionPassCtor Ctor = SGPRRegisterRegAlloc::getDefault();
1382fe6060f1SDimitry Andric if (Ctor != useDefaultRegisterAllocator)
1383fe6060f1SDimitry Andric return Ctor();
1384fe6060f1SDimitry Andric
1385fe6060f1SDimitry Andric if (Optimized)
1386fe6060f1SDimitry Andric return createGreedyRegisterAllocator(onlyAllocateSGPRs);
1387fe6060f1SDimitry Andric
1388fe6060f1SDimitry Andric return createFastRegisterAllocator(onlyAllocateSGPRs, false);
1389fe6060f1SDimitry Andric }
1390fe6060f1SDimitry Andric
1391fe6060f1SDimitry Andric FunctionPass *GCNPassConfig::createVGPRAllocPass(bool Optimized) {
1392fe6060f1SDimitry Andric // Initialize the global default.
1393fe6060f1SDimitry Andric llvm::call_once(InitializeDefaultVGPRRegisterAllocatorFlag,
1394fe6060f1SDimitry Andric initializeDefaultVGPRRegisterAllocatorOnce);
1395fe6060f1SDimitry Andric
1396fe6060f1SDimitry Andric RegisterRegAlloc::FunctionPassCtor Ctor = VGPRRegisterRegAlloc::getDefault();
1397fe6060f1SDimitry Andric if (Ctor != useDefaultRegisterAllocator)
1398fe6060f1SDimitry Andric return Ctor();
1399fe6060f1SDimitry Andric
1400fe6060f1SDimitry Andric if (Optimized)
1401fe6060f1SDimitry Andric return createGreedyVGPRRegisterAllocator();
1402fe6060f1SDimitry Andric
1403fe6060f1SDimitry Andric return createFastVGPRRegisterAllocator();
1404fe6060f1SDimitry Andric }
1405fe6060f1SDimitry Andric
1406fe6060f1SDimitry Andric FunctionPass *GCNPassConfig::createRegAllocPass(bool Optimized) {
1407fe6060f1SDimitry Andric llvm_unreachable("should not be used");
1408fe6060f1SDimitry Andric }
1409fe6060f1SDimitry Andric
1410fe6060f1SDimitry Andric static const char RegAllocOptNotSupportedMessage[] =
1411fe6060f1SDimitry Andric "-regalloc not supported with amdgcn. Use -sgpr-regalloc and -vgpr-regalloc";
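// Example (illustrative): with amdgcn the two allocators are selected
// separately on the command line, e.g.
//   llc -mtriple=amdgcn ... -sgpr-regalloc=greedy -vgpr-regalloc=fast
// The generic -regalloc option is rejected with the message above.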
1412fe6060f1SDimitry Andric
1413fe6060f1SDimitry Andric bool GCNPassConfig::addRegAssignAndRewriteFast() {
1414fe6060f1SDimitry Andric if (!usingDefaultRegAlloc())
1415fe6060f1SDimitry Andric report_fatal_error(RegAllocOptNotSupportedMessage);
1416fe6060f1SDimitry Andric
141706c3fb27SDimitry Andric addPass(&GCNPreRALongBranchRegID);
141806c3fb27SDimitry Andric
1419fe6060f1SDimitry Andric addPass(createSGPRAllocPass(false));
1420fe6060f1SDimitry Andric
1421fe6060f1SDimitry Andric // Equivalent of PEI for SGPRs.
1422fe6060f1SDimitry Andric addPass(&SILowerSGPRSpillsID);
14235f757f3fSDimitry Andric addPass(&SIPreAllocateWWMRegsID);
1424fe6060f1SDimitry Andric
1425fe6060f1SDimitry Andric addPass(createVGPRAllocPass(false));
14265f757f3fSDimitry Andric
14275f757f3fSDimitry Andric addPass(&SILowerWWMCopiesID);
1428fe6060f1SDimitry Andric return true;
1429fe6060f1SDimitry Andric }
1430fe6060f1SDimitry Andric
1431fe6060f1SDimitry Andric bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
1432fe6060f1SDimitry Andric if (!usingDefaultRegAlloc())
1433fe6060f1SDimitry Andric report_fatal_error(RegAllocOptNotSupportedMessage);
1434fe6060f1SDimitry Andric
143506c3fb27SDimitry Andric addPass(&GCNPreRALongBranchRegID);
143606c3fb27SDimitry Andric
1437fe6060f1SDimitry Andric addPass(createSGPRAllocPass(true));
1438fe6060f1SDimitry Andric
1439fe6060f1SDimitry Andric // Commit allocated register changes. This is mostly necessary because too
1440fe6060f1SDimitry Andric // many things rely on the use lists of the physical registers, such as the
1441fe6060f1SDimitry Andric // verifier. This is only necessary with allocators which use LiveIntervals,
1442349cc55cSDimitry Andric // since FastRegAlloc does the replacements itself.
1443fe6060f1SDimitry Andric addPass(createVirtRegRewriter(false));
1444fe6060f1SDimitry Andric
1445fe6060f1SDimitry Andric // Equivalent of PEI for SGPRs.
1446fe6060f1SDimitry Andric addPass(&SILowerSGPRSpillsID);
14475f757f3fSDimitry Andric addPass(&SIPreAllocateWWMRegsID);
1448fe6060f1SDimitry Andric
1449fe6060f1SDimitry Andric addPass(createVGPRAllocPass(true));
1450fe6060f1SDimitry Andric
1451fe6060f1SDimitry Andric addPreRewrite();
1452fe6060f1SDimitry Andric addPass(&VirtRegRewriterID);
1453fe6060f1SDimitry Andric
14547a6dacacSDimitry Andric addPass(&AMDGPUMarkLastScratchLoadID);
14557a6dacacSDimitry Andric
14560b57cec5SDimitry Andric return true;
14570b57cec5SDimitry Andric }
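// Note: VGPR allocation deliberately runs after SILowerSGPRSpills because
// lowering SGPR spills can create new virtual VGPRs (e.g. when spilling
// SGPRs to VGPR lanes), which would otherwise be missed by the allocator.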
14580b57cec5SDimitry Andric
14590b57cec5SDimitry Andric void GCNPassConfig::addPostRegAlloc() {
14600b57cec5SDimitry Andric addPass(&SIFixVGPRCopiesID);
14615f757f3fSDimitry Andric if (getOptLevel() > CodeGenOptLevel::None)
14620b57cec5SDimitry Andric addPass(&SIOptimizeExecMaskingID);
14630b57cec5SDimitry Andric TargetPassConfig::addPostRegAlloc();
14640b57cec5SDimitry Andric }
14650b57cec5SDimitry Andric
14660b57cec5SDimitry Andric void GCNPassConfig::addPreSched2() {
14675f757f3fSDimitry Andric if (TM->getOptLevel() > CodeGenOptLevel::None)
1468349cc55cSDimitry Andric addPass(createSIShrinkInstructionsPass());
14695ffd83dbSDimitry Andric addPass(&SIPostRABundlerID);
14700b57cec5SDimitry Andric }
14710b57cec5SDimitry Andric
14720b57cec5SDimitry Andric void GCNPassConfig::addPreEmitPass() {
14735f757f3fSDimitry Andric if (isPassEnabled(EnableVOPD, CodeGenOptLevel::Less))
1474753f127fSDimitry Andric addPass(&GCNCreateVOPDID);
14750b57cec5SDimitry Andric addPass(createSIMemoryLegalizerPass());
14760b57cec5SDimitry Andric addPass(createSIInsertWaitcntsPass());
1477fe6060f1SDimitry Andric
14780b57cec5SDimitry Andric addPass(createSIModeRegisterPass());
14790b57cec5SDimitry Andric
14805f757f3fSDimitry Andric if (getOptLevel() > CodeGenOptLevel::None)
1481e8d8bef9SDimitry Andric addPass(&SIInsertHardClausesID);
1482e8d8bef9SDimitry Andric
1483fe6060f1SDimitry Andric addPass(&SILateBranchLoweringPassID);
14845f757f3fSDimitry Andric if (isPassEnabled(EnableSetWavePriority, CodeGenOptLevel::Less))
148581ad6265SDimitry Andric addPass(createAMDGPUSetWavePriorityPass());
14865f757f3fSDimitry Andric if (getOptLevel() > CodeGenOptLevel::None)
1487e8d8bef9SDimitry Andric addPass(&SIPreEmitPeepholeID);
14880b57cec5SDimitry Andric // The hazard recognizer that runs as part of the post-ra scheduler does not
14890b57cec5SDimitry Andric // guarantee to be able to handle all hazards correctly. This is because if there
14900b57cec5SDimitry Andric // are multiple scheduling regions in a basic block, the regions are scheduled
14910b57cec5SDimitry Andric // bottom up, so when we begin to schedule a region we don't know what
14920b57cec5SDimitry Andric // instructions were emitted directly before it.
14930b57cec5SDimitry Andric //
14940b57cec5SDimitry Andric // Here we add a stand-alone hazard recognizer pass which can handle all
14950b57cec5SDimitry Andric // cases.
14960b57cec5SDimitry Andric addPass(&PostRAHazardRecognizerID);
149781ad6265SDimitry Andric
14985f757f3fSDimitry Andric if (isPassEnabled(EnableInsertSingleUseVDST, CodeGenOptLevel::Less))
14995f757f3fSDimitry Andric addPass(&AMDGPUInsertSingleUseVDSTID);
15005f757f3fSDimitry Andric
15015f757f3fSDimitry Andric if (isPassEnabled(EnableInsertDelayAlu, CodeGenOptLevel::Less))
150281ad6265SDimitry Andric addPass(&AMDGPUInsertDelayAluID);
150381ad6265SDimitry Andric
15040b57cec5SDimitry Andric addPass(&BranchRelaxationPassID);
15050b57cec5SDimitry Andric }
15060b57cec5SDimitry Andric
15070b57cec5SDimitry Andric TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) {
15080b57cec5SDimitry Andric return new GCNPassConfig(*this, PM);
15090b57cec5SDimitry Andric }
15100b57cec5SDimitry Andric
151106c3fb27SDimitry Andric void GCNTargetMachine::registerMachineRegisterInfoCallback(
151206c3fb27SDimitry Andric MachineFunction &MF) const {
151306c3fb27SDimitry Andric SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
151406c3fb27SDimitry Andric MF.getRegInfo().addDelegate(MFI);
151506c3fb27SDimitry Andric }
151606c3fb27SDimitry Andric
1517bdd1243dSDimitry Andric MachineFunctionInfo *GCNTargetMachine::createMachineFunctionInfo(
1518bdd1243dSDimitry Andric BumpPtrAllocator &Allocator, const Function &F,
1519bdd1243dSDimitry Andric const TargetSubtargetInfo *STI) const {
1520bdd1243dSDimitry Andric return SIMachineFunctionInfo::create<SIMachineFunctionInfo>(
1521bdd1243dSDimitry Andric Allocator, F, static_cast<const GCNSubtarget *>(STI));
1522bdd1243dSDimitry Andric }
1523bdd1243dSDimitry Andric
15240b57cec5SDimitry Andric yaml::MachineFunctionInfo *GCNTargetMachine::createDefaultFuncInfoYAML() const {
15250b57cec5SDimitry Andric return new yaml::SIMachineFunctionInfo();
15260b57cec5SDimitry Andric }
15270b57cec5SDimitry Andric
15280b57cec5SDimitry Andric yaml::MachineFunctionInfo *
15290b57cec5SDimitry Andric GCNTargetMachine::convertFuncInfoToYAML(const MachineFunction &MF) const {
15300b57cec5SDimitry Andric const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1531fe6060f1SDimitry Andric return new yaml::SIMachineFunctionInfo(
1532bdd1243dSDimitry Andric *MFI, *MF.getSubtarget<GCNSubtarget>().getRegisterInfo(), MF);
15330b57cec5SDimitry Andric }
15340b57cec5SDimitry Andric
15350b57cec5SDimitry Andric bool GCNTargetMachine::parseMachineFunctionInfo(
15360b57cec5SDimitry Andric const yaml::MachineFunctionInfo &MFI_, PerFunctionMIParsingState &PFS,
15370b57cec5SDimitry Andric SMDiagnostic &Error, SMRange &SourceRange) const {
15380b57cec5SDimitry Andric const yaml::SIMachineFunctionInfo &YamlMFI =
153981ad6265SDimitry Andric static_cast<const yaml::SIMachineFunctionInfo &>(MFI_);
15400b57cec5SDimitry Andric MachineFunction &MF = PFS.MF;
15410b57cec5SDimitry Andric SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
15425f757f3fSDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
15430b57cec5SDimitry Andric
1544fe6060f1SDimitry Andric if (MFI->initializeBaseYamlFields(YamlMFI, MF, PFS, Error, SourceRange))
1545fe6060f1SDimitry Andric return true;
15460b57cec5SDimitry Andric
1547e8d8bef9SDimitry Andric if (MFI->Occupancy == 0) {
1548e8d8bef9SDimitry Andric // Fix up the subtarget-dependent default value.
1549e8d8bef9SDimitry Andric MFI->Occupancy = ST.computeOccupancy(MF.getFunction(), MFI->getLDSSize());
1550e8d8bef9SDimitry Andric }
1551e8d8bef9SDimitry Andric
15525ffd83dbSDimitry Andric auto parseRegister = [&](const yaml::StringValue &RegName, Register &RegVal) {
15535ffd83dbSDimitry Andric Register TempReg;
15545ffd83dbSDimitry Andric if (parseNamedRegisterReference(PFS, TempReg, RegName.Value, Error)) {
15550b57cec5SDimitry Andric SourceRange = RegName.SourceRange;
15560b57cec5SDimitry Andric return true;
15570b57cec5SDimitry Andric }
15585ffd83dbSDimitry Andric RegVal = TempReg;
15590b57cec5SDimitry Andric
15600b57cec5SDimitry Andric return false;
15610b57cec5SDimitry Andric };
15620b57cec5SDimitry Andric
156381ad6265SDimitry Andric auto parseOptionalRegister = [&](const yaml::StringValue &RegName,
156481ad6265SDimitry Andric Register &RegVal) {
156581ad6265SDimitry Andric return !RegName.Value.empty() && parseRegister(RegName, RegVal);
156681ad6265SDimitry Andric };
156781ad6265SDimitry Andric
156881ad6265SDimitry Andric if (parseOptionalRegister(YamlMFI.VGPRForAGPRCopy, MFI->VGPRForAGPRCopy))
156981ad6265SDimitry Andric return true;
157081ad6265SDimitry Andric
157106c3fb27SDimitry Andric if (parseOptionalRegister(YamlMFI.SGPRForEXECCopy, MFI->SGPRForEXECCopy))
157206c3fb27SDimitry Andric return true;
157306c3fb27SDimitry Andric
157406c3fb27SDimitry Andric if (parseOptionalRegister(YamlMFI.LongBranchReservedReg,
157506c3fb27SDimitry Andric MFI->LongBranchReservedReg))
157606c3fb27SDimitry Andric return true;
157706c3fb27SDimitry Andric
15780b57cec5SDimitry Andric auto diagnoseRegisterClass = [&](const yaml::StringValue &RegName) {
15790b57cec5SDimitry Andric // Create a diagnostic for the register string literal.
15800b57cec5SDimitry Andric const MemoryBuffer &Buffer =
15810b57cec5SDimitry Andric *PFS.SM->getMemoryBuffer(PFS.SM->getMainFileID());
15820b57cec5SDimitry Andric Error = SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1,
15830b57cec5SDimitry Andric RegName.Value.size(), SourceMgr::DK_Error,
15840b57cec5SDimitry Andric "incorrect register class for field", RegName.Value,
1585bdd1243dSDimitry Andric std::nullopt, std::nullopt);
15860b57cec5SDimitry Andric SourceRange = RegName.SourceRange;
15870b57cec5SDimitry Andric return true;
15880b57cec5SDimitry Andric };
15890b57cec5SDimitry Andric
15900b57cec5SDimitry Andric if (parseRegister(YamlMFI.ScratchRSrcReg, MFI->ScratchRSrcReg) ||
15910b57cec5SDimitry Andric parseRegister(YamlMFI.FrameOffsetReg, MFI->FrameOffsetReg) ||
15920b57cec5SDimitry Andric parseRegister(YamlMFI.StackPtrOffsetReg, MFI->StackPtrOffsetReg))
15930b57cec5SDimitry Andric return true;
15940b57cec5SDimitry Andric
15950b57cec5SDimitry Andric if (MFI->ScratchRSrcReg != AMDGPU::PRIVATE_RSRC_REG &&
15968bcb0991SDimitry Andric !AMDGPU::SGPR_128RegClass.contains(MFI->ScratchRSrcReg)) {
15970b57cec5SDimitry Andric return diagnoseRegisterClass(YamlMFI.ScratchRSrcReg);
15980b57cec5SDimitry Andric }
15990b57cec5SDimitry Andric
16000b57cec5SDimitry Andric if (MFI->FrameOffsetReg != AMDGPU::FP_REG &&
16010b57cec5SDimitry Andric !AMDGPU::SGPR_32RegClass.contains(MFI->FrameOffsetReg)) {
16020b57cec5SDimitry Andric return diagnoseRegisterClass(YamlMFI.FrameOffsetReg);
16030b57cec5SDimitry Andric }
16040b57cec5SDimitry Andric
16050b57cec5SDimitry Andric if (MFI->StackPtrOffsetReg != AMDGPU::SP_REG &&
16060b57cec5SDimitry Andric !AMDGPU::SGPR_32RegClass.contains(MFI->StackPtrOffsetReg)) {
16070b57cec5SDimitry Andric return diagnoseRegisterClass(YamlMFI.StackPtrOffsetReg);
16080b57cec5SDimitry Andric }
16090b57cec5SDimitry Andric
161081ad6265SDimitry Andric for (const auto &YamlReg : YamlMFI.WWMReservedRegs) {
161181ad6265SDimitry Andric Register ParsedReg;
161281ad6265SDimitry Andric if (parseRegister(YamlReg, ParsedReg))
161381ad6265SDimitry Andric return true;
161481ad6265SDimitry Andric
161581ad6265SDimitry Andric MFI->reserveWWMRegister(ParsedReg);
161681ad6265SDimitry Andric }
161781ad6265SDimitry Andric
1618bdd1243dSDimitry Andric auto parseAndCheckArgument = [&](const std::optional<yaml::SIArgument> &A,
16190b57cec5SDimitry Andric const TargetRegisterClass &RC,
16200b57cec5SDimitry Andric ArgDescriptor &Arg, unsigned UserSGPRs,
16210b57cec5SDimitry Andric unsigned SystemSGPRs) {
16220b57cec5SDimitry Andric // Skip parsing if it's not present.
16230b57cec5SDimitry Andric if (!A)
16240b57cec5SDimitry Andric return false;
16250b57cec5SDimitry Andric
16260b57cec5SDimitry Andric if (A->IsRegister) {
16275ffd83dbSDimitry Andric Register Reg;
16280b57cec5SDimitry Andric if (parseNamedRegisterReference(PFS, Reg, A->RegisterName.Value, Error)) {
16290b57cec5SDimitry Andric SourceRange = A->RegisterName.SourceRange;
16300b57cec5SDimitry Andric return true;
16310b57cec5SDimitry Andric }
16320b57cec5SDimitry Andric if (!RC.contains(Reg))
16330b57cec5SDimitry Andric return diagnoseRegisterClass(A->RegisterName);
16340b57cec5SDimitry Andric Arg = ArgDescriptor::createRegister(Reg);
16350b57cec5SDimitry Andric } else
16360b57cec5SDimitry Andric Arg = ArgDescriptor::createStack(A->StackOffset);
16370b57cec5SDimitry Andric // Check and apply the optional mask.
16380b57cec5SDimitry Andric if (A->Mask)
163981ad6265SDimitry Andric Arg = ArgDescriptor::createArg(Arg, *A->Mask);
16400b57cec5SDimitry Andric
16410b57cec5SDimitry Andric MFI->NumUserSGPRs += UserSGPRs;
16420b57cec5SDimitry Andric MFI->NumSystemSGPRs += SystemSGPRs;
16430b57cec5SDimitry Andric return false;
16440b57cec5SDimitry Andric };
16450b57cec5SDimitry Andric
16460b57cec5SDimitry Andric if (YamlMFI.ArgInfo &&
16470b57cec5SDimitry Andric (parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentBuffer,
16488bcb0991SDimitry Andric AMDGPU::SGPR_128RegClass,
16490b57cec5SDimitry Andric MFI->ArgInfo.PrivateSegmentBuffer, 4, 0) ||
16500b57cec5SDimitry Andric parseAndCheckArgument(YamlMFI.ArgInfo->DispatchPtr,
16510b57cec5SDimitry Andric AMDGPU::SReg_64RegClass, MFI->ArgInfo.DispatchPtr,
16520b57cec5SDimitry Andric 2, 0) ||
16530b57cec5SDimitry Andric parseAndCheckArgument(YamlMFI.ArgInfo->QueuePtr, AMDGPU::SReg_64RegClass,
16540b57cec5SDimitry Andric MFI->ArgInfo.QueuePtr, 2, 0) ||
16550b57cec5SDimitry Andric parseAndCheckArgument(YamlMFI.ArgInfo->KernargSegmentPtr,
16560b57cec5SDimitry Andric AMDGPU::SReg_64RegClass,
16570b57cec5SDimitry Andric MFI->ArgInfo.KernargSegmentPtr, 2, 0) ||
16580b57cec5SDimitry Andric parseAndCheckArgument(YamlMFI.ArgInfo->DispatchID,
16590b57cec5SDimitry Andric AMDGPU::SReg_64RegClass, MFI->ArgInfo.DispatchID,
16600b57cec5SDimitry Andric 2, 0) ||
16610b57cec5SDimitry Andric parseAndCheckArgument(YamlMFI.ArgInfo->FlatScratchInit,
16620b57cec5SDimitry Andric AMDGPU::SReg_64RegClass,
16630b57cec5SDimitry Andric MFI->ArgInfo.FlatScratchInit, 2, 0) ||
16640b57cec5SDimitry Andric parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentSize,
16650b57cec5SDimitry Andric AMDGPU::SGPR_32RegClass,
16660b57cec5SDimitry Andric MFI->ArgInfo.PrivateSegmentSize, 0, 0) ||
1667fcaf7f86SDimitry Andric parseAndCheckArgument(YamlMFI.ArgInfo->LDSKernelId,
1668fcaf7f86SDimitry Andric AMDGPU::SGPR_32RegClass,
1669fcaf7f86SDimitry Andric MFI->ArgInfo.LDSKernelId, 0, 1) ||
16700b57cec5SDimitry Andric parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDX,
16710b57cec5SDimitry Andric AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDX,
16720b57cec5SDimitry Andric 0, 1) ||
16730b57cec5SDimitry Andric parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDY,
16740b57cec5SDimitry Andric AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDY,
16750b57cec5SDimitry Andric 0, 1) ||
16760b57cec5SDimitry Andric parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDZ,
16770b57cec5SDimitry Andric AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDZ,
16780b57cec5SDimitry Andric 0, 1) ||
16790b57cec5SDimitry Andric parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupInfo,
16800b57cec5SDimitry Andric AMDGPU::SGPR_32RegClass,
16810b57cec5SDimitry Andric MFI->ArgInfo.WorkGroupInfo, 0, 1) ||
16820b57cec5SDimitry Andric parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentWaveByteOffset,
16830b57cec5SDimitry Andric AMDGPU::SGPR_32RegClass,
16840b57cec5SDimitry Andric MFI->ArgInfo.PrivateSegmentWaveByteOffset, 0, 1) ||
16850b57cec5SDimitry Andric parseAndCheckArgument(YamlMFI.ArgInfo->ImplicitArgPtr,
16860b57cec5SDimitry Andric AMDGPU::SReg_64RegClass,
16870b57cec5SDimitry Andric MFI->ArgInfo.ImplicitArgPtr, 0, 0) ||
16880b57cec5SDimitry Andric parseAndCheckArgument(YamlMFI.ArgInfo->ImplicitBufferPtr,
16890b57cec5SDimitry Andric AMDGPU::SReg_64RegClass,
16900b57cec5SDimitry Andric MFI->ArgInfo.ImplicitBufferPtr, 2, 0) ||
16910b57cec5SDimitry Andric parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDX,
16920b57cec5SDimitry Andric AMDGPU::VGPR_32RegClass,
16930b57cec5SDimitry Andric MFI->ArgInfo.WorkItemIDX, 0, 0) ||
16940b57cec5SDimitry Andric parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDY,
16950b57cec5SDimitry Andric AMDGPU::VGPR_32RegClass,
16960b57cec5SDimitry Andric MFI->ArgInfo.WorkItemIDY, 0, 0) ||
16970b57cec5SDimitry Andric parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDZ,
16980b57cec5SDimitry Andric AMDGPU::VGPR_32RegClass,
16990b57cec5SDimitry Andric MFI->ArgInfo.WorkItemIDZ, 0, 0)))
17000b57cec5SDimitry Andric return true;
17010b57cec5SDimitry Andric
17025f757f3fSDimitry Andric if (ST.hasIEEEMode())
17030b57cec5SDimitry Andric MFI->Mode.IEEE = YamlMFI.Mode.IEEE;
17045f757f3fSDimitry Andric if (ST.hasDX10ClampMode())
17050b57cec5SDimitry Andric MFI->Mode.DX10Clamp = YamlMFI.Mode.DX10Clamp;
1706bdd1243dSDimitry Andric
1707bdd1243dSDimitry Andric // FIXME: Move proper support for denormal-fp-math into base MachineFunction
1708bdd1243dSDimitry Andric MFI->Mode.FP32Denormals.Input = YamlMFI.Mode.FP32InputDenormals
1709bdd1243dSDimitry Andric ? DenormalMode::IEEE
1710bdd1243dSDimitry Andric : DenormalMode::PreserveSign;
1711bdd1243dSDimitry Andric MFI->Mode.FP32Denormals.Output = YamlMFI.Mode.FP32OutputDenormals
1712bdd1243dSDimitry Andric ? DenormalMode::IEEE
1713bdd1243dSDimitry Andric : DenormalMode::PreserveSign;
1714bdd1243dSDimitry Andric
1715bdd1243dSDimitry Andric MFI->Mode.FP64FP16Denormals.Input = YamlMFI.Mode.FP64FP16InputDenormals
1716bdd1243dSDimitry Andric ? DenormalMode::IEEE
1717bdd1243dSDimitry Andric : DenormalMode::PreserveSign;
1718bdd1243dSDimitry Andric MFI->Mode.FP64FP16Denormals.Output = YamlMFI.Mode.FP64FP16OutputDenormals
1719bdd1243dSDimitry Andric ? DenormalMode::IEEE
1720bdd1243dSDimitry Andric : DenormalMode::PreserveSign;
17210b57cec5SDimitry Andric
17220b57cec5SDimitry Andric return false;
17230b57cec5SDimitry Andric }
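// Illustrative MIR YAML fragment (field spellings assumed from typical
// SIMachineFunctionInfo serialization) of the input this parser consumes:
//   machineFunctionInfo:
//     scratchRSrcReg:    '$sgpr0_sgpr1_sgpr2_sgpr3'
//     frameOffsetReg:    '$sgpr33'
//     stackPtrOffsetReg: '$sgpr32'
//     argumentInfo:
//       privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
//       workItemIDX:          { reg: '$vgpr0' }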