1 //===-- AMDGPUPromoteKernelArguments.cpp ----------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This pass recursively promotes generic pointer arguments of a kernel
10 /// into the global address space.
11 ///
/// The pass walks the kernel's pointer arguments and then the loads from them.
/// If a loaded value is a pointer and that pointer is not modified in the
/// kernel before the load, the loaded pointer is promoted to global as well.
/// The pass then continues recursively from the loaded pointer.
15 //
16 //===----------------------------------------------------------------------===//
17
18 #include "AMDGPU.h"
19 #include "Utils/AMDGPUMemoryUtils.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/Analysis/AliasAnalysis.h"
22 #include "llvm/Analysis/MemorySSA.h"
23 #include "llvm/IR/IRBuilder.h"
24 #include "llvm/InitializePasses.h"
25
26 #define DEBUG_TYPE "amdgpu-promote-kernel-arguments"
27
28 using namespace llvm;
29
30 namespace {
31
32 class AMDGPUPromoteKernelArguments : public FunctionPass {
33 MemorySSA *MSSA;
34
35 AliasAnalysis *AA;
36
37 Instruction *ArgCastInsertPt;
38
39 SmallVector<Value *> Ptrs;
40
41 void enqueueUsers(Value *Ptr);
42
43 bool promotePointer(Value *Ptr);
44
45 bool promoteLoad(LoadInst *LI);
46
47 public:
48 static char ID;
49
AMDGPUPromoteKernelArguments()50 AMDGPUPromoteKernelArguments() : FunctionPass(ID) {}
51
52 bool run(Function &F, MemorySSA &MSSA, AliasAnalysis &AA);
53
54 bool runOnFunction(Function &F) override;
55
getAnalysisUsage(AnalysisUsage & AU) const56 void getAnalysisUsage(AnalysisUsage &AU) const override {
57 AU.addRequired<AAResultsWrapperPass>();
58 AU.addRequired<MemorySSAWrapperPass>();
59 AU.setPreservesAll();
60 }
61 };
62
63 } // end anonymous namespace
64
enqueueUsers(Value * Ptr)65 void AMDGPUPromoteKernelArguments::enqueueUsers(Value *Ptr) {
66 SmallVector<User *> PtrUsers(Ptr->users());
67
68 while (!PtrUsers.empty()) {
69 Instruction *U = dyn_cast<Instruction>(PtrUsers.pop_back_val());
70 if (!U)
71 continue;
72
73 switch (U->getOpcode()) {
74 default:
75 break;
76 case Instruction::Load: {
77 LoadInst *LD = cast<LoadInst>(U);
78 if (LD->getPointerOperand()->stripInBoundsOffsets() == Ptr &&
79 !AMDGPU::isClobberedInFunction(LD, MSSA, AA))
80 Ptrs.push_back(LD);
81
82 break;
83 }
84 case Instruction::GetElementPtr:
85 case Instruction::AddrSpaceCast:
86 case Instruction::BitCast:
87 if (U->getOperand(0)->stripInBoundsOffsets() == Ptr)
88 PtrUsers.append(U->user_begin(), U->user_end());
89 break;
90 }
91 }
92 }
93
promotePointer(Value * Ptr)94 bool AMDGPUPromoteKernelArguments::promotePointer(Value *Ptr) {
95 bool Changed = false;
96
97 LoadInst *LI = dyn_cast<LoadInst>(Ptr);
98 if (LI)
99 Changed |= promoteLoad(LI);
100
101 PointerType *PT = dyn_cast<PointerType>(Ptr->getType());
102 if (!PT)
103 return Changed;
104
105 if (PT->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS ||
106 PT->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
107 PT->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
108 enqueueUsers(Ptr);
109
110 if (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS)
111 return Changed;
112
113 IRBuilder<> B(LI ? &*std::next(cast<Instruction>(Ptr)->getIterator())
114 : ArgCastInsertPt);
115
116 // Cast pointer to global address space and back to flat and let
117 // Infer Address Spaces pass to do all necessary rewriting.
118 PointerType *NewPT =
119 PointerType::get(PT->getContext(), AMDGPUAS::GLOBAL_ADDRESS);
120 Value *Cast =
121 B.CreateAddrSpaceCast(Ptr, NewPT, Twine(Ptr->getName(), ".global"));
122 Value *CastBack =
123 B.CreateAddrSpaceCast(Cast, PT, Twine(Ptr->getName(), ".flat"));
124 Ptr->replaceUsesWithIf(CastBack,
125 [Cast](Use &U) { return U.getUser() != Cast; });
126
127 return true;
128 }
129
promoteLoad(LoadInst * LI)130 bool AMDGPUPromoteKernelArguments::promoteLoad(LoadInst *LI) {
131 if (!LI->isSimple())
132 return false;
133
134 LI->setMetadata("amdgpu.noclobber", MDNode::get(LI->getContext(), {}));
135 return true;
136 }
137
138 // skip allocas
getInsertPt(BasicBlock & BB)139 static BasicBlock::iterator getInsertPt(BasicBlock &BB) {
140 BasicBlock::iterator InsPt = BB.getFirstInsertionPt();
141 for (BasicBlock::iterator E = BB.end(); InsPt != E; ++InsPt) {
142 AllocaInst *AI = dyn_cast<AllocaInst>(&*InsPt);
143
144 // If this is a dynamic alloca, the value may depend on the loaded kernargs,
145 // so loads will need to be inserted before it.
146 if (!AI || !AI->isStaticAlloca())
147 break;
148 }
149
150 return InsPt;
151 }
152
run(Function & F,MemorySSA & MSSA,AliasAnalysis & AA)153 bool AMDGPUPromoteKernelArguments::run(Function &F, MemorySSA &MSSA,
154 AliasAnalysis &AA) {
155 if (skipFunction(F))
156 return false;
157
158 CallingConv::ID CC = F.getCallingConv();
159 if (CC != CallingConv::AMDGPU_KERNEL || F.arg_empty())
160 return false;
161
162 ArgCastInsertPt = &*getInsertPt(*F.begin());
163 this->MSSA = &MSSA;
164 this->AA = &AA;
165
166 for (Argument &Arg : F.args()) {
167 if (Arg.use_empty())
168 continue;
169
170 PointerType *PT = dyn_cast<PointerType>(Arg.getType());
171 if (!PT || (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS &&
172 PT->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS &&
173 PT->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS))
174 continue;
175
176 Ptrs.push_back(&Arg);
177 }
178
179 bool Changed = false;
180 while (!Ptrs.empty()) {
181 Value *Ptr = Ptrs.pop_back_val();
182 Changed |= promotePointer(Ptr);
183 }
184
185 return Changed;
186 }
187
runOnFunction(Function & F)188 bool AMDGPUPromoteKernelArguments::runOnFunction(Function &F) {
189 MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
190 AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
191 return run(F, MSSA, AA);
192 }
193
194 INITIALIZE_PASS_BEGIN(AMDGPUPromoteKernelArguments, DEBUG_TYPE,
195 "AMDGPU Promote Kernel Arguments", false, false)
196 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
197 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
198 INITIALIZE_PASS_END(AMDGPUPromoteKernelArguments, DEBUG_TYPE,
199 "AMDGPU Promote Kernel Arguments", false, false)
200
201 char AMDGPUPromoteKernelArguments::ID = 0;
202
createAMDGPUPromoteKernelArgumentsPass()203 FunctionPass *llvm::createAMDGPUPromoteKernelArgumentsPass() {
204 return new AMDGPUPromoteKernelArguments();
205 }
206
207 PreservedAnalyses
run(Function & F,FunctionAnalysisManager & AM)208 AMDGPUPromoteKernelArgumentsPass::run(Function &F,
209 FunctionAnalysisManager &AM) {
210 MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
211 AliasAnalysis &AA = AM.getResult<AAManager>(F);
212 if (AMDGPUPromoteKernelArguments().run(F, MSSA, AA)) {
213 PreservedAnalyses PA;
214 PA.preserveSet<CFGAnalyses>();
215 PA.preserve<MemorySSAAnalysis>();
216 return PA;
217 }
218 return PreservedAnalyses::all();
219 }
220