1*700637cbSDimitry Andric //===- AMDGPUPreloadKernelArguments.cpp - Preload Kernel Arguments --------===//
2*700637cbSDimitry Andric //
3*700637cbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*700637cbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*700637cbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*700637cbSDimitry Andric //
7*700637cbSDimitry Andric //===----------------------------------------------------------------------===//
8*700637cbSDimitry Andric //
9*700637cbSDimitry Andric /// \file This pass preloads kernel arguments into user_data SGPRs before kernel
10*700637cbSDimitry Andric /// execution begins. The number of registers available for preloading depends
11*700637cbSDimitry Andric /// on the number of free user SGPRs, up to the hardware's maximum limit.
12*700637cbSDimitry Andric /// Implicit arguments enabled in the kernel descriptor are allocated first,
13*700637cbSDimitry Andric /// followed by SGPRs used for preloaded kernel arguments. (Reference:
14*700637cbSDimitry Andric /// https://llvm.org/docs/AMDGPUUsage.html#initial-kernel-execution-state)
15*700637cbSDimitry Andric /// Additionally, hidden kernel arguments may be preloaded, in which case they
16*700637cbSDimitry Andric /// are appended to the kernel signature after explicit arguments. Preloaded
17*700637cbSDimitry Andric /// arguments will be marked with `inreg`.
18*700637cbSDimitry Andric //
19*700637cbSDimitry Andric //===----------------------------------------------------------------------===//
20*700637cbSDimitry Andric
21*700637cbSDimitry Andric #include "AMDGPU.h"
22*700637cbSDimitry Andric #include "AMDGPUTargetMachine.h"
23*700637cbSDimitry Andric #include "llvm/Analysis/ValueTracking.h"
24*700637cbSDimitry Andric #include "llvm/IR/Function.h"
25*700637cbSDimitry Andric #include "llvm/IR/Instructions.h"
26*700637cbSDimitry Andric #include "llvm/IR/IntrinsicsAMDGPU.h"
27*700637cbSDimitry Andric #include "llvm/IR/Module.h"
28*700637cbSDimitry Andric #include "llvm/IR/PassManager.h"
29*700637cbSDimitry Andric #include "llvm/IR/Verifier.h"
30*700637cbSDimitry Andric #include "llvm/Pass.h"
31*700637cbSDimitry Andric
32*700637cbSDimitry Andric #define DEBUG_TYPE "amdgpu-preload-kernel-arguments"
33*700637cbSDimitry Andric
34*700637cbSDimitry Andric using namespace llvm;
35*700637cbSDimitry Andric
// Command-line override for how many leading explicit kernel arguments to
// preload into user SGPRs. Defaults to 0; arguments that already carry
// `inreg` are still considered for preloading even when this is 0.
static cl::opt<unsigned> KernargPreloadCount(
    "amdgpu-kernarg-preload-count",
    cl::desc("How many kernel arguments to preload onto SGPRs"), cl::init(0));
39*700637cbSDimitry Andric
40*700637cbSDimitry Andric namespace {
41*700637cbSDimitry Andric
// Legacy (old pass-manager) wrapper that drives kernel-argument preloading
// over a whole module.
class AMDGPUPreloadKernelArgumentsLegacy : public ModulePass {
  // Target machine used to query the per-function GCN subtarget. May be
  // null (default-constructed pass), in which case runOnModule is a no-op.
  const GCNTargetMachine *TM;

public:
  static char ID;
  explicit AMDGPUPreloadKernelArgumentsLegacy(
      const GCNTargetMachine *TM = nullptr);

  StringRef getPassName() const override {
    return "AMDGPU Preload Kernel Arguments";
  }

  bool runOnModule(Module &M) override;
};
56*700637cbSDimitry Andric
// Per-kernel helper that tracks how many user SGPRs are free for preloading
// and, when possible, rewrites the kernel signature so hidden (implicit)
// arguments become trailing `inreg` parameters.
class PreloadKernelArgInfo {
private:
  Function &F;
  const GCNSubtarget &ST;
  // Number of 32-bit user SGPRs still available for preloading kernarg data.
  unsigned NumFreeUserSGPRs;

  // Hidden (implicit) arguments eligible for preloading, in kernarg-segment
  // order. END_HIDDEN_ARGS doubles as the element count and as a "not found"
  // sentinel returned by getHiddenArgFromOffset.
  enum HiddenArg : unsigned {
    HIDDEN_BLOCK_COUNT_X,
    HIDDEN_BLOCK_COUNT_Y,
    HIDDEN_BLOCK_COUNT_Z,
    HIDDEN_GROUP_SIZE_X,
    HIDDEN_GROUP_SIZE_Y,
    HIDDEN_GROUP_SIZE_Z,
    HIDDEN_REMAINDER_X,
    HIDDEN_REMAINDER_Y,
    HIDDEN_REMAINDER_Z,
    END_HIDDEN_ARGS
  };

  // Stores information about a specific hidden argument.
  struct HiddenArgInfo {
    // Offset in bytes from the location in the kernearg segment pointed to by
    // the implicitarg pointer.
    uint8_t Offset;
    // The size of the hidden argument in bytes.
    uint8_t Size;
    // The name of the hidden argument in the kernel signature.
    const char *Name;
  };

  // Table describing every preloadable hidden argument; indexed by HiddenArg.
  static constexpr HiddenArgInfo HiddenArgs[END_HIDDEN_ARGS] = {
      {0, 4, "_hidden_block_count_x"}, {4, 4, "_hidden_block_count_y"},
      {8, 4, "_hidden_block_count_z"}, {12, 2, "_hidden_group_size_x"},
      {14, 2, "_hidden_group_size_y"}, {16, 2, "_hidden_group_size_z"},
      {18, 2, "_hidden_remainder_x"},  {20, 2, "_hidden_remainder_y"},
      {22, 2, "_hidden_remainder_z"}};

  // Maps a byte offset from the implicitarg pointer to the hidden argument
  // starting at exactly that offset, or END_HIDDEN_ARGS if there is none.
  static HiddenArg getHiddenArgFromOffset(unsigned Offset) {
    for (unsigned I = 0; I < END_HIDDEN_ARGS; ++I)
      if (HiddenArgs[I].Offset == Offset)
        return static_cast<HiddenArg>(I);

    return END_HIDDEN_ARGS;
  }

  // Returns the integer IR type whose width matches the hidden argument's
  // size in bytes.
  static Type *getHiddenArgType(LLVMContext &Ctx, HiddenArg HA) {
    if (HA < END_HIDDEN_ARGS)
      return Type::getIntNTy(Ctx, HiddenArgs[HA].Size * 8);

    llvm_unreachable("Unexpected hidden argument.");
  }

  // Returns the parameter name used for the hidden argument in the rewritten
  // kernel signature.
  static const char *getHiddenArgName(HiddenArg HA) {
    if (HA < END_HIDDEN_ARGS)
      return HiddenArgs[HA].Name;

    llvm_unreachable("Unexpected hidden argument.");
  }

  // Clones the function after adding implicit arguments to the argument list
  // and returns the new updated function. Preloaded implicit arguments are
  // added up to and including the last one that will be preloaded, indicated by
  // LastPreloadIndex. Currently preloading is only performed on the totality of
  // sequential data from the kernarg segment including implicit (hidden)
  // arguments. This means that all arguments up to the last preloaded argument
  // will also be preloaded even if that data is unused.
  Function *cloneFunctionWithPreloadImplicitArgs(unsigned LastPreloadIndex) {
    FunctionType *FT = F.getFunctionType();
    LLVMContext &Ctx = F.getParent()->getContext();
    // New signature: all existing parameters followed by one parameter per
    // hidden argument up to and including LastPreloadIndex.
    SmallVector<Type *, 16> FTypes(FT->param_begin(), FT->param_end());
    for (unsigned I = 0; I <= LastPreloadIndex; ++I)
      FTypes.push_back(getHiddenArgType(Ctx, HiddenArg(I)));

    FunctionType *NFT =
        FunctionType::get(FT->getReturnType(), FTypes, FT->isVarArg());
    Function *NF =
        Function::Create(NFT, F.getLinkage(), F.getAddressSpace(), F.getName());

    NF->copyAttributesFrom(&F);
    NF->copyMetadata(&F, 0);

    // Insert the clone in front of the original, steal its name and body, and
    // rewire every use of the old explicit arguments to the new ones.
    F.getParent()->getFunctionList().insert(F.getIterator(), NF);
    NF->takeName(&F);
    NF->splice(NF->begin(), &F);

    Function::arg_iterator NFArg = NF->arg_begin();
    for (Argument &Arg : F.args()) {
      Arg.replaceAllUsesWith(&*NFArg);
      NFArg->takeName(&Arg);
      ++NFArg;
    }

    // Mark each appended hidden argument: `inreg` requests SGPR preloading,
    // and "amdgpu-hidden-argument" both records its origin and guards against
    // re-running this transformation on the clone.
    AttrBuilder AB(Ctx);
    AB.addAttribute(Attribute::InReg);
    AB.addAttribute("amdgpu-hidden-argument");
    AttributeList AL = NF->getAttributes();
    for (unsigned I = 0; I <= LastPreloadIndex; ++I) {
      AL = AL.addParamAttributes(Ctx, NFArg->getArgNo(), AB);
      NFArg++->setName(getHiddenArgName(HiddenArg(I)));
    }

    NF->setAttributes(AL);
    F.replaceAllUsesWith(NF);

    return NF;
  }

public:
  PreloadKernelArgInfo(Function &F, const GCNSubtarget &ST) : F(F), ST(ST) {
    setInitialFreeUserSGPRsCount();
  }

  // Returns the maximum number of user SGPRs that we have available to preload
  // arguments.
  void setInitialFreeUserSGPRsCount() {
    GCNUserSGPRUsageInfo UserSGPRInfo(F, ST);
    NumFreeUserSGPRs = UserSGPRInfo.getNumFreeUserSGPRs();
  }

  // True if all sequential kernarg data up to ExplicitArgOffset bytes fits in
  // the free user SGPRs (each SGPR holds 4 bytes).
  bool canPreloadKernArgAtOffset(uint64_t ExplicitArgOffset) {
    return ExplicitArgOffset <= NumFreeUserSGPRs * 4;
  }

  // Try to allocate SGPRs to preload hidden kernel arguments.
  // ImplicitArgsBaseOffset is the byte offset of the implicitarg pointer from
  // the start of the kernarg segment. If any hidden-argument loads can be
  // preloaded, the kernel is cloned with an extended signature and the stale
  // original is appended to FunctionsToErase for the caller to delete.
  void
  tryAllocHiddenArgPreloadSGPRs(uint64_t ImplicitArgsBaseOffset,
                                SmallVectorImpl<Function *> &FunctionsToErase) {
    Function *ImplicitArgPtr = Intrinsic::getDeclarationIfExists(
        F.getParent(), Intrinsic::amdgcn_implicitarg_ptr);
    if (!ImplicitArgPtr)
      return;

    const DataLayout &DL = F.getParent()->getDataLayout();
    // Pair is the load and the load offset.
    SmallVector<std::pair<LoadInst *, unsigned>, 4> ImplicitArgLoads;
    for (auto *U : ImplicitArgPtr->users()) {
      // Only consider uses of the intrinsic from inside this kernel.
      Instruction *CI = dyn_cast<Instruction>(U);
      if (!CI || CI->getParent()->getParent() != &F)
        continue;

      for (auto *U : CI->users()) {
        int64_t Offset = 0;
        auto *Load = dyn_cast<LoadInst>(U); // Load from ImplicitArgPtr?
        if (!Load) {
          // Otherwise accept a constant-offset pointer computation based on
          // the intrinsic, taking its first user as the candidate load.
          if (GetPointerBaseWithConstantOffset(U, Offset, DL) != CI)
            continue;

          Load = dyn_cast<LoadInst>(*U->user_begin()); // Load from GEP?
        }

        if (!Load || !Load->isSimple())
          continue;

        // FIXME: Expand handle merged loads.
        // The load must exactly match a known hidden argument: correct
        // offset and the matching integer type.
        LLVMContext &Ctx = F.getParent()->getContext();
        Type *LoadTy = Load->getType();
        HiddenArg HA = getHiddenArgFromOffset(Offset);
        if (HA == END_HIDDEN_ARGS || LoadTy != getHiddenArgType(Ctx, HA))
          continue;

        ImplicitArgLoads.push_back(std::make_pair(Load, Offset));
      }
    }

    if (ImplicitArgLoads.empty())
      return;

    // Allocate loads in order of offset. We need to be sure that the implicit
    // argument can actually be preloaded.
    std::sort(ImplicitArgLoads.begin(), ImplicitArgLoads.end(), less_second());

    // If we fail to preload any implicit argument we know we don't have SGPRs
    // to preload any subsequent ones with larger offsets. Find the first
    // argument that we cannot preload.
    auto *PreloadEnd = llvm::find_if(
        ImplicitArgLoads, [&](const std::pair<LoadInst *, unsigned> &Load) {
          unsigned LoadSize = DL.getTypeStoreSize(Load.first->getType());
          unsigned LoadOffset = Load.second;
          if (!canPreloadKernArgAtOffset(LoadOffset + LoadSize +
                                         ImplicitArgsBaseOffset))
            return true;

          return false;
        });

    if (PreloadEnd == ImplicitArgLoads.begin())
      return;

    // Clone the kernel with hidden arguments appended through the last
    // preloadable one, then forward each qualifying load to its argument.
    unsigned LastHiddenArgIndex = getHiddenArgFromOffset(PreloadEnd[-1].second);
    Function *NF = cloneFunctionWithPreloadImplicitArgs(LastHiddenArgIndex);
    assert(NF);
    FunctionsToErase.push_back(&F);
    for (const auto *I = ImplicitArgLoads.begin(); I != PreloadEnd; ++I) {
      LoadInst *LoadInst = I->first;
      unsigned LoadOffset = I->second;
      unsigned HiddenArgIndex = getHiddenArgFromOffset(LoadOffset);
      // Hidden args occupy the trailing (LastHiddenArgIndex + 1) parameters
      // of NF, in HiddenArg enum order.
      unsigned Index = NF->arg_size() - LastHiddenArgIndex + HiddenArgIndex - 1;
      Argument *Arg = NF->getArg(Index);
      LoadInst->replaceAllUsesWith(Arg);
    }
  }
};
259*700637cbSDimitry Andric
260*700637cbSDimitry Andric } // end anonymous namespace
261*700637cbSDimitry Andric
// Pass identification for the legacy pass manager.
char AMDGPUPreloadKernelArgumentsLegacy::ID = 0;

INITIALIZE_PASS(AMDGPUPreloadKernelArgumentsLegacy, DEBUG_TYPE,
                "AMDGPU Preload Kernel Arguments", false, false)
266*700637cbSDimitry Andric
267*700637cbSDimitry Andric ModulePass *
createAMDGPUPreloadKernelArgumentsLegacyPass(const TargetMachine * TM)268*700637cbSDimitry Andric llvm::createAMDGPUPreloadKernelArgumentsLegacyPass(const TargetMachine *TM) {
269*700637cbSDimitry Andric return new AMDGPUPreloadKernelArgumentsLegacy(
270*700637cbSDimitry Andric static_cast<const GCNTargetMachine *>(TM));
271*700637cbSDimitry Andric }
272*700637cbSDimitry Andric
// Construct the legacy pass. A null TM makes runOnModule a no-op.
AMDGPUPreloadKernelArgumentsLegacy::AMDGPUPreloadKernelArgumentsLegacy(
    const GCNTargetMachine *TM)
    : ModulePass(ID), TM(TM) {}
276*700637cbSDimitry Andric
markKernelArgsAsInreg(Module & M,const TargetMachine & TM)277*700637cbSDimitry Andric static bool markKernelArgsAsInreg(Module &M, const TargetMachine &TM) {
278*700637cbSDimitry Andric SmallVector<Function *, 4> FunctionsToErase;
279*700637cbSDimitry Andric bool Changed = false;
280*700637cbSDimitry Andric for (auto &F : M) {
281*700637cbSDimitry Andric const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
282*700637cbSDimitry Andric if (!ST.hasKernargPreload() ||
283*700637cbSDimitry Andric F.getCallingConv() != CallingConv::AMDGPU_KERNEL)
284*700637cbSDimitry Andric continue;
285*700637cbSDimitry Andric
286*700637cbSDimitry Andric PreloadKernelArgInfo PreloadInfo(F, ST);
287*700637cbSDimitry Andric uint64_t ExplicitArgOffset = 0;
288*700637cbSDimitry Andric const DataLayout &DL = F.getDataLayout();
289*700637cbSDimitry Andric const uint64_t BaseOffset = ST.getExplicitKernelArgOffset();
290*700637cbSDimitry Andric unsigned NumPreloadsRequested = KernargPreloadCount;
291*700637cbSDimitry Andric unsigned NumPreloadedExplicitArgs = 0;
292*700637cbSDimitry Andric for (Argument &Arg : F.args()) {
293*700637cbSDimitry Andric // Avoid incompatible attributes and guard against running this pass
294*700637cbSDimitry Andric // twice.
295*700637cbSDimitry Andric //
296*700637cbSDimitry Andric // TODO: Preload byref kernel arguments
297*700637cbSDimitry Andric if (Arg.hasByRefAttr() || Arg.hasNestAttr() ||
298*700637cbSDimitry Andric Arg.hasAttribute("amdgpu-hidden-argument"))
299*700637cbSDimitry Andric break;
300*700637cbSDimitry Andric
301*700637cbSDimitry Andric // Inreg may be pre-existing on some arguments, try to preload these.
302*700637cbSDimitry Andric if (NumPreloadsRequested == 0 && !Arg.hasInRegAttr())
303*700637cbSDimitry Andric break;
304*700637cbSDimitry Andric
305*700637cbSDimitry Andric // FIXME: Preload aggregates.
306*700637cbSDimitry Andric if (Arg.getType()->isAggregateType())
307*700637cbSDimitry Andric break;
308*700637cbSDimitry Andric
309*700637cbSDimitry Andric Type *ArgTy = Arg.getType();
310*700637cbSDimitry Andric Align ABITypeAlign = DL.getABITypeAlign(ArgTy);
311*700637cbSDimitry Andric uint64_t AllocSize = DL.getTypeAllocSize(ArgTy);
312*700637cbSDimitry Andric ExplicitArgOffset = alignTo(ExplicitArgOffset, ABITypeAlign) + AllocSize;
313*700637cbSDimitry Andric
314*700637cbSDimitry Andric if (!PreloadInfo.canPreloadKernArgAtOffset(ExplicitArgOffset))
315*700637cbSDimitry Andric break;
316*700637cbSDimitry Andric
317*700637cbSDimitry Andric Arg.addAttr(Attribute::InReg);
318*700637cbSDimitry Andric NumPreloadedExplicitArgs++;
319*700637cbSDimitry Andric if (NumPreloadsRequested > 0)
320*700637cbSDimitry Andric NumPreloadsRequested--;
321*700637cbSDimitry Andric }
322*700637cbSDimitry Andric
323*700637cbSDimitry Andric // Only try preloading hidden arguments if we can successfully preload the
324*700637cbSDimitry Andric // last explicit argument.
325*700637cbSDimitry Andric if (NumPreloadedExplicitArgs == F.arg_size()) {
326*700637cbSDimitry Andric uint64_t ImplicitArgsBaseOffset =
327*700637cbSDimitry Andric alignTo(ExplicitArgOffset, ST.getAlignmentForImplicitArgPtr()) +
328*700637cbSDimitry Andric BaseOffset;
329*700637cbSDimitry Andric PreloadInfo.tryAllocHiddenArgPreloadSGPRs(ImplicitArgsBaseOffset,
330*700637cbSDimitry Andric FunctionsToErase);
331*700637cbSDimitry Andric }
332*700637cbSDimitry Andric
333*700637cbSDimitry Andric Changed |= NumPreloadedExplicitArgs > 0;
334*700637cbSDimitry Andric }
335*700637cbSDimitry Andric
336*700637cbSDimitry Andric // Erase cloned functions if we needed to update the kernel signature to
337*700637cbSDimitry Andric // support preloading hidden kernel arguments.
338*700637cbSDimitry Andric for (auto *F : FunctionsToErase)
339*700637cbSDimitry Andric F->eraseFromParent();
340*700637cbSDimitry Andric
341*700637cbSDimitry Andric return Changed;
342*700637cbSDimitry Andric }
343*700637cbSDimitry Andric
runOnModule(Module & M)344*700637cbSDimitry Andric bool AMDGPUPreloadKernelArgumentsLegacy::runOnModule(Module &M) {
345*700637cbSDimitry Andric if (skipModule(M) || !TM)
346*700637cbSDimitry Andric return false;
347*700637cbSDimitry Andric
348*700637cbSDimitry Andric return markKernelArgsAsInreg(M, *TM);
349*700637cbSDimitry Andric }
350*700637cbSDimitry Andric
351*700637cbSDimitry Andric PreservedAnalyses
run(Module & M,ModuleAnalysisManager & AM)352*700637cbSDimitry Andric AMDGPUPreloadKernelArgumentsPass::run(Module &M, ModuleAnalysisManager &AM) {
353*700637cbSDimitry Andric bool Changed = markKernelArgsAsInreg(M, TM);
354*700637cbSDimitry Andric return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
355*700637cbSDimitry Andric }
356