xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp (revision 5f757f3ff9144b609b3c433dfd370cc6bdc191ad)
10b57cec5SDimitry Andric //===- AMDGPURewriteOutArgumentsPass.cpp - Create struct returns ----------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric /// \file This pass attempts to replace out argument usage with a return of a
100b57cec5SDimitry Andric /// struct.
110b57cec5SDimitry Andric ///
120b57cec5SDimitry Andric /// We can support returning a lot of values directly in registers, but
130b57cec5SDimitry Andric /// idiomatic C code frequently uses a pointer argument to return a second value
140b57cec5SDimitry Andric /// rather than returning a struct by value. GPU stack access is also quite
150b57cec5SDimitry Andric /// painful, so we want to avoid that if possible. Passing a stack object
160b57cec5SDimitry Andric /// pointer to a function also requires an additional address expansion code
170b57cec5SDimitry Andric /// sequence to convert the pointer to be relative to the kernel's scratch wave
180b57cec5SDimitry Andric /// offset register since the callee doesn't know what stack frame the incoming
190b57cec5SDimitry Andric /// pointer is relative to.
200b57cec5SDimitry Andric ///
210b57cec5SDimitry Andric /// The goal is to try rewriting code that looks like this:
220b57cec5SDimitry Andric ///
230b57cec5SDimitry Andric ///  int foo(int a, int b, int* out) {
240b57cec5SDimitry Andric ///     *out = bar();
250b57cec5SDimitry Andric ///     return a + b;
260b57cec5SDimitry Andric /// }
270b57cec5SDimitry Andric ///
280b57cec5SDimitry Andric /// into something like this:
290b57cec5SDimitry Andric ///
300b57cec5SDimitry Andric ///  std::pair<int, int> foo(int a, int b) {
31bdd1243dSDimitry Andric ///     return std::pair(a + b, bar());
320b57cec5SDimitry Andric /// }
330b57cec5SDimitry Andric ///
/// Typically the incoming pointer is a simple alloca for a temporary variable
/// that exists only to call the API. Once the out argument is replaced with a
/// struct return, that alloca is easily SROA'd out after the stub function we
/// create is inlined.
370b57cec5SDimitry Andric ///
380b57cec5SDimitry Andric /// This pass introduces the struct return, but leaves the unused pointer
390b57cec5SDimitry Andric /// arguments and introduces a new stub function calling the struct returning
400b57cec5SDimitry Andric /// body. DeadArgumentElimination should be run after this to clean these up.
410b57cec5SDimitry Andric //
420b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
430b57cec5SDimitry Andric 
440b57cec5SDimitry Andric #include "AMDGPU.h"
450b57cec5SDimitry Andric #include "Utils/AMDGPUBaseInfo.h"
460b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h"
47480093f4SDimitry Andric #include "llvm/Analysis/MemoryDependenceAnalysis.h"
4806c3fb27SDimitry Andric #include "llvm/IR/AttributeMask.h"
490b57cec5SDimitry Andric #include "llvm/IR/IRBuilder.h"
500b57cec5SDimitry Andric #include "llvm/IR/Instructions.h"
51480093f4SDimitry Andric #include "llvm/InitializePasses.h"
520b57cec5SDimitry Andric #include "llvm/Pass.h"
530b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h"
540b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
550b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
560b57cec5SDimitry Andric 
570b57cec5SDimitry Andric #define DEBUG_TYPE "amdgpu-rewrite-out-arguments"
580b57cec5SDimitry Andric 
590b57cec5SDimitry Andric using namespace llvm;
600b57cec5SDimitry Andric 
610b57cec5SDimitry Andric static cl::opt<bool> AnyAddressSpace(
620b57cec5SDimitry Andric   "amdgpu-any-address-space-out-arguments",
630b57cec5SDimitry Andric   cl::desc("Replace pointer out arguments with "
640b57cec5SDimitry Andric            "struct returns for non-private address space"),
650b57cec5SDimitry Andric   cl::Hidden,
660b57cec5SDimitry Andric   cl::init(false));
670b57cec5SDimitry Andric 
680b57cec5SDimitry Andric static cl::opt<unsigned> MaxNumRetRegs(
690b57cec5SDimitry Andric   "amdgpu-max-return-arg-num-regs",
700b57cec5SDimitry Andric   cl::desc("Approximately limit number of return registers for replacing out arguments"),
710b57cec5SDimitry Andric   cl::Hidden,
720b57cec5SDimitry Andric   cl::init(16));
730b57cec5SDimitry Andric 
740b57cec5SDimitry Andric STATISTIC(NumOutArgumentsReplaced,
750b57cec5SDimitry Andric           "Number out arguments moved to struct return values");
760b57cec5SDimitry Andric STATISTIC(NumOutArgumentFunctionsReplaced,
770b57cec5SDimitry Andric           "Number of functions with out arguments moved to struct return values");
780b57cec5SDimitry Andric 
790b57cec5SDimitry Andric namespace {
800b57cec5SDimitry Andric 
810b57cec5SDimitry Andric class AMDGPURewriteOutArguments : public FunctionPass {
820b57cec5SDimitry Andric private:
830b57cec5SDimitry Andric   const DataLayout *DL = nullptr;
840b57cec5SDimitry Andric   MemoryDependenceResults *MDA = nullptr;
850b57cec5SDimitry Andric 
8681ad6265SDimitry Andric   Type *getStoredType(Value &Arg) const;
8781ad6265SDimitry Andric   Type *getOutArgumentType(Argument &Arg) const;
880b57cec5SDimitry Andric 
890b57cec5SDimitry Andric public:
900b57cec5SDimitry Andric   static char ID;
910b57cec5SDimitry Andric 
920b57cec5SDimitry Andric   AMDGPURewriteOutArguments() : FunctionPass(ID) {}
930b57cec5SDimitry Andric 
940b57cec5SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
950b57cec5SDimitry Andric     AU.addRequired<MemoryDependenceWrapperPass>();
960b57cec5SDimitry Andric     FunctionPass::getAnalysisUsage(AU);
970b57cec5SDimitry Andric   }
980b57cec5SDimitry Andric 
990b57cec5SDimitry Andric   bool doInitialization(Module &M) override;
1000b57cec5SDimitry Andric   bool runOnFunction(Function &F) override;
1010b57cec5SDimitry Andric };
1020b57cec5SDimitry Andric 
1030b57cec5SDimitry Andric } // end anonymous namespace
1040b57cec5SDimitry Andric 
1050b57cec5SDimitry Andric INITIALIZE_PASS_BEGIN(AMDGPURewriteOutArguments, DEBUG_TYPE,
1060b57cec5SDimitry Andric                       "AMDGPU Rewrite Out Arguments", false, false)
1070b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
1080b57cec5SDimitry Andric INITIALIZE_PASS_END(AMDGPURewriteOutArguments, DEBUG_TYPE,
1090b57cec5SDimitry Andric                     "AMDGPU Rewrite Out Arguments", false, false)
1100b57cec5SDimitry Andric 
1110b57cec5SDimitry Andric char AMDGPURewriteOutArguments::ID = 0;
1120b57cec5SDimitry Andric 
11381ad6265SDimitry Andric Type *AMDGPURewriteOutArguments::getStoredType(Value &Arg) const {
1140b57cec5SDimitry Andric   const int MaxUses = 10;
1150b57cec5SDimitry Andric   int UseCount = 0;
1160b57cec5SDimitry Andric 
11781ad6265SDimitry Andric   SmallVector<Use *> Worklist;
11881ad6265SDimitry Andric   for (Use &U : Arg.uses())
11981ad6265SDimitry Andric     Worklist.push_back(&U);
1200b57cec5SDimitry Andric 
12181ad6265SDimitry Andric   Type *StoredType = nullptr;
12281ad6265SDimitry Andric   while (!Worklist.empty()) {
12381ad6265SDimitry Andric     Use *U = Worklist.pop_back_val();
1240b57cec5SDimitry Andric 
12581ad6265SDimitry Andric     if (auto *BCI = dyn_cast<BitCastInst>(U->getUser())) {
12681ad6265SDimitry Andric       for (Use &U : BCI->uses())
12781ad6265SDimitry Andric         Worklist.push_back(&U);
12881ad6265SDimitry Andric       continue;
1290b57cec5SDimitry Andric     }
1300b57cec5SDimitry Andric 
13181ad6265SDimitry Andric     if (auto *SI = dyn_cast<StoreInst>(U->getUser())) {
13281ad6265SDimitry Andric       if (UseCount++ > MaxUses)
13381ad6265SDimitry Andric         return nullptr;
13481ad6265SDimitry Andric 
1350b57cec5SDimitry Andric       if (!SI->isSimple() ||
13681ad6265SDimitry Andric           U->getOperandNo() != StoreInst::getPointerOperandIndex())
13781ad6265SDimitry Andric         return nullptr;
1380b57cec5SDimitry Andric 
13981ad6265SDimitry Andric       if (StoredType && StoredType != SI->getValueOperand()->getType())
14081ad6265SDimitry Andric         return nullptr; // More than one type.
14181ad6265SDimitry Andric       StoredType = SI->getValueOperand()->getType();
14281ad6265SDimitry Andric       continue;
1430b57cec5SDimitry Andric     }
1440b57cec5SDimitry Andric 
14581ad6265SDimitry Andric     // Unsupported user.
14681ad6265SDimitry Andric     return nullptr;
1470b57cec5SDimitry Andric   }
1480b57cec5SDimitry Andric 
14981ad6265SDimitry Andric   return StoredType;
15081ad6265SDimitry Andric }
15181ad6265SDimitry Andric 
15281ad6265SDimitry Andric Type *AMDGPURewriteOutArguments::getOutArgumentType(Argument &Arg) const {
1530b57cec5SDimitry Andric   const unsigned MaxOutArgSizeBytes = 4 * MaxNumRetRegs;
1540b57cec5SDimitry Andric   PointerType *ArgTy = dyn_cast<PointerType>(Arg.getType());
1550b57cec5SDimitry Andric 
1560b57cec5SDimitry Andric   // TODO: It might be useful for any out arguments, not just privates.
1570b57cec5SDimitry Andric   if (!ArgTy || (ArgTy->getAddressSpace() != DL->getAllocaAddrSpace() &&
1580b57cec5SDimitry Andric                  !AnyAddressSpace) ||
15981ad6265SDimitry Andric       Arg.hasByValAttr() || Arg.hasStructRetAttr()) {
16081ad6265SDimitry Andric     return nullptr;
1610b57cec5SDimitry Andric   }
1620b57cec5SDimitry Andric 
16381ad6265SDimitry Andric   Type *StoredType = getStoredType(Arg);
16481ad6265SDimitry Andric   if (!StoredType || DL->getTypeStoreSize(StoredType) > MaxOutArgSizeBytes)
16581ad6265SDimitry Andric     return nullptr;
16681ad6265SDimitry Andric 
16781ad6265SDimitry Andric   return StoredType;
1680b57cec5SDimitry Andric }
1690b57cec5SDimitry Andric 
1700b57cec5SDimitry Andric bool AMDGPURewriteOutArguments::doInitialization(Module &M) {
1710b57cec5SDimitry Andric   DL = &M.getDataLayout();
1720b57cec5SDimitry Andric   return false;
1730b57cec5SDimitry Andric }
1740b57cec5SDimitry Andric 
1750b57cec5SDimitry Andric bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
1760b57cec5SDimitry Andric   if (skipFunction(F))
1770b57cec5SDimitry Andric     return false;
1780b57cec5SDimitry Andric 
1790b57cec5SDimitry Andric   // TODO: Could probably handle variadic functions.
1800b57cec5SDimitry Andric   if (F.isVarArg() || F.hasStructRetAttr() ||
1810b57cec5SDimitry Andric       AMDGPU::isEntryFunctionCC(F.getCallingConv()))
1820b57cec5SDimitry Andric     return false;
1830b57cec5SDimitry Andric 
1840b57cec5SDimitry Andric   MDA = &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
1850b57cec5SDimitry Andric 
1860b57cec5SDimitry Andric   unsigned ReturnNumRegs = 0;
18781ad6265SDimitry Andric   SmallDenseMap<int, Type *, 4> OutArgIndexes;
1880b57cec5SDimitry Andric   SmallVector<Type *, 4> ReturnTypes;
1890b57cec5SDimitry Andric   Type *RetTy = F.getReturnType();
1900b57cec5SDimitry Andric   if (!RetTy->isVoidTy()) {
1910b57cec5SDimitry Andric     ReturnNumRegs = DL->getTypeStoreSize(RetTy) / 4;
1920b57cec5SDimitry Andric 
1930b57cec5SDimitry Andric     if (ReturnNumRegs >= MaxNumRetRegs)
1940b57cec5SDimitry Andric       return false;
1950b57cec5SDimitry Andric 
1960b57cec5SDimitry Andric     ReturnTypes.push_back(RetTy);
1970b57cec5SDimitry Andric   }
1980b57cec5SDimitry Andric 
19981ad6265SDimitry Andric   SmallVector<std::pair<Argument *, Type *>, 4> OutArgs;
2000b57cec5SDimitry Andric   for (Argument &Arg : F.args()) {
20181ad6265SDimitry Andric     if (Type *Ty = getOutArgumentType(Arg)) {
2020b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "Found possible out argument " << Arg
2030b57cec5SDimitry Andric                         << " in function " << F.getName() << '\n');
20481ad6265SDimitry Andric       OutArgs.push_back({&Arg, Ty});
2050b57cec5SDimitry Andric     }
2060b57cec5SDimitry Andric   }
2070b57cec5SDimitry Andric 
2080b57cec5SDimitry Andric   if (OutArgs.empty())
2090b57cec5SDimitry Andric     return false;
2100b57cec5SDimitry Andric 
2110b57cec5SDimitry Andric   using ReplacementVec = SmallVector<std::pair<Argument *, Value *>, 4>;
2120b57cec5SDimitry Andric 
2130b57cec5SDimitry Andric   DenseMap<ReturnInst *, ReplacementVec> Replacements;
2140b57cec5SDimitry Andric 
2150b57cec5SDimitry Andric   SmallVector<ReturnInst *, 4> Returns;
2160b57cec5SDimitry Andric   for (BasicBlock &BB : F) {
2170b57cec5SDimitry Andric     if (ReturnInst *RI = dyn_cast<ReturnInst>(&BB.back()))
2180b57cec5SDimitry Andric       Returns.push_back(RI);
2190b57cec5SDimitry Andric   }
2200b57cec5SDimitry Andric 
2210b57cec5SDimitry Andric   if (Returns.empty())
2220b57cec5SDimitry Andric     return false;
2230b57cec5SDimitry Andric 
2240b57cec5SDimitry Andric   bool Changing;
2250b57cec5SDimitry Andric 
2260b57cec5SDimitry Andric   do {
2270b57cec5SDimitry Andric     Changing = false;
2280b57cec5SDimitry Andric 
2290b57cec5SDimitry Andric     // Keep retrying if we are able to successfully eliminate an argument. This
2300b57cec5SDimitry Andric     // helps with cases with multiple arguments which may alias, such as in a
231349cc55cSDimitry Andric     // sincos implementation. If we have 2 stores to arguments, on the first
2320b57cec5SDimitry Andric     // attempt the MDA query will succeed for the second store but not the
2330b57cec5SDimitry Andric     // first. On the second iteration we've removed that out clobbering argument
2340b57cec5SDimitry Andric     // (by effectively moving it into another function) and will find the second
2350b57cec5SDimitry Andric     // argument is OK to move.
23681ad6265SDimitry Andric     for (const auto &Pair : OutArgs) {
2370b57cec5SDimitry Andric       bool ThisReplaceable = true;
2380b57cec5SDimitry Andric       SmallVector<std::pair<ReturnInst *, StoreInst *>, 4> ReplaceableStores;
2390b57cec5SDimitry Andric 
24081ad6265SDimitry Andric       Argument *OutArg = Pair.first;
24181ad6265SDimitry Andric       Type *ArgTy = Pair.second;
2420b57cec5SDimitry Andric 
2430b57cec5SDimitry Andric       // Skip this argument if converting it will push us over the register
2440b57cec5SDimitry Andric       // count to return limit.
2450b57cec5SDimitry Andric 
2460b57cec5SDimitry Andric       // TODO: This is an approximation. When legalized this could be more. We
2470b57cec5SDimitry Andric       // can ask TLI for exactly how many.
2480b57cec5SDimitry Andric       unsigned ArgNumRegs = DL->getTypeStoreSize(ArgTy) / 4;
2490b57cec5SDimitry Andric       if (ArgNumRegs + ReturnNumRegs > MaxNumRetRegs)
2500b57cec5SDimitry Andric         continue;
2510b57cec5SDimitry Andric 
2520b57cec5SDimitry Andric       // An argument is convertible only if all exit blocks are able to replace
2530b57cec5SDimitry Andric       // it.
2540b57cec5SDimitry Andric       for (ReturnInst *RI : Returns) {
2550b57cec5SDimitry Andric         BasicBlock *BB = RI->getParent();
2560b57cec5SDimitry Andric 
257e8d8bef9SDimitry Andric         MemDepResult Q = MDA->getPointerDependencyFrom(
258e8d8bef9SDimitry Andric             MemoryLocation::getBeforeOrAfter(OutArg), true, BB->end(), BB, RI);
2590b57cec5SDimitry Andric         StoreInst *SI = nullptr;
2600b57cec5SDimitry Andric         if (Q.isDef())
2610b57cec5SDimitry Andric           SI = dyn_cast<StoreInst>(Q.getInst());
2620b57cec5SDimitry Andric 
2630b57cec5SDimitry Andric         if (SI) {
2640b57cec5SDimitry Andric           LLVM_DEBUG(dbgs() << "Found out argument store: " << *SI << '\n');
2650b57cec5SDimitry Andric           ReplaceableStores.emplace_back(RI, SI);
2660b57cec5SDimitry Andric         } else {
2670b57cec5SDimitry Andric           ThisReplaceable = false;
2680b57cec5SDimitry Andric           break;
2690b57cec5SDimitry Andric         }
2700b57cec5SDimitry Andric       }
2710b57cec5SDimitry Andric 
2720b57cec5SDimitry Andric       if (!ThisReplaceable)
2730b57cec5SDimitry Andric         continue; // Try the next argument candidate.
2740b57cec5SDimitry Andric 
2750b57cec5SDimitry Andric       for (std::pair<ReturnInst *, StoreInst *> Store : ReplaceableStores) {
2760b57cec5SDimitry Andric         Value *ReplVal = Store.second->getValueOperand();
2770b57cec5SDimitry Andric 
2780b57cec5SDimitry Andric         auto &ValVec = Replacements[Store.first];
279e8d8bef9SDimitry Andric         if (llvm::any_of(ValVec,
2800b57cec5SDimitry Andric                          [OutArg](const std::pair<Argument *, Value *> &Entry) {
281e8d8bef9SDimitry Andric                            return Entry.first == OutArg;
282e8d8bef9SDimitry Andric                          })) {
2830b57cec5SDimitry Andric           LLVM_DEBUG(dbgs()
2840b57cec5SDimitry Andric                      << "Saw multiple out arg stores" << *OutArg << '\n');
2850b57cec5SDimitry Andric           // It is possible to see stores to the same argument multiple times,
2860b57cec5SDimitry Andric           // but we expect these would have been optimized out already.
2870b57cec5SDimitry Andric           ThisReplaceable = false;
2880b57cec5SDimitry Andric           break;
2890b57cec5SDimitry Andric         }
2900b57cec5SDimitry Andric 
2910b57cec5SDimitry Andric         ValVec.emplace_back(OutArg, ReplVal);
2920b57cec5SDimitry Andric         Store.second->eraseFromParent();
2930b57cec5SDimitry Andric       }
2940b57cec5SDimitry Andric 
2950b57cec5SDimitry Andric       if (ThisReplaceable) {
2960b57cec5SDimitry Andric         ReturnTypes.push_back(ArgTy);
29781ad6265SDimitry Andric         OutArgIndexes.insert({OutArg->getArgNo(), ArgTy});
2980b57cec5SDimitry Andric         ++NumOutArgumentsReplaced;
2990b57cec5SDimitry Andric         Changing = true;
3000b57cec5SDimitry Andric       }
3010b57cec5SDimitry Andric     }
3020b57cec5SDimitry Andric   } while (Changing);
3030b57cec5SDimitry Andric 
3040b57cec5SDimitry Andric   if (Replacements.empty())
3050b57cec5SDimitry Andric     return false;
3060b57cec5SDimitry Andric 
3070b57cec5SDimitry Andric   LLVMContext &Ctx = F.getParent()->getContext();
3080b57cec5SDimitry Andric   StructType *NewRetTy = StructType::create(Ctx, ReturnTypes, F.getName());
3090b57cec5SDimitry Andric 
3100b57cec5SDimitry Andric   FunctionType *NewFuncTy = FunctionType::get(NewRetTy,
3110b57cec5SDimitry Andric                                               F.getFunctionType()->params(),
3120b57cec5SDimitry Andric                                               F.isVarArg());
3130b57cec5SDimitry Andric 
3140b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "Computed new return type: " << *NewRetTy << '\n');
3150b57cec5SDimitry Andric 
3160b57cec5SDimitry Andric   Function *NewFunc = Function::Create(NewFuncTy, Function::PrivateLinkage,
3170b57cec5SDimitry Andric                                        F.getName() + ".body");
3180b57cec5SDimitry Andric   F.getParent()->getFunctionList().insert(F.getIterator(), NewFunc);
3190b57cec5SDimitry Andric   NewFunc->copyAttributesFrom(&F);
3200b57cec5SDimitry Andric   NewFunc->setComdat(F.getComdat());
3210b57cec5SDimitry Andric 
3220b57cec5SDimitry Andric   // We want to preserve the function and param attributes, but need to strip
3230b57cec5SDimitry Andric   // off any return attributes, e.g. zeroext doesn't make sense with a struct.
3240b57cec5SDimitry Andric   NewFunc->stealArgumentListFrom(F);
3250b57cec5SDimitry Andric 
32604eeddc0SDimitry Andric   AttributeMask RetAttrs;
3270b57cec5SDimitry Andric   RetAttrs.addAttribute(Attribute::SExt);
3280b57cec5SDimitry Andric   RetAttrs.addAttribute(Attribute::ZExt);
3290b57cec5SDimitry Andric   RetAttrs.addAttribute(Attribute::NoAlias);
330349cc55cSDimitry Andric   NewFunc->removeRetAttrs(RetAttrs);
3310b57cec5SDimitry Andric   // TODO: How to preserve metadata?
3320b57cec5SDimitry Andric 
333*5f757f3fSDimitry Andric   NewFunc->setIsNewDbgInfoFormat(F.IsNewDbgInfoFormat);
334*5f757f3fSDimitry Andric 
3350b57cec5SDimitry Andric   // Move the body of the function into the new rewritten function, and replace
3360b57cec5SDimitry Andric   // this function with a stub.
337bdd1243dSDimitry Andric   NewFunc->splice(NewFunc->begin(), &F);
3380b57cec5SDimitry Andric 
3390b57cec5SDimitry Andric   for (std::pair<ReturnInst *, ReplacementVec> &Replacement : Replacements) {
3400b57cec5SDimitry Andric     ReturnInst *RI = Replacement.first;
3410b57cec5SDimitry Andric     IRBuilder<> B(RI);
3420b57cec5SDimitry Andric     B.SetCurrentDebugLocation(RI->getDebugLoc());
3430b57cec5SDimitry Andric 
3440b57cec5SDimitry Andric     int RetIdx = 0;
345bdd1243dSDimitry Andric     Value *NewRetVal = PoisonValue::get(NewRetTy);
3460b57cec5SDimitry Andric 
3470b57cec5SDimitry Andric     Value *RetVal = RI->getReturnValue();
3480b57cec5SDimitry Andric     if (RetVal)
3490b57cec5SDimitry Andric       NewRetVal = B.CreateInsertValue(NewRetVal, RetVal, RetIdx++);
3500b57cec5SDimitry Andric 
35181ad6265SDimitry Andric     for (std::pair<Argument *, Value *> ReturnPoint : Replacement.second)
35281ad6265SDimitry Andric       NewRetVal = B.CreateInsertValue(NewRetVal, ReturnPoint.second, RetIdx++);
3530b57cec5SDimitry Andric 
3540b57cec5SDimitry Andric     if (RetVal)
3550b57cec5SDimitry Andric       RI->setOperand(0, NewRetVal);
3560b57cec5SDimitry Andric     else {
3570b57cec5SDimitry Andric       B.CreateRet(NewRetVal);
3580b57cec5SDimitry Andric       RI->eraseFromParent();
3590b57cec5SDimitry Andric     }
3600b57cec5SDimitry Andric   }
3610b57cec5SDimitry Andric 
3620b57cec5SDimitry Andric   SmallVector<Value *, 16> StubCallArgs;
3630b57cec5SDimitry Andric   for (Argument &Arg : F.args()) {
3640b57cec5SDimitry Andric     if (OutArgIndexes.count(Arg.getArgNo())) {
3650b57cec5SDimitry Andric       // It's easier to preserve the type of the argument list. We rely on
3660b57cec5SDimitry Andric       // DeadArgumentElimination to take care of these.
367bdd1243dSDimitry Andric       StubCallArgs.push_back(PoisonValue::get(Arg.getType()));
3680b57cec5SDimitry Andric     } else {
3690b57cec5SDimitry Andric       StubCallArgs.push_back(&Arg);
3700b57cec5SDimitry Andric     }
3710b57cec5SDimitry Andric   }
3720b57cec5SDimitry Andric 
3730b57cec5SDimitry Andric   BasicBlock *StubBB = BasicBlock::Create(Ctx, "", &F);
3740b57cec5SDimitry Andric   IRBuilder<> B(StubBB);
3750b57cec5SDimitry Andric   CallInst *StubCall = B.CreateCall(NewFunc, StubCallArgs);
3760b57cec5SDimitry Andric 
3770b57cec5SDimitry Andric   int RetIdx = RetTy->isVoidTy() ? 0 : 1;
3780b57cec5SDimitry Andric   for (Argument &Arg : F.args()) {
3790b57cec5SDimitry Andric     if (!OutArgIndexes.count(Arg.getArgNo()))
3800b57cec5SDimitry Andric       continue;
3810b57cec5SDimitry Andric 
38281ad6265SDimitry Andric     Type *EltTy = OutArgIndexes[Arg.getArgNo()];
3835ffd83dbSDimitry Andric     const auto Align =
3845ffd83dbSDimitry Andric         DL->getValueOrABITypeAlignment(Arg.getParamAlign(), EltTy);
3850b57cec5SDimitry Andric 
3860b57cec5SDimitry Andric     Value *Val = B.CreateExtractValue(StubCall, RetIdx++);
38706c3fb27SDimitry Andric     B.CreateAlignedStore(Val, &Arg, Align);
3880b57cec5SDimitry Andric   }
3890b57cec5SDimitry Andric 
3900b57cec5SDimitry Andric   if (!RetTy->isVoidTy()) {
3910b57cec5SDimitry Andric     B.CreateRet(B.CreateExtractValue(StubCall, 0));
3920b57cec5SDimitry Andric   } else {
3930b57cec5SDimitry Andric     B.CreateRetVoid();
3940b57cec5SDimitry Andric   }
3950b57cec5SDimitry Andric 
3960b57cec5SDimitry Andric   // The function is now a stub we want to inline.
3970b57cec5SDimitry Andric   F.addFnAttr(Attribute::AlwaysInline);
3980b57cec5SDimitry Andric 
3990b57cec5SDimitry Andric   ++NumOutArgumentFunctionsReplaced;
4000b57cec5SDimitry Andric   return true;
4010b57cec5SDimitry Andric }
4020b57cec5SDimitry Andric 
4030b57cec5SDimitry Andric FunctionPass *llvm::createAMDGPURewriteOutArgumentsPass() {
4040b57cec5SDimitry Andric   return new AMDGPURewriteOutArguments();
4050b57cec5SDimitry Andric }
406