//===- AMDGPURewriteOutArgumentsPass.cpp - Create struct returns ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This pass attempts to replace out argument usage with a return of a
/// struct.
///
/// We can support returning a lot of values directly in registers, but
/// idiomatic C code frequently uses a pointer argument to return a second value
/// rather than returning a struct by value. GPU stack access is also quite
/// painful, so we want to avoid that if possible. Passing a stack object
/// pointer to a function also requires an additional address expansion code
/// sequence to convert the pointer to be relative to the kernel's scratch wave
/// offset register since the callee doesn't know what stack frame the incoming
/// pointer is relative to.
///
/// The goal is to try rewriting code that looks like this:
///
///   int foo(int a, int b, int* out) {
///     *out = bar();
///     return a + b;
///   }
///
/// into something like this:
///
///   std::pair<int, int> foo(int a, int b) {
///     return std::pair(a + b, bar());
///   }
///
/// Typically the incoming pointer is a simple alloca for a temporary variable
/// to use the API, which if replaced with a struct return will be easily SROA'd
/// out when the stub function we create is inlined.
///
/// This pass introduces the struct return, but leaves the unused pointer
/// arguments and introduces a new stub function calling the struct returning
/// body. DeadArgumentElimination should be run after this to clean these up.
410b57cec5SDimitry Andric // 420b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 430b57cec5SDimitry Andric 440b57cec5SDimitry Andric #include "AMDGPU.h" 450b57cec5SDimitry Andric #include "Utils/AMDGPUBaseInfo.h" 460b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h" 47480093f4SDimitry Andric #include "llvm/Analysis/MemoryDependenceAnalysis.h" 4806c3fb27SDimitry Andric #include "llvm/IR/AttributeMask.h" 490b57cec5SDimitry Andric #include "llvm/IR/IRBuilder.h" 500b57cec5SDimitry Andric #include "llvm/IR/Instructions.h" 51480093f4SDimitry Andric #include "llvm/InitializePasses.h" 520b57cec5SDimitry Andric #include "llvm/Pass.h" 530b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h" 540b57cec5SDimitry Andric #include "llvm/Support/Debug.h" 550b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h" 560b57cec5SDimitry Andric 570b57cec5SDimitry Andric #define DEBUG_TYPE "amdgpu-rewrite-out-arguments" 580b57cec5SDimitry Andric 590b57cec5SDimitry Andric using namespace llvm; 600b57cec5SDimitry Andric 610b57cec5SDimitry Andric static cl::opt<bool> AnyAddressSpace( 620b57cec5SDimitry Andric "amdgpu-any-address-space-out-arguments", 630b57cec5SDimitry Andric cl::desc("Replace pointer out arguments with " 640b57cec5SDimitry Andric "struct returns for non-private address space"), 650b57cec5SDimitry Andric cl::Hidden, 660b57cec5SDimitry Andric cl::init(false)); 670b57cec5SDimitry Andric 680b57cec5SDimitry Andric static cl::opt<unsigned> MaxNumRetRegs( 690b57cec5SDimitry Andric "amdgpu-max-return-arg-num-regs", 700b57cec5SDimitry Andric cl::desc("Approximately limit number of return registers for replacing out arguments"), 710b57cec5SDimitry Andric cl::Hidden, 720b57cec5SDimitry Andric cl::init(16)); 730b57cec5SDimitry Andric 740b57cec5SDimitry Andric STATISTIC(NumOutArgumentsReplaced, 750b57cec5SDimitry Andric "Number out arguments moved to struct return values"); 760b57cec5SDimitry Andric 
STATISTIC(NumOutArgumentFunctionsReplaced, 770b57cec5SDimitry Andric "Number of functions with out arguments moved to struct return values"); 780b57cec5SDimitry Andric 790b57cec5SDimitry Andric namespace { 800b57cec5SDimitry Andric 810b57cec5SDimitry Andric class AMDGPURewriteOutArguments : public FunctionPass { 820b57cec5SDimitry Andric private: 830b57cec5SDimitry Andric const DataLayout *DL = nullptr; 840b57cec5SDimitry Andric MemoryDependenceResults *MDA = nullptr; 850b57cec5SDimitry Andric 8681ad6265SDimitry Andric Type *getStoredType(Value &Arg) const; 8781ad6265SDimitry Andric Type *getOutArgumentType(Argument &Arg) const; 880b57cec5SDimitry Andric 890b57cec5SDimitry Andric public: 900b57cec5SDimitry Andric static char ID; 910b57cec5SDimitry Andric 920b57cec5SDimitry Andric AMDGPURewriteOutArguments() : FunctionPass(ID) {} 930b57cec5SDimitry Andric 940b57cec5SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 950b57cec5SDimitry Andric AU.addRequired<MemoryDependenceWrapperPass>(); 960b57cec5SDimitry Andric FunctionPass::getAnalysisUsage(AU); 970b57cec5SDimitry Andric } 980b57cec5SDimitry Andric 990b57cec5SDimitry Andric bool doInitialization(Module &M) override; 1000b57cec5SDimitry Andric bool runOnFunction(Function &F) override; 1010b57cec5SDimitry Andric }; 1020b57cec5SDimitry Andric 1030b57cec5SDimitry Andric } // end anonymous namespace 1040b57cec5SDimitry Andric 1050b57cec5SDimitry Andric INITIALIZE_PASS_BEGIN(AMDGPURewriteOutArguments, DEBUG_TYPE, 1060b57cec5SDimitry Andric "AMDGPU Rewrite Out Arguments", false, false) 1070b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass) 1080b57cec5SDimitry Andric INITIALIZE_PASS_END(AMDGPURewriteOutArguments, DEBUG_TYPE, 1090b57cec5SDimitry Andric "AMDGPU Rewrite Out Arguments", false, false) 1100b57cec5SDimitry Andric 1110b57cec5SDimitry Andric char AMDGPURewriteOutArguments::ID = 0; 1120b57cec5SDimitry Andric 11381ad6265SDimitry Andric Type 
*AMDGPURewriteOutArguments::getStoredType(Value &Arg) const { 1140b57cec5SDimitry Andric const int MaxUses = 10; 1150b57cec5SDimitry Andric int UseCount = 0; 1160b57cec5SDimitry Andric 11781ad6265SDimitry Andric SmallVector<Use *> Worklist; 11881ad6265SDimitry Andric for (Use &U : Arg.uses()) 11981ad6265SDimitry Andric Worklist.push_back(&U); 1200b57cec5SDimitry Andric 12181ad6265SDimitry Andric Type *StoredType = nullptr; 12281ad6265SDimitry Andric while (!Worklist.empty()) { 12381ad6265SDimitry Andric Use *U = Worklist.pop_back_val(); 1240b57cec5SDimitry Andric 12581ad6265SDimitry Andric if (auto *BCI = dyn_cast<BitCastInst>(U->getUser())) { 12681ad6265SDimitry Andric for (Use &U : BCI->uses()) 12781ad6265SDimitry Andric Worklist.push_back(&U); 12881ad6265SDimitry Andric continue; 1290b57cec5SDimitry Andric } 1300b57cec5SDimitry Andric 13181ad6265SDimitry Andric if (auto *SI = dyn_cast<StoreInst>(U->getUser())) { 13281ad6265SDimitry Andric if (UseCount++ > MaxUses) 13381ad6265SDimitry Andric return nullptr; 13481ad6265SDimitry Andric 1350b57cec5SDimitry Andric if (!SI->isSimple() || 13681ad6265SDimitry Andric U->getOperandNo() != StoreInst::getPointerOperandIndex()) 13781ad6265SDimitry Andric return nullptr; 1380b57cec5SDimitry Andric 13981ad6265SDimitry Andric if (StoredType && StoredType != SI->getValueOperand()->getType()) 14081ad6265SDimitry Andric return nullptr; // More than one type. 14181ad6265SDimitry Andric StoredType = SI->getValueOperand()->getType(); 14281ad6265SDimitry Andric continue; 1430b57cec5SDimitry Andric } 1440b57cec5SDimitry Andric 14581ad6265SDimitry Andric // Unsupported user. 
14681ad6265SDimitry Andric return nullptr; 1470b57cec5SDimitry Andric } 1480b57cec5SDimitry Andric 14981ad6265SDimitry Andric return StoredType; 15081ad6265SDimitry Andric } 15181ad6265SDimitry Andric 15281ad6265SDimitry Andric Type *AMDGPURewriteOutArguments::getOutArgumentType(Argument &Arg) const { 1530b57cec5SDimitry Andric const unsigned MaxOutArgSizeBytes = 4 * MaxNumRetRegs; 1540b57cec5SDimitry Andric PointerType *ArgTy = dyn_cast<PointerType>(Arg.getType()); 1550b57cec5SDimitry Andric 1560b57cec5SDimitry Andric // TODO: It might be useful for any out arguments, not just privates. 1570b57cec5SDimitry Andric if (!ArgTy || (ArgTy->getAddressSpace() != DL->getAllocaAddrSpace() && 1580b57cec5SDimitry Andric !AnyAddressSpace) || 15981ad6265SDimitry Andric Arg.hasByValAttr() || Arg.hasStructRetAttr()) { 16081ad6265SDimitry Andric return nullptr; 1610b57cec5SDimitry Andric } 1620b57cec5SDimitry Andric 16381ad6265SDimitry Andric Type *StoredType = getStoredType(Arg); 16481ad6265SDimitry Andric if (!StoredType || DL->getTypeStoreSize(StoredType) > MaxOutArgSizeBytes) 16581ad6265SDimitry Andric return nullptr; 16681ad6265SDimitry Andric 16781ad6265SDimitry Andric return StoredType; 1680b57cec5SDimitry Andric } 1690b57cec5SDimitry Andric 1700b57cec5SDimitry Andric bool AMDGPURewriteOutArguments::doInitialization(Module &M) { 1710b57cec5SDimitry Andric DL = &M.getDataLayout(); 1720b57cec5SDimitry Andric return false; 1730b57cec5SDimitry Andric } 1740b57cec5SDimitry Andric 1750b57cec5SDimitry Andric bool AMDGPURewriteOutArguments::runOnFunction(Function &F) { 1760b57cec5SDimitry Andric if (skipFunction(F)) 1770b57cec5SDimitry Andric return false; 1780b57cec5SDimitry Andric 1790b57cec5SDimitry Andric // TODO: Could probably handle variadic functions. 
1800b57cec5SDimitry Andric if (F.isVarArg() || F.hasStructRetAttr() || 1810b57cec5SDimitry Andric AMDGPU::isEntryFunctionCC(F.getCallingConv())) 1820b57cec5SDimitry Andric return false; 1830b57cec5SDimitry Andric 1840b57cec5SDimitry Andric MDA = &getAnalysis<MemoryDependenceWrapperPass>().getMemDep(); 1850b57cec5SDimitry Andric 1860b57cec5SDimitry Andric unsigned ReturnNumRegs = 0; 18781ad6265SDimitry Andric SmallDenseMap<int, Type *, 4> OutArgIndexes; 1880b57cec5SDimitry Andric SmallVector<Type *, 4> ReturnTypes; 1890b57cec5SDimitry Andric Type *RetTy = F.getReturnType(); 1900b57cec5SDimitry Andric if (!RetTy->isVoidTy()) { 1910b57cec5SDimitry Andric ReturnNumRegs = DL->getTypeStoreSize(RetTy) / 4; 1920b57cec5SDimitry Andric 1930b57cec5SDimitry Andric if (ReturnNumRegs >= MaxNumRetRegs) 1940b57cec5SDimitry Andric return false; 1950b57cec5SDimitry Andric 1960b57cec5SDimitry Andric ReturnTypes.push_back(RetTy); 1970b57cec5SDimitry Andric } 1980b57cec5SDimitry Andric 19981ad6265SDimitry Andric SmallVector<std::pair<Argument *, Type *>, 4> OutArgs; 2000b57cec5SDimitry Andric for (Argument &Arg : F.args()) { 20181ad6265SDimitry Andric if (Type *Ty = getOutArgumentType(Arg)) { 2020b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Found possible out argument " << Arg 2030b57cec5SDimitry Andric << " in function " << F.getName() << '\n'); 20481ad6265SDimitry Andric OutArgs.push_back({&Arg, Ty}); 2050b57cec5SDimitry Andric } 2060b57cec5SDimitry Andric } 2070b57cec5SDimitry Andric 2080b57cec5SDimitry Andric if (OutArgs.empty()) 2090b57cec5SDimitry Andric return false; 2100b57cec5SDimitry Andric 2110b57cec5SDimitry Andric using ReplacementVec = SmallVector<std::pair<Argument *, Value *>, 4>; 2120b57cec5SDimitry Andric 2130b57cec5SDimitry Andric DenseMap<ReturnInst *, ReplacementVec> Replacements; 2140b57cec5SDimitry Andric 2150b57cec5SDimitry Andric SmallVector<ReturnInst *, 4> Returns; 2160b57cec5SDimitry Andric for (BasicBlock &BB : F) { 2170b57cec5SDimitry Andric if (ReturnInst 
*RI = dyn_cast<ReturnInst>(&BB.back())) 2180b57cec5SDimitry Andric Returns.push_back(RI); 2190b57cec5SDimitry Andric } 2200b57cec5SDimitry Andric 2210b57cec5SDimitry Andric if (Returns.empty()) 2220b57cec5SDimitry Andric return false; 2230b57cec5SDimitry Andric 2240b57cec5SDimitry Andric bool Changing; 2250b57cec5SDimitry Andric 2260b57cec5SDimitry Andric do { 2270b57cec5SDimitry Andric Changing = false; 2280b57cec5SDimitry Andric 2290b57cec5SDimitry Andric // Keep retrying if we are able to successfully eliminate an argument. This 2300b57cec5SDimitry Andric // helps with cases with multiple arguments which may alias, such as in a 231349cc55cSDimitry Andric // sincos implementation. If we have 2 stores to arguments, on the first 2320b57cec5SDimitry Andric // attempt the MDA query will succeed for the second store but not the 2330b57cec5SDimitry Andric // first. On the second iteration we've removed that out clobbering argument 2340b57cec5SDimitry Andric // (by effectively moving it into another function) and will find the second 2350b57cec5SDimitry Andric // argument is OK to move. 23681ad6265SDimitry Andric for (const auto &Pair : OutArgs) { 2370b57cec5SDimitry Andric bool ThisReplaceable = true; 2380b57cec5SDimitry Andric SmallVector<std::pair<ReturnInst *, StoreInst *>, 4> ReplaceableStores; 2390b57cec5SDimitry Andric 24081ad6265SDimitry Andric Argument *OutArg = Pair.first; 24181ad6265SDimitry Andric Type *ArgTy = Pair.second; 2420b57cec5SDimitry Andric 2430b57cec5SDimitry Andric // Skip this argument if converting it will push us over the register 2440b57cec5SDimitry Andric // count to return limit. 2450b57cec5SDimitry Andric 2460b57cec5SDimitry Andric // TODO: This is an approximation. When legalized this could be more. We 2470b57cec5SDimitry Andric // can ask TLI for exactly how many. 
2480b57cec5SDimitry Andric unsigned ArgNumRegs = DL->getTypeStoreSize(ArgTy) / 4; 2490b57cec5SDimitry Andric if (ArgNumRegs + ReturnNumRegs > MaxNumRetRegs) 2500b57cec5SDimitry Andric continue; 2510b57cec5SDimitry Andric 2520b57cec5SDimitry Andric // An argument is convertible only if all exit blocks are able to replace 2530b57cec5SDimitry Andric // it. 2540b57cec5SDimitry Andric for (ReturnInst *RI : Returns) { 2550b57cec5SDimitry Andric BasicBlock *BB = RI->getParent(); 2560b57cec5SDimitry Andric 257e8d8bef9SDimitry Andric MemDepResult Q = MDA->getPointerDependencyFrom( 258e8d8bef9SDimitry Andric MemoryLocation::getBeforeOrAfter(OutArg), true, BB->end(), BB, RI); 2590b57cec5SDimitry Andric StoreInst *SI = nullptr; 2600b57cec5SDimitry Andric if (Q.isDef()) 2610b57cec5SDimitry Andric SI = dyn_cast<StoreInst>(Q.getInst()); 2620b57cec5SDimitry Andric 2630b57cec5SDimitry Andric if (SI) { 2640b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Found out argument store: " << *SI << '\n'); 2650b57cec5SDimitry Andric ReplaceableStores.emplace_back(RI, SI); 2660b57cec5SDimitry Andric } else { 2670b57cec5SDimitry Andric ThisReplaceable = false; 2680b57cec5SDimitry Andric break; 2690b57cec5SDimitry Andric } 2700b57cec5SDimitry Andric } 2710b57cec5SDimitry Andric 2720b57cec5SDimitry Andric if (!ThisReplaceable) 2730b57cec5SDimitry Andric continue; // Try the next argument candidate. 
2740b57cec5SDimitry Andric 2750b57cec5SDimitry Andric for (std::pair<ReturnInst *, StoreInst *> Store : ReplaceableStores) { 2760b57cec5SDimitry Andric Value *ReplVal = Store.second->getValueOperand(); 2770b57cec5SDimitry Andric 2780b57cec5SDimitry Andric auto &ValVec = Replacements[Store.first]; 279e8d8bef9SDimitry Andric if (llvm::any_of(ValVec, 2800b57cec5SDimitry Andric [OutArg](const std::pair<Argument *, Value *> &Entry) { 281e8d8bef9SDimitry Andric return Entry.first == OutArg; 282e8d8bef9SDimitry Andric })) { 2830b57cec5SDimitry Andric LLVM_DEBUG(dbgs() 2840b57cec5SDimitry Andric << "Saw multiple out arg stores" << *OutArg << '\n'); 2850b57cec5SDimitry Andric // It is possible to see stores to the same argument multiple times, 2860b57cec5SDimitry Andric // but we expect these would have been optimized out already. 2870b57cec5SDimitry Andric ThisReplaceable = false; 2880b57cec5SDimitry Andric break; 2890b57cec5SDimitry Andric } 2900b57cec5SDimitry Andric 2910b57cec5SDimitry Andric ValVec.emplace_back(OutArg, ReplVal); 2920b57cec5SDimitry Andric Store.second->eraseFromParent(); 2930b57cec5SDimitry Andric } 2940b57cec5SDimitry Andric 2950b57cec5SDimitry Andric if (ThisReplaceable) { 2960b57cec5SDimitry Andric ReturnTypes.push_back(ArgTy); 29781ad6265SDimitry Andric OutArgIndexes.insert({OutArg->getArgNo(), ArgTy}); 2980b57cec5SDimitry Andric ++NumOutArgumentsReplaced; 2990b57cec5SDimitry Andric Changing = true; 3000b57cec5SDimitry Andric } 3010b57cec5SDimitry Andric } 3020b57cec5SDimitry Andric } while (Changing); 3030b57cec5SDimitry Andric 3040b57cec5SDimitry Andric if (Replacements.empty()) 3050b57cec5SDimitry Andric return false; 3060b57cec5SDimitry Andric 3070b57cec5SDimitry Andric LLVMContext &Ctx = F.getParent()->getContext(); 3080b57cec5SDimitry Andric StructType *NewRetTy = StructType::create(Ctx, ReturnTypes, F.getName()); 3090b57cec5SDimitry Andric 3100b57cec5SDimitry Andric FunctionType *NewFuncTy = FunctionType::get(NewRetTy, 3110b57cec5SDimitry 
Andric F.getFunctionType()->params(), 3120b57cec5SDimitry Andric F.isVarArg()); 3130b57cec5SDimitry Andric 3140b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Computed new return type: " << *NewRetTy << '\n'); 3150b57cec5SDimitry Andric 3160b57cec5SDimitry Andric Function *NewFunc = Function::Create(NewFuncTy, Function::PrivateLinkage, 3170b57cec5SDimitry Andric F.getName() + ".body"); 3180b57cec5SDimitry Andric F.getParent()->getFunctionList().insert(F.getIterator(), NewFunc); 3190b57cec5SDimitry Andric NewFunc->copyAttributesFrom(&F); 3200b57cec5SDimitry Andric NewFunc->setComdat(F.getComdat()); 3210b57cec5SDimitry Andric 3220b57cec5SDimitry Andric // We want to preserve the function and param attributes, but need to strip 3230b57cec5SDimitry Andric // off any return attributes, e.g. zeroext doesn't make sense with a struct. 3240b57cec5SDimitry Andric NewFunc->stealArgumentListFrom(F); 3250b57cec5SDimitry Andric 32604eeddc0SDimitry Andric AttributeMask RetAttrs; 3270b57cec5SDimitry Andric RetAttrs.addAttribute(Attribute::SExt); 3280b57cec5SDimitry Andric RetAttrs.addAttribute(Attribute::ZExt); 3290b57cec5SDimitry Andric RetAttrs.addAttribute(Attribute::NoAlias); 330349cc55cSDimitry Andric NewFunc->removeRetAttrs(RetAttrs); 3310b57cec5SDimitry Andric // TODO: How to preserve metadata? 3320b57cec5SDimitry Andric 333*5f757f3fSDimitry Andric NewFunc->setIsNewDbgInfoFormat(F.IsNewDbgInfoFormat); 334*5f757f3fSDimitry Andric 3350b57cec5SDimitry Andric // Move the body of the function into the new rewritten function, and replace 3360b57cec5SDimitry Andric // this function with a stub. 
337bdd1243dSDimitry Andric NewFunc->splice(NewFunc->begin(), &F); 3380b57cec5SDimitry Andric 3390b57cec5SDimitry Andric for (std::pair<ReturnInst *, ReplacementVec> &Replacement : Replacements) { 3400b57cec5SDimitry Andric ReturnInst *RI = Replacement.first; 3410b57cec5SDimitry Andric IRBuilder<> B(RI); 3420b57cec5SDimitry Andric B.SetCurrentDebugLocation(RI->getDebugLoc()); 3430b57cec5SDimitry Andric 3440b57cec5SDimitry Andric int RetIdx = 0; 345bdd1243dSDimitry Andric Value *NewRetVal = PoisonValue::get(NewRetTy); 3460b57cec5SDimitry Andric 3470b57cec5SDimitry Andric Value *RetVal = RI->getReturnValue(); 3480b57cec5SDimitry Andric if (RetVal) 3490b57cec5SDimitry Andric NewRetVal = B.CreateInsertValue(NewRetVal, RetVal, RetIdx++); 3500b57cec5SDimitry Andric 35181ad6265SDimitry Andric for (std::pair<Argument *, Value *> ReturnPoint : Replacement.second) 35281ad6265SDimitry Andric NewRetVal = B.CreateInsertValue(NewRetVal, ReturnPoint.second, RetIdx++); 3530b57cec5SDimitry Andric 3540b57cec5SDimitry Andric if (RetVal) 3550b57cec5SDimitry Andric RI->setOperand(0, NewRetVal); 3560b57cec5SDimitry Andric else { 3570b57cec5SDimitry Andric B.CreateRet(NewRetVal); 3580b57cec5SDimitry Andric RI->eraseFromParent(); 3590b57cec5SDimitry Andric } 3600b57cec5SDimitry Andric } 3610b57cec5SDimitry Andric 3620b57cec5SDimitry Andric SmallVector<Value *, 16> StubCallArgs; 3630b57cec5SDimitry Andric for (Argument &Arg : F.args()) { 3640b57cec5SDimitry Andric if (OutArgIndexes.count(Arg.getArgNo())) { 3650b57cec5SDimitry Andric // It's easier to preserve the type of the argument list. We rely on 3660b57cec5SDimitry Andric // DeadArgumentElimination to take care of these. 
367bdd1243dSDimitry Andric StubCallArgs.push_back(PoisonValue::get(Arg.getType())); 3680b57cec5SDimitry Andric } else { 3690b57cec5SDimitry Andric StubCallArgs.push_back(&Arg); 3700b57cec5SDimitry Andric } 3710b57cec5SDimitry Andric } 3720b57cec5SDimitry Andric 3730b57cec5SDimitry Andric BasicBlock *StubBB = BasicBlock::Create(Ctx, "", &F); 3740b57cec5SDimitry Andric IRBuilder<> B(StubBB); 3750b57cec5SDimitry Andric CallInst *StubCall = B.CreateCall(NewFunc, StubCallArgs); 3760b57cec5SDimitry Andric 3770b57cec5SDimitry Andric int RetIdx = RetTy->isVoidTy() ? 0 : 1; 3780b57cec5SDimitry Andric for (Argument &Arg : F.args()) { 3790b57cec5SDimitry Andric if (!OutArgIndexes.count(Arg.getArgNo())) 3800b57cec5SDimitry Andric continue; 3810b57cec5SDimitry Andric 38281ad6265SDimitry Andric Type *EltTy = OutArgIndexes[Arg.getArgNo()]; 3835ffd83dbSDimitry Andric const auto Align = 3845ffd83dbSDimitry Andric DL->getValueOrABITypeAlignment(Arg.getParamAlign(), EltTy); 3850b57cec5SDimitry Andric 3860b57cec5SDimitry Andric Value *Val = B.CreateExtractValue(StubCall, RetIdx++); 38706c3fb27SDimitry Andric B.CreateAlignedStore(Val, &Arg, Align); 3880b57cec5SDimitry Andric } 3890b57cec5SDimitry Andric 3900b57cec5SDimitry Andric if (!RetTy->isVoidTy()) { 3910b57cec5SDimitry Andric B.CreateRet(B.CreateExtractValue(StubCall, 0)); 3920b57cec5SDimitry Andric } else { 3930b57cec5SDimitry Andric B.CreateRetVoid(); 3940b57cec5SDimitry Andric } 3950b57cec5SDimitry Andric 3960b57cec5SDimitry Andric // The function is now a stub we want to inline. 3970b57cec5SDimitry Andric F.addFnAttr(Attribute::AlwaysInline); 3980b57cec5SDimitry Andric 3990b57cec5SDimitry Andric ++NumOutArgumentFunctionsReplaced; 4000b57cec5SDimitry Andric return true; 4010b57cec5SDimitry Andric } 4020b57cec5SDimitry Andric 4030b57cec5SDimitry Andric FunctionPass *llvm::createAMDGPURewriteOutArgumentsPass() { 4040b57cec5SDimitry Andric return new AMDGPURewriteOutArguments(); 4050b57cec5SDimitry Andric } 406