10b57cec5SDimitry Andric //===- TailRecursionElimination.cpp - Eliminate Tail Calls ----------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This file transforms calls of the current function (self recursion) followed
100b57cec5SDimitry Andric // by a return instruction with a branch to the entry of the function, creating
110b57cec5SDimitry Andric // a loop. This pass also implements the following extensions to the basic
120b57cec5SDimitry Andric // algorithm:
130b57cec5SDimitry Andric //
140b57cec5SDimitry Andric // 1. Trivial instructions between the call and return do not prevent the
150b57cec5SDimitry Andric // transformation from taking place, though currently the analysis cannot
160b57cec5SDimitry Andric // support moving any really useful instructions (only dead ones).
170b57cec5SDimitry Andric // 2. This pass transforms functions that are prevented from being tail
180b57cec5SDimitry Andric // recursive by an associative and commutative expression to use an
190b57cec5SDimitry Andric // accumulator variable, thus compiling the typical naive factorial or
200b57cec5SDimitry Andric // 'fib' implementation into efficient code.
210b57cec5SDimitry Andric // 3. TRE is performed if the function returns void, if the return
220b57cec5SDimitry Andric // returns the result returned by the call, or if the function returns a
230b57cec5SDimitry Andric // run-time constant on all exits from the function. It is possible, though
240b57cec5SDimitry Andric // unlikely, that the return returns something else (like constant 0), and
250b57cec5SDimitry Andric // can still be TRE'd. It can be TRE'd if ALL OTHER return instructions in
260b57cec5SDimitry Andric // the function return the exact same value.
270b57cec5SDimitry Andric // 4. If it can prove that callees do not access their caller stack frame,
280b57cec5SDimitry Andric // they are marked as eligible for tail call elimination (by the code
290b57cec5SDimitry Andric // generator).
300b57cec5SDimitry Andric //
310b57cec5SDimitry Andric // There are several improvements that could be made:
320b57cec5SDimitry Andric //
//  1. If the function has any alloca instructions, these instructions will be
//     moved out of the entry block of the function, causing them to be
//     evaluated each time through the tail recursion.  Safely keeping allocas
//     in the entry block requires analysis to prove that the tail-called
//     function does not read or write the stack object.
380b57cec5SDimitry Andric // 2. Tail recursion is only performed if the call immediately precedes the
390b57cec5SDimitry Andric // return instruction. It's possible that there could be a jump between
400b57cec5SDimitry Andric // the call and the return.
410b57cec5SDimitry Andric // 3. There can be intervening operations between the call and the return that
420b57cec5SDimitry Andric // prevent the TRE from occurring. For example, there could be GEP's and
430b57cec5SDimitry Andric // stores to memory that will not be read or written by the call. This
440b57cec5SDimitry Andric // requires some substantial analysis (such as with DSA) to prove safe to
450b57cec5SDimitry Andric // move ahead of the call, but doing so could allow many more TREs to be
460b57cec5SDimitry Andric // performed, for example in TreeAdd/TreeAlloc from the treeadd benchmark.
470b57cec5SDimitry Andric // 4. The algorithm we use to detect if callees access their caller stack
480b57cec5SDimitry Andric // frames is very primitive.
490b57cec5SDimitry Andric //
500b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
510b57cec5SDimitry Andric
520b57cec5SDimitry Andric #include "llvm/Transforms/Scalar/TailRecursionElimination.h"
530b57cec5SDimitry Andric #include "llvm/ADT/STLExtras.h"
540b57cec5SDimitry Andric #include "llvm/ADT/SmallPtrSet.h"
550b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h"
560b57cec5SDimitry Andric #include "llvm/Analysis/DomTreeUpdater.h"
570b57cec5SDimitry Andric #include "llvm/Analysis/GlobalsModRef.h"
580b57cec5SDimitry Andric #include "llvm/Analysis/InstructionSimplify.h"
590b57cec5SDimitry Andric #include "llvm/Analysis/Loads.h"
600b57cec5SDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h"
610b57cec5SDimitry Andric #include "llvm/Analysis/PostDominators.h"
620b57cec5SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h"
63fe6060f1SDimitry Andric #include "llvm/Analysis/ValueTracking.h"
640b57cec5SDimitry Andric #include "llvm/IR/CFG.h"
650b57cec5SDimitry Andric #include "llvm/IR/Constants.h"
660b57cec5SDimitry Andric #include "llvm/IR/DataLayout.h"
670b57cec5SDimitry Andric #include "llvm/IR/DerivedTypes.h"
680b57cec5SDimitry Andric #include "llvm/IR/DiagnosticInfo.h"
690b57cec5SDimitry Andric #include "llvm/IR/Dominators.h"
700b57cec5SDimitry Andric #include "llvm/IR/Function.h"
71fe6060f1SDimitry Andric #include "llvm/IR/IRBuilder.h"
720b57cec5SDimitry Andric #include "llvm/IR/InstIterator.h"
730b57cec5SDimitry Andric #include "llvm/IR/Instructions.h"
740b57cec5SDimitry Andric #include "llvm/IR/IntrinsicInst.h"
750b57cec5SDimitry Andric #include "llvm/IR/Module.h"
76480093f4SDimitry Andric #include "llvm/InitializePasses.h"
770b57cec5SDimitry Andric #include "llvm/Pass.h"
780b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
790b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
800b57cec5SDimitry Andric #include "llvm/Transforms/Scalar.h"
810b57cec5SDimitry Andric #include "llvm/Transforms/Utils/BasicBlockUtils.h"
820b57cec5SDimitry Andric using namespace llvm;
830b57cec5SDimitry Andric
// Name under which this file's debug output, optimization remarks and
// statistics are registered.
#define DEBUG_TYPE "tailcallelim"

// Counters declared through the STATISTIC macro; each pairs a counter name
// with its human-readable description.
STATISTIC(NumEliminated, "Number of tail calls removed");
STATISTIC(NumRetDuped, "Number of return duplicated");
STATISTIC(NumAccumAdded, "Number of accumulators introduced");
890b57cec5SDimitry Andric
900b57cec5SDimitry Andric /// Scan the specified function for alloca instructions.
910b57cec5SDimitry Andric /// If it contains any dynamic allocas, returns false.
canTRE(Function & F)920b57cec5SDimitry Andric static bool canTRE(Function &F) {
93fe6060f1SDimitry Andric // TODO: We don't do TRE if dynamic allocas are used.
94fe6060f1SDimitry Andric // Dynamic allocas allocate stack space which should be
95fe6060f1SDimitry Andric // deallocated before new iteration started. That is
96fe6060f1SDimitry Andric // currently not implemented.
970b57cec5SDimitry Andric return llvm::all_of(instructions(F), [](Instruction &I) {
980b57cec5SDimitry Andric auto *AI = dyn_cast<AllocaInst>(&I);
990b57cec5SDimitry Andric return !AI || AI->isStaticAlloca();
1000b57cec5SDimitry Andric });
1010b57cec5SDimitry Andric }
1020b57cec5SDimitry Andric
1030b57cec5SDimitry Andric namespace {
1040b57cec5SDimitry Andric struct AllocaDerivedValueTracker {
1050b57cec5SDimitry Andric // Start at a root value and walk its use-def chain to mark calls that use the
1060b57cec5SDimitry Andric // value or a derived value in AllocaUsers, and places where it may escape in
1070b57cec5SDimitry Andric // EscapePoints.
walk__anone41365f00211::AllocaDerivedValueTracker1080b57cec5SDimitry Andric void walk(Value *Root) {
1090b57cec5SDimitry Andric SmallVector<Use *, 32> Worklist;
1100b57cec5SDimitry Andric SmallPtrSet<Use *, 32> Visited;
1110b57cec5SDimitry Andric
1120b57cec5SDimitry Andric auto AddUsesToWorklist = [&](Value *V) {
1130b57cec5SDimitry Andric for (auto &U : V->uses()) {
1140b57cec5SDimitry Andric if (!Visited.insert(&U).second)
1150b57cec5SDimitry Andric continue;
1160b57cec5SDimitry Andric Worklist.push_back(&U);
1170b57cec5SDimitry Andric }
1180b57cec5SDimitry Andric };
1190b57cec5SDimitry Andric
1200b57cec5SDimitry Andric AddUsesToWorklist(Root);
1210b57cec5SDimitry Andric
1220b57cec5SDimitry Andric while (!Worklist.empty()) {
1230b57cec5SDimitry Andric Use *U = Worklist.pop_back_val();
1240b57cec5SDimitry Andric Instruction *I = cast<Instruction>(U->getUser());
1250b57cec5SDimitry Andric
1260b57cec5SDimitry Andric switch (I->getOpcode()) {
1270b57cec5SDimitry Andric case Instruction::Call:
1280b57cec5SDimitry Andric case Instruction::Invoke: {
1295ffd83dbSDimitry Andric auto &CB = cast<CallBase>(*I);
1300b57cec5SDimitry Andric // If the alloca-derived argument is passed byval it is not an escape
1310b57cec5SDimitry Andric // point, or a use of an alloca. Calling with byval copies the contents
1320b57cec5SDimitry Andric // of the alloca into argument registers or stack slots, which exist
1330b57cec5SDimitry Andric // beyond the lifetime of the current frame.
1345ffd83dbSDimitry Andric if (CB.isArgOperand(U) && CB.isByValArgument(CB.getArgOperandNo(U)))
1350b57cec5SDimitry Andric continue;
1360b57cec5SDimitry Andric bool IsNocapture =
1375ffd83dbSDimitry Andric CB.isDataOperand(U) && CB.doesNotCapture(CB.getDataOperandNo(U));
1385ffd83dbSDimitry Andric callUsesLocalStack(CB, IsNocapture);
1390b57cec5SDimitry Andric if (IsNocapture) {
1400b57cec5SDimitry Andric // If the alloca-derived argument is passed in as nocapture, then it
1410b57cec5SDimitry Andric // can't propagate to the call's return. That would be capturing.
1420b57cec5SDimitry Andric continue;
1430b57cec5SDimitry Andric }
1440b57cec5SDimitry Andric break;
1450b57cec5SDimitry Andric }
1460b57cec5SDimitry Andric case Instruction::Load: {
1470b57cec5SDimitry Andric // The result of a load is not alloca-derived (unless an alloca has
1480b57cec5SDimitry Andric // otherwise escaped, but this is a local analysis).
1490b57cec5SDimitry Andric continue;
1500b57cec5SDimitry Andric }
1510b57cec5SDimitry Andric case Instruction::Store: {
1520b57cec5SDimitry Andric if (U->getOperandNo() == 0)
1530b57cec5SDimitry Andric EscapePoints.insert(I);
1540b57cec5SDimitry Andric continue; // Stores have no users to analyze.
1550b57cec5SDimitry Andric }
1560b57cec5SDimitry Andric case Instruction::BitCast:
1570b57cec5SDimitry Andric case Instruction::GetElementPtr:
1580b57cec5SDimitry Andric case Instruction::PHI:
1590b57cec5SDimitry Andric case Instruction::Select:
1600b57cec5SDimitry Andric case Instruction::AddrSpaceCast:
1610b57cec5SDimitry Andric break;
1620b57cec5SDimitry Andric default:
1630b57cec5SDimitry Andric EscapePoints.insert(I);
1640b57cec5SDimitry Andric break;
1650b57cec5SDimitry Andric }
1660b57cec5SDimitry Andric
1670b57cec5SDimitry Andric AddUsesToWorklist(I);
1680b57cec5SDimitry Andric }
1690b57cec5SDimitry Andric }
1700b57cec5SDimitry Andric
callUsesLocalStack__anone41365f00211::AllocaDerivedValueTracker1715ffd83dbSDimitry Andric void callUsesLocalStack(CallBase &CB, bool IsNocapture) {
1720b57cec5SDimitry Andric // Add it to the list of alloca users.
1735ffd83dbSDimitry Andric AllocaUsers.insert(&CB);
1740b57cec5SDimitry Andric
1750b57cec5SDimitry Andric // If it's nocapture then it can't capture this alloca.
1760b57cec5SDimitry Andric if (IsNocapture)
1770b57cec5SDimitry Andric return;
1780b57cec5SDimitry Andric
1790b57cec5SDimitry Andric // If it can write to memory, it can leak the alloca value.
1805ffd83dbSDimitry Andric if (!CB.onlyReadsMemory())
1815ffd83dbSDimitry Andric EscapePoints.insert(&CB);
1820b57cec5SDimitry Andric }
1830b57cec5SDimitry Andric
1840b57cec5SDimitry Andric SmallPtrSet<Instruction *, 32> AllocaUsers;
1850b57cec5SDimitry Andric SmallPtrSet<Instruction *, 32> EscapePoints;
1860b57cec5SDimitry Andric };
1870b57cec5SDimitry Andric }
1880b57cec5SDimitry Andric
markTails(Function & F,OptimizationRemarkEmitter * ORE)189fe6060f1SDimitry Andric static bool markTails(Function &F, OptimizationRemarkEmitter *ORE) {
1900b57cec5SDimitry Andric if (F.callsFunctionThatReturnsTwice())
1910b57cec5SDimitry Andric return false;
1920b57cec5SDimitry Andric
1930b57cec5SDimitry Andric // The local stack holds all alloca instructions and all byval arguments.
1940b57cec5SDimitry Andric AllocaDerivedValueTracker Tracker;
1950b57cec5SDimitry Andric for (Argument &Arg : F.args()) {
1960b57cec5SDimitry Andric if (Arg.hasByValAttr())
1970b57cec5SDimitry Andric Tracker.walk(&Arg);
1980b57cec5SDimitry Andric }
1990b57cec5SDimitry Andric for (auto &BB : F) {
2000b57cec5SDimitry Andric for (auto &I : BB)
2010b57cec5SDimitry Andric if (AllocaInst *AI = dyn_cast<AllocaInst>(&I))
2020b57cec5SDimitry Andric Tracker.walk(AI);
2030b57cec5SDimitry Andric }
2040b57cec5SDimitry Andric
2050b57cec5SDimitry Andric bool Modified = false;
2060b57cec5SDimitry Andric
2070b57cec5SDimitry Andric // Track whether a block is reachable after an alloca has escaped. Blocks that
2080b57cec5SDimitry Andric // contain the escaping instruction will be marked as being visited without an
2090b57cec5SDimitry Andric // escaped alloca, since that is how the block began.
2100b57cec5SDimitry Andric enum VisitType {
2110b57cec5SDimitry Andric UNVISITED,
2120b57cec5SDimitry Andric UNESCAPED,
2130b57cec5SDimitry Andric ESCAPED
2140b57cec5SDimitry Andric };
2150b57cec5SDimitry Andric DenseMap<BasicBlock *, VisitType> Visited;
2160b57cec5SDimitry Andric
2170b57cec5SDimitry Andric // We propagate the fact that an alloca has escaped from block to successor.
2180b57cec5SDimitry Andric // Visit the blocks that are propagating the escapedness first. To do this, we
2190b57cec5SDimitry Andric // maintain two worklists.
2200b57cec5SDimitry Andric SmallVector<BasicBlock *, 32> WorklistUnescaped, WorklistEscaped;
2210b57cec5SDimitry Andric
2220b57cec5SDimitry Andric // We may enter a block and visit it thinking that no alloca has escaped yet,
2230b57cec5SDimitry Andric // then see an escape point and go back around a loop edge and come back to
2240b57cec5SDimitry Andric // the same block twice. Because of this, we defer setting tail on calls when
2250b57cec5SDimitry Andric // we first encounter them in a block. Every entry in this list does not
2260b57cec5SDimitry Andric // statically use an alloca via use-def chain analysis, but may find an alloca
2270b57cec5SDimitry Andric // through other means if the block turns out to be reachable after an escape
2280b57cec5SDimitry Andric // point.
2290b57cec5SDimitry Andric SmallVector<CallInst *, 32> DeferredTails;
2300b57cec5SDimitry Andric
2310b57cec5SDimitry Andric BasicBlock *BB = &F.getEntryBlock();
2320b57cec5SDimitry Andric VisitType Escaped = UNESCAPED;
2330b57cec5SDimitry Andric do {
2340b57cec5SDimitry Andric for (auto &I : *BB) {
2350b57cec5SDimitry Andric if (Tracker.EscapePoints.count(&I))
2360b57cec5SDimitry Andric Escaped = ESCAPED;
2370b57cec5SDimitry Andric
2380b57cec5SDimitry Andric CallInst *CI = dyn_cast<CallInst>(&I);
239e8d8bef9SDimitry Andric // A PseudoProbeInst has the IntrInaccessibleMemOnly tag hence it is
240e8d8bef9SDimitry Andric // considered accessing memory and will be marked as a tail call if we
241e8d8bef9SDimitry Andric // don't bail out here.
242e8d8bef9SDimitry Andric if (!CI || CI->isTailCall() || isa<DbgInfoIntrinsic>(&I) ||
243e8d8bef9SDimitry Andric isa<PseudoProbeInst>(&I))
2440b57cec5SDimitry Andric continue;
2450b57cec5SDimitry Andric
246bdd1243dSDimitry Andric // Special-case operand bundles "clang.arc.attachedcall", "ptrauth", and
247bdd1243dSDimitry Andric // "kcfi".
248bdd1243dSDimitry Andric bool IsNoTail = CI->isNoTailCall() ||
249bdd1243dSDimitry Andric CI->hasOperandBundlesOtherThan(
250bdd1243dSDimitry Andric {LLVMContext::OB_clang_arc_attachedcall,
251bdd1243dSDimitry Andric LLVMContext::OB_ptrauth, LLVMContext::OB_kcfi});
2520b57cec5SDimitry Andric
2530b57cec5SDimitry Andric if (!IsNoTail && CI->doesNotAccessMemory()) {
2540b57cec5SDimitry Andric // A call to a readnone function whose arguments are all things computed
2550b57cec5SDimitry Andric // outside this function can be marked tail. Even if you stored the
2560b57cec5SDimitry Andric // alloca address into a global, a readnone function can't load the
2570b57cec5SDimitry Andric // global anyhow.
2580b57cec5SDimitry Andric //
2590b57cec5SDimitry Andric // Note that this runs whether we know an alloca has escaped or not. If
2600b57cec5SDimitry Andric // it has, then we can't trust Tracker.AllocaUsers to be accurate.
2610b57cec5SDimitry Andric bool SafeToTail = true;
262349cc55cSDimitry Andric for (auto &Arg : CI->args()) {
2630b57cec5SDimitry Andric if (isa<Constant>(Arg.getUser()))
2640b57cec5SDimitry Andric continue;
2650b57cec5SDimitry Andric if (Argument *A = dyn_cast<Argument>(Arg.getUser()))
2660b57cec5SDimitry Andric if (!A->hasByValAttr())
2670b57cec5SDimitry Andric continue;
2680b57cec5SDimitry Andric SafeToTail = false;
2690b57cec5SDimitry Andric break;
2700b57cec5SDimitry Andric }
2710b57cec5SDimitry Andric if (SafeToTail) {
2720b57cec5SDimitry Andric using namespace ore;
2730b57cec5SDimitry Andric ORE->emit([&]() {
2740b57cec5SDimitry Andric return OptimizationRemark(DEBUG_TYPE, "tailcall-readnone", CI)
2750b57cec5SDimitry Andric << "marked as tail call candidate (readnone)";
2760b57cec5SDimitry Andric });
2770b57cec5SDimitry Andric CI->setTailCall();
2780b57cec5SDimitry Andric Modified = true;
2790b57cec5SDimitry Andric continue;
2800b57cec5SDimitry Andric }
2810b57cec5SDimitry Andric }
2820b57cec5SDimitry Andric
283fe6060f1SDimitry Andric if (!IsNoTail && Escaped == UNESCAPED && !Tracker.AllocaUsers.count(CI))
2840b57cec5SDimitry Andric DeferredTails.push_back(CI);
2850b57cec5SDimitry Andric }
2860b57cec5SDimitry Andric
287e8d8bef9SDimitry Andric for (auto *SuccBB : successors(BB)) {
2880b57cec5SDimitry Andric auto &State = Visited[SuccBB];
2890b57cec5SDimitry Andric if (State < Escaped) {
2900b57cec5SDimitry Andric State = Escaped;
2910b57cec5SDimitry Andric if (State == ESCAPED)
2920b57cec5SDimitry Andric WorklistEscaped.push_back(SuccBB);
2930b57cec5SDimitry Andric else
2940b57cec5SDimitry Andric WorklistUnescaped.push_back(SuccBB);
2950b57cec5SDimitry Andric }
2960b57cec5SDimitry Andric }
2970b57cec5SDimitry Andric
2980b57cec5SDimitry Andric if (!WorklistEscaped.empty()) {
2990b57cec5SDimitry Andric BB = WorklistEscaped.pop_back_val();
3000b57cec5SDimitry Andric Escaped = ESCAPED;
3010b57cec5SDimitry Andric } else {
3020b57cec5SDimitry Andric BB = nullptr;
3030b57cec5SDimitry Andric while (!WorklistUnescaped.empty()) {
3040b57cec5SDimitry Andric auto *NextBB = WorklistUnescaped.pop_back_val();
3050b57cec5SDimitry Andric if (Visited[NextBB] == UNESCAPED) {
3060b57cec5SDimitry Andric BB = NextBB;
3070b57cec5SDimitry Andric Escaped = UNESCAPED;
3080b57cec5SDimitry Andric break;
3090b57cec5SDimitry Andric }
3100b57cec5SDimitry Andric }
3110b57cec5SDimitry Andric }
3120b57cec5SDimitry Andric } while (BB);
3130b57cec5SDimitry Andric
3140b57cec5SDimitry Andric for (CallInst *CI : DeferredTails) {
3150b57cec5SDimitry Andric if (Visited[CI->getParent()] != ESCAPED) {
3160b57cec5SDimitry Andric // If the escape point was part way through the block, calls after the
3170b57cec5SDimitry Andric // escape point wouldn't have been put into DeferredTails.
3180b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Marked as tail call candidate: " << *CI << "\n");
3190b57cec5SDimitry Andric CI->setTailCall();
3200b57cec5SDimitry Andric Modified = true;
3210b57cec5SDimitry Andric }
3220b57cec5SDimitry Andric }
3230b57cec5SDimitry Andric
3240b57cec5SDimitry Andric return Modified;
3250b57cec5SDimitry Andric }
3260b57cec5SDimitry Andric
3270b57cec5SDimitry Andric /// Return true if it is safe to move the specified
3280b57cec5SDimitry Andric /// instruction from after the call to before the call, assuming that all
3290b57cec5SDimitry Andric /// instructions between the call and this instruction are movable.
3300b57cec5SDimitry Andric ///
canMoveAboveCall(Instruction * I,CallInst * CI,AliasAnalysis * AA)3310b57cec5SDimitry Andric static bool canMoveAboveCall(Instruction *I, CallInst *CI, AliasAnalysis *AA) {
332fe6060f1SDimitry Andric if (isa<DbgInfoIntrinsic>(I))
333fe6060f1SDimitry Andric return true;
334fe6060f1SDimitry Andric
335fe6060f1SDimitry Andric if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
336fe6060f1SDimitry Andric if (II->getIntrinsicID() == Intrinsic::lifetime_end &&
337fe6060f1SDimitry Andric llvm::findAllocaForValue(II->getArgOperand(1)))
338fe6060f1SDimitry Andric return true;
339fe6060f1SDimitry Andric
3400b57cec5SDimitry Andric // FIXME: We can move load/store/call/free instructions above the call if the
3410b57cec5SDimitry Andric // call does not mod/ref the memory location being processed.
3420b57cec5SDimitry Andric if (I->mayHaveSideEffects()) // This also handles volatile loads.
3430b57cec5SDimitry Andric return false;
3440b57cec5SDimitry Andric
3450b57cec5SDimitry Andric if (LoadInst *L = dyn_cast<LoadInst>(I)) {
3460b57cec5SDimitry Andric // Loads may always be moved above calls without side effects.
3470b57cec5SDimitry Andric if (CI->mayHaveSideEffects()) {
3480b57cec5SDimitry Andric // Non-volatile loads may be moved above a call with side effects if it
3490b57cec5SDimitry Andric // does not write to memory and the load provably won't trap.
3500b57cec5SDimitry Andric // Writes to memory only matter if they may alias the pointer
3510b57cec5SDimitry Andric // being loaded from.
352*0fca6ea1SDimitry Andric const DataLayout &DL = L->getDataLayout();
3530b57cec5SDimitry Andric if (isModSet(AA->getModRefInfo(CI, MemoryLocation::get(L))) ||
3540b57cec5SDimitry Andric !isSafeToLoadUnconditionally(L->getPointerOperand(), L->getType(),
3555ffd83dbSDimitry Andric L->getAlign(), DL, L))
3560b57cec5SDimitry Andric return false;
3570b57cec5SDimitry Andric }
3580b57cec5SDimitry Andric }
3590b57cec5SDimitry Andric
3600b57cec5SDimitry Andric // Otherwise, if this is a side-effect free instruction, check to make sure
3610b57cec5SDimitry Andric // that it does not use the return value of the call. If it doesn't use the
3620b57cec5SDimitry Andric // return value of the call, it must only use things that are defined before
3630b57cec5SDimitry Andric // the call, or movable instructions between the call and the instruction
3640b57cec5SDimitry Andric // itself.
3650b57cec5SDimitry Andric return !is_contained(I->operands(), CI);
3660b57cec5SDimitry Andric }
3670b57cec5SDimitry Andric
canTransformAccumulatorRecursion(Instruction * I,CallInst * CI)3685ffd83dbSDimitry Andric static bool canTransformAccumulatorRecursion(Instruction *I, CallInst *CI) {
3695ffd83dbSDimitry Andric if (!I->isAssociative() || !I->isCommutative())
3700b57cec5SDimitry Andric return false;
3710b57cec5SDimitry Andric
3725f757f3fSDimitry Andric assert(I->getNumOperands() >= 2 &&
3735f757f3fSDimitry Andric "Associative/commutative operations should have at least 2 args!");
3745f757f3fSDimitry Andric
3755f757f3fSDimitry Andric if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
3765f757f3fSDimitry Andric // Accumulators must have an identity.
3775f757f3fSDimitry Andric if (!ConstantExpr::getIntrinsicIdentity(II->getIntrinsicID(), I->getType()))
3785f757f3fSDimitry Andric return false;
3795f757f3fSDimitry Andric }
3800b57cec5SDimitry Andric
3810b57cec5SDimitry Andric // Exactly one operand should be the result of the call instruction.
3820b57cec5SDimitry Andric if ((I->getOperand(0) == CI && I->getOperand(1) == CI) ||
3830b57cec5SDimitry Andric (I->getOperand(0) != CI && I->getOperand(1) != CI))
3845ffd83dbSDimitry Andric return false;
3850b57cec5SDimitry Andric
3860b57cec5SDimitry Andric // The only user of this instruction we allow is a single return instruction.
3870b57cec5SDimitry Andric if (!I->hasOneUse() || !isa<ReturnInst>(I->user_back()))
3885ffd83dbSDimitry Andric return false;
3890b57cec5SDimitry Andric
3905ffd83dbSDimitry Andric return true;
3910b57cec5SDimitry Andric }
3920b57cec5SDimitry Andric
firstNonDbg(BasicBlock::iterator I)3930b57cec5SDimitry Andric static Instruction *firstNonDbg(BasicBlock::iterator I) {
3940b57cec5SDimitry Andric while (isa<DbgInfoIntrinsic>(I))
3950b57cec5SDimitry Andric ++I;
3960b57cec5SDimitry Andric return &*I;
3970b57cec5SDimitry Andric }
3980b57cec5SDimitry Andric
3995ffd83dbSDimitry Andric namespace {
4005ffd83dbSDimitry Andric class TailRecursionEliminator {
4015ffd83dbSDimitry Andric Function &F;
4025ffd83dbSDimitry Andric const TargetTransformInfo *TTI;
4035ffd83dbSDimitry Andric AliasAnalysis *AA;
4045ffd83dbSDimitry Andric OptimizationRemarkEmitter *ORE;
4055ffd83dbSDimitry Andric DomTreeUpdater &DTU;
4065ffd83dbSDimitry Andric
4075ffd83dbSDimitry Andric // The below are shared state we want to have available when eliminating any
4085ffd83dbSDimitry Andric // calls in the function. There values should be populated by
4095ffd83dbSDimitry Andric // createTailRecurseLoopHeader the first time we find a call we can eliminate.
4105ffd83dbSDimitry Andric BasicBlock *HeaderBB = nullptr;
4115ffd83dbSDimitry Andric SmallVector<PHINode *, 8> ArgumentPHIs;
4125ffd83dbSDimitry Andric
4135ffd83dbSDimitry Andric // PHI node to store our return value.
4145ffd83dbSDimitry Andric PHINode *RetPN = nullptr;
4155ffd83dbSDimitry Andric
4165ffd83dbSDimitry Andric // i1 PHI node to track if we have a valid return value stored in RetPN.
4175ffd83dbSDimitry Andric PHINode *RetKnownPN = nullptr;
4185ffd83dbSDimitry Andric
4195ffd83dbSDimitry Andric // Vector of select instructions we insereted. These selects use RetKnownPN
4205ffd83dbSDimitry Andric // to either propagate RetPN or select a new return value.
4215ffd83dbSDimitry Andric SmallVector<SelectInst *, 8> RetSelects;
4225ffd83dbSDimitry Andric
4235ffd83dbSDimitry Andric // The below are shared state needed when performing accumulator recursion.
4245ffd83dbSDimitry Andric // There values should be populated by insertAccumulator the first time we
4255ffd83dbSDimitry Andric // find an elimination that requires an accumulator.
4265ffd83dbSDimitry Andric
4275ffd83dbSDimitry Andric // PHI node to store our current accumulated value.
4285ffd83dbSDimitry Andric PHINode *AccPN = nullptr;
4295ffd83dbSDimitry Andric
4305ffd83dbSDimitry Andric // The instruction doing the accumulating.
4315ffd83dbSDimitry Andric Instruction *AccumulatorRecursionInstr = nullptr;
4325ffd83dbSDimitry Andric
TailRecursionEliminator(Function & F,const TargetTransformInfo * TTI,AliasAnalysis * AA,OptimizationRemarkEmitter * ORE,DomTreeUpdater & DTU)4335ffd83dbSDimitry Andric TailRecursionEliminator(Function &F, const TargetTransformInfo *TTI,
4345ffd83dbSDimitry Andric AliasAnalysis *AA, OptimizationRemarkEmitter *ORE,
4355ffd83dbSDimitry Andric DomTreeUpdater &DTU)
4365ffd83dbSDimitry Andric : F(F), TTI(TTI), AA(AA), ORE(ORE), DTU(DTU) {}
4375ffd83dbSDimitry Andric
438fe6060f1SDimitry Andric CallInst *findTRECandidate(BasicBlock *BB);
4395ffd83dbSDimitry Andric
4405ffd83dbSDimitry Andric void createTailRecurseLoopHeader(CallInst *CI);
4415ffd83dbSDimitry Andric
4425ffd83dbSDimitry Andric void insertAccumulator(Instruction *AccRecInstr);
4435ffd83dbSDimitry Andric
4445ffd83dbSDimitry Andric bool eliminateCall(CallInst *CI);
4455ffd83dbSDimitry Andric
4465ffd83dbSDimitry Andric void cleanupAndFinalize();
4475ffd83dbSDimitry Andric
448fe6060f1SDimitry Andric bool processBlock(BasicBlock &BB);
449fe6060f1SDimitry Andric
450fe6060f1SDimitry Andric void copyByValueOperandIntoLocalTemp(CallInst *CI, int OpndIdx);
451fe6060f1SDimitry Andric
452fe6060f1SDimitry Andric void copyLocalTempOfByValueOperandIntoArguments(CallInst *CI, int OpndIdx);
453e8d8bef9SDimitry Andric
4545ffd83dbSDimitry Andric public:
4555ffd83dbSDimitry Andric static bool eliminate(Function &F, const TargetTransformInfo *TTI,
4565ffd83dbSDimitry Andric AliasAnalysis *AA, OptimizationRemarkEmitter *ORE,
4575ffd83dbSDimitry Andric DomTreeUpdater &DTU);
4585ffd83dbSDimitry Andric };
4595ffd83dbSDimitry Andric } // namespace
4605ffd83dbSDimitry Andric
findTRECandidate(BasicBlock * BB)461fe6060f1SDimitry Andric CallInst *TailRecursionEliminator::findTRECandidate(BasicBlock *BB) {
462e8d8bef9SDimitry Andric Instruction *TI = BB->getTerminator();
4630b57cec5SDimitry Andric
4640b57cec5SDimitry Andric if (&BB->front() == TI) // Make sure there is something before the terminator.
4650b57cec5SDimitry Andric return nullptr;
4660b57cec5SDimitry Andric
4670b57cec5SDimitry Andric // Scan backwards from the return, checking to see if there is a tail call in
4680b57cec5SDimitry Andric // this block. If so, set CI to it.
4690b57cec5SDimitry Andric CallInst *CI = nullptr;
4700b57cec5SDimitry Andric BasicBlock::iterator BBI(TI);
4710b57cec5SDimitry Andric while (true) {
4720b57cec5SDimitry Andric CI = dyn_cast<CallInst>(BBI);
4735ffd83dbSDimitry Andric if (CI && CI->getCalledFunction() == &F)
4740b57cec5SDimitry Andric break;
4750b57cec5SDimitry Andric
4760b57cec5SDimitry Andric if (BBI == BB->begin())
4770b57cec5SDimitry Andric return nullptr; // Didn't find a potential tail call.
4780b57cec5SDimitry Andric --BBI;
4790b57cec5SDimitry Andric }
4800b57cec5SDimitry Andric
481fe6060f1SDimitry Andric assert((!CI->isTailCall() || !CI->isNoTailCall()) &&
482fe6060f1SDimitry Andric "Incompatible call site attributes(Tail,NoTail)");
483fe6060f1SDimitry Andric if (!CI->isTailCall())
4840b57cec5SDimitry Andric return nullptr;
4850b57cec5SDimitry Andric
4860b57cec5SDimitry Andric // As a special case, detect code like this:
4870b57cec5SDimitry Andric // double fabs(double f) { return __builtin_fabs(f); } // a 'fabs' call
4880b57cec5SDimitry Andric // and disable this xform in this case, because the code generator will
4890b57cec5SDimitry Andric // lower the call to fabs into inline code.
4905ffd83dbSDimitry Andric if (BB == &F.getEntryBlock() &&
4910b57cec5SDimitry Andric firstNonDbg(BB->front().getIterator()) == CI &&
4920b57cec5SDimitry Andric firstNonDbg(std::next(BB->begin())) == TI && CI->getCalledFunction() &&
4930b57cec5SDimitry Andric !TTI->isLoweredToCall(CI->getCalledFunction())) {
4940b57cec5SDimitry Andric // A single-block function with just a call and a return. Check that
4950b57cec5SDimitry Andric // the arguments match.
4965ffd83dbSDimitry Andric auto I = CI->arg_begin(), E = CI->arg_end();
4975ffd83dbSDimitry Andric Function::arg_iterator FI = F.arg_begin(), FE = F.arg_end();
4980b57cec5SDimitry Andric for (; I != E && FI != FE; ++I, ++FI)
4990b57cec5SDimitry Andric if (*I != &*FI) break;
5000b57cec5SDimitry Andric if (I == E && FI == FE)
5010b57cec5SDimitry Andric return nullptr;
5020b57cec5SDimitry Andric }
5030b57cec5SDimitry Andric
5040b57cec5SDimitry Andric return CI;
5050b57cec5SDimitry Andric }
5060b57cec5SDimitry Andric
createTailRecurseLoopHeader(CallInst * CI)5075ffd83dbSDimitry Andric void TailRecursionEliminator::createTailRecurseLoopHeader(CallInst *CI) {
5085ffd83dbSDimitry Andric HeaderBB = &F.getEntryBlock();
5095ffd83dbSDimitry Andric BasicBlock *NewEntry = BasicBlock::Create(F.getContext(), "", &F, HeaderBB);
5105ffd83dbSDimitry Andric NewEntry->takeName(HeaderBB);
5115ffd83dbSDimitry Andric HeaderBB->setName("tailrecurse");
512*0fca6ea1SDimitry Andric BranchInst::Create(HeaderBB, NewEntry);
513*0fca6ea1SDimitry Andric // If the new branch preserves the debug location of CI, it could result in
514*0fca6ea1SDimitry Andric // misleading stepping, if CI is located in a conditional branch.
515*0fca6ea1SDimitry Andric // So, here we don't give any debug location to the new branch.
5165ffd83dbSDimitry Andric
5175ffd83dbSDimitry Andric // Move all fixed sized allocas from HeaderBB to NewEntry.
5185ffd83dbSDimitry Andric for (BasicBlock::iterator OEBI = HeaderBB->begin(), E = HeaderBB->end(),
5195ffd83dbSDimitry Andric NEBI = NewEntry->begin();
5205ffd83dbSDimitry Andric OEBI != E;)
5215ffd83dbSDimitry Andric if (AllocaInst *AI = dyn_cast<AllocaInst>(OEBI++))
5225ffd83dbSDimitry Andric if (isa<ConstantInt>(AI->getArraySize()))
5235ffd83dbSDimitry Andric AI->moveBefore(&*NEBI);
5245ffd83dbSDimitry Andric
5255ffd83dbSDimitry Andric // Now that we have created a new block, which jumps to the entry
5265ffd83dbSDimitry Andric // block, insert a PHI node for each argument of the function.
5275ffd83dbSDimitry Andric // For now, we initialize each PHI to only have the real arguments
5285ffd83dbSDimitry Andric // which are passed in.
5295f757f3fSDimitry Andric BasicBlock::iterator InsertPos = HeaderBB->begin();
5305ffd83dbSDimitry Andric for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) {
5315f757f3fSDimitry Andric PHINode *PN = PHINode::Create(I->getType(), 2, I->getName() + ".tr");
5325f757f3fSDimitry Andric PN->insertBefore(InsertPos);
5335ffd83dbSDimitry Andric I->replaceAllUsesWith(PN); // Everyone use the PHI node now!
5345ffd83dbSDimitry Andric PN->addIncoming(&*I, NewEntry);
5355ffd83dbSDimitry Andric ArgumentPHIs.push_back(PN);
5365ffd83dbSDimitry Andric }
5375ffd83dbSDimitry Andric
5385ffd83dbSDimitry Andric // If the function doen't return void, create the RetPN and RetKnownPN PHI
53981ad6265SDimitry Andric // nodes to track our return value. We initialize RetPN with poison and
5405ffd83dbSDimitry Andric // RetKnownPN with false since we can't know our return value at function
5415ffd83dbSDimitry Andric // entry.
5425ffd83dbSDimitry Andric Type *RetType = F.getReturnType();
5435ffd83dbSDimitry Andric if (!RetType->isVoidTy()) {
5445ffd83dbSDimitry Andric Type *BoolType = Type::getInt1Ty(F.getContext());
5455f757f3fSDimitry Andric RetPN = PHINode::Create(RetType, 2, "ret.tr");
5465f757f3fSDimitry Andric RetPN->insertBefore(InsertPos);
5475f757f3fSDimitry Andric RetKnownPN = PHINode::Create(BoolType, 2, "ret.known.tr");
5485f757f3fSDimitry Andric RetKnownPN->insertBefore(InsertPos);
5495ffd83dbSDimitry Andric
55081ad6265SDimitry Andric RetPN->addIncoming(PoisonValue::get(RetType), NewEntry);
5515ffd83dbSDimitry Andric RetKnownPN->addIncoming(ConstantInt::getFalse(BoolType), NewEntry);
5525ffd83dbSDimitry Andric }
5535ffd83dbSDimitry Andric
5545ffd83dbSDimitry Andric // The entry block was changed from HeaderBB to NewEntry.
5555ffd83dbSDimitry Andric // The forward DominatorTree needs to be recalculated when the EntryBB is
5565ffd83dbSDimitry Andric // changed. In this corner-case we recalculate the entire tree.
5575ffd83dbSDimitry Andric DTU.recalculate(*NewEntry->getParent());
5585ffd83dbSDimitry Andric }
5595ffd83dbSDimitry Andric
insertAccumulator(Instruction * AccRecInstr)5605ffd83dbSDimitry Andric void TailRecursionEliminator::insertAccumulator(Instruction *AccRecInstr) {
5615ffd83dbSDimitry Andric assert(!AccPN && "Trying to insert multiple accumulators");
5625ffd83dbSDimitry Andric
5635ffd83dbSDimitry Andric AccumulatorRecursionInstr = AccRecInstr;
5645ffd83dbSDimitry Andric
5655ffd83dbSDimitry Andric // Start by inserting a new PHI node for the accumulator.
5665ffd83dbSDimitry Andric pred_iterator PB = pred_begin(HeaderBB), PE = pred_end(HeaderBB);
5675ffd83dbSDimitry Andric AccPN = PHINode::Create(F.getReturnType(), std::distance(PB, PE) + 1,
5685f757f3fSDimitry Andric "accumulator.tr");
5695f757f3fSDimitry Andric AccPN->insertBefore(HeaderBB->begin());
5705ffd83dbSDimitry Andric
5715ffd83dbSDimitry Andric // Loop over all of the predecessors of the tail recursion block. For the
5725ffd83dbSDimitry Andric // real entry into the function we seed the PHI with the identity constant for
5735ffd83dbSDimitry Andric // the accumulation operation. For any other existing branches to this block
5745ffd83dbSDimitry Andric // (due to other tail recursions eliminated) the accumulator is not modified.
5755ffd83dbSDimitry Andric // Because we haven't added the branch in the current block to HeaderBB yet,
5765ffd83dbSDimitry Andric // it will not show up as a predecessor.
5775ffd83dbSDimitry Andric for (pred_iterator PI = PB; PI != PE; ++PI) {
5785ffd83dbSDimitry Andric BasicBlock *P = *PI;
5795ffd83dbSDimitry Andric if (P == &F.getEntryBlock()) {
5805f757f3fSDimitry Andric Constant *Identity =
5815f757f3fSDimitry Andric ConstantExpr::getIdentity(AccRecInstr, AccRecInstr->getType());
5825ffd83dbSDimitry Andric AccPN->addIncoming(Identity, P);
5835ffd83dbSDimitry Andric } else {
5845ffd83dbSDimitry Andric AccPN->addIncoming(AccPN, P);
5855ffd83dbSDimitry Andric }
5865ffd83dbSDimitry Andric }
5875ffd83dbSDimitry Andric
5885ffd83dbSDimitry Andric ++NumAccumAdded;
5895ffd83dbSDimitry Andric }
5905ffd83dbSDimitry Andric
591fe6060f1SDimitry Andric // Creates a copy of contents of ByValue operand of the specified
592fe6060f1SDimitry Andric // call instruction into the newly created temporarily variable.
copyByValueOperandIntoLocalTemp(CallInst * CI,int OpndIdx)593fe6060f1SDimitry Andric void TailRecursionEliminator::copyByValueOperandIntoLocalTemp(CallInst *CI,
594fe6060f1SDimitry Andric int OpndIdx) {
595349cc55cSDimitry Andric Type *AggTy = CI->getParamByValType(OpndIdx);
596349cc55cSDimitry Andric assert(AggTy);
597*0fca6ea1SDimitry Andric const DataLayout &DL = F.getDataLayout();
598fe6060f1SDimitry Andric
599fe6060f1SDimitry Andric // Get alignment of byVal operand.
600fe6060f1SDimitry Andric Align Alignment(CI->getParamAlign(OpndIdx).valueOrOne());
601fe6060f1SDimitry Andric
602fe6060f1SDimitry Andric // Create alloca for temporarily byval operands.
603fe6060f1SDimitry Andric // Put alloca into the entry block.
604fe6060f1SDimitry Andric Value *NewAlloca = new AllocaInst(
605fe6060f1SDimitry Andric AggTy, DL.getAllocaAddrSpace(), nullptr, Alignment,
606*0fca6ea1SDimitry Andric CI->getArgOperand(OpndIdx)->getName(), F.getEntryBlock().begin());
607fe6060f1SDimitry Andric
608fe6060f1SDimitry Andric IRBuilder<> Builder(CI);
609fe6060f1SDimitry Andric Value *Size = Builder.getInt64(DL.getTypeAllocSize(AggTy));
610fe6060f1SDimitry Andric
611fe6060f1SDimitry Andric // Copy data from byvalue operand into the temporarily variable.
612fe6060f1SDimitry Andric Builder.CreateMemCpy(NewAlloca, /*DstAlign*/ Alignment,
613fe6060f1SDimitry Andric CI->getArgOperand(OpndIdx),
614fe6060f1SDimitry Andric /*SrcAlign*/ Alignment, Size);
615fe6060f1SDimitry Andric CI->setArgOperand(OpndIdx, NewAlloca);
616fe6060f1SDimitry Andric }
617fe6060f1SDimitry Andric
618fe6060f1SDimitry Andric // Creates a copy from temporarily variable(keeping value of ByVal argument)
619fe6060f1SDimitry Andric // into the corresponding function argument location.
copyLocalTempOfByValueOperandIntoArguments(CallInst * CI,int OpndIdx)620fe6060f1SDimitry Andric void TailRecursionEliminator::copyLocalTempOfByValueOperandIntoArguments(
621fe6060f1SDimitry Andric CallInst *CI, int OpndIdx) {
622349cc55cSDimitry Andric Type *AggTy = CI->getParamByValType(OpndIdx);
623349cc55cSDimitry Andric assert(AggTy);
624*0fca6ea1SDimitry Andric const DataLayout &DL = F.getDataLayout();
625fe6060f1SDimitry Andric
626fe6060f1SDimitry Andric // Get alignment of byVal operand.
627fe6060f1SDimitry Andric Align Alignment(CI->getParamAlign(OpndIdx).valueOrOne());
628fe6060f1SDimitry Andric
629fe6060f1SDimitry Andric IRBuilder<> Builder(CI);
630fe6060f1SDimitry Andric Value *Size = Builder.getInt64(DL.getTypeAllocSize(AggTy));
631fe6060f1SDimitry Andric
632fe6060f1SDimitry Andric // Copy data from the temporarily variable into corresponding
633fe6060f1SDimitry Andric // function argument location.
634fe6060f1SDimitry Andric Builder.CreateMemCpy(F.getArg(OpndIdx), /*DstAlign*/ Alignment,
635fe6060f1SDimitry Andric CI->getArgOperand(OpndIdx),
636fe6060f1SDimitry Andric /*SrcAlign*/ Alignment, Size);
637fe6060f1SDimitry Andric }
638fe6060f1SDimitry Andric
eliminateCall(CallInst * CI)6395ffd83dbSDimitry Andric bool TailRecursionEliminator::eliminateCall(CallInst *CI) {
6405ffd83dbSDimitry Andric ReturnInst *Ret = cast<ReturnInst>(CI->getParent()->getTerminator());
6410b57cec5SDimitry Andric
6420b57cec5SDimitry Andric // Ok, we found a potential tail call. We can currently only transform the
6430b57cec5SDimitry Andric // tail call if all of the instructions between the call and the return are
6440b57cec5SDimitry Andric // movable to above the call itself, leaving the call next to the return.
6450b57cec5SDimitry Andric // Check that this is the case now.
6465ffd83dbSDimitry Andric Instruction *AccRecInstr = nullptr;
6470b57cec5SDimitry Andric BasicBlock::iterator BBI(CI);
6480b57cec5SDimitry Andric for (++BBI; &*BBI != Ret; ++BBI) {
6490b57cec5SDimitry Andric if (canMoveAboveCall(&*BBI, CI, AA))
6500b57cec5SDimitry Andric continue;
6510b57cec5SDimitry Andric
6520b57cec5SDimitry Andric // If we can't move the instruction above the call, it might be because it
6530b57cec5SDimitry Andric // is an associative and commutative operation that could be transformed
6540b57cec5SDimitry Andric // using accumulator recursion elimination. Check to see if this is the
6555ffd83dbSDimitry Andric // case, and if so, remember which instruction accumulates for later.
6565ffd83dbSDimitry Andric if (AccPN || !canTransformAccumulatorRecursion(&*BBI, CI))
6575ffd83dbSDimitry Andric return false; // We cannot eliminate the tail recursion!
6585ffd83dbSDimitry Andric
6590b57cec5SDimitry Andric // Yes, this is accumulator recursion. Remember which instruction
6600b57cec5SDimitry Andric // accumulates.
6615ffd83dbSDimitry Andric AccRecInstr = &*BBI;
6620b57cec5SDimitry Andric }
6630b57cec5SDimitry Andric
6640b57cec5SDimitry Andric BasicBlock *BB = Ret->getParent();
6650b57cec5SDimitry Andric
6660b57cec5SDimitry Andric using namespace ore;
6670b57cec5SDimitry Andric ORE->emit([&]() {
6680b57cec5SDimitry Andric return OptimizationRemark(DEBUG_TYPE, "tailcall-recursion", CI)
6690b57cec5SDimitry Andric << "transforming tail recursion into loop";
6700b57cec5SDimitry Andric });
6710b57cec5SDimitry Andric
6720b57cec5SDimitry Andric // OK! We can transform this tail call. If this is the first one found,
6730b57cec5SDimitry Andric // create the new entry block, allowing us to branch back to the old entry.
6745ffd83dbSDimitry Andric if (!HeaderBB)
6755ffd83dbSDimitry Andric createTailRecurseLoopHeader(CI);
6760b57cec5SDimitry Andric
677fe6060f1SDimitry Andric // Copy values of ByVal operands into local temporarily variables.
678349cc55cSDimitry Andric for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
679fe6060f1SDimitry Andric if (CI->isByValArgument(I))
680fe6060f1SDimitry Andric copyByValueOperandIntoLocalTemp(CI, I);
681fe6060f1SDimitry Andric }
6820b57cec5SDimitry Andric
6830b57cec5SDimitry Andric // Ok, now that we know we have a pseudo-entry block WITH all of the
6840b57cec5SDimitry Andric // required PHI nodes, add entries into the PHI node for the actual
6850b57cec5SDimitry Andric // parameters passed into the tail-recursive call.
686349cc55cSDimitry Andric for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
687fe6060f1SDimitry Andric if (CI->isByValArgument(I)) {
688fe6060f1SDimitry Andric copyLocalTempOfByValueOperandIntoArguments(CI, I);
6898a4dda33SDimitry Andric // When eliminating a tail call, we modify the values of the arguments.
6908a4dda33SDimitry Andric // Therefore, if the byval parameter has a readonly attribute, we have to
6918a4dda33SDimitry Andric // remove it. It is safe because, from the perspective of a caller, the
6928a4dda33SDimitry Andric // byval parameter is always treated as "readonly," even if the readonly
6938a4dda33SDimitry Andric // attribute is removed.
6948a4dda33SDimitry Andric F.removeParamAttr(I, Attribute::ReadOnly);
695fe6060f1SDimitry Andric ArgumentPHIs[I]->addIncoming(F.getArg(I), BB);
696fe6060f1SDimitry Andric } else
697fe6060f1SDimitry Andric ArgumentPHIs[I]->addIncoming(CI->getArgOperand(I), BB);
698fe6060f1SDimitry Andric }
6990b57cec5SDimitry Andric
7000b57cec5SDimitry Andric if (AccRecInstr) {
7015ffd83dbSDimitry Andric insertAccumulator(AccRecInstr);
7020b57cec5SDimitry Andric
7035ffd83dbSDimitry Andric // Rewrite the accumulator recursion instruction so that it does not use
7045ffd83dbSDimitry Andric // the result of the call anymore, instead, use the PHI node we just
7050b57cec5SDimitry Andric // inserted.
7060b57cec5SDimitry Andric AccRecInstr->setOperand(AccRecInstr->getOperand(0) != CI, AccPN);
7070b57cec5SDimitry Andric }
7080b57cec5SDimitry Andric
7095ffd83dbSDimitry Andric // Update our return value tracking
7105ffd83dbSDimitry Andric if (RetPN) {
7115ffd83dbSDimitry Andric if (Ret->getReturnValue() == CI || AccRecInstr) {
7125ffd83dbSDimitry Andric // Defer selecting a return value
7135ffd83dbSDimitry Andric RetPN->addIncoming(RetPN, BB);
7145ffd83dbSDimitry Andric RetKnownPN->addIncoming(RetKnownPN, BB);
7155ffd83dbSDimitry Andric } else {
7165ffd83dbSDimitry Andric // We found a return value we want to use, insert a select instruction to
7175ffd83dbSDimitry Andric // select it if we don't already know what our return value will be and
7185ffd83dbSDimitry Andric // store the result in our return value PHI node.
719*0fca6ea1SDimitry Andric SelectInst *SI =
720*0fca6ea1SDimitry Andric SelectInst::Create(RetKnownPN, RetPN, Ret->getReturnValue(),
721*0fca6ea1SDimitry Andric "current.ret.tr", Ret->getIterator());
7225ffd83dbSDimitry Andric RetSelects.push_back(SI);
7235ffd83dbSDimitry Andric
7245ffd83dbSDimitry Andric RetPN->addIncoming(SI, BB);
7255ffd83dbSDimitry Andric RetKnownPN->addIncoming(ConstantInt::getTrue(RetKnownPN->getType()), BB);
7265ffd83dbSDimitry Andric }
7275ffd83dbSDimitry Andric
7285ffd83dbSDimitry Andric if (AccPN)
7295ffd83dbSDimitry Andric AccPN->addIncoming(AccRecInstr ? AccRecInstr : AccPN, BB);
7300b57cec5SDimitry Andric }
7310b57cec5SDimitry Andric
7320b57cec5SDimitry Andric // Now that all of the PHI nodes are in place, remove the call and
7330b57cec5SDimitry Andric // ret instructions, replacing them with an unconditional branch.
734*0fca6ea1SDimitry Andric BranchInst *NewBI = BranchInst::Create(HeaderBB, Ret->getIterator());
7350b57cec5SDimitry Andric NewBI->setDebugLoc(CI->getDebugLoc());
7360b57cec5SDimitry Andric
737bdd1243dSDimitry Andric Ret->eraseFromParent(); // Remove return.
738bdd1243dSDimitry Andric CI->eraseFromParent(); // Remove call.
7395ffd83dbSDimitry Andric DTU.applyUpdates({{DominatorTree::Insert, BB, HeaderBB}});
7400b57cec5SDimitry Andric ++NumEliminated;
7410b57cec5SDimitry Andric return true;
7420b57cec5SDimitry Andric }
7430b57cec5SDimitry Andric
cleanupAndFinalize()7445ffd83dbSDimitry Andric void TailRecursionEliminator::cleanupAndFinalize() {
7455ffd83dbSDimitry Andric // If we eliminated any tail recursions, it's possible that we inserted some
7465ffd83dbSDimitry Andric // silly PHI nodes which just merge an initial value (the incoming operand)
7475ffd83dbSDimitry Andric // with themselves. Check to see if we did and clean up our mess if so. This
7485ffd83dbSDimitry Andric // occurs when a function passes an argument straight through to its tail
7495ffd83dbSDimitry Andric // call.
7505ffd83dbSDimitry Andric for (PHINode *PN : ArgumentPHIs) {
7515ffd83dbSDimitry Andric // If the PHI Node is a dynamic constant, replace it with the value it is.
752*0fca6ea1SDimitry Andric if (Value *PNV = simplifyInstruction(PN, F.getDataLayout())) {
7535ffd83dbSDimitry Andric PN->replaceAllUsesWith(PNV);
7545ffd83dbSDimitry Andric PN->eraseFromParent();
7555ffd83dbSDimitry Andric }
7565ffd83dbSDimitry Andric }
7575ffd83dbSDimitry Andric
7585ffd83dbSDimitry Andric if (RetPN) {
7595ffd83dbSDimitry Andric if (RetSelects.empty()) {
7605ffd83dbSDimitry Andric // If we didn't insert any select instructions, then we know we didn't
7615ffd83dbSDimitry Andric // store a return value and we can remove the PHI nodes we inserted.
7625ffd83dbSDimitry Andric RetPN->dropAllReferences();
7635ffd83dbSDimitry Andric RetPN->eraseFromParent();
7645ffd83dbSDimitry Andric
7655ffd83dbSDimitry Andric RetKnownPN->dropAllReferences();
7665ffd83dbSDimitry Andric RetKnownPN->eraseFromParent();
7675ffd83dbSDimitry Andric
7685ffd83dbSDimitry Andric if (AccPN) {
7695ffd83dbSDimitry Andric // We need to insert a copy of our accumulator instruction before any
7705ffd83dbSDimitry Andric // return in the function, and return its result instead.
7715ffd83dbSDimitry Andric Instruction *AccRecInstr = AccumulatorRecursionInstr;
7725ffd83dbSDimitry Andric for (BasicBlock &BB : F) {
7735ffd83dbSDimitry Andric ReturnInst *RI = dyn_cast<ReturnInst>(BB.getTerminator());
7745ffd83dbSDimitry Andric if (!RI)
7755ffd83dbSDimitry Andric continue;
7765ffd83dbSDimitry Andric
7775ffd83dbSDimitry Andric Instruction *AccRecInstrNew = AccRecInstr->clone();
7785ffd83dbSDimitry Andric AccRecInstrNew->setName("accumulator.ret.tr");
7795ffd83dbSDimitry Andric AccRecInstrNew->setOperand(AccRecInstr->getOperand(0) == AccPN,
7805ffd83dbSDimitry Andric RI->getOperand(0));
7815ffd83dbSDimitry Andric AccRecInstrNew->insertBefore(RI);
782*0fca6ea1SDimitry Andric AccRecInstrNew->dropLocation();
7835ffd83dbSDimitry Andric RI->setOperand(0, AccRecInstrNew);
7845ffd83dbSDimitry Andric }
7855ffd83dbSDimitry Andric }
7865ffd83dbSDimitry Andric } else {
7875ffd83dbSDimitry Andric // We need to insert a select instruction before any return left in the
7885ffd83dbSDimitry Andric // function to select our stored return value if we have one.
7895ffd83dbSDimitry Andric for (BasicBlock &BB : F) {
7905ffd83dbSDimitry Andric ReturnInst *RI = dyn_cast<ReturnInst>(BB.getTerminator());
7915ffd83dbSDimitry Andric if (!RI)
7925ffd83dbSDimitry Andric continue;
7935ffd83dbSDimitry Andric
794*0fca6ea1SDimitry Andric SelectInst *SI =
795*0fca6ea1SDimitry Andric SelectInst::Create(RetKnownPN, RetPN, RI->getOperand(0),
796*0fca6ea1SDimitry Andric "current.ret.tr", RI->getIterator());
7975ffd83dbSDimitry Andric RetSelects.push_back(SI);
7985ffd83dbSDimitry Andric RI->setOperand(0, SI);
7995ffd83dbSDimitry Andric }
8005ffd83dbSDimitry Andric
8015ffd83dbSDimitry Andric if (AccPN) {
8025ffd83dbSDimitry Andric // We need to insert a copy of our accumulator instruction before any
8035ffd83dbSDimitry Andric // of the selects we inserted, and select its result instead.
8045ffd83dbSDimitry Andric Instruction *AccRecInstr = AccumulatorRecursionInstr;
8055ffd83dbSDimitry Andric for (SelectInst *SI : RetSelects) {
8065ffd83dbSDimitry Andric Instruction *AccRecInstrNew = AccRecInstr->clone();
8075ffd83dbSDimitry Andric AccRecInstrNew->setName("accumulator.ret.tr");
8085ffd83dbSDimitry Andric AccRecInstrNew->setOperand(AccRecInstr->getOperand(0) == AccPN,
8095ffd83dbSDimitry Andric SI->getFalseValue());
8105ffd83dbSDimitry Andric AccRecInstrNew->insertBefore(SI);
811*0fca6ea1SDimitry Andric AccRecInstrNew->dropLocation();
8125ffd83dbSDimitry Andric SI->setFalseValue(AccRecInstrNew);
8135ffd83dbSDimitry Andric }
8145ffd83dbSDimitry Andric }
8155ffd83dbSDimitry Andric }
8165ffd83dbSDimitry Andric }
8175ffd83dbSDimitry Andric }
8185ffd83dbSDimitry Andric
processBlock(BasicBlock & BB)819fe6060f1SDimitry Andric bool TailRecursionEliminator::processBlock(BasicBlock &BB) {
820e8d8bef9SDimitry Andric Instruction *TI = BB.getTerminator();
821e8d8bef9SDimitry Andric
822e8d8bef9SDimitry Andric if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
823e8d8bef9SDimitry Andric if (BI->isConditional())
824e8d8bef9SDimitry Andric return false;
825e8d8bef9SDimitry Andric
826e8d8bef9SDimitry Andric BasicBlock *Succ = BI->getSuccessor(0);
827e8d8bef9SDimitry Andric ReturnInst *Ret = dyn_cast<ReturnInst>(Succ->getFirstNonPHIOrDbg(true));
828e8d8bef9SDimitry Andric
829e8d8bef9SDimitry Andric if (!Ret)
830e8d8bef9SDimitry Andric return false;
831e8d8bef9SDimitry Andric
832fe6060f1SDimitry Andric CallInst *CI = findTRECandidate(&BB);
833e8d8bef9SDimitry Andric
834e8d8bef9SDimitry Andric if (!CI)
835e8d8bef9SDimitry Andric return false;
836e8d8bef9SDimitry Andric
837e8d8bef9SDimitry Andric LLVM_DEBUG(dbgs() << "FOLDING: " << *Succ
838e8d8bef9SDimitry Andric << "INTO UNCOND BRANCH PRED: " << BB);
839e8d8bef9SDimitry Andric FoldReturnIntoUncondBranch(Ret, Succ, &BB, &DTU);
840e8d8bef9SDimitry Andric ++NumRetDuped;
841e8d8bef9SDimitry Andric
842e8d8bef9SDimitry Andric // If all predecessors of Succ have been eliminated by
843e8d8bef9SDimitry Andric // FoldReturnIntoUncondBranch, delete it. It is important to empty it,
844e8d8bef9SDimitry Andric // because the ret instruction in there is still using a value which
845e8d8bef9SDimitry Andric // eliminateCall will attempt to remove. This block can only contain
846e8d8bef9SDimitry Andric // instructions that can't have uses, therefore it is safe to remove.
847e8d8bef9SDimitry Andric if (pred_empty(Succ))
848e8d8bef9SDimitry Andric DTU.deleteBB(Succ);
849e8d8bef9SDimitry Andric
850e8d8bef9SDimitry Andric eliminateCall(CI);
851e8d8bef9SDimitry Andric return true;
852e8d8bef9SDimitry Andric } else if (isa<ReturnInst>(TI)) {
853fe6060f1SDimitry Andric CallInst *CI = findTRECandidate(&BB);
854e8d8bef9SDimitry Andric
855e8d8bef9SDimitry Andric if (CI)
856e8d8bef9SDimitry Andric return eliminateCall(CI);
857e8d8bef9SDimitry Andric }
858e8d8bef9SDimitry Andric
859e8d8bef9SDimitry Andric return false;
860e8d8bef9SDimitry Andric }
861e8d8bef9SDimitry Andric
eliminate(Function & F,const TargetTransformInfo * TTI,AliasAnalysis * AA,OptimizationRemarkEmitter * ORE,DomTreeUpdater & DTU)8625ffd83dbSDimitry Andric bool TailRecursionEliminator::eliminate(Function &F,
8635ffd83dbSDimitry Andric const TargetTransformInfo *TTI,
8640b57cec5SDimitry Andric AliasAnalysis *AA,
8650b57cec5SDimitry Andric OptimizationRemarkEmitter *ORE,
8660b57cec5SDimitry Andric DomTreeUpdater &DTU) {
867fe6060f1SDimitry Andric if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
8680b57cec5SDimitry Andric return false;
8690b57cec5SDimitry Andric
8700b57cec5SDimitry Andric bool MadeChange = false;
871fe6060f1SDimitry Andric MadeChange |= markTails(F, ORE);
8720b57cec5SDimitry Andric
8730b57cec5SDimitry Andric // If this function is a varargs function, we won't be able to PHI the args
8740b57cec5SDimitry Andric // right, so don't even try to convert it...
8750b57cec5SDimitry Andric if (F.getFunctionType()->isVarArg())
8765ffd83dbSDimitry Andric return MadeChange;
8770b57cec5SDimitry Andric
878fe6060f1SDimitry Andric if (!canTRE(F))
879fe6060f1SDimitry Andric return MadeChange;
8800b57cec5SDimitry Andric
881e8d8bef9SDimitry Andric // Change any tail recursive calls to loops.
8825ffd83dbSDimitry Andric TailRecursionEliminator TRE(F, TTI, AA, ORE, DTU);
8835ffd83dbSDimitry Andric
884e8d8bef9SDimitry Andric for (BasicBlock &BB : F)
885fe6060f1SDimitry Andric MadeChange |= TRE.processBlock(BB);
8860b57cec5SDimitry Andric
8875ffd83dbSDimitry Andric TRE.cleanupAndFinalize();
8880b57cec5SDimitry Andric
8890b57cec5SDimitry Andric return MadeChange;
8900b57cec5SDimitry Andric }
8910b57cec5SDimitry Andric
8920b57cec5SDimitry Andric namespace {
8930b57cec5SDimitry Andric struct TailCallElim : public FunctionPass {
8940b57cec5SDimitry Andric static char ID; // Pass identification, replacement for typeid
TailCallElim__anone41365f00711::TailCallElim8950b57cec5SDimitry Andric TailCallElim() : FunctionPass(ID) {
8960b57cec5SDimitry Andric initializeTailCallElimPass(*PassRegistry::getPassRegistry());
8970b57cec5SDimitry Andric }
8980b57cec5SDimitry Andric
getAnalysisUsage__anone41365f00711::TailCallElim8990b57cec5SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override {
9000b57cec5SDimitry Andric AU.addRequired<TargetTransformInfoWrapperPass>();
9010b57cec5SDimitry Andric AU.addRequired<AAResultsWrapperPass>();
9020b57cec5SDimitry Andric AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
9030b57cec5SDimitry Andric AU.addPreserved<GlobalsAAWrapperPass>();
9040b57cec5SDimitry Andric AU.addPreserved<DominatorTreeWrapperPass>();
9050b57cec5SDimitry Andric AU.addPreserved<PostDominatorTreeWrapperPass>();
9060b57cec5SDimitry Andric }
9070b57cec5SDimitry Andric
runOnFunction__anone41365f00711::TailCallElim9080b57cec5SDimitry Andric bool runOnFunction(Function &F) override {
9090b57cec5SDimitry Andric if (skipFunction(F))
9100b57cec5SDimitry Andric return false;
9110b57cec5SDimitry Andric
9120b57cec5SDimitry Andric auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
9130b57cec5SDimitry Andric auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
9140b57cec5SDimitry Andric auto *PDTWP = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>();
9150b57cec5SDimitry Andric auto *PDT = PDTWP ? &PDTWP->getPostDomTree() : nullptr;
9160b57cec5SDimitry Andric // There is no noticable performance difference here between Lazy and Eager
9170b57cec5SDimitry Andric // UpdateStrategy based on some test results. It is feasible to switch the
9180b57cec5SDimitry Andric // UpdateStrategy to Lazy if we find it profitable later.
9190b57cec5SDimitry Andric DomTreeUpdater DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Eager);
9200b57cec5SDimitry Andric
9215ffd83dbSDimitry Andric return TailRecursionEliminator::eliminate(
9220b57cec5SDimitry Andric F, &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F),
9230b57cec5SDimitry Andric &getAnalysis<AAResultsWrapperPass>().getAAResults(),
9240b57cec5SDimitry Andric &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(), DTU);
9250b57cec5SDimitry Andric }
9260b57cec5SDimitry Andric };
9270b57cec5SDimitry Andric }
9280b57cec5SDimitry Andric
// Definition of the static ID member declared in TailCallElim.
char TailCallElim::ID = 0;
// Registration of TailCallElim under the command-line name "tailcallelim"
// with the description "Tail Call Elimination", listing the wrapper passes
// named in the INITIALIZE_PASS_DEPENDENCY lines as dependencies.
INITIALIZE_PASS_BEGIN(TailCallElim, "tailcallelim", "Tail Call Elimination",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
INITIALIZE_PASS_END(TailCallElim, "tailcallelim", "Tail Call Elimination",
                    false, false)
9360b57cec5SDimitry Andric
9370b57cec5SDimitry Andric // Public interface to the TailCallElimination pass
9380b57cec5SDimitry Andric FunctionPass *llvm::createTailCallEliminationPass() {
9390b57cec5SDimitry Andric return new TailCallElim();
9400b57cec5SDimitry Andric }
9410b57cec5SDimitry Andric
run(Function & F,FunctionAnalysisManager & AM)9420b57cec5SDimitry Andric PreservedAnalyses TailCallElimPass::run(Function &F,
9430b57cec5SDimitry Andric FunctionAnalysisManager &AM) {
9440b57cec5SDimitry Andric
9450b57cec5SDimitry Andric TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
9460b57cec5SDimitry Andric AliasAnalysis &AA = AM.getResult<AAManager>(F);
9470b57cec5SDimitry Andric auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
9480b57cec5SDimitry Andric auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F);
9490b57cec5SDimitry Andric auto *PDT = AM.getCachedResult<PostDominatorTreeAnalysis>(F);
9500b57cec5SDimitry Andric // There is no noticable performance difference here between Lazy and Eager
9510b57cec5SDimitry Andric // UpdateStrategy based on some test results. It is feasible to switch the
9520b57cec5SDimitry Andric // UpdateStrategy to Lazy if we find it profitable later.
9530b57cec5SDimitry Andric DomTreeUpdater DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Eager);
9545ffd83dbSDimitry Andric bool Changed = TailRecursionEliminator::eliminate(F, &TTI, &AA, &ORE, DTU);
9550b57cec5SDimitry Andric
9560b57cec5SDimitry Andric if (!Changed)
9570b57cec5SDimitry Andric return PreservedAnalyses::all();
9580b57cec5SDimitry Andric PreservedAnalyses PA;
9590b57cec5SDimitry Andric PA.preserve<DominatorTreeAnalysis>();
9600b57cec5SDimitry Andric PA.preserve<PostDominatorTreeAnalysis>();
9610b57cec5SDimitry Andric return PA;
9620b57cec5SDimitry Andric }
963