xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/IPO/PartialInlining.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
10b57cec5SDimitry Andric //===- PartialInlining.cpp - Inline parts of functions --------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This pass performs partial inlining, typically by inlining an if statement
100b57cec5SDimitry Andric // that surrounds the body of the function.
110b57cec5SDimitry Andric //
120b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
130b57cec5SDimitry Andric 
140b57cec5SDimitry Andric #include "llvm/Transforms/IPO/PartialInlining.h"
150b57cec5SDimitry Andric #include "llvm/ADT/DenseMap.h"
160b57cec5SDimitry Andric #include "llvm/ADT/DenseSet.h"
1706c3fb27SDimitry Andric #include "llvm/ADT/DepthFirstIterator.h"
180b57cec5SDimitry Andric #include "llvm/ADT/STLExtras.h"
190b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h"
200b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h"
210b57cec5SDimitry Andric #include "llvm/Analysis/BlockFrequencyInfo.h"
220b57cec5SDimitry Andric #include "llvm/Analysis/BranchProbabilityInfo.h"
230b57cec5SDimitry Andric #include "llvm/Analysis/InlineCost.h"
240b57cec5SDimitry Andric #include "llvm/Analysis/LoopInfo.h"
250b57cec5SDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h"
260b57cec5SDimitry Andric #include "llvm/Analysis/ProfileSummaryInfo.h"
270b57cec5SDimitry Andric #include "llvm/Analysis/TargetLibraryInfo.h"
280b57cec5SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h"
290b57cec5SDimitry Andric #include "llvm/IR/Attributes.h"
300b57cec5SDimitry Andric #include "llvm/IR/BasicBlock.h"
310b57cec5SDimitry Andric #include "llvm/IR/CFG.h"
320b57cec5SDimitry Andric #include "llvm/IR/DebugLoc.h"
330b57cec5SDimitry Andric #include "llvm/IR/DiagnosticInfo.h"
340b57cec5SDimitry Andric #include "llvm/IR/Dominators.h"
350b57cec5SDimitry Andric #include "llvm/IR/Function.h"
360b57cec5SDimitry Andric #include "llvm/IR/InstrTypes.h"
370b57cec5SDimitry Andric #include "llvm/IR/Instruction.h"
380b57cec5SDimitry Andric #include "llvm/IR/Instructions.h"
390b57cec5SDimitry Andric #include "llvm/IR/IntrinsicInst.h"
400b57cec5SDimitry Andric #include "llvm/IR/Intrinsics.h"
410b57cec5SDimitry Andric #include "llvm/IR/Module.h"
4281ad6265SDimitry Andric #include "llvm/IR/Operator.h"
43bdd1243dSDimitry Andric #include "llvm/IR/ProfDataUtils.h"
440b57cec5SDimitry Andric #include "llvm/IR/User.h"
450b57cec5SDimitry Andric #include "llvm/Support/BlockFrequency.h"
460b57cec5SDimitry Andric #include "llvm/Support/BranchProbability.h"
470b57cec5SDimitry Andric #include "llvm/Support/Casting.h"
480b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h"
490b57cec5SDimitry Andric #include "llvm/Support/ErrorHandling.h"
500b57cec5SDimitry Andric #include "llvm/Transforms/IPO.h"
510b57cec5SDimitry Andric #include "llvm/Transforms/Utils/Cloning.h"
520b57cec5SDimitry Andric #include "llvm/Transforms/Utils/CodeExtractor.h"
530b57cec5SDimitry Andric #include "llvm/Transforms/Utils/ValueMapper.h"
540b57cec5SDimitry Andric #include <algorithm>
550b57cec5SDimitry Andric #include <cassert>
560b57cec5SDimitry Andric #include <cstdint>
570b57cec5SDimitry Andric #include <memory>
580b57cec5SDimitry Andric #include <tuple>
590b57cec5SDimitry Andric #include <vector>
600b57cec5SDimitry Andric 
610b57cec5SDimitry Andric using namespace llvm;
620b57cec5SDimitry Andric 
630b57cec5SDimitry Andric #define DEBUG_TYPE "partial-inlining"
640b57cec5SDimitry Andric 
650b57cec5SDimitry Andric STATISTIC(NumPartialInlined,
660b57cec5SDimitry Andric           "Number of callsites functions partially inlined into.");
670b57cec5SDimitry Andric STATISTIC(NumColdOutlinePartialInlined, "Number of times functions with "
680b57cec5SDimitry Andric                                         "cold outlined regions were partially "
690b57cec5SDimitry Andric                                         "inlined into its caller(s).");
700b57cec5SDimitry Andric STATISTIC(NumColdRegionsFound,
710b57cec5SDimitry Andric            "Number of cold single entry/exit regions found.");
720b57cec5SDimitry Andric STATISTIC(NumColdRegionsOutlined,
730b57cec5SDimitry Andric            "Number of cold single entry/exit regions outlined.");
740b57cec5SDimitry Andric 
750b57cec5SDimitry Andric // Command line option to disable partial-inlining. The default is false:
760b57cec5SDimitry Andric static cl::opt<bool>
770b57cec5SDimitry Andric     DisablePartialInlining("disable-partial-inlining", cl::init(false),
780b57cec5SDimitry Andric                            cl::Hidden, cl::desc("Disable partial inlining"));
790b57cec5SDimitry Andric // Command line option to disable multi-region partial-inlining. The default is
800b57cec5SDimitry Andric // false:
810b57cec5SDimitry Andric static cl::opt<bool> DisableMultiRegionPartialInline(
820b57cec5SDimitry Andric     "disable-mr-partial-inlining", cl::init(false), cl::Hidden,
830b57cec5SDimitry Andric     cl::desc("Disable multi-region partial inlining"));
840b57cec5SDimitry Andric 
850b57cec5SDimitry Andric // Command line option to force outlining in regions with live exit variables.
860b57cec5SDimitry Andric // The default is false:
870b57cec5SDimitry Andric static cl::opt<bool>
880b57cec5SDimitry Andric     ForceLiveExit("pi-force-live-exit-outline", cl::init(false), cl::Hidden,
890b57cec5SDimitry Andric                cl::desc("Force outline regions with live exits"));
900b57cec5SDimitry Andric 
910b57cec5SDimitry Andric // Command line option to enable marking outline functions with Cold Calling
920b57cec5SDimitry Andric // Convention. The default is false:
930b57cec5SDimitry Andric static cl::opt<bool>
940b57cec5SDimitry Andric     MarkOutlinedColdCC("pi-mark-coldcc", cl::init(false), cl::Hidden,
950b57cec5SDimitry Andric                        cl::desc("Mark outline function calls with ColdCC"));
960b57cec5SDimitry Andric 
970b57cec5SDimitry Andric // This is an option used by testing:
980b57cec5SDimitry Andric static cl::opt<bool> SkipCostAnalysis("skip-partial-inlining-cost-analysis",
9981ad6265SDimitry Andric 
1000b57cec5SDimitry Andric                                       cl::ReallyHidden,
1010b57cec5SDimitry Andric                                       cl::desc("Skip Cost Analysis"));
1020b57cec5SDimitry Andric // Used to determine if a cold region is worth outlining based on
1030b57cec5SDimitry Andric // its inlining cost compared to the original function.  Default is set at 10%.
1040b57cec5SDimitry Andric // ie. if the cold region reduces the inlining cost of the original function by
1050b57cec5SDimitry Andric // at least 10%.
1060b57cec5SDimitry Andric static cl::opt<float> MinRegionSizeRatio(
1070b57cec5SDimitry Andric     "min-region-size-ratio", cl::init(0.1), cl::Hidden,
1080b57cec5SDimitry Andric     cl::desc("Minimum ratio comparing relative sizes of each "
1090b57cec5SDimitry Andric              "outline candidate and original function"));
1100b57cec5SDimitry Andric // Used to tune the minimum number of execution counts needed in the predecessor
1110b57cec5SDimitry Andric // block to the cold edge. ie. confidence interval.
1120b57cec5SDimitry Andric static cl::opt<unsigned>
1130b57cec5SDimitry Andric     MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden,
1140b57cec5SDimitry Andric                              cl::desc("Minimum block executions to consider "
1150b57cec5SDimitry Andric                                       "its BranchProbabilityInfo valid"));
1160b57cec5SDimitry Andric // Used to determine when an edge is considered cold. Default is set to 10%. ie.
1170b57cec5SDimitry Andric // if the branch probability is 10% or less, then it is deemed as 'cold'.
1180b57cec5SDimitry Andric static cl::opt<float> ColdBranchRatio(
1190b57cec5SDimitry Andric     "cold-branch-ratio", cl::init(0.1), cl::Hidden,
1200b57cec5SDimitry Andric     cl::desc("Minimum BranchProbability to consider a region cold."));
1210b57cec5SDimitry Andric 
1220b57cec5SDimitry Andric static cl::opt<unsigned> MaxNumInlineBlocks(
1230b57cec5SDimitry Andric     "max-num-inline-blocks", cl::init(5), cl::Hidden,
1240b57cec5SDimitry Andric     cl::desc("Max number of blocks to be partially inlined"));
1250b57cec5SDimitry Andric 
1260b57cec5SDimitry Andric // Command line option to set the maximum number of partial inlining allowed
1270b57cec5SDimitry Andric // for the module. The default value of -1 means no limit.
1280b57cec5SDimitry Andric static cl::opt<int> MaxNumPartialInlining(
12981ad6265SDimitry Andric     "max-partial-inlining", cl::init(-1), cl::Hidden,
1300b57cec5SDimitry Andric     cl::desc("Max number of partial inlining. The default is unlimited"));
1310b57cec5SDimitry Andric 
1320b57cec5SDimitry Andric // Used only when PGO or user annotated branch data is absent. It is
1330b57cec5SDimitry Andric // the least value that is used to weigh the outline region. If BFI
1340b57cec5SDimitry Andric // produces larger value, the BFI value will be used.
1350b57cec5SDimitry Andric static cl::opt<int>
1360b57cec5SDimitry Andric     OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75),
13781ad6265SDimitry Andric                              cl::Hidden,
1380b57cec5SDimitry Andric                              cl::desc("Relative frequency of outline region to "
1390b57cec5SDimitry Andric                                       "the entry block"));
1400b57cec5SDimitry Andric 
1410b57cec5SDimitry Andric static cl::opt<unsigned> ExtraOutliningPenalty(
1420b57cec5SDimitry Andric     "partial-inlining-extra-penalty", cl::init(0), cl::Hidden,
1430b57cec5SDimitry Andric     cl::desc("A debug option to add additional penalty to the computed one."));
1440b57cec5SDimitry Andric 
1450b57cec5SDimitry Andric namespace {
1460b57cec5SDimitry Andric 
1470b57cec5SDimitry Andric struct FunctionOutliningInfo {
1480b57cec5SDimitry Andric   FunctionOutliningInfo() = default;
1490b57cec5SDimitry Andric 
1500b57cec5SDimitry Andric   // Returns the number of blocks to be inlined including all blocks
1510b57cec5SDimitry Andric   // in Entries and one return block.
getNumInlinedBlocks__anon105b68ca0111::FunctionOutliningInfo152e8d8bef9SDimitry Andric   unsigned getNumInlinedBlocks() const { return Entries.size() + 1; }
1530b57cec5SDimitry Andric 
1540b57cec5SDimitry Andric   // A set of blocks including the function entry that guard
1550b57cec5SDimitry Andric   // the region to be outlined.
1560b57cec5SDimitry Andric   SmallVector<BasicBlock *, 4> Entries;
1570b57cec5SDimitry Andric 
1580b57cec5SDimitry Andric   // The return block that is not included in the outlined region.
1590b57cec5SDimitry Andric   BasicBlock *ReturnBlock = nullptr;
1600b57cec5SDimitry Andric 
1610b57cec5SDimitry Andric   // The dominating block of the region to be outlined.
1620b57cec5SDimitry Andric   BasicBlock *NonReturnBlock = nullptr;
1630b57cec5SDimitry Andric 
1645f757f3fSDimitry Andric   // The set of blocks in Entries that are predecessors to ReturnBlock
1650b57cec5SDimitry Andric   SmallVector<BasicBlock *, 4> ReturnBlockPreds;
1660b57cec5SDimitry Andric };
1670b57cec5SDimitry Andric 
1680b57cec5SDimitry Andric struct FunctionOutliningMultiRegionInfo {
16981ad6265SDimitry Andric   FunctionOutliningMultiRegionInfo() = default;
1700b57cec5SDimitry Andric 
1710b57cec5SDimitry Andric   // Container for outline regions
1720b57cec5SDimitry Andric   struct OutlineRegionInfo {
OutlineRegionInfo__anon105b68ca0111::FunctionOutliningMultiRegionInfo::OutlineRegionInfo1730b57cec5SDimitry Andric     OutlineRegionInfo(ArrayRef<BasicBlock *> Region,
1740b57cec5SDimitry Andric                       BasicBlock *EntryBlock, BasicBlock *ExitBlock,
1750b57cec5SDimitry Andric                       BasicBlock *ReturnBlock)
1760b57cec5SDimitry Andric         : Region(Region.begin(), Region.end()), EntryBlock(EntryBlock),
1770b57cec5SDimitry Andric           ExitBlock(ExitBlock), ReturnBlock(ReturnBlock) {}
1780b57cec5SDimitry Andric     SmallVector<BasicBlock *, 8> Region;
1790b57cec5SDimitry Andric     BasicBlock *EntryBlock;
1800b57cec5SDimitry Andric     BasicBlock *ExitBlock;
1810b57cec5SDimitry Andric     BasicBlock *ReturnBlock;
1820b57cec5SDimitry Andric   };
1830b57cec5SDimitry Andric 
1840b57cec5SDimitry Andric   SmallVector<OutlineRegionInfo, 4> ORI;
1850b57cec5SDimitry Andric };
1860b57cec5SDimitry Andric 
1870b57cec5SDimitry Andric struct PartialInlinerImpl {
1880b57cec5SDimitry Andric 
PartialInlinerImpl__anon105b68ca0111::PartialInlinerImpl1890b57cec5SDimitry Andric   PartialInlinerImpl(
1905ffd83dbSDimitry Andric       function_ref<AssumptionCache &(Function &)> GetAC,
1910b57cec5SDimitry Andric       function_ref<AssumptionCache *(Function &)> LookupAC,
1925ffd83dbSDimitry Andric       function_ref<TargetTransformInfo &(Function &)> GTTI,
1935ffd83dbSDimitry Andric       function_ref<const TargetLibraryInfo &(Function &)> GTLI,
1945ffd83dbSDimitry Andric       ProfileSummaryInfo &ProfSI,
1955ffd83dbSDimitry Andric       function_ref<BlockFrequencyInfo &(Function &)> GBFI = nullptr)
1960b57cec5SDimitry Andric       : GetAssumptionCache(GetAC), LookupAssumptionCache(LookupAC),
1975ffd83dbSDimitry Andric         GetTTI(GTTI), GetBFI(GBFI), GetTLI(GTLI), PSI(ProfSI) {}
1980b57cec5SDimitry Andric 
1990b57cec5SDimitry Andric   bool run(Module &M);
2000b57cec5SDimitry Andric   // Main part of the transformation that calls helper functions to find
2010b57cec5SDimitry Andric   // outlining candidates, clone & outline the function, and attempt to
2020b57cec5SDimitry Andric   // partially inline the resulting function. Returns true if
2030b57cec5SDimitry Andric   // inlining was successful, false otherwise.  Also returns the outline
2040b57cec5SDimitry Andric   // function (only if we partially inlined early returns) as there is a
2050b57cec5SDimitry Andric   // possibility to further "peel" early return statements that were left in the
2060b57cec5SDimitry Andric   // outline function due to code size.
207e8d8bef9SDimitry Andric   std::pair<bool, Function *> unswitchFunction(Function &F);
2080b57cec5SDimitry Andric 
2090b57cec5SDimitry Andric   // This class speculatively clones the function to be partial inlined.
2100b57cec5SDimitry Andric   // At the end of partial inlining, the remaining callsites to the cloned
2110b57cec5SDimitry Andric   // function that are not partially inlined will be fixed up to reference
2120b57cec5SDimitry Andric   // the original function, and the cloned function will be erased.
2130b57cec5SDimitry Andric   struct FunctionCloner {
2140b57cec5SDimitry Andric     // Two constructors, one for single region outlining, the other for
2150b57cec5SDimitry Andric     // multi-region outlining.
2160b57cec5SDimitry Andric     FunctionCloner(Function *F, FunctionOutliningInfo *OI,
2170b57cec5SDimitry Andric                    OptimizationRemarkEmitter &ORE,
218e8d8bef9SDimitry Andric                    function_ref<AssumptionCache *(Function &)> LookupAC,
219e8d8bef9SDimitry Andric                    function_ref<TargetTransformInfo &(Function &)> GetTTI);
2200b57cec5SDimitry Andric     FunctionCloner(Function *F, FunctionOutliningMultiRegionInfo *OMRI,
2210b57cec5SDimitry Andric                    OptimizationRemarkEmitter &ORE,
222e8d8bef9SDimitry Andric                    function_ref<AssumptionCache *(Function &)> LookupAC,
223e8d8bef9SDimitry Andric                    function_ref<TargetTransformInfo &(Function &)> GetTTI);
224e8d8bef9SDimitry Andric 
2250b57cec5SDimitry Andric     ~FunctionCloner();
2260b57cec5SDimitry Andric 
2270b57cec5SDimitry Andric     // Prepare for function outlining: making sure there is only
2280b57cec5SDimitry Andric     // one incoming edge from the extracted/outlined region to
2290b57cec5SDimitry Andric     // the return block.
230e8d8bef9SDimitry Andric     void normalizeReturnBlock() const;
2310b57cec5SDimitry Andric 
2320b57cec5SDimitry Andric     // Do function outlining for cold regions.
2330b57cec5SDimitry Andric     bool doMultiRegionFunctionOutlining();
2340b57cec5SDimitry Andric     // Do function outlining for region after early return block(s).
2350b57cec5SDimitry Andric     // NOTE: For vararg functions that do the vararg handling in the outlined
2360b57cec5SDimitry Andric     //       function, we temporarily generate IR that does not properly
2370b57cec5SDimitry Andric     //       forward varargs to the outlined function. Calling InlineFunction
2380b57cec5SDimitry Andric     //       will update calls to the outlined functions to properly forward
2390b57cec5SDimitry Andric     //       the varargs.
2400b57cec5SDimitry Andric     Function *doSingleRegionFunctionOutlining();
2410b57cec5SDimitry Andric 
2420b57cec5SDimitry Andric     Function *OrigFunc = nullptr;
2430b57cec5SDimitry Andric     Function *ClonedFunc = nullptr;
2440b57cec5SDimitry Andric 
2450b57cec5SDimitry Andric     typedef std::pair<Function *, BasicBlock *> FuncBodyCallerPair;
2460b57cec5SDimitry Andric     // Keep track of Outlined Functions and the basic block they're called from.
2470b57cec5SDimitry Andric     SmallVector<FuncBodyCallerPair, 4> OutlinedFunctions;
2480b57cec5SDimitry Andric 
2490b57cec5SDimitry Andric     // ClonedFunc is inlined in one of its callers after function
2500b57cec5SDimitry Andric     // outlining.
2510b57cec5SDimitry Andric     bool IsFunctionInlined = false;
2520b57cec5SDimitry Andric     // The cost of the region to be outlined.
253fe6060f1SDimitry Andric     InstructionCost OutlinedRegionCost = 0;
2540b57cec5SDimitry Andric     // ClonedOI is specific to outlining non-early return blocks.
2550b57cec5SDimitry Andric     std::unique_ptr<FunctionOutliningInfo> ClonedOI = nullptr;
2560b57cec5SDimitry Andric     // ClonedOMRI is specific to outlining cold regions.
2570b57cec5SDimitry Andric     std::unique_ptr<FunctionOutliningMultiRegionInfo> ClonedOMRI = nullptr;
2580b57cec5SDimitry Andric     std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI = nullptr;
2590b57cec5SDimitry Andric     OptimizationRemarkEmitter &ORE;
2600b57cec5SDimitry Andric     function_ref<AssumptionCache *(Function &)> LookupAC;
261e8d8bef9SDimitry Andric     function_ref<TargetTransformInfo &(Function &)> GetTTI;
2620b57cec5SDimitry Andric   };
2630b57cec5SDimitry Andric 
2640b57cec5SDimitry Andric private:
2650b57cec5SDimitry Andric   int NumPartialInlining = 0;
2665ffd83dbSDimitry Andric   function_ref<AssumptionCache &(Function &)> GetAssumptionCache;
2670b57cec5SDimitry Andric   function_ref<AssumptionCache *(Function &)> LookupAssumptionCache;
2685ffd83dbSDimitry Andric   function_ref<TargetTransformInfo &(Function &)> GetTTI;
2695ffd83dbSDimitry Andric   function_ref<BlockFrequencyInfo &(Function &)> GetBFI;
2705ffd83dbSDimitry Andric   function_ref<const TargetLibraryInfo &(Function &)> GetTLI;
2715ffd83dbSDimitry Andric   ProfileSummaryInfo &PSI;
2720b57cec5SDimitry Andric 
2730b57cec5SDimitry Andric   // Return the frequency of the OutlininingBB relative to F's entry point.
2740b57cec5SDimitry Andric   // The result is no larger than 1 and is represented using BP.
2750b57cec5SDimitry Andric   // (Note that the outlined region's 'head' block can only have incoming
2760b57cec5SDimitry Andric   // edges from the guarding entry blocks).
277e8d8bef9SDimitry Andric   BranchProbability
278e8d8bef9SDimitry Andric   getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) const;
2790b57cec5SDimitry Andric 
2805ffd83dbSDimitry Andric   // Return true if the callee of CB should be partially inlined with
2810b57cec5SDimitry Andric   // profit.
2825ffd83dbSDimitry Andric   bool shouldPartialInline(CallBase &CB, FunctionCloner &Cloner,
2830b57cec5SDimitry Andric                            BlockFrequency WeightedOutliningRcost,
284e8d8bef9SDimitry Andric                            OptimizationRemarkEmitter &ORE) const;
2850b57cec5SDimitry Andric 
2860b57cec5SDimitry Andric   // Try to inline DuplicateFunction (cloned from F with call to
2870b57cec5SDimitry Andric   // the OutlinedFunction into its callers. Return true
2880b57cec5SDimitry Andric   // if there is any successful inlining.
2890b57cec5SDimitry Andric   bool tryPartialInline(FunctionCloner &Cloner);
2900b57cec5SDimitry Andric 
2910b57cec5SDimitry Andric   // Compute the mapping from use site of DuplicationFunction to the enclosing
2920b57cec5SDimitry Andric   // BB's profile count.
293e8d8bef9SDimitry Andric   void
294e8d8bef9SDimitry Andric   computeCallsiteToProfCountMap(Function *DuplicateFunction,
295e8d8bef9SDimitry Andric                                 DenseMap<User *, uint64_t> &SiteCountMap) const;
2960b57cec5SDimitry Andric 
isLimitReached__anon105b68ca0111::PartialInlinerImpl297e8d8bef9SDimitry Andric   bool isLimitReached() const {
2980b57cec5SDimitry Andric     return (MaxNumPartialInlining != -1 &&
2990b57cec5SDimitry Andric             NumPartialInlining >= MaxNumPartialInlining);
3000b57cec5SDimitry Andric   }
3010b57cec5SDimitry Andric 
getSupportedCallBase__anon105b68ca0111::PartialInlinerImpl3025ffd83dbSDimitry Andric   static CallBase *getSupportedCallBase(User *U) {
3035ffd83dbSDimitry Andric     if (isa<CallInst>(U) || isa<InvokeInst>(U))
3045ffd83dbSDimitry Andric       return cast<CallBase>(U);
3050b57cec5SDimitry Andric     llvm_unreachable("All uses must be calls");
3065ffd83dbSDimitry Andric     return nullptr;
3070b57cec5SDimitry Andric   }
3080b57cec5SDimitry Andric 
getOneCallSiteTo__anon105b68ca0111::PartialInlinerImpl309e8d8bef9SDimitry Andric   static CallBase *getOneCallSiteTo(Function &F) {
310e8d8bef9SDimitry Andric     User *User = *F.user_begin();
3115ffd83dbSDimitry Andric     return getSupportedCallBase(User);
3120b57cec5SDimitry Andric   }
3130b57cec5SDimitry Andric 
getOneDebugLoc__anon105b68ca0111::PartialInlinerImpl314e8d8bef9SDimitry Andric   std::tuple<DebugLoc, BasicBlock *> getOneDebugLoc(Function &F) const {
3155ffd83dbSDimitry Andric     CallBase *CB = getOneCallSiteTo(F);
3165ffd83dbSDimitry Andric     DebugLoc DLoc = CB->getDebugLoc();
3175ffd83dbSDimitry Andric     BasicBlock *Block = CB->getParent();
3180b57cec5SDimitry Andric     return std::make_tuple(DLoc, Block);
3190b57cec5SDimitry Andric   }
3200b57cec5SDimitry Andric 
3210b57cec5SDimitry Andric   // Returns the costs associated with function outlining:
3220b57cec5SDimitry Andric   // - The first value is the non-weighted runtime cost for making the call
3230b57cec5SDimitry Andric   //   to the outlined function, including the addtional  setup cost in the
3240b57cec5SDimitry Andric   //    outlined function itself;
3250b57cec5SDimitry Andric   // - The second value is the estimated size of the new call sequence in
3260b57cec5SDimitry Andric   //   basic block Cloner.OutliningCallBB;
327fe6060f1SDimitry Andric   std::tuple<InstructionCost, InstructionCost>
328fe6060f1SDimitry Andric   computeOutliningCosts(FunctionCloner &Cloner) const;
3290b57cec5SDimitry Andric 
3300b57cec5SDimitry Andric   // Compute the 'InlineCost' of block BB. InlineCost is a proxy used to
3310b57cec5SDimitry Andric   // approximate both the size and runtime cost (Note that in the current
3320b57cec5SDimitry Andric   // inline cost analysis, there is no clear distinction there either).
333fe6060f1SDimitry Andric   static InstructionCost computeBBInlineCost(BasicBlock *BB,
334fe6060f1SDimitry Andric                                              TargetTransformInfo *TTI);
3350b57cec5SDimitry Andric 
336e8d8bef9SDimitry Andric   std::unique_ptr<FunctionOutliningInfo>
337e8d8bef9SDimitry Andric   computeOutliningInfo(Function &F) const;
338e8d8bef9SDimitry Andric 
3390b57cec5SDimitry Andric   std::unique_ptr<FunctionOutliningMultiRegionInfo>
340e8d8bef9SDimitry Andric   computeOutliningColdRegionsInfo(Function &F,
341e8d8bef9SDimitry Andric                                   OptimizationRemarkEmitter &ORE) const;
3420b57cec5SDimitry Andric };
3430b57cec5SDimitry Andric 
3440b57cec5SDimitry Andric } // end anonymous namespace
3450b57cec5SDimitry Andric 
3460b57cec5SDimitry Andric std::unique_ptr<FunctionOutliningMultiRegionInfo>
computeOutliningColdRegionsInfo(Function & F,OptimizationRemarkEmitter & ORE) const347e8d8bef9SDimitry Andric PartialInlinerImpl::computeOutliningColdRegionsInfo(
348e8d8bef9SDimitry Andric     Function &F, OptimizationRemarkEmitter &ORE) const {
349e8d8bef9SDimitry Andric   BasicBlock *EntryBlock = &F.front();
3500b57cec5SDimitry Andric 
351e8d8bef9SDimitry Andric   DominatorTree DT(F);
3520b57cec5SDimitry Andric   LoopInfo LI(DT);
353e8d8bef9SDimitry Andric   BranchProbabilityInfo BPI(F, LI);
3540b57cec5SDimitry Andric   std::unique_ptr<BlockFrequencyInfo> ScopedBFI;
3550b57cec5SDimitry Andric   BlockFrequencyInfo *BFI;
3560b57cec5SDimitry Andric   if (!GetBFI) {
357e8d8bef9SDimitry Andric     ScopedBFI.reset(new BlockFrequencyInfo(F, BPI, LI));
3580b57cec5SDimitry Andric     BFI = ScopedBFI.get();
3590b57cec5SDimitry Andric   } else
360e8d8bef9SDimitry Andric     BFI = &(GetBFI(F));
3610b57cec5SDimitry Andric 
3620b57cec5SDimitry Andric   // Return if we don't have profiling information.
3635ffd83dbSDimitry Andric   if (!PSI.hasInstrumentationProfile())
3640b57cec5SDimitry Andric     return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
3650b57cec5SDimitry Andric 
3660b57cec5SDimitry Andric   std::unique_ptr<FunctionOutliningMultiRegionInfo> OutliningInfo =
3678bcb0991SDimitry Andric       std::make_unique<FunctionOutliningMultiRegionInfo>();
3680b57cec5SDimitry Andric 
3690b57cec5SDimitry Andric   auto IsSingleExit =
3700b57cec5SDimitry Andric       [&ORE](SmallVectorImpl<BasicBlock *> &BlockList) -> BasicBlock * {
3710b57cec5SDimitry Andric     BasicBlock *ExitBlock = nullptr;
3720b57cec5SDimitry Andric     for (auto *Block : BlockList) {
373fe6060f1SDimitry Andric       for (BasicBlock *Succ : successors(Block)) {
374fe6060f1SDimitry Andric         if (!is_contained(BlockList, Succ)) {
3750b57cec5SDimitry Andric           if (ExitBlock) {
3760b57cec5SDimitry Andric             ORE.emit([&]() {
3770b57cec5SDimitry Andric               return OptimizationRemarkMissed(DEBUG_TYPE, "MultiExitRegion",
378fe6060f1SDimitry Andric                                               &Succ->front())
3790b57cec5SDimitry Andric                      << "Region dominated by "
3800b57cec5SDimitry Andric                      << ore::NV("Block", BlockList.front()->getName())
3810b57cec5SDimitry Andric                      << " has more than one region exit edge.";
3820b57cec5SDimitry Andric             });
3830b57cec5SDimitry Andric             return nullptr;
384e8d8bef9SDimitry Andric           }
385e8d8bef9SDimitry Andric 
3860b57cec5SDimitry Andric           ExitBlock = Block;
3870b57cec5SDimitry Andric         }
3880b57cec5SDimitry Andric       }
3890b57cec5SDimitry Andric     }
3900b57cec5SDimitry Andric     return ExitBlock;
3910b57cec5SDimitry Andric   };
3920b57cec5SDimitry Andric 
3930b57cec5SDimitry Andric   auto BBProfileCount = [BFI](BasicBlock *BB) {
39481ad6265SDimitry Andric     return BFI->getBlockProfileCount(BB).value_or(0);
3950b57cec5SDimitry Andric   };
3960b57cec5SDimitry Andric 
3970b57cec5SDimitry Andric   // Use the same computeBBInlineCost function to compute the cost savings of
3980b57cec5SDimitry Andric   // the outlining the candidate region.
399e8d8bef9SDimitry Andric   TargetTransformInfo *FTTI = &GetTTI(F);
400fe6060f1SDimitry Andric   InstructionCost OverallFunctionCost = 0;
401e8d8bef9SDimitry Andric   for (auto &BB : F)
402e8d8bef9SDimitry Andric     OverallFunctionCost += computeBBInlineCost(&BB, FTTI);
4030b57cec5SDimitry Andric 
404e8d8bef9SDimitry Andric   LLVM_DEBUG(dbgs() << "OverallFunctionCost = " << OverallFunctionCost
405e8d8bef9SDimitry Andric                     << "\n";);
406e8d8bef9SDimitry Andric 
407fe6060f1SDimitry Andric   InstructionCost MinOutlineRegionCost = OverallFunctionCost.map(
408fe6060f1SDimitry Andric       [&](auto Cost) { return Cost * MinRegionSizeRatio; });
409fe6060f1SDimitry Andric 
4100b57cec5SDimitry Andric   BranchProbability MinBranchProbability(
4110b57cec5SDimitry Andric       static_cast<int>(ColdBranchRatio * MinBlockCounterExecution),
4120b57cec5SDimitry Andric       MinBlockCounterExecution);
4130b57cec5SDimitry Andric   bool ColdCandidateFound = false;
4140b57cec5SDimitry Andric   BasicBlock *CurrEntry = EntryBlock;
4150b57cec5SDimitry Andric   std::vector<BasicBlock *> DFS;
4160b57cec5SDimitry Andric   DenseMap<BasicBlock *, bool> VisitedMap;
4170b57cec5SDimitry Andric   DFS.push_back(CurrEntry);
4180b57cec5SDimitry Andric   VisitedMap[CurrEntry] = true;
419e8d8bef9SDimitry Andric 
4200b57cec5SDimitry Andric   // Use Depth First Search on the basic blocks to find CFG edges that are
4210b57cec5SDimitry Andric   // considered cold.
4220b57cec5SDimitry Andric   // Cold regions considered must also have its inline cost compared to the
4230b57cec5SDimitry Andric   // overall inline cost of the original function.  The region is outlined only
4240b57cec5SDimitry Andric   // if it reduced the inline cost of the function by 'MinOutlineRegionCost' or
4250b57cec5SDimitry Andric   // more.
4260b57cec5SDimitry Andric   while (!DFS.empty()) {
427e8d8bef9SDimitry Andric     auto *ThisBB = DFS.back();
4280b57cec5SDimitry Andric     DFS.pop_back();
4290b57cec5SDimitry Andric     // Only consider regions with predecessor blocks that are considered
4300b57cec5SDimitry Andric     // not-cold (default: part of the top 99.99% of all block counters)
4310b57cec5SDimitry Andric     // AND greater than our minimum block execution count (default: 100).
432e8d8bef9SDimitry Andric     if (PSI.isColdBlock(ThisBB, BFI) ||
433e8d8bef9SDimitry Andric         BBProfileCount(ThisBB) < MinBlockCounterExecution)
4340b57cec5SDimitry Andric       continue;
435e8d8bef9SDimitry Andric     for (auto SI = succ_begin(ThisBB); SI != succ_end(ThisBB); ++SI) {
4360b57cec5SDimitry Andric       if (VisitedMap[*SI])
4370b57cec5SDimitry Andric         continue;
4380b57cec5SDimitry Andric       VisitedMap[*SI] = true;
4390b57cec5SDimitry Andric       DFS.push_back(*SI);
4400b57cec5SDimitry Andric       // If branch isn't cold, we skip to the next one.
441e8d8bef9SDimitry Andric       BranchProbability SuccProb = BPI.getEdgeProbability(ThisBB, *SI);
4420b57cec5SDimitry Andric       if (SuccProb > MinBranchProbability)
4430b57cec5SDimitry Andric         continue;
444e8d8bef9SDimitry Andric 
445e8d8bef9SDimitry Andric       LLVM_DEBUG(dbgs() << "Found cold edge: " << ThisBB->getName() << "->"
446e8d8bef9SDimitry Andric                         << SI->getName()
447e8d8bef9SDimitry Andric                         << "\nBranch Probability = " << SuccProb << "\n";);
448e8d8bef9SDimitry Andric 
4490b57cec5SDimitry Andric       SmallVector<BasicBlock *, 8> DominateVector;
4500b57cec5SDimitry Andric       DT.getDescendants(*SI, DominateVector);
451e8d8bef9SDimitry Andric       assert(!DominateVector.empty() &&
452e8d8bef9SDimitry Andric              "SI should be reachable and have at least itself as descendant");
453e8d8bef9SDimitry Andric 
4540b57cec5SDimitry Andric       // We can only outline single entry regions (for now).
455e8d8bef9SDimitry Andric       if (!DominateVector.front()->hasNPredecessors(1)) {
456e8d8bef9SDimitry Andric         LLVM_DEBUG(dbgs() << "ABORT: Block " << SI->getName()
457e8d8bef9SDimitry Andric                           << " doesn't have a single predecessor in the "
458e8d8bef9SDimitry Andric                              "dominator tree\n";);
4590b57cec5SDimitry Andric         continue;
460e8d8bef9SDimitry Andric       }
461e8d8bef9SDimitry Andric 
4620b57cec5SDimitry Andric       BasicBlock *ExitBlock = nullptr;
4630b57cec5SDimitry Andric       // We can only outline single exit regions (for now).
464e8d8bef9SDimitry Andric       if (!(ExitBlock = IsSingleExit(DominateVector))) {
465e8d8bef9SDimitry Andric         LLVM_DEBUG(dbgs() << "ABORT: Block " << SI->getName()
466e8d8bef9SDimitry Andric                           << " doesn't have a unique successor\n";);
4670b57cec5SDimitry Andric         continue;
468e8d8bef9SDimitry Andric       }
469e8d8bef9SDimitry Andric 
470fe6060f1SDimitry Andric       InstructionCost OutlineRegionCost = 0;
4710b57cec5SDimitry Andric       for (auto *BB : DominateVector)
472e8d8bef9SDimitry Andric         OutlineRegionCost += computeBBInlineCost(BB, &GetTTI(*BB->getParent()));
4730b57cec5SDimitry Andric 
474e8d8bef9SDimitry Andric       LLVM_DEBUG(dbgs() << "OutlineRegionCost = " << OutlineRegionCost
475e8d8bef9SDimitry Andric                         << "\n";);
4760b57cec5SDimitry Andric 
477e8d8bef9SDimitry Andric       if (!SkipCostAnalysis && OutlineRegionCost < MinOutlineRegionCost) {
4780b57cec5SDimitry Andric         ORE.emit([&]() {
4790b57cec5SDimitry Andric           return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly",
4800b57cec5SDimitry Andric                                             &SI->front())
481e8d8bef9SDimitry Andric                  << ore::NV("Callee", &F)
482e8d8bef9SDimitry Andric                  << " inline cost-savings smaller than "
4830b57cec5SDimitry Andric                  << ore::NV("Cost", MinOutlineRegionCost);
4840b57cec5SDimitry Andric         });
485e8d8bef9SDimitry Andric 
486e8d8bef9SDimitry Andric         LLVM_DEBUG(dbgs() << "ABORT: Outline region cost is smaller than "
487e8d8bef9SDimitry Andric                           << MinOutlineRegionCost << "\n";);
4880b57cec5SDimitry Andric         continue;
4890b57cec5SDimitry Andric       }
490e8d8bef9SDimitry Andric 
4910b57cec5SDimitry Andric       // For now, ignore blocks that belong to a SISE region that is a
4920b57cec5SDimitry Andric       // candidate for outlining.  In the future, we may want to look
4930b57cec5SDimitry Andric       // at inner regions because the outer region may have live-exit
4940b57cec5SDimitry Andric       // variables.
4950b57cec5SDimitry Andric       for (auto *BB : DominateVector)
4960b57cec5SDimitry Andric         VisitedMap[BB] = true;
497e8d8bef9SDimitry Andric 
4980b57cec5SDimitry Andric       // ReturnBlock here means the block after the outline call
4990b57cec5SDimitry Andric       BasicBlock *ReturnBlock = ExitBlock->getSingleSuccessor();
5000b57cec5SDimitry Andric       FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegInfo(
5010b57cec5SDimitry Andric           DominateVector, DominateVector.front(), ExitBlock, ReturnBlock);
5020b57cec5SDimitry Andric       OutliningInfo->ORI.push_back(RegInfo);
503e8d8bef9SDimitry Andric       LLVM_DEBUG(dbgs() << "Found Cold Candidate starting at block: "
504e8d8bef9SDimitry Andric                         << DominateVector.front()->getName() << "\n";);
5050b57cec5SDimitry Andric       ColdCandidateFound = true;
5060b57cec5SDimitry Andric       NumColdRegionsFound++;
5070b57cec5SDimitry Andric     }
5080b57cec5SDimitry Andric   }
509e8d8bef9SDimitry Andric 
5100b57cec5SDimitry Andric   if (ColdCandidateFound)
5110b57cec5SDimitry Andric     return OutliningInfo;
512e8d8bef9SDimitry Andric 
5130b57cec5SDimitry Andric   return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
5140b57cec5SDimitry Andric }
5150b57cec5SDimitry Andric 
5160b57cec5SDimitry Andric std::unique_ptr<FunctionOutliningInfo>
computeOutliningInfo(Function & F) const517e8d8bef9SDimitry Andric PartialInlinerImpl::computeOutliningInfo(Function &F) const {
518e8d8bef9SDimitry Andric   BasicBlock *EntryBlock = &F.front();
5190b57cec5SDimitry Andric   BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator());
5200b57cec5SDimitry Andric   if (!BR || BR->isUnconditional())
5210b57cec5SDimitry Andric     return std::unique_ptr<FunctionOutliningInfo>();
5220b57cec5SDimitry Andric 
5230b57cec5SDimitry Andric   // Returns true if Succ is BB's successor
5240b57cec5SDimitry Andric   auto IsSuccessor = [](BasicBlock *Succ, BasicBlock *BB) {
5250b57cec5SDimitry Andric     return is_contained(successors(BB), Succ);
5260b57cec5SDimitry Andric   };
5270b57cec5SDimitry Andric 
5280b57cec5SDimitry Andric   auto IsReturnBlock = [](BasicBlock *BB) {
5290b57cec5SDimitry Andric     Instruction *TI = BB->getTerminator();
5300b57cec5SDimitry Andric     return isa<ReturnInst>(TI);
5310b57cec5SDimitry Andric   };
5320b57cec5SDimitry Andric 
5330b57cec5SDimitry Andric   auto GetReturnBlock = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
5340b57cec5SDimitry Andric     if (IsReturnBlock(Succ1))
5350b57cec5SDimitry Andric       return std::make_tuple(Succ1, Succ2);
5360b57cec5SDimitry Andric     if (IsReturnBlock(Succ2))
5370b57cec5SDimitry Andric       return std::make_tuple(Succ2, Succ1);
5380b57cec5SDimitry Andric 
5390b57cec5SDimitry Andric     return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
5400b57cec5SDimitry Andric   };
5410b57cec5SDimitry Andric 
5420b57cec5SDimitry Andric   // Detect a triangular shape:
5430b57cec5SDimitry Andric   auto GetCommonSucc = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
5440b57cec5SDimitry Andric     if (IsSuccessor(Succ1, Succ2))
5450b57cec5SDimitry Andric       return std::make_tuple(Succ1, Succ2);
5460b57cec5SDimitry Andric     if (IsSuccessor(Succ2, Succ1))
5470b57cec5SDimitry Andric       return std::make_tuple(Succ2, Succ1);
5480b57cec5SDimitry Andric 
5490b57cec5SDimitry Andric     return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
5500b57cec5SDimitry Andric   };
5510b57cec5SDimitry Andric 
5520b57cec5SDimitry Andric   std::unique_ptr<FunctionOutliningInfo> OutliningInfo =
5538bcb0991SDimitry Andric       std::make_unique<FunctionOutliningInfo>();
5540b57cec5SDimitry Andric 
5550b57cec5SDimitry Andric   BasicBlock *CurrEntry = EntryBlock;
5560b57cec5SDimitry Andric   bool CandidateFound = false;
5570b57cec5SDimitry Andric   do {
5580b57cec5SDimitry Andric     // The number of blocks to be inlined has already reached
5590b57cec5SDimitry Andric     // the limit. When MaxNumInlineBlocks is set to 0 or 1, this
5600b57cec5SDimitry Andric     // disables partial inlining for the function.
561e8d8bef9SDimitry Andric     if (OutliningInfo->getNumInlinedBlocks() >= MaxNumInlineBlocks)
5620b57cec5SDimitry Andric       break;
5630b57cec5SDimitry Andric 
5640b57cec5SDimitry Andric     if (succ_size(CurrEntry) != 2)
5650b57cec5SDimitry Andric       break;
5660b57cec5SDimitry Andric 
5670b57cec5SDimitry Andric     BasicBlock *Succ1 = *succ_begin(CurrEntry);
5680b57cec5SDimitry Andric     BasicBlock *Succ2 = *(succ_begin(CurrEntry) + 1);
5690b57cec5SDimitry Andric 
5700b57cec5SDimitry Andric     BasicBlock *ReturnBlock, *NonReturnBlock;
5710b57cec5SDimitry Andric     std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
5720b57cec5SDimitry Andric 
5730b57cec5SDimitry Andric     if (ReturnBlock) {
5740b57cec5SDimitry Andric       OutliningInfo->Entries.push_back(CurrEntry);
5750b57cec5SDimitry Andric       OutliningInfo->ReturnBlock = ReturnBlock;
5760b57cec5SDimitry Andric       OutliningInfo->NonReturnBlock = NonReturnBlock;
5770b57cec5SDimitry Andric       CandidateFound = true;
5780b57cec5SDimitry Andric       break;
5790b57cec5SDimitry Andric     }
5800b57cec5SDimitry Andric 
581e8d8bef9SDimitry Andric     BasicBlock *CommSucc, *OtherSucc;
5820b57cec5SDimitry Andric     std::tie(CommSucc, OtherSucc) = GetCommonSucc(Succ1, Succ2);
5830b57cec5SDimitry Andric 
5840b57cec5SDimitry Andric     if (!CommSucc)
5850b57cec5SDimitry Andric       break;
5860b57cec5SDimitry Andric 
5870b57cec5SDimitry Andric     OutliningInfo->Entries.push_back(CurrEntry);
5880b57cec5SDimitry Andric     CurrEntry = OtherSucc;
5890b57cec5SDimitry Andric   } while (true);
5900b57cec5SDimitry Andric 
5910b57cec5SDimitry Andric   if (!CandidateFound)
5920b57cec5SDimitry Andric     return std::unique_ptr<FunctionOutliningInfo>();
5930b57cec5SDimitry Andric 
5944824e7fdSDimitry Andric   // There should not be any successors (not in the entry set) other than
5950b57cec5SDimitry Andric   // {ReturnBlock, NonReturnBlock}
596e8d8bef9SDimitry Andric   assert(OutliningInfo->Entries[0] == &F.front() &&
5970b57cec5SDimitry Andric          "Function Entry must be the first in Entries vector");
5980b57cec5SDimitry Andric   DenseSet<BasicBlock *> Entries;
5990b57cec5SDimitry Andric   for (BasicBlock *E : OutliningInfo->Entries)
6000b57cec5SDimitry Andric     Entries.insert(E);
6010b57cec5SDimitry Andric 
6020b57cec5SDimitry Andric   // Returns true of BB has Predecessor which is not
6030b57cec5SDimitry Andric   // in Entries set.
6040b57cec5SDimitry Andric   auto HasNonEntryPred = [Entries](BasicBlock *BB) {
605e8d8bef9SDimitry Andric     for (auto *Pred : predecessors(BB)) {
6060b57cec5SDimitry Andric       if (!Entries.count(Pred))
6070b57cec5SDimitry Andric         return true;
6080b57cec5SDimitry Andric     }
6090b57cec5SDimitry Andric     return false;
6100b57cec5SDimitry Andric   };
6110b57cec5SDimitry Andric   auto CheckAndNormalizeCandidate =
6120b57cec5SDimitry Andric       [Entries, HasNonEntryPred](FunctionOutliningInfo *OutliningInfo) {
6130b57cec5SDimitry Andric         for (BasicBlock *E : OutliningInfo->Entries) {
614e8d8bef9SDimitry Andric           for (auto *Succ : successors(E)) {
6150b57cec5SDimitry Andric             if (Entries.count(Succ))
6160b57cec5SDimitry Andric               continue;
6170b57cec5SDimitry Andric             if (Succ == OutliningInfo->ReturnBlock)
6180b57cec5SDimitry Andric               OutliningInfo->ReturnBlockPreds.push_back(E);
6190b57cec5SDimitry Andric             else if (Succ != OutliningInfo->NonReturnBlock)
6200b57cec5SDimitry Andric               return false;
6210b57cec5SDimitry Andric           }
6220b57cec5SDimitry Andric           // There should not be any outside incoming edges either:
6230b57cec5SDimitry Andric           if (HasNonEntryPred(E))
6240b57cec5SDimitry Andric             return false;
6250b57cec5SDimitry Andric         }
6260b57cec5SDimitry Andric         return true;
6270b57cec5SDimitry Andric       };
6280b57cec5SDimitry Andric 
6290b57cec5SDimitry Andric   if (!CheckAndNormalizeCandidate(OutliningInfo.get()))
6300b57cec5SDimitry Andric     return std::unique_ptr<FunctionOutliningInfo>();
6310b57cec5SDimitry Andric 
6320b57cec5SDimitry Andric   // Now further growing the candidate's inlining region by
6330b57cec5SDimitry Andric   // peeling off dominating blocks from the outlining region:
634e8d8bef9SDimitry Andric   while (OutliningInfo->getNumInlinedBlocks() < MaxNumInlineBlocks) {
6350b57cec5SDimitry Andric     BasicBlock *Cand = OutliningInfo->NonReturnBlock;
6360b57cec5SDimitry Andric     if (succ_size(Cand) != 2)
6370b57cec5SDimitry Andric       break;
6380b57cec5SDimitry Andric 
6390b57cec5SDimitry Andric     if (HasNonEntryPred(Cand))
6400b57cec5SDimitry Andric       break;
6410b57cec5SDimitry Andric 
6420b57cec5SDimitry Andric     BasicBlock *Succ1 = *succ_begin(Cand);
6430b57cec5SDimitry Andric     BasicBlock *Succ2 = *(succ_begin(Cand) + 1);
6440b57cec5SDimitry Andric 
6450b57cec5SDimitry Andric     BasicBlock *ReturnBlock, *NonReturnBlock;
6460b57cec5SDimitry Andric     std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
6470b57cec5SDimitry Andric     if (!ReturnBlock || ReturnBlock != OutliningInfo->ReturnBlock)
6480b57cec5SDimitry Andric       break;
6490b57cec5SDimitry Andric 
6500b57cec5SDimitry Andric     if (NonReturnBlock->getSinglePredecessor() != Cand)
6510b57cec5SDimitry Andric       break;
6520b57cec5SDimitry Andric 
6530b57cec5SDimitry Andric     // Now grow and update OutlininigInfo:
6540b57cec5SDimitry Andric     OutliningInfo->Entries.push_back(Cand);
6550b57cec5SDimitry Andric     OutliningInfo->NonReturnBlock = NonReturnBlock;
6560b57cec5SDimitry Andric     OutliningInfo->ReturnBlockPreds.push_back(Cand);
6570b57cec5SDimitry Andric     Entries.insert(Cand);
6580b57cec5SDimitry Andric   }
6590b57cec5SDimitry Andric 
6600b57cec5SDimitry Andric   return OutliningInfo;
6610b57cec5SDimitry Andric }
6620b57cec5SDimitry Andric 
663480093f4SDimitry Andric // Check if there is PGO data or user annotated branch data:
hasProfileData(const Function & F,const FunctionOutliningInfo & OI)664e8d8bef9SDimitry Andric static bool hasProfileData(const Function &F, const FunctionOutliningInfo &OI) {
665e8d8bef9SDimitry Andric   if (F.hasProfileData())
6660b57cec5SDimitry Andric     return true;
6670b57cec5SDimitry Andric   // Now check if any of the entry block has MD_prof data:
668e8d8bef9SDimitry Andric   for (auto *E : OI.Entries) {
6690b57cec5SDimitry Andric     BranchInst *BR = dyn_cast<BranchInst>(E->getTerminator());
6700b57cec5SDimitry Andric     if (!BR || BR->isUnconditional())
6710b57cec5SDimitry Andric       continue;
672bdd1243dSDimitry Andric     if (hasBranchWeightMD(*BR))
6730b57cec5SDimitry Andric       return true;
6740b57cec5SDimitry Andric   }
6750b57cec5SDimitry Andric   return false;
6760b57cec5SDimitry Andric }
6770b57cec5SDimitry Andric 
getOutliningCallBBRelativeFreq(FunctionCloner & Cloner) const678e8d8bef9SDimitry Andric BranchProbability PartialInlinerImpl::getOutliningCallBBRelativeFreq(
679e8d8bef9SDimitry Andric     FunctionCloner &Cloner) const {
6800b57cec5SDimitry Andric   BasicBlock *OutliningCallBB = Cloner.OutlinedFunctions.back().second;
6810b57cec5SDimitry Andric   auto EntryFreq =
6820b57cec5SDimitry Andric       Cloner.ClonedFuncBFI->getBlockFreq(&Cloner.ClonedFunc->getEntryBlock());
6830b57cec5SDimitry Andric   auto OutliningCallFreq =
6840b57cec5SDimitry Andric       Cloner.ClonedFuncBFI->getBlockFreq(OutliningCallBB);
6850b57cec5SDimitry Andric   // FIXME Hackery needed because ClonedFuncBFI is based on the function BEFORE
6860b57cec5SDimitry Andric   // we outlined any regions, so we may encounter situations where the
6870b57cec5SDimitry Andric   // OutliningCallFreq is *slightly* bigger than the EntryFreq.
688e8d8bef9SDimitry Andric   if (OutliningCallFreq.getFrequency() > EntryFreq.getFrequency())
6890b57cec5SDimitry Andric     OutliningCallFreq = EntryFreq;
690e8d8bef9SDimitry Andric 
6910b57cec5SDimitry Andric   auto OutlineRegionRelFreq = BranchProbability::getBranchProbability(
6920b57cec5SDimitry Andric       OutliningCallFreq.getFrequency(), EntryFreq.getFrequency());
6930b57cec5SDimitry Andric 
69481ad6265SDimitry Andric   if (hasProfileData(*Cloner.OrigFunc, *Cloner.ClonedOI))
6950b57cec5SDimitry Andric     return OutlineRegionRelFreq;
6960b57cec5SDimitry Andric 
6970b57cec5SDimitry Andric   // When profile data is not available, we need to be conservative in
6980b57cec5SDimitry Andric   // estimating the overall savings. Static branch prediction can usually
6990b57cec5SDimitry Andric   // guess the branch direction right (taken/non-taken), but the guessed
7000b57cec5SDimitry Andric   // branch probability is usually not biased enough. In case when the
7010b57cec5SDimitry Andric   // outlined region is predicted to be likely, its probability needs
7020b57cec5SDimitry Andric   // to be made higher (more biased) to not under-estimate the cost of
7030b57cec5SDimitry Andric   // function outlining. On the other hand, if the outlined region
7040b57cec5SDimitry Andric   // is predicted to be less likely, the predicted probablity is usually
7050b57cec5SDimitry Andric   // higher than the actual. For instance, the actual probability of the
7060b57cec5SDimitry Andric   // less likely target is only 5%, but the guessed probablity can be
707bdd1243dSDimitry Andric   // 40%. In the latter case, there is no need for further adjustment.
7080b57cec5SDimitry Andric   // FIXME: add an option for this.
7090b57cec5SDimitry Andric   if (OutlineRegionRelFreq < BranchProbability(45, 100))
7100b57cec5SDimitry Andric     return OutlineRegionRelFreq;
7110b57cec5SDimitry Andric 
7120b57cec5SDimitry Andric   OutlineRegionRelFreq = std::max(
7130b57cec5SDimitry Andric       OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100));
7140b57cec5SDimitry Andric 
7150b57cec5SDimitry Andric   return OutlineRegionRelFreq;
7160b57cec5SDimitry Andric }
7170b57cec5SDimitry Andric 
shouldPartialInline(CallBase & CB,FunctionCloner & Cloner,BlockFrequency WeightedOutliningRcost,OptimizationRemarkEmitter & ORE) const7180b57cec5SDimitry Andric bool PartialInlinerImpl::shouldPartialInline(
7195ffd83dbSDimitry Andric     CallBase &CB, FunctionCloner &Cloner, BlockFrequency WeightedOutliningRcost,
720e8d8bef9SDimitry Andric     OptimizationRemarkEmitter &ORE) const {
7210b57cec5SDimitry Andric   using namespace ore;
7220b57cec5SDimitry Andric 
7235ffd83dbSDimitry Andric   Function *Callee = CB.getCalledFunction();
7240b57cec5SDimitry Andric   assert(Callee == Cloner.ClonedFunc);
7250b57cec5SDimitry Andric 
7260b57cec5SDimitry Andric   if (SkipCostAnalysis)
7275ffd83dbSDimitry Andric     return isInlineViable(*Callee).isSuccess();
7280b57cec5SDimitry Andric 
7295ffd83dbSDimitry Andric   Function *Caller = CB.getCaller();
7305ffd83dbSDimitry Andric   auto &CalleeTTI = GetTTI(*Callee);
7310b57cec5SDimitry Andric   bool RemarksEnabled =
7320b57cec5SDimitry Andric       Callee->getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled(
7330b57cec5SDimitry Andric           DEBUG_TYPE);
7345ffd83dbSDimitry Andric   InlineCost IC =
7355ffd83dbSDimitry Andric       getInlineCost(CB, getInlineParams(), CalleeTTI, GetAssumptionCache,
7365ffd83dbSDimitry Andric                     GetTLI, GetBFI, &PSI, RemarksEnabled ? &ORE : nullptr);
7370b57cec5SDimitry Andric 
7380b57cec5SDimitry Andric   if (IC.isAlways()) {
7390b57cec5SDimitry Andric     ORE.emit([&]() {
7405ffd83dbSDimitry Andric       return OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", &CB)
7410b57cec5SDimitry Andric              << NV("Callee", Cloner.OrigFunc)
7420b57cec5SDimitry Andric              << " should always be fully inlined, not partially";
7430b57cec5SDimitry Andric     });
7440b57cec5SDimitry Andric     return false;
7450b57cec5SDimitry Andric   }
7460b57cec5SDimitry Andric 
7470b57cec5SDimitry Andric   if (IC.isNever()) {
7480b57cec5SDimitry Andric     ORE.emit([&]() {
7495ffd83dbSDimitry Andric       return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", &CB)
7500b57cec5SDimitry Andric              << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
7510b57cec5SDimitry Andric              << NV("Caller", Caller)
7520b57cec5SDimitry Andric              << " because it should never be inlined (cost=never)";
7530b57cec5SDimitry Andric     });
7540b57cec5SDimitry Andric     return false;
7550b57cec5SDimitry Andric   }
7560b57cec5SDimitry Andric 
7570b57cec5SDimitry Andric   if (!IC) {
7580b57cec5SDimitry Andric     ORE.emit([&]() {
7595ffd83dbSDimitry Andric       return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", &CB)
7600b57cec5SDimitry Andric              << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
7610b57cec5SDimitry Andric              << NV("Caller", Caller) << " because too costly to inline (cost="
7620b57cec5SDimitry Andric              << NV("Cost", IC.getCost()) << ", threshold="
7630b57cec5SDimitry Andric              << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")";
7640b57cec5SDimitry Andric     });
7650b57cec5SDimitry Andric     return false;
7660b57cec5SDimitry Andric   }
767*0fca6ea1SDimitry Andric   const DataLayout &DL = Caller->getDataLayout();
7680b57cec5SDimitry Andric 
7690b57cec5SDimitry Andric   // The savings of eliminating the call:
7705f757f3fSDimitry Andric   int NonWeightedSavings = getCallsiteCost(CalleeTTI, CB, DL);
7710b57cec5SDimitry Andric   BlockFrequency NormWeightedSavings(NonWeightedSavings);
7720b57cec5SDimitry Andric 
7730b57cec5SDimitry Andric   // Weighted saving is smaller than weighted cost, return false
7740b57cec5SDimitry Andric   if (NormWeightedSavings < WeightedOutliningRcost) {
7750b57cec5SDimitry Andric     ORE.emit([&]() {
7760b57cec5SDimitry Andric       return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutliningCallcostTooHigh",
7775ffd83dbSDimitry Andric                                         &CB)
7780b57cec5SDimitry Andric              << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
7790b57cec5SDimitry Andric              << NV("Caller", Caller) << " runtime overhead (overhead="
7800b57cec5SDimitry Andric              << NV("Overhead", (unsigned)WeightedOutliningRcost.getFrequency())
7810b57cec5SDimitry Andric              << ", savings="
7820b57cec5SDimitry Andric              << NV("Savings", (unsigned)NormWeightedSavings.getFrequency())
7830b57cec5SDimitry Andric              << ")"
7840b57cec5SDimitry Andric              << " of making the outlined call is too high";
7850b57cec5SDimitry Andric     });
7860b57cec5SDimitry Andric 
7870b57cec5SDimitry Andric     return false;
7880b57cec5SDimitry Andric   }
7890b57cec5SDimitry Andric 
7900b57cec5SDimitry Andric   ORE.emit([&]() {
7915ffd83dbSDimitry Andric     return OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", &CB)
7920b57cec5SDimitry Andric            << NV("Callee", Cloner.OrigFunc) << " can be partially inlined into "
7930b57cec5SDimitry Andric            << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost())
7940b57cec5SDimitry Andric            << " (threshold="
7950b57cec5SDimitry Andric            << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")";
7960b57cec5SDimitry Andric   });
7970b57cec5SDimitry Andric   return true;
7980b57cec5SDimitry Andric }
7990b57cec5SDimitry Andric 
8000b57cec5SDimitry Andric // TODO: Ideally  we should share Inliner's InlineCost Analysis code.
8010b57cec5SDimitry Andric // For now use a simplified version. The returned 'InlineCost' will be used
8020b57cec5SDimitry Andric // to esimate the size cost as well as runtime cost of the BB.
803fe6060f1SDimitry Andric InstructionCost
computeBBInlineCost(BasicBlock * BB,TargetTransformInfo * TTI)804fe6060f1SDimitry Andric PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB,
805e8d8bef9SDimitry Andric                                         TargetTransformInfo *TTI) {
806fe6060f1SDimitry Andric   InstructionCost InlineCost = 0;
807*0fca6ea1SDimitry Andric   const DataLayout &DL = BB->getDataLayout();
808bdd1243dSDimitry Andric   int InstrCost = InlineConstants::getInstrCost();
8090b57cec5SDimitry Andric   for (Instruction &I : BB->instructionsWithoutDebug()) {
8100b57cec5SDimitry Andric     // Skip free instructions.
8110b57cec5SDimitry Andric     switch (I.getOpcode()) {
8120b57cec5SDimitry Andric     case Instruction::BitCast:
8130b57cec5SDimitry Andric     case Instruction::PtrToInt:
8140b57cec5SDimitry Andric     case Instruction::IntToPtr:
8150b57cec5SDimitry Andric     case Instruction::Alloca:
8160b57cec5SDimitry Andric     case Instruction::PHI:
8170b57cec5SDimitry Andric       continue;
8180b57cec5SDimitry Andric     case Instruction::GetElementPtr:
8190b57cec5SDimitry Andric       if (cast<GetElementPtrInst>(&I)->hasAllZeroIndices())
8200b57cec5SDimitry Andric         continue;
8210b57cec5SDimitry Andric       break;
8220b57cec5SDimitry Andric     default:
8230b57cec5SDimitry Andric       break;
8240b57cec5SDimitry Andric     }
8250b57cec5SDimitry Andric 
8260b57cec5SDimitry Andric     if (I.isLifetimeStartOrEnd())
8270b57cec5SDimitry Andric       continue;
8280b57cec5SDimitry Andric 
829e8d8bef9SDimitry Andric     if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
830e8d8bef9SDimitry Andric       Intrinsic::ID IID = II->getIntrinsicID();
831e8d8bef9SDimitry Andric       SmallVector<Type *, 4> Tys;
832e8d8bef9SDimitry Andric       FastMathFlags FMF;
833e8d8bef9SDimitry Andric       for (Value *Val : II->args())
834e8d8bef9SDimitry Andric         Tys.push_back(Val->getType());
835e8d8bef9SDimitry Andric 
836e8d8bef9SDimitry Andric       if (auto *FPMO = dyn_cast<FPMathOperator>(II))
837e8d8bef9SDimitry Andric         FMF = FPMO->getFastMathFlags();
838e8d8bef9SDimitry Andric 
839e8d8bef9SDimitry Andric       IntrinsicCostAttributes ICA(IID, II->getType(), Tys, FMF);
840e8d8bef9SDimitry Andric       InlineCost += TTI->getIntrinsicInstrCost(ICA, TTI::TCK_SizeAndLatency);
841e8d8bef9SDimitry Andric       continue;
842e8d8bef9SDimitry Andric     }
843e8d8bef9SDimitry Andric 
8440b57cec5SDimitry Andric     if (CallInst *CI = dyn_cast<CallInst>(&I)) {
8455f757f3fSDimitry Andric       InlineCost += getCallsiteCost(*TTI, *CI, DL);
8460b57cec5SDimitry Andric       continue;
8470b57cec5SDimitry Andric     }
8480b57cec5SDimitry Andric 
8490b57cec5SDimitry Andric     if (InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
8505f757f3fSDimitry Andric       InlineCost += getCallsiteCost(*TTI, *II, DL);
8510b57cec5SDimitry Andric       continue;
8520b57cec5SDimitry Andric     }
8530b57cec5SDimitry Andric 
8540b57cec5SDimitry Andric     if (SwitchInst *SI = dyn_cast<SwitchInst>(&I)) {
855bdd1243dSDimitry Andric       InlineCost += (SI->getNumCases() + 1) * InstrCost;
8560b57cec5SDimitry Andric       continue;
8570b57cec5SDimitry Andric     }
858bdd1243dSDimitry Andric     InlineCost += InstrCost;
8590b57cec5SDimitry Andric   }
860fe6060f1SDimitry Andric 
8610b57cec5SDimitry Andric   return InlineCost;
8620b57cec5SDimitry Andric }
8630b57cec5SDimitry Andric 
864fe6060f1SDimitry Andric std::tuple<InstructionCost, InstructionCost>
computeOutliningCosts(FunctionCloner & Cloner) const865e8d8bef9SDimitry Andric PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) const {
866fe6060f1SDimitry Andric   InstructionCost OutliningFuncCallCost = 0, OutlinedFunctionCost = 0;
8670b57cec5SDimitry Andric   for (auto FuncBBPair : Cloner.OutlinedFunctions) {
8680b57cec5SDimitry Andric     Function *OutlinedFunc = FuncBBPair.first;
8690b57cec5SDimitry Andric     BasicBlock* OutliningCallBB = FuncBBPair.second;
8700b57cec5SDimitry Andric     // Now compute the cost of the call sequence to the outlined function
8710b57cec5SDimitry Andric     // 'OutlinedFunction' in BB 'OutliningCallBB':
872e8d8bef9SDimitry Andric     auto *OutlinedFuncTTI = &GetTTI(*OutlinedFunc);
873e8d8bef9SDimitry Andric     OutliningFuncCallCost +=
874e8d8bef9SDimitry Andric         computeBBInlineCost(OutliningCallBB, OutlinedFuncTTI);
8750b57cec5SDimitry Andric 
8760b57cec5SDimitry Andric     // Now compute the cost of the extracted/outlined function itself:
8770b57cec5SDimitry Andric     for (BasicBlock &BB : *OutlinedFunc)
878e8d8bef9SDimitry Andric       OutlinedFunctionCost += computeBBInlineCost(&BB, OutlinedFuncTTI);
8790b57cec5SDimitry Andric   }
8800b57cec5SDimitry Andric   assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost &&
8810b57cec5SDimitry Andric          "Outlined function cost should be no less than the outlined region");
8820b57cec5SDimitry Andric 
8830b57cec5SDimitry Andric   // The code extractor introduces a new root and exit stub blocks with
8840b57cec5SDimitry Andric   // additional unconditional branches. Those branches will be eliminated
8850b57cec5SDimitry Andric   // later with bb layout. The cost should be adjusted accordingly:
8860b57cec5SDimitry Andric   OutlinedFunctionCost -=
887bdd1243dSDimitry Andric       2 * InlineConstants::getInstrCost() * Cloner.OutlinedFunctions.size();
8880b57cec5SDimitry Andric 
889fe6060f1SDimitry Andric   InstructionCost OutliningRuntimeOverhead =
8900b57cec5SDimitry Andric       OutliningFuncCallCost +
8910b57cec5SDimitry Andric       (OutlinedFunctionCost - Cloner.OutlinedRegionCost) +
892fe6060f1SDimitry Andric       ExtraOutliningPenalty.getValue();
8930b57cec5SDimitry Andric 
8940b57cec5SDimitry Andric   return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead);
8950b57cec5SDimitry Andric }
8960b57cec5SDimitry Andric 
8970b57cec5SDimitry Andric // Create the callsite to profile count map which is
8980b57cec5SDimitry Andric // used to update the original function's entry count,
8990b57cec5SDimitry Andric // after the function is partially inlined into the callsite.
computeCallsiteToProfCountMap(Function * DuplicateFunction,DenseMap<User *,uint64_t> & CallSiteToProfCountMap) const9000b57cec5SDimitry Andric void PartialInlinerImpl::computeCallsiteToProfCountMap(
9010b57cec5SDimitry Andric     Function *DuplicateFunction,
902e8d8bef9SDimitry Andric     DenseMap<User *, uint64_t> &CallSiteToProfCountMap) const {
9030b57cec5SDimitry Andric   std::vector<User *> Users(DuplicateFunction->user_begin(),
9040b57cec5SDimitry Andric                             DuplicateFunction->user_end());
9050b57cec5SDimitry Andric   Function *CurrentCaller = nullptr;
9060b57cec5SDimitry Andric   std::unique_ptr<BlockFrequencyInfo> TempBFI;
9070b57cec5SDimitry Andric   BlockFrequencyInfo *CurrentCallerBFI = nullptr;
9080b57cec5SDimitry Andric 
9090b57cec5SDimitry Andric   auto ComputeCurrBFI = [&,this](Function *Caller) {
9100b57cec5SDimitry Andric       // For the old pass manager:
9110b57cec5SDimitry Andric       if (!GetBFI) {
9120b57cec5SDimitry Andric         DominatorTree DT(*Caller);
9130b57cec5SDimitry Andric         LoopInfo LI(DT);
9140b57cec5SDimitry Andric         BranchProbabilityInfo BPI(*Caller, LI);
9150b57cec5SDimitry Andric         TempBFI.reset(new BlockFrequencyInfo(*Caller, BPI, LI));
9160b57cec5SDimitry Andric         CurrentCallerBFI = TempBFI.get();
9170b57cec5SDimitry Andric       } else {
9180b57cec5SDimitry Andric         // New pass manager:
9195ffd83dbSDimitry Andric         CurrentCallerBFI = &(GetBFI(*Caller));
9200b57cec5SDimitry Andric       }
9210b57cec5SDimitry Andric   };
9220b57cec5SDimitry Andric 
9230b57cec5SDimitry Andric   for (User *User : Users) {
92404eeddc0SDimitry Andric     // Don't bother with BlockAddress used by CallBr for asm goto.
92504eeddc0SDimitry Andric     if (isa<BlockAddress>(User))
92604eeddc0SDimitry Andric       continue;
9275ffd83dbSDimitry Andric     CallBase *CB = getSupportedCallBase(User);
9285ffd83dbSDimitry Andric     Function *Caller = CB->getCaller();
9290b57cec5SDimitry Andric     if (CurrentCaller != Caller) {
9300b57cec5SDimitry Andric       CurrentCaller = Caller;
9310b57cec5SDimitry Andric       ComputeCurrBFI(Caller);
9320b57cec5SDimitry Andric     } else {
9330b57cec5SDimitry Andric       assert(CurrentCallerBFI && "CallerBFI is not set");
9340b57cec5SDimitry Andric     }
9355ffd83dbSDimitry Andric     BasicBlock *CallBB = CB->getParent();
9360b57cec5SDimitry Andric     auto Count = CurrentCallerBFI->getBlockProfileCount(CallBB);
9370b57cec5SDimitry Andric     if (Count)
9380b57cec5SDimitry Andric       CallSiteToProfCountMap[User] = *Count;
9390b57cec5SDimitry Andric     else
9400b57cec5SDimitry Andric       CallSiteToProfCountMap[User] = 0;
9410b57cec5SDimitry Andric   }
9420b57cec5SDimitry Andric }
9430b57cec5SDimitry Andric 
FunctionCloner(Function * F,FunctionOutliningInfo * OI,OptimizationRemarkEmitter & ORE,function_ref<AssumptionCache * (Function &)> LookupAC,function_ref<TargetTransformInfo & (Function &)> GetTTI)9440b57cec5SDimitry Andric PartialInlinerImpl::FunctionCloner::FunctionCloner(
9450b57cec5SDimitry Andric     Function *F, FunctionOutliningInfo *OI, OptimizationRemarkEmitter &ORE,
946e8d8bef9SDimitry Andric     function_ref<AssumptionCache *(Function &)> LookupAC,
947e8d8bef9SDimitry Andric     function_ref<TargetTransformInfo &(Function &)> GetTTI)
948e8d8bef9SDimitry Andric     : OrigFunc(F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
9498bcb0991SDimitry Andric   ClonedOI = std::make_unique<FunctionOutliningInfo>();
9500b57cec5SDimitry Andric 
9510b57cec5SDimitry Andric   // Clone the function, so that we can hack away on it.
9520b57cec5SDimitry Andric   ValueToValueMapTy VMap;
9530b57cec5SDimitry Andric   ClonedFunc = CloneFunction(F, VMap);
9540b57cec5SDimitry Andric 
9550b57cec5SDimitry Andric   ClonedOI->ReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]);
9560b57cec5SDimitry Andric   ClonedOI->NonReturnBlock = cast<BasicBlock>(VMap[OI->NonReturnBlock]);
957e8d8bef9SDimitry Andric   for (BasicBlock *BB : OI->Entries)
9580b57cec5SDimitry Andric     ClonedOI->Entries.push_back(cast<BasicBlock>(VMap[BB]));
959e8d8bef9SDimitry Andric 
9600b57cec5SDimitry Andric   for (BasicBlock *E : OI->ReturnBlockPreds) {
9610b57cec5SDimitry Andric     BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
9620b57cec5SDimitry Andric     ClonedOI->ReturnBlockPreds.push_back(NewE);
9630b57cec5SDimitry Andric   }
9640b57cec5SDimitry Andric   // Go ahead and update all uses to the duplicate, so that we can just
9650b57cec5SDimitry Andric   // use the inliner functionality when we're done hacking.
9660b57cec5SDimitry Andric   F->replaceAllUsesWith(ClonedFunc);
9670b57cec5SDimitry Andric }
9680b57cec5SDimitry Andric 
FunctionCloner(Function * F,FunctionOutliningMultiRegionInfo * OI,OptimizationRemarkEmitter & ORE,function_ref<AssumptionCache * (Function &)> LookupAC,function_ref<TargetTransformInfo & (Function &)> GetTTI)9690b57cec5SDimitry Andric PartialInlinerImpl::FunctionCloner::FunctionCloner(
9700b57cec5SDimitry Andric     Function *F, FunctionOutliningMultiRegionInfo *OI,
9710b57cec5SDimitry Andric     OptimizationRemarkEmitter &ORE,
972e8d8bef9SDimitry Andric     function_ref<AssumptionCache *(Function &)> LookupAC,
973e8d8bef9SDimitry Andric     function_ref<TargetTransformInfo &(Function &)> GetTTI)
974e8d8bef9SDimitry Andric     : OrigFunc(F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
9758bcb0991SDimitry Andric   ClonedOMRI = std::make_unique<FunctionOutliningMultiRegionInfo>();
9760b57cec5SDimitry Andric 
9770b57cec5SDimitry Andric   // Clone the function, so that we can hack away on it.
9780b57cec5SDimitry Andric   ValueToValueMapTy VMap;
9790b57cec5SDimitry Andric   ClonedFunc = CloneFunction(F, VMap);
9800b57cec5SDimitry Andric 
9810b57cec5SDimitry Andric   // Go through all Outline Candidate Regions and update all BasicBlock
9820b57cec5SDimitry Andric   // information.
98306c3fb27SDimitry Andric   for (const FunctionOutliningMultiRegionInfo::OutlineRegionInfo &RegionInfo :
9840b57cec5SDimitry Andric        OI->ORI) {
9850b57cec5SDimitry Andric     SmallVector<BasicBlock *, 8> Region;
986e8d8bef9SDimitry Andric     for (BasicBlock *BB : RegionInfo.Region)
9870b57cec5SDimitry Andric       Region.push_back(cast<BasicBlock>(VMap[BB]));
988e8d8bef9SDimitry Andric 
9890b57cec5SDimitry Andric     BasicBlock *NewEntryBlock = cast<BasicBlock>(VMap[RegionInfo.EntryBlock]);
9900b57cec5SDimitry Andric     BasicBlock *NewExitBlock = cast<BasicBlock>(VMap[RegionInfo.ExitBlock]);
9910b57cec5SDimitry Andric     BasicBlock *NewReturnBlock = nullptr;
9920b57cec5SDimitry Andric     if (RegionInfo.ReturnBlock)
9930b57cec5SDimitry Andric       NewReturnBlock = cast<BasicBlock>(VMap[RegionInfo.ReturnBlock]);
9940b57cec5SDimitry Andric     FunctionOutliningMultiRegionInfo::OutlineRegionInfo MappedRegionInfo(
9950b57cec5SDimitry Andric         Region, NewEntryBlock, NewExitBlock, NewReturnBlock);
9960b57cec5SDimitry Andric     ClonedOMRI->ORI.push_back(MappedRegionInfo);
9970b57cec5SDimitry Andric   }
9980b57cec5SDimitry Andric   // Go ahead and update all uses to the duplicate, so that we can just
9990b57cec5SDimitry Andric   // use the inliner functionality when we're done hacking.
10000b57cec5SDimitry Andric   F->replaceAllUsesWith(ClonedFunc);
10010b57cec5SDimitry Andric }
10020b57cec5SDimitry Andric 
normalizeReturnBlock() const1003e8d8bef9SDimitry Andric void PartialInlinerImpl::FunctionCloner::normalizeReturnBlock() const {
1004e8d8bef9SDimitry Andric   auto GetFirstPHI = [](BasicBlock *BB) {
10050b57cec5SDimitry Andric     BasicBlock::iterator I = BB->begin();
10060b57cec5SDimitry Andric     PHINode *FirstPhi = nullptr;
10070b57cec5SDimitry Andric     while (I != BB->end()) {
10080b57cec5SDimitry Andric       PHINode *Phi = dyn_cast<PHINode>(I);
10090b57cec5SDimitry Andric       if (!Phi)
10100b57cec5SDimitry Andric         break;
10110b57cec5SDimitry Andric       if (!FirstPhi) {
10120b57cec5SDimitry Andric         FirstPhi = Phi;
10130b57cec5SDimitry Andric         break;
10140b57cec5SDimitry Andric       }
10150b57cec5SDimitry Andric     }
10160b57cec5SDimitry Andric     return FirstPhi;
10170b57cec5SDimitry Andric   };
10180b57cec5SDimitry Andric 
10190b57cec5SDimitry Andric   // Shouldn't need to normalize PHIs if we're not outlining non-early return
10200b57cec5SDimitry Andric   // blocks.
10210b57cec5SDimitry Andric   if (!ClonedOI)
10220b57cec5SDimitry Andric     return;
10230b57cec5SDimitry Andric 
10240b57cec5SDimitry Andric   // Special hackery is needed with PHI nodes that have inputs from more than
10250b57cec5SDimitry Andric   // one extracted block.  For simplicity, just split the PHIs into a two-level
10260b57cec5SDimitry Andric   // sequence of PHIs, some of which will go in the extracted region, and some
10270b57cec5SDimitry Andric   // of which will go outside.
10280b57cec5SDimitry Andric   BasicBlock *PreReturn = ClonedOI->ReturnBlock;
10290b57cec5SDimitry Andric   // only split block when necessary:
1030e8d8bef9SDimitry Andric   PHINode *FirstPhi = GetFirstPHI(PreReturn);
10310b57cec5SDimitry Andric   unsigned NumPredsFromEntries = ClonedOI->ReturnBlockPreds.size();
10320b57cec5SDimitry Andric 
10330b57cec5SDimitry Andric   if (!FirstPhi || FirstPhi->getNumIncomingValues() <= NumPredsFromEntries + 1)
10340b57cec5SDimitry Andric     return;
10350b57cec5SDimitry Andric 
10360b57cec5SDimitry Andric   auto IsTrivialPhi = [](PHINode *PN) -> Value * {
1037bdd1243dSDimitry Andric     if (llvm::all_equal(PN->incoming_values()))
1038bdd1243dSDimitry Andric       return PN->getIncomingValue(0);
10390b57cec5SDimitry Andric     return nullptr;
10400b57cec5SDimitry Andric   };
10410b57cec5SDimitry Andric 
10420b57cec5SDimitry Andric   ClonedOI->ReturnBlock = ClonedOI->ReturnBlock->splitBasicBlock(
10430b57cec5SDimitry Andric       ClonedOI->ReturnBlock->getFirstNonPHI()->getIterator());
10440b57cec5SDimitry Andric   BasicBlock::iterator I = PreReturn->begin();
10455f757f3fSDimitry Andric   BasicBlock::iterator Ins = ClonedOI->ReturnBlock->begin();
10460b57cec5SDimitry Andric   SmallVector<Instruction *, 4> DeadPhis;
10470b57cec5SDimitry Andric   while (I != PreReturn->end()) {
10480b57cec5SDimitry Andric     PHINode *OldPhi = dyn_cast<PHINode>(I);
10490b57cec5SDimitry Andric     if (!OldPhi)
10500b57cec5SDimitry Andric       break;
10510b57cec5SDimitry Andric 
10520b57cec5SDimitry Andric     PHINode *RetPhi =
10535f757f3fSDimitry Andric         PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "");
10545f757f3fSDimitry Andric     RetPhi->insertBefore(Ins);
10550b57cec5SDimitry Andric     OldPhi->replaceAllUsesWith(RetPhi);
10565f757f3fSDimitry Andric     Ins = ClonedOI->ReturnBlock->getFirstNonPHIIt();
10570b57cec5SDimitry Andric 
10580b57cec5SDimitry Andric     RetPhi->addIncoming(&*I, PreReturn);
10590b57cec5SDimitry Andric     for (BasicBlock *E : ClonedOI->ReturnBlockPreds) {
10600b57cec5SDimitry Andric       RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(E), E);
10610b57cec5SDimitry Andric       OldPhi->removeIncomingValue(E);
10620b57cec5SDimitry Andric     }
10630b57cec5SDimitry Andric 
10640b57cec5SDimitry Andric     // After incoming values splitting, the old phi may become trivial.
10650b57cec5SDimitry Andric     // Keeping the trivial phi can introduce definition inside the outline
10660b57cec5SDimitry Andric     // region which is live-out, causing necessary overhead (load, store
10670b57cec5SDimitry Andric     // arg passing etc).
10680b57cec5SDimitry Andric     if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) {
10690b57cec5SDimitry Andric       OldPhi->replaceAllUsesWith(OldPhiVal);
10700b57cec5SDimitry Andric       DeadPhis.push_back(OldPhi);
10710b57cec5SDimitry Andric     }
10720b57cec5SDimitry Andric     ++I;
10730b57cec5SDimitry Andric   }
10740b57cec5SDimitry Andric   for (auto *DP : DeadPhis)
10750b57cec5SDimitry Andric     DP->eraseFromParent();
10760b57cec5SDimitry Andric 
1077e8d8bef9SDimitry Andric   for (auto *E : ClonedOI->ReturnBlockPreds)
10780b57cec5SDimitry Andric     E->getTerminator()->replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock);
10790b57cec5SDimitry Andric }
10800b57cec5SDimitry Andric 
doMultiRegionFunctionOutlining()10810b57cec5SDimitry Andric bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() {
10820b57cec5SDimitry Andric 
1083fe6060f1SDimitry Andric   auto ComputeRegionCost =
1084fe6060f1SDimitry Andric       [&](SmallVectorImpl<BasicBlock *> &Region) -> InstructionCost {
1085fe6060f1SDimitry Andric     InstructionCost Cost = 0;
10860b57cec5SDimitry Andric     for (BasicBlock* BB : Region)
1087e8d8bef9SDimitry Andric       Cost += computeBBInlineCost(BB, &GetTTI(*BB->getParent()));
10880b57cec5SDimitry Andric     return Cost;
10890b57cec5SDimitry Andric   };
10900b57cec5SDimitry Andric 
10910b57cec5SDimitry Andric   assert(ClonedOMRI && "Expecting OutlineInfo for multi region outline");
10920b57cec5SDimitry Andric 
10930b57cec5SDimitry Andric   if (ClonedOMRI->ORI.empty())
10940b57cec5SDimitry Andric     return false;
10950b57cec5SDimitry Andric 
10960b57cec5SDimitry Andric   // The CodeExtractor needs a dominator tree.
10970b57cec5SDimitry Andric   DominatorTree DT;
10980b57cec5SDimitry Andric   DT.recalculate(*ClonedFunc);
10990b57cec5SDimitry Andric 
11000b57cec5SDimitry Andric   // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
11010b57cec5SDimitry Andric   LoopInfo LI(DT);
11020b57cec5SDimitry Andric   BranchProbabilityInfo BPI(*ClonedFunc, LI);
11030b57cec5SDimitry Andric   ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
11040b57cec5SDimitry Andric 
11058bcb0991SDimitry Andric   // Cache and recycle the CodeExtractor analysis to avoid O(n^2) compile-time.
11068bcb0991SDimitry Andric   CodeExtractorAnalysisCache CEAC(*ClonedFunc);
11078bcb0991SDimitry Andric 
11080b57cec5SDimitry Andric   SetVector<Value *> Inputs, Outputs, Sinks;
11090b57cec5SDimitry Andric   for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo :
11100b57cec5SDimitry Andric        ClonedOMRI->ORI) {
1111fe6060f1SDimitry Andric     InstructionCost CurrentOutlinedRegionCost =
1112fe6060f1SDimitry Andric         ComputeRegionCost(RegionInfo.Region);
11130b57cec5SDimitry Andric 
11140b57cec5SDimitry Andric     CodeExtractor CE(RegionInfo.Region, &DT, /*AggregateArgs*/ false,
11150b57cec5SDimitry Andric                      ClonedFuncBFI.get(), &BPI,
11160b57cec5SDimitry Andric                      LookupAC(*RegionInfo.EntryBlock->getParent()),
11170b57cec5SDimitry Andric                      /* AllowVarargs */ false);
11180b57cec5SDimitry Andric 
11190b57cec5SDimitry Andric     CE.findInputsOutputs(Inputs, Outputs, Sinks);
11200b57cec5SDimitry Andric 
1121e8d8bef9SDimitry Andric     LLVM_DEBUG({
11220b57cec5SDimitry Andric       dbgs() << "inputs: " << Inputs.size() << "\n";
11230b57cec5SDimitry Andric       dbgs() << "outputs: " << Outputs.size() << "\n";
11240b57cec5SDimitry Andric       for (Value *value : Inputs)
11250b57cec5SDimitry Andric         dbgs() << "value used in func: " << *value << "\n";
11260b57cec5SDimitry Andric       for (Value *output : Outputs)
11270b57cec5SDimitry Andric         dbgs() << "instr used in func: " << *output << "\n";
1128e8d8bef9SDimitry Andric     });
1129e8d8bef9SDimitry Andric 
11300b57cec5SDimitry Andric     // Do not extract regions that have live exit variables.
11310b57cec5SDimitry Andric     if (Outputs.size() > 0 && !ForceLiveExit)
11320b57cec5SDimitry Andric       continue;
11330b57cec5SDimitry Andric 
1134e8d8bef9SDimitry Andric     if (Function *OutlinedFunc = CE.extractCodeRegion(CEAC)) {
1135e8d8bef9SDimitry Andric       CallBase *OCS = PartialInlinerImpl::getOneCallSiteTo(*OutlinedFunc);
11365ffd83dbSDimitry Andric       BasicBlock *OutliningCallBB = OCS->getParent();
11370b57cec5SDimitry Andric       assert(OutliningCallBB->getParent() == ClonedFunc);
11380b57cec5SDimitry Andric       OutlinedFunctions.push_back(std::make_pair(OutlinedFunc,OutliningCallBB));
11390b57cec5SDimitry Andric       NumColdRegionsOutlined++;
11400b57cec5SDimitry Andric       OutlinedRegionCost += CurrentOutlinedRegionCost;
11410b57cec5SDimitry Andric 
11420b57cec5SDimitry Andric       if (MarkOutlinedColdCC) {
11430b57cec5SDimitry Andric         OutlinedFunc->setCallingConv(CallingConv::Cold);
11445ffd83dbSDimitry Andric         OCS->setCallingConv(CallingConv::Cold);
11450b57cec5SDimitry Andric       }
11460b57cec5SDimitry Andric     } else
11470b57cec5SDimitry Andric       ORE.emit([&]() {
11480b57cec5SDimitry Andric         return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
11490b57cec5SDimitry Andric                                         &RegionInfo.Region.front()->front())
11500b57cec5SDimitry Andric                << "Failed to extract region at block "
11510b57cec5SDimitry Andric                << ore::NV("Block", RegionInfo.Region.front());
11520b57cec5SDimitry Andric       });
11530b57cec5SDimitry Andric   }
11540b57cec5SDimitry Andric 
11550b57cec5SDimitry Andric   return !OutlinedFunctions.empty();
11560b57cec5SDimitry Andric }
11570b57cec5SDimitry Andric 
11580b57cec5SDimitry Andric Function *
doSingleRegionFunctionOutlining()11590b57cec5SDimitry Andric PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() {
11600b57cec5SDimitry Andric   // Returns true if the block is to be partial inlined into the caller
11610b57cec5SDimitry Andric   // (i.e. not to be extracted to the out of line function)
11620b57cec5SDimitry Andric   auto ToBeInlined = [&, this](BasicBlock *BB) {
11630b57cec5SDimitry Andric     return BB == ClonedOI->ReturnBlock ||
1164e8d8bef9SDimitry Andric            llvm::is_contained(ClonedOI->Entries, BB);
11650b57cec5SDimitry Andric   };
11660b57cec5SDimitry Andric 
11670b57cec5SDimitry Andric   assert(ClonedOI && "Expecting OutlineInfo for single region outline");
11680b57cec5SDimitry Andric   // The CodeExtractor needs a dominator tree.
11690b57cec5SDimitry Andric   DominatorTree DT;
11700b57cec5SDimitry Andric   DT.recalculate(*ClonedFunc);
11710b57cec5SDimitry Andric 
11720b57cec5SDimitry Andric   // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
11730b57cec5SDimitry Andric   LoopInfo LI(DT);
11740b57cec5SDimitry Andric   BranchProbabilityInfo BPI(*ClonedFunc, LI);
11750b57cec5SDimitry Andric   ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
11760b57cec5SDimitry Andric 
11770b57cec5SDimitry Andric   // Gather up the blocks that we're going to extract.
11780b57cec5SDimitry Andric   std::vector<BasicBlock *> ToExtract;
1179e8d8bef9SDimitry Andric   auto *ClonedFuncTTI = &GetTTI(*ClonedFunc);
11800b57cec5SDimitry Andric   ToExtract.push_back(ClonedOI->NonReturnBlock);
1181e8d8bef9SDimitry Andric   OutlinedRegionCost += PartialInlinerImpl::computeBBInlineCost(
1182e8d8bef9SDimitry Andric       ClonedOI->NonReturnBlock, ClonedFuncTTI);
118306c3fb27SDimitry Andric   for (BasicBlock *BB : depth_first(&ClonedFunc->getEntryBlock()))
118406c3fb27SDimitry Andric     if (!ToBeInlined(BB) && BB != ClonedOI->NonReturnBlock) {
118506c3fb27SDimitry Andric       ToExtract.push_back(BB);
11860b57cec5SDimitry Andric       // FIXME: the code extractor may hoist/sink more code
11870b57cec5SDimitry Andric       // into the outlined function which may make the outlining
11880b57cec5SDimitry Andric       // overhead (the difference of the outlined function cost
11890b57cec5SDimitry Andric       // and OutliningRegionCost) look larger.
119006c3fb27SDimitry Andric       OutlinedRegionCost += computeBBInlineCost(BB, ClonedFuncTTI);
11910b57cec5SDimitry Andric     }
11920b57cec5SDimitry Andric 
11930b57cec5SDimitry Andric   // Extract the body of the if.
11948bcb0991SDimitry Andric   CodeExtractorAnalysisCache CEAC(*ClonedFunc);
11950b57cec5SDimitry Andric   Function *OutlinedFunc =
11960b57cec5SDimitry Andric       CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false,
11970b57cec5SDimitry Andric                     ClonedFuncBFI.get(), &BPI, LookupAC(*ClonedFunc),
11980b57cec5SDimitry Andric                     /* AllowVarargs */ true)
11998bcb0991SDimitry Andric           .extractCodeRegion(CEAC);
12000b57cec5SDimitry Andric 
12010b57cec5SDimitry Andric   if (OutlinedFunc) {
12020b57cec5SDimitry Andric     BasicBlock *OutliningCallBB =
1203e8d8bef9SDimitry Andric         PartialInlinerImpl::getOneCallSiteTo(*OutlinedFunc)->getParent();
12040b57cec5SDimitry Andric     assert(OutliningCallBB->getParent() == ClonedFunc);
12050b57cec5SDimitry Andric     OutlinedFunctions.push_back(std::make_pair(OutlinedFunc, OutliningCallBB));
12060b57cec5SDimitry Andric   } else
12070b57cec5SDimitry Andric     ORE.emit([&]() {
12080b57cec5SDimitry Andric       return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
12090b57cec5SDimitry Andric                                       &ToExtract.front()->front())
12100b57cec5SDimitry Andric              << "Failed to extract region at block "
12110b57cec5SDimitry Andric              << ore::NV("Block", ToExtract.front());
12120b57cec5SDimitry Andric     });
12130b57cec5SDimitry Andric 
12140b57cec5SDimitry Andric   return OutlinedFunc;
12150b57cec5SDimitry Andric }
12160b57cec5SDimitry Andric 
~FunctionCloner()12170b57cec5SDimitry Andric PartialInlinerImpl::FunctionCloner::~FunctionCloner() {
12180b57cec5SDimitry Andric   // Ditch the duplicate, since we're done with it, and rewrite all remaining
12190b57cec5SDimitry Andric   // users (function pointers, etc.) back to the original function.
12200b57cec5SDimitry Andric   ClonedFunc->replaceAllUsesWith(OrigFunc);
12210b57cec5SDimitry Andric   ClonedFunc->eraseFromParent();
12220b57cec5SDimitry Andric   if (!IsFunctionInlined) {
12230b57cec5SDimitry Andric     // Remove each function that was speculatively created if there is no
12240b57cec5SDimitry Andric     // reference.
12250b57cec5SDimitry Andric     for (auto FuncBBPair : OutlinedFunctions) {
12260b57cec5SDimitry Andric       Function *Func = FuncBBPair.first;
12270b57cec5SDimitry Andric       Func->eraseFromParent();
12280b57cec5SDimitry Andric     }
12290b57cec5SDimitry Andric   }
12300b57cec5SDimitry Andric }
12310b57cec5SDimitry Andric 
unswitchFunction(Function & F)1232e8d8bef9SDimitry Andric std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function &F) {
1233e8d8bef9SDimitry Andric   if (F.hasAddressTaken())
12340b57cec5SDimitry Andric     return {false, nullptr};
12350b57cec5SDimitry Andric 
12360b57cec5SDimitry Andric   // Let inliner handle it
1237e8d8bef9SDimitry Andric   if (F.hasFnAttribute(Attribute::AlwaysInline))
12380b57cec5SDimitry Andric     return {false, nullptr};
12390b57cec5SDimitry Andric 
1240e8d8bef9SDimitry Andric   if (F.hasFnAttribute(Attribute::NoInline))
12410b57cec5SDimitry Andric     return {false, nullptr};
12420b57cec5SDimitry Andric 
1243e8d8bef9SDimitry Andric   if (PSI.isFunctionEntryCold(&F))
12440b57cec5SDimitry Andric     return {false, nullptr};
12450b57cec5SDimitry Andric 
1246e8d8bef9SDimitry Andric   if (F.users().empty())
12470b57cec5SDimitry Andric     return {false, nullptr};
12480b57cec5SDimitry Andric 
1249e8d8bef9SDimitry Andric   OptimizationRemarkEmitter ORE(&F);
12500b57cec5SDimitry Andric 
12510b57cec5SDimitry Andric   // Only try to outline cold regions if we have a profile summary, which
12520b57cec5SDimitry Andric   // implies we have profiling information.
1253e8d8bef9SDimitry Andric   if (PSI.hasProfileSummary() && F.hasProfileData() &&
12540b57cec5SDimitry Andric       !DisableMultiRegionPartialInline) {
12550b57cec5SDimitry Andric     std::unique_ptr<FunctionOutliningMultiRegionInfo> OMRI =
12560b57cec5SDimitry Andric         computeOutliningColdRegionsInfo(F, ORE);
12570b57cec5SDimitry Andric     if (OMRI) {
1258e8d8bef9SDimitry Andric       FunctionCloner Cloner(&F, OMRI.get(), ORE, LookupAssumptionCache, GetTTI);
12590b57cec5SDimitry Andric 
1260e8d8bef9SDimitry Andric       LLVM_DEBUG({
12615ffd83dbSDimitry Andric         dbgs() << "HotCountThreshold = " << PSI.getHotCountThreshold() << "\n";
12625ffd83dbSDimitry Andric         dbgs() << "ColdCountThreshold = " << PSI.getColdCountThreshold()
12630b57cec5SDimitry Andric                << "\n";
1264e8d8bef9SDimitry Andric       });
1265e8d8bef9SDimitry Andric 
12660b57cec5SDimitry Andric       bool DidOutline = Cloner.doMultiRegionFunctionOutlining();
12670b57cec5SDimitry Andric 
12680b57cec5SDimitry Andric       if (DidOutline) {
1269e8d8bef9SDimitry Andric         LLVM_DEBUG({
12700b57cec5SDimitry Andric           dbgs() << ">>>>>> Outlined (Cloned) Function >>>>>>\n";
12710b57cec5SDimitry Andric           Cloner.ClonedFunc->print(dbgs());
12720b57cec5SDimitry Andric           dbgs() << "<<<<<< Outlined (Cloned) Function <<<<<<\n";
1273e8d8bef9SDimitry Andric         });
12740b57cec5SDimitry Andric 
12750b57cec5SDimitry Andric         if (tryPartialInline(Cloner))
12760b57cec5SDimitry Andric           return {true, nullptr};
12770b57cec5SDimitry Andric       }
12780b57cec5SDimitry Andric     }
12790b57cec5SDimitry Andric   }
12800b57cec5SDimitry Andric 
12810b57cec5SDimitry Andric   // Fall-thru to regular partial inlining if we:
12820b57cec5SDimitry Andric   //    i) can't find any cold regions to outline, or
12830b57cec5SDimitry Andric   //   ii) can't inline the outlined function anywhere.
12840b57cec5SDimitry Andric   std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(F);
12850b57cec5SDimitry Andric   if (!OI)
12860b57cec5SDimitry Andric     return {false, nullptr};
12870b57cec5SDimitry Andric 
1288e8d8bef9SDimitry Andric   FunctionCloner Cloner(&F, OI.get(), ORE, LookupAssumptionCache, GetTTI);
1289e8d8bef9SDimitry Andric   Cloner.normalizeReturnBlock();
12900b57cec5SDimitry Andric 
12910b57cec5SDimitry Andric   Function *OutlinedFunction = Cloner.doSingleRegionFunctionOutlining();
12920b57cec5SDimitry Andric 
12930b57cec5SDimitry Andric   if (!OutlinedFunction)
12940b57cec5SDimitry Andric     return {false, nullptr};
12950b57cec5SDimitry Andric 
1296e8d8bef9SDimitry Andric   if (tryPartialInline(Cloner))
12970b57cec5SDimitry Andric     return {true, OutlinedFunction};
12980b57cec5SDimitry Andric 
12990b57cec5SDimitry Andric   return {false, nullptr};
13000b57cec5SDimitry Andric }
13010b57cec5SDimitry Andric 
tryPartialInline(FunctionCloner & Cloner)13020b57cec5SDimitry Andric bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
13030b57cec5SDimitry Andric   if (Cloner.OutlinedFunctions.empty())
13040b57cec5SDimitry Andric     return false;
13050b57cec5SDimitry Andric 
1306fe6060f1SDimitry Andric   auto OutliningCosts = computeOutliningCosts(Cloner);
1307fe6060f1SDimitry Andric 
1308bdd1243dSDimitry Andric   InstructionCost SizeCost = std::get<0>(OutliningCosts);
1309bdd1243dSDimitry Andric   InstructionCost NonWeightedRcost = std::get<1>(OutliningCosts);
1310bdd1243dSDimitry Andric 
1311bdd1243dSDimitry Andric   assert(SizeCost.isValid() && NonWeightedRcost.isValid() &&
1312bdd1243dSDimitry Andric          "Expected valid costs");
13130b57cec5SDimitry Andric 
13140b57cec5SDimitry Andric   // Only calculate RelativeToEntryFreq when we are doing single region
13150b57cec5SDimitry Andric   // outlining.
13160b57cec5SDimitry Andric   BranchProbability RelativeToEntryFreq;
1317e8d8bef9SDimitry Andric   if (Cloner.ClonedOI)
13180b57cec5SDimitry Andric     RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner);
1319e8d8bef9SDimitry Andric   else
13200b57cec5SDimitry Andric     // RelativeToEntryFreq doesn't make sense when we have more than one
13210b57cec5SDimitry Andric     // outlined call because each call will have a different relative frequency
13220b57cec5SDimitry Andric     // to the entry block.  We can consider using the average, but the
13230b57cec5SDimitry Andric     // usefulness of that information is questionable. For now, assume we never
13240b57cec5SDimitry Andric     // execute the calls to outlined functions.
13250b57cec5SDimitry Andric     RelativeToEntryFreq = BranchProbability(0, 1);
13260b57cec5SDimitry Andric 
1327bdd1243dSDimitry Andric   BlockFrequency WeightedRcost =
1328bdd1243dSDimitry Andric       BlockFrequency(*NonWeightedRcost.getValue()) * RelativeToEntryFreq;
13290b57cec5SDimitry Andric 
13300b57cec5SDimitry Andric   // The call sequence(s) to the outlined function(s) are larger than the sum of
13310b57cec5SDimitry Andric   // the original outlined region size(s), it does not increase the chances of
13320b57cec5SDimitry Andric   // inlining the function with outlining (The inliner uses the size increase to
13330b57cec5SDimitry Andric   // model the cost of inlining a callee).
13340b57cec5SDimitry Andric   if (!SkipCostAnalysis && Cloner.OutlinedRegionCost < SizeCost) {
13350b57cec5SDimitry Andric     OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
13360b57cec5SDimitry Andric     DebugLoc DLoc;
13370b57cec5SDimitry Andric     BasicBlock *Block;
1338e8d8bef9SDimitry Andric     std::tie(DLoc, Block) = getOneDebugLoc(*Cloner.ClonedFunc);
13390b57cec5SDimitry Andric     OrigFuncORE.emit([&]() {
13400b57cec5SDimitry Andric       return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutlineRegionTooSmall",
13410b57cec5SDimitry Andric                                         DLoc, Block)
13420b57cec5SDimitry Andric              << ore::NV("Function", Cloner.OrigFunc)
13430b57cec5SDimitry Andric              << " not partially inlined into callers (Original Size = "
13440b57cec5SDimitry Andric              << ore::NV("OutlinedRegionOriginalSize", Cloner.OutlinedRegionCost)
13450b57cec5SDimitry Andric              << ", Size of call sequence to outlined function = "
13460b57cec5SDimitry Andric              << ore::NV("NewSize", SizeCost) << ")";
13470b57cec5SDimitry Andric     });
13480b57cec5SDimitry Andric     return false;
13490b57cec5SDimitry Andric   }
13500b57cec5SDimitry Andric 
13518bcb0991SDimitry Andric   assert(Cloner.OrigFunc->users().empty() &&
13520b57cec5SDimitry Andric          "F's users should all be replaced!");
13530b57cec5SDimitry Andric 
13540b57cec5SDimitry Andric   std::vector<User *> Users(Cloner.ClonedFunc->user_begin(),
13550b57cec5SDimitry Andric                             Cloner.ClonedFunc->user_end());
13560b57cec5SDimitry Andric 
13570b57cec5SDimitry Andric   DenseMap<User *, uint64_t> CallSiteToProfCountMap;
13580b57cec5SDimitry Andric   auto CalleeEntryCount = Cloner.OrigFunc->getEntryCount();
13590b57cec5SDimitry Andric   if (CalleeEntryCount)
13600b57cec5SDimitry Andric     computeCallsiteToProfCountMap(Cloner.ClonedFunc, CallSiteToProfCountMap);
13610b57cec5SDimitry Andric 
13620b57cec5SDimitry Andric   uint64_t CalleeEntryCountV =
1363349cc55cSDimitry Andric       (CalleeEntryCount ? CalleeEntryCount->getCount() : 0);
13640b57cec5SDimitry Andric 
13650b57cec5SDimitry Andric   bool AnyInline = false;
13660b57cec5SDimitry Andric   for (User *User : Users) {
136704eeddc0SDimitry Andric     // Don't bother with BlockAddress used by CallBr for asm goto.
136804eeddc0SDimitry Andric     if (isa<BlockAddress>(User))
136904eeddc0SDimitry Andric       continue;
137004eeddc0SDimitry Andric 
13715ffd83dbSDimitry Andric     CallBase *CB = getSupportedCallBase(User);
13720b57cec5SDimitry Andric 
1373e8d8bef9SDimitry Andric     if (isLimitReached())
13740b57cec5SDimitry Andric       continue;
13750b57cec5SDimitry Andric 
13765ffd83dbSDimitry Andric     OptimizationRemarkEmitter CallerORE(CB->getCaller());
13775ffd83dbSDimitry Andric     if (!shouldPartialInline(*CB, Cloner, WeightedRcost, CallerORE))
13780b57cec5SDimitry Andric       continue;
13790b57cec5SDimitry Andric 
13800b57cec5SDimitry Andric     // Construct remark before doing the inlining, as after successful inlining
13810b57cec5SDimitry Andric     // the callsite is removed.
13825ffd83dbSDimitry Andric     OptimizationRemark OR(DEBUG_TYPE, "PartiallyInlined", CB);
13830b57cec5SDimitry Andric     OR << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into "
13845ffd83dbSDimitry Andric        << ore::NV("Caller", CB->getCaller());
13850b57cec5SDimitry Andric 
138606c3fb27SDimitry Andric     InlineFunctionInfo IFI(GetAssumptionCache, &PSI);
13870b57cec5SDimitry Andric     // We can only forward varargs when we outlined a single region, else we
13880b57cec5SDimitry Andric     // bail on vararg functions.
1389bdd1243dSDimitry Andric     if (!InlineFunction(*CB, IFI, /*MergeAttributes=*/false, nullptr, true,
13900b57cec5SDimitry Andric                         (Cloner.ClonedOI ? Cloner.OutlinedFunctions.back().first
13915ffd83dbSDimitry Andric                                          : nullptr))
13925ffd83dbSDimitry Andric              .isSuccess())
13930b57cec5SDimitry Andric       continue;
13940b57cec5SDimitry Andric 
13950b57cec5SDimitry Andric     CallerORE.emit(OR);
13960b57cec5SDimitry Andric 
13970b57cec5SDimitry Andric     // Now update the entry count:
13980b57cec5SDimitry Andric     if (CalleeEntryCountV && CallSiteToProfCountMap.count(User)) {
13990b57cec5SDimitry Andric       uint64_t CallSiteCount = CallSiteToProfCountMap[User];
14000b57cec5SDimitry Andric       CalleeEntryCountV -= std::min(CalleeEntryCountV, CallSiteCount);
14010b57cec5SDimitry Andric     }
14020b57cec5SDimitry Andric 
14030b57cec5SDimitry Andric     AnyInline = true;
14040b57cec5SDimitry Andric     NumPartialInlining++;
14050b57cec5SDimitry Andric     // Update the stats
14060b57cec5SDimitry Andric     if (Cloner.ClonedOI)
14070b57cec5SDimitry Andric       NumPartialInlined++;
14080b57cec5SDimitry Andric     else
14090b57cec5SDimitry Andric       NumColdOutlinePartialInlined++;
14100b57cec5SDimitry Andric   }
14110b57cec5SDimitry Andric 
14120b57cec5SDimitry Andric   if (AnyInline) {
14130b57cec5SDimitry Andric     Cloner.IsFunctionInlined = true;
14140b57cec5SDimitry Andric     if (CalleeEntryCount)
1415349cc55cSDimitry Andric       Cloner.OrigFunc->setEntryCount(Function::ProfileCount(
1416349cc55cSDimitry Andric           CalleeEntryCountV, CalleeEntryCount->getType()));
14170b57cec5SDimitry Andric     OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
14180b57cec5SDimitry Andric     OrigFuncORE.emit([&]() {
14190b57cec5SDimitry Andric       return OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", Cloner.OrigFunc)
14200b57cec5SDimitry Andric              << "Partially inlined into at least one caller";
14210b57cec5SDimitry Andric     });
14220b57cec5SDimitry Andric   }
14230b57cec5SDimitry Andric 
14240b57cec5SDimitry Andric   return AnyInline;
14250b57cec5SDimitry Andric }
14260b57cec5SDimitry Andric 
run(Module & M)14270b57cec5SDimitry Andric bool PartialInlinerImpl::run(Module &M) {
14280b57cec5SDimitry Andric   if (DisablePartialInlining)
14290b57cec5SDimitry Andric     return false;
14300b57cec5SDimitry Andric 
14310b57cec5SDimitry Andric   std::vector<Function *> Worklist;
14320b57cec5SDimitry Andric   Worklist.reserve(M.size());
14330b57cec5SDimitry Andric   for (Function &F : M)
14340b57cec5SDimitry Andric     if (!F.use_empty() && !F.isDeclaration())
14350b57cec5SDimitry Andric       Worklist.push_back(&F);
14360b57cec5SDimitry Andric 
14370b57cec5SDimitry Andric   bool Changed = false;
14380b57cec5SDimitry Andric   while (!Worklist.empty()) {
14390b57cec5SDimitry Andric     Function *CurrFunc = Worklist.back();
14400b57cec5SDimitry Andric     Worklist.pop_back();
14410b57cec5SDimitry Andric 
14420b57cec5SDimitry Andric     if (CurrFunc->use_empty())
14430b57cec5SDimitry Andric       continue;
14440b57cec5SDimitry Andric 
1445e8d8bef9SDimitry Andric     std::pair<bool, Function *> Result = unswitchFunction(*CurrFunc);
14460b57cec5SDimitry Andric     if (Result.second)
14470b57cec5SDimitry Andric       Worklist.push_back(Result.second);
14480b57cec5SDimitry Andric     Changed |= Result.first;
14490b57cec5SDimitry Andric   }
14500b57cec5SDimitry Andric 
14510b57cec5SDimitry Andric   return Changed;
14520b57cec5SDimitry Andric }
14530b57cec5SDimitry Andric 
run(Module & M,ModuleAnalysisManager & AM)14540b57cec5SDimitry Andric PreservedAnalyses PartialInlinerPass::run(Module &M,
14550b57cec5SDimitry Andric                                           ModuleAnalysisManager &AM) {
14560b57cec5SDimitry Andric   auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
14570b57cec5SDimitry Andric 
14585ffd83dbSDimitry Andric   auto GetAssumptionCache = [&FAM](Function &F) -> AssumptionCache & {
14590b57cec5SDimitry Andric     return FAM.getResult<AssumptionAnalysis>(F);
14600b57cec5SDimitry Andric   };
14610b57cec5SDimitry Andric 
14620b57cec5SDimitry Andric   auto LookupAssumptionCache = [&FAM](Function &F) -> AssumptionCache * {
14630b57cec5SDimitry Andric     return FAM.getCachedResult<AssumptionAnalysis>(F);
14640b57cec5SDimitry Andric   };
14650b57cec5SDimitry Andric 
14665ffd83dbSDimitry Andric   auto GetBFI = [&FAM](Function &F) -> BlockFrequencyInfo & {
14670b57cec5SDimitry Andric     return FAM.getResult<BlockFrequencyAnalysis>(F);
14680b57cec5SDimitry Andric   };
14690b57cec5SDimitry Andric 
14705ffd83dbSDimitry Andric   auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & {
14710b57cec5SDimitry Andric     return FAM.getResult<TargetIRAnalysis>(F);
14720b57cec5SDimitry Andric   };
14730b57cec5SDimitry Andric 
14745ffd83dbSDimitry Andric   auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
14755ffd83dbSDimitry Andric     return FAM.getResult<TargetLibraryAnalysis>(F);
14765ffd83dbSDimitry Andric   };
14770b57cec5SDimitry Andric 
14785ffd83dbSDimitry Andric   ProfileSummaryInfo &PSI = AM.getResult<ProfileSummaryAnalysis>(M);
14795ffd83dbSDimitry Andric 
14805ffd83dbSDimitry Andric   if (PartialInlinerImpl(GetAssumptionCache, LookupAssumptionCache, GetTTI,
14815ffd83dbSDimitry Andric                          GetTLI, PSI, GetBFI)
14820b57cec5SDimitry Andric           .run(M))
14830b57cec5SDimitry Andric     return PreservedAnalyses::none();
14840b57cec5SDimitry Andric   return PreservedAnalyses::all();
14850b57cec5SDimitry Andric }
1486