xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp (revision f499134dd403eeeba8283e2640e2654c8da62430)
1  //===- CodeExtractor.cpp - Pull code region into a new function -----------===//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  //
9  // This file implements the interface to tear out a code region, such as an
10  // individual loop or a parallel section, into a new function, replacing it with
11  // a call to the new function.
12  //
13  //===----------------------------------------------------------------------===//
14  
15  #include "llvm/Transforms/Utils/CodeExtractor.h"
16  #include "llvm/ADT/ArrayRef.h"
17  #include "llvm/ADT/DenseMap.h"
18  #include "llvm/ADT/Optional.h"
19  #include "llvm/ADT/STLExtras.h"
20  #include "llvm/ADT/SetVector.h"
21  #include "llvm/ADT/SmallPtrSet.h"
22  #include "llvm/ADT/SmallVector.h"
23  #include "llvm/Analysis/AssumptionCache.h"
24  #include "llvm/Analysis/BlockFrequencyInfo.h"
25  #include "llvm/Analysis/BlockFrequencyInfoImpl.h"
26  #include "llvm/Analysis/BranchProbabilityInfo.h"
27  #include "llvm/Analysis/LoopInfo.h"
28  #include "llvm/IR/Argument.h"
29  #include "llvm/IR/Attributes.h"
30  #include "llvm/IR/BasicBlock.h"
31  #include "llvm/IR/CFG.h"
32  #include "llvm/IR/Constant.h"
33  #include "llvm/IR/Constants.h"
34  #include "llvm/IR/DIBuilder.h"
35  #include "llvm/IR/DataLayout.h"
36  #include "llvm/IR/DebugInfoMetadata.h"
37  #include "llvm/IR/DerivedTypes.h"
38  #include "llvm/IR/Dominators.h"
39  #include "llvm/IR/Function.h"
40  #include "llvm/IR/GlobalValue.h"
41  #include "llvm/IR/InstIterator.h"
42  #include "llvm/IR/InstrTypes.h"
43  #include "llvm/IR/Instruction.h"
44  #include "llvm/IR/Instructions.h"
45  #include "llvm/IR/IntrinsicInst.h"
46  #include "llvm/IR/Intrinsics.h"
47  #include "llvm/IR/LLVMContext.h"
48  #include "llvm/IR/MDBuilder.h"
49  #include "llvm/IR/Module.h"
50  #include "llvm/IR/PatternMatch.h"
51  #include "llvm/IR/Type.h"
52  #include "llvm/IR/User.h"
53  #include "llvm/IR/Value.h"
54  #include "llvm/IR/Verifier.h"
55  #include "llvm/Pass.h"
56  #include "llvm/Support/BlockFrequency.h"
57  #include "llvm/Support/BranchProbability.h"
58  #include "llvm/Support/Casting.h"
59  #include "llvm/Support/CommandLine.h"
60  #include "llvm/Support/Debug.h"
61  #include "llvm/Support/ErrorHandling.h"
62  #include "llvm/Support/raw_ostream.h"
63  #include "llvm/Transforms/Utils/BasicBlockUtils.h"
64  #include "llvm/Transforms/Utils/Local.h"
65  #include <cassert>
66  #include <cstdint>
67  #include <iterator>
68  #include <map>
69  #include <set>
70  #include <utility>
71  #include <vector>
72  
73  using namespace llvm;
74  using namespace llvm::PatternMatch;
75  using ProfileCount = Function::ProfileCount;
76  
77  #define DEBUG_TYPE "code-extractor"
78  
79  // Provide a command-line option to aggregate function arguments into a struct
80  // for functions produced by the code extractor. This is useful when converting
81  // extracted functions to pthread-based code, as only one argument (void*) can
82  // be passed in to pthread_create().
83  static cl::opt<bool>
84  AggregateArgsOpt("aggregate-extracted-args", cl::Hidden,
85                   cl::desc("Aggregate arguments to code-extracted functions"));
86  
87  /// Test whether a block is valid for extraction.
88  static bool isBlockValidForExtraction(const BasicBlock &BB,
89                                        const SetVector<BasicBlock *> &Result,
90                                        bool AllowVarArgs, bool AllowAlloca) {
91    // taking the address of a basic block moved to another function is illegal
92    if (BB.hasAddressTaken())
93      return false;
94  
95    // don't hoist code that uses another basicblock address, as it's likely to
96    // lead to unexpected behavior, like cross-function jumps
97    SmallPtrSet<User const *, 16> Visited;
98    SmallVector<User const *, 16> ToVisit;
99  
100    for (Instruction const &Inst : BB)
101      ToVisit.push_back(&Inst);
102  
103    while (!ToVisit.empty()) {
104      User const *Curr = ToVisit.pop_back_val();
105      if (!Visited.insert(Curr).second)
106        continue;
107      if (isa<BlockAddress const>(Curr))
108        return false; // even a reference to self is likely to be not compatible
109  
110      if (isa<Instruction>(Curr) && cast<Instruction>(Curr)->getParent() != &BB)
111        continue;
112  
113      for (auto const &U : Curr->operands()) {
114        if (auto *UU = dyn_cast<User>(U))
115          ToVisit.push_back(UU);
116      }
117    }
118  
119    // If explicitly requested, allow vastart and alloca. For invoke instructions
120    // verify that extraction is valid.
121    for (BasicBlock::const_iterator I = BB.begin(), E = BB.end(); I != E; ++I) {
122      if (isa<AllocaInst>(I)) {
123         if (!AllowAlloca)
124           return false;
125         continue;
126      }
127  
128      if (const auto *II = dyn_cast<InvokeInst>(I)) {
129        // Unwind destination (either a landingpad, catchswitch, or cleanuppad)
130        // must be a part of the subgraph which is being extracted.
131        if (auto *UBB = II->getUnwindDest())
132          if (!Result.count(UBB))
133            return false;
134        continue;
135      }
136  
137      // All catch handlers of a catchswitch instruction as well as the unwind
138      // destination must be in the subgraph.
139      if (const auto *CSI = dyn_cast<CatchSwitchInst>(I)) {
140        if (auto *UBB = CSI->getUnwindDest())
141          if (!Result.count(UBB))
142            return false;
143        for (auto *HBB : CSI->handlers())
144          if (!Result.count(const_cast<BasicBlock*>(HBB)))
145            return false;
146        continue;
147      }
148  
149      // Make sure that entire catch handler is within subgraph. It is sufficient
150      // to check that catch return's block is in the list.
151      if (const auto *CPI = dyn_cast<CatchPadInst>(I)) {
152        for (const auto *U : CPI->users())
153          if (const auto *CRI = dyn_cast<CatchReturnInst>(U))
154            if (!Result.count(const_cast<BasicBlock*>(CRI->getParent())))
155              return false;
156        continue;
157      }
158  
159      // And do similar checks for cleanup handler - the entire handler must be
160      // in subgraph which is going to be extracted. For cleanup return should
161      // additionally check that the unwind destination is also in the subgraph.
162      if (const auto *CPI = dyn_cast<CleanupPadInst>(I)) {
163        for (const auto *U : CPI->users())
164          if (const auto *CRI = dyn_cast<CleanupReturnInst>(U))
165            if (!Result.count(const_cast<BasicBlock*>(CRI->getParent())))
166              return false;
167        continue;
168      }
169      if (const auto *CRI = dyn_cast<CleanupReturnInst>(I)) {
170        if (auto *UBB = CRI->getUnwindDest())
171          if (!Result.count(UBB))
172            return false;
173        continue;
174      }
175  
176      if (const CallInst *CI = dyn_cast<CallInst>(I)) {
177        if (const Function *F = CI->getCalledFunction()) {
178          auto IID = F->getIntrinsicID();
179          if (IID == Intrinsic::vastart) {
180            if (AllowVarArgs)
181              continue;
182            else
183              return false;
184          }
185  
186          // Currently, we miscompile outlined copies of eh_typid_for. There are
187          // proposals for fixing this in llvm.org/PR39545.
188          if (IID == Intrinsic::eh_typeid_for)
189            return false;
190        }
191      }
192    }
193  
194    return true;
195  }
196  
197  /// Build a set of blocks to extract if the input blocks are viable.
198  static SetVector<BasicBlock *>
199  buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
200                          bool AllowVarArgs, bool AllowAlloca) {
201    assert(!BBs.empty() && "The set of blocks to extract must be non-empty");
202    SetVector<BasicBlock *> Result;
203  
204    // Loop over the blocks, adding them to our set-vector, and aborting with an
205    // empty set if we encounter invalid blocks.
206    for (BasicBlock *BB : BBs) {
207      // If this block is dead, don't process it.
208      if (DT && !DT->isReachableFromEntry(BB))
209        continue;
210  
211      if (!Result.insert(BB))
212        llvm_unreachable("Repeated basic blocks in extraction input");
213    }
214  
215    LLVM_DEBUG(dbgs() << "Region front block: " << Result.front()->getName()
216                      << '\n');
217  
218    for (auto *BB : Result) {
219      if (!isBlockValidForExtraction(*BB, Result, AllowVarArgs, AllowAlloca))
220        return {};
221  
222      // Make sure that the first block is not a landing pad.
223      if (BB == Result.front()) {
224        if (BB->isEHPad()) {
225          LLVM_DEBUG(dbgs() << "The first block cannot be an unwind block\n");
226          return {};
227        }
228        continue;
229      }
230  
231      // All blocks other than the first must not have predecessors outside of
232      // the subgraph which is being extracted.
233      for (auto *PBB : predecessors(BB))
234        if (!Result.count(PBB)) {
235          LLVM_DEBUG(dbgs() << "No blocks in this region may have entries from "
236                               "outside the region except for the first block!\n"
237                            << "Problematic source BB: " << BB->getName() << "\n"
238                            << "Problematic destination BB: " << PBB->getName()
239                            << "\n");
240          return {};
241        }
242    }
243  
244    return Result;
245  }
246  
247  CodeExtractor::CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
248                               bool AggregateArgs, BlockFrequencyInfo *BFI,
249                               BranchProbabilityInfo *BPI, AssumptionCache *AC,
250                               bool AllowVarArgs, bool AllowAlloca,
251                               std::string Suffix)
252      : DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
253        BPI(BPI), AC(AC), AllowVarArgs(AllowVarArgs),
254        Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs, AllowAlloca)),
255        Suffix(Suffix) {}
256  
257  CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs,
258                               BlockFrequencyInfo *BFI,
259                               BranchProbabilityInfo *BPI, AssumptionCache *AC,
260                               std::string Suffix)
261      : DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
262        BPI(BPI), AC(AC), AllowVarArgs(false),
263        Blocks(buildExtractionBlockSet(L.getBlocks(), &DT,
264                                       /* AllowVarArgs */ false,
265                                       /* AllowAlloca */ false)),
266        Suffix(Suffix) {}
267  
268  /// definedInRegion - Return true if the specified value is defined in the
269  /// extracted region.
270  static bool definedInRegion(const SetVector<BasicBlock *> &Blocks, Value *V) {
271    if (Instruction *I = dyn_cast<Instruction>(V))
272      if (Blocks.count(I->getParent()))
273        return true;
274    return false;
275  }
276  
277  /// definedInCaller - Return true if the specified value is defined in the
278  /// function being code extracted, but not in the region being extracted.
279  /// These values must be passed in as live-ins to the function.
280  static bool definedInCaller(const SetVector<BasicBlock *> &Blocks, Value *V) {
281    if (isa<Argument>(V)) return true;
282    if (Instruction *I = dyn_cast<Instruction>(V))
283      if (!Blocks.count(I->getParent()))
284        return true;
285    return false;
286  }
287  
288  static BasicBlock *getCommonExitBlock(const SetVector<BasicBlock *> &Blocks) {
289    BasicBlock *CommonExitBlock = nullptr;
290    auto hasNonCommonExitSucc = [&](BasicBlock *Block) {
291      for (auto *Succ : successors(Block)) {
292        // Internal edges, ok.
293        if (Blocks.count(Succ))
294          continue;
295        if (!CommonExitBlock) {
296          CommonExitBlock = Succ;
297          continue;
298        }
299        if (CommonExitBlock != Succ)
300          return true;
301      }
302      return false;
303    };
304  
305    if (any_of(Blocks, hasNonCommonExitSucc))
306      return nullptr;
307  
308    return CommonExitBlock;
309  }
310  
311  CodeExtractorAnalysisCache::CodeExtractorAnalysisCache(Function &F) {
312    for (BasicBlock &BB : F) {
313      for (Instruction &II : BB.instructionsWithoutDebug())
314        if (auto *AI = dyn_cast<AllocaInst>(&II))
315          Allocas.push_back(AI);
316  
317      findSideEffectInfoForBlock(BB);
318    }
319  }
320  
321  void CodeExtractorAnalysisCache::findSideEffectInfoForBlock(BasicBlock &BB) {
322    for (Instruction &II : BB.instructionsWithoutDebug()) {
323      unsigned Opcode = II.getOpcode();
324      Value *MemAddr = nullptr;
325      switch (Opcode) {
326      case Instruction::Store:
327      case Instruction::Load: {
328        if (Opcode == Instruction::Store) {
329          StoreInst *SI = cast<StoreInst>(&II);
330          MemAddr = SI->getPointerOperand();
331        } else {
332          LoadInst *LI = cast<LoadInst>(&II);
333          MemAddr = LI->getPointerOperand();
334        }
335        // Global variable can not be aliased with locals.
336        if (isa<Constant>(MemAddr))
337          break;
338        Value *Base = MemAddr->stripInBoundsConstantOffsets();
339        if (!isa<AllocaInst>(Base)) {
340          SideEffectingBlocks.insert(&BB);
341          return;
342        }
343        BaseMemAddrs[&BB].insert(Base);
344        break;
345      }
346      default: {
347        IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(&II);
348        if (IntrInst) {
349          if (IntrInst->isLifetimeStartOrEnd())
350            break;
351          SideEffectingBlocks.insert(&BB);
352          return;
353        }
354        // Treat all the other cases conservatively if it has side effects.
355        if (II.mayHaveSideEffects()) {
356          SideEffectingBlocks.insert(&BB);
357          return;
358        }
359      }
360      }
361    }
362  }
363  
364  bool CodeExtractorAnalysisCache::doesBlockContainClobberOfAddr(
365      BasicBlock &BB, AllocaInst *Addr) const {
366    if (SideEffectingBlocks.count(&BB))
367      return true;
368    auto It = BaseMemAddrs.find(&BB);
369    if (It != BaseMemAddrs.end())
370      return It->second.count(Addr);
371    return false;
372  }
373  
374  bool CodeExtractor::isLegalToShrinkwrapLifetimeMarkers(
375      const CodeExtractorAnalysisCache &CEAC, Instruction *Addr) const {
376    AllocaInst *AI = cast<AllocaInst>(Addr->stripInBoundsConstantOffsets());
377    Function *Func = (*Blocks.begin())->getParent();
378    for (BasicBlock &BB : *Func) {
379      if (Blocks.count(&BB))
380        continue;
381      if (CEAC.doesBlockContainClobberOfAddr(BB, AI))
382        return false;
383    }
384    return true;
385  }
386  
387  BasicBlock *
388  CodeExtractor::findOrCreateBlockForHoisting(BasicBlock *CommonExitBlock) {
389    BasicBlock *SinglePredFromOutlineRegion = nullptr;
390    assert(!Blocks.count(CommonExitBlock) &&
391           "Expect a block outside the region!");
392    for (auto *Pred : predecessors(CommonExitBlock)) {
393      if (!Blocks.count(Pred))
394        continue;
395      if (!SinglePredFromOutlineRegion) {
396        SinglePredFromOutlineRegion = Pred;
397      } else if (SinglePredFromOutlineRegion != Pred) {
398        SinglePredFromOutlineRegion = nullptr;
399        break;
400      }
401    }
402  
403    if (SinglePredFromOutlineRegion)
404      return SinglePredFromOutlineRegion;
405  
406  #ifndef NDEBUG
407    auto getFirstPHI = [](BasicBlock *BB) {
408      BasicBlock::iterator I = BB->begin();
409      PHINode *FirstPhi = nullptr;
410      while (I != BB->end()) {
411        PHINode *Phi = dyn_cast<PHINode>(I);
412        if (!Phi)
413          break;
414        if (!FirstPhi) {
415          FirstPhi = Phi;
416          break;
417        }
418      }
419      return FirstPhi;
420    };
421    // If there are any phi nodes, the single pred either exists or has already
422    // be created before code extraction.
423    assert(!getFirstPHI(CommonExitBlock) && "Phi not expected");
424  #endif
425  
426    BasicBlock *NewExitBlock = CommonExitBlock->splitBasicBlock(
427        CommonExitBlock->getFirstNonPHI()->getIterator());
428  
429    for (BasicBlock *Pred :
430         llvm::make_early_inc_range(predecessors(CommonExitBlock))) {
431      if (Blocks.count(Pred))
432        continue;
433      Pred->getTerminator()->replaceUsesOfWith(CommonExitBlock, NewExitBlock);
434    }
435    // Now add the old exit block to the outline region.
436    Blocks.insert(CommonExitBlock);
437    return CommonExitBlock;
438  }
439  
440  // Find the pair of life time markers for address 'Addr' that are either
441  // defined inside the outline region or can legally be shrinkwrapped into the
442  // outline region. If there are not other untracked uses of the address, return
443  // the pair of markers if found; otherwise return a pair of nullptr.
444  CodeExtractor::LifetimeMarkerInfo
445  CodeExtractor::getLifetimeMarkers(const CodeExtractorAnalysisCache &CEAC,
446                                    Instruction *Addr,
447                                    BasicBlock *ExitBlock) const {
448    LifetimeMarkerInfo Info;
449  
450    for (User *U : Addr->users()) {
451      IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(U);
452      if (IntrInst) {
453        // We don't model addresses with multiple start/end markers, but the
454        // markers do not need to be in the region.
455        if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start) {
456          if (Info.LifeStart)
457            return {};
458          Info.LifeStart = IntrInst;
459          continue;
460        }
461        if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_end) {
462          if (Info.LifeEnd)
463            return {};
464          Info.LifeEnd = IntrInst;
465          continue;
466        }
467        // At this point, permit debug uses outside of the region.
468        // This is fixed in a later call to fixupDebugInfoPostExtraction().
469        if (isa<DbgInfoIntrinsic>(IntrInst))
470          continue;
471      }
472      // Find untracked uses of the address, bail.
473      if (!definedInRegion(Blocks, U))
474        return {};
475    }
476  
477    if (!Info.LifeStart || !Info.LifeEnd)
478      return {};
479  
480    Info.SinkLifeStart = !definedInRegion(Blocks, Info.LifeStart);
481    Info.HoistLifeEnd = !definedInRegion(Blocks, Info.LifeEnd);
482    // Do legality check.
483    if ((Info.SinkLifeStart || Info.HoistLifeEnd) &&
484        !isLegalToShrinkwrapLifetimeMarkers(CEAC, Addr))
485      return {};
486  
487    // Check to see if we have a place to do hoisting, if not, bail.
488    if (Info.HoistLifeEnd && !ExitBlock)
489      return {};
490  
491    return Info;
492  }
493  
494  void CodeExtractor::findAllocas(const CodeExtractorAnalysisCache &CEAC,
495                                  ValueSet &SinkCands, ValueSet &HoistCands,
496                                  BasicBlock *&ExitBlock) const {
497    Function *Func = (*Blocks.begin())->getParent();
498    ExitBlock = getCommonExitBlock(Blocks);
499  
500    auto moveOrIgnoreLifetimeMarkers =
501        [&](const LifetimeMarkerInfo &LMI) -> bool {
502      if (!LMI.LifeStart)
503        return false;
504      if (LMI.SinkLifeStart) {
505        LLVM_DEBUG(dbgs() << "Sinking lifetime.start: " << *LMI.LifeStart
506                          << "\n");
507        SinkCands.insert(LMI.LifeStart);
508      }
509      if (LMI.HoistLifeEnd) {
510        LLVM_DEBUG(dbgs() << "Hoisting lifetime.end: " << *LMI.LifeEnd << "\n");
511        HoistCands.insert(LMI.LifeEnd);
512      }
513      return true;
514    };
515  
516    // Look up allocas in the original function in CodeExtractorAnalysisCache, as
517    // this is much faster than walking all the instructions.
518    for (AllocaInst *AI : CEAC.getAllocas()) {
519      BasicBlock *BB = AI->getParent();
520      if (Blocks.count(BB))
521        continue;
522  
523      // As a prior call to extractCodeRegion() may have shrinkwrapped the alloca,
524      // check whether it is actually still in the original function.
525      Function *AIFunc = BB->getParent();
526      if (AIFunc != Func)
527        continue;
528  
529      LifetimeMarkerInfo MarkerInfo = getLifetimeMarkers(CEAC, AI, ExitBlock);
530      bool Moved = moveOrIgnoreLifetimeMarkers(MarkerInfo);
531      if (Moved) {
532        LLVM_DEBUG(dbgs() << "Sinking alloca: " << *AI << "\n");
533        SinkCands.insert(AI);
534        continue;
535      }
536  
537      // Find bitcasts in the outlined region that have lifetime marker users
538      // outside that region. Replace the lifetime marker use with an
539      // outside region bitcast to avoid unnecessary alloca/reload instructions
540      // and extra lifetime markers.
541      SmallVector<Instruction *, 2> LifetimeBitcastUsers;
542      for (User *U : AI->users()) {
543        if (!definedInRegion(Blocks, U))
544          continue;
545  
546        if (U->stripInBoundsConstantOffsets() != AI)
547          continue;
548  
549        Instruction *Bitcast = cast<Instruction>(U);
550        for (User *BU : Bitcast->users()) {
551          IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(BU);
552          if (!IntrInst)
553            continue;
554  
555          if (!IntrInst->isLifetimeStartOrEnd())
556            continue;
557  
558          if (definedInRegion(Blocks, IntrInst))
559            continue;
560  
561          LLVM_DEBUG(dbgs() << "Replace use of extracted region bitcast"
562                            << *Bitcast << " in out-of-region lifetime marker "
563                            << *IntrInst << "\n");
564          LifetimeBitcastUsers.push_back(IntrInst);
565        }
566      }
567  
568      for (Instruction *I : LifetimeBitcastUsers) {
569        Module *M = AIFunc->getParent();
570        LLVMContext &Ctx = M->getContext();
571        auto *Int8PtrTy = Type::getInt8PtrTy(Ctx);
572        CastInst *CastI =
573            CastInst::CreatePointerCast(AI, Int8PtrTy, "lt.cast", I);
574        I->replaceUsesOfWith(I->getOperand(1), CastI);
575      }
576  
577      // Follow any bitcasts.
578      SmallVector<Instruction *, 2> Bitcasts;
579      SmallVector<LifetimeMarkerInfo, 2> BitcastLifetimeInfo;
580      for (User *U : AI->users()) {
581        if (U->stripInBoundsConstantOffsets() == AI) {
582          Instruction *Bitcast = cast<Instruction>(U);
583          LifetimeMarkerInfo LMI = getLifetimeMarkers(CEAC, Bitcast, ExitBlock);
584          if (LMI.LifeStart) {
585            Bitcasts.push_back(Bitcast);
586            BitcastLifetimeInfo.push_back(LMI);
587            continue;
588          }
589        }
590  
591        // Found unknown use of AI.
592        if (!definedInRegion(Blocks, U)) {
593          Bitcasts.clear();
594          break;
595        }
596      }
597  
598      // Either no bitcasts reference the alloca or there are unknown uses.
599      if (Bitcasts.empty())
600        continue;
601  
602      LLVM_DEBUG(dbgs() << "Sinking alloca (via bitcast): " << *AI << "\n");
603      SinkCands.insert(AI);
604      for (unsigned I = 0, E = Bitcasts.size(); I != E; ++I) {
605        Instruction *BitcastAddr = Bitcasts[I];
606        const LifetimeMarkerInfo &LMI = BitcastLifetimeInfo[I];
607        assert(LMI.LifeStart &&
608               "Unsafe to sink bitcast without lifetime markers");
609        moveOrIgnoreLifetimeMarkers(LMI);
610        if (!definedInRegion(Blocks, BitcastAddr)) {
611          LLVM_DEBUG(dbgs() << "Sinking bitcast-of-alloca: " << *BitcastAddr
612                            << "\n");
613          SinkCands.insert(BitcastAddr);
614        }
615      }
616    }
617  }
618  
619  bool CodeExtractor::isEligible() const {
620    if (Blocks.empty())
621      return false;
622    BasicBlock *Header = *Blocks.begin();
623    Function *F = Header->getParent();
624  
625    // For functions with varargs, check that varargs handling is only done in the
626    // outlined function, i.e vastart and vaend are only used in outlined blocks.
627    if (AllowVarArgs && F->getFunctionType()->isVarArg()) {
628      auto containsVarArgIntrinsic = [](const Instruction &I) {
629        if (const CallInst *CI = dyn_cast<CallInst>(&I))
630          if (const Function *Callee = CI->getCalledFunction())
631            return Callee->getIntrinsicID() == Intrinsic::vastart ||
632                   Callee->getIntrinsicID() == Intrinsic::vaend;
633        return false;
634      };
635  
636      for (auto &BB : *F) {
637        if (Blocks.count(&BB))
638          continue;
639        if (llvm::any_of(BB, containsVarArgIntrinsic))
640          return false;
641      }
642    }
643    return true;
644  }
645  
646  void CodeExtractor::findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs,
647                                        const ValueSet &SinkCands) const {
648    for (BasicBlock *BB : Blocks) {
649      // If a used value is defined outside the region, it's an input.  If an
650      // instruction is used outside the region, it's an output.
651      for (Instruction &II : *BB) {
652        for (auto &OI : II.operands()) {
653          Value *V = OI;
654          if (!SinkCands.count(V) && definedInCaller(Blocks, V))
655            Inputs.insert(V);
656        }
657  
658        for (User *U : II.users())
659          if (!definedInRegion(Blocks, U)) {
660            Outputs.insert(&II);
661            break;
662          }
663      }
664    }
665  }
666  
667  /// severSplitPHINodesOfEntry - If a PHI node has multiple inputs from outside
668  /// of the region, we need to split the entry block of the region so that the
669  /// PHI node is easier to deal with.
670  void CodeExtractor::severSplitPHINodesOfEntry(BasicBlock *&Header) {
671    unsigned NumPredsFromRegion = 0;
672    unsigned NumPredsOutsideRegion = 0;
673  
674    if (Header != &Header->getParent()->getEntryBlock()) {
675      PHINode *PN = dyn_cast<PHINode>(Header->begin());
676      if (!PN) return;  // No PHI nodes.
677  
678      // If the header node contains any PHI nodes, check to see if there is more
679      // than one entry from outside the region.  If so, we need to sever the
680      // header block into two.
681      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
682        if (Blocks.count(PN->getIncomingBlock(i)))
683          ++NumPredsFromRegion;
684        else
685          ++NumPredsOutsideRegion;
686  
687      // If there is one (or fewer) predecessor from outside the region, we don't
688      // need to do anything special.
689      if (NumPredsOutsideRegion <= 1) return;
690    }
691  
692    // Otherwise, we need to split the header block into two pieces: one
693    // containing PHI nodes merging values from outside of the region, and a
694    // second that contains all of the code for the block and merges back any
695    // incoming values from inside of the region.
696    BasicBlock *NewBB = SplitBlock(Header, Header->getFirstNonPHI(), DT);
697  
698    // We only want to code extract the second block now, and it becomes the new
699    // header of the region.
700    BasicBlock *OldPred = Header;
701    Blocks.remove(OldPred);
702    Blocks.insert(NewBB);
703    Header = NewBB;
704  
705    // Okay, now we need to adjust the PHI nodes and any branches from within the
706    // region to go to the new header block instead of the old header block.
707    if (NumPredsFromRegion) {
708      PHINode *PN = cast<PHINode>(OldPred->begin());
709      // Loop over all of the predecessors of OldPred that are in the region,
710      // changing them to branch to NewBB instead.
711      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
712        if (Blocks.count(PN->getIncomingBlock(i))) {
713          Instruction *TI = PN->getIncomingBlock(i)->getTerminator();
714          TI->replaceUsesOfWith(OldPred, NewBB);
715        }
716  
717      // Okay, everything within the region is now branching to the right block, we
718      // just have to update the PHI nodes now, inserting PHI nodes into NewBB.
719      BasicBlock::iterator AfterPHIs;
720      for (AfterPHIs = OldPred->begin(); isa<PHINode>(AfterPHIs); ++AfterPHIs) {
721        PHINode *PN = cast<PHINode>(AfterPHIs);
722        // Create a new PHI node in the new region, which has an incoming value
723        // from OldPred of PN.
724        PHINode *NewPN = PHINode::Create(PN->getType(), 1 + NumPredsFromRegion,
725                                         PN->getName() + ".ce", &NewBB->front());
726        PN->replaceAllUsesWith(NewPN);
727        NewPN->addIncoming(PN, OldPred);
728  
729        // Loop over all of the incoming value in PN, moving them to NewPN if they
730        // are from the extracted region.
731        for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) {
732          if (Blocks.count(PN->getIncomingBlock(i))) {
733            NewPN->addIncoming(PN->getIncomingValue(i), PN->getIncomingBlock(i));
734            PN->removeIncomingValue(i);
735            --i;
736          }
737        }
738      }
739    }
740  }
741  
742  /// severSplitPHINodesOfExits - if PHI nodes in exit blocks have inputs from
743  /// outlined region, we split these PHIs on two: one with inputs from region
744  /// and other with remaining incoming blocks; then first PHIs are placed in
745  /// outlined region.
746  void CodeExtractor::severSplitPHINodesOfExits(
747      const SmallPtrSetImpl<BasicBlock *> &Exits) {
748    for (BasicBlock *ExitBB : Exits) {
749      BasicBlock *NewBB = nullptr;
750  
751      for (PHINode &PN : ExitBB->phis()) {
752        // Find all incoming values from the outlining region.
753        SmallVector<unsigned, 2> IncomingVals;
754        for (unsigned i = 0; i < PN.getNumIncomingValues(); ++i)
755          if (Blocks.count(PN.getIncomingBlock(i)))
756            IncomingVals.push_back(i);
757  
758        // Do not process PHI if there is one (or fewer) predecessor from region.
759        // If PHI has exactly one predecessor from region, only this one incoming
760        // will be replaced on codeRepl block, so it should be safe to skip PHI.
761        if (IncomingVals.size() <= 1)
762          continue;
763  
764        // Create block for new PHIs and add it to the list of outlined if it
765        // wasn't done before.
766        if (!NewBB) {
767          NewBB = BasicBlock::Create(ExitBB->getContext(),
768                                     ExitBB->getName() + ".split",
769                                     ExitBB->getParent(), ExitBB);
770          SmallVector<BasicBlock *, 4> Preds(predecessors(ExitBB));
771          for (BasicBlock *PredBB : Preds)
772            if (Blocks.count(PredBB))
773              PredBB->getTerminator()->replaceUsesOfWith(ExitBB, NewBB);
774          BranchInst::Create(ExitBB, NewBB);
775          Blocks.insert(NewBB);
776        }
777  
778        // Split this PHI.
779        PHINode *NewPN =
780            PHINode::Create(PN.getType(), IncomingVals.size(),
781                            PN.getName() + ".ce", NewBB->getFirstNonPHI());
782        for (unsigned i : IncomingVals)
783          NewPN->addIncoming(PN.getIncomingValue(i), PN.getIncomingBlock(i));
784        for (unsigned i : reverse(IncomingVals))
785          PN.removeIncomingValue(i, false);
786        PN.addIncoming(NewPN, NewBB);
787      }
788    }
789  }
790  
791  void CodeExtractor::splitReturnBlocks() {
792    for (BasicBlock *Block : Blocks)
793      if (ReturnInst *RI = dyn_cast<ReturnInst>(Block->getTerminator())) {
794        BasicBlock *New =
795            Block->splitBasicBlock(RI->getIterator(), Block->getName() + ".ret");
796        if (DT) {
797          // Old dominates New. New node dominates all other nodes dominated
798          // by Old.
799          DomTreeNode *OldNode = DT->getNode(Block);
800          SmallVector<DomTreeNode *, 8> Children(OldNode->begin(),
801                                                 OldNode->end());
802  
803          DomTreeNode *NewNode = DT->addNewBlock(New, Block);
804  
805          for (DomTreeNode *I : Children)
806            DT->changeImmediateDominator(I, NewNode);
807        }
808      }
809  }
810  
811  /// constructFunction - make a function based on inputs and outputs, as follows:
812  /// f(in0, ..., inN, out0, ..., outN)
813  Function *CodeExtractor::constructFunction(const ValueSet &inputs,
814                                             const ValueSet &outputs,
815                                             BasicBlock *header,
816                                             BasicBlock *newRootNode,
817                                             BasicBlock *newHeader,
818                                             Function *oldFunction,
819                                             Module *M) {
820    LLVM_DEBUG(dbgs() << "inputs: " << inputs.size() << "\n");
821    LLVM_DEBUG(dbgs() << "outputs: " << outputs.size() << "\n");
822  
823    // This function returns unsigned, outputs will go back by reference.
824    switch (NumExitBlocks) {
825    case 0:
826    case 1: RetTy = Type::getVoidTy(header->getContext()); break;
827    case 2: RetTy = Type::getInt1Ty(header->getContext()); break;
828    default: RetTy = Type::getInt16Ty(header->getContext()); break;
829    }
830  
831    std::vector<Type *> paramTy;
832  
833    // Add the types of the input values to the function's argument list
834    for (Value *value : inputs) {
835      LLVM_DEBUG(dbgs() << "value used in func: " << *value << "\n");
836      paramTy.push_back(value->getType());
837    }
838  
839    // Add the types of the output values to the function's argument list.
840    for (Value *output : outputs) {
841      LLVM_DEBUG(dbgs() << "instr used in func: " << *output << "\n");
842      if (AggregateArgs)
843        paramTy.push_back(output->getType());
844      else
845        paramTy.push_back(PointerType::getUnqual(output->getType()));
846    }
847  
848    LLVM_DEBUG({
849      dbgs() << "Function type: " << *RetTy << " f(";
850      for (Type *i : paramTy)
851        dbgs() << *i << ", ";
852      dbgs() << ")\n";
853    });
854  
855    StructType *StructTy = nullptr;
856    if (AggregateArgs && (inputs.size() + outputs.size() > 0)) {
857      StructTy = StructType::get(M->getContext(), paramTy);
858      paramTy.clear();
859      paramTy.push_back(PointerType::getUnqual(StructTy));
860    }
861    FunctionType *funcType =
862                    FunctionType::get(RetTy, paramTy,
863                                      AllowVarArgs && oldFunction->isVarArg());
864  
865    std::string SuffixToUse =
866        Suffix.empty()
867            ? (header->getName().empty() ? "extracted" : header->getName().str())
868            : Suffix;
869    // Create the new function
870    Function *newFunction = Function::Create(
871        funcType, GlobalValue::InternalLinkage, oldFunction->getAddressSpace(),
872        oldFunction->getName() + "." + SuffixToUse, M);
873    // If the old function is no-throw, so is the new one.
874    if (oldFunction->doesNotThrow())
875      newFunction->setDoesNotThrow();
876  
877    // Inherit the uwtable attribute if we need to.
878    if (oldFunction->hasUWTable())
879      newFunction->setHasUWTable();
880  
881    // Inherit all of the target dependent attributes and white-listed
882    // target independent attributes.
883    //  (e.g. If the extracted region contains a call to an x86.sse
884    //  instruction we need to make sure that the extracted region has the
885    //  "target-features" attribute allowing it to be lowered.
886    // FIXME: This should be changed to check to see if a specific
887    //           attribute can not be inherited.
888    for (const auto &Attr : oldFunction->getAttributes().getFnAttributes()) {
889      if (Attr.isStringAttribute()) {
890        if (Attr.getKindAsString() == "thunk")
891          continue;
892      } else
893        switch (Attr.getKindAsEnum()) {
894        // Those attributes cannot be propagated safely. Explicitly list them
895        // here so we get a warning if new attributes are added. This list also
896        // includes non-function attributes.
897        case Attribute::Alignment:
898        case Attribute::AllocSize:
899        case Attribute::ArgMemOnly:
900        case Attribute::Builtin:
901        case Attribute::ByVal:
902        case Attribute::Convergent:
903        case Attribute::Dereferenceable:
904        case Attribute::DereferenceableOrNull:
905        case Attribute::ElementType:
906        case Attribute::InAlloca:
907        case Attribute::InReg:
908        case Attribute::InaccessibleMemOnly:
909        case Attribute::InaccessibleMemOrArgMemOnly:
910        case Attribute::JumpTable:
911        case Attribute::Naked:
912        case Attribute::Nest:
913        case Attribute::NoAlias:
914        case Attribute::NoBuiltin:
915        case Attribute::NoCapture:
916        case Attribute::NoMerge:
917        case Attribute::NoReturn:
918        case Attribute::NoSync:
919        case Attribute::NoUndef:
920        case Attribute::None:
921        case Attribute::NonNull:
922        case Attribute::Preallocated:
923        case Attribute::ReadNone:
924        case Attribute::ReadOnly:
925        case Attribute::Returned:
926        case Attribute::ReturnsTwice:
927        case Attribute::SExt:
928        case Attribute::Speculatable:
929        case Attribute::StackAlignment:
930        case Attribute::StructRet:
931        case Attribute::SwiftError:
932        case Attribute::SwiftSelf:
933        case Attribute::SwiftAsync:
934        case Attribute::WillReturn:
935        case Attribute::WriteOnly:
936        case Attribute::ZExt:
937        case Attribute::ImmArg:
938        case Attribute::ByRef:
939        case Attribute::EndAttrKinds:
940        case Attribute::EmptyKey:
941        case Attribute::TombstoneKey:
942          continue;
943        // Those attributes should be safe to propagate to the extracted function.
944        case Attribute::AlwaysInline:
945        case Attribute::Cold:
946        case Attribute::Hot:
947        case Attribute::NoRecurse:
948        case Attribute::InlineHint:
949        case Attribute::MinSize:
950        case Attribute::NoCallback:
951        case Attribute::NoDuplicate:
952        case Attribute::NoFree:
953        case Attribute::NoImplicitFloat:
954        case Attribute::NoInline:
955        case Attribute::NonLazyBind:
956        case Attribute::NoRedZone:
957        case Attribute::NoUnwind:
958        case Attribute::NoSanitizeCoverage:
959        case Attribute::NullPointerIsValid:
960        case Attribute::OptForFuzzing:
961        case Attribute::OptimizeNone:
962        case Attribute::OptimizeForSize:
963        case Attribute::SafeStack:
964        case Attribute::ShadowCallStack:
965        case Attribute::SanitizeAddress:
966        case Attribute::SanitizeMemory:
967        case Attribute::SanitizeThread:
968        case Attribute::SanitizeHWAddress:
969        case Attribute::SanitizeMemTag:
970        case Attribute::SpeculativeLoadHardening:
971        case Attribute::StackProtect:
972        case Attribute::StackProtectReq:
973        case Attribute::StackProtectStrong:
974        case Attribute::StrictFP:
975        case Attribute::UWTable:
976        case Attribute::VScaleRange:
977        case Attribute::NoCfCheck:
978        case Attribute::MustProgress:
979        case Attribute::NoProfile:
980          break;
981        }
982  
983      newFunction->addFnAttr(Attr);
984    }
985    newFunction->getBasicBlockList().push_back(newRootNode);
986  
987    // Create an iterator to name all of the arguments we inserted.
988    Function::arg_iterator AI = newFunction->arg_begin();
989  
990    // Rewrite all users of the inputs in the extracted region to use the
991    // arguments (or appropriate addressing into struct) instead.
992    for (unsigned i = 0, e = inputs.size(); i != e; ++i) {
993      Value *RewriteVal;
994      if (AggregateArgs) {
995        Value *Idx[2];
996        Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext()));
997        Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i);
998        Instruction *TI = newFunction->begin()->getTerminator();
999        GetElementPtrInst *GEP = GetElementPtrInst::Create(
1000            StructTy, &*AI, Idx, "gep_" + inputs[i]->getName(), TI);
1001        RewriteVal = new LoadInst(StructTy->getElementType(i), GEP,
1002                                  "loadgep_" + inputs[i]->getName(), TI);
1003      } else
1004        RewriteVal = &*AI++;
1005  
1006      std::vector<User *> Users(inputs[i]->user_begin(), inputs[i]->user_end());
1007      for (User *use : Users)
1008        if (Instruction *inst = dyn_cast<Instruction>(use))
1009          if (Blocks.count(inst->getParent()))
1010            inst->replaceUsesOfWith(inputs[i], RewriteVal);
1011    }
1012  
1013    // Set names for input and output arguments.
1014    if (!AggregateArgs) {
1015      AI = newFunction->arg_begin();
1016      for (unsigned i = 0, e = inputs.size(); i != e; ++i, ++AI)
1017        AI->setName(inputs[i]->getName());
1018      for (unsigned i = 0, e = outputs.size(); i != e; ++i, ++AI)
1019        AI->setName(outputs[i]->getName()+".out");
1020    }
1021  
1022    // Rewrite branches to basic blocks outside of the loop to new dummy blocks
1023    // within the new function. This must be done before we lose track of which
1024    // blocks were originally in the code region.
1025    std::vector<User *> Users(header->user_begin(), header->user_end());
1026    for (auto &U : Users)
1027      // The BasicBlock which contains the branch is not in the region
1028      // modify the branch target to a new block
1029      if (Instruction *I = dyn_cast<Instruction>(U))
1030        if (I->isTerminator() && I->getFunction() == oldFunction &&
1031            !Blocks.count(I->getParent()))
1032          I->replaceUsesOfWith(header, newHeader);
1033  
1034    return newFunction;
1035  }
1036  
1037  /// Erase lifetime.start markers which reference inputs to the extraction
1038  /// region, and insert the referenced memory into \p LifetimesStart.
1039  ///
1040  /// The extraction region is defined by a set of blocks (\p Blocks), and a set
1041  /// of allocas which will be moved from the caller function into the extracted
1042  /// function (\p SunkAllocas).
1043  static void eraseLifetimeMarkersOnInputs(const SetVector<BasicBlock *> &Blocks,
1044                                           const SetVector<Value *> &SunkAllocas,
1045                                           SetVector<Value *> &LifetimesStart) {
1046    for (BasicBlock *BB : Blocks) {
1047      for (auto It = BB->begin(), End = BB->end(); It != End;) {
1048        auto *II = dyn_cast<IntrinsicInst>(&*It);
1049        ++It;
1050        if (!II || !II->isLifetimeStartOrEnd())
1051          continue;
1052  
1053        // Get the memory operand of the lifetime marker. If the underlying
1054        // object is a sunk alloca, or is otherwise defined in the extraction
1055        // region, the lifetime marker must not be erased.
1056        Value *Mem = II->getOperand(1)->stripInBoundsOffsets();
1057        if (SunkAllocas.count(Mem) || definedInRegion(Blocks, Mem))
1058          continue;
1059  
1060        if (II->getIntrinsicID() == Intrinsic::lifetime_start)
1061          LifetimesStart.insert(Mem);
1062        II->eraseFromParent();
1063      }
1064    }
1065  }
1066  
1067  /// Insert lifetime start/end markers surrounding the call to the new function
1068  /// for objects defined in the caller.
1069  static void insertLifetimeMarkersSurroundingCall(
1070      Module *M, ArrayRef<Value *> LifetimesStart, ArrayRef<Value *> LifetimesEnd,
1071      CallInst *TheCall) {
1072    LLVMContext &Ctx = M->getContext();
1073    auto Int8PtrTy = Type::getInt8PtrTy(Ctx);
1074    auto NegativeOne = ConstantInt::getSigned(Type::getInt64Ty(Ctx), -1);
1075    Instruction *Term = TheCall->getParent()->getTerminator();
1076  
1077    // The memory argument to a lifetime marker must be a i8*. Cache any bitcasts
1078    // needed to satisfy this requirement so they may be reused.
1079    DenseMap<Value *, Value *> Bitcasts;
1080  
1081    // Emit lifetime markers for the pointers given in \p Objects. Insert the
1082    // markers before the call if \p InsertBefore, and after the call otherwise.
1083    auto insertMarkers = [&](Function *MarkerFunc, ArrayRef<Value *> Objects,
1084                             bool InsertBefore) {
1085      for (Value *Mem : Objects) {
1086        assert((!isa<Instruction>(Mem) || cast<Instruction>(Mem)->getFunction() ==
1087                                              TheCall->getFunction()) &&
1088               "Input memory not defined in original function");
1089        Value *&MemAsI8Ptr = Bitcasts[Mem];
1090        if (!MemAsI8Ptr) {
1091          if (Mem->getType() == Int8PtrTy)
1092            MemAsI8Ptr = Mem;
1093          else
1094            MemAsI8Ptr =
1095                CastInst::CreatePointerCast(Mem, Int8PtrTy, "lt.cast", TheCall);
1096        }
1097  
1098        auto Marker = CallInst::Create(MarkerFunc, {NegativeOne, MemAsI8Ptr});
1099        if (InsertBefore)
1100          Marker->insertBefore(TheCall);
1101        else
1102          Marker->insertBefore(Term);
1103      }
1104    };
1105  
1106    if (!LifetimesStart.empty()) {
1107      auto StartFn = llvm::Intrinsic::getDeclaration(
1108          M, llvm::Intrinsic::lifetime_start, Int8PtrTy);
1109      insertMarkers(StartFn, LifetimesStart, /*InsertBefore=*/true);
1110    }
1111  
1112    if (!LifetimesEnd.empty()) {
1113      auto EndFn = llvm::Intrinsic::getDeclaration(
1114          M, llvm::Intrinsic::lifetime_end, Int8PtrTy);
1115      insertMarkers(EndFn, LifetimesEnd, /*InsertBefore=*/false);
1116    }
1117  }
1118  
1119  /// emitCallAndSwitchStatement - This method sets up the caller side by adding
1120  /// the call instruction, splitting any PHI nodes in the header block as
1121  /// necessary.
1122  CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
1123                                                      BasicBlock *codeReplacer,
1124                                                      ValueSet &inputs,
1125                                                      ValueSet &outputs) {
1126    // Emit a call to the new function, passing in: *pointer to struct (if
1127    // aggregating parameters), or plan inputs and allocated memory for outputs
1128    std::vector<Value *> params, StructValues, ReloadOutputs, Reloads;
1129  
1130    Module *M = newFunction->getParent();
1131    LLVMContext &Context = M->getContext();
1132    const DataLayout &DL = M->getDataLayout();
1133    CallInst *call = nullptr;
1134  
1135    // Add inputs as params, or to be filled into the struct
1136    unsigned ArgNo = 0;
1137    SmallVector<unsigned, 1> SwiftErrorArgs;
1138    for (Value *input : inputs) {
1139      if (AggregateArgs)
1140        StructValues.push_back(input);
1141      else {
1142        params.push_back(input);
1143        if (input->isSwiftError())
1144          SwiftErrorArgs.push_back(ArgNo);
1145      }
1146      ++ArgNo;
1147    }
1148  
1149    // Create allocas for the outputs
1150    for (Value *output : outputs) {
1151      if (AggregateArgs) {
1152        StructValues.push_back(output);
1153      } else {
1154        AllocaInst *alloca =
1155          new AllocaInst(output->getType(), DL.getAllocaAddrSpace(),
1156                         nullptr, output->getName() + ".loc",
1157                         &codeReplacer->getParent()->front().front());
1158        ReloadOutputs.push_back(alloca);
1159        params.push_back(alloca);
1160      }
1161    }
1162  
1163    StructType *StructArgTy = nullptr;
1164    AllocaInst *Struct = nullptr;
1165    if (AggregateArgs && (inputs.size() + outputs.size() > 0)) {
1166      std::vector<Type *> ArgTypes;
1167      for (Value *V : StructValues)
1168        ArgTypes.push_back(V->getType());
1169  
1170      // Allocate a struct at the beginning of this function
1171      StructArgTy = StructType::get(newFunction->getContext(), ArgTypes);
1172      Struct = new AllocaInst(StructArgTy, DL.getAllocaAddrSpace(), nullptr,
1173                              "structArg",
1174                              &codeReplacer->getParent()->front().front());
1175      params.push_back(Struct);
1176  
1177      for (unsigned i = 0, e = inputs.size(); i != e; ++i) {
1178        Value *Idx[2];
1179        Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
1180        Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i);
1181        GetElementPtrInst *GEP = GetElementPtrInst::Create(
1182            StructArgTy, Struct, Idx, "gep_" + StructValues[i]->getName());
1183        codeReplacer->getInstList().push_back(GEP);
1184        new StoreInst(StructValues[i], GEP, codeReplacer);
1185      }
1186    }
1187  
1188    // Emit the call to the function
1189    call = CallInst::Create(newFunction, params,
1190                            NumExitBlocks > 1 ? "targetBlock" : "");
1191    // Add debug location to the new call, if the original function has debug
1192    // info. In that case, the terminator of the entry block of the extracted
1193    // function contains the first debug location of the extracted function,
1194    // set in extractCodeRegion.
1195    if (codeReplacer->getParent()->getSubprogram()) {
1196      if (auto DL = newFunction->getEntryBlock().getTerminator()->getDebugLoc())
1197        call->setDebugLoc(DL);
1198    }
1199    codeReplacer->getInstList().push_back(call);
1200  
1201    // Set swifterror parameter attributes.
1202    for (unsigned SwiftErrArgNo : SwiftErrorArgs) {
1203      call->addParamAttr(SwiftErrArgNo, Attribute::SwiftError);
1204      newFunction->addParamAttr(SwiftErrArgNo, Attribute::SwiftError);
1205    }
1206  
1207    Function::arg_iterator OutputArgBegin = newFunction->arg_begin();
1208    unsigned FirstOut = inputs.size();
1209    if (!AggregateArgs)
1210      std::advance(OutputArgBegin, inputs.size());
1211  
1212    // Reload the outputs passed in by reference.
1213    for (unsigned i = 0, e = outputs.size(); i != e; ++i) {
1214      Value *Output = nullptr;
1215      if (AggregateArgs) {
1216        Value *Idx[2];
1217        Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
1218        Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i);
1219        GetElementPtrInst *GEP = GetElementPtrInst::Create(
1220            StructArgTy, Struct, Idx, "gep_reload_" + outputs[i]->getName());
1221        codeReplacer->getInstList().push_back(GEP);
1222        Output = GEP;
1223      } else {
1224        Output = ReloadOutputs[i];
1225      }
1226      LoadInst *load = new LoadInst(outputs[i]->getType(), Output,
1227                                    outputs[i]->getName() + ".reload",
1228                                    codeReplacer);
1229      Reloads.push_back(load);
1230      std::vector<User *> Users(outputs[i]->user_begin(), outputs[i]->user_end());
1231      for (unsigned u = 0, e = Users.size(); u != e; ++u) {
1232        Instruction *inst = cast<Instruction>(Users[u]);
1233        if (!Blocks.count(inst->getParent()))
1234          inst->replaceUsesOfWith(outputs[i], load);
1235      }
1236    }
1237  
1238    // Now we can emit a switch statement using the call as a value.
1239    SwitchInst *TheSwitch =
1240        SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)),
1241                           codeReplacer, 0, codeReplacer);
1242  
1243    // Since there may be multiple exits from the original region, make the new
1244    // function return an unsigned, switch on that number.  This loop iterates
1245    // over all of the blocks in the extracted region, updating any terminator
1246    // instructions in the to-be-extracted region that branch to blocks that are
1247    // not in the region to be extracted.
1248    std::map<BasicBlock *, BasicBlock *> ExitBlockMap;
1249  
1250    unsigned switchVal = 0;
1251    for (BasicBlock *Block : Blocks) {
1252      Instruction *TI = Block->getTerminator();
1253      for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
1254        if (!Blocks.count(TI->getSuccessor(i))) {
1255          BasicBlock *OldTarget = TI->getSuccessor(i);
1256          // add a new basic block which returns the appropriate value
1257          BasicBlock *&NewTarget = ExitBlockMap[OldTarget];
1258          if (!NewTarget) {
1259            // If we don't already have an exit stub for this non-extracted
1260            // destination, create one now!
1261            NewTarget = BasicBlock::Create(Context,
1262                                           OldTarget->getName() + ".exitStub",
1263                                           newFunction);
1264            unsigned SuccNum = switchVal++;
1265  
1266            Value *brVal = nullptr;
1267            switch (NumExitBlocks) {
1268            case 0:
1269            case 1: break;  // No value needed.
1270            case 2:         // Conditional branch, return a bool
1271              brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum);
1272              break;
1273            default:
1274              brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum);
1275              break;
1276            }
1277  
1278            ReturnInst::Create(Context, brVal, NewTarget);
1279  
1280            // Update the switch instruction.
1281            TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context),
1282                                                SuccNum),
1283                               OldTarget);
1284          }
1285  
1286          // rewrite the original branch instruction with this new target
1287          TI->setSuccessor(i, NewTarget);
1288        }
1289    }
1290  
1291    // Store the arguments right after the definition of output value.
1292    // This should be proceeded after creating exit stubs to be ensure that invoke
1293    // result restore will be placed in the outlined function.
1294    Function::arg_iterator OAI = OutputArgBegin;
1295    for (unsigned i = 0, e = outputs.size(); i != e; ++i) {
1296      auto *OutI = dyn_cast<Instruction>(outputs[i]);
1297      if (!OutI)
1298        continue;
1299  
1300      // Find proper insertion point.
1301      BasicBlock::iterator InsertPt;
1302      // In case OutI is an invoke, we insert the store at the beginning in the
1303      // 'normal destination' BB. Otherwise we insert the store right after OutI.
1304      if (auto *InvokeI = dyn_cast<InvokeInst>(OutI))
1305        InsertPt = InvokeI->getNormalDest()->getFirstInsertionPt();
1306      else if (auto *Phi = dyn_cast<PHINode>(OutI))
1307        InsertPt = Phi->getParent()->getFirstInsertionPt();
1308      else
1309        InsertPt = std::next(OutI->getIterator());
1310  
1311      Instruction *InsertBefore = &*InsertPt;
1312      assert((InsertBefore->getFunction() == newFunction ||
1313              Blocks.count(InsertBefore->getParent())) &&
1314             "InsertPt should be in new function");
1315      assert(OAI != newFunction->arg_end() &&
1316             "Number of output arguments should match "
1317             "the amount of defined values");
1318      if (AggregateArgs) {
1319        Value *Idx[2];
1320        Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
1321        Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i);
1322        GetElementPtrInst *GEP = GetElementPtrInst::Create(
1323            StructArgTy, &*OAI, Idx, "gep_" + outputs[i]->getName(),
1324            InsertBefore);
1325        new StoreInst(outputs[i], GEP, InsertBefore);
1326        // Since there should be only one struct argument aggregating
1327        // all the output values, we shouldn't increment OAI, which always
1328        // points to the struct argument, in this case.
1329      } else {
1330        new StoreInst(outputs[i], &*OAI, InsertBefore);
1331        ++OAI;
1332      }
1333    }
1334  
1335    // Now that we've done the deed, simplify the switch instruction.
1336    Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType();
1337    switch (NumExitBlocks) {
1338    case 0:
1339      // There are no successors (the block containing the switch itself), which
1340      // means that previously this was the last part of the function, and hence
1341      // this should be rewritten as a `ret'
1342  
1343      // Check if the function should return a value
1344      if (OldFnRetTy->isVoidTy()) {
1345        ReturnInst::Create(Context, nullptr, TheSwitch);  // Return void
1346      } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) {
1347        // return what we have
1348        ReturnInst::Create(Context, TheSwitch->getCondition(), TheSwitch);
1349      } else {
1350        // Otherwise we must have code extracted an unwind or something, just
1351        // return whatever we want.
1352        ReturnInst::Create(Context,
1353                           Constant::getNullValue(OldFnRetTy), TheSwitch);
1354      }
1355  
1356      TheSwitch->eraseFromParent();
1357      break;
1358    case 1:
1359      // Only a single destination, change the switch into an unconditional
1360      // branch.
1361      BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch);
1362      TheSwitch->eraseFromParent();
1363      break;
1364    case 2:
1365      BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch->getSuccessor(2),
1366                         call, TheSwitch);
1367      TheSwitch->eraseFromParent();
1368      break;
1369    default:
1370      // Otherwise, make the default destination of the switch instruction be one
1371      // of the other successors.
1372      TheSwitch->setCondition(call);
1373      TheSwitch->setDefaultDest(TheSwitch->getSuccessor(NumExitBlocks));
1374      // Remove redundant case
1375      TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks-1));
1376      break;
1377    }
1378  
1379    // Insert lifetime markers around the reloads of any output values. The
1380    // allocas output values are stored in are only in-use in the codeRepl block.
1381    insertLifetimeMarkersSurroundingCall(M, ReloadOutputs, ReloadOutputs, call);
1382  
1383    return call;
1384  }
1385  
1386  void CodeExtractor::moveCodeToFunction(Function *newFunction) {
1387    Function *oldFunc = (*Blocks.begin())->getParent();
1388    Function::BasicBlockListType &oldBlocks = oldFunc->getBasicBlockList();
1389    Function::BasicBlockListType &newBlocks = newFunction->getBasicBlockList();
1390  
1391    for (BasicBlock *Block : Blocks) {
1392      // Delete the basic block from the old function, and the list of blocks
1393      oldBlocks.remove(Block);
1394  
1395      // Insert this basic block into the new function
1396      newBlocks.push_back(Block);
1397    }
1398  }
1399  
1400  void CodeExtractor::calculateNewCallTerminatorWeights(
1401      BasicBlock *CodeReplacer,
1402      DenseMap<BasicBlock *, BlockFrequency> &ExitWeights,
1403      BranchProbabilityInfo *BPI) {
1404    using Distribution = BlockFrequencyInfoImplBase::Distribution;
1405    using BlockNode = BlockFrequencyInfoImplBase::BlockNode;
1406  
1407    // Update the branch weights for the exit block.
1408    Instruction *TI = CodeReplacer->getTerminator();
1409    SmallVector<unsigned, 8> BranchWeights(TI->getNumSuccessors(), 0);
1410  
1411    // Block Frequency distribution with dummy node.
1412    Distribution BranchDist;
1413  
1414    SmallVector<BranchProbability, 4> EdgeProbabilities(
1415        TI->getNumSuccessors(), BranchProbability::getUnknown());
1416  
1417    // Add each of the frequencies of the successors.
1418    for (unsigned i = 0, e = TI->getNumSuccessors(); i < e; ++i) {
1419      BlockNode ExitNode(i);
1420      uint64_t ExitFreq = ExitWeights[TI->getSuccessor(i)].getFrequency();
1421      if (ExitFreq != 0)
1422        BranchDist.addExit(ExitNode, ExitFreq);
1423      else
1424        EdgeProbabilities[i] = BranchProbability::getZero();
1425    }
1426  
1427    // Check for no total weight.
1428    if (BranchDist.Total == 0) {
1429      BPI->setEdgeProbability(CodeReplacer, EdgeProbabilities);
1430      return;
1431    }
1432  
1433    // Normalize the distribution so that they can fit in unsigned.
1434    BranchDist.normalize();
1435  
1436    // Create normalized branch weights and set the metadata.
1437    for (unsigned I = 0, E = BranchDist.Weights.size(); I < E; ++I) {
1438      const auto &Weight = BranchDist.Weights[I];
1439  
1440      // Get the weight and update the current BFI.
1441      BranchWeights[Weight.TargetNode.Index] = Weight.Amount;
1442      BranchProbability BP(Weight.Amount, BranchDist.Total);
1443      EdgeProbabilities[Weight.TargetNode.Index] = BP;
1444    }
1445    BPI->setEdgeProbability(CodeReplacer, EdgeProbabilities);
1446    TI->setMetadata(
1447        LLVMContext::MD_prof,
1448        MDBuilder(TI->getContext()).createBranchWeights(BranchWeights));
1449  }
1450  
1451  /// Erase debug info intrinsics which refer to values in \p F but aren't in
1452  /// \p F.
1453  static void eraseDebugIntrinsicsWithNonLocalRefs(Function &F) {
1454    for (Instruction &I : instructions(F)) {
1455      SmallVector<DbgVariableIntrinsic *, 4> DbgUsers;
1456      findDbgUsers(DbgUsers, &I);
1457      for (DbgVariableIntrinsic *DVI : DbgUsers)
1458        if (DVI->getFunction() != &F)
1459          DVI->eraseFromParent();
1460    }
1461  }
1462  
1463  /// Fix up the debug info in the old and new functions by pointing line
1464  /// locations and debug intrinsics to the new subprogram scope, and by deleting
1465  /// intrinsics which point to values outside of the new function.
1466  static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc,
1467                                           CallInst &TheCall) {
1468    DISubprogram *OldSP = OldFunc.getSubprogram();
1469    LLVMContext &Ctx = OldFunc.getContext();
1470  
1471    if (!OldSP) {
1472      // Erase any debug info the new function contains.
1473      stripDebugInfo(NewFunc);
1474      // Make sure the old function doesn't contain any non-local metadata refs.
1475      eraseDebugIntrinsicsWithNonLocalRefs(NewFunc);
1476      return;
1477    }
1478  
1479    // Create a subprogram for the new function. Leave out a description of the
1480    // function arguments, as the parameters don't correspond to anything at the
1481    // source level.
1482    assert(OldSP->getUnit() && "Missing compile unit for subprogram");
1483    DIBuilder DIB(*OldFunc.getParent(), /*AllowUnresolved=*/false,
1484                  OldSP->getUnit());
1485    auto SPType = DIB.createSubroutineType(DIB.getOrCreateTypeArray(None));
1486    DISubprogram::DISPFlags SPFlags = DISubprogram::SPFlagDefinition |
1487                                      DISubprogram::SPFlagOptimized |
1488                                      DISubprogram::SPFlagLocalToUnit;
1489    auto NewSP = DIB.createFunction(
1490        OldSP->getUnit(), NewFunc.getName(), NewFunc.getName(), OldSP->getFile(),
1491        /*LineNo=*/0, SPType, /*ScopeLine=*/0, DINode::FlagZero, SPFlags);
1492    NewFunc.setSubprogram(NewSP);
1493  
1494    // Debug intrinsics in the new function need to be updated in one of two
1495    // ways:
1496    //  1) They need to be deleted, because they describe a value in the old
1497    //     function.
1498    //  2) They need to point to fresh metadata, e.g. because they currently
1499    //     point to a variable in the wrong scope.
1500    SmallDenseMap<DINode *, DINode *> RemappedMetadata;
1501    SmallVector<Instruction *, 4> DebugIntrinsicsToDelete;
1502    for (Instruction &I : instructions(NewFunc)) {
1503      auto *DII = dyn_cast<DbgInfoIntrinsic>(&I);
1504      if (!DII)
1505        continue;
1506  
1507      // Point the intrinsic to a fresh label within the new function.
1508      if (auto *DLI = dyn_cast<DbgLabelInst>(&I)) {
1509        DILabel *OldLabel = DLI->getLabel();
1510        DINode *&NewLabel = RemappedMetadata[OldLabel];
1511        if (!NewLabel)
1512          NewLabel = DILabel::get(Ctx, NewSP, OldLabel->getName(),
1513                                  OldLabel->getFile(), OldLabel->getLine());
1514        DLI->setArgOperand(0, MetadataAsValue::get(Ctx, NewLabel));
1515        continue;
1516      }
1517  
1518      auto IsInvalidLocation = [&NewFunc](Value *Location) {
1519        // Location is invalid if it isn't a constant or an instruction, or is an
1520        // instruction but isn't in the new function.
1521        if (!Location ||
1522            (!isa<Constant>(Location) && !isa<Instruction>(Location)))
1523          return true;
1524        Instruction *LocationInst = dyn_cast<Instruction>(Location);
1525        return LocationInst && LocationInst->getFunction() != &NewFunc;
1526      };
1527  
1528      auto *DVI = cast<DbgVariableIntrinsic>(DII);
1529      // If any of the used locations are invalid, delete the intrinsic.
1530      if (any_of(DVI->location_ops(), IsInvalidLocation)) {
1531        DebugIntrinsicsToDelete.push_back(DVI);
1532        continue;
1533      }
1534  
1535      // Point the intrinsic to a fresh variable within the new function.
1536      DILocalVariable *OldVar = DVI->getVariable();
1537      DINode *&NewVar = RemappedMetadata[OldVar];
1538      if (!NewVar)
1539        NewVar = DIB.createAutoVariable(
1540            NewSP, OldVar->getName(), OldVar->getFile(), OldVar->getLine(),
1541            OldVar->getType(), /*AlwaysPreserve=*/false, DINode::FlagZero,
1542            OldVar->getAlignInBits());
1543      DVI->setVariable(cast<DILocalVariable>(NewVar));
1544    }
1545    for (auto *DII : DebugIntrinsicsToDelete)
1546      DII->eraseFromParent();
1547    DIB.finalizeSubprogram(NewSP);
1548  
1549    // Fix up the scope information attached to the line locations in the new
1550    // function.
1551    for (Instruction &I : instructions(NewFunc)) {
1552      if (const DebugLoc &DL = I.getDebugLoc())
1553        I.setDebugLoc(DILocation::get(Ctx, DL.getLine(), DL.getCol(), NewSP));
1554  
1555      // Loop info metadata may contain line locations. Fix them up.
1556      auto updateLoopInfoLoc = [&Ctx, NewSP](Metadata *MD) -> Metadata * {
1557        if (auto *Loc = dyn_cast_or_null<DILocation>(MD))
1558          return DILocation::get(Ctx, Loc->getLine(), Loc->getColumn(), NewSP,
1559                                 nullptr);
1560        return MD;
1561      };
1562      updateLoopMetadataDebugLocations(I, updateLoopInfoLoc);
1563    }
1564    if (!TheCall.getDebugLoc())
1565      TheCall.setDebugLoc(DILocation::get(Ctx, 0, 0, OldSP));
1566  
1567    eraseDebugIntrinsicsWithNonLocalRefs(NewFunc);
1568  }
1569  
1570  Function *
1571  CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC) {
1572    if (!isEligible())
1573      return nullptr;
1574  
1575    // Assumption: this is a single-entry code region, and the header is the first
1576    // block in the region.
1577    BasicBlock *header = *Blocks.begin();
1578    Function *oldFunction = header->getParent();
1579  
1580    // Calculate the entry frequency of the new function before we change the root
1581    //   block.
1582    BlockFrequency EntryFreq;
1583    if (BFI) {
1584      assert(BPI && "Both BPI and BFI are required to preserve profile info");
1585      for (BasicBlock *Pred : predecessors(header)) {
1586        if (Blocks.count(Pred))
1587          continue;
1588        EntryFreq +=
1589            BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, header);
1590      }
1591    }
1592  
1593    // Remove @llvm.assume calls that will be moved to the new function from the
1594    // old function's assumption cache.
1595    for (BasicBlock *Block : Blocks) {
1596      for (auto It = Block->begin(), End = Block->end(); It != End;) {
1597        Instruction *I = &*It;
1598        ++It;
1599  
1600        if (auto *AI = dyn_cast<AssumeInst>(I)) {
1601          if (AC)
1602            AC->unregisterAssumption(AI);
1603          AI->eraseFromParent();
1604        }
1605      }
1606    }
1607  
1608    // If we have any return instructions in the region, split those blocks so
1609    // that the return is not in the region.
1610    splitReturnBlocks();
1611  
1612    // Calculate the exit blocks for the extracted region and the total exit
1613    // weights for each of those blocks.
1614    DenseMap<BasicBlock *, BlockFrequency> ExitWeights;
1615    SmallPtrSet<BasicBlock *, 1> ExitBlocks;
1616    for (BasicBlock *Block : Blocks) {
1617      for (BasicBlock *Succ : successors(Block)) {
1618        if (!Blocks.count(Succ)) {
1619          // Update the branch weight for this successor.
1620          if (BFI) {
1621            BlockFrequency &BF = ExitWeights[Succ];
1622            BF += BFI->getBlockFreq(Block) * BPI->getEdgeProbability(Block, Succ);
1623          }
1624          ExitBlocks.insert(Succ);
1625        }
1626      }
1627    }
1628    NumExitBlocks = ExitBlocks.size();
1629  
1630    // If we have to split PHI nodes of the entry or exit blocks, do so now.
1631    severSplitPHINodesOfEntry(header);
1632    severSplitPHINodesOfExits(ExitBlocks);
1633  
1634    // This takes place of the original loop
1635    BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(),
1636                                                  "codeRepl", oldFunction,
1637                                                  header);
1638  
1639    // The new function needs a root node because other nodes can branch to the
1640    // head of the region, but the entry node of a function cannot have preds.
1641    BasicBlock *newFuncRoot = BasicBlock::Create(header->getContext(),
1642                                                 "newFuncRoot");
1643    auto *BranchI = BranchInst::Create(header);
1644    // If the original function has debug info, we have to add a debug location
1645    // to the new branch instruction from the artificial entry block.
1646    // We use the debug location of the first instruction in the extracted
1647    // blocks, as there is no other equivalent line in the source code.
1648    if (oldFunction->getSubprogram()) {
1649      any_of(Blocks, [&BranchI](const BasicBlock *BB) {
1650        return any_of(*BB, [&BranchI](const Instruction &I) {
1651          if (!I.getDebugLoc())
1652            return false;
1653          BranchI->setDebugLoc(I.getDebugLoc());
1654          return true;
1655        });
1656      });
1657    }
1658    newFuncRoot->getInstList().push_back(BranchI);
1659  
1660    ValueSet inputs, outputs, SinkingCands, HoistingCands;
1661    BasicBlock *CommonExit = nullptr;
1662    findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
1663    assert(HoistingCands.empty() || CommonExit);
1664  
1665    // Find inputs to, outputs from the code region.
1666    findInputsOutputs(inputs, outputs, SinkingCands);
1667  
1668    // Now sink all instructions which only have non-phi uses inside the region.
1669    // Group the allocas at the start of the block, so that any bitcast uses of
1670    // the allocas are well-defined.
1671    AllocaInst *FirstSunkAlloca = nullptr;
1672    for (auto *II : SinkingCands) {
1673      if (auto *AI = dyn_cast<AllocaInst>(II)) {
1674        AI->moveBefore(*newFuncRoot, newFuncRoot->getFirstInsertionPt());
1675        if (!FirstSunkAlloca)
1676          FirstSunkAlloca = AI;
1677      }
1678    }
1679    assert((SinkingCands.empty() || FirstSunkAlloca) &&
1680           "Did not expect a sink candidate without any allocas");
1681    for (auto *II : SinkingCands) {
1682      if (!isa<AllocaInst>(II)) {
1683        cast<Instruction>(II)->moveAfter(FirstSunkAlloca);
1684      }
1685    }
1686  
1687    if (!HoistingCands.empty()) {
1688      auto *HoistToBlock = findOrCreateBlockForHoisting(CommonExit);
1689      Instruction *TI = HoistToBlock->getTerminator();
1690      for (auto *II : HoistingCands)
1691        cast<Instruction>(II)->moveBefore(TI);
1692    }
1693  
1694    // Collect objects which are inputs to the extraction region and also
1695    // referenced by lifetime start markers within it. The effects of these
1696    // markers must be replicated in the calling function to prevent the stack
1697    // coloring pass from merging slots which store input objects.
1698    ValueSet LifetimesStart;
1699    eraseLifetimeMarkersOnInputs(Blocks, SinkingCands, LifetimesStart);
1700  
1701    // Construct new function based on inputs/outputs & add allocas for all defs.
1702    Function *newFunction =
1703        constructFunction(inputs, outputs, header, newFuncRoot, codeReplacer,
1704                          oldFunction, oldFunction->getParent());
1705  
1706    // Update the entry count of the function.
1707    if (BFI) {
1708      auto Count = BFI->getProfileCountFromFreq(EntryFreq.getFrequency());
1709      if (Count.hasValue())
1710        newFunction->setEntryCount(
1711            ProfileCount(Count.getValue(), Function::PCT_Real)); // FIXME
1712      BFI->setBlockFreq(codeReplacer, EntryFreq.getFrequency());
1713    }
1714  
1715    CallInst *TheCall =
1716        emitCallAndSwitchStatement(newFunction, codeReplacer, inputs, outputs);
1717  
1718    moveCodeToFunction(newFunction);
1719  
1720    // Replicate the effects of any lifetime start/end markers which referenced
1721    // input objects in the extraction region by placing markers around the call.
1722    insertLifetimeMarkersSurroundingCall(
1723        oldFunction->getParent(), LifetimesStart.getArrayRef(), {}, TheCall);
1724  
1725    // Propagate personality info to the new function if there is one.
1726    if (oldFunction->hasPersonalityFn())
1727      newFunction->setPersonalityFn(oldFunction->getPersonalityFn());
1728  
1729    // Update the branch weights for the exit block.
1730    if (BFI && NumExitBlocks > 1)
1731      calculateNewCallTerminatorWeights(codeReplacer, ExitWeights, BPI);
1732  
1733    // Loop over all of the PHI nodes in the header and exit blocks, and change
1734    // any references to the old incoming edge to be the new incoming edge.
1735    for (BasicBlock::iterator I = header->begin(); isa<PHINode>(I); ++I) {
1736      PHINode *PN = cast<PHINode>(I);
1737      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
1738        if (!Blocks.count(PN->getIncomingBlock(i)))
1739          PN->setIncomingBlock(i, newFuncRoot);
1740    }
1741  
1742    for (BasicBlock *ExitBB : ExitBlocks)
1743      for (PHINode &PN : ExitBB->phis()) {
1744        Value *IncomingCodeReplacerVal = nullptr;
1745        for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
1746          // Ignore incoming values from outside of the extracted region.
1747          if (!Blocks.count(PN.getIncomingBlock(i)))
1748            continue;
1749  
1750          // Ensure that there is only one incoming value from codeReplacer.
1751          if (!IncomingCodeReplacerVal) {
1752            PN.setIncomingBlock(i, codeReplacer);
1753            IncomingCodeReplacerVal = PN.getIncomingValue(i);
1754          } else
1755            assert(IncomingCodeReplacerVal == PN.getIncomingValue(i) &&
1756                   "PHI has two incompatbile incoming values from codeRepl");
1757        }
1758      }
1759  
1760    fixupDebugInfoPostExtraction(*oldFunction, *newFunction, *TheCall);
1761  
1762    // Mark the new function `noreturn` if applicable. Terminators which resume
1763    // exception propagation are treated as returning instructions. This is to
1764    // avoid inserting traps after calls to outlined functions which unwind.
1765    bool doesNotReturn = none_of(*newFunction, [](const BasicBlock &BB) {
1766      const Instruction *Term = BB.getTerminator();
1767      return isa<ReturnInst>(Term) || isa<ResumeInst>(Term);
1768    });
1769    if (doesNotReturn)
1770      newFunction->setDoesNotReturn();
1771  
1772    LLVM_DEBUG(if (verifyFunction(*newFunction, &errs())) {
1773      newFunction->dump();
1774      report_fatal_error("verification of newFunction failed!");
1775    });
1776    LLVM_DEBUG(if (verifyFunction(*oldFunction))
1777               report_fatal_error("verification of oldFunction failed!"));
1778    LLVM_DEBUG(if (AC && verifyAssumptionCache(*oldFunction, *newFunction, AC))
1779                   report_fatal_error("Stale Asumption cache for old Function!"));
1780    return newFunction;
1781  }
1782  
1783  bool CodeExtractor::verifyAssumptionCache(const Function &OldFunc,
1784                                            const Function &NewFunc,
1785                                            AssumptionCache *AC) {
1786    for (auto AssumeVH : AC->assumptions()) {
1787      auto *I = dyn_cast_or_null<CallInst>(AssumeVH);
1788      if (!I)
1789        continue;
1790  
1791      // There shouldn't be any llvm.assume intrinsics in the new function.
1792      if (I->getFunction() != &OldFunc)
1793        return true;
1794  
1795      // There shouldn't be any stale affected values in the assumption cache
1796      // that were previously in the old function, but that have now been moved
1797      // to the new function.
1798      for (auto AffectedValVH : AC->assumptionsFor(I->getOperand(0))) {
1799        auto *AffectedCI = dyn_cast_or_null<CallInst>(AffectedValVH);
1800        if (!AffectedCI)
1801          continue;
1802        if (AffectedCI->getFunction() != &OldFunc)
1803          return true;
1804        auto *AssumedInst = cast<Instruction>(AffectedCI->getOperand(0));
1805        if (AssumedInst->getFunction() != &OldFunc)
1806          return true;
1807      }
1808    }
1809    return false;
1810  }
1811