xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Utils/CodeExtractor.cpp (revision 8c22b9f3ba586e008e8e55a6215a1d46eb6830b9)
1  //===- CodeExtractor.cpp - Pull code region into a new function -----------===//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  //
9  // This file implements the interface to tear out a code region, such as an
10  // individual loop or a parallel section, into a new function, replacing it with
11  // a call to the new function.
12  //
13  //===----------------------------------------------------------------------===//
14  
15  #include "llvm/Transforms/Utils/CodeExtractor.h"
16  #include "llvm/ADT/ArrayRef.h"
17  #include "llvm/ADT/DenseMap.h"
18  #include "llvm/ADT/Optional.h"
19  #include "llvm/ADT/STLExtras.h"
20  #include "llvm/ADT/SetVector.h"
21  #include "llvm/ADT/SmallPtrSet.h"
22  #include "llvm/ADT/SmallVector.h"
23  #include "llvm/Analysis/AssumptionCache.h"
24  #include "llvm/Analysis/BlockFrequencyInfo.h"
25  #include "llvm/Analysis/BlockFrequencyInfoImpl.h"
26  #include "llvm/Analysis/BranchProbabilityInfo.h"
27  #include "llvm/Analysis/LoopInfo.h"
28  #include "llvm/IR/Argument.h"
29  #include "llvm/IR/Attributes.h"
30  #include "llvm/IR/BasicBlock.h"
31  #include "llvm/IR/CFG.h"
32  #include "llvm/IR/Constant.h"
33  #include "llvm/IR/Constants.h"
34  #include "llvm/IR/DIBuilder.h"
35  #include "llvm/IR/DataLayout.h"
36  #include "llvm/IR/DebugInfoMetadata.h"
37  #include "llvm/IR/DerivedTypes.h"
38  #include "llvm/IR/Dominators.h"
39  #include "llvm/IR/Function.h"
40  #include "llvm/IR/GlobalValue.h"
41  #include "llvm/IR/InstIterator.h"
42  #include "llvm/IR/InstrTypes.h"
43  #include "llvm/IR/Instruction.h"
44  #include "llvm/IR/Instructions.h"
45  #include "llvm/IR/IntrinsicInst.h"
46  #include "llvm/IR/Intrinsics.h"
47  #include "llvm/IR/LLVMContext.h"
48  #include "llvm/IR/MDBuilder.h"
49  #include "llvm/IR/Module.h"
50  #include "llvm/IR/PatternMatch.h"
51  #include "llvm/IR/Type.h"
52  #include "llvm/IR/User.h"
53  #include "llvm/IR/Value.h"
54  #include "llvm/IR/Verifier.h"
55  #include "llvm/Pass.h"
56  #include "llvm/Support/BlockFrequency.h"
57  #include "llvm/Support/BranchProbability.h"
58  #include "llvm/Support/Casting.h"
59  #include "llvm/Support/CommandLine.h"
60  #include "llvm/Support/Debug.h"
61  #include "llvm/Support/ErrorHandling.h"
62  #include "llvm/Support/raw_ostream.h"
63  #include "llvm/Transforms/Utils/BasicBlockUtils.h"
64  #include "llvm/Transforms/Utils/Local.h"
65  #include <cassert>
66  #include <cstdint>
67  #include <iterator>
68  #include <map>
69  #include <set>
70  #include <utility>
71  #include <vector>
72  
73  using namespace llvm;
74  using namespace llvm::PatternMatch;
75  using ProfileCount = Function::ProfileCount;
76  
77  #define DEBUG_TYPE "code-extractor"
78  
79  // Provide a command-line option to aggregate function arguments into a struct
80  // for functions produced by the code extractor. This is useful when converting
81  // extracted functions to pthread-based code, as only one argument (void*) can
82  // be passed in to pthread_create().
83  static cl::opt<bool>
84  AggregateArgsOpt("aggregate-extracted-args", cl::Hidden,
85                   cl::desc("Aggregate arguments to code-extracted functions"));
86  
87  /// Test whether a block is valid for extraction.
88  static bool isBlockValidForExtraction(const BasicBlock &BB,
89                                        const SetVector<BasicBlock *> &Result,
90                                        bool AllowVarArgs, bool AllowAlloca) {
91    // taking the address of a basic block moved to another function is illegal
92    if (BB.hasAddressTaken())
93      return false;
94  
95    // don't hoist code that uses another basicblock address, as it's likely to
96    // lead to unexpected behavior, like cross-function jumps
97    SmallPtrSet<User const *, 16> Visited;
98    SmallVector<User const *, 16> ToVisit;
99  
100    for (Instruction const &Inst : BB)
101      ToVisit.push_back(&Inst);
102  
103    while (!ToVisit.empty()) {
104      User const *Curr = ToVisit.pop_back_val();
105      if (!Visited.insert(Curr).second)
106        continue;
107      if (isa<BlockAddress const>(Curr))
108        return false; // even a reference to self is likely to be not compatible
109  
110      if (isa<Instruction>(Curr) && cast<Instruction>(Curr)->getParent() != &BB)
111        continue;
112  
113      for (auto const &U : Curr->operands()) {
114        if (auto *UU = dyn_cast<User>(U))
115          ToVisit.push_back(UU);
116      }
117    }
118  
119    // If explicitly requested, allow vastart and alloca. For invoke instructions
120    // verify that extraction is valid.
121    for (BasicBlock::const_iterator I = BB.begin(), E = BB.end(); I != E; ++I) {
122      if (isa<AllocaInst>(I)) {
123         if (!AllowAlloca)
124           return false;
125         continue;
126      }
127  
128      if (const auto *II = dyn_cast<InvokeInst>(I)) {
129        // Unwind destination (either a landingpad, catchswitch, or cleanuppad)
130        // must be a part of the subgraph which is being extracted.
131        if (auto *UBB = II->getUnwindDest())
132          if (!Result.count(UBB))
133            return false;
134        continue;
135      }
136  
137      // All catch handlers of a catchswitch instruction as well as the unwind
138      // destination must be in the subgraph.
139      if (const auto *CSI = dyn_cast<CatchSwitchInst>(I)) {
140        if (auto *UBB = CSI->getUnwindDest())
141          if (!Result.count(UBB))
142            return false;
143        for (auto *HBB : CSI->handlers())
144          if (!Result.count(const_cast<BasicBlock*>(HBB)))
145            return false;
146        continue;
147      }
148  
149      // Make sure that entire catch handler is within subgraph. It is sufficient
150      // to check that catch return's block is in the list.
151      if (const auto *CPI = dyn_cast<CatchPadInst>(I)) {
152        for (const auto *U : CPI->users())
153          if (const auto *CRI = dyn_cast<CatchReturnInst>(U))
154            if (!Result.count(const_cast<BasicBlock*>(CRI->getParent())))
155              return false;
156        continue;
157      }
158  
159      // And do similar checks for cleanup handler - the entire handler must be
160      // in subgraph which is going to be extracted. For cleanup return should
161      // additionally check that the unwind destination is also in the subgraph.
162      if (const auto *CPI = dyn_cast<CleanupPadInst>(I)) {
163        for (const auto *U : CPI->users())
164          if (const auto *CRI = dyn_cast<CleanupReturnInst>(U))
165            if (!Result.count(const_cast<BasicBlock*>(CRI->getParent())))
166              return false;
167        continue;
168      }
169      if (const auto *CRI = dyn_cast<CleanupReturnInst>(I)) {
170        if (auto *UBB = CRI->getUnwindDest())
171          if (!Result.count(UBB))
172            return false;
173        continue;
174      }
175  
176      if (const CallInst *CI = dyn_cast<CallInst>(I)) {
177        if (const Function *F = CI->getCalledFunction()) {
178          auto IID = F->getIntrinsicID();
179          if (IID == Intrinsic::vastart) {
180            if (AllowVarArgs)
181              continue;
182            else
183              return false;
184          }
185  
186          // Currently, we miscompile outlined copies of eh_typid_for. There are
187          // proposals for fixing this in llvm.org/PR39545.
188          if (IID == Intrinsic::eh_typeid_for)
189            return false;
190        }
191      }
192    }
193  
194    return true;
195  }
196  
197  /// Build a set of blocks to extract if the input blocks are viable.
198  static SetVector<BasicBlock *>
199  buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
200                          bool AllowVarArgs, bool AllowAlloca) {
201    assert(!BBs.empty() && "The set of blocks to extract must be non-empty");
202    SetVector<BasicBlock *> Result;
203  
204    // Loop over the blocks, adding them to our set-vector, and aborting with an
205    // empty set if we encounter invalid blocks.
206    for (BasicBlock *BB : BBs) {
207      // If this block is dead, don't process it.
208      if (DT && !DT->isReachableFromEntry(BB))
209        continue;
210  
211      if (!Result.insert(BB))
212        llvm_unreachable("Repeated basic blocks in extraction input");
213    }
214  
215    LLVM_DEBUG(dbgs() << "Region front block: " << Result.front()->getName()
216                      << '\n');
217  
218    for (auto *BB : Result) {
219      if (!isBlockValidForExtraction(*BB, Result, AllowVarArgs, AllowAlloca))
220        return {};
221  
222      // Make sure that the first block is not a landing pad.
223      if (BB == Result.front()) {
224        if (BB->isEHPad()) {
225          LLVM_DEBUG(dbgs() << "The first block cannot be an unwind block\n");
226          return {};
227        }
228        continue;
229      }
230  
231      // All blocks other than the first must not have predecessors outside of
232      // the subgraph which is being extracted.
233      for (auto *PBB : predecessors(BB))
234        if (!Result.count(PBB)) {
235          LLVM_DEBUG(dbgs() << "No blocks in this region may have entries from "
236                               "outside the region except for the first block!\n"
237                            << "Problematic source BB: " << BB->getName() << "\n"
238                            << "Problematic destination BB: " << PBB->getName()
239                            << "\n");
240          return {};
241        }
242    }
243  
244    return Result;
245  }
246  
247  CodeExtractor::CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
248                               bool AggregateArgs, BlockFrequencyInfo *BFI,
249                               BranchProbabilityInfo *BPI, AssumptionCache *AC,
250                               bool AllowVarArgs, bool AllowAlloca,
251                               std::string Suffix)
252      : DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
253        BPI(BPI), AC(AC), AllowVarArgs(AllowVarArgs),
254        Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs, AllowAlloca)),
255        Suffix(Suffix) {}
256  
257  CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs,
258                               BlockFrequencyInfo *BFI,
259                               BranchProbabilityInfo *BPI, AssumptionCache *AC,
260                               std::string Suffix)
261      : DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
262        BPI(BPI), AC(AC), AllowVarArgs(false),
263        Blocks(buildExtractionBlockSet(L.getBlocks(), &DT,
264                                       /* AllowVarArgs */ false,
265                                       /* AllowAlloca */ false)),
266        Suffix(Suffix) {}
267  
268  /// definedInRegion - Return true if the specified value is defined in the
269  /// extracted region.
270  static bool definedInRegion(const SetVector<BasicBlock *> &Blocks, Value *V) {
271    if (Instruction *I = dyn_cast<Instruction>(V))
272      if (Blocks.count(I->getParent()))
273        return true;
274    return false;
275  }
276  
277  /// definedInCaller - Return true if the specified value is defined in the
278  /// function being code extracted, but not in the region being extracted.
279  /// These values must be passed in as live-ins to the function.
280  static bool definedInCaller(const SetVector<BasicBlock *> &Blocks, Value *V) {
281    if (isa<Argument>(V)) return true;
282    if (Instruction *I = dyn_cast<Instruction>(V))
283      if (!Blocks.count(I->getParent()))
284        return true;
285    return false;
286  }
287  
288  static BasicBlock *getCommonExitBlock(const SetVector<BasicBlock *> &Blocks) {
289    BasicBlock *CommonExitBlock = nullptr;
290    auto hasNonCommonExitSucc = [&](BasicBlock *Block) {
291      for (auto *Succ : successors(Block)) {
292        // Internal edges, ok.
293        if (Blocks.count(Succ))
294          continue;
295        if (!CommonExitBlock) {
296          CommonExitBlock = Succ;
297          continue;
298        }
299        if (CommonExitBlock != Succ)
300          return true;
301      }
302      return false;
303    };
304  
305    if (any_of(Blocks, hasNonCommonExitSucc))
306      return nullptr;
307  
308    return CommonExitBlock;
309  }
310  
311  CodeExtractorAnalysisCache::CodeExtractorAnalysisCache(Function &F) {
312    for (BasicBlock &BB : F) {
313      for (Instruction &II : BB.instructionsWithoutDebug())
314        if (auto *AI = dyn_cast<AllocaInst>(&II))
315          Allocas.push_back(AI);
316  
317      findSideEffectInfoForBlock(BB);
318    }
319  }
320  
321  void CodeExtractorAnalysisCache::findSideEffectInfoForBlock(BasicBlock &BB) {
322    for (Instruction &II : BB.instructionsWithoutDebug()) {
323      unsigned Opcode = II.getOpcode();
324      Value *MemAddr = nullptr;
325      switch (Opcode) {
326      case Instruction::Store:
327      case Instruction::Load: {
328        if (Opcode == Instruction::Store) {
329          StoreInst *SI = cast<StoreInst>(&II);
330          MemAddr = SI->getPointerOperand();
331        } else {
332          LoadInst *LI = cast<LoadInst>(&II);
333          MemAddr = LI->getPointerOperand();
334        }
335        // Global variable can not be aliased with locals.
336        if (dyn_cast<Constant>(MemAddr))
337          break;
338        Value *Base = MemAddr->stripInBoundsConstantOffsets();
339        if (!isa<AllocaInst>(Base)) {
340          SideEffectingBlocks.insert(&BB);
341          return;
342        }
343        BaseMemAddrs[&BB].insert(Base);
344        break;
345      }
346      default: {
347        IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(&II);
348        if (IntrInst) {
349          if (IntrInst->isLifetimeStartOrEnd())
350            break;
351          SideEffectingBlocks.insert(&BB);
352          return;
353        }
354        // Treat all the other cases conservatively if it has side effects.
355        if (II.mayHaveSideEffects()) {
356          SideEffectingBlocks.insert(&BB);
357          return;
358        }
359      }
360      }
361    }
362  }
363  
364  bool CodeExtractorAnalysisCache::doesBlockContainClobberOfAddr(
365      BasicBlock &BB, AllocaInst *Addr) const {
366    if (SideEffectingBlocks.count(&BB))
367      return true;
368    auto It = BaseMemAddrs.find(&BB);
369    if (It != BaseMemAddrs.end())
370      return It->second.count(Addr);
371    return false;
372  }
373  
374  bool CodeExtractor::isLegalToShrinkwrapLifetimeMarkers(
375      const CodeExtractorAnalysisCache &CEAC, Instruction *Addr) const {
376    AllocaInst *AI = cast<AllocaInst>(Addr->stripInBoundsConstantOffsets());
377    Function *Func = (*Blocks.begin())->getParent();
378    for (BasicBlock &BB : *Func) {
379      if (Blocks.count(&BB))
380        continue;
381      if (CEAC.doesBlockContainClobberOfAddr(BB, AI))
382        return false;
383    }
384    return true;
385  }
386  
387  BasicBlock *
388  CodeExtractor::findOrCreateBlockForHoisting(BasicBlock *CommonExitBlock) {
389    BasicBlock *SinglePredFromOutlineRegion = nullptr;
390    assert(!Blocks.count(CommonExitBlock) &&
391           "Expect a block outside the region!");
392    for (auto *Pred : predecessors(CommonExitBlock)) {
393      if (!Blocks.count(Pred))
394        continue;
395      if (!SinglePredFromOutlineRegion) {
396        SinglePredFromOutlineRegion = Pred;
397      } else if (SinglePredFromOutlineRegion != Pred) {
398        SinglePredFromOutlineRegion = nullptr;
399        break;
400      }
401    }
402  
403    if (SinglePredFromOutlineRegion)
404      return SinglePredFromOutlineRegion;
405  
406  #ifndef NDEBUG
407    auto getFirstPHI = [](BasicBlock *BB) {
408      BasicBlock::iterator I = BB->begin();
409      PHINode *FirstPhi = nullptr;
410      while (I != BB->end()) {
411        PHINode *Phi = dyn_cast<PHINode>(I);
412        if (!Phi)
413          break;
414        if (!FirstPhi) {
415          FirstPhi = Phi;
416          break;
417        }
418      }
419      return FirstPhi;
420    };
421    // If there are any phi nodes, the single pred either exists or has already
422    // be created before code extraction.
423    assert(!getFirstPHI(CommonExitBlock) && "Phi not expected");
424  #endif
425  
426    BasicBlock *NewExitBlock = CommonExitBlock->splitBasicBlock(
427        CommonExitBlock->getFirstNonPHI()->getIterator());
428  
429    for (auto PI = pred_begin(CommonExitBlock), PE = pred_end(CommonExitBlock);
430         PI != PE;) {
431      BasicBlock *Pred = *PI++;
432      if (Blocks.count(Pred))
433        continue;
434      Pred->getTerminator()->replaceUsesOfWith(CommonExitBlock, NewExitBlock);
435    }
436    // Now add the old exit block to the outline region.
437    Blocks.insert(CommonExitBlock);
438    return CommonExitBlock;
439  }
440  
441  // Find the pair of life time markers for address 'Addr' that are either
442  // defined inside the outline region or can legally be shrinkwrapped into the
443  // outline region. If there are not other untracked uses of the address, return
444  // the pair of markers if found; otherwise return a pair of nullptr.
445  CodeExtractor::LifetimeMarkerInfo
446  CodeExtractor::getLifetimeMarkers(const CodeExtractorAnalysisCache &CEAC,
447                                    Instruction *Addr,
448                                    BasicBlock *ExitBlock) const {
449    LifetimeMarkerInfo Info;
450  
451    for (User *U : Addr->users()) {
452      IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(U);
453      if (IntrInst) {
454        // We don't model addresses with multiple start/end markers, but the
455        // markers do not need to be in the region.
456        if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start) {
457          if (Info.LifeStart)
458            return {};
459          Info.LifeStart = IntrInst;
460          continue;
461        }
462        if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_end) {
463          if (Info.LifeEnd)
464            return {};
465          Info.LifeEnd = IntrInst;
466          continue;
467        }
468        // At this point, permit debug uses outside of the region.
469        // This is fixed in a later call to fixupDebugInfoPostExtraction().
470        if (isa<DbgInfoIntrinsic>(IntrInst))
471          continue;
472      }
473      // Find untracked uses of the address, bail.
474      if (!definedInRegion(Blocks, U))
475        return {};
476    }
477  
478    if (!Info.LifeStart || !Info.LifeEnd)
479      return {};
480  
481    Info.SinkLifeStart = !definedInRegion(Blocks, Info.LifeStart);
482    Info.HoistLifeEnd = !definedInRegion(Blocks, Info.LifeEnd);
483    // Do legality check.
484    if ((Info.SinkLifeStart || Info.HoistLifeEnd) &&
485        !isLegalToShrinkwrapLifetimeMarkers(CEAC, Addr))
486      return {};
487  
488    // Check to see if we have a place to do hoisting, if not, bail.
489    if (Info.HoistLifeEnd && !ExitBlock)
490      return {};
491  
492    return Info;
493  }
494  
495  void CodeExtractor::findAllocas(const CodeExtractorAnalysisCache &CEAC,
496                                  ValueSet &SinkCands, ValueSet &HoistCands,
497                                  BasicBlock *&ExitBlock) const {
498    Function *Func = (*Blocks.begin())->getParent();
499    ExitBlock = getCommonExitBlock(Blocks);
500  
501    auto moveOrIgnoreLifetimeMarkers =
502        [&](const LifetimeMarkerInfo &LMI) -> bool {
503      if (!LMI.LifeStart)
504        return false;
505      if (LMI.SinkLifeStart) {
506        LLVM_DEBUG(dbgs() << "Sinking lifetime.start: " << *LMI.LifeStart
507                          << "\n");
508        SinkCands.insert(LMI.LifeStart);
509      }
510      if (LMI.HoistLifeEnd) {
511        LLVM_DEBUG(dbgs() << "Hoisting lifetime.end: " << *LMI.LifeEnd << "\n");
512        HoistCands.insert(LMI.LifeEnd);
513      }
514      return true;
515    };
516  
517    // Look up allocas in the original function in CodeExtractorAnalysisCache, as
518    // this is much faster than walking all the instructions.
519    for (AllocaInst *AI : CEAC.getAllocas()) {
520      BasicBlock *BB = AI->getParent();
521      if (Blocks.count(BB))
522        continue;
523  
524      // As a prior call to extractCodeRegion() may have shrinkwrapped the alloca,
525      // check whether it is actually still in the original function.
526      Function *AIFunc = BB->getParent();
527      if (AIFunc != Func)
528        continue;
529  
530      LifetimeMarkerInfo MarkerInfo = getLifetimeMarkers(CEAC, AI, ExitBlock);
531      bool Moved = moveOrIgnoreLifetimeMarkers(MarkerInfo);
532      if (Moved) {
533        LLVM_DEBUG(dbgs() << "Sinking alloca: " << *AI << "\n");
534        SinkCands.insert(AI);
535        continue;
536      }
537  
538      // Find bitcasts in the outlined region that have lifetime marker users
539      // outside that region. Replace the lifetime marker use with an
540      // outside region bitcast to avoid unnecessary alloca/reload instructions
541      // and extra lifetime markers.
542      SmallVector<Instruction *, 2> LifetimeBitcastUsers;
543      for (User *U : AI->users()) {
544        if (!definedInRegion(Blocks, U))
545          continue;
546  
547        if (U->stripInBoundsConstantOffsets() != AI)
548          continue;
549  
550        Instruction *Bitcast = cast<Instruction>(U);
551        for (User *BU : Bitcast->users()) {
552          IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(BU);
553          if (!IntrInst)
554            continue;
555  
556          if (!IntrInst->isLifetimeStartOrEnd())
557            continue;
558  
559          if (definedInRegion(Blocks, IntrInst))
560            continue;
561  
562          LLVM_DEBUG(dbgs() << "Replace use of extracted region bitcast"
563                            << *Bitcast << " in out-of-region lifetime marker "
564                            << *IntrInst << "\n");
565          LifetimeBitcastUsers.push_back(IntrInst);
566        }
567      }
568  
569      for (Instruction *I : LifetimeBitcastUsers) {
570        Module *M = AIFunc->getParent();
571        LLVMContext &Ctx = M->getContext();
572        auto *Int8PtrTy = Type::getInt8PtrTy(Ctx);
573        CastInst *CastI =
574            CastInst::CreatePointerCast(AI, Int8PtrTy, "lt.cast", I);
575        I->replaceUsesOfWith(I->getOperand(1), CastI);
576      }
577  
578      // Follow any bitcasts.
579      SmallVector<Instruction *, 2> Bitcasts;
580      SmallVector<LifetimeMarkerInfo, 2> BitcastLifetimeInfo;
581      for (User *U : AI->users()) {
582        if (U->stripInBoundsConstantOffsets() == AI) {
583          Instruction *Bitcast = cast<Instruction>(U);
584          LifetimeMarkerInfo LMI = getLifetimeMarkers(CEAC, Bitcast, ExitBlock);
585          if (LMI.LifeStart) {
586            Bitcasts.push_back(Bitcast);
587            BitcastLifetimeInfo.push_back(LMI);
588            continue;
589          }
590        }
591  
592        // Found unknown use of AI.
593        if (!definedInRegion(Blocks, U)) {
594          Bitcasts.clear();
595          break;
596        }
597      }
598  
599      // Either no bitcasts reference the alloca or there are unknown uses.
600      if (Bitcasts.empty())
601        continue;
602  
603      LLVM_DEBUG(dbgs() << "Sinking alloca (via bitcast): " << *AI << "\n");
604      SinkCands.insert(AI);
605      for (unsigned I = 0, E = Bitcasts.size(); I != E; ++I) {
606        Instruction *BitcastAddr = Bitcasts[I];
607        const LifetimeMarkerInfo &LMI = BitcastLifetimeInfo[I];
608        assert(LMI.LifeStart &&
609               "Unsafe to sink bitcast without lifetime markers");
610        moveOrIgnoreLifetimeMarkers(LMI);
611        if (!definedInRegion(Blocks, BitcastAddr)) {
612          LLVM_DEBUG(dbgs() << "Sinking bitcast-of-alloca: " << *BitcastAddr
613                            << "\n");
614          SinkCands.insert(BitcastAddr);
615        }
616      }
617    }
618  }
619  
620  bool CodeExtractor::isEligible() const {
621    if (Blocks.empty())
622      return false;
623    BasicBlock *Header = *Blocks.begin();
624    Function *F = Header->getParent();
625  
626    // For functions with varargs, check that varargs handling is only done in the
627    // outlined function, i.e vastart and vaend are only used in outlined blocks.
628    if (AllowVarArgs && F->getFunctionType()->isVarArg()) {
629      auto containsVarArgIntrinsic = [](const Instruction &I) {
630        if (const CallInst *CI = dyn_cast<CallInst>(&I))
631          if (const Function *Callee = CI->getCalledFunction())
632            return Callee->getIntrinsicID() == Intrinsic::vastart ||
633                   Callee->getIntrinsicID() == Intrinsic::vaend;
634        return false;
635      };
636  
637      for (auto &BB : *F) {
638        if (Blocks.count(&BB))
639          continue;
640        if (llvm::any_of(BB, containsVarArgIntrinsic))
641          return false;
642      }
643    }
644    return true;
645  }
646  
647  void CodeExtractor::findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs,
648                                        const ValueSet &SinkCands) const {
649    for (BasicBlock *BB : Blocks) {
650      // If a used value is defined outside the region, it's an input.  If an
651      // instruction is used outside the region, it's an output.
652      for (Instruction &II : *BB) {
653        for (auto &OI : II.operands()) {
654          Value *V = OI;
655          if (!SinkCands.count(V) && definedInCaller(Blocks, V))
656            Inputs.insert(V);
657        }
658  
659        for (User *U : II.users())
660          if (!definedInRegion(Blocks, U)) {
661            Outputs.insert(&II);
662            break;
663          }
664      }
665    }
666  }
667  
668  /// severSplitPHINodesOfEntry - If a PHI node has multiple inputs from outside
669  /// of the region, we need to split the entry block of the region so that the
670  /// PHI node is easier to deal with.
671  void CodeExtractor::severSplitPHINodesOfEntry(BasicBlock *&Header) {
672    unsigned NumPredsFromRegion = 0;
673    unsigned NumPredsOutsideRegion = 0;
674  
675    if (Header != &Header->getParent()->getEntryBlock()) {
676      PHINode *PN = dyn_cast<PHINode>(Header->begin());
677      if (!PN) return;  // No PHI nodes.
678  
679      // If the header node contains any PHI nodes, check to see if there is more
680      // than one entry from outside the region.  If so, we need to sever the
681      // header block into two.
682      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
683        if (Blocks.count(PN->getIncomingBlock(i)))
684          ++NumPredsFromRegion;
685        else
686          ++NumPredsOutsideRegion;
687  
688      // If there is one (or fewer) predecessor from outside the region, we don't
689      // need to do anything special.
690      if (NumPredsOutsideRegion <= 1) return;
691    }
692  
693    // Otherwise, we need to split the header block into two pieces: one
694    // containing PHI nodes merging values from outside of the region, and a
695    // second that contains all of the code for the block and merges back any
696    // incoming values from inside of the region.
697    BasicBlock *NewBB = SplitBlock(Header, Header->getFirstNonPHI(), DT);
698  
699    // We only want to code extract the second block now, and it becomes the new
700    // header of the region.
701    BasicBlock *OldPred = Header;
702    Blocks.remove(OldPred);
703    Blocks.insert(NewBB);
704    Header = NewBB;
705  
706    // Okay, now we need to adjust the PHI nodes and any branches from within the
707    // region to go to the new header block instead of the old header block.
708    if (NumPredsFromRegion) {
709      PHINode *PN = cast<PHINode>(OldPred->begin());
710      // Loop over all of the predecessors of OldPred that are in the region,
711      // changing them to branch to NewBB instead.
712      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
713        if (Blocks.count(PN->getIncomingBlock(i))) {
714          Instruction *TI = PN->getIncomingBlock(i)->getTerminator();
715          TI->replaceUsesOfWith(OldPred, NewBB);
716        }
717  
718      // Okay, everything within the region is now branching to the right block, we
719      // just have to update the PHI nodes now, inserting PHI nodes into NewBB.
720      BasicBlock::iterator AfterPHIs;
721      for (AfterPHIs = OldPred->begin(); isa<PHINode>(AfterPHIs); ++AfterPHIs) {
722        PHINode *PN = cast<PHINode>(AfterPHIs);
723        // Create a new PHI node in the new region, which has an incoming value
724        // from OldPred of PN.
725        PHINode *NewPN = PHINode::Create(PN->getType(), 1 + NumPredsFromRegion,
726                                         PN->getName() + ".ce", &NewBB->front());
727        PN->replaceAllUsesWith(NewPN);
728        NewPN->addIncoming(PN, OldPred);
729  
730        // Loop over all of the incoming value in PN, moving them to NewPN if they
731        // are from the extracted region.
732        for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) {
733          if (Blocks.count(PN->getIncomingBlock(i))) {
734            NewPN->addIncoming(PN->getIncomingValue(i), PN->getIncomingBlock(i));
735            PN->removeIncomingValue(i);
736            --i;
737          }
738        }
739      }
740    }
741  }
742  
743  /// severSplitPHINodesOfExits - if PHI nodes in exit blocks have inputs from
744  /// outlined region, we split these PHIs on two: one with inputs from region
745  /// and other with remaining incoming blocks; then first PHIs are placed in
746  /// outlined region.
747  void CodeExtractor::severSplitPHINodesOfExits(
748      const SmallPtrSetImpl<BasicBlock *> &Exits) {
749    for (BasicBlock *ExitBB : Exits) {
750      BasicBlock *NewBB = nullptr;
751  
752      for (PHINode &PN : ExitBB->phis()) {
753        // Find all incoming values from the outlining region.
754        SmallVector<unsigned, 2> IncomingVals;
755        for (unsigned i = 0; i < PN.getNumIncomingValues(); ++i)
756          if (Blocks.count(PN.getIncomingBlock(i)))
757            IncomingVals.push_back(i);
758  
759        // Do not process PHI if there is one (or fewer) predecessor from region.
760        // If PHI has exactly one predecessor from region, only this one incoming
761        // will be replaced on codeRepl block, so it should be safe to skip PHI.
762        if (IncomingVals.size() <= 1)
763          continue;
764  
765        // Create block for new PHIs and add it to the list of outlined if it
766        // wasn't done before.
767        if (!NewBB) {
768          NewBB = BasicBlock::Create(ExitBB->getContext(),
769                                     ExitBB->getName() + ".split",
770                                     ExitBB->getParent(), ExitBB);
771          SmallVector<BasicBlock *, 4> Preds(predecessors(ExitBB));
772          for (BasicBlock *PredBB : Preds)
773            if (Blocks.count(PredBB))
774              PredBB->getTerminator()->replaceUsesOfWith(ExitBB, NewBB);
775          BranchInst::Create(ExitBB, NewBB);
776          Blocks.insert(NewBB);
777        }
778  
779        // Split this PHI.
780        PHINode *NewPN =
781            PHINode::Create(PN.getType(), IncomingVals.size(),
782                            PN.getName() + ".ce", NewBB->getFirstNonPHI());
783        for (unsigned i : IncomingVals)
784          NewPN->addIncoming(PN.getIncomingValue(i), PN.getIncomingBlock(i));
785        for (unsigned i : reverse(IncomingVals))
786          PN.removeIncomingValue(i, false);
787        PN.addIncoming(NewPN, NewBB);
788      }
789    }
790  }
791  
792  void CodeExtractor::splitReturnBlocks() {
793    for (BasicBlock *Block : Blocks)
794      if (ReturnInst *RI = dyn_cast<ReturnInst>(Block->getTerminator())) {
795        BasicBlock *New =
796            Block->splitBasicBlock(RI->getIterator(), Block->getName() + ".ret");
797        if (DT) {
798          // Old dominates New. New node dominates all other nodes dominated
799          // by Old.
800          DomTreeNode *OldNode = DT->getNode(Block);
801          SmallVector<DomTreeNode *, 8> Children(OldNode->begin(),
802                                                 OldNode->end());
803  
804          DomTreeNode *NewNode = DT->addNewBlock(New, Block);
805  
806          for (DomTreeNode *I : Children)
807            DT->changeImmediateDominator(I, NewNode);
808        }
809      }
810  }
811  
812  /// constructFunction - make a function based on inputs and outputs, as follows:
813  /// f(in0, ..., inN, out0, ..., outN)
814  Function *CodeExtractor::constructFunction(const ValueSet &inputs,
815                                             const ValueSet &outputs,
816                                             BasicBlock *header,
817                                             BasicBlock *newRootNode,
818                                             BasicBlock *newHeader,
819                                             Function *oldFunction,
820                                             Module *M) {
821    LLVM_DEBUG(dbgs() << "inputs: " << inputs.size() << "\n");
822    LLVM_DEBUG(dbgs() << "outputs: " << outputs.size() << "\n");
823  
824    // This function returns unsigned, outputs will go back by reference.
825    switch (NumExitBlocks) {
826    case 0:
827    case 1: RetTy = Type::getVoidTy(header->getContext()); break;
828    case 2: RetTy = Type::getInt1Ty(header->getContext()); break;
829    default: RetTy = Type::getInt16Ty(header->getContext()); break;
830    }
831  
832    std::vector<Type *> paramTy;
833  
834    // Add the types of the input values to the function's argument list
835    for (Value *value : inputs) {
836      LLVM_DEBUG(dbgs() << "value used in func: " << *value << "\n");
837      paramTy.push_back(value->getType());
838    }
839  
840    // Add the types of the output values to the function's argument list.
841    for (Value *output : outputs) {
842      LLVM_DEBUG(dbgs() << "instr used in func: " << *output << "\n");
843      if (AggregateArgs)
844        paramTy.push_back(output->getType());
845      else
846        paramTy.push_back(PointerType::getUnqual(output->getType()));
847    }
848  
849    LLVM_DEBUG({
850      dbgs() << "Function type: " << *RetTy << " f(";
851      for (Type *i : paramTy)
852        dbgs() << *i << ", ";
853      dbgs() << ")\n";
854    });
855  
856    StructType *StructTy = nullptr;
857    if (AggregateArgs && (inputs.size() + outputs.size() > 0)) {
858      StructTy = StructType::get(M->getContext(), paramTy);
859      paramTy.clear();
860      paramTy.push_back(PointerType::getUnqual(StructTy));
861    }
862    FunctionType *funcType =
863                    FunctionType::get(RetTy, paramTy,
864                                      AllowVarArgs && oldFunction->isVarArg());
865  
866    std::string SuffixToUse =
867        Suffix.empty()
868            ? (header->getName().empty() ? "extracted" : header->getName().str())
869            : Suffix;
870    // Create the new function
871    Function *newFunction = Function::Create(
872        funcType, GlobalValue::InternalLinkage, oldFunction->getAddressSpace(),
873        oldFunction->getName() + "." + SuffixToUse, M);
874    // If the old function is no-throw, so is the new one.
875    if (oldFunction->doesNotThrow())
876      newFunction->setDoesNotThrow();
877  
878    // Inherit the uwtable attribute if we need to.
879    if (oldFunction->hasUWTable())
880      newFunction->setHasUWTable();
881  
882    // Inherit all of the target dependent attributes and white-listed
883    // target independent attributes.
884    //  (e.g. If the extracted region contains a call to an x86.sse
885    //  instruction we need to make sure that the extracted region has the
886    //  "target-features" attribute allowing it to be lowered.
887    // FIXME: This should be changed to check to see if a specific
888    //           attribute can not be inherited.
889    for (const auto &Attr : oldFunction->getAttributes().getFnAttributes()) {
890      if (Attr.isStringAttribute()) {
891        if (Attr.getKindAsString() == "thunk")
892          continue;
893      } else
894        switch (Attr.getKindAsEnum()) {
895        // Those attributes cannot be propagated safely. Explicitly list them
896        // here so we get a warning if new attributes are added. This list also
897        // includes non-function attributes.
898        case Attribute::Alignment:
899        case Attribute::AllocSize:
900        case Attribute::ArgMemOnly:
901        case Attribute::Builtin:
902        case Attribute::ByVal:
903        case Attribute::Convergent:
904        case Attribute::Dereferenceable:
905        case Attribute::DereferenceableOrNull:
906        case Attribute::InAlloca:
907        case Attribute::InReg:
908        case Attribute::InaccessibleMemOnly:
909        case Attribute::InaccessibleMemOrArgMemOnly:
910        case Attribute::JumpTable:
911        case Attribute::Naked:
912        case Attribute::Nest:
913        case Attribute::NoAlias:
914        case Attribute::NoBuiltin:
915        case Attribute::NoCapture:
916        case Attribute::NoMerge:
917        case Attribute::NoReturn:
918        case Attribute::NoSync:
919        case Attribute::NoUndef:
920        case Attribute::None:
921        case Attribute::NonNull:
922        case Attribute::Preallocated:
923        case Attribute::ReadNone:
924        case Attribute::ReadOnly:
925        case Attribute::Returned:
926        case Attribute::ReturnsTwice:
927        case Attribute::SExt:
928        case Attribute::Speculatable:
929        case Attribute::StackAlignment:
930        case Attribute::StructRet:
931        case Attribute::SwiftError:
932        case Attribute::SwiftSelf:
933        case Attribute::WillReturn:
934        case Attribute::WriteOnly:
935        case Attribute::ZExt:
936        case Attribute::ImmArg:
937        case Attribute::ByRef:
938        case Attribute::EndAttrKinds:
939        case Attribute::EmptyKey:
940        case Attribute::TombstoneKey:
941          continue;
942        // Those attributes should be safe to propagate to the extracted function.
943        case Attribute::AlwaysInline:
944        case Attribute::Cold:
945        case Attribute::Hot:
946        case Attribute::NoRecurse:
947        case Attribute::InlineHint:
948        case Attribute::MinSize:
949        case Attribute::NoCallback:
950        case Attribute::NoDuplicate:
951        case Attribute::NoFree:
952        case Attribute::NoImplicitFloat:
953        case Attribute::NoInline:
954        case Attribute::NonLazyBind:
955        case Attribute::NoRedZone:
956        case Attribute::NoUnwind:
957        case Attribute::NullPointerIsValid:
958        case Attribute::OptForFuzzing:
959        case Attribute::OptimizeNone:
960        case Attribute::OptimizeForSize:
961        case Attribute::SafeStack:
962        case Attribute::ShadowCallStack:
963        case Attribute::SanitizeAddress:
964        case Attribute::SanitizeMemory:
965        case Attribute::SanitizeThread:
966        case Attribute::SanitizeHWAddress:
967        case Attribute::SanitizeMemTag:
968        case Attribute::SpeculativeLoadHardening:
969        case Attribute::StackProtect:
970        case Attribute::StackProtectReq:
971        case Attribute::StackProtectStrong:
972        case Attribute::StrictFP:
973        case Attribute::UWTable:
974        case Attribute::NoCfCheck:
975        case Attribute::MustProgress:
976        case Attribute::NoProfile:
977          break;
978        }
979  
980      newFunction->addFnAttr(Attr);
981    }
982    newFunction->getBasicBlockList().push_back(newRootNode);
983  
984    // Create an iterator to name all of the arguments we inserted.
985    Function::arg_iterator AI = newFunction->arg_begin();
986  
987    // Rewrite all users of the inputs in the extracted region to use the
988    // arguments (or appropriate addressing into struct) instead.
989    for (unsigned i = 0, e = inputs.size(); i != e; ++i) {
990      Value *RewriteVal;
991      if (AggregateArgs) {
992        Value *Idx[2];
993        Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext()));
994        Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i);
995        Instruction *TI = newFunction->begin()->getTerminator();
996        GetElementPtrInst *GEP = GetElementPtrInst::Create(
997            StructTy, &*AI, Idx, "gep_" + inputs[i]->getName(), TI);
998        RewriteVal = new LoadInst(StructTy->getElementType(i), GEP,
999                                  "loadgep_" + inputs[i]->getName(), TI);
1000      } else
1001        RewriteVal = &*AI++;
1002  
1003      std::vector<User *> Users(inputs[i]->user_begin(), inputs[i]->user_end());
1004      for (User *use : Users)
1005        if (Instruction *inst = dyn_cast<Instruction>(use))
1006          if (Blocks.count(inst->getParent()))
1007            inst->replaceUsesOfWith(inputs[i], RewriteVal);
1008    }
1009  
1010    // Set names for input and output arguments.
1011    if (!AggregateArgs) {
1012      AI = newFunction->arg_begin();
1013      for (unsigned i = 0, e = inputs.size(); i != e; ++i, ++AI)
1014        AI->setName(inputs[i]->getName());
1015      for (unsigned i = 0, e = outputs.size(); i != e; ++i, ++AI)
1016        AI->setName(outputs[i]->getName()+".out");
1017    }
1018  
1019    // Rewrite branches to basic blocks outside of the loop to new dummy blocks
1020    // within the new function. This must be done before we lose track of which
1021    // blocks were originally in the code region.
1022    std::vector<User *> Users(header->user_begin(), header->user_end());
1023    for (auto &U : Users)
1024      // The BasicBlock which contains the branch is not in the region
1025      // modify the branch target to a new block
1026      if (Instruction *I = dyn_cast<Instruction>(U))
1027        if (I->isTerminator() && I->getFunction() == oldFunction &&
1028            !Blocks.count(I->getParent()))
1029          I->replaceUsesOfWith(header, newHeader);
1030  
1031    return newFunction;
1032  }
1033  
1034  /// Erase lifetime.start markers which reference inputs to the extraction
1035  /// region, and insert the referenced memory into \p LifetimesStart.
1036  ///
1037  /// The extraction region is defined by a set of blocks (\p Blocks), and a set
1038  /// of allocas which will be moved from the caller function into the extracted
1039  /// function (\p SunkAllocas).
1040  static void eraseLifetimeMarkersOnInputs(const SetVector<BasicBlock *> &Blocks,
1041                                           const SetVector<Value *> &SunkAllocas,
1042                                           SetVector<Value *> &LifetimesStart) {
1043    for (BasicBlock *BB : Blocks) {
1044      for (auto It = BB->begin(), End = BB->end(); It != End;) {
1045        auto *II = dyn_cast<IntrinsicInst>(&*It);
1046        ++It;
1047        if (!II || !II->isLifetimeStartOrEnd())
1048          continue;
1049  
1050        // Get the memory operand of the lifetime marker. If the underlying
1051        // object is a sunk alloca, or is otherwise defined in the extraction
1052        // region, the lifetime marker must not be erased.
1053        Value *Mem = II->getOperand(1)->stripInBoundsOffsets();
1054        if (SunkAllocas.count(Mem) || definedInRegion(Blocks, Mem))
1055          continue;
1056  
1057        if (II->getIntrinsicID() == Intrinsic::lifetime_start)
1058          LifetimesStart.insert(Mem);
1059        II->eraseFromParent();
1060      }
1061    }
1062  }
1063  
1064  /// Insert lifetime start/end markers surrounding the call to the new function
1065  /// for objects defined in the caller.
1066  static void insertLifetimeMarkersSurroundingCall(
1067      Module *M, ArrayRef<Value *> LifetimesStart, ArrayRef<Value *> LifetimesEnd,
1068      CallInst *TheCall) {
1069    LLVMContext &Ctx = M->getContext();
1070    auto Int8PtrTy = Type::getInt8PtrTy(Ctx);
1071    auto NegativeOne = ConstantInt::getSigned(Type::getInt64Ty(Ctx), -1);
1072    Instruction *Term = TheCall->getParent()->getTerminator();
1073  
1074    // The memory argument to a lifetime marker must be a i8*. Cache any bitcasts
1075    // needed to satisfy this requirement so they may be reused.
1076    DenseMap<Value *, Value *> Bitcasts;
1077  
1078    // Emit lifetime markers for the pointers given in \p Objects. Insert the
1079    // markers before the call if \p InsertBefore, and after the call otherwise.
1080    auto insertMarkers = [&](Function *MarkerFunc, ArrayRef<Value *> Objects,
1081                             bool InsertBefore) {
1082      for (Value *Mem : Objects) {
1083        assert((!isa<Instruction>(Mem) || cast<Instruction>(Mem)->getFunction() ==
1084                                              TheCall->getFunction()) &&
1085               "Input memory not defined in original function");
1086        Value *&MemAsI8Ptr = Bitcasts[Mem];
1087        if (!MemAsI8Ptr) {
1088          if (Mem->getType() == Int8PtrTy)
1089            MemAsI8Ptr = Mem;
1090          else
1091            MemAsI8Ptr =
1092                CastInst::CreatePointerCast(Mem, Int8PtrTy, "lt.cast", TheCall);
1093        }
1094  
1095        auto Marker = CallInst::Create(MarkerFunc, {NegativeOne, MemAsI8Ptr});
1096        if (InsertBefore)
1097          Marker->insertBefore(TheCall);
1098        else
1099          Marker->insertBefore(Term);
1100      }
1101    };
1102  
1103    if (!LifetimesStart.empty()) {
1104      auto StartFn = llvm::Intrinsic::getDeclaration(
1105          M, llvm::Intrinsic::lifetime_start, Int8PtrTy);
1106      insertMarkers(StartFn, LifetimesStart, /*InsertBefore=*/true);
1107    }
1108  
1109    if (!LifetimesEnd.empty()) {
1110      auto EndFn = llvm::Intrinsic::getDeclaration(
1111          M, llvm::Intrinsic::lifetime_end, Int8PtrTy);
1112      insertMarkers(EndFn, LifetimesEnd, /*InsertBefore=*/false);
1113    }
1114  }
1115  
1116  /// emitCallAndSwitchStatement - This method sets up the caller side by adding
1117  /// the call instruction, splitting any PHI nodes in the header block as
1118  /// necessary.
1119  CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
1120                                                      BasicBlock *codeReplacer,
1121                                                      ValueSet &inputs,
1122                                                      ValueSet &outputs) {
1123    // Emit a call to the new function, passing in: *pointer to struct (if
1124    // aggregating parameters), or plan inputs and allocated memory for outputs
1125    std::vector<Value *> params, StructValues, ReloadOutputs, Reloads;
1126  
1127    Module *M = newFunction->getParent();
1128    LLVMContext &Context = M->getContext();
1129    const DataLayout &DL = M->getDataLayout();
1130    CallInst *call = nullptr;
1131  
1132    // Add inputs as params, or to be filled into the struct
1133    unsigned ArgNo = 0;
1134    SmallVector<unsigned, 1> SwiftErrorArgs;
1135    for (Value *input : inputs) {
1136      if (AggregateArgs)
1137        StructValues.push_back(input);
1138      else {
1139        params.push_back(input);
1140        if (input->isSwiftError())
1141          SwiftErrorArgs.push_back(ArgNo);
1142      }
1143      ++ArgNo;
1144    }
1145  
1146    // Create allocas for the outputs
1147    for (Value *output : outputs) {
1148      if (AggregateArgs) {
1149        StructValues.push_back(output);
1150      } else {
1151        AllocaInst *alloca =
1152          new AllocaInst(output->getType(), DL.getAllocaAddrSpace(),
1153                         nullptr, output->getName() + ".loc",
1154                         &codeReplacer->getParent()->front().front());
1155        ReloadOutputs.push_back(alloca);
1156        params.push_back(alloca);
1157      }
1158    }
1159  
1160    StructType *StructArgTy = nullptr;
1161    AllocaInst *Struct = nullptr;
1162    if (AggregateArgs && (inputs.size() + outputs.size() > 0)) {
1163      std::vector<Type *> ArgTypes;
1164      for (ValueSet::iterator v = StructValues.begin(),
1165             ve = StructValues.end(); v != ve; ++v)
1166        ArgTypes.push_back((*v)->getType());
1167  
1168      // Allocate a struct at the beginning of this function
1169      StructArgTy = StructType::get(newFunction->getContext(), ArgTypes);
1170      Struct = new AllocaInst(StructArgTy, DL.getAllocaAddrSpace(), nullptr,
1171                              "structArg",
1172                              &codeReplacer->getParent()->front().front());
1173      params.push_back(Struct);
1174  
1175      for (unsigned i = 0, e = inputs.size(); i != e; ++i) {
1176        Value *Idx[2];
1177        Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
1178        Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i);
1179        GetElementPtrInst *GEP = GetElementPtrInst::Create(
1180            StructArgTy, Struct, Idx, "gep_" + StructValues[i]->getName());
1181        codeReplacer->getInstList().push_back(GEP);
1182        new StoreInst(StructValues[i], GEP, codeReplacer);
1183      }
1184    }
1185  
1186    // Emit the call to the function
1187    call = CallInst::Create(newFunction, params,
1188                            NumExitBlocks > 1 ? "targetBlock" : "");
1189    // Add debug location to the new call, if the original function has debug
1190    // info. In that case, the terminator of the entry block of the extracted
1191    // function contains the first debug location of the extracted function,
1192    // set in extractCodeRegion.
1193    if (codeReplacer->getParent()->getSubprogram()) {
1194      if (auto DL = newFunction->getEntryBlock().getTerminator()->getDebugLoc())
1195        call->setDebugLoc(DL);
1196    }
1197    codeReplacer->getInstList().push_back(call);
1198  
1199    // Set swifterror parameter attributes.
1200    for (unsigned SwiftErrArgNo : SwiftErrorArgs) {
1201      call->addParamAttr(SwiftErrArgNo, Attribute::SwiftError);
1202      newFunction->addParamAttr(SwiftErrArgNo, Attribute::SwiftError);
1203    }
1204  
1205    Function::arg_iterator OutputArgBegin = newFunction->arg_begin();
1206    unsigned FirstOut = inputs.size();
1207    if (!AggregateArgs)
1208      std::advance(OutputArgBegin, inputs.size());
1209  
1210    // Reload the outputs passed in by reference.
1211    for (unsigned i = 0, e = outputs.size(); i != e; ++i) {
1212      Value *Output = nullptr;
1213      if (AggregateArgs) {
1214        Value *Idx[2];
1215        Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
1216        Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i);
1217        GetElementPtrInst *GEP = GetElementPtrInst::Create(
1218            StructArgTy, Struct, Idx, "gep_reload_" + outputs[i]->getName());
1219        codeReplacer->getInstList().push_back(GEP);
1220        Output = GEP;
1221      } else {
1222        Output = ReloadOutputs[i];
1223      }
1224      LoadInst *load = new LoadInst(outputs[i]->getType(), Output,
1225                                    outputs[i]->getName() + ".reload",
1226                                    codeReplacer);
1227      Reloads.push_back(load);
1228      std::vector<User *> Users(outputs[i]->user_begin(), outputs[i]->user_end());
1229      for (unsigned u = 0, e = Users.size(); u != e; ++u) {
1230        Instruction *inst = cast<Instruction>(Users[u]);
1231        if (!Blocks.count(inst->getParent()))
1232          inst->replaceUsesOfWith(outputs[i], load);
1233      }
1234    }
1235  
1236    // Now we can emit a switch statement using the call as a value.
1237    SwitchInst *TheSwitch =
1238        SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)),
1239                           codeReplacer, 0, codeReplacer);
1240  
1241    // Since there may be multiple exits from the original region, make the new
1242    // function return an unsigned, switch on that number.  This loop iterates
1243    // over all of the blocks in the extracted region, updating any terminator
1244    // instructions in the to-be-extracted region that branch to blocks that are
1245    // not in the region to be extracted.
1246    std::map<BasicBlock *, BasicBlock *> ExitBlockMap;
1247  
1248    unsigned switchVal = 0;
1249    for (BasicBlock *Block : Blocks) {
1250      Instruction *TI = Block->getTerminator();
1251      for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
1252        if (!Blocks.count(TI->getSuccessor(i))) {
1253          BasicBlock *OldTarget = TI->getSuccessor(i);
1254          // add a new basic block which returns the appropriate value
1255          BasicBlock *&NewTarget = ExitBlockMap[OldTarget];
1256          if (!NewTarget) {
1257            // If we don't already have an exit stub for this non-extracted
1258            // destination, create one now!
1259            NewTarget = BasicBlock::Create(Context,
1260                                           OldTarget->getName() + ".exitStub",
1261                                           newFunction);
1262            unsigned SuccNum = switchVal++;
1263  
1264            Value *brVal = nullptr;
1265            switch (NumExitBlocks) {
1266            case 0:
1267            case 1: break;  // No value needed.
1268            case 2:         // Conditional branch, return a bool
1269              brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum);
1270              break;
1271            default:
1272              brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum);
1273              break;
1274            }
1275  
1276            ReturnInst::Create(Context, brVal, NewTarget);
1277  
1278            // Update the switch instruction.
1279            TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context),
1280                                                SuccNum),
1281                               OldTarget);
1282          }
1283  
1284          // rewrite the original branch instruction with this new target
1285          TI->setSuccessor(i, NewTarget);
1286        }
1287    }
1288  
1289    // Store the arguments right after the definition of output value.
1290    // This should be proceeded after creating exit stubs to be ensure that invoke
1291    // result restore will be placed in the outlined function.
1292    Function::arg_iterator OAI = OutputArgBegin;
1293    for (unsigned i = 0, e = outputs.size(); i != e; ++i) {
1294      auto *OutI = dyn_cast<Instruction>(outputs[i]);
1295      if (!OutI)
1296        continue;
1297  
1298      // Find proper insertion point.
1299      BasicBlock::iterator InsertPt;
1300      // In case OutI is an invoke, we insert the store at the beginning in the
1301      // 'normal destination' BB. Otherwise we insert the store right after OutI.
1302      if (auto *InvokeI = dyn_cast<InvokeInst>(OutI))
1303        InsertPt = InvokeI->getNormalDest()->getFirstInsertionPt();
1304      else if (auto *Phi = dyn_cast<PHINode>(OutI))
1305        InsertPt = Phi->getParent()->getFirstInsertionPt();
1306      else
1307        InsertPt = std::next(OutI->getIterator());
1308  
1309      Instruction *InsertBefore = &*InsertPt;
1310      assert((InsertBefore->getFunction() == newFunction ||
1311              Blocks.count(InsertBefore->getParent())) &&
1312             "InsertPt should be in new function");
1313      assert(OAI != newFunction->arg_end() &&
1314             "Number of output arguments should match "
1315             "the amount of defined values");
1316      if (AggregateArgs) {
1317        Value *Idx[2];
1318        Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
1319        Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i);
1320        GetElementPtrInst *GEP = GetElementPtrInst::Create(
1321            StructArgTy, &*OAI, Idx, "gep_" + outputs[i]->getName(),
1322            InsertBefore);
1323        new StoreInst(outputs[i], GEP, InsertBefore);
1324        // Since there should be only one struct argument aggregating
1325        // all the output values, we shouldn't increment OAI, which always
1326        // points to the struct argument, in this case.
1327      } else {
1328        new StoreInst(outputs[i], &*OAI, InsertBefore);
1329        ++OAI;
1330      }
1331    }
1332  
1333    // Now that we've done the deed, simplify the switch instruction.
1334    Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType();
1335    switch (NumExitBlocks) {
1336    case 0:
1337      // There are no successors (the block containing the switch itself), which
1338      // means that previously this was the last part of the function, and hence
1339      // this should be rewritten as a `ret'
1340  
1341      // Check if the function should return a value
1342      if (OldFnRetTy->isVoidTy()) {
1343        ReturnInst::Create(Context, nullptr, TheSwitch);  // Return void
1344      } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) {
1345        // return what we have
1346        ReturnInst::Create(Context, TheSwitch->getCondition(), TheSwitch);
1347      } else {
1348        // Otherwise we must have code extracted an unwind or something, just
1349        // return whatever we want.
1350        ReturnInst::Create(Context,
1351                           Constant::getNullValue(OldFnRetTy), TheSwitch);
1352      }
1353  
1354      TheSwitch->eraseFromParent();
1355      break;
1356    case 1:
1357      // Only a single destination, change the switch into an unconditional
1358      // branch.
1359      BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch);
1360      TheSwitch->eraseFromParent();
1361      break;
1362    case 2:
1363      BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch->getSuccessor(2),
1364                         call, TheSwitch);
1365      TheSwitch->eraseFromParent();
1366      break;
1367    default:
1368      // Otherwise, make the default destination of the switch instruction be one
1369      // of the other successors.
1370      TheSwitch->setCondition(call);
1371      TheSwitch->setDefaultDest(TheSwitch->getSuccessor(NumExitBlocks));
1372      // Remove redundant case
1373      TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks-1));
1374      break;
1375    }
1376  
1377    // Insert lifetime markers around the reloads of any output values. The
1378    // allocas output values are stored in are only in-use in the codeRepl block.
1379    insertLifetimeMarkersSurroundingCall(M, ReloadOutputs, ReloadOutputs, call);
1380  
1381    return call;
1382  }
1383  
1384  void CodeExtractor::moveCodeToFunction(Function *newFunction) {
1385    Function *oldFunc = (*Blocks.begin())->getParent();
1386    Function::BasicBlockListType &oldBlocks = oldFunc->getBasicBlockList();
1387    Function::BasicBlockListType &newBlocks = newFunction->getBasicBlockList();
1388  
1389    for (BasicBlock *Block : Blocks) {
1390      // Delete the basic block from the old function, and the list of blocks
1391      oldBlocks.remove(Block);
1392  
1393      // Insert this basic block into the new function
1394      newBlocks.push_back(Block);
1395    }
1396  }
1397  
1398  void CodeExtractor::calculateNewCallTerminatorWeights(
1399      BasicBlock *CodeReplacer,
1400      DenseMap<BasicBlock *, BlockFrequency> &ExitWeights,
1401      BranchProbabilityInfo *BPI) {
1402    using Distribution = BlockFrequencyInfoImplBase::Distribution;
1403    using BlockNode = BlockFrequencyInfoImplBase::BlockNode;
1404  
1405    // Update the branch weights for the exit block.
1406    Instruction *TI = CodeReplacer->getTerminator();
1407    SmallVector<unsigned, 8> BranchWeights(TI->getNumSuccessors(), 0);
1408  
1409    // Block Frequency distribution with dummy node.
1410    Distribution BranchDist;
1411  
1412    SmallVector<BranchProbability, 4> EdgeProbabilities(
1413        TI->getNumSuccessors(), BranchProbability::getUnknown());
1414  
1415    // Add each of the frequencies of the successors.
1416    for (unsigned i = 0, e = TI->getNumSuccessors(); i < e; ++i) {
1417      BlockNode ExitNode(i);
1418      uint64_t ExitFreq = ExitWeights[TI->getSuccessor(i)].getFrequency();
1419      if (ExitFreq != 0)
1420        BranchDist.addExit(ExitNode, ExitFreq);
1421      else
1422        EdgeProbabilities[i] = BranchProbability::getZero();
1423    }
1424  
1425    // Check for no total weight.
1426    if (BranchDist.Total == 0) {
1427      BPI->setEdgeProbability(CodeReplacer, EdgeProbabilities);
1428      return;
1429    }
1430  
1431    // Normalize the distribution so that they can fit in unsigned.
1432    BranchDist.normalize();
1433  
1434    // Create normalized branch weights and set the metadata.
1435    for (unsigned I = 0, E = BranchDist.Weights.size(); I < E; ++I) {
1436      const auto &Weight = BranchDist.Weights[I];
1437  
1438      // Get the weight and update the current BFI.
1439      BranchWeights[Weight.TargetNode.Index] = Weight.Amount;
1440      BranchProbability BP(Weight.Amount, BranchDist.Total);
1441      EdgeProbabilities[Weight.TargetNode.Index] = BP;
1442    }
1443    BPI->setEdgeProbability(CodeReplacer, EdgeProbabilities);
1444    TI->setMetadata(
1445        LLVMContext::MD_prof,
1446        MDBuilder(TI->getContext()).createBranchWeights(BranchWeights));
1447  }
1448  
1449  /// Erase debug info intrinsics which refer to values in \p F but aren't in
1450  /// \p F.
1451  static void eraseDebugIntrinsicsWithNonLocalRefs(Function &F) {
1452    for (Instruction &I : instructions(F)) {
1453      SmallVector<DbgVariableIntrinsic *, 4> DbgUsers;
1454      findDbgUsers(DbgUsers, &I);
1455      for (DbgVariableIntrinsic *DVI : DbgUsers)
1456        if (DVI->getFunction() != &F)
1457          DVI->eraseFromParent();
1458    }
1459  }
1460  
1461  /// Fix up the debug info in the old and new functions by pointing line
1462  /// locations and debug intrinsics to the new subprogram scope, and by deleting
1463  /// intrinsics which point to values outside of the new function.
1464  static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc,
1465                                           CallInst &TheCall) {
1466    DISubprogram *OldSP = OldFunc.getSubprogram();
1467    LLVMContext &Ctx = OldFunc.getContext();
1468  
1469    if (!OldSP) {
1470      // Erase any debug info the new function contains.
1471      stripDebugInfo(NewFunc);
1472      // Make sure the old function doesn't contain any non-local metadata refs.
1473      eraseDebugIntrinsicsWithNonLocalRefs(NewFunc);
1474      return;
1475    }
1476  
1477    // Create a subprogram for the new function. Leave out a description of the
1478    // function arguments, as the parameters don't correspond to anything at the
1479    // source level.
1480    assert(OldSP->getUnit() && "Missing compile unit for subprogram");
1481    DIBuilder DIB(*OldFunc.getParent(), /*AllowUnresolved=*/false,
1482                  OldSP->getUnit());
1483    auto SPType = DIB.createSubroutineType(DIB.getOrCreateTypeArray(None));
1484    DISubprogram::DISPFlags SPFlags = DISubprogram::SPFlagDefinition |
1485                                      DISubprogram::SPFlagOptimized |
1486                                      DISubprogram::SPFlagLocalToUnit;
1487    auto NewSP = DIB.createFunction(
1488        OldSP->getUnit(), NewFunc.getName(), NewFunc.getName(), OldSP->getFile(),
1489        /*LineNo=*/0, SPType, /*ScopeLine=*/0, DINode::FlagZero, SPFlags);
1490    NewFunc.setSubprogram(NewSP);
1491  
1492    // Debug intrinsics in the new function need to be updated in one of two
1493    // ways:
1494    //  1) They need to be deleted, because they describe a value in the old
1495    //     function.
1496    //  2) They need to point to fresh metadata, e.g. because they currently
1497    //     point to a variable in the wrong scope.
1498    SmallDenseMap<DINode *, DINode *> RemappedMetadata;
1499    SmallVector<Instruction *, 4> DebugIntrinsicsToDelete;
1500    for (Instruction &I : instructions(NewFunc)) {
1501      auto *DII = dyn_cast<DbgInfoIntrinsic>(&I);
1502      if (!DII)
1503        continue;
1504  
1505      // Point the intrinsic to a fresh label within the new function.
1506      if (auto *DLI = dyn_cast<DbgLabelInst>(&I)) {
1507        DILabel *OldLabel = DLI->getLabel();
1508        DINode *&NewLabel = RemappedMetadata[OldLabel];
1509        if (!NewLabel)
1510          NewLabel = DILabel::get(Ctx, NewSP, OldLabel->getName(),
1511                                  OldLabel->getFile(), OldLabel->getLine());
1512        DLI->setArgOperand(0, MetadataAsValue::get(Ctx, NewLabel));
1513        continue;
1514      }
1515  
1516      // If the location isn't a constant or an instruction, delete the
1517      // intrinsic.
1518      auto *DVI = cast<DbgVariableIntrinsic>(DII);
1519      Value *Location = DVI->getVariableLocation();
1520      if (!Location ||
1521          (!isa<Constant>(Location) && !isa<Instruction>(Location))) {
1522        DebugIntrinsicsToDelete.push_back(DVI);
1523        continue;
1524      }
1525  
1526      // If the variable location is an instruction but isn't in the new
1527      // function, delete the intrinsic.
1528      Instruction *LocationInst = dyn_cast<Instruction>(Location);
1529      if (LocationInst && LocationInst->getFunction() != &NewFunc) {
1530        DebugIntrinsicsToDelete.push_back(DVI);
1531        continue;
1532      }
1533  
1534      // Point the intrinsic to a fresh variable within the new function.
1535      DILocalVariable *OldVar = DVI->getVariable();
1536      DINode *&NewVar = RemappedMetadata[OldVar];
1537      if (!NewVar)
1538        NewVar = DIB.createAutoVariable(
1539            NewSP, OldVar->getName(), OldVar->getFile(), OldVar->getLine(),
1540            OldVar->getType(), /*AlwaysPreserve=*/false, DINode::FlagZero,
1541            OldVar->getAlignInBits());
1542      DVI->setArgOperand(1, MetadataAsValue::get(Ctx, NewVar));
1543    }
1544    for (auto *DII : DebugIntrinsicsToDelete)
1545      DII->eraseFromParent();
1546    DIB.finalizeSubprogram(NewSP);
1547  
1548    // Fix up the scope information attached to the line locations in the new
1549    // function.
1550    for (Instruction &I : instructions(NewFunc)) {
1551      if (const DebugLoc &DL = I.getDebugLoc())
1552        I.setDebugLoc(DILocation::get(Ctx, DL.getLine(), DL.getCol(), NewSP));
1553  
1554      // Loop info metadata may contain line locations. Fix them up.
1555      auto updateLoopInfoLoc = [&Ctx,
1556                                NewSP](const DILocation &Loc) -> DILocation * {
1557        return DILocation::get(Ctx, Loc.getLine(), Loc.getColumn(), NewSP,
1558                               nullptr);
1559      };
1560      updateLoopMetadataDebugLocations(I, updateLoopInfoLoc);
1561    }
1562    if (!TheCall.getDebugLoc())
1563      TheCall.setDebugLoc(DILocation::get(Ctx, 0, 0, OldSP));
1564  
1565    eraseDebugIntrinsicsWithNonLocalRefs(NewFunc);
1566  }
1567  
1568  Function *
1569  CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC) {
1570    if (!isEligible())
1571      return nullptr;
1572  
1573    // Assumption: this is a single-entry code region, and the header is the first
1574    // block in the region.
1575    BasicBlock *header = *Blocks.begin();
1576    Function *oldFunction = header->getParent();
1577  
1578    // Calculate the entry frequency of the new function before we change the root
1579    //   block.
1580    BlockFrequency EntryFreq;
1581    if (BFI) {
1582      assert(BPI && "Both BPI and BFI are required to preserve profile info");
1583      for (BasicBlock *Pred : predecessors(header)) {
1584        if (Blocks.count(Pred))
1585          continue;
1586        EntryFreq +=
1587            BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, header);
1588      }
1589    }
1590  
1591    // Remove @llvm.assume calls that will be moved to the new function from the
1592    // old function's assumption cache.
1593    for (BasicBlock *Block : Blocks) {
1594      for (auto It = Block->begin(), End = Block->end(); It != End;) {
1595        Instruction *I = &*It;
1596        ++It;
1597  
1598        if (match(I, m_Intrinsic<Intrinsic::assume>())) {
1599          if (AC)
1600            AC->unregisterAssumption(cast<CallInst>(I));
1601          I->eraseFromParent();
1602        }
1603      }
1604    }
1605  
1606    // If we have any return instructions in the region, split those blocks so
1607    // that the return is not in the region.
1608    splitReturnBlocks();
1609  
1610    // Calculate the exit blocks for the extracted region and the total exit
1611    // weights for each of those blocks.
1612    DenseMap<BasicBlock *, BlockFrequency> ExitWeights;
1613    SmallPtrSet<BasicBlock *, 1> ExitBlocks;
1614    for (BasicBlock *Block : Blocks) {
1615      for (succ_iterator SI = succ_begin(Block), SE = succ_end(Block); SI != SE;
1616           ++SI) {
1617        if (!Blocks.count(*SI)) {
1618          // Update the branch weight for this successor.
1619          if (BFI) {
1620            BlockFrequency &BF = ExitWeights[*SI];
1621            BF += BFI->getBlockFreq(Block) * BPI->getEdgeProbability(Block, *SI);
1622          }
1623          ExitBlocks.insert(*SI);
1624        }
1625      }
1626    }
1627    NumExitBlocks = ExitBlocks.size();
1628  
1629    // If we have to split PHI nodes of the entry or exit blocks, do so now.
1630    severSplitPHINodesOfEntry(header);
1631    severSplitPHINodesOfExits(ExitBlocks);
1632  
1633    // This takes place of the original loop
1634    BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(),
1635                                                  "codeRepl", oldFunction,
1636                                                  header);
1637  
1638    // The new function needs a root node because other nodes can branch to the
1639    // head of the region, but the entry node of a function cannot have preds.
1640    BasicBlock *newFuncRoot = BasicBlock::Create(header->getContext(),
1641                                                 "newFuncRoot");
1642    auto *BranchI = BranchInst::Create(header);
1643    // If the original function has debug info, we have to add a debug location
1644    // to the new branch instruction from the artificial entry block.
1645    // We use the debug location of the first instruction in the extracted
1646    // blocks, as there is no other equivalent line in the source code.
1647    if (oldFunction->getSubprogram()) {
1648      any_of(Blocks, [&BranchI](const BasicBlock *BB) {
1649        return any_of(*BB, [&BranchI](const Instruction &I) {
1650          if (!I.getDebugLoc())
1651            return false;
1652          BranchI->setDebugLoc(I.getDebugLoc());
1653          return true;
1654        });
1655      });
1656    }
1657    newFuncRoot->getInstList().push_back(BranchI);
1658  
1659    ValueSet inputs, outputs, SinkingCands, HoistingCands;
1660    BasicBlock *CommonExit = nullptr;
1661    findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
1662    assert(HoistingCands.empty() || CommonExit);
1663  
1664    // Find inputs to, outputs from the code region.
1665    findInputsOutputs(inputs, outputs, SinkingCands);
1666  
1667    // Now sink all instructions which only have non-phi uses inside the region.
1668    // Group the allocas at the start of the block, so that any bitcast uses of
1669    // the allocas are well-defined.
1670    AllocaInst *FirstSunkAlloca = nullptr;
1671    for (auto *II : SinkingCands) {
1672      if (auto *AI = dyn_cast<AllocaInst>(II)) {
1673        AI->moveBefore(*newFuncRoot, newFuncRoot->getFirstInsertionPt());
1674        if (!FirstSunkAlloca)
1675          FirstSunkAlloca = AI;
1676      }
1677    }
1678    assert((SinkingCands.empty() || FirstSunkAlloca) &&
1679           "Did not expect a sink candidate without any allocas");
1680    for (auto *II : SinkingCands) {
1681      if (!isa<AllocaInst>(II)) {
1682        cast<Instruction>(II)->moveAfter(FirstSunkAlloca);
1683      }
1684    }
1685  
1686    if (!HoistingCands.empty()) {
1687      auto *HoistToBlock = findOrCreateBlockForHoisting(CommonExit);
1688      Instruction *TI = HoistToBlock->getTerminator();
1689      for (auto *II : HoistingCands)
1690        cast<Instruction>(II)->moveBefore(TI);
1691    }
1692  
1693    // Collect objects which are inputs to the extraction region and also
1694    // referenced by lifetime start markers within it. The effects of these
1695    // markers must be replicated in the calling function to prevent the stack
1696    // coloring pass from merging slots which store input objects.
1697    ValueSet LifetimesStart;
1698    eraseLifetimeMarkersOnInputs(Blocks, SinkingCands, LifetimesStart);
1699  
1700    // Construct new function based on inputs/outputs & add allocas for all defs.
1701    Function *newFunction =
1702        constructFunction(inputs, outputs, header, newFuncRoot, codeReplacer,
1703                          oldFunction, oldFunction->getParent());
1704  
1705    // Update the entry count of the function.
1706    if (BFI) {
1707      auto Count = BFI->getProfileCountFromFreq(EntryFreq.getFrequency());
1708      if (Count.hasValue())
1709        newFunction->setEntryCount(
1710            ProfileCount(Count.getValue(), Function::PCT_Real)); // FIXME
1711      BFI->setBlockFreq(codeReplacer, EntryFreq.getFrequency());
1712    }
1713  
1714    CallInst *TheCall =
1715        emitCallAndSwitchStatement(newFunction, codeReplacer, inputs, outputs);
1716  
1717    moveCodeToFunction(newFunction);
1718  
1719    // Replicate the effects of any lifetime start/end markers which referenced
1720    // input objects in the extraction region by placing markers around the call.
1721    insertLifetimeMarkersSurroundingCall(
1722        oldFunction->getParent(), LifetimesStart.getArrayRef(), {}, TheCall);
1723  
1724    // Propagate personality info to the new function if there is one.
1725    if (oldFunction->hasPersonalityFn())
1726      newFunction->setPersonalityFn(oldFunction->getPersonalityFn());
1727  
1728    // Update the branch weights for the exit block.
1729    if (BFI && NumExitBlocks > 1)
1730      calculateNewCallTerminatorWeights(codeReplacer, ExitWeights, BPI);
1731  
1732    // Loop over all of the PHI nodes in the header and exit blocks, and change
1733    // any references to the old incoming edge to be the new incoming edge.
1734    for (BasicBlock::iterator I = header->begin(); isa<PHINode>(I); ++I) {
1735      PHINode *PN = cast<PHINode>(I);
1736      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
1737        if (!Blocks.count(PN->getIncomingBlock(i)))
1738          PN->setIncomingBlock(i, newFuncRoot);
1739    }
1740  
1741    for (BasicBlock *ExitBB : ExitBlocks)
1742      for (PHINode &PN : ExitBB->phis()) {
1743        Value *IncomingCodeReplacerVal = nullptr;
1744        for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
1745          // Ignore incoming values from outside of the extracted region.
1746          if (!Blocks.count(PN.getIncomingBlock(i)))
1747            continue;
1748  
1749          // Ensure that there is only one incoming value from codeReplacer.
1750          if (!IncomingCodeReplacerVal) {
1751            PN.setIncomingBlock(i, codeReplacer);
1752            IncomingCodeReplacerVal = PN.getIncomingValue(i);
1753          } else
1754            assert(IncomingCodeReplacerVal == PN.getIncomingValue(i) &&
1755                   "PHI has two incompatbile incoming values from codeRepl");
1756        }
1757      }
1758  
1759    fixupDebugInfoPostExtraction(*oldFunction, *newFunction, *TheCall);
1760  
1761    // Mark the new function `noreturn` if applicable. Terminators which resume
1762    // exception propagation are treated as returning instructions. This is to
1763    // avoid inserting traps after calls to outlined functions which unwind.
1764    bool doesNotReturn = none_of(*newFunction, [](const BasicBlock &BB) {
1765      const Instruction *Term = BB.getTerminator();
1766      return isa<ReturnInst>(Term) || isa<ResumeInst>(Term);
1767    });
1768    if (doesNotReturn)
1769      newFunction->setDoesNotReturn();
1770  
1771    LLVM_DEBUG(if (verifyFunction(*newFunction, &errs())) {
1772      newFunction->dump();
1773      report_fatal_error("verification of newFunction failed!");
1774    });
1775    LLVM_DEBUG(if (verifyFunction(*oldFunction))
1776               report_fatal_error("verification of oldFunction failed!"));
1777    LLVM_DEBUG(if (AC && verifyAssumptionCache(*oldFunction, *newFunction, AC))
1778                   report_fatal_error("Stale Asumption cache for old Function!"));
1779    return newFunction;
1780  }
1781  
1782  bool CodeExtractor::verifyAssumptionCache(const Function &OldFunc,
1783                                            const Function &NewFunc,
1784                                            AssumptionCache *AC) {
1785    for (auto AssumeVH : AC->assumptions()) {
1786      auto *I = dyn_cast_or_null<CallInst>(AssumeVH);
1787      if (!I)
1788        continue;
1789  
1790      // There shouldn't be any llvm.assume intrinsics in the new function.
1791      if (I->getFunction() != &OldFunc)
1792        return true;
1793  
1794      // There shouldn't be any stale affected values in the assumption cache
1795      // that were previously in the old function, but that have now been moved
1796      // to the new function.
1797      for (auto AffectedValVH : AC->assumptionsFor(I->getOperand(0))) {
1798        auto *AffectedCI = dyn_cast_or_null<CallInst>(AffectedValVH);
1799        if (!AffectedCI)
1800          continue;
1801        if (AffectedCI->getFunction() != &OldFunc)
1802          return true;
1803        auto *AssumedInst = cast<Instruction>(AffectedCI->getOperand(0));
1804        if (AssumedInst->getFunction() != &OldFunc)
1805          return true;
1806      }
1807    }
1808    return false;
1809  }
1810