xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp (revision 258a0d760aa8b42899a000e30f610f900a402556)
1 //===- ObjCARCOpts.cpp - ObjC ARC Optimization ----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This file defines ObjC ARC optimizations. ARC stands for Automatic
11 /// Reference Counting and is a system for managing reference counts for objects
12 /// in Objective C.
13 ///
14 /// The optimizations performed include elimination of redundant, partially
15 /// redundant, and inconsequential reference count operations, elimination of
16 /// redundant weak pointer operations, and numerous minor simplifications.
17 ///
18 /// WARNING: This file knows about certain library functions. It recognizes them
19 /// by name, and hardwires knowledge of their semantics.
20 ///
21 /// WARNING: This file knows about how certain Objective-C library functions are
22 /// used. Naive LLVM IR transformations which would otherwise be
23 /// behavior-preserving may break these assumptions.
24 //
25 //===----------------------------------------------------------------------===//
26 
27 #include "ARCRuntimeEntryPoints.h"
28 #include "BlotMapVector.h"
29 #include "DependencyAnalysis.h"
30 #include "ObjCARC.h"
31 #include "ProvenanceAnalysis.h"
32 #include "PtrState.h"
33 #include "llvm/ADT/DenseMap.h"
34 #include "llvm/ADT/STLExtras.h"
35 #include "llvm/ADT/SmallPtrSet.h"
36 #include "llvm/ADT/SmallVector.h"
37 #include "llvm/ADT/Statistic.h"
38 #include "llvm/Analysis/AliasAnalysis.h"
39 #include "llvm/Analysis/EHPersonalities.h"
40 #include "llvm/Analysis/ObjCARCAliasAnalysis.h"
41 #include "llvm/Analysis/ObjCARCAnalysisUtils.h"
42 #include "llvm/Analysis/ObjCARCInstKind.h"
43 #include "llvm/Analysis/ObjCARCUtil.h"
44 #include "llvm/IR/BasicBlock.h"
45 #include "llvm/IR/CFG.h"
46 #include "llvm/IR/Constant.h"
47 #include "llvm/IR/Constants.h"
48 #include "llvm/IR/DerivedTypes.h"
49 #include "llvm/IR/Function.h"
50 #include "llvm/IR/GlobalVariable.h"
51 #include "llvm/IR/InstIterator.h"
52 #include "llvm/IR/InstrTypes.h"
53 #include "llvm/IR/Instruction.h"
54 #include "llvm/IR/Instructions.h"
55 #include "llvm/IR/LLVMContext.h"
56 #include "llvm/IR/Metadata.h"
57 #include "llvm/IR/Type.h"
58 #include "llvm/IR/User.h"
59 #include "llvm/IR/Value.h"
60 #include "llvm/Support/Casting.h"
61 #include "llvm/Support/CommandLine.h"
62 #include "llvm/Support/Compiler.h"
63 #include "llvm/Support/Debug.h"
64 #include "llvm/Support/ErrorHandling.h"
65 #include "llvm/Support/raw_ostream.h"
66 #include "llvm/Transforms/ObjCARC.h"
67 #include <cassert>
68 #include <iterator>
69 #include <utility>
70 
71 using namespace llvm;
72 using namespace llvm::objcarc;
73 
74 #define DEBUG_TYPE "objc-arc-opts"
75 
76 static cl::opt<unsigned> MaxPtrStates("arc-opt-max-ptr-states",
77     cl::Hidden,
78     cl::desc("Maximum number of ptr states the optimizer keeps track of"),
79     cl::init(4095));
80 
81 /// \defgroup ARCUtilities Utility declarations/definitions specific to ARC.
82 /// @{
83 
84 /// This is similar to GetRCIdentityRoot but it stops as soon
85 /// as it finds a value with multiple uses.
86 static const Value *FindSingleUseIdentifiedObject(const Value *Arg) {
87   // ConstantData (like ConstantPointerNull and UndefValue) is used across
88   // modules.  It's never a single-use value.
89   if (isa<ConstantData>(Arg))
90     return nullptr;
91 
92   if (Arg->hasOneUse()) {
93     if (const BitCastInst *BC = dyn_cast<BitCastInst>(Arg))
94       return FindSingleUseIdentifiedObject(BC->getOperand(0));
95     if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Arg))
96       if (GEP->hasAllZeroIndices())
97         return FindSingleUseIdentifiedObject(GEP->getPointerOperand());
98     if (IsForwarding(GetBasicARCInstKind(Arg)))
99       return FindSingleUseIdentifiedObject(
100                cast<CallInst>(Arg)->getArgOperand(0));
101     if (!IsObjCIdentifiedObject(Arg))
102       return nullptr;
103     return Arg;
104   }
105 
106   // If we found an identifiable object but it has multiple uses, but they are
107   // trivial uses, we can still consider this to be a single-use value.
108   if (IsObjCIdentifiedObject(Arg)) {
109     for (const User *U : Arg->users())
110       if (!U->use_empty() || GetRCIdentityRoot(U) != Arg)
111          return nullptr;
112 
113     return Arg;
114   }
115 
116   return nullptr;
117 }
118 
119 /// @}
120 ///
121 /// \defgroup ARCOpt ARC Optimization.
122 /// @{
123 
124 // TODO: On code like this:
125 //
126 // objc_retain(%x)
127 // stuff_that_cannot_release()
128 // objc_autorelease(%x)
129 // stuff_that_cannot_release()
130 // objc_retain(%x)
131 // stuff_that_cannot_release()
132 // objc_autorelease(%x)
133 //
134 // The second retain and autorelease can be deleted.
135 
136 // TODO: It should be possible to delete
137 // objc_autoreleasePoolPush and objc_autoreleasePoolPop
138 // pairs if nothing is actually autoreleased between them. Also, autorelease
139 // calls followed by objc_autoreleasePoolPop calls (perhaps in ObjC++ code
140 // after inlining) can be turned into plain release calls.
141 
142 // TODO: Critical-edge splitting. If the optimial insertion point is
143 // a critical edge, the current algorithm has to fail, because it doesn't
144 // know how to split edges. It should be possible to make the optimizer
145 // think in terms of edges, rather than blocks, and then split critical
146 // edges on demand.
147 
148 // TODO: OptimizeSequences could generalized to be Interprocedural.
149 
150 // TODO: Recognize that a bunch of other objc runtime calls have
151 // non-escaping arguments and non-releasing arguments, and may be
152 // non-autoreleasing.
153 
154 // TODO: Sink autorelease calls as far as possible. Unfortunately we
155 // usually can't sink them past other calls, which would be the main
156 // case where it would be useful.
157 
158 // TODO: The pointer returned from objc_loadWeakRetained is retained.
159 
160 // TODO: Delete release+retain pairs (rare).
161 
162 STATISTIC(NumNoops,       "Number of no-op objc calls eliminated");
163 STATISTIC(NumPartialNoops, "Number of partially no-op objc calls eliminated");
164 STATISTIC(NumAutoreleases,"Number of autoreleases converted to releases");
165 STATISTIC(NumRets,        "Number of return value forwarding "
166                           "retain+autoreleases eliminated");
167 STATISTIC(NumRRs,         "Number of retain+release paths eliminated");
168 STATISTIC(NumPeeps,       "Number of calls peephole-optimized");
169 #ifndef NDEBUG
170 STATISTIC(NumRetainsBeforeOpt,
171           "Number of retains before optimization");
172 STATISTIC(NumReleasesBeforeOpt,
173           "Number of releases before optimization");
174 STATISTIC(NumRetainsAfterOpt,
175           "Number of retains after optimization");
176 STATISTIC(NumReleasesAfterOpt,
177           "Number of releases after optimization");
178 #endif
179 
180 namespace {
181 
182   /// Per-BasicBlock state.
183   class BBState {
184     /// The number of unique control paths from the entry which can reach this
185     /// block.
186     unsigned TopDownPathCount = 0;
187 
188     /// The number of unique control paths to exits from this block.
189     unsigned BottomUpPathCount = 0;
190 
191     /// The top-down traversal uses this to record information known about a
192     /// pointer at the bottom of each block.
193     BlotMapVector<const Value *, TopDownPtrState> PerPtrTopDown;
194 
195     /// The bottom-up traversal uses this to record information known about a
196     /// pointer at the top of each block.
197     BlotMapVector<const Value *, BottomUpPtrState> PerPtrBottomUp;
198 
199     /// Effective predecessors of the current block ignoring ignorable edges and
200     /// ignored backedges.
201     SmallVector<BasicBlock *, 2> Preds;
202 
203     /// Effective successors of the current block ignoring ignorable edges and
204     /// ignored backedges.
205     SmallVector<BasicBlock *, 2> Succs;
206 
207   public:
208     static const unsigned OverflowOccurredValue;
209 
210     BBState() = default;
211 
212     using top_down_ptr_iterator = decltype(PerPtrTopDown)::iterator;
213     using const_top_down_ptr_iterator = decltype(PerPtrTopDown)::const_iterator;
214 
215     top_down_ptr_iterator top_down_ptr_begin() { return PerPtrTopDown.begin(); }
216     top_down_ptr_iterator top_down_ptr_end() { return PerPtrTopDown.end(); }
217     const_top_down_ptr_iterator top_down_ptr_begin() const {
218       return PerPtrTopDown.begin();
219     }
220     const_top_down_ptr_iterator top_down_ptr_end() const {
221       return PerPtrTopDown.end();
222     }
223     bool hasTopDownPtrs() const {
224       return !PerPtrTopDown.empty();
225     }
226 
227     unsigned top_down_ptr_list_size() const {
228       return std::distance(top_down_ptr_begin(), top_down_ptr_end());
229     }
230 
231     using bottom_up_ptr_iterator = decltype(PerPtrBottomUp)::iterator;
232     using const_bottom_up_ptr_iterator =
233         decltype(PerPtrBottomUp)::const_iterator;
234 
235     bottom_up_ptr_iterator bottom_up_ptr_begin() {
236       return PerPtrBottomUp.begin();
237     }
238     bottom_up_ptr_iterator bottom_up_ptr_end() { return PerPtrBottomUp.end(); }
239     const_bottom_up_ptr_iterator bottom_up_ptr_begin() const {
240       return PerPtrBottomUp.begin();
241     }
242     const_bottom_up_ptr_iterator bottom_up_ptr_end() const {
243       return PerPtrBottomUp.end();
244     }
245     bool hasBottomUpPtrs() const {
246       return !PerPtrBottomUp.empty();
247     }
248 
249     unsigned bottom_up_ptr_list_size() const {
250       return std::distance(bottom_up_ptr_begin(), bottom_up_ptr_end());
251     }
252 
253     /// Mark this block as being an entry block, which has one path from the
254     /// entry by definition.
255     void SetAsEntry() { TopDownPathCount = 1; }
256 
257     /// Mark this block as being an exit block, which has one path to an exit by
258     /// definition.
259     void SetAsExit()  { BottomUpPathCount = 1; }
260 
261     /// Attempt to find the PtrState object describing the top down state for
262     /// pointer Arg. Return a new initialized PtrState describing the top down
263     /// state for Arg if we do not find one.
264     TopDownPtrState &getPtrTopDownState(const Value *Arg) {
265       return PerPtrTopDown[Arg];
266     }
267 
268     /// Attempt to find the PtrState object describing the bottom up state for
269     /// pointer Arg. Return a new initialized PtrState describing the bottom up
270     /// state for Arg if we do not find one.
271     BottomUpPtrState &getPtrBottomUpState(const Value *Arg) {
272       return PerPtrBottomUp[Arg];
273     }
274 
275     /// Attempt to find the PtrState object describing the bottom up state for
276     /// pointer Arg.
277     bottom_up_ptr_iterator findPtrBottomUpState(const Value *Arg) {
278       return PerPtrBottomUp.find(Arg);
279     }
280 
281     void clearBottomUpPointers() {
282       PerPtrBottomUp.clear();
283     }
284 
285     void clearTopDownPointers() {
286       PerPtrTopDown.clear();
287     }
288 
289     void InitFromPred(const BBState &Other);
290     void InitFromSucc(const BBState &Other);
291     void MergePred(const BBState &Other);
292     void MergeSucc(const BBState &Other);
293 
294     /// Compute the number of possible unique paths from an entry to an exit
295     /// which pass through this block. This is only valid after both the
296     /// top-down and bottom-up traversals are complete.
297     ///
298     /// Returns true if overflow occurred. Returns false if overflow did not
299     /// occur.
300     bool GetAllPathCountWithOverflow(unsigned &PathCount) const {
301       if (TopDownPathCount == OverflowOccurredValue ||
302           BottomUpPathCount == OverflowOccurredValue)
303         return true;
304       unsigned long long Product =
305         (unsigned long long)TopDownPathCount*BottomUpPathCount;
306       // Overflow occurred if any of the upper bits of Product are set or if all
307       // the lower bits of Product are all set.
308       return (Product >> 32) ||
309              ((PathCount = Product) == OverflowOccurredValue);
310     }
311 
312     // Specialized CFG utilities.
313     using edge_iterator = SmallVectorImpl<BasicBlock *>::const_iterator;
314 
315     edge_iterator pred_begin() const { return Preds.begin(); }
316     edge_iterator pred_end() const { return Preds.end(); }
317     edge_iterator succ_begin() const { return Succs.begin(); }
318     edge_iterator succ_end() const { return Succs.end(); }
319 
320     void addSucc(BasicBlock *Succ) { Succs.push_back(Succ); }
321     void addPred(BasicBlock *Pred) { Preds.push_back(Pred); }
322 
323     bool isExit() const { return Succs.empty(); }
324   };
325 
326 } // end anonymous namespace
327 
328 const unsigned BBState::OverflowOccurredValue = 0xffffffff;
329 
330 namespace llvm {
331 
332 raw_ostream &operator<<(raw_ostream &OS,
333                         BBState &BBState) LLVM_ATTRIBUTE_UNUSED;
334 
335 } // end namespace llvm
336 
337 void BBState::InitFromPred(const BBState &Other) {
338   PerPtrTopDown = Other.PerPtrTopDown;
339   TopDownPathCount = Other.TopDownPathCount;
340 }
341 
342 void BBState::InitFromSucc(const BBState &Other) {
343   PerPtrBottomUp = Other.PerPtrBottomUp;
344   BottomUpPathCount = Other.BottomUpPathCount;
345 }
346 
347 /// The top-down traversal uses this to merge information about predecessors to
348 /// form the initial state for a new block.
349 void BBState::MergePred(const BBState &Other) {
350   if (TopDownPathCount == OverflowOccurredValue)
351     return;
352 
353   // Other.TopDownPathCount can be 0, in which case it is either dead or a
354   // loop backedge. Loop backedges are special.
355   TopDownPathCount += Other.TopDownPathCount;
356 
357   // In order to be consistent, we clear the top down pointers when by adding
358   // TopDownPathCount becomes OverflowOccurredValue even though "true" overflow
359   // has not occurred.
360   if (TopDownPathCount == OverflowOccurredValue) {
361     clearTopDownPointers();
362     return;
363   }
364 
365   // Check for overflow. If we have overflow, fall back to conservative
366   // behavior.
367   if (TopDownPathCount < Other.TopDownPathCount) {
368     TopDownPathCount = OverflowOccurredValue;
369     clearTopDownPointers();
370     return;
371   }
372 
373   // For each entry in the other set, if our set has an entry with the same key,
374   // merge the entries. Otherwise, copy the entry and merge it with an empty
375   // entry.
376   for (auto MI = Other.top_down_ptr_begin(), ME = Other.top_down_ptr_end();
377        MI != ME; ++MI) {
378     auto Pair = PerPtrTopDown.insert(*MI);
379     Pair.first->second.Merge(Pair.second ? TopDownPtrState() : MI->second,
380                              /*TopDown=*/true);
381   }
382 
383   // For each entry in our set, if the other set doesn't have an entry with the
384   // same key, force it to merge with an empty entry.
385   for (auto MI = top_down_ptr_begin(), ME = top_down_ptr_end(); MI != ME; ++MI)
386     if (Other.PerPtrTopDown.find(MI->first) == Other.PerPtrTopDown.end())
387       MI->second.Merge(TopDownPtrState(), /*TopDown=*/true);
388 }
389 
390 /// The bottom-up traversal uses this to merge information about successors to
391 /// form the initial state for a new block.
392 void BBState::MergeSucc(const BBState &Other) {
393   if (BottomUpPathCount == OverflowOccurredValue)
394     return;
395 
396   // Other.BottomUpPathCount can be 0, in which case it is either dead or a
397   // loop backedge. Loop backedges are special.
398   BottomUpPathCount += Other.BottomUpPathCount;
399 
400   // In order to be consistent, we clear the top down pointers when by adding
401   // BottomUpPathCount becomes OverflowOccurredValue even though "true" overflow
402   // has not occurred.
403   if (BottomUpPathCount == OverflowOccurredValue) {
404     clearBottomUpPointers();
405     return;
406   }
407 
408   // Check for overflow. If we have overflow, fall back to conservative
409   // behavior.
410   if (BottomUpPathCount < Other.BottomUpPathCount) {
411     BottomUpPathCount = OverflowOccurredValue;
412     clearBottomUpPointers();
413     return;
414   }
415 
416   // For each entry in the other set, if our set has an entry with the
417   // same key, merge the entries. Otherwise, copy the entry and merge
418   // it with an empty entry.
419   for (auto MI = Other.bottom_up_ptr_begin(), ME = Other.bottom_up_ptr_end();
420        MI != ME; ++MI) {
421     auto Pair = PerPtrBottomUp.insert(*MI);
422     Pair.first->second.Merge(Pair.second ? BottomUpPtrState() : MI->second,
423                              /*TopDown=*/false);
424   }
425 
426   // For each entry in our set, if the other set doesn't have an entry
427   // with the same key, force it to merge with an empty entry.
428   for (auto MI = bottom_up_ptr_begin(), ME = bottom_up_ptr_end(); MI != ME;
429        ++MI)
430     if (Other.PerPtrBottomUp.find(MI->first) == Other.PerPtrBottomUp.end())
431       MI->second.Merge(BottomUpPtrState(), /*TopDown=*/false);
432 }
433 
434 raw_ostream &llvm::operator<<(raw_ostream &OS, BBState &BBInfo) {
435   // Dump the pointers we are tracking.
436   OS << "    TopDown State:\n";
437   if (!BBInfo.hasTopDownPtrs()) {
438     LLVM_DEBUG(dbgs() << "        NONE!\n");
439   } else {
440     for (auto I = BBInfo.top_down_ptr_begin(), E = BBInfo.top_down_ptr_end();
441          I != E; ++I) {
442       const PtrState &P = I->second;
443       OS << "        Ptr: " << *I->first
444          << "\n            KnownSafe:        " << (P.IsKnownSafe()?"true":"false")
445          << "\n            ImpreciseRelease: "
446            << (P.IsTrackingImpreciseReleases()?"true":"false") << "\n"
447          << "            HasCFGHazards:    "
448            << (P.IsCFGHazardAfflicted()?"true":"false") << "\n"
449          << "            KnownPositive:    "
450            << (P.HasKnownPositiveRefCount()?"true":"false") << "\n"
451          << "            Seq:              "
452          << P.GetSeq() << "\n";
453     }
454   }
455 
456   OS << "    BottomUp State:\n";
457   if (!BBInfo.hasBottomUpPtrs()) {
458     LLVM_DEBUG(dbgs() << "        NONE!\n");
459   } else {
460     for (auto I = BBInfo.bottom_up_ptr_begin(), E = BBInfo.bottom_up_ptr_end();
461          I != E; ++I) {
462       const PtrState &P = I->second;
463       OS << "        Ptr: " << *I->first
464          << "\n            KnownSafe:        " << (P.IsKnownSafe()?"true":"false")
465          << "\n            ImpreciseRelease: "
466            << (P.IsTrackingImpreciseReleases()?"true":"false") << "\n"
467          << "            HasCFGHazards:    "
468            << (P.IsCFGHazardAfflicted()?"true":"false") << "\n"
469          << "            KnownPositive:    "
470            << (P.HasKnownPositiveRefCount()?"true":"false") << "\n"
471          << "            Seq:              "
472          << P.GetSeq() << "\n";
473     }
474   }
475 
476   return OS;
477 }
478 
479 namespace {
480 
481   /// The main ARC optimization pass.
482 class ObjCARCOpt {
483   bool Changed = false;
484   bool CFGChanged = false;
485   ProvenanceAnalysis PA;
486 
487   /// A cache of references to runtime entry point constants.
488   ARCRuntimeEntryPoints EP;
489 
490   /// A cache of MDKinds that can be passed into other functions to propagate
491   /// MDKind identifiers.
492   ARCMDKindCache MDKindCache;
493 
494   BundledRetainClaimRVs *BundledInsts = nullptr;
495 
496   /// A flag indicating whether the optimization that removes or moves
497   /// retain/release pairs should be performed.
498   bool DisableRetainReleasePairing = false;
499 
500   /// Flags which determine whether each of the interesting runtime functions
501   /// is in fact used in the current function.
502   unsigned UsedInThisFunction;
503 
504   DenseMap<BasicBlock *, ColorVector> BlockEHColors;
505 
506   bool OptimizeRetainRVCall(Function &F, Instruction *RetainRV);
507   void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV,
508                                  ARCInstKind &Class);
509   void OptimizeIndividualCalls(Function &F);
510 
511   /// Optimize an individual call, optionally passing the
512   /// GetArgRCIdentityRoot if it has already been computed.
513   void OptimizeIndividualCallImpl(Function &F, Instruction *Inst,
514                                   ARCInstKind Class, const Value *Arg);
515 
516   /// Try to optimize an AutoreleaseRV with a RetainRV or UnsafeClaimRV.  If the
517   /// optimization occurs, returns true to indicate that the caller should
518   /// assume the instructions are dead.
519   bool OptimizeInlinedAutoreleaseRVCall(Function &F, Instruction *Inst,
520                                         const Value *&Arg, ARCInstKind Class,
521                                         Instruction *AutoreleaseRV,
522                                         const Value *&AutoreleaseRVArg);
523 
524   void CheckForCFGHazards(const BasicBlock *BB,
525                           DenseMap<const BasicBlock *, BBState> &BBStates,
526                           BBState &MyStates) const;
527   bool VisitInstructionBottomUp(Instruction *Inst, BasicBlock *BB,
528                                 BlotMapVector<Value *, RRInfo> &Retains,
529                                 BBState &MyStates);
530   bool VisitBottomUp(BasicBlock *BB,
531                      DenseMap<const BasicBlock *, BBState> &BBStates,
532                      BlotMapVector<Value *, RRInfo> &Retains);
533   bool VisitInstructionTopDown(
534       Instruction *Inst, DenseMap<Value *, RRInfo> &Releases, BBState &MyStates,
535       const DenseMap<const Instruction *, SmallPtrSet<const Value *, 2>>
536           &ReleaseInsertPtToRCIdentityRoots);
537   bool VisitTopDown(
538       BasicBlock *BB, DenseMap<const BasicBlock *, BBState> &BBStates,
539       DenseMap<Value *, RRInfo> &Releases,
540       const DenseMap<const Instruction *, SmallPtrSet<const Value *, 2>>
541           &ReleaseInsertPtToRCIdentityRoots);
542   bool Visit(Function &F, DenseMap<const BasicBlock *, BBState> &BBStates,
543              BlotMapVector<Value *, RRInfo> &Retains,
544              DenseMap<Value *, RRInfo> &Releases);
545 
546   void MoveCalls(Value *Arg, RRInfo &RetainsToMove, RRInfo &ReleasesToMove,
547                  BlotMapVector<Value *, RRInfo> &Retains,
548                  DenseMap<Value *, RRInfo> &Releases,
549                  SmallVectorImpl<Instruction *> &DeadInsts, Module *M);
550 
551   bool PairUpRetainsAndReleases(DenseMap<const BasicBlock *, BBState> &BBStates,
552                                 BlotMapVector<Value *, RRInfo> &Retains,
553                                 DenseMap<Value *, RRInfo> &Releases, Module *M,
554                                 Instruction *Retain,
555                                 SmallVectorImpl<Instruction *> &DeadInsts,
556                                 RRInfo &RetainsToMove, RRInfo &ReleasesToMove,
557                                 Value *Arg, bool KnownSafe,
558                                 bool &AnyPairsCompletelyEliminated);
559 
560   bool PerformCodePlacement(DenseMap<const BasicBlock *, BBState> &BBStates,
561                             BlotMapVector<Value *, RRInfo> &Retains,
562                             DenseMap<Value *, RRInfo> &Releases, Module *M);
563 
564   void OptimizeWeakCalls(Function &F);
565 
566   bool OptimizeSequences(Function &F);
567 
568   void OptimizeReturns(Function &F);
569 
570   template <typename PredicateT>
571   static void cloneOpBundlesIf(CallBase *CI,
572                                SmallVectorImpl<OperandBundleDef> &OpBundles,
573                                PredicateT Predicate) {
574     for (unsigned I = 0, E = CI->getNumOperandBundles(); I != E; ++I) {
575       OperandBundleUse B = CI->getOperandBundleAt(I);
576       if (Predicate(B))
577         OpBundles.emplace_back(B);
578     }
579   }
580 
581   void addOpBundleForFunclet(BasicBlock *BB,
582                              SmallVectorImpl<OperandBundleDef> &OpBundles) {
583     if (!BlockEHColors.empty()) {
584       const ColorVector &CV = BlockEHColors.find(BB)->second;
585       assert(CV.size() > 0 && "Uncolored block");
586       for (BasicBlock *EHPadBB : CV)
587         if (auto *EHPad = dyn_cast<FuncletPadInst>(EHPadBB->getFirstNonPHI())) {
588           OpBundles.emplace_back("funclet", EHPad);
589           return;
590         }
591     }
592   }
593 
594 #ifndef NDEBUG
595   void GatherStatistics(Function &F, bool AfterOptimization = false);
596 #endif
597 
598   public:
599     void init(Function &F);
600     bool run(Function &F, AAResults &AA);
601     bool hasCFGChanged() const { return CFGChanged; }
602 };
603 } // end anonymous namespace
604 
605 /// Turn objc_retainAutoreleasedReturnValue into objc_retain if the operand is
606 /// not a return value.
607 bool
608 ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
609   // Check for the argument being from an immediately preceding call or invoke.
610   const Value *Arg = GetArgRCIdentityRoot(RetainRV);
611   if (const Instruction *Call = dyn_cast<CallBase>(Arg)) {
612     if (Call->getParent() == RetainRV->getParent()) {
613       BasicBlock::const_iterator I(Call);
614       ++I;
615       while (IsNoopInstruction(&*I))
616         ++I;
617       if (&*I == RetainRV)
618         return false;
619     } else if (const InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
620       BasicBlock *RetainRVParent = RetainRV->getParent();
621       if (II->getNormalDest() == RetainRVParent) {
622         BasicBlock::const_iterator I = RetainRVParent->begin();
623         while (IsNoopInstruction(&*I))
624           ++I;
625         if (&*I == RetainRV)
626           return false;
627       }
628     }
629   }
630 
631   assert(!BundledInsts->contains(RetainRV) &&
632          "a bundled retainRV's argument should be a call");
633 
634   // Turn it to a plain objc_retain.
635   Changed = true;
636   ++NumPeeps;
637 
638   LLVM_DEBUG(dbgs() << "Transforming objc_retainAutoreleasedReturnValue => "
639                        "objc_retain since the operand is not a return value.\n"
640                        "Old = "
641                     << *RetainRV << "\n");
642 
643   Function *NewDecl = EP.get(ARCRuntimeEntryPointKind::Retain);
644   cast<CallInst>(RetainRV)->setCalledFunction(NewDecl);
645 
646   LLVM_DEBUG(dbgs() << "New = " << *RetainRV << "\n");
647 
648   return false;
649 }
650 
651 bool ObjCARCOpt::OptimizeInlinedAutoreleaseRVCall(
652     Function &F, Instruction *Inst, const Value *&Arg, ARCInstKind Class,
653     Instruction *AutoreleaseRV, const Value *&AutoreleaseRVArg) {
654   if (BundledInsts->contains(Inst))
655     return false;
656 
657   // Must be in the same basic block.
658   assert(Inst->getParent() == AutoreleaseRV->getParent());
659 
660   // Must operate on the same root.
661   Arg = GetArgRCIdentityRoot(Inst);
662   AutoreleaseRVArg = GetArgRCIdentityRoot(AutoreleaseRV);
663   if (Arg != AutoreleaseRVArg) {
664     // If there isn't an exact match, check if we have equivalent PHIs.
665     const PHINode *PN = dyn_cast<PHINode>(Arg);
666     if (!PN)
667       return false;
668 
669     SmallVector<const Value *, 4> ArgUsers;
670     getEquivalentPHIs(*PN, ArgUsers);
671     if (!llvm::is_contained(ArgUsers, AutoreleaseRVArg))
672       return false;
673   }
674 
675   // Okay, this is a match.  Merge them.
676   ++NumPeeps;
677   LLVM_DEBUG(dbgs() << "Found inlined objc_autoreleaseReturnValue '"
678                     << *AutoreleaseRV << "' paired with '" << *Inst << "'\n");
679 
680   // Delete the RV pair, starting with the AutoreleaseRV.
681   AutoreleaseRV->replaceAllUsesWith(
682       cast<CallInst>(AutoreleaseRV)->getArgOperand(0));
683   Changed = true;
684   EraseInstruction(AutoreleaseRV);
685   if (Class == ARCInstKind::RetainRV) {
686     // AutoreleaseRV and RetainRV cancel out.  Delete the RetainRV.
687     Inst->replaceAllUsesWith(cast<CallInst>(Inst)->getArgOperand(0));
688     EraseInstruction(Inst);
689     return true;
690   }
691 
692   // UnsafeClaimRV is a frontend peephole for RetainRV + Release.  Since the
693   // AutoreleaseRV and RetainRV cancel out, replace UnsafeClaimRV with Release.
694   assert(Class == ARCInstKind::UnsafeClaimRV);
695   Value *CallArg = cast<CallInst>(Inst)->getArgOperand(0);
696   CallInst *Release = CallInst::Create(
697       EP.get(ARCRuntimeEntryPointKind::Release), CallArg, "", Inst);
698   assert(IsAlwaysTail(ARCInstKind::UnsafeClaimRV) &&
699          "Expected UnsafeClaimRV to be safe to tail call");
700   Release->setTailCall();
701   Inst->replaceAllUsesWith(CallArg);
702   EraseInstruction(Inst);
703 
704   // Run the normal optimizations on Release.
705   OptimizeIndividualCallImpl(F, Release, ARCInstKind::Release, Arg);
706   return true;
707 }
708 
709 /// Turn objc_autoreleaseReturnValue into objc_autorelease if the result is not
710 /// used as a return value.
711 void ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F,
712                                            Instruction *AutoreleaseRV,
713                                            ARCInstKind &Class) {
714   // Check for a return of the pointer value.
715   const Value *Ptr = GetArgRCIdentityRoot(AutoreleaseRV);
716 
717   // If the argument is ConstantPointerNull or UndefValue, its other users
718   // aren't actually interesting to look at.
719   if (isa<ConstantData>(Ptr))
720     return;
721 
722   SmallVector<const Value *, 2> Users;
723   Users.push_back(Ptr);
724 
725   // Add PHIs that are equivalent to Ptr to Users.
726   if (const PHINode *PN = dyn_cast<PHINode>(Ptr))
727     getEquivalentPHIs(*PN, Users);
728 
729   do {
730     Ptr = Users.pop_back_val();
731     for (const User *U : Ptr->users()) {
732       if (isa<ReturnInst>(U) || GetBasicARCInstKind(U) == ARCInstKind::RetainRV)
733         return;
734       if (isa<BitCastInst>(U))
735         Users.push_back(U);
736     }
737   } while (!Users.empty());
738 
739   Changed = true;
740   ++NumPeeps;
741 
742   LLVM_DEBUG(
743       dbgs() << "Transforming objc_autoreleaseReturnValue => "
744                 "objc_autorelease since its operand is not used as a return "
745                 "value.\n"
746                 "Old = "
747              << *AutoreleaseRV << "\n");
748 
749   CallInst *AutoreleaseRVCI = cast<CallInst>(AutoreleaseRV);
750   Function *NewDecl = EP.get(ARCRuntimeEntryPointKind::Autorelease);
751   AutoreleaseRVCI->setCalledFunction(NewDecl);
752   AutoreleaseRVCI->setTailCall(false); // Never tail call objc_autorelease.
753   Class = ARCInstKind::Autorelease;
754 
755   LLVM_DEBUG(dbgs() << "New: " << *AutoreleaseRV << "\n");
756 }
757 
758 /// Visit each call, one at a time, and make simplifications without doing any
759 /// additional analysis.
760 void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
761   LLVM_DEBUG(dbgs() << "\n== ObjCARCOpt::OptimizeIndividualCalls ==\n");
762   // Reset all the flags in preparation for recomputing them.
763   UsedInThisFunction = 0;
764 
765   // Store any delayed AutoreleaseRV intrinsics, so they can be easily paired
766   // with RetainRV and UnsafeClaimRV.
767   Instruction *DelayedAutoreleaseRV = nullptr;
768   const Value *DelayedAutoreleaseRVArg = nullptr;
769   auto setDelayedAutoreleaseRV = [&](Instruction *AutoreleaseRV) {
770     assert(!DelayedAutoreleaseRV || !AutoreleaseRV);
771     DelayedAutoreleaseRV = AutoreleaseRV;
772     DelayedAutoreleaseRVArg = nullptr;
773   };
774   auto optimizeDelayedAutoreleaseRV = [&]() {
775     if (!DelayedAutoreleaseRV)
776       return;
777     OptimizeIndividualCallImpl(F, DelayedAutoreleaseRV,
778                                ARCInstKind::AutoreleaseRV,
779                                DelayedAutoreleaseRVArg);
780     setDelayedAutoreleaseRV(nullptr);
781   };
782   auto shouldDelayAutoreleaseRV = [&](Instruction *NonARCInst) {
783     // Nothing to delay, but we may as well skip the logic below.
784     if (!DelayedAutoreleaseRV)
785       return true;
786 
787     // If we hit the end of the basic block we're not going to find an RV-pair.
788     // Stop delaying.
789     if (NonARCInst->isTerminator())
790       return false;
791 
792     // Given the frontend rules for emitting AutoreleaseRV, RetainRV, and
793     // UnsafeClaimRV, it's probably safe to skip over even opaque function calls
794     // here since OptimizeInlinedAutoreleaseRVCall will confirm that they
795     // have the same RCIdentityRoot.  However, what really matters is
796     // skipping instructions or intrinsics that the inliner could leave behind;
797     // be conservative for now and don't skip over opaque calls, which could
798     // potentially include other ARC calls.
799     auto *CB = dyn_cast<CallBase>(NonARCInst);
800     if (!CB)
801       return true;
802     return CB->getIntrinsicID() != Intrinsic::not_intrinsic;
803   };
804 
805   // Visit all objc_* calls in F.
806   for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
807     Instruction *Inst = &*I++;
808 
809     if (auto *CI = dyn_cast<CallInst>(Inst))
810       if (objcarc::hasAttachedCallOpBundle(CI)) {
811         BundledInsts->insertRVCall(&*I, CI);
812         Changed = true;
813       }
814 
815     ARCInstKind Class = GetBasicARCInstKind(Inst);
816 
817     // Skip this loop if this instruction isn't itself an ARC intrinsic.
818     const Value *Arg = nullptr;
819     switch (Class) {
820     default:
821       optimizeDelayedAutoreleaseRV();
822       break;
823     case ARCInstKind::CallOrUser:
824     case ARCInstKind::User:
825     case ARCInstKind::None:
826       // This is a non-ARC instruction.  If we're delaying an AutoreleaseRV,
827       // check if it's safe to skip over it; if not, optimize the AutoreleaseRV
828       // now.
829       if (!shouldDelayAutoreleaseRV(Inst))
830         optimizeDelayedAutoreleaseRV();
831       continue;
832     case ARCInstKind::AutoreleaseRV:
833       optimizeDelayedAutoreleaseRV();
834       setDelayedAutoreleaseRV(Inst);
835       continue;
836     case ARCInstKind::RetainRV:
837     case ARCInstKind::UnsafeClaimRV:
838       if (DelayedAutoreleaseRV) {
839         // We have a potential RV pair.  Check if they cancel out.
840         if (OptimizeInlinedAutoreleaseRVCall(F, Inst, Arg, Class,
841                                              DelayedAutoreleaseRV,
842                                              DelayedAutoreleaseRVArg)) {
843           setDelayedAutoreleaseRV(nullptr);
844           continue;
845         }
846         optimizeDelayedAutoreleaseRV();
847       }
848       break;
849     }
850 
851     OptimizeIndividualCallImpl(F, Inst, Class, Arg);
852   }
853 
854   // Catch the final delayed AutoreleaseRV.
855   optimizeDelayedAutoreleaseRV();
856 }
857 
858 /// This function returns true if the value is inert. An ObjC ARC runtime call
859 /// taking an inert operand can be safely deleted.
860 static bool isInertARCValue(Value *V, SmallPtrSet<Value *, 1> &VisitedPhis) {
861   V = V->stripPointerCasts();
862 
863   if (IsNullOrUndef(V))
864     return true;
865 
866   // See if this is a global attribute annotated with an 'objc_arc_inert'.
867   if (auto *GV = dyn_cast<GlobalVariable>(V))
868     if (GV->hasAttribute("objc_arc_inert"))
869       return true;
870 
871   if (auto PN = dyn_cast<PHINode>(V)) {
872     // Ignore this phi if it has already been discovered.
873     if (!VisitedPhis.insert(PN).second)
874       return true;
875     // Look through phis's operands.
876     for (Value *Opnd : PN->incoming_values())
877       if (!isInertARCValue(Opnd, VisitedPhis))
878         return false;
879     return true;
880   }
881 
882   return false;
883 }
884 
885 void ObjCARCOpt::OptimizeIndividualCallImpl(Function &F, Instruction *Inst,
886                                             ARCInstKind Class,
887                                             const Value *Arg) {
888   LLVM_DEBUG(dbgs() << "Visiting: Class: " << Class << "; " << *Inst << "\n");
889 
890   // We can delete this call if it takes an inert value.
891   SmallPtrSet<Value *, 1> VisitedPhis;
892 
893   if (BundledInsts->contains(Inst)) {
894     UsedInThisFunction |= 1 << unsigned(Class);
895     return;
896   }
897 
898   if (IsNoopOnGlobal(Class))
899     if (isInertARCValue(Inst->getOperand(0), VisitedPhis)) {
900       if (!Inst->getType()->isVoidTy())
901         Inst->replaceAllUsesWith(Inst->getOperand(0));
902       Inst->eraseFromParent();
903       Changed = true;
904       return;
905     }
906 
907   switch (Class) {
908   default:
909     break;
910 
911   // Delete no-op casts. These function calls have special semantics, but
912   // the semantics are entirely implemented via lowering in the front-end,
913   // so by the time they reach the optimizer, they are just no-op calls
914   // which return their argument.
915   //
916   // There are gray areas here, as the ability to cast reference-counted
917   // pointers to raw void* and back allows code to break ARC assumptions,
918   // however these are currently considered to be unimportant.
919   case ARCInstKind::NoopCast:
920     Changed = true;
921     ++NumNoops;
922     LLVM_DEBUG(dbgs() << "Erasing no-op cast: " << *Inst << "\n");
923     EraseInstruction(Inst);
924     return;
925 
926   // If the pointer-to-weak-pointer is null, it's undefined behavior.
927   case ARCInstKind::StoreWeak:
928   case ARCInstKind::LoadWeak:
929   case ARCInstKind::LoadWeakRetained:
930   case ARCInstKind::InitWeak:
931   case ARCInstKind::DestroyWeak: {
932     CallInst *CI = cast<CallInst>(Inst);
933     if (IsNullOrUndef(CI->getArgOperand(0))) {
934       Changed = true;
935       new StoreInst(ConstantInt::getTrue(CI->getContext()),
936                     UndefValue::get(Type::getInt1PtrTy(CI->getContext())), CI);
937       Value *NewValue = UndefValue::get(CI->getType());
938       LLVM_DEBUG(
939           dbgs() << "A null pointer-to-weak-pointer is undefined behavior."
940                     "\nOld = "
941                  << *CI << "\nNew = " << *NewValue << "\n");
942       CI->replaceAllUsesWith(NewValue);
943       CI->eraseFromParent();
944       return;
945     }
946     break;
947   }
948   case ARCInstKind::CopyWeak:
949   case ARCInstKind::MoveWeak: {
950     CallInst *CI = cast<CallInst>(Inst);
951     if (IsNullOrUndef(CI->getArgOperand(0)) ||
952         IsNullOrUndef(CI->getArgOperand(1))) {
953       Changed = true;
954       new StoreInst(ConstantInt::getTrue(CI->getContext()),
955                     UndefValue::get(Type::getInt1PtrTy(CI->getContext())), CI);
956 
957       Value *NewValue = UndefValue::get(CI->getType());
958       LLVM_DEBUG(
959           dbgs() << "A null pointer-to-weak-pointer is undefined behavior."
960                     "\nOld = "
961                  << *CI << "\nNew = " << *NewValue << "\n");
962 
963       CI->replaceAllUsesWith(NewValue);
964       CI->eraseFromParent();
965       return;
966     }
967     break;
968   }
969   case ARCInstKind::RetainRV:
970     if (OptimizeRetainRVCall(F, Inst))
971       return;
972     break;
973   case ARCInstKind::AutoreleaseRV:
974     OptimizeAutoreleaseRVCall(F, Inst, Class);
975     break;
976   }
977 
978   // objc_autorelease(x) -> objc_release(x) if x is otherwise unused.
979   if (IsAutorelease(Class) && Inst->use_empty()) {
980     CallInst *Call = cast<CallInst>(Inst);
981     const Value *Arg = Call->getArgOperand(0);
982     Arg = FindSingleUseIdentifiedObject(Arg);
983     if (Arg) {
984       Changed = true;
985       ++NumAutoreleases;
986 
987       // Create the declaration lazily.
988       LLVMContext &C = Inst->getContext();
989 
990       Function *Decl = EP.get(ARCRuntimeEntryPointKind::Release);
991       CallInst *NewCall =
992           CallInst::Create(Decl, Call->getArgOperand(0), "", Call);
993       NewCall->setMetadata(MDKindCache.get(ARCMDKindID::ImpreciseRelease),
994                            MDNode::get(C, std::nullopt));
995 
996       LLVM_DEBUG(dbgs() << "Replacing autorelease{,RV}(x) with objc_release(x) "
997                            "since x is otherwise unused.\nOld: "
998                         << *Call << "\nNew: " << *NewCall << "\n");
999 
1000       EraseInstruction(Call);
1001       Inst = NewCall;
1002       Class = ARCInstKind::Release;
1003     }
1004   }
1005 
1006   // For functions which can never be passed stack arguments, add
1007   // a tail keyword.
1008   if (IsAlwaysTail(Class) && !cast<CallInst>(Inst)->isNoTailCall()) {
1009     Changed = true;
1010     LLVM_DEBUG(
1011         dbgs() << "Adding tail keyword to function since it can never be "
1012                   "passed stack args: "
1013                << *Inst << "\n");
1014     cast<CallInst>(Inst)->setTailCall();
1015   }
1016 
1017   // Ensure that functions that can never have a "tail" keyword due to the
1018   // semantics of ARC truly do not do so.
1019   if (IsNeverTail(Class)) {
1020     Changed = true;
1021     LLVM_DEBUG(dbgs() << "Removing tail keyword from function: " << *Inst
1022                       << "\n");
1023     cast<CallInst>(Inst)->setTailCall(false);
1024   }
1025 
1026   // Set nounwind as needed.
1027   if (IsNoThrow(Class)) {
1028     Changed = true;
1029     LLVM_DEBUG(dbgs() << "Found no throw class. Setting nounwind on: " << *Inst
1030                       << "\n");
1031     cast<CallInst>(Inst)->setDoesNotThrow();
1032   }
1033 
1034   // Note: This catches instructions unrelated to ARC.
1035   if (!IsNoopOnNull(Class)) {
1036     UsedInThisFunction |= 1 << unsigned(Class);
1037     return;
1038   }
1039 
1040   // If we haven't already looked up the root, look it up now.
1041   if (!Arg)
1042     Arg = GetArgRCIdentityRoot(Inst);
1043 
1044   // ARC calls with null are no-ops. Delete them.
1045   if (IsNullOrUndef(Arg)) {
1046     Changed = true;
1047     ++NumNoops;
1048     LLVM_DEBUG(dbgs() << "ARC calls with  null are no-ops. Erasing: " << *Inst
1049                       << "\n");
1050     EraseInstruction(Inst);
1051     return;
1052   }
1053 
1054   // Keep track of which of retain, release, autorelease, and retain_block
1055   // are actually present in this function.
1056   UsedInThisFunction |= 1 << unsigned(Class);
1057 
1058   // If Arg is a PHI, and one or more incoming values to the
1059   // PHI are null, and the call is control-equivalent to the PHI, and there
1060   // are no relevant side effects between the PHI and the call, and the call
1061   // is not a release that doesn't have the clang.imprecise_release tag, the
1062   // call could be pushed up to just those paths with non-null incoming
1063   // values. For now, don't bother splitting critical edges for this.
1064   if (Class == ARCInstKind::Release &&
1065       !Inst->getMetadata(MDKindCache.get(ARCMDKindID::ImpreciseRelease)))
1066     return;
1067 
1068   SmallVector<std::pair<Instruction *, const Value *>, 4> Worklist;
1069   Worklist.push_back(std::make_pair(Inst, Arg));
1070   do {
1071     std::pair<Instruction *, const Value *> Pair = Worklist.pop_back_val();
1072     Inst = Pair.first;
1073     Arg = Pair.second;
1074 
1075     const PHINode *PN = dyn_cast<PHINode>(Arg);
1076     if (!PN)
1077       continue;
1078 
1079     // Determine if the PHI has any null operands, or any incoming
1080     // critical edges.
1081     bool HasNull = false;
1082     bool HasCriticalEdges = false;
1083     for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
1084       Value *Incoming = GetRCIdentityRoot(PN->getIncomingValue(i));
1085       if (IsNullOrUndef(Incoming))
1086         HasNull = true;
1087       else if (PN->getIncomingBlock(i)->getTerminator()->getNumSuccessors() !=
1088                1) {
1089         HasCriticalEdges = true;
1090         break;
1091       }
1092     }
1093     // If we have null operands and no critical edges, optimize.
1094     if (HasCriticalEdges)
1095       continue;
1096     if (!HasNull)
1097       continue;
1098 
1099     Instruction *DepInst = nullptr;
1100 
1101     // Check that there is nothing that cares about the reference
1102     // count between the call and the phi.
1103     switch (Class) {
1104     case ARCInstKind::Retain:
1105     case ARCInstKind::RetainBlock:
1106       // These can always be moved up.
1107       break;
1108     case ARCInstKind::Release:
1109       // These can't be moved across things that care about the retain
1110       // count.
1111       DepInst = findSingleDependency(NeedsPositiveRetainCount, Arg,
1112                                      Inst->getParent(), Inst, PA);
1113       break;
1114     case ARCInstKind::Autorelease:
1115       // These can't be moved across autorelease pool scope boundaries.
1116       DepInst = findSingleDependency(AutoreleasePoolBoundary, Arg,
1117                                      Inst->getParent(), Inst, PA);
1118       break;
1119     case ARCInstKind::UnsafeClaimRV:
1120     case ARCInstKind::RetainRV:
1121     case ARCInstKind::AutoreleaseRV:
1122       // Don't move these; the RV optimization depends on the autoreleaseRV
1123       // being tail called, and the retainRV being immediately after a call
1124       // (which might still happen if we get lucky with codegen layout, but
1125       // it's not worth taking the chance).
1126       continue;
1127     default:
1128       llvm_unreachable("Invalid dependence flavor");
1129     }
1130 
1131     if (DepInst != PN)
1132       continue;
1133 
1134     Changed = true;
1135     ++NumPartialNoops;
1136     // Clone the call into each predecessor that has a non-null value.
1137     CallInst *CInst = cast<CallInst>(Inst);
1138     Type *ParamTy = CInst->getArgOperand(0)->getType();
1139     for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
1140       Value *Incoming = GetRCIdentityRoot(PN->getIncomingValue(i));
1141       if (IsNullOrUndef(Incoming))
1142         continue;
1143       Value *Op = PN->getIncomingValue(i);
1144       Instruction *InsertPos = &PN->getIncomingBlock(i)->back();
1145       SmallVector<OperandBundleDef, 1> OpBundles;
1146       cloneOpBundlesIf(CInst, OpBundles, [](const OperandBundleUse &B) {
1147         return B.getTagID() != LLVMContext::OB_funclet;
1148       });
1149       addOpBundleForFunclet(InsertPos->getParent(), OpBundles);
1150       CallInst *Clone = CallInst::Create(CInst, OpBundles);
1151       if (Op->getType() != ParamTy)
1152         Op = new BitCastInst(Op, ParamTy, "", InsertPos);
1153       Clone->setArgOperand(0, Op);
1154       Clone->insertBefore(InsertPos);
1155 
1156       LLVM_DEBUG(dbgs() << "Cloning " << *CInst << "\n"
1157                                                    "And inserting clone at "
1158                         << *InsertPos << "\n");
1159       Worklist.push_back(std::make_pair(Clone, Incoming));
1160     }
1161     // Erase the original call.
1162     LLVM_DEBUG(dbgs() << "Erasing: " << *CInst << "\n");
1163     EraseInstruction(CInst);
1164   } while (!Worklist.empty());
1165 }
1166 
1167 /// If we have a top down pointer in the S_Use state, make sure that there are
1168 /// no CFG hazards by checking the states of various bottom up pointers.
1169 static void CheckForUseCFGHazard(const Sequence SuccSSeq,
1170                                  const bool SuccSRRIKnownSafe,
1171                                  TopDownPtrState &S,
1172                                  bool &SomeSuccHasSame,
1173                                  bool &AllSuccsHaveSame,
1174                                  bool &NotAllSeqEqualButKnownSafe,
1175                                  bool &ShouldContinue) {
1176   switch (SuccSSeq) {
1177   case S_CanRelease: {
1178     if (!S.IsKnownSafe() && !SuccSRRIKnownSafe) {
1179       S.ClearSequenceProgress();
1180       break;
1181     }
1182     S.SetCFGHazardAfflicted(true);
1183     ShouldContinue = true;
1184     break;
1185   }
1186   case S_Use:
1187     SomeSuccHasSame = true;
1188     break;
1189   case S_Stop:
1190   case S_MovableRelease:
1191     if (!S.IsKnownSafe() && !SuccSRRIKnownSafe)
1192       AllSuccsHaveSame = false;
1193     else
1194       NotAllSeqEqualButKnownSafe = true;
1195     break;
1196   case S_Retain:
1197     llvm_unreachable("bottom-up pointer in retain state!");
1198   case S_None:
1199     llvm_unreachable("This should have been handled earlier.");
1200   }
1201 }
1202 
1203 /// If we have a Top Down pointer in the S_CanRelease state, make sure that
1204 /// there are no CFG hazards by checking the states of various bottom up
1205 /// pointers.
1206 static void CheckForCanReleaseCFGHazard(const Sequence SuccSSeq,
1207                                         const bool SuccSRRIKnownSafe,
1208                                         TopDownPtrState &S,
1209                                         bool &SomeSuccHasSame,
1210                                         bool &AllSuccsHaveSame,
1211                                         bool &NotAllSeqEqualButKnownSafe) {
1212   switch (SuccSSeq) {
1213   case S_CanRelease:
1214     SomeSuccHasSame = true;
1215     break;
1216   case S_Stop:
1217   case S_MovableRelease:
1218   case S_Use:
1219     if (!S.IsKnownSafe() && !SuccSRRIKnownSafe)
1220       AllSuccsHaveSame = false;
1221     else
1222       NotAllSeqEqualButKnownSafe = true;
1223     break;
1224   case S_Retain:
1225     llvm_unreachable("bottom-up pointer in retain state!");
1226   case S_None:
1227     llvm_unreachable("This should have been handled earlier.");
1228   }
1229 }
1230 
1231 /// Check for critical edges, loop boundaries, irreducible control flow, or
1232 /// other CFG structures where moving code across the edge would result in it
1233 /// being executed more.
1234 void
1235 ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
1236                                DenseMap<const BasicBlock *, BBState> &BBStates,
1237                                BBState &MyStates) const {
1238   // If any top-down local-use or possible-dec has a succ which is earlier in
1239   // the sequence, forget it.
1240   for (auto I = MyStates.top_down_ptr_begin(), E = MyStates.top_down_ptr_end();
1241        I != E; ++I) {
1242     TopDownPtrState &S = I->second;
1243     const Sequence Seq = I->second.GetSeq();
1244 
1245     // We only care about S_Retain, S_CanRelease, and S_Use.
1246     if (Seq == S_None)
1247       continue;
1248 
1249     // Make sure that if extra top down states are added in the future that this
1250     // code is updated to handle it.
1251     assert((Seq == S_Retain || Seq == S_CanRelease || Seq == S_Use) &&
1252            "Unknown top down sequence state.");
1253 
1254     const Value *Arg = I->first;
1255     bool SomeSuccHasSame = false;
1256     bool AllSuccsHaveSame = true;
1257     bool NotAllSeqEqualButKnownSafe = false;
1258 
1259     for (const BasicBlock *Succ : successors(BB)) {
1260       // If VisitBottomUp has pointer information for this successor, take
1261       // what we know about it.
1262       const DenseMap<const BasicBlock *, BBState>::iterator BBI =
1263           BBStates.find(Succ);
1264       assert(BBI != BBStates.end());
1265       const BottomUpPtrState &SuccS = BBI->second.getPtrBottomUpState(Arg);
1266       const Sequence SuccSSeq = SuccS.GetSeq();
1267 
1268       // If bottom up, the pointer is in an S_None state, clear the sequence
1269       // progress since the sequence in the bottom up state finished
1270       // suggesting a mismatch in between retains/releases. This is true for
1271       // all three cases that we are handling here: S_Retain, S_Use, and
1272       // S_CanRelease.
1273       if (SuccSSeq == S_None) {
1274         S.ClearSequenceProgress();
1275         continue;
1276       }
1277 
1278       // If we have S_Use or S_CanRelease, perform our check for cfg hazard
1279       // checks.
1280       const bool SuccSRRIKnownSafe = SuccS.IsKnownSafe();
1281 
1282       // *NOTE* We do not use Seq from above here since we are allowing for
1283       // S.GetSeq() to change while we are visiting basic blocks.
1284       switch(S.GetSeq()) {
1285       case S_Use: {
1286         bool ShouldContinue = false;
1287         CheckForUseCFGHazard(SuccSSeq, SuccSRRIKnownSafe, S, SomeSuccHasSame,
1288                              AllSuccsHaveSame, NotAllSeqEqualButKnownSafe,
1289                              ShouldContinue);
1290         if (ShouldContinue)
1291           continue;
1292         break;
1293       }
1294       case S_CanRelease:
1295         CheckForCanReleaseCFGHazard(SuccSSeq, SuccSRRIKnownSafe, S,
1296                                     SomeSuccHasSame, AllSuccsHaveSame,
1297                                     NotAllSeqEqualButKnownSafe);
1298         break;
1299       case S_Retain:
1300       case S_None:
1301       case S_Stop:
1302       case S_MovableRelease:
1303         break;
1304       }
1305     }
1306 
1307     // If the state at the other end of any of the successor edges
1308     // matches the current state, require all edges to match. This
1309     // guards against loops in the middle of a sequence.
1310     if (SomeSuccHasSame && !AllSuccsHaveSame) {
1311       S.ClearSequenceProgress();
1312     } else if (NotAllSeqEqualButKnownSafe) {
1313       // If we would have cleared the state foregoing the fact that we are known
1314       // safe, stop code motion. This is because whether or not it is safe to
1315       // remove RR pairs via KnownSafe is an orthogonal concept to whether we
1316       // are allowed to perform code motion.
1317       S.SetCFGHazardAfflicted(true);
1318     }
1319   }
1320 }
1321 
1322 bool ObjCARCOpt::VisitInstructionBottomUp(
1323     Instruction *Inst, BasicBlock *BB, BlotMapVector<Value *, RRInfo> &Retains,
1324     BBState &MyStates) {
1325   bool NestingDetected = false;
1326   ARCInstKind Class = GetARCInstKind(Inst);
1327   const Value *Arg = nullptr;
1328 
1329   LLVM_DEBUG(dbgs() << "        Class: " << Class << "\n");
1330 
1331   switch (Class) {
1332   case ARCInstKind::Release: {
1333     Arg = GetArgRCIdentityRoot(Inst);
1334 
1335     BottomUpPtrState &S = MyStates.getPtrBottomUpState(Arg);
1336     NestingDetected |= S.InitBottomUp(MDKindCache, Inst);
1337     break;
1338   }
1339   case ARCInstKind::RetainBlock:
1340     // In OptimizeIndividualCalls, we have strength reduced all optimizable
1341     // objc_retainBlocks to objc_retains. Thus at this point any
1342     // objc_retainBlocks that we see are not optimizable.
1343     break;
1344   case ARCInstKind::Retain:
1345   case ARCInstKind::RetainRV: {
1346     Arg = GetArgRCIdentityRoot(Inst);
1347     BottomUpPtrState &S = MyStates.getPtrBottomUpState(Arg);
1348     if (S.MatchWithRetain()) {
1349       // Don't do retain+release tracking for ARCInstKind::RetainRV, because
1350       // it's better to let it remain as the first instruction after a call.
1351       if (Class != ARCInstKind::RetainRV) {
1352         LLVM_DEBUG(dbgs() << "        Matching with: " << *Inst << "\n");
1353         Retains[Inst] = S.GetRRInfo();
1354       }
1355       S.ClearSequenceProgress();
1356     }
1357     // A retain moving bottom up can be a use.
1358     break;
1359   }
1360   case ARCInstKind::AutoreleasepoolPop:
1361     // Conservatively, clear MyStates for all known pointers.
1362     MyStates.clearBottomUpPointers();
1363     return NestingDetected;
1364   case ARCInstKind::AutoreleasepoolPush:
1365   case ARCInstKind::None:
1366     // These are irrelevant.
1367     return NestingDetected;
1368   default:
1369     break;
1370   }
1371 
1372   // Consider any other possible effects of this instruction on each
1373   // pointer being tracked.
1374   for (auto MI = MyStates.bottom_up_ptr_begin(),
1375             ME = MyStates.bottom_up_ptr_end();
1376        MI != ME; ++MI) {
1377     const Value *Ptr = MI->first;
1378     if (Ptr == Arg)
1379       continue; // Handled above.
1380     BottomUpPtrState &S = MI->second;
1381 
1382     if (S.HandlePotentialAlterRefCount(Inst, Ptr, PA, Class))
1383       continue;
1384 
1385     S.HandlePotentialUse(BB, Inst, Ptr, PA, Class);
1386   }
1387 
1388   return NestingDetected;
1389 }
1390 
1391 bool ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
1392                                DenseMap<const BasicBlock *, BBState> &BBStates,
1393                                BlotMapVector<Value *, RRInfo> &Retains) {
1394   LLVM_DEBUG(dbgs() << "\n== ObjCARCOpt::VisitBottomUp ==\n");
1395 
1396   bool NestingDetected = false;
1397   BBState &MyStates = BBStates[BB];
1398 
1399   // Merge the states from each successor to compute the initial state
1400   // for the current block.
1401   BBState::edge_iterator SI(MyStates.succ_begin()),
1402                          SE(MyStates.succ_end());
1403   if (SI != SE) {
1404     const BasicBlock *Succ = *SI;
1405     DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Succ);
1406     assert(I != BBStates.end());
1407     MyStates.InitFromSucc(I->second);
1408     ++SI;
1409     for (; SI != SE; ++SI) {
1410       Succ = *SI;
1411       I = BBStates.find(Succ);
1412       assert(I != BBStates.end());
1413       MyStates.MergeSucc(I->second);
1414     }
1415   }
1416 
1417   LLVM_DEBUG(dbgs() << "Before:\n"
1418                     << BBStates[BB] << "\n"
1419                     << "Performing Dataflow:\n");
1420 
1421   // Visit all the instructions, bottom-up.
1422   for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; --I) {
1423     Instruction *Inst = &*std::prev(I);
1424 
1425     // Invoke instructions are visited as part of their successors (below).
1426     if (isa<InvokeInst>(Inst))
1427       continue;
1428 
1429     LLVM_DEBUG(dbgs() << "    Visiting " << *Inst << "\n");
1430 
1431     NestingDetected |= VisitInstructionBottomUp(Inst, BB, Retains, MyStates);
1432 
1433     // Bail out if the number of pointers being tracked becomes too large so
1434     // that this pass can complete in a reasonable amount of time.
1435     if (MyStates.bottom_up_ptr_list_size() > MaxPtrStates) {
1436       DisableRetainReleasePairing = true;
1437       return false;
1438     }
1439   }
1440 
1441   // If there's a predecessor with an invoke, visit the invoke as if it were
1442   // part of this block, since we can't insert code after an invoke in its own
1443   // block, and we don't want to split critical edges.
1444   for (BBState::edge_iterator PI(MyStates.pred_begin()),
1445        PE(MyStates.pred_end()); PI != PE; ++PI) {
1446     BasicBlock *Pred = *PI;
1447     if (InvokeInst *II = dyn_cast<InvokeInst>(&Pred->back()))
1448       NestingDetected |= VisitInstructionBottomUp(II, BB, Retains, MyStates);
1449   }
1450 
1451   LLVM_DEBUG(dbgs() << "\nFinal State:\n" << BBStates[BB] << "\n");
1452 
1453   return NestingDetected;
1454 }
1455 
1456 // Fill ReleaseInsertPtToRCIdentityRoots, which is a map from insertion points
1457 // to the set of RC identity roots that would be released by the release calls
1458 // moved to the insertion points.
1459 static void collectReleaseInsertPts(
1460     const BlotMapVector<Value *, RRInfo> &Retains,
1461     DenseMap<const Instruction *, SmallPtrSet<const Value *, 2>>
1462         &ReleaseInsertPtToRCIdentityRoots) {
1463   for (const auto &P : Retains) {
1464     // Retains is a map from an objc_retain call to a RRInfo of the RC identity
1465     // root of the call. Get the RC identity root of the objc_retain call.
1466     Instruction *Retain = cast<Instruction>(P.first);
1467     Value *Root = GetRCIdentityRoot(Retain->getOperand(0));
1468     // Collect all the insertion points of the objc_release calls that release
1469     // the RC identity root of the objc_retain call.
1470     for (const Instruction *InsertPt : P.second.ReverseInsertPts)
1471       ReleaseInsertPtToRCIdentityRoots[InsertPt].insert(Root);
1472   }
1473 }
1474 
1475 // Get the RC identity roots from an insertion point of an objc_release call.
1476 // Return nullptr if the passed instruction isn't an insertion point.
1477 static const SmallPtrSet<const Value *, 2> *
1478 getRCIdentityRootsFromReleaseInsertPt(
1479     const Instruction *InsertPt,
1480     const DenseMap<const Instruction *, SmallPtrSet<const Value *, 2>>
1481         &ReleaseInsertPtToRCIdentityRoots) {
1482   auto I = ReleaseInsertPtToRCIdentityRoots.find(InsertPt);
1483   if (I == ReleaseInsertPtToRCIdentityRoots.end())
1484     return nullptr;
1485   return &I->second;
1486 }
1487 
1488 bool ObjCARCOpt::VisitInstructionTopDown(
1489     Instruction *Inst, DenseMap<Value *, RRInfo> &Releases, BBState &MyStates,
1490     const DenseMap<const Instruction *, SmallPtrSet<const Value *, 2>>
1491         &ReleaseInsertPtToRCIdentityRoots) {
1492   bool NestingDetected = false;
1493   ARCInstKind Class = GetARCInstKind(Inst);
1494   const Value *Arg = nullptr;
1495 
1496   // Make sure a call to objc_retain isn't moved past insertion points of calls
1497   // to objc_release.
1498   if (const SmallPtrSet<const Value *, 2> *Roots =
1499           getRCIdentityRootsFromReleaseInsertPt(
1500               Inst, ReleaseInsertPtToRCIdentityRoots))
1501     for (const auto *Root : *Roots) {
1502       TopDownPtrState &S = MyStates.getPtrTopDownState(Root);
1503       // Disable code motion if the current position is S_Retain to prevent
1504       // moving the objc_retain call past objc_release calls. If it's
1505       // S_CanRelease or larger, it's not necessary to disable code motion as
1506       // the insertion points that prevent the objc_retain call from moving down
1507       // should have been set already.
1508       if (S.GetSeq() == S_Retain)
1509         S.SetCFGHazardAfflicted(true);
1510     }
1511 
1512   LLVM_DEBUG(dbgs() << "        Class: " << Class << "\n");
1513 
1514   switch (Class) {
1515   case ARCInstKind::RetainBlock:
1516     // In OptimizeIndividualCalls, we have strength reduced all optimizable
1517     // objc_retainBlocks to objc_retains. Thus at this point any
1518     // objc_retainBlocks that we see are not optimizable. We need to break since
1519     // a retain can be a potential use.
1520     break;
1521   case ARCInstKind::Retain:
1522   case ARCInstKind::RetainRV: {
1523     Arg = GetArgRCIdentityRoot(Inst);
1524     TopDownPtrState &S = MyStates.getPtrTopDownState(Arg);
1525     NestingDetected |= S.InitTopDown(Class, Inst);
1526     // A retain can be a potential use; proceed to the generic checking
1527     // code below.
1528     break;
1529   }
1530   case ARCInstKind::Release: {
1531     Arg = GetArgRCIdentityRoot(Inst);
1532     TopDownPtrState &S = MyStates.getPtrTopDownState(Arg);
1533     // Try to form a tentative pair in between this release instruction and the
1534     // top down pointers that we are tracking.
1535     if (S.MatchWithRelease(MDKindCache, Inst)) {
1536       // If we succeed, copy S's RRInfo into the Release -> {Retain Set
1537       // Map}. Then we clear S.
1538       LLVM_DEBUG(dbgs() << "        Matching with: " << *Inst << "\n");
1539       Releases[Inst] = S.GetRRInfo();
1540       S.ClearSequenceProgress();
1541     }
1542     break;
1543   }
1544   case ARCInstKind::AutoreleasepoolPop:
1545     // Conservatively, clear MyStates for all known pointers.
1546     MyStates.clearTopDownPointers();
1547     return false;
1548   case ARCInstKind::AutoreleasepoolPush:
1549   case ARCInstKind::None:
1550     // These can not be uses of
1551     return false;
1552   default:
1553     break;
1554   }
1555 
1556   // Consider any other possible effects of this instruction on each
1557   // pointer being tracked.
1558   for (auto MI = MyStates.top_down_ptr_begin(),
1559             ME = MyStates.top_down_ptr_end();
1560        MI != ME; ++MI) {
1561     const Value *Ptr = MI->first;
1562     if (Ptr == Arg)
1563       continue; // Handled above.
1564     TopDownPtrState &S = MI->second;
1565     if (S.HandlePotentialAlterRefCount(Inst, Ptr, PA, Class, *BundledInsts))
1566       continue;
1567 
1568     S.HandlePotentialUse(Inst, Ptr, PA, Class);
1569   }
1570 
1571   return NestingDetected;
1572 }
1573 
1574 bool ObjCARCOpt::VisitTopDown(
1575     BasicBlock *BB, DenseMap<const BasicBlock *, BBState> &BBStates,
1576     DenseMap<Value *, RRInfo> &Releases,
1577     const DenseMap<const Instruction *, SmallPtrSet<const Value *, 2>>
1578         &ReleaseInsertPtToRCIdentityRoots) {
1579   LLVM_DEBUG(dbgs() << "\n== ObjCARCOpt::VisitTopDown ==\n");
1580   bool NestingDetected = false;
1581   BBState &MyStates = BBStates[BB];
1582 
1583   // Merge the states from each predecessor to compute the initial state
1584   // for the current block.
1585   BBState::edge_iterator PI(MyStates.pred_begin()),
1586                          PE(MyStates.pred_end());
1587   if (PI != PE) {
1588     const BasicBlock *Pred = *PI;
1589     DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Pred);
1590     assert(I != BBStates.end());
1591     MyStates.InitFromPred(I->second);
1592     ++PI;
1593     for (; PI != PE; ++PI) {
1594       Pred = *PI;
1595       I = BBStates.find(Pred);
1596       assert(I != BBStates.end());
1597       MyStates.MergePred(I->second);
1598     }
1599   }
1600 
1601   // Check that BB and MyStates have the same number of predecessors. This
1602   // prevents retain calls that live outside a loop from being moved into the
1603   // loop.
1604   if (!BB->hasNPredecessors(MyStates.pred_end() - MyStates.pred_begin()))
1605     for (auto I = MyStates.top_down_ptr_begin(),
1606               E = MyStates.top_down_ptr_end();
1607          I != E; ++I)
1608       I->second.SetCFGHazardAfflicted(true);
1609 
1610   LLVM_DEBUG(dbgs() << "Before:\n"
1611                     << BBStates[BB] << "\n"
1612                     << "Performing Dataflow:\n");
1613 
1614   // Visit all the instructions, top-down.
1615   for (Instruction &Inst : *BB) {
1616     LLVM_DEBUG(dbgs() << "    Visiting " << Inst << "\n");
1617 
1618     NestingDetected |= VisitInstructionTopDown(
1619         &Inst, Releases, MyStates, ReleaseInsertPtToRCIdentityRoots);
1620 
1621     // Bail out if the number of pointers being tracked becomes too large so
1622     // that this pass can complete in a reasonable amount of time.
1623     if (MyStates.top_down_ptr_list_size() > MaxPtrStates) {
1624       DisableRetainReleasePairing = true;
1625       return false;
1626     }
1627   }
1628 
1629   LLVM_DEBUG(dbgs() << "\nState Before Checking for CFG Hazards:\n"
1630                     << BBStates[BB] << "\n\n");
1631   CheckForCFGHazards(BB, BBStates, MyStates);
1632   LLVM_DEBUG(dbgs() << "Final State:\n" << BBStates[BB] << "\n");
1633   return NestingDetected;
1634 }
1635 
1636 static void
1637 ComputePostOrders(Function &F,
1638                   SmallVectorImpl<BasicBlock *> &PostOrder,
1639                   SmallVectorImpl<BasicBlock *> &ReverseCFGPostOrder,
1640                   unsigned NoObjCARCExceptionsMDKind,
1641                   DenseMap<const BasicBlock *, BBState> &BBStates) {
1642   /// The visited set, for doing DFS walks.
1643   SmallPtrSet<BasicBlock *, 16> Visited;
1644 
1645   // Do DFS, computing the PostOrder.
1646   SmallPtrSet<BasicBlock *, 16> OnStack;
1647   SmallVector<std::pair<BasicBlock *, succ_iterator>, 16> SuccStack;
1648 
1649   // Functions always have exactly one entry block, and we don't have
1650   // any other block that we treat like an entry block.
1651   BasicBlock *EntryBB = &F.getEntryBlock();
1652   BBState &MyStates = BBStates[EntryBB];
1653   MyStates.SetAsEntry();
1654   Instruction *EntryTI = EntryBB->getTerminator();
1655   SuccStack.push_back(std::make_pair(EntryBB, succ_iterator(EntryTI)));
1656   Visited.insert(EntryBB);
1657   OnStack.insert(EntryBB);
1658   do {
1659   dfs_next_succ:
1660     BasicBlock *CurrBB = SuccStack.back().first;
1661     succ_iterator SE(CurrBB->getTerminator(), false);
1662 
1663     while (SuccStack.back().second != SE) {
1664       BasicBlock *SuccBB = *SuccStack.back().second++;
1665       if (Visited.insert(SuccBB).second) {
1666         SuccStack.push_back(
1667             std::make_pair(SuccBB, succ_iterator(SuccBB->getTerminator())));
1668         BBStates[CurrBB].addSucc(SuccBB);
1669         BBState &SuccStates = BBStates[SuccBB];
1670         SuccStates.addPred(CurrBB);
1671         OnStack.insert(SuccBB);
1672         goto dfs_next_succ;
1673       }
1674 
1675       if (!OnStack.count(SuccBB)) {
1676         BBStates[CurrBB].addSucc(SuccBB);
1677         BBStates[SuccBB].addPred(CurrBB);
1678       }
1679     }
1680     OnStack.erase(CurrBB);
1681     PostOrder.push_back(CurrBB);
1682     SuccStack.pop_back();
1683   } while (!SuccStack.empty());
1684 
1685   Visited.clear();
1686 
1687   // Do reverse-CFG DFS, computing the reverse-CFG PostOrder.
1688   // Functions may have many exits, and there also blocks which we treat
1689   // as exits due to ignored edges.
1690   SmallVector<std::pair<BasicBlock *, BBState::edge_iterator>, 16> PredStack;
1691   for (BasicBlock &ExitBB : F) {
1692     BBState &MyStates = BBStates[&ExitBB];
1693     if (!MyStates.isExit())
1694       continue;
1695 
1696     MyStates.SetAsExit();
1697 
1698     PredStack.push_back(std::make_pair(&ExitBB, MyStates.pred_begin()));
1699     Visited.insert(&ExitBB);
1700     while (!PredStack.empty()) {
1701     reverse_dfs_next_succ:
1702       BBState::edge_iterator PE = BBStates[PredStack.back().first].pred_end();
1703       while (PredStack.back().second != PE) {
1704         BasicBlock *BB = *PredStack.back().second++;
1705         if (Visited.insert(BB).second) {
1706           PredStack.push_back(std::make_pair(BB, BBStates[BB].pred_begin()));
1707           goto reverse_dfs_next_succ;
1708         }
1709       }
1710       ReverseCFGPostOrder.push_back(PredStack.pop_back_val().first);
1711     }
1712   }
1713 }
1714 
1715 // Visit the function both top-down and bottom-up.
1716 bool ObjCARCOpt::Visit(Function &F,
1717                        DenseMap<const BasicBlock *, BBState> &BBStates,
1718                        BlotMapVector<Value *, RRInfo> &Retains,
1719                        DenseMap<Value *, RRInfo> &Releases) {
1720   // Use reverse-postorder traversals, because we magically know that loops
1721   // will be well behaved, i.e. they won't repeatedly call retain on a single
1722   // pointer without doing a release. We can't use the ReversePostOrderTraversal
1723   // class here because we want the reverse-CFG postorder to consider each
1724   // function exit point, and we want to ignore selected cycle edges.
1725   SmallVector<BasicBlock *, 16> PostOrder;
1726   SmallVector<BasicBlock *, 16> ReverseCFGPostOrder;
1727   ComputePostOrders(F, PostOrder, ReverseCFGPostOrder,
1728                     MDKindCache.get(ARCMDKindID::NoObjCARCExceptions),
1729                     BBStates);
1730 
1731   // Use reverse-postorder on the reverse CFG for bottom-up.
1732   bool BottomUpNestingDetected = false;
1733   for (BasicBlock *BB : llvm::reverse(ReverseCFGPostOrder)) {
1734     BottomUpNestingDetected |= VisitBottomUp(BB, BBStates, Retains);
1735     if (DisableRetainReleasePairing)
1736       return false;
1737   }
1738 
1739   DenseMap<const Instruction *, SmallPtrSet<const Value *, 2>>
1740       ReleaseInsertPtToRCIdentityRoots;
1741   collectReleaseInsertPts(Retains, ReleaseInsertPtToRCIdentityRoots);
1742 
1743   // Use reverse-postorder for top-down.
1744   bool TopDownNestingDetected = false;
1745   for (BasicBlock *BB : llvm::reverse(PostOrder)) {
1746     TopDownNestingDetected |=
1747         VisitTopDown(BB, BBStates, Releases, ReleaseInsertPtToRCIdentityRoots);
1748     if (DisableRetainReleasePairing)
1749       return false;
1750   }
1751 
1752   return TopDownNestingDetected && BottomUpNestingDetected;
1753 }
1754 
1755 /// Move the calls in RetainsToMove and ReleasesToMove.
1756 void ObjCARCOpt::MoveCalls(Value *Arg, RRInfo &RetainsToMove,
1757                            RRInfo &ReleasesToMove,
1758                            BlotMapVector<Value *, RRInfo> &Retains,
1759                            DenseMap<Value *, RRInfo> &Releases,
1760                            SmallVectorImpl<Instruction *> &DeadInsts,
1761                            Module *M) {
1762   Type *ArgTy = Arg->getType();
1763   Type *ParamTy = PointerType::getUnqual(Type::getInt8Ty(ArgTy->getContext()));
1764 
1765   LLVM_DEBUG(dbgs() << "== ObjCARCOpt::MoveCalls ==\n");
1766 
1767   // Insert the new retain and release calls.
1768   for (Instruction *InsertPt : ReleasesToMove.ReverseInsertPts) {
1769     Value *MyArg = ArgTy == ParamTy ? Arg :
1770                    new BitCastInst(Arg, ParamTy, "", InsertPt);
1771     Function *Decl = EP.get(ARCRuntimeEntryPointKind::Retain);
1772     SmallVector<OperandBundleDef, 1> BundleList;
1773     addOpBundleForFunclet(InsertPt->getParent(), BundleList);
1774     CallInst *Call = CallInst::Create(Decl, MyArg, BundleList, "", InsertPt);
1775     Call->setDoesNotThrow();
1776     Call->setTailCall();
1777 
1778     LLVM_DEBUG(dbgs() << "Inserting new Retain: " << *Call
1779                       << "\n"
1780                          "At insertion point: "
1781                       << *InsertPt << "\n");
1782   }
1783   for (Instruction *InsertPt : RetainsToMove.ReverseInsertPts) {
1784     Value *MyArg = ArgTy == ParamTy ? Arg :
1785                    new BitCastInst(Arg, ParamTy, "", InsertPt);
1786     Function *Decl = EP.get(ARCRuntimeEntryPointKind::Release);
1787     SmallVector<OperandBundleDef, 1> BundleList;
1788     addOpBundleForFunclet(InsertPt->getParent(), BundleList);
1789     CallInst *Call = CallInst::Create(Decl, MyArg, BundleList, "", InsertPt);
1790     // Attach a clang.imprecise_release metadata tag, if appropriate.
1791     if (MDNode *M = ReleasesToMove.ReleaseMetadata)
1792       Call->setMetadata(MDKindCache.get(ARCMDKindID::ImpreciseRelease), M);
1793     Call->setDoesNotThrow();
1794     if (ReleasesToMove.IsTailCallRelease)
1795       Call->setTailCall();
1796 
1797     LLVM_DEBUG(dbgs() << "Inserting new Release: " << *Call
1798                       << "\n"
1799                          "At insertion point: "
1800                       << *InsertPt << "\n");
1801   }
1802 
1803   // Delete the original retain and release calls.
1804   for (Instruction *OrigRetain : RetainsToMove.Calls) {
1805     Retains.blot(OrigRetain);
1806     DeadInsts.push_back(OrigRetain);
1807     LLVM_DEBUG(dbgs() << "Deleting retain: " << *OrigRetain << "\n");
1808   }
1809   for (Instruction *OrigRelease : ReleasesToMove.Calls) {
1810     Releases.erase(OrigRelease);
1811     DeadInsts.push_back(OrigRelease);
1812     LLVM_DEBUG(dbgs() << "Deleting release: " << *OrigRelease << "\n");
1813   }
1814 }
1815 
1816 bool ObjCARCOpt::PairUpRetainsAndReleases(
1817     DenseMap<const BasicBlock *, BBState> &BBStates,
1818     BlotMapVector<Value *, RRInfo> &Retains,
1819     DenseMap<Value *, RRInfo> &Releases, Module *M,
1820     Instruction *Retain,
1821     SmallVectorImpl<Instruction *> &DeadInsts, RRInfo &RetainsToMove,
1822     RRInfo &ReleasesToMove, Value *Arg, bool KnownSafe,
1823     bool &AnyPairsCompletelyEliminated) {
1824   // If a pair happens in a region where it is known that the reference count
1825   // is already incremented, we can similarly ignore possible decrements unless
1826   // we are dealing with a retainable object with multiple provenance sources.
1827   bool KnownSafeTD = true, KnownSafeBU = true;
1828   bool CFGHazardAfflicted = false;
1829 
1830   // Connect the dots between the top-down-collected RetainsToMove and
1831   // bottom-up-collected ReleasesToMove to form sets of related calls.
1832   // This is an iterative process so that we connect multiple releases
1833   // to multiple retains if needed.
1834   unsigned OldDelta = 0;
1835   unsigned NewDelta = 0;
1836   unsigned OldCount = 0;
1837   unsigned NewCount = 0;
1838   bool FirstRelease = true;
1839   for (SmallVector<Instruction *, 4> NewRetains{Retain};;) {
1840     SmallVector<Instruction *, 4> NewReleases;
1841     for (Instruction *NewRetain : NewRetains) {
1842       auto It = Retains.find(NewRetain);
1843       assert(It != Retains.end());
1844       const RRInfo &NewRetainRRI = It->second;
1845       KnownSafeTD &= NewRetainRRI.KnownSafe;
1846       CFGHazardAfflicted |= NewRetainRRI.CFGHazardAfflicted;
1847       for (Instruction *NewRetainRelease : NewRetainRRI.Calls) {
1848         auto Jt = Releases.find(NewRetainRelease);
1849         if (Jt == Releases.end())
1850           return false;
1851         const RRInfo &NewRetainReleaseRRI = Jt->second;
1852 
1853         // If the release does not have a reference to the retain as well,
1854         // something happened which is unaccounted for. Do not do anything.
1855         //
1856         // This can happen if we catch an additive overflow during path count
1857         // merging.
1858         if (!NewRetainReleaseRRI.Calls.count(NewRetain))
1859           return false;
1860 
1861         if (ReleasesToMove.Calls.insert(NewRetainRelease).second) {
1862           // If we overflow when we compute the path count, don't remove/move
1863           // anything.
1864           const BBState &NRRBBState = BBStates[NewRetainRelease->getParent()];
1865           unsigned PathCount = BBState::OverflowOccurredValue;
1866           if (NRRBBState.GetAllPathCountWithOverflow(PathCount))
1867             return false;
1868           assert(PathCount != BBState::OverflowOccurredValue &&
1869                  "PathCount at this point can not be "
1870                  "OverflowOccurredValue.");
1871           OldDelta -= PathCount;
1872 
1873           // Merge the ReleaseMetadata and IsTailCallRelease values.
1874           if (FirstRelease) {
1875             ReleasesToMove.ReleaseMetadata =
1876               NewRetainReleaseRRI.ReleaseMetadata;
1877             ReleasesToMove.IsTailCallRelease =
1878               NewRetainReleaseRRI.IsTailCallRelease;
1879             FirstRelease = false;
1880           } else {
1881             if (ReleasesToMove.ReleaseMetadata !=
1882                 NewRetainReleaseRRI.ReleaseMetadata)
1883               ReleasesToMove.ReleaseMetadata = nullptr;
1884             if (ReleasesToMove.IsTailCallRelease !=
1885                 NewRetainReleaseRRI.IsTailCallRelease)
1886               ReleasesToMove.IsTailCallRelease = false;
1887           }
1888 
1889           // Collect the optimal insertion points.
1890           if (!KnownSafe)
1891             for (Instruction *RIP : NewRetainReleaseRRI.ReverseInsertPts) {
1892               if (ReleasesToMove.ReverseInsertPts.insert(RIP).second) {
1893                 // If we overflow when we compute the path count, don't
1894                 // remove/move anything.
1895                 const BBState &RIPBBState = BBStates[RIP->getParent()];
1896                 PathCount = BBState::OverflowOccurredValue;
1897                 if (RIPBBState.GetAllPathCountWithOverflow(PathCount))
1898                   return false;
1899                 assert(PathCount != BBState::OverflowOccurredValue &&
1900                        "PathCount at this point can not be "
1901                        "OverflowOccurredValue.");
1902                 NewDelta -= PathCount;
1903               }
1904             }
1905           NewReleases.push_back(NewRetainRelease);
1906         }
1907       }
1908     }
1909     NewRetains.clear();
1910     if (NewReleases.empty()) break;
1911 
1912     // Back the other way.
1913     for (Instruction *NewRelease : NewReleases) {
1914       auto It = Releases.find(NewRelease);
1915       assert(It != Releases.end());
1916       const RRInfo &NewReleaseRRI = It->second;
1917       KnownSafeBU &= NewReleaseRRI.KnownSafe;
1918       CFGHazardAfflicted |= NewReleaseRRI.CFGHazardAfflicted;
1919       for (Instruction *NewReleaseRetain : NewReleaseRRI.Calls) {
1920         auto Jt = Retains.find(NewReleaseRetain);
1921         if (Jt == Retains.end())
1922           return false;
1923         const RRInfo &NewReleaseRetainRRI = Jt->second;
1924 
1925         // If the retain does not have a reference to the release as well,
1926         // something happened which is unaccounted for. Do not do anything.
1927         //
1928         // This can happen if we catch an additive overflow during path count
1929         // merging.
1930         if (!NewReleaseRetainRRI.Calls.count(NewRelease))
1931           return false;
1932 
1933         if (RetainsToMove.Calls.insert(NewReleaseRetain).second) {
1934           // If we overflow when we compute the path count, don't remove/move
1935           // anything.
1936           const BBState &NRRBBState = BBStates[NewReleaseRetain->getParent()];
1937           unsigned PathCount = BBState::OverflowOccurredValue;
1938           if (NRRBBState.GetAllPathCountWithOverflow(PathCount))
1939             return false;
1940           assert(PathCount != BBState::OverflowOccurredValue &&
1941                  "PathCount at this point can not be "
1942                  "OverflowOccurredValue.");
1943           OldDelta += PathCount;
1944           OldCount += PathCount;
1945 
1946           // Collect the optimal insertion points.
1947           if (!KnownSafe)
1948             for (Instruction *RIP : NewReleaseRetainRRI.ReverseInsertPts) {
1949               if (RetainsToMove.ReverseInsertPts.insert(RIP).second) {
1950                 // If we overflow when we compute the path count, don't
1951                 // remove/move anything.
1952                 const BBState &RIPBBState = BBStates[RIP->getParent()];
1953 
1954                 PathCount = BBState::OverflowOccurredValue;
1955                 if (RIPBBState.GetAllPathCountWithOverflow(PathCount))
1956                   return false;
1957                 assert(PathCount != BBState::OverflowOccurredValue &&
1958                        "PathCount at this point can not be "
1959                        "OverflowOccurredValue.");
1960                 NewDelta += PathCount;
1961                 NewCount += PathCount;
1962               }
1963             }
1964           NewRetains.push_back(NewReleaseRetain);
1965         }
1966       }
1967     }
1968     if (NewRetains.empty()) break;
1969   }
1970 
1971   // We can only remove pointers if we are known safe in both directions.
1972   bool UnconditionallySafe = KnownSafeTD && KnownSafeBU;
1973   if (UnconditionallySafe) {
1974     RetainsToMove.ReverseInsertPts.clear();
1975     ReleasesToMove.ReverseInsertPts.clear();
1976     NewCount = 0;
1977   } else {
1978     // Determine whether the new insertion points we computed preserve the
1979     // balance of retain and release calls through the program.
1980     // TODO: If the fully aggressive solution isn't valid, try to find a
1981     // less aggressive solution which is.
1982     if (NewDelta != 0)
1983       return false;
1984 
1985     // At this point, we are not going to remove any RR pairs, but we still are
1986     // able to move RR pairs. If one of our pointers is afflicted with
1987     // CFGHazards, we cannot perform such code motion so exit early.
1988     const bool WillPerformCodeMotion =
1989         !RetainsToMove.ReverseInsertPts.empty() ||
1990         !ReleasesToMove.ReverseInsertPts.empty();
1991     if (CFGHazardAfflicted && WillPerformCodeMotion)
1992       return false;
1993   }
1994 
1995   // Determine whether the original call points are balanced in the retain and
1996   // release calls through the program. If not, conservatively don't touch
1997   // them.
1998   // TODO: It's theoretically possible to do code motion in this case, as
1999   // long as the existing imbalances are maintained.
2000   if (OldDelta != 0)
2001     return false;
2002 
2003   Changed = true;
2004   assert(OldCount != 0 && "Unreachable code?");
2005   NumRRs += OldCount - NewCount;
2006   // Set to true if we completely removed any RR pairs.
2007   AnyPairsCompletelyEliminated = NewCount == 0;
2008 
2009   // We can move calls!
2010   return true;
2011 }
2012 
2013 /// Identify pairings between the retains and releases, and delete and/or move
2014 /// them.
2015 bool ObjCARCOpt::PerformCodePlacement(
2016     DenseMap<const BasicBlock *, BBState> &BBStates,
2017     BlotMapVector<Value *, RRInfo> &Retains,
2018     DenseMap<Value *, RRInfo> &Releases, Module *M) {
2019   LLVM_DEBUG(dbgs() << "\n== ObjCARCOpt::PerformCodePlacement ==\n");
2020 
2021   bool AnyPairsCompletelyEliminated = false;
2022   SmallVector<Instruction *, 8> DeadInsts;
2023 
2024   // Visit each retain.
2025   for (BlotMapVector<Value *, RRInfo>::const_iterator I = Retains.begin(),
2026                                                       E = Retains.end();
2027        I != E; ++I) {
2028     Value *V = I->first;
2029     if (!V) continue; // blotted
2030 
2031     Instruction *Retain = cast<Instruction>(V);
2032 
2033     LLVM_DEBUG(dbgs() << "Visiting: " << *Retain << "\n");
2034 
2035     Value *Arg = GetArgRCIdentityRoot(Retain);
2036 
2037     // If the object being released is in static or stack storage, we know it's
2038     // not being managed by ObjC reference counting, so we can delete pairs
2039     // regardless of what possible decrements or uses lie between them.
2040     bool KnownSafe = isa<Constant>(Arg) || isa<AllocaInst>(Arg);
2041 
2042     // A constant pointer can't be pointing to an object on the heap. It may
2043     // be reference-counted, but it won't be deleted.
2044     if (const LoadInst *LI = dyn_cast<LoadInst>(Arg))
2045       if (const GlobalVariable *GV =
2046             dyn_cast<GlobalVariable>(
2047               GetRCIdentityRoot(LI->getPointerOperand())))
2048         if (GV->isConstant())
2049           KnownSafe = true;
2050 
2051     // Connect the dots between the top-down-collected RetainsToMove and
2052     // bottom-up-collected ReleasesToMove to form sets of related calls.
2053     RRInfo RetainsToMove, ReleasesToMove;
2054 
2055     bool PerformMoveCalls = PairUpRetainsAndReleases(
2056         BBStates, Retains, Releases, M, Retain, DeadInsts,
2057         RetainsToMove, ReleasesToMove, Arg, KnownSafe,
2058         AnyPairsCompletelyEliminated);
2059 
2060     if (PerformMoveCalls) {
2061       // Ok, everything checks out and we're all set. Let's move/delete some
2062       // code!
2063       MoveCalls(Arg, RetainsToMove, ReleasesToMove,
2064                 Retains, Releases, DeadInsts, M);
2065     }
2066   }
2067 
2068   // Now that we're done moving everything, we can delete the newly dead
2069   // instructions, as we no longer need them as insert points.
2070   while (!DeadInsts.empty())
2071     EraseInstruction(DeadInsts.pop_back_val());
2072 
2073   return AnyPairsCompletelyEliminated;
2074 }
2075 
2076 /// Weak pointer optimizations.
2077 void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
2078   LLVM_DEBUG(dbgs() << "\n== ObjCARCOpt::OptimizeWeakCalls ==\n");
2079 
2080   // First, do memdep-style RLE and S2L optimizations. We can't use memdep
2081   // itself because it uses AliasAnalysis and we need to do provenance
2082   // queries instead.
2083   for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
2084     Instruction *Inst = &*I++;
2085 
2086     LLVM_DEBUG(dbgs() << "Visiting: " << *Inst << "\n");
2087 
2088     ARCInstKind Class = GetBasicARCInstKind(Inst);
2089     if (Class != ARCInstKind::LoadWeak &&
2090         Class != ARCInstKind::LoadWeakRetained)
2091       continue;
2092 
2093     // Delete objc_loadWeak calls with no users.
2094     if (Class == ARCInstKind::LoadWeak && Inst->use_empty()) {
2095       Inst->eraseFromParent();
2096       Changed = true;
2097       continue;
2098     }
2099 
2100     // TODO: For now, just look for an earlier available version of this value
2101     // within the same block. Theoretically, we could do memdep-style non-local
2102     // analysis too, but that would want caching. A better approach would be to
2103     // use the technique that EarlyCSE uses.
2104     inst_iterator Current = std::prev(I);
2105     BasicBlock *CurrentBB = &*Current.getBasicBlockIterator();
2106     for (BasicBlock::iterator B = CurrentBB->begin(),
2107                               J = Current.getInstructionIterator();
2108          J != B; --J) {
2109       Instruction *EarlierInst = &*std::prev(J);
2110       ARCInstKind EarlierClass = GetARCInstKind(EarlierInst);
2111       switch (EarlierClass) {
2112       case ARCInstKind::LoadWeak:
2113       case ARCInstKind::LoadWeakRetained: {
2114         // If this is loading from the same pointer, replace this load's value
2115         // with that one.
2116         CallInst *Call = cast<CallInst>(Inst);
2117         CallInst *EarlierCall = cast<CallInst>(EarlierInst);
2118         Value *Arg = Call->getArgOperand(0);
2119         Value *EarlierArg = EarlierCall->getArgOperand(0);
2120         switch (PA.getAA()->alias(Arg, EarlierArg)) {
2121         case AliasResult::MustAlias:
2122           Changed = true;
2123           // If the load has a builtin retain, insert a plain retain for it.
2124           if (Class == ARCInstKind::LoadWeakRetained) {
2125             Function *Decl = EP.get(ARCRuntimeEntryPointKind::Retain);
2126             CallInst *CI = CallInst::Create(Decl, EarlierCall, "", Call);
2127             CI->setTailCall();
2128           }
2129           // Zap the fully redundant load.
2130           Call->replaceAllUsesWith(EarlierCall);
2131           Call->eraseFromParent();
2132           goto clobbered;
2133         case AliasResult::MayAlias:
2134         case AliasResult::PartialAlias:
2135           goto clobbered;
2136         case AliasResult::NoAlias:
2137           break;
2138         }
2139         break;
2140       }
2141       case ARCInstKind::StoreWeak:
2142       case ARCInstKind::InitWeak: {
2143         // If this is storing to the same pointer and has the same size etc.
2144         // replace this load's value with the stored value.
2145         CallInst *Call = cast<CallInst>(Inst);
2146         CallInst *EarlierCall = cast<CallInst>(EarlierInst);
2147         Value *Arg = Call->getArgOperand(0);
2148         Value *EarlierArg = EarlierCall->getArgOperand(0);
2149         switch (PA.getAA()->alias(Arg, EarlierArg)) {
2150         case AliasResult::MustAlias:
2151           Changed = true;
2152           // If the load has a builtin retain, insert a plain retain for it.
2153           if (Class == ARCInstKind::LoadWeakRetained) {
2154             Function *Decl = EP.get(ARCRuntimeEntryPointKind::Retain);
2155             CallInst *CI = CallInst::Create(Decl, EarlierCall, "", Call);
2156             CI->setTailCall();
2157           }
2158           // Zap the fully redundant load.
2159           Call->replaceAllUsesWith(EarlierCall->getArgOperand(1));
2160           Call->eraseFromParent();
2161           goto clobbered;
2162         case AliasResult::MayAlias:
2163         case AliasResult::PartialAlias:
2164           goto clobbered;
2165         case AliasResult::NoAlias:
2166           break;
2167         }
2168         break;
2169       }
2170       case ARCInstKind::MoveWeak:
2171       case ARCInstKind::CopyWeak:
2172         // TOOD: Grab the copied value.
2173         goto clobbered;
2174       case ARCInstKind::AutoreleasepoolPush:
2175       case ARCInstKind::None:
2176       case ARCInstKind::IntrinsicUser:
2177       case ARCInstKind::User:
2178         // Weak pointers are only modified through the weak entry points
2179         // (and arbitrary calls, which could call the weak entry points).
2180         break;
2181       default:
2182         // Anything else could modify the weak pointer.
2183         goto clobbered;
2184       }
2185     }
2186   clobbered:;
2187   }
2188 
2189   // Then, for each destroyWeak with an alloca operand, check to see if
2190   // the alloca and all its users can be zapped.
2191   for (Instruction &Inst : llvm::make_early_inc_range(instructions(F))) {
2192     ARCInstKind Class = GetBasicARCInstKind(&Inst);
2193     if (Class != ARCInstKind::DestroyWeak)
2194       continue;
2195 
2196     CallInst *Call = cast<CallInst>(&Inst);
2197     Value *Arg = Call->getArgOperand(0);
2198     if (AllocaInst *Alloca = dyn_cast<AllocaInst>(Arg)) {
2199       for (User *U : Alloca->users()) {
2200         const Instruction *UserInst = cast<Instruction>(U);
2201         switch (GetBasicARCInstKind(UserInst)) {
2202         case ARCInstKind::InitWeak:
2203         case ARCInstKind::StoreWeak:
2204         case ARCInstKind::DestroyWeak:
2205           continue;
2206         default:
2207           goto done;
2208         }
2209       }
2210       Changed = true;
2211       for (User *U : llvm::make_early_inc_range(Alloca->users())) {
2212         CallInst *UserInst = cast<CallInst>(U);
2213         switch (GetBasicARCInstKind(UserInst)) {
2214         case ARCInstKind::InitWeak:
2215         case ARCInstKind::StoreWeak:
2216           // These functions return their second argument.
2217           UserInst->replaceAllUsesWith(UserInst->getArgOperand(1));
2218           break;
2219         case ARCInstKind::DestroyWeak:
2220           // No return value.
2221           break;
2222         default:
2223           llvm_unreachable("alloca really is used!");
2224         }
2225         UserInst->eraseFromParent();
2226       }
2227       Alloca->eraseFromParent();
2228     done:;
2229     }
2230   }
2231 }
2232 
2233 /// Identify program paths which execute sequences of retains and releases which
2234 /// can be eliminated.
2235 bool ObjCARCOpt::OptimizeSequences(Function &F) {
2236   // Releases, Retains - These are used to store the results of the main flow
2237   // analysis. These use Value* as the key instead of Instruction* so that the
2238   // map stays valid when we get around to rewriting code and calls get
2239   // replaced by arguments.
2240   DenseMap<Value *, RRInfo> Releases;
2241   BlotMapVector<Value *, RRInfo> Retains;
2242 
2243   // This is used during the traversal of the function to track the
2244   // states for each identified object at each block.
2245   DenseMap<const BasicBlock *, BBState> BBStates;
2246 
2247   // Analyze the CFG of the function, and all instructions.
2248   bool NestingDetected = Visit(F, BBStates, Retains, Releases);
2249 
2250   if (DisableRetainReleasePairing)
2251     return false;
2252 
2253   // Transform.
2254   bool AnyPairsCompletelyEliminated = PerformCodePlacement(BBStates, Retains,
2255                                                            Releases,
2256                                                            F.getParent());
2257 
2258   return AnyPairsCompletelyEliminated && NestingDetected;
2259 }
2260 
2261 /// Check if there is a dependent call earlier that does not have anything in
2262 /// between the Retain and the call that can affect the reference count of their
2263 /// shared pointer argument. Note that Retain need not be in BB.
2264 static CallInst *HasSafePathToPredecessorCall(const Value *Arg,
2265                                               Instruction *Retain,
2266                                               ProvenanceAnalysis &PA) {
2267   auto *Call = dyn_cast_or_null<CallInst>(findSingleDependency(
2268       CanChangeRetainCount, Arg, Retain->getParent(), Retain, PA));
2269 
2270   // Check that the pointer is the return value of the call.
2271   if (!Call || Arg != Call)
2272     return nullptr;
2273 
2274   // Check that the call is a regular call.
2275   ARCInstKind Class = GetBasicARCInstKind(Call);
2276   return Class == ARCInstKind::CallOrUser || Class == ARCInstKind::Call
2277              ? Call
2278              : nullptr;
2279 }
2280 
2281 /// Find a dependent retain that precedes the given autorelease for which there
2282 /// is nothing in between the two instructions that can affect the ref count of
2283 /// Arg.
2284 static CallInst *
2285 FindPredecessorRetainWithSafePath(const Value *Arg, BasicBlock *BB,
2286                                   Instruction *Autorelease,
2287                                   ProvenanceAnalysis &PA) {
2288   auto *Retain = dyn_cast_or_null<CallInst>(
2289       findSingleDependency(CanChangeRetainCount, Arg, BB, Autorelease, PA));
2290 
2291   // Check that we found a retain with the same argument.
2292   if (!Retain || !IsRetain(GetBasicARCInstKind(Retain)) ||
2293       GetArgRCIdentityRoot(Retain) != Arg) {
2294     return nullptr;
2295   }
2296 
2297   return Retain;
2298 }
2299 
2300 /// Look for an ``autorelease'' instruction dependent on Arg such that there are
2301 /// no instructions dependent on Arg that need a positive ref count in between
2302 /// the autorelease and the ret.
2303 static CallInst *
2304 FindPredecessorAutoreleaseWithSafePath(const Value *Arg, BasicBlock *BB,
2305                                        ReturnInst *Ret,
2306                                        ProvenanceAnalysis &PA) {
2307   SmallPtrSet<Instruction *, 4> DepInsts;
2308   auto *Autorelease = dyn_cast_or_null<CallInst>(
2309       findSingleDependency(NeedsPositiveRetainCount, Arg, BB, Ret, PA));
2310 
2311   if (!Autorelease)
2312     return nullptr;
2313   ARCInstKind AutoreleaseClass = GetBasicARCInstKind(Autorelease);
2314   if (!IsAutorelease(AutoreleaseClass))
2315     return nullptr;
2316   if (GetArgRCIdentityRoot(Autorelease) != Arg)
2317     return nullptr;
2318 
2319   return Autorelease;
2320 }
2321 
2322 /// Look for this pattern:
2323 /// \code
2324 ///    %call = call i8* @something(...)
2325 ///    %2 = call i8* @objc_retain(i8* %call)
2326 ///    %3 = call i8* @objc_autorelease(i8* %2)
2327 ///    ret i8* %3
2328 /// \endcode
2329 /// And delete the retain and autorelease.
2330 void ObjCARCOpt::OptimizeReturns(Function &F) {
2331   if (!F.getReturnType()->isPointerTy())
2332     return;
2333 
2334   LLVM_DEBUG(dbgs() << "\n== ObjCARCOpt::OptimizeReturns ==\n");
2335 
2336   for (BasicBlock &BB: F) {
2337     ReturnInst *Ret = dyn_cast<ReturnInst>(&BB.back());
2338     if (!Ret)
2339       continue;
2340 
2341     LLVM_DEBUG(dbgs() << "Visiting: " << *Ret << "\n");
2342 
2343     const Value *Arg = GetRCIdentityRoot(Ret->getOperand(0));
2344 
2345     // Look for an ``autorelease'' instruction that is a predecessor of Ret and
2346     // dependent on Arg such that there are no instructions dependent on Arg
2347     // that need a positive ref count in between the autorelease and Ret.
2348     CallInst *Autorelease =
2349         FindPredecessorAutoreleaseWithSafePath(Arg, &BB, Ret, PA);
2350 
2351     if (!Autorelease)
2352       continue;
2353 
2354     CallInst *Retain = FindPredecessorRetainWithSafePath(
2355         Arg, Autorelease->getParent(), Autorelease, PA);
2356 
2357     if (!Retain)
2358       continue;
2359 
2360     // Check that there is nothing that can affect the reference count
2361     // between the retain and the call.  Note that Retain need not be in BB.
2362     CallInst *Call = HasSafePathToPredecessorCall(Arg, Retain, PA);
2363 
2364     // Don't remove retainRV/autoreleaseRV pairs if the call isn't a tail call.
2365     if (!Call ||
2366         (!Call->isTailCall() &&
2367          GetBasicARCInstKind(Retain) == ARCInstKind::RetainRV &&
2368          GetBasicARCInstKind(Autorelease) == ARCInstKind::AutoreleaseRV))
2369       continue;
2370 
2371     // If so, we can zap the retain and autorelease.
2372     Changed = true;
2373     ++NumRets;
2374     LLVM_DEBUG(dbgs() << "Erasing: " << *Retain << "\nErasing: " << *Autorelease
2375                       << "\n");
2376     BundledInsts->eraseInst(Retain);
2377     EraseInstruction(Autorelease);
2378   }
2379 }
2380 
2381 #ifndef NDEBUG
2382 void
2383 ObjCARCOpt::GatherStatistics(Function &F, bool AfterOptimization) {
2384   Statistic &NumRetains =
2385       AfterOptimization ? NumRetainsAfterOpt : NumRetainsBeforeOpt;
2386   Statistic &NumReleases =
2387       AfterOptimization ? NumReleasesAfterOpt : NumReleasesBeforeOpt;
2388 
2389   for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
2390     Instruction *Inst = &*I++;
2391     switch (GetBasicARCInstKind(Inst)) {
2392     default:
2393       break;
2394     case ARCInstKind::Retain:
2395       ++NumRetains;
2396       break;
2397     case ARCInstKind::Release:
2398       ++NumReleases;
2399       break;
2400     }
2401   }
2402 }
2403 #endif
2404 
2405 void ObjCARCOpt::init(Function &F) {
2406   if (!EnableARCOpts)
2407     return;
2408 
2409   // Intuitively, objc_retain and others are nocapture, however in practice
2410   // they are not, because they return their argument value. And objc_release
2411   // calls finalizers which can have arbitrary side effects.
2412   MDKindCache.init(F.getParent());
2413 
2414   // Initialize our runtime entry point cache.
2415   EP.init(F.getParent());
2416 
2417   // Compute which blocks are in which funclet.
2418   if (F.hasPersonalityFn() &&
2419       isScopedEHPersonality(classifyEHPersonality(F.getPersonalityFn())))
2420     BlockEHColors = colorEHFunclets(F);
2421 }
2422 
2423 bool ObjCARCOpt::run(Function &F, AAResults &AA) {
2424   if (!EnableARCOpts)
2425     return false;
2426 
2427   Changed = CFGChanged = false;
2428   BundledRetainClaimRVs BRV(/*ContractPass=*/false);
2429   BundledInsts = &BRV;
2430 
2431   LLVM_DEBUG(dbgs() << "<<< ObjCARCOpt: Visiting Function: " << F.getName()
2432                     << " >>>"
2433                        "\n");
2434 
2435   std::pair<bool, bool> R = BundledInsts->insertAfterInvokes(F, nullptr);
2436   Changed |= R.first;
2437   CFGChanged |= R.second;
2438 
2439   PA.setAA(&AA);
2440 
2441 #ifndef NDEBUG
2442   if (AreStatisticsEnabled()) {
2443     GatherStatistics(F, false);
2444   }
2445 #endif
2446 
2447   // This pass performs several distinct transformations. As a compile-time aid
2448   // when compiling code that isn't ObjC, skip these if the relevant ObjC
2449   // library functions aren't declared.
2450 
2451   // Preliminary optimizations. This also computes UsedInThisFunction.
2452   OptimizeIndividualCalls(F);
2453 
2454   // Optimizations for weak pointers.
2455   if (UsedInThisFunction & ((1 << unsigned(ARCInstKind::LoadWeak)) |
2456                             (1 << unsigned(ARCInstKind::LoadWeakRetained)) |
2457                             (1 << unsigned(ARCInstKind::StoreWeak)) |
2458                             (1 << unsigned(ARCInstKind::InitWeak)) |
2459                             (1 << unsigned(ARCInstKind::CopyWeak)) |
2460                             (1 << unsigned(ARCInstKind::MoveWeak)) |
2461                             (1 << unsigned(ARCInstKind::DestroyWeak))))
2462     OptimizeWeakCalls(F);
2463 
2464   // Optimizations for retain+release pairs.
2465   if (UsedInThisFunction & ((1 << unsigned(ARCInstKind::Retain)) |
2466                             (1 << unsigned(ARCInstKind::RetainRV)) |
2467                             (1 << unsigned(ARCInstKind::RetainBlock))))
2468     if (UsedInThisFunction & (1 << unsigned(ARCInstKind::Release)))
2469       // Run OptimizeSequences until it either stops making changes or
2470       // no retain+release pair nesting is detected.
2471       while (OptimizeSequences(F)) {}
2472 
2473   // Optimizations if objc_autorelease is used.
2474   if (UsedInThisFunction & ((1 << unsigned(ARCInstKind::Autorelease)) |
2475                             (1 << unsigned(ARCInstKind::AutoreleaseRV))))
2476     OptimizeReturns(F);
2477 
2478   // Gather statistics after optimization.
2479 #ifndef NDEBUG
2480   if (AreStatisticsEnabled()) {
2481     GatherStatistics(F, true);
2482   }
2483 #endif
2484 
2485   LLVM_DEBUG(dbgs() << "\n");
2486 
2487   return Changed;
2488 }
2489 
2490 /// @}
2491 ///
2492 
2493 PreservedAnalyses ObjCARCOptPass::run(Function &F,
2494                                       FunctionAnalysisManager &AM) {
2495   ObjCARCOpt OCAO;
2496   OCAO.init(F);
2497 
2498   bool Changed = OCAO.run(F, AM.getResult<AAManager>(F));
2499   bool CFGChanged = OCAO.hasCFGChanged();
2500   if (Changed) {
2501     PreservedAnalyses PA;
2502     if (!CFGChanged)
2503       PA.preserveSet<CFGAnalyses>();
2504     return PA;
2505   }
2506   return PreservedAnalyses::all();
2507 }
2508