1 //===- ObjCARCOpts.cpp - ObjC ARC Optimization ----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This file defines ObjC ARC optimizations. ARC stands for Automatic
11 /// Reference Counting and is a system for managing reference counts for objects
12 /// in Objective C.
13 ///
14 /// The optimizations performed include elimination of redundant, partially
15 /// redundant, and inconsequential reference count operations, elimination of
16 /// redundant weak pointer operations, and numerous minor simplifications.
17 ///
18 /// WARNING: This file knows about certain library functions. It recognizes them
19 /// by name, and hardwires knowledge of their semantics.
20 ///
21 /// WARNING: This file knows about how certain Objective-C library functions are
22 /// used. Naive LLVM IR transformations which would otherwise be
23 /// behavior-preserving may break these assumptions.
24 //
25 //===----------------------------------------------------------------------===//
26 
27 #include "ARCRuntimeEntryPoints.h"
28 #include "BlotMapVector.h"
29 #include "DependencyAnalysis.h"
30 #include "ObjCARC.h"
31 #include "ProvenanceAnalysis.h"
32 #include "PtrState.h"
33 #include "llvm/ADT/DenseMap.h"
34 #include "llvm/ADT/STLExtras.h"
35 #include "llvm/ADT/SmallPtrSet.h"
36 #include "llvm/ADT/SmallVector.h"
37 #include "llvm/ADT/Statistic.h"
38 #include "llvm/Analysis/AliasAnalysis.h"
39 #include "llvm/Analysis/ObjCARCAliasAnalysis.h"
40 #include "llvm/Analysis/ObjCARCAnalysisUtils.h"
41 #include "llvm/Analysis/ObjCARCInstKind.h"
42 #include "llvm/Analysis/ObjCARCUtil.h"
43 #include "llvm/IR/BasicBlock.h"
44 #include "llvm/IR/CFG.h"
45 #include "llvm/IR/Constant.h"
46 #include "llvm/IR/Constants.h"
47 #include "llvm/IR/DerivedTypes.h"
48 #include "llvm/IR/EHPersonalities.h"
49 #include "llvm/IR/Function.h"
50 #include "llvm/IR/GlobalVariable.h"
51 #include "llvm/IR/InstIterator.h"
52 #include "llvm/IR/InstrTypes.h"
53 #include "llvm/IR/Instruction.h"
54 #include "llvm/IR/Instructions.h"
55 #include "llvm/IR/LLVMContext.h"
56 #include "llvm/IR/Metadata.h"
57 #include "llvm/IR/Type.h"
58 #include "llvm/IR/User.h"
59 #include "llvm/IR/Value.h"
60 #include "llvm/Support/Casting.h"
61 #include "llvm/Support/CommandLine.h"
62 #include "llvm/Support/Compiler.h"
63 #include "llvm/Support/Debug.h"
64 #include "llvm/Support/ErrorHandling.h"
65 #include "llvm/Support/raw_ostream.h"
66 #include "llvm/Transforms/ObjCARC.h"
67 #include <cassert>
68 #include <iterator>
69 #include <utility>
70 
71 using namespace llvm;
72 using namespace llvm::objcarc;
73 
74 #define DEBUG_TYPE "objc-arc-opts"
75 
76 static cl::opt<unsigned> MaxPtrStates("arc-opt-max-ptr-states",
77     cl::Hidden,
78     cl::desc("Maximum number of ptr states the optimizer keeps track of"),
79     cl::init(4095));
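// A sketch of how this knob might be exercised from the standalone 'opt' tool
// (assuming the new-pass-manager pass name "objc-arc"):
//   opt -passes=objc-arc -arc-opt-max-ptr-states=1024 input.ll -S -o output.ll
// When a block would exceed the limit, the pass falls back to conservative
// behavior (see DisableRetainReleasePairing below) instead of tracking an
// unbounded number of pointer states.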
80 
81 /// \defgroup ARCUtilities Utility declarations/definitions specific to ARC.
82 /// @{
83 
84 /// This is similar to GetRCIdentityRoot but it stops as soon
85 /// as it finds a value with multiple uses.
86 static const Value *FindSingleUseIdentifiedObject(const Value *Arg) {
87   // ConstantData (like ConstantPointerNull and UndefValue) is used across
88   // modules.  It's never a single-use value.
89   if (isa<ConstantData>(Arg))
90     return nullptr;
91 
92   if (Arg->hasOneUse()) {
93     if (const BitCastInst *BC = dyn_cast<BitCastInst>(Arg))
94       return FindSingleUseIdentifiedObject(BC->getOperand(0));
95     if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Arg))
96       if (GEP->hasAllZeroIndices())
97         return FindSingleUseIdentifiedObject(GEP->getPointerOperand());
98     if (IsForwarding(GetBasicARCInstKind(Arg)))
99       return FindSingleUseIdentifiedObject(
100                cast<CallInst>(Arg)->getArgOperand(0));
101     if (!IsObjCIdentifiedObject(Arg))
102       return nullptr;
103     return Arg;
104   }
105 
106   // If we found an identifiable object that has multiple uses, but the uses are
107   // all trivial, we can still consider this to be a single-use value.
108   if (IsObjCIdentifiedObject(Arg)) {
109     for (const User *U : Arg->users())
110       if (!U->use_empty() || GetRCIdentityRoot(U) != Arg)
111          return nullptr;
112 
113     return Arg;
114   }
115 
116   return nullptr;
117 }
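// Illustrative sketch (hypothetical IR): given
//   %r = call i8* @objc_retain(i8* %obj)   ; forwarding call with a single use
//   call void @objc_autorelease(i8* %r)
// the walk looks through the forwarding retain to %obj and returns it, provided
// %obj is an ObjC-identified object whose remaining uses are all trivial;
// otherwise the function returns null.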
118 
119 /// @}
120 ///
121 /// \defgroup ARCOpt ARC Optimization.
122 /// @{
123 
124 // TODO: On code like this:
125 //
126 // objc_retain(%x)
127 // stuff_that_cannot_release()
128 // objc_autorelease(%x)
129 // stuff_that_cannot_release()
130 // objc_retain(%x)
131 // stuff_that_cannot_release()
132 // objc_autorelease(%x)
133 //
134 // The second retain and autorelease can be deleted.
135 
136 // TODO: It should be possible to delete
137 // objc_autoreleasePoolPush and objc_autoreleasePoolPop
138 // pairs if nothing is actually autoreleased between them. Also, autorelease
139 // calls followed by objc_autoreleasePoolPop calls (perhaps in ObjC++ code
140 // after inlining) can be turned into plain release calls.
141 
142 // TODO: Critical-edge splitting. If the optimal insertion point is
143 // a critical edge, the current algorithm has to fail, because it doesn't
144 // know how to split edges. It should be possible to make the optimizer
145 // think in terms of edges, rather than blocks, and then split critical
146 // edges on demand.
147 
148 // TODO: OptimizeSequences could be generalized to be interprocedural.
149 
150 // TODO: Recognize that a bunch of other objc runtime calls have
151 // non-escaping arguments and non-releasing arguments, and may be
152 // non-autoreleasing.
153 
154 // TODO: Sink autorelease calls as far as possible. Unfortunately we
155 // usually can't sink them past other calls, which would be the main
156 // case where it would be useful.
157 
158 // TODO: The pointer returned from objc_loadWeakRetained is retained.
159 
160 // TODO: Delete release+retain pairs (rare).
161 
162 STATISTIC(NumNoops,       "Number of no-op objc calls eliminated");
163 STATISTIC(NumPartialNoops, "Number of partially no-op objc calls eliminated");
164 STATISTIC(NumAutoreleases,"Number of autoreleases converted to releases");
165 STATISTIC(NumRets,        "Number of return value forwarding "
166                           "retain+autoreleases eliminated");
167 STATISTIC(NumRRs,         "Number of retain+release paths eliminated");
168 STATISTIC(NumPeeps,       "Number of calls peephole-optimized");
169 #ifndef NDEBUG
170 STATISTIC(NumRetainsBeforeOpt,
171           "Number of retains before optimization");
172 STATISTIC(NumReleasesBeforeOpt,
173           "Number of releases before optimization");
174 STATISTIC(NumRetainsAfterOpt,
175           "Number of retains after optimization");
176 STATISTIC(NumReleasesAfterOpt,
177           "Number of releases after optimization");
178 #endif
179 
180 namespace {
181 
182   /// Per-BasicBlock state.
183   class BBState {
184     /// The number of unique control paths from the entry which can reach this
185     /// block.
186     unsigned TopDownPathCount = 0;
187 
188     /// The number of unique control paths to exits from this block.
189     unsigned BottomUpPathCount = 0;
190 
191     /// The top-down traversal uses this to record information known about a
192     /// pointer at the bottom of each block.
193     BlotMapVector<const Value *, TopDownPtrState> PerPtrTopDown;
194 
195     /// The bottom-up traversal uses this to record information known about a
196     /// pointer at the top of each block.
197     BlotMapVector<const Value *, BottomUpPtrState> PerPtrBottomUp;
198 
199     /// Effective predecessors of the current block ignoring ignorable edges and
200     /// ignored backedges.
201     SmallVector<BasicBlock *, 2> Preds;
202 
203     /// Effective successors of the current block ignoring ignorable edges and
204     /// ignored backedges.
205     SmallVector<BasicBlock *, 2> Succs;
206 
207   public:
208     static const unsigned OverflowOccurredValue;
209 
210     BBState() = default;
211 
212     using top_down_ptr_iterator = decltype(PerPtrTopDown)::iterator;
213     using const_top_down_ptr_iterator = decltype(PerPtrTopDown)::const_iterator;
214 
215     top_down_ptr_iterator top_down_ptr_begin() { return PerPtrTopDown.begin(); }
216     top_down_ptr_iterator top_down_ptr_end() { return PerPtrTopDown.end(); }
217     const_top_down_ptr_iterator top_down_ptr_begin() const {
218       return PerPtrTopDown.begin();
219     }
220     const_top_down_ptr_iterator top_down_ptr_end() const {
221       return PerPtrTopDown.end();
222     }
223     bool hasTopDownPtrs() const {
224       return !PerPtrTopDown.empty();
225     }
226 
227     unsigned top_down_ptr_list_size() const {
228       return std::distance(top_down_ptr_begin(), top_down_ptr_end());
229     }
230 
231     using bottom_up_ptr_iterator = decltype(PerPtrBottomUp)::iterator;
232     using const_bottom_up_ptr_iterator =
233         decltype(PerPtrBottomUp)::const_iterator;
234 
235     bottom_up_ptr_iterator bottom_up_ptr_begin() {
236       return PerPtrBottomUp.begin();
237     }
238     bottom_up_ptr_iterator bottom_up_ptr_end() { return PerPtrBottomUp.end(); }
239     const_bottom_up_ptr_iterator bottom_up_ptr_begin() const {
240       return PerPtrBottomUp.begin();
241     }
242     const_bottom_up_ptr_iterator bottom_up_ptr_end() const {
243       return PerPtrBottomUp.end();
244     }
245     bool hasBottomUpPtrs() const {
246       return !PerPtrBottomUp.empty();
247     }
248 
249     unsigned bottom_up_ptr_list_size() const {
250       return std::distance(bottom_up_ptr_begin(), bottom_up_ptr_end());
251     }
252 
253     /// Mark this block as being an entry block, which has one path from the
254     /// entry by definition.
255     void SetAsEntry() { TopDownPathCount = 1; }
256 
257     /// Mark this block as being an exit block, which has one path to an exit by
258     /// definition.
259     void SetAsExit()  { BottomUpPathCount = 1; }
260 
261     /// Attempt to find the PtrState object describing the top down state for
262     /// pointer Arg. Return a new initialized PtrState describing the top down
263     /// state for Arg if we do not find one.
264     TopDownPtrState &getPtrTopDownState(const Value *Arg) {
265       return PerPtrTopDown[Arg];
266     }
267 
268     /// Attempt to find the PtrState object describing the bottom up state for
269     /// pointer Arg. Return a new initialized PtrState describing the bottom up
270     /// state for Arg if we do not find one.
271     BottomUpPtrState &getPtrBottomUpState(const Value *Arg) {
272       return PerPtrBottomUp[Arg];
273     }
274 
275     /// Attempt to find the PtrState object describing the bottom up state for
276     /// pointer Arg.
277     bottom_up_ptr_iterator findPtrBottomUpState(const Value *Arg) {
278       return PerPtrBottomUp.find(Arg);
279     }
280 
281     void clearBottomUpPointers() {
282       PerPtrBottomUp.clear();
283     }
284 
285     void clearTopDownPointers() {
286       PerPtrTopDown.clear();
287     }
288 
289     void InitFromPred(const BBState &Other);
290     void InitFromSucc(const BBState &Other);
291     void MergePred(const BBState &Other);
292     void MergeSucc(const BBState &Other);
293 
294     /// Compute the number of possible unique paths from an entry to an exit
295     /// which pass through this block. This is only valid after both the
296     /// top-down and bottom-up traversals are complete.
297     ///
298     /// Returns true if overflow occurred. Returns false if overflow did not
299     /// occur.
300     bool GetAllPathCountWithOverflow(unsigned &PathCount) const {
301       if (TopDownPathCount == OverflowOccurredValue ||
302           BottomUpPathCount == OverflowOccurredValue)
303         return true;
304       unsigned long long Product =
305         (unsigned long long)TopDownPathCount*BottomUpPathCount;
306       // Overflow occurred if any of the upper bits of Product are set or if all
307     // the lower bits of Product are set (i.e. Product equals OverflowOccurredValue).
308       return (Product >> 32) ||
309              ((PathCount = Product) == OverflowOccurredValue);
310     }
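    // For example, TopDownPathCount = 3 and BottomUpPathCount = 5 yield a
    // PathCount of 15. If the 64-bit product needs more than 32 bits, or lands
    // exactly on OverflowOccurredValue, the caller treats the result as an
    // overflow and falls back to conservative behavior.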
311 
312     // Specialized CFG utilities.
313     using edge_iterator = SmallVectorImpl<BasicBlock *>::const_iterator;
314 
315     edge_iterator pred_begin() const { return Preds.begin(); }
316     edge_iterator pred_end() const { return Preds.end(); }
317     edge_iterator succ_begin() const { return Succs.begin(); }
318     edge_iterator succ_end() const { return Succs.end(); }
319 
320     void addSucc(BasicBlock *Succ) { Succs.push_back(Succ); }
321     void addPred(BasicBlock *Pred) { Preds.push_back(Pred); }
322 
323     bool isExit() const { return Succs.empty(); }
324   };
325 
326 } // end anonymous namespace
327 
328 const unsigned BBState::OverflowOccurredValue = 0xffffffff;
329 
330 namespace llvm {
331 
332 raw_ostream &operator<<(raw_ostream &OS,
333                         BBState &BBState) LLVM_ATTRIBUTE_UNUSED;
334 
335 } // end namespace llvm
336 
337 void BBState::InitFromPred(const BBState &Other) {
338   PerPtrTopDown = Other.PerPtrTopDown;
339   TopDownPathCount = Other.TopDownPathCount;
340 }
341 
342 void BBState::InitFromSucc(const BBState &Other) {
343   PerPtrBottomUp = Other.PerPtrBottomUp;
344   BottomUpPathCount = Other.BottomUpPathCount;
345 }
346 
347 /// The top-down traversal uses this to merge information about predecessors to
348 /// form the initial state for a new block.
349 void BBState::MergePred(const BBState &Other) {
350   if (TopDownPathCount == OverflowOccurredValue)
351     return;
352 
353   // Other.TopDownPathCount can be 0, in which case it is either dead or a
354   // loop backedge. Loop backedges are special.
355   TopDownPathCount += Other.TopDownPathCount;
356 
357   // In order to be consistent, we clear the top-down pointers when the addition
358   // makes TopDownPathCount equal OverflowOccurredValue, even though "true" overflow
359   // has not occurred.
360   if (TopDownPathCount == OverflowOccurredValue) {
361     clearTopDownPointers();
362     return;
363   }
364 
365   // Check for overflow. If we have overflow, fall back to conservative
366   // behavior.
367   if (TopDownPathCount < Other.TopDownPathCount) {
368     TopDownPathCount = OverflowOccurredValue;
369     clearTopDownPointers();
370     return;
371   }
372 
373   // For each entry in the other set, if our set has an entry with the same key,
374   // merge the entries. Otherwise, copy the entry and merge it with an empty
375   // entry.
376   for (auto MI = Other.top_down_ptr_begin(), ME = Other.top_down_ptr_end();
377        MI != ME; ++MI) {
378     auto Pair = PerPtrTopDown.insert(*MI);
379     Pair.first->second.Merge(Pair.second ? TopDownPtrState() : MI->second,
380                              /*TopDown=*/true);
381   }
382 
383   // For each entry in our set, if the other set doesn't have an entry with the
384   // same key, force it to merge with an empty entry.
385   for (auto MI = top_down_ptr_begin(), ME = top_down_ptr_end(); MI != ME; ++MI)
386     if (Other.PerPtrTopDown.find(MI->first) == Other.PerPtrTopDown.end())
387       MI->second.Merge(TopDownPtrState(), /*TopDown=*/true);
388 }
389 
390 /// The bottom-up traversal uses this to merge information about successors to
391 /// form the initial state for a new block.
392 void BBState::MergeSucc(const BBState &Other) {
393   if (BottomUpPathCount == OverflowOccurredValue)
394     return;
395 
396   // Other.BottomUpPathCount can be 0, in which case it is either dead or a
397   // loop backedge. Loop backedges are special.
398   BottomUpPathCount += Other.BottomUpPathCount;
399 
400   // In order to be consistent, we clear the bottom-up pointers when the addition
401   // makes BottomUpPathCount equal OverflowOccurredValue, even though "true" overflow
402   // has not occurred.
403   if (BottomUpPathCount == OverflowOccurredValue) {
404     clearBottomUpPointers();
405     return;
406   }
407 
408   // Check for overflow. If we have overflow, fall back to conservative
409   // behavior.
410   if (BottomUpPathCount < Other.BottomUpPathCount) {
411     BottomUpPathCount = OverflowOccurredValue;
412     clearBottomUpPointers();
413     return;
414   }
415 
416   // For each entry in the other set, if our set has an entry with the
417   // same key, merge the entries. Otherwise, copy the entry and merge
418   // it with an empty entry.
419   for (auto MI = Other.bottom_up_ptr_begin(), ME = Other.bottom_up_ptr_end();
420        MI != ME; ++MI) {
421     auto Pair = PerPtrBottomUp.insert(*MI);
422     Pair.first->second.Merge(Pair.second ? BottomUpPtrState() : MI->second,
423                              /*TopDown=*/false);
424   }
425 
426   // For each entry in our set, if the other set doesn't have an entry
427   // with the same key, force it to merge with an empty entry.
428   for (auto MI = bottom_up_ptr_begin(), ME = bottom_up_ptr_end(); MI != ME;
429        ++MI)
430     if (Other.PerPtrBottomUp.find(MI->first) == Other.PerPtrBottomUp.end())
431       MI->second.Merge(BottomUpPtrState(), /*TopDown=*/false);
432 }
433 
434 raw_ostream &llvm::operator<<(raw_ostream &OS, BBState &BBInfo) {
435   // Dump the pointers we are tracking.
436   OS << "    TopDown State:\n";
437   if (!BBInfo.hasTopDownPtrs()) {
438     LLVM_DEBUG(dbgs() << "        NONE!\n");
439   } else {
440     for (auto I = BBInfo.top_down_ptr_begin(), E = BBInfo.top_down_ptr_end();
441          I != E; ++I) {
442       const PtrState &P = I->second;
443       OS << "        Ptr: " << *I->first
444          << "\n            KnownSafe:        " << (P.IsKnownSafe()?"true":"false")
445          << "\n            ImpreciseRelease: "
446            << (P.IsTrackingImpreciseReleases()?"true":"false") << "\n"
447          << "            HasCFGHazards:    "
448            << (P.IsCFGHazardAfflicted()?"true":"false") << "\n"
449          << "            KnownPositive:    "
450            << (P.HasKnownPositiveRefCount()?"true":"false") << "\n"
451          << "            Seq:              "
452          << P.GetSeq() << "\n";
453     }
454   }
455 
456   OS << "    BottomUp State:\n";
457   if (!BBInfo.hasBottomUpPtrs()) {
458     LLVM_DEBUG(dbgs() << "        NONE!\n");
459   } else {
460     for (auto I = BBInfo.bottom_up_ptr_begin(), E = BBInfo.bottom_up_ptr_end();
461          I != E; ++I) {
462       const PtrState &P = I->second;
463       OS << "        Ptr: " << *I->first
464          << "\n            KnownSafe:        " << (P.IsKnownSafe()?"true":"false")
465          << "\n            ImpreciseRelease: "
466            << (P.IsTrackingImpreciseReleases()?"true":"false") << "\n"
467          << "            HasCFGHazards:    "
468            << (P.IsCFGHazardAfflicted()?"true":"false") << "\n"
469          << "            KnownPositive:    "
470            << (P.HasKnownPositiveRefCount()?"true":"false") << "\n"
471          << "            Seq:              "
472          << P.GetSeq() << "\n";
473     }
474   }
475 
476   return OS;
477 }
478 
479 namespace {
480 
481   /// The main ARC optimization pass.
482 class ObjCARCOpt {
483   bool Changed = false;
484   bool CFGChanged = false;
485   ProvenanceAnalysis PA;
486 
487   /// A cache of references to runtime entry point constants.
488   ARCRuntimeEntryPoints EP;
489 
490   /// A cache of MDKinds that can be passed into other functions to propagate
491   /// MDKind identifiers.
492   ARCMDKindCache MDKindCache;
493 
494   BundledRetainClaimRVs *BundledInsts = nullptr;
495 
496   /// A flag indicating whether the optimization that removes or moves
497   /// retain/release pairs should be performed.
498   bool DisableRetainReleasePairing = false;
499 
500   /// Flags which determine whether each of the interesting runtime functions
501   /// is in fact used in the current function.
502   unsigned UsedInThisFunction;
503 
504   DenseMap<BasicBlock *, ColorVector> BlockEHColors;
505 
506   bool OptimizeRetainRVCall(Function &F, Instruction *RetainRV);
507   void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV,
508                                  ARCInstKind &Class);
509   void OptimizeIndividualCalls(Function &F);
510 
511   /// Optimize an individual call, optionally passing the
512   /// GetArgRCIdentityRoot if it has already been computed.
513   void OptimizeIndividualCallImpl(Function &F, Instruction *Inst,
514                                   ARCInstKind Class, const Value *Arg);
515 
516   /// Try to optimize an AutoreleaseRV with a RetainRV or UnsafeClaimRV.  If the
517   /// optimization occurs, returns true to indicate that the caller should
518   /// assume the instructions are dead.
519   bool OptimizeInlinedAutoreleaseRVCall(Function &F, Instruction *Inst,
520                                         const Value *&Arg, ARCInstKind Class,
521                                         Instruction *AutoreleaseRV,
522                                         const Value *&AutoreleaseRVArg);
523 
524   void CheckForCFGHazards(const BasicBlock *BB,
525                           DenseMap<const BasicBlock *, BBState> &BBStates,
526                           BBState &MyStates) const;
527   bool VisitInstructionBottomUp(Instruction *Inst, BasicBlock *BB,
528                                 BlotMapVector<Value *, RRInfo> &Retains,
529                                 BBState &MyStates);
530   bool VisitBottomUp(BasicBlock *BB,
531                      DenseMap<const BasicBlock *, BBState> &BBStates,
532                      BlotMapVector<Value *, RRInfo> &Retains);
533   bool VisitInstructionTopDown(
534       Instruction *Inst, DenseMap<Value *, RRInfo> &Releases, BBState &MyStates,
535       const DenseMap<const Instruction *, SmallPtrSet<const Value *, 2>>
536           &ReleaseInsertPtToRCIdentityRoots);
537   bool VisitTopDown(
538       BasicBlock *BB, DenseMap<const BasicBlock *, BBState> &BBStates,
539       DenseMap<Value *, RRInfo> &Releases,
540       const DenseMap<const Instruction *, SmallPtrSet<const Value *, 2>>
541           &ReleaseInsertPtToRCIdentityRoots);
542   bool Visit(Function &F, DenseMap<const BasicBlock *, BBState> &BBStates,
543              BlotMapVector<Value *, RRInfo> &Retains,
544              DenseMap<Value *, RRInfo> &Releases);
545 
546   void MoveCalls(Value *Arg, RRInfo &RetainsToMove, RRInfo &ReleasesToMove,
547                  BlotMapVector<Value *, RRInfo> &Retains,
548                  DenseMap<Value *, RRInfo> &Releases,
549                  SmallVectorImpl<Instruction *> &DeadInsts, Module *M);
550 
551   bool PairUpRetainsAndReleases(DenseMap<const BasicBlock *, BBState> &BBStates,
552                                 BlotMapVector<Value *, RRInfo> &Retains,
553                                 DenseMap<Value *, RRInfo> &Releases, Module *M,
554                                 Instruction *Retain,
555                                 SmallVectorImpl<Instruction *> &DeadInsts,
556                                 RRInfo &RetainsToMove, RRInfo &ReleasesToMove,
557                                 Value *Arg, bool KnownSafe,
558                                 bool &AnyPairsCompletelyEliminated);
559 
560   bool PerformCodePlacement(DenseMap<const BasicBlock *, BBState> &BBStates,
561                             BlotMapVector<Value *, RRInfo> &Retains,
562                             DenseMap<Value *, RRInfo> &Releases, Module *M);
563 
564   void OptimizeWeakCalls(Function &F);
565 
566   bool OptimizeSequences(Function &F);
567 
568   void OptimizeReturns(Function &F);
569 
570   template <typename PredicateT>
571   static void cloneOpBundlesIf(CallBase *CI,
572                                SmallVectorImpl<OperandBundleDef> &OpBundles,
573                                PredicateT Predicate) {
574     for (unsigned I = 0, E = CI->getNumOperandBundles(); I != E; ++I) {
575       OperandBundleUse B = CI->getOperandBundleAt(I);
576       if (Predicate(B))
577         OpBundles.emplace_back(B);
578     }
579   }
580 
581   void addOpBundleForFunclet(BasicBlock *BB,
582                              SmallVectorImpl<OperandBundleDef> &OpBundles) {
583     if (!BlockEHColors.empty()) {
584       const ColorVector &CV = BlockEHColors.find(BB)->second;
585       assert(CV.size() > 0 && "Uncolored block");
586       for (BasicBlock *EHPadBB : CV)
587         if (auto *EHPad = dyn_cast<FuncletPadInst>(EHPadBB->getFirstNonPHI())) {
588           OpBundles.emplace_back("funclet", EHPad);
589           return;
590         }
591     }
592   }
593 
594 #ifndef NDEBUG
595   void GatherStatistics(Function &F, bool AfterOptimization = false);
596 #endif
597 
598   public:
599     void init(Function &F);
600     bool run(Function &F, AAResults &AA);
601     bool hasCFGChanged() const { return CFGChanged; }
602 };
603 } // end anonymous namespace
604 
605 /// Turn objc_retainAutoreleasedReturnValue into objc_retain if the operand is
606 /// not a return value.
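/// For instance (an illustrative sketch in hypothetical IR; the exact runtime
/// function spellings depend on how the frontend emitted them):
///   %v = load i8*, i8** %slot                      ; not a call/invoke result
///   %r = call i8* @objc_retainAutoreleasedReturnValue(i8* %v)
/// is rewritten so the call targets objc_retain instead, because %v does not
/// come from an immediately preceding call or invoke.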
607 bool
608 ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
609   // Check for the argument being from an immediately preceding call or invoke.
610   const Value *Arg = GetArgRCIdentityRoot(RetainRV);
611   if (const Instruction *Call = dyn_cast<CallBase>(Arg)) {
612     if (Call->getParent() == RetainRV->getParent()) {
613       BasicBlock::const_iterator I(Call);
614       ++I;
615       while (IsNoopInstruction(&*I))
616         ++I;
617       if (&*I == RetainRV)
618         return false;
619     } else if (const InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
620       BasicBlock *RetainRVParent = RetainRV->getParent();
621       if (II->getNormalDest() == RetainRVParent) {
622         BasicBlock::const_iterator I = RetainRVParent->begin();
623         while (IsNoopInstruction(&*I))
624           ++I;
625         if (&*I == RetainRV)
626           return false;
627       }
628     }
629   }
630 
631   assert(!BundledInsts->contains(RetainRV) &&
632          "a bundled retainRV's argument should be a call");
633 
634   // Turn it to a plain objc_retain.
635   Changed = true;
636   ++NumPeeps;
637 
638   LLVM_DEBUG(dbgs() << "Transforming objc_retainAutoreleasedReturnValue => "
639                        "objc_retain since the operand is not a return value.\n"
640                        "Old = "
641                     << *RetainRV << "\n");
642 
643   Function *NewDecl = EP.get(ARCRuntimeEntryPointKind::Retain);
644   cast<CallInst>(RetainRV)->setCalledFunction(NewDecl);
645 
646   LLVM_DEBUG(dbgs() << "New = " << *RetainRV << "\n");
647 
648   return false;
649 }
650 
651 bool ObjCARCOpt::OptimizeInlinedAutoreleaseRVCall(
652     Function &F, Instruction *Inst, const Value *&Arg, ARCInstKind Class,
653     Instruction *AutoreleaseRV, const Value *&AutoreleaseRVArg) {
654   if (BundledInsts->contains(Inst))
655     return false;
656 
657   // Must be in the same basic block.
658   assert(Inst->getParent() == AutoreleaseRV->getParent());
659 
660   // Must operate on the same root.
661   Arg = GetArgRCIdentityRoot(Inst);
662   AutoreleaseRVArg = GetArgRCIdentityRoot(AutoreleaseRV);
663   if (Arg != AutoreleaseRVArg) {
664     // If there isn't an exact match, check if we have equivalent PHIs.
665     const PHINode *PN = dyn_cast<PHINode>(Arg);
666     if (!PN)
667       return false;
668 
669     SmallVector<const Value *, 4> ArgUsers;
670     getEquivalentPHIs(*PN, ArgUsers);
671     if (!llvm::is_contained(ArgUsers, AutoreleaseRVArg))
672       return false;
673   }
674 
675   // Okay, this is a match.  Merge them.
676   ++NumPeeps;
677   LLVM_DEBUG(dbgs() << "Found inlined objc_autoreleaseReturnValue '"
678                     << *AutoreleaseRV << "' paired with '" << *Inst << "'\n");
679 
680   // Delete the RV pair, starting with the AutoreleaseRV.
681   AutoreleaseRV->replaceAllUsesWith(
682       cast<CallInst>(AutoreleaseRV)->getArgOperand(0));
683   Changed = true;
684   EraseInstruction(AutoreleaseRV);
685   if (Class == ARCInstKind::RetainRV) {
686     // AutoreleaseRV and RetainRV cancel out.  Delete the RetainRV.
687     Inst->replaceAllUsesWith(cast<CallInst>(Inst)->getArgOperand(0));
688     EraseInstruction(Inst);
689     return true;
690   }
691 
692   // UnsafeClaimRV is a frontend peephole for RetainRV + Release.  Since the
693   // AutoreleaseRV and RetainRV cancel out, replace UnsafeClaimRV with Release.
694   assert(Class == ARCInstKind::UnsafeClaimRV);
695   Value *CallArg = cast<CallInst>(Inst)->getArgOperand(0);
696   CallInst *Release =
697       CallInst::Create(EP.get(ARCRuntimeEntryPointKind::Release), CallArg, "",
698                        Inst->getIterator());
699   assert(IsAlwaysTail(ARCInstKind::UnsafeClaimRV) &&
700          "Expected UnsafeClaimRV to be safe to tail call");
701   Release->setTailCall();
702   Inst->replaceAllUsesWith(CallArg);
703   EraseInstruction(Inst);
704 
705   // Run the normal optimizations on Release.
706   OptimizeIndividualCallImpl(F, Release, ARCInstKind::Release, Arg);
707   return true;
708 }
709 
710 /// Turn objc_autoreleaseReturnValue into objc_autorelease if the result is not
711 /// used as a return value.
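/// For instance (illustrative sketch, hypothetical IR):
///   %r = call i8* @objc_autoreleaseReturnValue(i8* %x)
///   store i8* %r, i8** %slot                       ; %r is never returned
/// is rewritten to call objc_autorelease and drops the 'tail' marker, since the
/// return-value handshake no longer applies.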
712 void ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F,
713                                            Instruction *AutoreleaseRV,
714                                            ARCInstKind &Class) {
715   // Check for a return of the pointer value.
716   const Value *Ptr = GetArgRCIdentityRoot(AutoreleaseRV);
717 
718   // If the argument is ConstantPointerNull or UndefValue, its other users
719   // aren't actually interesting to look at.
720   if (isa<ConstantData>(Ptr))
721     return;
722 
723   SmallVector<const Value *, 2> Users;
724   Users.push_back(Ptr);
725 
726   // Add PHIs that are equivalent to Ptr to Users.
727   if (const PHINode *PN = dyn_cast<PHINode>(Ptr))
728     getEquivalentPHIs(*PN, Users);
729 
730   do {
731     Ptr = Users.pop_back_val();
732     for (const User *U : Ptr->users()) {
733       if (isa<ReturnInst>(U) || GetBasicARCInstKind(U) == ARCInstKind::RetainRV)
734         return;
735       if (isa<BitCastInst>(U))
736         Users.push_back(U);
737     }
738   } while (!Users.empty());
739 
740   Changed = true;
741   ++NumPeeps;
742 
743   LLVM_DEBUG(
744       dbgs() << "Transforming objc_autoreleaseReturnValue => "
745                 "objc_autorelease since its operand is not used as a return "
746                 "value.\n"
747                 "Old = "
748              << *AutoreleaseRV << "\n");
749 
750   CallInst *AutoreleaseRVCI = cast<CallInst>(AutoreleaseRV);
751   Function *NewDecl = EP.get(ARCRuntimeEntryPointKind::Autorelease);
752   AutoreleaseRVCI->setCalledFunction(NewDecl);
753   AutoreleaseRVCI->setTailCall(false); // Never tail call objc_autorelease.
754   Class = ARCInstKind::Autorelease;
755 
756   LLVM_DEBUG(dbgs() << "New: " << *AutoreleaseRV << "\n");
757 }
758 
759 /// Visit each call, one at a time, and make simplifications without doing any
760 /// additional analysis.
761 void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
762   LLVM_DEBUG(dbgs() << "\n== ObjCARCOpt::OptimizeIndividualCalls ==\n");
763   // Reset all the flags in preparation for recomputing them.
764   UsedInThisFunction = 0;
765 
766   // Store any delayed AutoreleaseRV intrinsics, so they can be easily paired
767   // with RetainRV and UnsafeClaimRV.
768   Instruction *DelayedAutoreleaseRV = nullptr;
769   const Value *DelayedAutoreleaseRVArg = nullptr;
770   auto setDelayedAutoreleaseRV = [&](Instruction *AutoreleaseRV) {
771     assert(!DelayedAutoreleaseRV || !AutoreleaseRV);
772     DelayedAutoreleaseRV = AutoreleaseRV;
773     DelayedAutoreleaseRVArg = nullptr;
774   };
775   auto optimizeDelayedAutoreleaseRV = [&]() {
776     if (!DelayedAutoreleaseRV)
777       return;
778     OptimizeIndividualCallImpl(F, DelayedAutoreleaseRV,
779                                ARCInstKind::AutoreleaseRV,
780                                DelayedAutoreleaseRVArg);
781     setDelayedAutoreleaseRV(nullptr);
782   };
783   auto shouldDelayAutoreleaseRV = [&](Instruction *NonARCInst) {
784     // Nothing to delay, but we may as well skip the logic below.
785     if (!DelayedAutoreleaseRV)
786       return true;
787 
788     // If we hit the end of the basic block we're not going to find an RV-pair.
789     // Stop delaying.
790     if (NonARCInst->isTerminator())
791       return false;
792 
793     // Given the frontend rules for emitting AutoreleaseRV, RetainRV, and
794     // UnsafeClaimRV, it's probably safe to skip over even opaque function calls
795     // here since OptimizeInlinedAutoreleaseRVCall will confirm that they
796     // have the same RCIdentityRoot.  However, what really matters is
797     // skipping instructions or intrinsics that the inliner could leave behind;
798     // be conservative for now and don't skip over opaque calls, which could
799     // potentially include other ARC calls.
800     auto *CB = dyn_cast<CallBase>(NonARCInst);
801     if (!CB)
802       return true;
803     return CB->getIntrinsicID() != Intrinsic::not_intrinsic;
804   };
805 
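  // The pattern this delaying logic targets (illustrative sketch, hypothetical
  // IR): after inlining, the callee's epilogue can meet the caller's prologue
  // within a single block:
  //   %a = call i8* @objc_autoreleaseReturnValue(i8* %x)   ; delayed below
  //   %r = call i8* @objc_retainAutoreleasedReturnValue(i8* %a)
  // OptimizeInlinedAutoreleaseRVCall recognizes such a pair, removes both
  // calls, and forwards their uses to %x.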
806   // Visit all objc_* calls in F.
807   for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
808     Instruction *Inst = &*I++;
809 
810     if (auto *CI = dyn_cast<CallInst>(Inst))
811       if (objcarc::hasAttachedCallOpBundle(CI)) {
812         BundledInsts->insertRVCall(I->getIterator(), CI);
813         Changed = true;
814       }
815 
816     ARCInstKind Class = GetBasicARCInstKind(Inst);
817 
818     // Skip this loop if this instruction isn't itself an ARC intrinsic.
819     const Value *Arg = nullptr;
820     switch (Class) {
821     default:
822       optimizeDelayedAutoreleaseRV();
823       break;
824     case ARCInstKind::CallOrUser:
825     case ARCInstKind::User:
826     case ARCInstKind::None:
827       // This is a non-ARC instruction.  If we're delaying an AutoreleaseRV,
828       // check if it's safe to skip over it; if not, optimize the AutoreleaseRV
829       // now.
830       if (!shouldDelayAutoreleaseRV(Inst))
831         optimizeDelayedAutoreleaseRV();
832       continue;
833     case ARCInstKind::AutoreleaseRV:
834       optimizeDelayedAutoreleaseRV();
835       setDelayedAutoreleaseRV(Inst);
836       continue;
837     case ARCInstKind::RetainRV:
838     case ARCInstKind::UnsafeClaimRV:
839       if (DelayedAutoreleaseRV) {
840         // We have a potential RV pair.  Check if they cancel out.
841         if (OptimizeInlinedAutoreleaseRVCall(F, Inst, Arg, Class,
842                                              DelayedAutoreleaseRV,
843                                              DelayedAutoreleaseRVArg)) {
844           setDelayedAutoreleaseRV(nullptr);
845           continue;
846         }
847         optimizeDelayedAutoreleaseRV();
848       }
849       break;
850     }
851 
852     OptimizeIndividualCallImpl(F, Inst, Class, Arg);
853   }
854 
855   // Catch the final delayed AutoreleaseRV.
856   optimizeDelayedAutoreleaseRV();
857 }
858 
859 /// This function returns true if the value is inert. An ObjC ARC runtime call
860 /// taking an inert operand can be safely deleted.
861 static bool isInertARCValue(Value *V, SmallPtrSet<Value *, 1> &VisitedPhis) {
862   V = V->stripPointerCasts();
863 
864   if (IsNullOrUndef(V))
865     return true;
866 
867   // See if this is a global attribute annotated with an 'objc_arc_inert'.
868   if (auto *GV = dyn_cast<GlobalVariable>(V))
869     if (GV->hasAttribute("objc_arc_inert"))
870       return true;
871 
872   if (auto PN = dyn_cast<PHINode>(V)) {
873     // Ignore this phi if it has already been discovered.
874     if (!VisitedPhis.insert(PN).second)
875       return true;
876     // Look through phis's operands.
877     for (Value *Opnd : PN->incoming_values())
878       if (!isInertARCValue(Opnd, VisitedPhis))
879         return false;
880     return true;
881   }
882 
883   return false;
884 }
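// Typical inert operands are null or undef pointers and constant globals that
// the frontend has tagged with the "objc_arc_inert" attribute (commonly used
// for immortal objects such as constant string literals); retain/release-style
// calls on such values can be removed outright.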
885 
886 void ObjCARCOpt::OptimizeIndividualCallImpl(Function &F, Instruction *Inst,
887                                             ARCInstKind Class,
888                                             const Value *Arg) {
889   LLVM_DEBUG(dbgs() << "Visiting: Class: " << Class << "; " << *Inst << "\n");
890 
891   // We can delete this call if it takes an inert value.
892   SmallPtrSet<Value *, 1> VisitedPhis;
893 
894   if (BundledInsts->contains(Inst)) {
895     UsedInThisFunction |= 1 << unsigned(Class);
896     return;
897   }
898 
899   if (IsNoopOnGlobal(Class))
900     if (isInertARCValue(Inst->getOperand(0), VisitedPhis)) {
901       if (!Inst->getType()->isVoidTy())
902         Inst->replaceAllUsesWith(Inst->getOperand(0));
903       Inst->eraseFromParent();
904       Changed = true;
905       return;
906     }
907 
908   switch (Class) {
909   default:
910     break;
911 
912   // Delete no-op casts. These function calls have special semantics, but
913   // the semantics are entirely implemented via lowering in the front-end,
914   // so by the time they reach the optimizer, they are just no-op calls
915   // which return their argument.
916   //
917   // There are gray areas here, as the ability to cast reference-counted
918   // pointers to raw void* and back allows code to break ARC assumptions,
919   // however these are currently considered to be unimportant.
920   case ARCInstKind::NoopCast:
921     Changed = true;
922     ++NumNoops;
923     LLVM_DEBUG(dbgs() << "Erasing no-op cast: " << *Inst << "\n");
924     EraseInstruction(Inst);
925     return;
926 
927   // If the pointer-to-weak-pointer is null, it's undefined behavior.
928   case ARCInstKind::StoreWeak:
929   case ARCInstKind::LoadWeak:
930   case ARCInstKind::LoadWeakRetained:
931   case ARCInstKind::InitWeak:
932   case ARCInstKind::DestroyWeak: {
933     CallInst *CI = cast<CallInst>(Inst);
934     if (IsNullOrUndef(CI->getArgOperand(0))) {
935       Changed = true;
936       new StoreInst(ConstantInt::getTrue(CI->getContext()),
937                     PoisonValue::get(PointerType::getUnqual(CI->getContext())),
938                     CI->getIterator());
939       Value *NewValue = PoisonValue::get(CI->getType());
940       LLVM_DEBUG(
941           dbgs() << "A null pointer-to-weak-pointer is undefined behavior."
942                     "\nOld = "
943                  << *CI << "\nNew = " << *NewValue << "\n");
944       CI->replaceAllUsesWith(NewValue);
945       CI->eraseFromParent();
946       return;
947     }
948     break;
949   }
950   case ARCInstKind::CopyWeak:
951   case ARCInstKind::MoveWeak: {
952     CallInst *CI = cast<CallInst>(Inst);
953     if (IsNullOrUndef(CI->getArgOperand(0)) ||
954         IsNullOrUndef(CI->getArgOperand(1))) {
955       Changed = true;
956       new StoreInst(ConstantInt::getTrue(CI->getContext()),
957                     PoisonValue::get(PointerType::getUnqual(CI->getContext())),
958                     CI->getIterator());
959 
960       Value *NewValue = PoisonValue::get(CI->getType());
961       LLVM_DEBUG(
962           dbgs() << "A null pointer-to-weak-pointer is undefined behavior."
963                     "\nOld = "
964                  << *CI << "\nNew = " << *NewValue << "\n");
965 
966       CI->replaceAllUsesWith(NewValue);
967       CI->eraseFromParent();
968       return;
969     }
970     break;
971   }
972   case ARCInstKind::RetainRV:
973     if (OptimizeRetainRVCall(F, Inst))
974       return;
975     break;
976   case ARCInstKind::AutoreleaseRV:
977     OptimizeAutoreleaseRVCall(F, Inst, Class);
978     break;
979   }
980 
981   // objc_autorelease(x) -> objc_release(x) if x is otherwise unused.
982   if (IsAutorelease(Class) && Inst->use_empty()) {
983     CallInst *Call = cast<CallInst>(Inst);
984     const Value *Arg = Call->getArgOperand(0);
985     Arg = FindSingleUseIdentifiedObject(Arg);
986     if (Arg) {
987       Changed = true;
988       ++NumAutoreleases;
989 
990       // Create the declaration lazily.
991       LLVMContext &C = Inst->getContext();
992 
993       Function *Decl = EP.get(ARCRuntimeEntryPointKind::Release);
994       CallInst *NewCall = CallInst::Create(Decl, Call->getArgOperand(0), "",
995                                            Call->getIterator());
996       NewCall->setMetadata(MDKindCache.get(ARCMDKindID::ImpreciseRelease),
997                            MDNode::get(C, std::nullopt));
998 
999       LLVM_DEBUG(dbgs() << "Replacing autorelease{,RV}(x) with objc_release(x) "
1000                            "since x is otherwise unused.\nOld: "
1001                         << *Call << "\nNew: " << *NewCall << "\n");
1002 
1003       EraseInstruction(Call);
1004       Inst = NewCall;
1005       Class = ARCInstKind::Release;
1006     }
1007   }
1008 
1009   // For functions which can never be passed stack arguments, add
1010   // a tail keyword.
1011   if (IsAlwaysTail(Class) && !cast<CallInst>(Inst)->isNoTailCall()) {
1012     Changed = true;
1013     LLVM_DEBUG(
1014         dbgs() << "Adding tail keyword to function since it can never be "
1015                   "passed stack args: "
1016                << *Inst << "\n");
1017     cast<CallInst>(Inst)->setTailCall();
1018   }
1019 
1020   // Ensure that functions that can never have a "tail" keyword due to the
1021   // semantics of ARC truly do not do so.
1022   if (IsNeverTail(Class)) {
1023     Changed = true;
1024     LLVM_DEBUG(dbgs() << "Removing tail keyword from function: " << *Inst
1025                       << "\n");
1026     cast<CallInst>(Inst)->setTailCall(false);
1027   }
1028 
1029   // Set nounwind as needed.
1030   if (IsNoThrow(Class)) {
1031     Changed = true;
1032     LLVM_DEBUG(dbgs() << "Found no throw class. Setting nounwind on: " << *Inst
1033                       << "\n");
1034     cast<CallInst>(Inst)->setDoesNotThrow();
1035   }
1036 
1037   // Note: This catches instructions unrelated to ARC.
1038   if (!IsNoopOnNull(Class)) {
1039     UsedInThisFunction |= 1 << unsigned(Class);
1040     return;
1041   }
1042 
1043   // If we haven't already looked up the root, look it up now.
1044   if (!Arg)
1045     Arg = GetArgRCIdentityRoot(Inst);
1046 
1047   // ARC calls with null are no-ops. Delete them.
1048   if (IsNullOrUndef(Arg)) {
1049     Changed = true;
1050     ++NumNoops;
1051     LLVM_DEBUG(dbgs() << "ARC calls with  null are no-ops. Erasing: " << *Inst
1052                       << "\n");
1053     EraseInstruction(Inst);
1054     return;
1055   }
1056 
1057   // Keep track of which of retain, release, autorelease, and retain_block
1058   // are actually present in this function.
1059   UsedInThisFunction |= 1 << unsigned(Class);
1060 
1061   // If Arg is a PHI, and one or more incoming values to the
1062   // PHI are null, and the call is control-equivalent to the PHI, and there
1063   // are no relevant side effects between the PHI and the call, and the call
1064   // is not a release that doesn't have the clang.imprecise_release tag, the
1065   // call could be pushed up to just those paths with non-null incoming
1066   // values. For now, don't bother splitting critical edges for this.
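  //
  // Illustrative sketch (hypothetical IR):
  //   %p = phi i8* [ null, %bb.null ], [ %obj, %bb.obj ]
  //   call void @objc_release(i8* %p)
  // The call is a no-op along the %bb.null path, so a clone operating on %obj
  // can be placed at the end of %bb.obj and the original call erased.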
1067   if (Class == ARCInstKind::Release &&
1068       !Inst->getMetadata(MDKindCache.get(ARCMDKindID::ImpreciseRelease)))
1069     return;
1070 
1071   SmallVector<std::pair<Instruction *, const Value *>, 4> Worklist;
1072   Worklist.push_back(std::make_pair(Inst, Arg));
1073   do {
1074     std::pair<Instruction *, const Value *> Pair = Worklist.pop_back_val();
1075     Inst = Pair.first;
1076     Arg = Pair.second;
1077 
1078     const PHINode *PN = dyn_cast<PHINode>(Arg);
1079     if (!PN)
1080       continue;
1081 
1082     // Determine if the PHI has any null operands, or any incoming
1083     // critical edges.
1084     bool HasNull = false;
1085     bool HasCriticalEdges = false;
1086     for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
1087       Value *Incoming = GetRCIdentityRoot(PN->getIncomingValue(i));
1088       if (IsNullOrUndef(Incoming))
1089         HasNull = true;
1090       else if (PN->getIncomingBlock(i)->getTerminator()->getNumSuccessors() !=
1091                1) {
1092         HasCriticalEdges = true;
1093         break;
1094       }
1095     }
1096     // If we have null operands and no critical edges, optimize.
1097     if (HasCriticalEdges)
1098       continue;
1099     if (!HasNull)
1100       continue;
1101 
1102     Instruction *DepInst = nullptr;
1103 
1104     // Check that there is nothing that cares about the reference
1105     // count between the call and the phi.
1106     switch (Class) {
1107     case ARCInstKind::Retain:
1108     case ARCInstKind::RetainBlock:
1109       // These can always be moved up.
1110       break;
1111     case ARCInstKind::Release:
1112       // These can't be moved across things that care about the retain
1113       // count.
1114       DepInst = findSingleDependency(NeedsPositiveRetainCount, Arg,
1115                                      Inst->getParent(), Inst, PA);
1116       break;
1117     case ARCInstKind::Autorelease:
1118       // These can't be moved across autorelease pool scope boundaries.
1119       DepInst = findSingleDependency(AutoreleasePoolBoundary, Arg,
1120                                      Inst->getParent(), Inst, PA);
1121       break;
1122     case ARCInstKind::UnsafeClaimRV:
1123     case ARCInstKind::RetainRV:
1124     case ARCInstKind::AutoreleaseRV:
1125       // Don't move these; the RV optimization depends on the autoreleaseRV
1126       // being tail called, and the retainRV being immediately after a call
1127       // (which might still happen if we get lucky with codegen layout, but
1128       // it's not worth taking the chance).
1129       continue;
1130     default:
1131       llvm_unreachable("Invalid dependence flavor");
1132     }
1133 
1134     if (DepInst != PN)
1135       continue;
1136 
1137     Changed = true;
1138     ++NumPartialNoops;
1139     // Clone the call into each predecessor that has a non-null value.
1140     CallInst *CInst = cast<CallInst>(Inst);
1141     Type *ParamTy = CInst->getArgOperand(0)->getType();
1142     for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
1143       Value *Incoming = GetRCIdentityRoot(PN->getIncomingValue(i));
1144       if (IsNullOrUndef(Incoming))
1145         continue;
1146       Value *Op = PN->getIncomingValue(i);
1147       BasicBlock::iterator InsertPos =
1148           PN->getIncomingBlock(i)->back().getIterator();
1149       SmallVector<OperandBundleDef, 1> OpBundles;
1150       cloneOpBundlesIf(CInst, OpBundles, [](const OperandBundleUse &B) {
1151         return B.getTagID() != LLVMContext::OB_funclet;
1152       });
1153       addOpBundleForFunclet(InsertPos->getParent(), OpBundles);
1154       CallInst *Clone = CallInst::Create(CInst, OpBundles);
1155       if (Op->getType() != ParamTy)
1156         Op = new BitCastInst(Op, ParamTy, "", InsertPos);
1157       Clone->setArgOperand(0, Op);
1158       Clone->insertBefore(*InsertPos->getParent(), InsertPos);
1159 
1160       LLVM_DEBUG(dbgs() << "Cloning " << *CInst << "\n"
1161                                                    "And inserting clone at "
1162                         << *InsertPos << "\n");
1163       Worklist.push_back(std::make_pair(Clone, Incoming));
1164     }
1165     // Erase the original call.
1166     LLVM_DEBUG(dbgs() << "Erasing: " << *CInst << "\n");
1167     EraseInstruction(CInst);
1168   } while (!Worklist.empty());
1169 }
1170 
1171 /// If we have a top down pointer in the S_Use state, make sure that there are
1172 /// no CFG hazards by checking the states of various bottom up pointers.
1173 static void CheckForUseCFGHazard(const Sequence SuccSSeq,
1174                                  const bool SuccSRRIKnownSafe,
1175                                  TopDownPtrState &S,
1176                                  bool &SomeSuccHasSame,
1177                                  bool &AllSuccsHaveSame,
1178                                  bool &NotAllSeqEqualButKnownSafe,
1179                                  bool &ShouldContinue) {
1180   switch (SuccSSeq) {
1181   case S_CanRelease: {
1182     if (!S.IsKnownSafe() && !SuccSRRIKnownSafe) {
1183       S.ClearSequenceProgress();
1184       break;
1185     }
1186     S.SetCFGHazardAfflicted(true);
1187     ShouldContinue = true;
1188     break;
1189   }
1190   case S_Use:
1191     SomeSuccHasSame = true;
1192     break;
1193   case S_Stop:
1194   case S_MovableRelease:
1195     if (!S.IsKnownSafe() && !SuccSRRIKnownSafe)
1196       AllSuccsHaveSame = false;
1197     else
1198       NotAllSeqEqualButKnownSafe = true;
1199     break;
1200   case S_Retain:
1201     llvm_unreachable("bottom-up pointer in retain state!");
1202   case S_None:
1203     llvm_unreachable("This should have been handled earlier.");
1204   }
1205 }
1206 
1207 /// If we have a Top Down pointer in the S_CanRelease state, make sure that
1208 /// there are no CFG hazards by checking the states of various bottom up
1209 /// pointers.
1210 static void CheckForCanReleaseCFGHazard(const Sequence SuccSSeq,
1211                                         const bool SuccSRRIKnownSafe,
1212                                         TopDownPtrState &S,
1213                                         bool &SomeSuccHasSame,
1214                                         bool &AllSuccsHaveSame,
1215                                         bool &NotAllSeqEqualButKnownSafe) {
1216   switch (SuccSSeq) {
1217   case S_CanRelease:
1218     SomeSuccHasSame = true;
1219     break;
1220   case S_Stop:
1221   case S_MovableRelease:
1222   case S_Use:
1223     if (!S.IsKnownSafe() && !SuccSRRIKnownSafe)
1224       AllSuccsHaveSame = false;
1225     else
1226       NotAllSeqEqualButKnownSafe = true;
1227     break;
1228   case S_Retain:
1229     llvm_unreachable("bottom-up pointer in retain state!");
1230   case S_None:
1231     llvm_unreachable("This should have been handled earlier.");
1232   }
1233 }
1234 
1235 /// Check for critical edges, loop boundaries, irreducible control flow, or
1236 /// other CFG structures where moving code across the edge would result in it
1237 /// being executed more.
1238 void
1239 ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
1240                                DenseMap<const BasicBlock *, BBState> &BBStates,
1241                                BBState &MyStates) const {
1242   // If any top-down local-use or possible-dec has a succ which is earlier in
1243   // the sequence, forget it.
1244   for (auto I = MyStates.top_down_ptr_begin(), E = MyStates.top_down_ptr_end();
1245        I != E; ++I) {
1246     TopDownPtrState &S = I->second;
1247     const Sequence Seq = I->second.GetSeq();
1248 
1249     // We only care about S_Retain, S_CanRelease, and S_Use.
1250     if (Seq == S_None)
1251       continue;
1252 
1253     // Make sure that if extra top down states are added in the future that this
1254     // code is updated to handle it.
1255     assert((Seq == S_Retain || Seq == S_CanRelease || Seq == S_Use) &&
1256            "Unknown top down sequence state.");
1257 
1258     const Value *Arg = I->first;
1259     bool SomeSuccHasSame = false;
1260     bool AllSuccsHaveSame = true;
1261     bool NotAllSeqEqualButKnownSafe = false;
1262 
1263     for (const BasicBlock *Succ : successors(BB)) {
1264       // If VisitBottomUp has pointer information for this successor, take
1265       // what we know about it.
1266       const DenseMap<const BasicBlock *, BBState>::iterator BBI =
1267           BBStates.find(Succ);
1268       assert(BBI != BBStates.end());
1269       const BottomUpPtrState &SuccS = BBI->second.getPtrBottomUpState(Arg);
1270       const Sequence SuccSSeq = SuccS.GetSeq();
1271 
1272       // If the bottom-up pointer is in an S_None state, clear the sequence
1273       // progress, since the bottom-up sequence has finished, which suggests
1274       // a mismatch between retains and releases. This is true for
1275       // all three cases that we are handling here: S_Retain, S_Use, and
1276       // S_CanRelease.
1277       if (SuccSSeq == S_None) {
1278         S.ClearSequenceProgress();
1279         continue;
1280       }
1281 
1282       // If we have S_Use or S_CanRelease, perform the corresponding CFG
1283       // hazard check.
1284       const bool SuccSRRIKnownSafe = SuccS.IsKnownSafe();
1285 
1286       // *NOTE* We do not use Seq from above here since we are allowing for
1287       // S.GetSeq() to change while we are visiting basic blocks.
1288       switch(S.GetSeq()) {
1289       case S_Use: {
1290         bool ShouldContinue = false;
1291         CheckForUseCFGHazard(SuccSSeq, SuccSRRIKnownSafe, S, SomeSuccHasSame,
1292                              AllSuccsHaveSame, NotAllSeqEqualButKnownSafe,
1293                              ShouldContinue);
1294         if (ShouldContinue)
1295           continue;
1296         break;
1297       }
1298       case S_CanRelease:
1299         CheckForCanReleaseCFGHazard(SuccSSeq, SuccSRRIKnownSafe, S,
1300                                     SomeSuccHasSame, AllSuccsHaveSame,
1301                                     NotAllSeqEqualButKnownSafe);
1302         break;
1303       case S_Retain:
1304       case S_None:
1305       case S_Stop:
1306       case S_MovableRelease:
1307         break;
1308       }
1309     }
1310 
1311     // If the state at the other end of any of the successor edges
1312     // matches the current state, require all edges to match. This
1313     // guards against loops in the middle of a sequence.
1314     if (SomeSuccHasSame && !AllSuccsHaveSame) {
1315       S.ClearSequenceProgress();
1316     } else if (NotAllSeqEqualButKnownSafe) {
1317       // If we would have cleared the state foregoing the fact that we are known
1318       // safe, stop code motion. This is because whether or not it is safe to
1319       // remove RR pairs via KnownSafe is an orthogonal concept to whether we
1320       // are allowed to perform code motion.
1321       S.SetCFGHazardAfflicted(true);
1322     }
1323   }
1324 }
1325 
1326 bool ObjCARCOpt::VisitInstructionBottomUp(
1327     Instruction *Inst, BasicBlock *BB, BlotMapVector<Value *, RRInfo> &Retains,
1328     BBState &MyStates) {
1329   bool NestingDetected = false;
1330   ARCInstKind Class = GetARCInstKind(Inst);
1331   const Value *Arg = nullptr;
1332 
1333   LLVM_DEBUG(dbgs() << "        Class: " << Class << "\n");
1334 
1335   switch (Class) {
1336   case ARCInstKind::Release: {
1337     Arg = GetArgRCIdentityRoot(Inst);
1338 
1339     BottomUpPtrState &S = MyStates.getPtrBottomUpState(Arg);
1340     NestingDetected |= S.InitBottomUp(MDKindCache, Inst);
1341     break;
1342   }
1343   case ARCInstKind::RetainBlock:
1344     // In OptimizeIndividualCalls, we have strength reduced all optimizable
1345     // objc_retainBlocks to objc_retains. Thus at this point any
1346     // objc_retainBlocks that we see are not optimizable.
1347     break;
1348   case ARCInstKind::Retain:
1349   case ARCInstKind::RetainRV: {
1350     Arg = GetArgRCIdentityRoot(Inst);
1351     BottomUpPtrState &S = MyStates.getPtrBottomUpState(Arg);
1352     if (S.MatchWithRetain()) {
1353       // Don't do retain+release tracking for ARCInstKind::RetainRV, because
1354       // it's better to let it remain as the first instruction after a call.
1355       if (Class != ARCInstKind::RetainRV) {
1356         LLVM_DEBUG(dbgs() << "        Matching with: " << *Inst << "\n");
1357         Retains[Inst] = S.GetRRInfo();
1358       }
1359       S.ClearSequenceProgress();
1360     }
1361     // A retain moving bottom up can be a use.
1362     break;
1363   }
1364   case ARCInstKind::AutoreleasepoolPop:
1365     // Conservatively, clear MyStates for all known pointers.
1366     MyStates.clearBottomUpPointers();
1367     return NestingDetected;
1368   case ARCInstKind::AutoreleasepoolPush:
1369   case ARCInstKind::None:
1370     // These are irrelevant.
1371     return NestingDetected;
1372   default:
1373     break;
1374   }
1375 
1376   // Consider any other possible effects of this instruction on each
1377   // pointer being tracked.
1378   for (auto MI = MyStates.bottom_up_ptr_begin(),
1379             ME = MyStates.bottom_up_ptr_end();
1380        MI != ME; ++MI) {
1381     const Value *Ptr = MI->first;
1382     if (Ptr == Arg)
1383       continue; // Handled above.
1384     BottomUpPtrState &S = MI->second;
1385 
1386     if (S.HandlePotentialAlterRefCount(Inst, Ptr, PA, Class))
1387       continue;
1388 
1389     S.HandlePotentialUse(BB, Inst, Ptr, PA, Class);
1390   }
1391 
1392   return NestingDetected;
1393 }
1394 
1395 bool ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
1396                                DenseMap<const BasicBlock *, BBState> &BBStates,
1397                                BlotMapVector<Value *, RRInfo> &Retains) {
1398   LLVM_DEBUG(dbgs() << "\n== ObjCARCOpt::VisitBottomUp ==\n");
1399 
1400   bool NestingDetected = false;
1401   BBState &MyStates = BBStates[BB];
1402 
1403   // Merge the states from each successor to compute the initial state
1404   // for the current block.
1405   BBState::edge_iterator SI(MyStates.succ_begin()),
1406                          SE(MyStates.succ_end());
1407   if (SI != SE) {
1408     const BasicBlock *Succ = *SI;
1409     DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Succ);
1410     assert(I != BBStates.end());
1411     MyStates.InitFromSucc(I->second);
1412     ++SI;
1413     for (; SI != SE; ++SI) {
1414       Succ = *SI;
1415       I = BBStates.find(Succ);
1416       assert(I != BBStates.end());
1417       MyStates.MergeSucc(I->second);
1418     }
1419   }
1420 
1421   LLVM_DEBUG(dbgs() << "Before:\n"
1422                     << BBStates[BB] << "\n"
1423                     << "Performing Dataflow:\n");
1424 
1425   // Visit all the instructions, bottom-up.
1426   for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; --I) {
1427     Instruction *Inst = &*std::prev(I);
1428 
1429     // Invoke instructions are visited as part of their successors (below).
1430     if (isa<InvokeInst>(Inst))
1431       continue;
1432 
1433     LLVM_DEBUG(dbgs() << "    Visiting " << *Inst << "\n");
1434 
1435     NestingDetected |= VisitInstructionBottomUp(Inst, BB, Retains, MyStates);
1436 
1437     // Bail out if the number of pointers being tracked becomes too large so
1438     // that this pass can complete in a reasonable amount of time.
1439     if (MyStates.bottom_up_ptr_list_size() > MaxPtrStates) {
1440       DisableRetainReleasePairing = true;
1441       return false;
1442     }
1443   }
1444 
1445   // If there's a predecessor with an invoke, visit the invoke as if it were
1446   // part of this block, since we can't insert code after an invoke in its own
1447   // block, and we don't want to split critical edges.
1448   for (BBState::edge_iterator PI(MyStates.pred_begin()),
1449        PE(MyStates.pred_end()); PI != PE; ++PI) {
1450     BasicBlock *Pred = *PI;
1451     if (InvokeInst *II = dyn_cast<InvokeInst>(&Pred->back()))
1452       NestingDetected |= VisitInstructionBottomUp(II, BB, Retains, MyStates);
1453   }
1454 
1455   LLVM_DEBUG(dbgs() << "\nFinal State:\n" << BBStates[BB] << "\n");
1456 
1457   return NestingDetected;
1458 }
1459 
1460 // Fill ReleaseInsertPtToRCIdentityRoots, which is a map from insertion points
1461 // to the set of RC identity roots that would be released by the release calls
1462 // moved to the insertion points.
1463 static void collectReleaseInsertPts(
1464     const BlotMapVector<Value *, RRInfo> &Retains,
1465     DenseMap<const Instruction *, SmallPtrSet<const Value *, 2>>
1466         &ReleaseInsertPtToRCIdentityRoots) {
1467   for (const auto &P : Retains) {
1468     // Retains is a map from an objc_retain call to a RRInfo of the RC identity
1469     // root of the call. Get the RC identity root of the objc_retain call.
1470     Instruction *Retain = cast<Instruction>(P.first);
1471     Value *Root = GetRCIdentityRoot(Retain->getOperand(0));
1472     // Collect all the insertion points of the objc_release calls that release
1473     // the RC identity root of the objc_retain call.
1474     for (const Instruction *InsertPt : P.second.ReverseInsertPts)
1475       ReleaseInsertPtToRCIdentityRoots[InsertPt].insert(Root);
1476   }
1477 }
1478 
1479 // Get the RC identity roots from an insertion point of an objc_release call.
1480 // Return nullptr if the passed instruction isn't an insertion point.
1481 static const SmallPtrSet<const Value *, 2> *
1482 getRCIdentityRootsFromReleaseInsertPt(
1483     const Instruction *InsertPt,
1484     const DenseMap<const Instruction *, SmallPtrSet<const Value *, 2>>
1485         &ReleaseInsertPtToRCIdentityRoots) {
1486   auto I = ReleaseInsertPtToRCIdentityRoots.find(InsertPt);
1487   if (I == ReleaseInsertPtToRCIdentityRoots.end())
1488     return nullptr;
1489   return &I->second;
1490 }
1491 
1492 bool ObjCARCOpt::VisitInstructionTopDown(
1493     Instruction *Inst, DenseMap<Value *, RRInfo> &Releases, BBState &MyStates,
1494     const DenseMap<const Instruction *, SmallPtrSet<const Value *, 2>>
1495         &ReleaseInsertPtToRCIdentityRoots) {
1496   bool NestingDetected = false;
1497   ARCInstKind Class = GetARCInstKind(Inst);
1498   const Value *Arg = nullptr;
1499 
1500   // Make sure a call to objc_retain isn't moved past insertion points of calls
1501   // to objc_release.
1502   if (const SmallPtrSet<const Value *, 2> *Roots =
1503           getRCIdentityRootsFromReleaseInsertPt(
1504               Inst, ReleaseInsertPtToRCIdentityRoots))
1505     for (const auto *Root : *Roots) {
1506       TopDownPtrState &S = MyStates.getPtrTopDownState(Root);
1507       // Disable code motion if the current position is S_Retain to prevent
1508       // moving the objc_retain call past objc_release calls. If it's
1509       // S_CanRelease or larger, it's not necessary to disable code motion as
1510       // the insertion points that prevent the objc_retain call from moving down
1511       // should have been set already.
1512       if (S.GetSeq() == S_Retain)
1513         S.SetCFGHazardAfflicted(true);
1514     }
1515 
1516   LLVM_DEBUG(dbgs() << "        Class: " << Class << "\n");
1517 
1518   switch (Class) {
1519   case ARCInstKind::RetainBlock:
1520     // In OptimizeIndividualCalls, we have strength reduced all optimizable
1521     // objc_retainBlocks to objc_retains. Thus at this point any
1522     // objc_retainBlocks that we see are not optimizable. We need to break since
1523     // a retain can be a potential use.
1524     break;
1525   case ARCInstKind::Retain:
1526   case ARCInstKind::RetainRV: {
1527     Arg = GetArgRCIdentityRoot(Inst);
1528     TopDownPtrState &S = MyStates.getPtrTopDownState(Arg);
1529     NestingDetected |= S.InitTopDown(Class, Inst);
1530     // A retain can be a potential use; proceed to the generic checking
1531     // code below.
1532     break;
1533   }
1534   case ARCInstKind::Release: {
1535     Arg = GetArgRCIdentityRoot(Inst);
1536     TopDownPtrState &S = MyStates.getPtrTopDownState(Arg);
1537     // Try to form a tentative pair in between this release instruction and the
1538     // top down pointers that we are tracking.
1539     if (S.MatchWithRelease(MDKindCache, Inst)) {
1540       // If we succeed, copy S's RRInfo into the Release -> {Retain Set
1541       // Map}. Then we clear S.
1542       LLVM_DEBUG(dbgs() << "        Matching with: " << *Inst << "\n");
1543       Releases[Inst] = S.GetRRInfo();
1544       S.ClearSequenceProgress();
1545     }
1546     break;
1547   }
1548   case ARCInstKind::AutoreleasepoolPop:
1549     // Conservatively, clear MyStates for all known pointers.
1550     MyStates.clearTopDownPointers();
1551     return false;
1552   case ARCInstKind::AutoreleasepoolPush:
1553   case ARCInstKind::None:
1554     // These cannot be uses of any pointer we are tracking.
1555     return false;
1556   default:
1557     break;
1558   }
1559 
1560   // Consider any other possible effects of this instruction on each
1561   // pointer being tracked.
1562   for (auto MI = MyStates.top_down_ptr_begin(),
1563             ME = MyStates.top_down_ptr_end();
1564        MI != ME; ++MI) {
1565     const Value *Ptr = MI->first;
1566     if (Ptr == Arg)
1567       continue; // Handled above.
1568     TopDownPtrState &S = MI->second;
1569     if (S.HandlePotentialAlterRefCount(Inst, Ptr, PA, Class, *BundledInsts))
1570       continue;
1571 
1572     S.HandlePotentialUse(Inst, Ptr, PA, Class);
1573   }
1574 
1575   return NestingDetected;
1576 }
1577 
1578 bool ObjCARCOpt::VisitTopDown(
1579     BasicBlock *BB, DenseMap<const BasicBlock *, BBState> &BBStates,
1580     DenseMap<Value *, RRInfo> &Releases,
1581     const DenseMap<const Instruction *, SmallPtrSet<const Value *, 2>>
1582         &ReleaseInsertPtToRCIdentityRoots) {
1583   LLVM_DEBUG(dbgs() << "\n== ObjCARCOpt::VisitTopDown ==\n");
1584   bool NestingDetected = false;
1585   BBState &MyStates = BBStates[BB];
1586 
1587   // Merge the states from each predecessor to compute the initial state
1588   // for the current block.
1589   BBState::edge_iterator PI(MyStates.pred_begin()),
1590                          PE(MyStates.pred_end());
1591   if (PI != PE) {
1592     const BasicBlock *Pred = *PI;
1593     DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Pred);
1594     assert(I != BBStates.end());
1595     MyStates.InitFromPred(I->second);
1596     ++PI;
1597     for (; PI != PE; ++PI) {
1598       Pred = *PI;
1599       I = BBStates.find(Pred);
1600       assert(I != BBStates.end());
1601       MyStates.MergePred(I->second);
1602     }
1603   }
1604 
1605   // Check that BB and MyStates have the same number of predecessors. This
1606   // prevents retain calls that live outside a loop from being moved into the
1607   // loop.
1608   if (!BB->hasNPredecessors(MyStates.pred_end() - MyStates.pred_begin()))
1609     for (auto I = MyStates.top_down_ptr_begin(),
1610               E = MyStates.top_down_ptr_end();
1611          I != E; ++I)
1612       I->second.SetCFGHazardAfflicted(true);
1613 
1614   LLVM_DEBUG(dbgs() << "Before:\n"
1615                     << BBStates[BB] << "\n"
1616                     << "Performing Dataflow:\n");
1617 
1618   // Visit all the instructions, top-down.
1619   for (Instruction &Inst : *BB) {
1620     LLVM_DEBUG(dbgs() << "    Visiting " << Inst << "\n");
1621 
1622     NestingDetected |= VisitInstructionTopDown(
1623         &Inst, Releases, MyStates, ReleaseInsertPtToRCIdentityRoots);
1624 
1625     // Bail out if the number of pointers being tracked becomes too large so
1626     // that this pass can complete in a reasonable amount of time.
1627     if (MyStates.top_down_ptr_list_size() > MaxPtrStates) {
1628       DisableRetainReleasePairing = true;
1629       return false;
1630     }
1631   }
1632 
1633   LLVM_DEBUG(dbgs() << "\nState Before Checking for CFG Hazards:\n"
1634                     << BBStates[BB] << "\n\n");
1635   CheckForCFGHazards(BB, BBStates, MyStates);
1636   LLVM_DEBUG(dbgs() << "Final State:\n" << BBStates[BB] << "\n");
1637   return NestingDetected;
1638 }
1639 
1640 static void
1641 ComputePostOrders(Function &F,
1642                   SmallVectorImpl<BasicBlock *> &PostOrder,
1643                   SmallVectorImpl<BasicBlock *> &ReverseCFGPostOrder,
1644                   unsigned NoObjCARCExceptionsMDKind,
1645                   DenseMap<const BasicBlock *, BBState> &BBStates) {
1646   /// The visited set, for doing DFS walks.
1647   SmallPtrSet<BasicBlock *, 16> Visited;
1648 
1649   // Do DFS, computing the PostOrder.
1650   SmallPtrSet<BasicBlock *, 16> OnStack;
1651   SmallVector<std::pair<BasicBlock *, succ_iterator>, 16> SuccStack;
1652 
1653   // Functions always have exactly one entry block, and we don't have
1654   // any other block that we treat like an entry block.
1655   BasicBlock *EntryBB = &F.getEntryBlock();
1656   BBState &MyStates = BBStates[EntryBB];
1657   MyStates.SetAsEntry();
1658   Instruction *EntryTI = EntryBB->getTerminator();
1659   SuccStack.push_back(std::make_pair(EntryBB, succ_iterator(EntryTI)));
1660   Visited.insert(EntryBB);
1661   OnStack.insert(EntryBB);
1662   do {
1663   dfs_next_succ:
1664     BasicBlock *CurrBB = SuccStack.back().first;
1665     succ_iterator SE(CurrBB->getTerminator(), false);
1666 
1667     while (SuccStack.back().second != SE) {
1668       BasicBlock *SuccBB = *SuccStack.back().second++;
1669       if (Visited.insert(SuccBB).second) {
1670         SuccStack.push_back(
1671             std::make_pair(SuccBB, succ_iterator(SuccBB->getTerminator())));
1672         BBStates[CurrBB].addSucc(SuccBB);
1673         BBState &SuccStates = BBStates[SuccBB];
1674         SuccStates.addPred(CurrBB);
1675         OnStack.insert(SuccBB);
1676         goto dfs_next_succ;
1677       }
1678 
1679       if (!OnStack.count(SuccBB)) {
1680         BBStates[CurrBB].addSucc(SuccBB);
1681         BBStates[SuccBB].addPred(CurrBB);
1682       }
1683     }
1684     OnStack.erase(CurrBB);
1685     PostOrder.push_back(CurrBB);
1686     SuccStack.pop_back();
1687   } while (!SuccStack.empty());
1688 
1689   Visited.clear();
1690 
1691   // Do reverse-CFG DFS, computing the reverse-CFG PostOrder.
1692   // Functions may have many exits, and there are also blocks which we treat
1693   // as exits due to ignored edges.
1694   SmallVector<std::pair<BasicBlock *, BBState::edge_iterator>, 16> PredStack;
1695   for (BasicBlock &ExitBB : F) {
1696     BBState &MyStates = BBStates[&ExitBB];
1697     if (!MyStates.isExit())
1698       continue;
1699 
1700     MyStates.SetAsExit();
1701 
1702     PredStack.push_back(std::make_pair(&ExitBB, MyStates.pred_begin()));
1703     Visited.insert(&ExitBB);
1704     while (!PredStack.empty()) {
1705     reverse_dfs_next_succ:
1706       BBState::edge_iterator PE = BBStates[PredStack.back().first].pred_end();
1707       while (PredStack.back().second != PE) {
1708         BasicBlock *BB = *PredStack.back().second++;
1709         if (Visited.insert(BB).second) {
1710           PredStack.push_back(std::make_pair(BB, BBStates[BB].pred_begin()));
1711           goto reverse_dfs_next_succ;
1712         }
1713       }
1714       ReverseCFGPostOrder.push_back(PredStack.pop_back_val().first);
1715     }
1716   }
1717 }
1718 
1719 // Visit the function both top-down and bottom-up.
1720 bool ObjCARCOpt::Visit(Function &F,
1721                        DenseMap<const BasicBlock *, BBState> &BBStates,
1722                        BlotMapVector<Value *, RRInfo> &Retains,
1723                        DenseMap<Value *, RRInfo> &Releases) {
1724   // Use reverse-postorder traversals, because we magically know that loops
1725   // will be well behaved, i.e. they won't repeatedly call retain on a single
1726   // pointer without doing a release. We can't use the ReversePostOrderTraversal
1727   // class here because we want the reverse-CFG postorder to consider each
1728   // function exit point, and we want to ignore selected cycle edges.
1729   SmallVector<BasicBlock *, 16> PostOrder;
1730   SmallVector<BasicBlock *, 16> ReverseCFGPostOrder;
1731   ComputePostOrders(F, PostOrder, ReverseCFGPostOrder,
1732                     MDKindCache.get(ARCMDKindID::NoObjCARCExceptions),
1733                     BBStates);
1734 
1735   // Use reverse-postorder on the reverse CFG for bottom-up.
1736   bool BottomUpNestingDetected = false;
1737   for (BasicBlock *BB : llvm::reverse(ReverseCFGPostOrder)) {
1738     BottomUpNestingDetected |= VisitBottomUp(BB, BBStates, Retains);
1739     if (DisableRetainReleasePairing)
1740       return false;
1741   }
1742 
1743   DenseMap<const Instruction *, SmallPtrSet<const Value *, 2>>
1744       ReleaseInsertPtToRCIdentityRoots;
1745   collectReleaseInsertPts(Retains, ReleaseInsertPtToRCIdentityRoots);
1746 
1747   // Use reverse-postorder for top-down.
1748   bool TopDownNestingDetected = false;
1749   for (BasicBlock *BB : llvm::reverse(PostOrder)) {
1750     TopDownNestingDetected |=
1751         VisitTopDown(BB, BBStates, Releases, ReleaseInsertPtToRCIdentityRoots);
1752     if (DisableRetainReleasePairing)
1753       return false;
1754   }
1755 
1756   return TopDownNestingDetected && BottomUpNestingDetected;
1757 }
1758 
1759 /// Move the calls in RetainsToMove and ReleasesToMove.
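///
/// The mechanics are simple: a new objc_retain call is created at every
/// insertion point recorded on the release side (ReleasesToMove), a new
/// objc_release call at every insertion point recorded on the retain side
/// (RetainsToMove), and the original calls are queued for deletion. As a
/// rough, illustrative sketch only (the actual points are chosen by the
/// preceding dataflow analysis; @use and %p are hypothetical), the effect is
/// to shrink the pair around the code that still needs the reference:
/// \code
///    %0 = call i8* @objc_retain(i8* %p)   ; original, deleted
///    ...                                  ; code that never touches %p
///    ; <- a new objc_retain may be created at a recorded insertion point
///    call void @use(i8* %p)
///    ; <- a new objc_release may be created at a recorded insertion point
///    ...                                  ; code that never touches %p
///    call void @objc_release(i8* %p)      ; original, deleted
/// \endcode
/// When both sets of insertion points are empty, no new calls are created and
/// the pair is removed outright.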
1760 void ObjCARCOpt::MoveCalls(Value *Arg, RRInfo &RetainsToMove,
1761                            RRInfo &ReleasesToMove,
1762                            BlotMapVector<Value *, RRInfo> &Retains,
1763                            DenseMap<Value *, RRInfo> &Releases,
1764                            SmallVectorImpl<Instruction *> &DeadInsts,
1765                            Module *M) {
1766   Type *ArgTy = Arg->getType();
1767   Type *ParamTy = PointerType::getUnqual(Type::getInt8Ty(ArgTy->getContext()));
1768 
1769   LLVM_DEBUG(dbgs() << "== ObjCARCOpt::MoveCalls ==\n");
1770 
1771   // Insert the new retain and release calls.
1772   for (Instruction *InsertPt : ReleasesToMove.ReverseInsertPts) {
1773     Value *MyArg = ArgTy == ParamTy ? Arg
1774                                     : new BitCastInst(Arg, ParamTy, "",
1775                                                       InsertPt->getIterator());
1776     Function *Decl = EP.get(ARCRuntimeEntryPointKind::Retain);
1777     SmallVector<OperandBundleDef, 1> BundleList;
1778     addOpBundleForFunclet(InsertPt->getParent(), BundleList);
1779     CallInst *Call =
1780         CallInst::Create(Decl, MyArg, BundleList, "", InsertPt->getIterator());
1781     Call->setDoesNotThrow();
1782     Call->setTailCall();
1783 
1784     LLVM_DEBUG(dbgs() << "Inserting new Retain: " << *Call
1785                       << "\n"
1786                          "At insertion point: "
1787                       << *InsertPt << "\n");
1788   }
1789   for (Instruction *InsertPt : RetainsToMove.ReverseInsertPts) {
1790     Value *MyArg = ArgTy == ParamTy ? Arg
1791                                     : new BitCastInst(Arg, ParamTy, "",
1792                                                       InsertPt->getIterator());
1793     Function *Decl = EP.get(ARCRuntimeEntryPointKind::Release);
1794     SmallVector<OperandBundleDef, 1> BundleList;
1795     addOpBundleForFunclet(InsertPt->getParent(), BundleList);
1796     CallInst *Call =
1797         CallInst::Create(Decl, MyArg, BundleList, "", InsertPt->getIterator());
1798     // Attach a clang.imprecise_release metadata tag, if appropriate.
1799     if (MDNode *M = ReleasesToMove.ReleaseMetadata)
1800       Call->setMetadata(MDKindCache.get(ARCMDKindID::ImpreciseRelease), M);
1801     Call->setDoesNotThrow();
1802     if (ReleasesToMove.IsTailCallRelease)
1803       Call->setTailCall();
1804 
1805     LLVM_DEBUG(dbgs() << "Inserting new Release: " << *Call
1806                       << "\n"
1807                          "At insertion point: "
1808                       << *InsertPt << "\n");
1809   }
1810 
1811   // Delete the original retain and release calls.
1812   for (Instruction *OrigRetain : RetainsToMove.Calls) {
1813     Retains.blot(OrigRetain);
1814     DeadInsts.push_back(OrigRetain);
1815     LLVM_DEBUG(dbgs() << "Deleting retain: " << *OrigRetain << "\n");
1816   }
1817   for (Instruction *OrigRelease : ReleasesToMove.Calls) {
1818     Releases.erase(OrigRelease);
1819     DeadInsts.push_back(OrigRelease);
1820     LLVM_DEBUG(dbgs() << "Deleting release: " << *OrigRelease << "\n");
1821   }
1822 }
1823 
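/// Connect together the retains and releases that the dataflow analysis has
/// matched up, using per-block path counts to check that deleting or moving
/// them keeps retains and releases balanced along every path.
///
/// Informal sketch of the balance check (it mirrors the OldDelta/NewDelta
/// bookkeeping below; the precise path-count definition lives in BBState, and
/// %p and %c are hypothetical): each paired retain adds the number of
/// entry-to-exit paths through its block, each paired release subtracts the
/// same quantity for its block, and the pairing is only accepted when the sum
/// is zero.
/// \code
///   entry:                                  ; 2 paths through this block: +2
///     %0 = call i8* @objc_retain(i8* %p)
///     br i1 %c, label %left, label %right
///   left:                                   ; 1 path: -1
///     call void @objc_release(i8* %p)
///     br label %exit
///   right:                                  ; 1 path: -1
///     call void @objc_release(i8* %p)
///     br label %exit
///   exit:
///     ret void
/// \endcode
/// Here the sum is zero, so the retain and the two releases may be paired; if
/// one of the releases were missing, the nonzero delta would reject the
/// pairing.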
1824 bool ObjCARCOpt::PairUpRetainsAndReleases(
1825     DenseMap<const BasicBlock *, BBState> &BBStates,
1826     BlotMapVector<Value *, RRInfo> &Retains,
1827     DenseMap<Value *, RRInfo> &Releases, Module *M,
1828     Instruction *Retain,
1829     SmallVectorImpl<Instruction *> &DeadInsts, RRInfo &RetainsToMove,
1830     RRInfo &ReleasesToMove, Value *Arg, bool KnownSafe,
1831     bool &AnyPairsCompletelyEliminated) {
1832   // If a pair happens in a region where it is known that the reference count
1833   // is already incremented, we can similarly ignore possible decrements unless
1834   // we are dealing with a retainable object with multiple provenance sources.
1835   bool KnownSafeTD = true, KnownSafeBU = true;
1836   bool CFGHazardAfflicted = false;
1837 
1838   // Connect the dots between the top-down-collected RetainsToMove and
1839   // bottom-up-collected ReleasesToMove to form sets of related calls.
1840   // This is an iterative process so that we connect multiple releases
1841   // to multiple retains if needed.
1842   unsigned OldDelta = 0;
1843   unsigned NewDelta = 0;
1844   unsigned OldCount = 0;
1845   unsigned NewCount = 0;
1846   bool FirstRelease = true;
1847   for (SmallVector<Instruction *, 4> NewRetains{Retain};;) {
1848     SmallVector<Instruction *, 4> NewReleases;
1849     for (Instruction *NewRetain : NewRetains) {
1850       auto It = Retains.find(NewRetain);
1851       assert(It != Retains.end());
1852       const RRInfo &NewRetainRRI = It->second;
1853       KnownSafeTD &= NewRetainRRI.KnownSafe;
1854       CFGHazardAfflicted |= NewRetainRRI.CFGHazardAfflicted;
1855       for (Instruction *NewRetainRelease : NewRetainRRI.Calls) {
1856         auto Jt = Releases.find(NewRetainRelease);
1857         if (Jt == Releases.end())
1858           return false;
1859         const RRInfo &NewRetainReleaseRRI = Jt->second;
1860 
1861         // If the release does not have a reference to the retain as well,
1862         // something happened which is unaccounted for. Do not do anything.
1863         //
1864         // This can happen if we catch an additive overflow during path count
1865         // merging.
1866         if (!NewRetainReleaseRRI.Calls.count(NewRetain))
1867           return false;
1868 
1869         if (ReleasesToMove.Calls.insert(NewRetainRelease).second) {
1870           // If we overflow when we compute the path count, don't remove/move
1871           // anything.
1872           const BBState &NRRBBState = BBStates[NewRetainRelease->getParent()];
1873           unsigned PathCount = BBState::OverflowOccurredValue;
1874           if (NRRBBState.GetAllPathCountWithOverflow(PathCount))
1875             return false;
1876           assert(PathCount != BBState::OverflowOccurredValue &&
1877                  "PathCount at this point can not be "
1878                  "OverflowOccurredValue.");
1879           OldDelta -= PathCount;
1880 
1881           // Merge the ReleaseMetadata and IsTailCallRelease values.
1882           if (FirstRelease) {
1883             ReleasesToMove.ReleaseMetadata =
1884               NewRetainReleaseRRI.ReleaseMetadata;
1885             ReleasesToMove.IsTailCallRelease =
1886               NewRetainReleaseRRI.IsTailCallRelease;
1887             FirstRelease = false;
1888           } else {
1889             if (ReleasesToMove.ReleaseMetadata !=
1890                 NewRetainReleaseRRI.ReleaseMetadata)
1891               ReleasesToMove.ReleaseMetadata = nullptr;
1892             if (ReleasesToMove.IsTailCallRelease !=
1893                 NewRetainReleaseRRI.IsTailCallRelease)
1894               ReleasesToMove.IsTailCallRelease = false;
1895           }
1896 
1897           // Collect the optimal insertion points.
1898           if (!KnownSafe)
1899             for (Instruction *RIP : NewRetainReleaseRRI.ReverseInsertPts) {
1900               if (ReleasesToMove.ReverseInsertPts.insert(RIP).second) {
1901                 // If we overflow when we compute the path count, don't
1902                 // remove/move anything.
1903                 const BBState &RIPBBState = BBStates[RIP->getParent()];
1904                 PathCount = BBState::OverflowOccurredValue;
1905                 if (RIPBBState.GetAllPathCountWithOverflow(PathCount))
1906                   return false;
1907                 assert(PathCount != BBState::OverflowOccurredValue &&
1908                        "PathCount at this point can not be "
1909                        "OverflowOccurredValue.");
1910                 NewDelta -= PathCount;
1911               }
1912             }
1913           NewReleases.push_back(NewRetainRelease);
1914         }
1915       }
1916     }
1917     NewRetains.clear();
1918     if (NewReleases.empty()) break;
1919 
1920     // Back the other way.
1921     for (Instruction *NewRelease : NewReleases) {
1922       auto It = Releases.find(NewRelease);
1923       assert(It != Releases.end());
1924       const RRInfo &NewReleaseRRI = It->second;
1925       KnownSafeBU &= NewReleaseRRI.KnownSafe;
1926       CFGHazardAfflicted |= NewReleaseRRI.CFGHazardAfflicted;
1927       for (Instruction *NewReleaseRetain : NewReleaseRRI.Calls) {
1928         auto Jt = Retains.find(NewReleaseRetain);
1929         if (Jt == Retains.end())
1930           return false;
1931         const RRInfo &NewReleaseRetainRRI = Jt->second;
1932 
1933         // If the retain does not have a reference to the release as well,
1934         // something happened which is unaccounted for. Do not do anything.
1935         //
1936         // This can happen if we catch an additive overflow during path count
1937         // merging.
1938         if (!NewReleaseRetainRRI.Calls.count(NewRelease))
1939           return false;
1940 
1941         if (RetainsToMove.Calls.insert(NewReleaseRetain).second) {
1942           // If we overflow when we compute the path count, don't remove/move
1943           // anything.
1944           const BBState &NRRBBState = BBStates[NewReleaseRetain->getParent()];
1945           unsigned PathCount = BBState::OverflowOccurredValue;
1946           if (NRRBBState.GetAllPathCountWithOverflow(PathCount))
1947             return false;
1948           assert(PathCount != BBState::OverflowOccurredValue &&
1949                  "PathCount at this point can not be "
1950                  "OverflowOccurredValue.");
1951           OldDelta += PathCount;
1952           OldCount += PathCount;
1953 
1954           // Collect the optimal insertion points.
1955           if (!KnownSafe)
1956             for (Instruction *RIP : NewReleaseRetainRRI.ReverseInsertPts) {
1957               if (RetainsToMove.ReverseInsertPts.insert(RIP).second) {
1958                 // If we overflow when we compute the path count, don't
1959                 // remove/move anything.
1960                 const BBState &RIPBBState = BBStates[RIP->getParent()];
1961 
1962                 PathCount = BBState::OverflowOccurredValue;
1963                 if (RIPBBState.GetAllPathCountWithOverflow(PathCount))
1964                   return false;
1965                 assert(PathCount != BBState::OverflowOccurredValue &&
1966                        "PathCount at this point can not be "
1967                        "OverflowOccurredValue.");
1968                 NewDelta += PathCount;
1969                 NewCount += PathCount;
1970               }
1971             }
1972           NewRetains.push_back(NewReleaseRetain);
1973         }
1974       }
1975     }
1976     if (NewRetains.empty()) break;
1977   }
1978 
1979   // We can only remove pointers if we are known safe in both directions.
1980   bool UnconditionallySafe = KnownSafeTD && KnownSafeBU;
1981   if (UnconditionallySafe) {
1982     RetainsToMove.ReverseInsertPts.clear();
1983     ReleasesToMove.ReverseInsertPts.clear();
1984     NewCount = 0;
1985   } else {
1986     // Determine whether the new insertion points we computed preserve the
1987     // balance of retain and release calls through the program.
1988     // TODO: If the fully aggressive solution isn't valid, try to find a
1989     // less aggressive solution which is.
1990     if (NewDelta != 0)
1991       return false;
1992 
1993     // At this point, we are not going to remove any RR pairs, but we still are
1994     // able to move RR pairs. If one of our pointers is afflicted with
1995     // CFGHazards, we cannot perform such code motion so exit early.
1996     const bool WillPerformCodeMotion =
1997         !RetainsToMove.ReverseInsertPts.empty() ||
1998         !ReleasesToMove.ReverseInsertPts.empty();
1999     if (CFGHazardAfflicted && WillPerformCodeMotion)
2000       return false;
2001   }
2002 
2003   // Determine whether the original call points are balanced in the retain and
2004   // release calls through the program. If not, conservatively don't touch
2005   // them.
2006   // TODO: It's theoretically possible to do code motion in this case, as
2007   // long as the existing imbalances are maintained.
2008   if (OldDelta != 0)
2009     return false;
2010 
2011   Changed = true;
2012   assert(OldCount != 0 && "Unreachable code?");
2013   NumRRs += OldCount - NewCount;
2014   // Set to true if we completely removed any RR pairs.
2015   AnyPairsCompletelyEliminated = NewCount == 0;
2016 
2017   // We can move calls!
2018   return true;
2019 }
2020 
2021 /// Identify pairings between the retains and releases, and delete and/or move
2022 /// them.
2023 bool ObjCARCOpt::PerformCodePlacement(
2024     DenseMap<const BasicBlock *, BBState> &BBStates,
2025     BlotMapVector<Value *, RRInfo> &Retains,
2026     DenseMap<Value *, RRInfo> &Releases, Module *M) {
2027   LLVM_DEBUG(dbgs() << "\n== ObjCARCOpt::PerformCodePlacement ==\n");
2028 
2029   bool AnyPairsCompletelyEliminated = false;
2030   SmallVector<Instruction *, 8> DeadInsts;
2031 
2032   // Visit each retain.
2033   for (BlotMapVector<Value *, RRInfo>::const_iterator I = Retains.begin(),
2034                                                       E = Retains.end();
2035        I != E; ++I) {
2036     Value *V = I->first;
2037     if (!V) continue; // blotted
2038 
2039     Instruction *Retain = cast<Instruction>(V);
2040 
2041     LLVM_DEBUG(dbgs() << "Visiting: " << *Retain << "\n");
2042 
2043     Value *Arg = GetArgRCIdentityRoot(Retain);
2044 
2045     // If the object being released is in static or stack storage, we know it's
2046     // not being managed by ObjC reference counting, so we can delete pairs
2047     // regardless of what possible decrements or uses lie between them.
2048     bool KnownSafe = isa<Constant>(Arg) || isa<AllocaInst>(Arg);
2049 
2050     // A constant pointer can't be pointing to an object on the heap. It may
2051     // be reference-counted, but it won't be deleted.
2052     if (const LoadInst *LI = dyn_cast<LoadInst>(Arg))
2053       if (const GlobalVariable *GV =
2054             dyn_cast<GlobalVariable>(
2055               GetRCIdentityRoot(LI->getPointerOperand())))
2056         if (GV->isConstant())
2057           KnownSafe = true;
2058 
2059     // Connect the dots between the top-down-collected RetainsToMove and
2060     // bottom-up-collected ReleasesToMove to form sets of related calls.
2061     RRInfo RetainsToMove, ReleasesToMove;
2062 
2063     bool PerformMoveCalls = PairUpRetainsAndReleases(
2064         BBStates, Retains, Releases, M, Retain, DeadInsts,
2065         RetainsToMove, ReleasesToMove, Arg, KnownSafe,
2066         AnyPairsCompletelyEliminated);
2067 
2068     if (PerformMoveCalls) {
2069       // Ok, everything checks out and we're all set. Let's move/delete some
2070       // code!
2071       MoveCalls(Arg, RetainsToMove, ReleasesToMove,
2072                 Retains, Releases, DeadInsts, M);
2073     }
2074   }
2075 
2076   // Now that we're done moving everything, we can delete the newly dead
2077   // instructions, as we no longer need them as insert points.
2078   while (!DeadInsts.empty())
2079     EraseInstruction(DeadInsts.pop_back_val());
2080 
2081   return AnyPairsCompletelyEliminated;
2082 }
2083 
2084 /// Weak pointer optimizations.
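///
/// As an illustrative example of the first phase below (the IR and @use are
/// hypothetical), two objc_loadWeak calls on the same __weak slot with
/// nothing in between that could modify the slot are collapsed into one; the
/// second call is replaced by the first and erased:
/// \code
///    %1 = call i8* @objc_loadWeak(i8** %w)
///    %2 = call i8* @objc_loadWeak(i8** %w)  ; redundant, replaced by %1
///    call void @use(i8* %1)
///    call void @use(i8* %2)                 ; becomes a use of %1
/// \endcode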
2085 void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
2086   LLVM_DEBUG(dbgs() << "\n== ObjCARCOpt::OptimizeWeakCalls ==\n");
2087 
2088   // First, do memdep-style RLE (redundant load elimination) and S2L
2089   // (store-to-load forwarding) optimizations. We can't use memdep itself
2090   // because it uses AliasAnalysis and we need to do provenance queries instead.
2091   for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
2092     Instruction *Inst = &*I++;
2093 
2094     LLVM_DEBUG(dbgs() << "Visiting: " << *Inst << "\n");
2095 
2096     ARCInstKind Class = GetBasicARCInstKind(Inst);
2097     if (Class != ARCInstKind::LoadWeak &&
2098         Class != ARCInstKind::LoadWeakRetained)
2099       continue;
2100 
2101     // Delete objc_loadWeak calls with no users.
2102     if (Class == ARCInstKind::LoadWeak && Inst->use_empty()) {
2103       Inst->eraseFromParent();
2104       Changed = true;
2105       continue;
2106     }
2107 
2108     // TODO: For now, just look for an earlier available version of this value
2109     // within the same block. Theoretically, we could do memdep-style non-local
2110     // analysis too, but that would want caching. A better approach would be to
2111     // use the technique that EarlyCSE uses.
2112     inst_iterator Current = std::prev(I);
2113     BasicBlock *CurrentBB = &*Current.getBasicBlockIterator();
2114     for (BasicBlock::iterator B = CurrentBB->begin(),
2115                               J = Current.getInstructionIterator();
2116          J != B; --J) {
2117       Instruction *EarlierInst = &*std::prev(J);
2118       ARCInstKind EarlierClass = GetARCInstKind(EarlierInst);
2119       switch (EarlierClass) {
2120       case ARCInstKind::LoadWeak:
2121       case ARCInstKind::LoadWeakRetained: {
2122         // If this is loading from the same pointer, replace this load's value
2123         // with that one.
2124         CallInst *Call = cast<CallInst>(Inst);
2125         CallInst *EarlierCall = cast<CallInst>(EarlierInst);
2126         Value *Arg = Call->getArgOperand(0);
2127         Value *EarlierArg = EarlierCall->getArgOperand(0);
2128         switch (PA.getAA()->alias(Arg, EarlierArg)) {
2129         case AliasResult::MustAlias:
2130           Changed = true;
2131           // If the load has a builtin retain, insert a plain retain for it.
2132           if (Class == ARCInstKind::LoadWeakRetained) {
2133             Function *Decl = EP.get(ARCRuntimeEntryPointKind::Retain);
2134             CallInst *CI =
2135                 CallInst::Create(Decl, EarlierCall, "", Call->getIterator());
2136             CI->setTailCall();
2137           }
2138           // Zap the fully redundant load.
2139           Call->replaceAllUsesWith(EarlierCall);
2140           Call->eraseFromParent();
2141           goto clobbered;
2142         case AliasResult::MayAlias:
2143         case AliasResult::PartialAlias:
2144           goto clobbered;
2145         case AliasResult::NoAlias:
2146           break;
2147         }
2148         break;
2149       }
2150       case ARCInstKind::StoreWeak:
2151       case ARCInstKind::InitWeak: {
2152         // If this is storing to the same pointer and has the same size etc.,
2153         // replace this load's value with the stored value.
2154         CallInst *Call = cast<CallInst>(Inst);
2155         CallInst *EarlierCall = cast<CallInst>(EarlierInst);
2156         Value *Arg = Call->getArgOperand(0);
2157         Value *EarlierArg = EarlierCall->getArgOperand(0);
2158         switch (PA.getAA()->alias(Arg, EarlierArg)) {
2159         case AliasResult::MustAlias:
2160           Changed = true;
2161           // If the load has a builtin retain, insert a plain retain for it.
2162           if (Class == ARCInstKind::LoadWeakRetained) {
2163             Function *Decl = EP.get(ARCRuntimeEntryPointKind::Retain);
2164             CallInst *CI =
2165                 CallInst::Create(Decl, EarlierCall, "", Call->getIterator());
2166             CI->setTailCall();
2167           }
2168           // Zap the fully redundant load.
2169           Call->replaceAllUsesWith(EarlierCall->getArgOperand(1));
2170           Call->eraseFromParent();
2171           goto clobbered;
2172         case AliasResult::MayAlias:
2173         case AliasResult::PartialAlias:
2174           goto clobbered;
2175         case AliasResult::NoAlias:
2176           break;
2177         }
2178         break;
2179       }
2180       case ARCInstKind::MoveWeak:
2181       case ARCInstKind::CopyWeak:
2182         // TODO: Grab the copied value.
2183         goto clobbered;
2184       case ARCInstKind::AutoreleasepoolPush:
2185       case ARCInstKind::None:
2186       case ARCInstKind::IntrinsicUser:
2187       case ARCInstKind::User:
2188         // Weak pointers are only modified through the weak entry points
2189         // (and arbitrary calls, which could call the weak entry points).
2190         break;
2191       default:
2192         // Anything else could modify the weak pointer.
2193         goto clobbered;
2194       }
2195     }
2196   clobbered:;
2197   }
2198 
2199   // Then, for each destroyWeak with an alloca operand, check to see if
2200   // the alloca and all its users can be zapped.
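  //
  // Illustrative example (hypothetical IR): a weak slot that is only ever
  // initialized, stored to, and destroyed -- never loaded -- is deleted along
  // with all of its users:
  //
  //   %w = alloca i8*
  //   call i8* @objc_initWeak(i8** %w, i8* %p)
  //   call void @objc_destroyWeak(i8** %w)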
2201   for (Instruction &Inst : llvm::make_early_inc_range(instructions(F))) {
2202     ARCInstKind Class = GetBasicARCInstKind(&Inst);
2203     if (Class != ARCInstKind::DestroyWeak)
2204       continue;
2205 
2206     CallInst *Call = cast<CallInst>(&Inst);
2207     Value *Arg = Call->getArgOperand(0);
2208     if (AllocaInst *Alloca = dyn_cast<AllocaInst>(Arg)) {
2209       for (User *U : Alloca->users()) {
2210         const Instruction *UserInst = cast<Instruction>(U);
2211         switch (GetBasicARCInstKind(UserInst)) {
2212         case ARCInstKind::InitWeak:
2213         case ARCInstKind::StoreWeak:
2214         case ARCInstKind::DestroyWeak:
2215           continue;
2216         default:
2217           goto done;
2218         }
2219       }
2220       Changed = true;
2221       for (User *U : llvm::make_early_inc_range(Alloca->users())) {
2222         CallInst *UserInst = cast<CallInst>(U);
2223         switch (GetBasicARCInstKind(UserInst)) {
2224         case ARCInstKind::InitWeak:
2225         case ARCInstKind::StoreWeak:
2226           // These functions return their second argument.
2227           UserInst->replaceAllUsesWith(UserInst->getArgOperand(1));
2228           break;
2229         case ARCInstKind::DestroyWeak:
2230           // No return value.
2231           break;
2232         default:
2233           llvm_unreachable("alloca really is used!");
2234         }
2235         UserInst->eraseFromParent();
2236       }
2237       Alloca->eraseFromParent();
2238     done:;
2239     }
2240   }
2241 }
2242 
2243 /// Identify program paths which execute sequences of retains and releases which
2244 /// can be eliminated.
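///
/// In the simplest case (illustrative IR; %p is hypothetical), a retain that
/// is immediately followed by a release of the same RC identity root, with
/// nothing in between that could use the pointer or alter its reference
/// count, is deleted together with that release:
/// \code
///    %0 = call i8* @objc_retain(i8* %p)
///    call void @objc_release(i8* %p)
/// \endcode
/// More generally, pairs are matched across the control-flow graph and either
/// removed or moved closer together; see PerformCodePlacement and MoveCalls.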
2245 bool ObjCARCOpt::OptimizeSequences(Function &F) {
2246   // Releases, Retains - These are used to store the results of the main flow
2247   // analysis. These use Value* as the key instead of Instruction* so that the
2248   // map stays valid when we get around to rewriting code and calls get
2249   // replaced by arguments.
2250   DenseMap<Value *, RRInfo> Releases;
2251   BlotMapVector<Value *, RRInfo> Retains;
2252 
2253   // This is used during the traversal of the function to track the
2254   // states for each identified object at each block.
2255   DenseMap<const BasicBlock *, BBState> BBStates;
2256 
2257   // Analyze the CFG of the function, and all instructions.
2258   bool NestingDetected = Visit(F, BBStates, Retains, Releases);
2259 
2260   if (DisableRetainReleasePairing)
2261     return false;
2262 
2263   // Transform.
2264   bool AnyPairsCompletelyEliminated = PerformCodePlacement(BBStates, Retains,
2265                                                            Releases,
2266                                                            F.getParent());
2267 
2268   return AnyPairsCompletelyEliminated && NestingDetected;
2269 }
2270 
2271 /// Check if there is a dependent call earlier that does not have anything in
2272 /// between the Retain and the call that can affect the reference count of their
2273 /// shared pointer argument. Note that Retain need not be in BB.
2274 static CallInst *HasSafePathToPredecessorCall(const Value *Arg,
2275                                               Instruction *Retain,
2276                                               ProvenanceAnalysis &PA) {
2277   auto *Call = dyn_cast_or_null<CallInst>(findSingleDependency(
2278       CanChangeRetainCount, Arg, Retain->getParent(), Retain, PA));
2279 
2280   // Check that the pointer is the return value of the call.
2281   if (!Call || Arg != Call)
2282     return nullptr;
2283 
2284   // Check that the call is a regular call.
2285   ARCInstKind Class = GetBasicARCInstKind(Call);
2286   return Class == ARCInstKind::CallOrUser || Class == ARCInstKind::Call
2287              ? Call
2288              : nullptr;
2289 }
2290 
2291 /// Find a dependent retain that precedes the given autorelease for which there
2292 /// is nothing in between the two instructions that can affect the ref count of
2293 /// Arg.
2294 static CallInst *
2295 FindPredecessorRetainWithSafePath(const Value *Arg, BasicBlock *BB,
2296                                   Instruction *Autorelease,
2297                                   ProvenanceAnalysis &PA) {
2298   auto *Retain = dyn_cast_or_null<CallInst>(
2299       findSingleDependency(CanChangeRetainCount, Arg, BB, Autorelease, PA));
2300 
2301   // Check that we found a retain with the same argument.
2302   if (!Retain || !IsRetain(GetBasicARCInstKind(Retain)) ||
2303       GetArgRCIdentityRoot(Retain) != Arg) {
2304     return nullptr;
2305   }
2306 
2307   return Retain;
2308 }
2309 
2310 /// Look for an ``autorelease'' instruction dependent on Arg such that there are
2311 /// no instructions dependent on Arg that need a positive ref count in between
2312 /// the autorelease and the ret.
2313 static CallInst *
2314 FindPredecessorAutoreleaseWithSafePath(const Value *Arg, BasicBlock *BB,
2315                                        ReturnInst *Ret,
2316                                        ProvenanceAnalysis &PA) {
2317   SmallPtrSet<Instruction *, 4> DepInsts;
2318   auto *Autorelease = dyn_cast_or_null<CallInst>(
2319       findSingleDependency(NeedsPositiveRetainCount, Arg, BB, Ret, PA));
2320 
2321   if (!Autorelease)
2322     return nullptr;
2323   ARCInstKind AutoreleaseClass = GetBasicARCInstKind(Autorelease);
2324   if (!IsAutorelease(AutoreleaseClass))
2325     return nullptr;
2326   if (GetArgRCIdentityRoot(Autorelease) != Arg)
2327     return nullptr;
2328 
2329   return Autorelease;
2330 }
2331 
2332 /// Look for this pattern:
2333 /// \code
2334 ///    %call = call i8* @something(...)
2335 ///    %2 = call i8* @objc_retain(i8* %call)
2336 ///    %3 = call i8* @objc_autorelease(i8* %2)
2337 ///    ret i8* %3
2338 /// \endcode
2339 /// And delete the retain and autorelease.
2340 void ObjCARCOpt::OptimizeReturns(Function &F) {
2341   if (!F.getReturnType()->isPointerTy())
2342     return;
2343 
2344   LLVM_DEBUG(dbgs() << "\n== ObjCARCOpt::OptimizeReturns ==\n");
2345 
2346   for (BasicBlock &BB: F) {
2347     ReturnInst *Ret = dyn_cast<ReturnInst>(&BB.back());
2348     if (!Ret)
2349       continue;
2350 
2351     LLVM_DEBUG(dbgs() << "Visiting: " << *Ret << "\n");
2352 
2353     const Value *Arg = GetRCIdentityRoot(Ret->getOperand(0));
2354 
2355     // Look for an ``autorelease'' instruction that is a predecessor of Ret and
2356     // dependent on Arg such that there are no instructions dependent on Arg
2357     // that need a positive ref count in between the autorelease and Ret.
2358     CallInst *Autorelease =
2359         FindPredecessorAutoreleaseWithSafePath(Arg, &BB, Ret, PA);
2360 
2361     if (!Autorelease)
2362       continue;
2363 
2364     CallInst *Retain = FindPredecessorRetainWithSafePath(
2365         Arg, Autorelease->getParent(), Autorelease, PA);
2366 
2367     if (!Retain)
2368       continue;
2369 
2370     // Check that there is nothing that can affect the reference count
2371     // between the retain and the call.  Note that Retain need not be in BB.
2372     CallInst *Call = HasSafePathToPredecessorCall(Arg, Retain, PA);
2373 
2374     // Don't remove retainRV/autoreleaseRV pairs if the call isn't a tail call.
2375     if (!Call ||
2376         (!Call->isTailCall() &&
2377          GetBasicARCInstKind(Retain) == ARCInstKind::RetainRV &&
2378          GetBasicARCInstKind(Autorelease) == ARCInstKind::AutoreleaseRV))
2379       continue;
2380 
2381     // If so, we can zap the retain and autorelease.
2382     Changed = true;
2383     ++NumRets;
2384     LLVM_DEBUG(dbgs() << "Erasing: " << *Retain << "\nErasing: " << *Autorelease
2385                       << "\n");
2386     BundledInsts->eraseInst(Retain);
2387     EraseInstruction(Autorelease);
2388   }
2389 }
2390 
2391 #ifndef NDEBUG
2392 void
2393 ObjCARCOpt::GatherStatistics(Function &F, bool AfterOptimization) {
2394   Statistic &NumRetains =
2395       AfterOptimization ? NumRetainsAfterOpt : NumRetainsBeforeOpt;
2396   Statistic &NumReleases =
2397       AfterOptimization ? NumReleasesAfterOpt : NumReleasesBeforeOpt;
2398 
2399   for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
2400     Instruction *Inst = &*I++;
2401     switch (GetBasicARCInstKind(Inst)) {
2402     default:
2403       break;
2404     case ARCInstKind::Retain:
2405       ++NumRetains;
2406       break;
2407     case ARCInstKind::Release:
2408       ++NumReleases;
2409       break;
2410     }
2411   }
2412 }
2413 #endif
2414 
2415 void ObjCARCOpt::init(Function &F) {
2416   if (!EnableARCOpts)
2417     return;
2418 
2419   // Intuitively, objc_retain and others are nocapture; however, in practice
2420   // they are not, because they return their argument value. And objc_release
2421   // calls finalizers which can have arbitrary side effects.
2422   MDKindCache.init(F.getParent());
2423 
2424   // Initialize our runtime entry point cache.
2425   EP.init(F.getParent());
2426 
2427   // Compute which blocks are in which funclet.
2428   if (F.hasPersonalityFn() &&
2429       isScopedEHPersonality(classifyEHPersonality(F.getPersonalityFn())))
2430     BlockEHColors = colorEHFunclets(F);
2431 }
2432 
2433 bool ObjCARCOpt::run(Function &F, AAResults &AA) {
2434   if (!EnableARCOpts)
2435     return false;
2436 
2437   Changed = CFGChanged = false;
2438   BundledRetainClaimRVs BRV(/*ContractPass=*/false);
2439   BundledInsts = &BRV;
2440 
2441   LLVM_DEBUG(dbgs() << "<<< ObjCARCOpt: Visiting Function: " << F.getName()
2442                     << " >>>"
2443                        "\n");
2444 
2445   std::pair<bool, bool> R = BundledInsts->insertAfterInvokes(F, nullptr);
2446   Changed |= R.first;
2447   CFGChanged |= R.second;
2448 
2449   PA.setAA(&AA);
2450 
2451 #ifndef NDEBUG
2452   if (AreStatisticsEnabled()) {
2453     GatherStatistics(F, false);
2454   }
2455 #endif
2456 
2457   // This pass performs several distinct transformations. As a compile-time aid
2458   // when compiling code that isn't ObjC, skip these if the relevant ObjC
2459   // library functions aren't declared.
2460 
2461   // Preliminary optimizations. This also computes UsedInThisFunction.
2462   OptimizeIndividualCalls(F);
2463 
2464   // Optimizations for weak pointers.
2465   if (UsedInThisFunction & ((1 << unsigned(ARCInstKind::LoadWeak)) |
2466                             (1 << unsigned(ARCInstKind::LoadWeakRetained)) |
2467                             (1 << unsigned(ARCInstKind::StoreWeak)) |
2468                             (1 << unsigned(ARCInstKind::InitWeak)) |
2469                             (1 << unsigned(ARCInstKind::CopyWeak)) |
2470                             (1 << unsigned(ARCInstKind::MoveWeak)) |
2471                             (1 << unsigned(ARCInstKind::DestroyWeak))))
2472     OptimizeWeakCalls(F);
2473 
2474   // Optimizations for retain+release pairs.
2475   if (UsedInThisFunction & ((1 << unsigned(ARCInstKind::Retain)) |
2476                             (1 << unsigned(ARCInstKind::RetainRV)) |
2477                             (1 << unsigned(ARCInstKind::RetainBlock))))
2478     if (UsedInThisFunction & (1 << unsigned(ARCInstKind::Release)))
2479       // Run OptimizeSequences until it either stops making changes or
2480       // no retain+release pair nesting is detected.
2481       while (OptimizeSequences(F)) {}
2482 
2483   // Optimizations if objc_autorelease is used.
2484   if (UsedInThisFunction & ((1 << unsigned(ARCInstKind::Autorelease)) |
2485                             (1 << unsigned(ARCInstKind::AutoreleaseRV))))
2486     OptimizeReturns(F);
2487 
2488   // Gather statistics after optimization.
2489 #ifndef NDEBUG
2490   if (AreStatisticsEnabled()) {
2491     GatherStatistics(F, true);
2492   }
2493 #endif
2494 
2495   LLVM_DEBUG(dbgs() << "\n");
2496 
2497   return Changed;
2498 }
2499 
2500 /// @}
2501 ///
2502 
2503 PreservedAnalyses ObjCARCOptPass::run(Function &F,
2504                                       FunctionAnalysisManager &AM) {
2505   ObjCARCOpt OCAO;
2506   OCAO.init(F);
2507 
2508   bool Changed = OCAO.run(F, AM.getResult<AAManager>(F));
2509   bool CFGChanged = OCAO.hasCFGChanged();
2510   if (Changed) {
2511     PreservedAnalyses PA;
2512     if (!CFGChanged)
2513       PA.preserveSet<CFGAnalyses>();
2514     return PA;
2515   }
2516   return PreservedAnalyses::all();
2517 }
2518