1 //===- ObjCARCOpts.cpp - ObjC ARC Optimization ----------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// This file defines ObjC ARC optimizations. ARC stands for Automatic 11 /// Reference Counting and is a system for managing reference counts for objects 12 /// in Objective C. 13 /// 14 /// The optimizations performed include elimination of redundant, partially 15 /// redundant, and inconsequential reference count operations, elimination of 16 /// redundant weak pointer operations, and numerous minor simplifications. 17 /// 18 /// WARNING: This file knows about certain library functions. It recognizes them 19 /// by name, and hardwires knowledge of their semantics. 20 /// 21 /// WARNING: This file knows about how certain Objective-C library functions are 22 /// used. Naive LLVM IR transformations which would otherwise be 23 /// behavior-preserving may break these assumptions. 24 // 25 //===----------------------------------------------------------------------===// 26 27 #include "ARCRuntimeEntryPoints.h" 28 #include "BlotMapVector.h" 29 #include "DependencyAnalysis.h" 30 #include "ObjCARC.h" 31 #include "ProvenanceAnalysis.h" 32 #include "PtrState.h" 33 #include "llvm/ADT/DenseMap.h" 34 #include "llvm/ADT/STLExtras.h" 35 #include "llvm/ADT/SmallPtrSet.h" 36 #include "llvm/ADT/SmallVector.h" 37 #include "llvm/ADT/Statistic.h" 38 #include "llvm/Analysis/AliasAnalysis.h" 39 #include "llvm/Analysis/EHPersonalities.h" 40 #include "llvm/Analysis/ObjCARCAliasAnalysis.h" 41 #include "llvm/Analysis/ObjCARCAnalysisUtils.h" 42 #include "llvm/Analysis/ObjCARCInstKind.h" 43 #include "llvm/Analysis/ObjCARCUtil.h" 44 #include "llvm/IR/BasicBlock.h" 45 #include "llvm/IR/CFG.h" 46 #include "llvm/IR/Constant.h" 47 #include "llvm/IR/Constants.h" 48 #include "llvm/IR/DerivedTypes.h" 49 #include "llvm/IR/Function.h" 50 #include "llvm/IR/GlobalVariable.h" 51 #include "llvm/IR/InstIterator.h" 52 #include "llvm/IR/InstrTypes.h" 53 #include "llvm/IR/Instruction.h" 54 #include "llvm/IR/Instructions.h" 55 #include "llvm/IR/LLVMContext.h" 56 #include "llvm/IR/Metadata.h" 57 #include "llvm/IR/Type.h" 58 #include "llvm/IR/User.h" 59 #include "llvm/IR/Value.h" 60 #include "llvm/Support/Casting.h" 61 #include "llvm/Support/CommandLine.h" 62 #include "llvm/Support/Compiler.h" 63 #include "llvm/Support/Debug.h" 64 #include "llvm/Support/ErrorHandling.h" 65 #include "llvm/Support/raw_ostream.h" 66 #include "llvm/Transforms/ObjCARC.h" 67 #include <cassert> 68 #include <iterator> 69 #include <utility> 70 71 using namespace llvm; 72 using namespace llvm::objcarc; 73 74 #define DEBUG_TYPE "objc-arc-opts" 75 76 static cl::opt<unsigned> MaxPtrStates("arc-opt-max-ptr-states", 77 cl::Hidden, 78 cl::desc("Maximum number of ptr states the optimizer keeps track of"), 79 cl::init(4095)); 80 81 /// \defgroup ARCUtilities Utility declarations/definitions specific to ARC. 82 /// @{ 83 84 /// This is similar to GetRCIdentityRoot but it stops as soon 85 /// as it finds a value with multiple uses. 86 static const Value *FindSingleUseIdentifiedObject(const Value *Arg) { 87 // ConstantData (like ConstantPointerNull and UndefValue) is used across 88 // modules. It's never a single-use value. 89 if (isa<ConstantData>(Arg)) 90 return nullptr; 91 92 if (Arg->hasOneUse()) { 93 if (const BitCastInst *BC = dyn_cast<BitCastInst>(Arg)) 94 return FindSingleUseIdentifiedObject(BC->getOperand(0)); 95 if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Arg)) 96 if (GEP->hasAllZeroIndices()) 97 return FindSingleUseIdentifiedObject(GEP->getPointerOperand()); 98 if (IsForwarding(GetBasicARCInstKind(Arg))) 99 return FindSingleUseIdentifiedObject( 100 cast<CallInst>(Arg)->getArgOperand(0)); 101 if (!IsObjCIdentifiedObject(Arg)) 102 return nullptr; 103 return Arg; 104 } 105 106 // If we found an identifiable object but it has multiple uses, but they are 107 // trivial uses, we can still consider this to be a single-use value. 108 if (IsObjCIdentifiedObject(Arg)) { 109 for (const User *U : Arg->users()) 110 if (!U->use_empty() || GetRCIdentityRoot(U) != Arg) 111 return nullptr; 112 113 return Arg; 114 } 115 116 return nullptr; 117 } 118 119 /// @} 120 /// 121 /// \defgroup ARCOpt ARC Optimization. 122 /// @{ 123 124 // TODO: On code like this: 125 // 126 // objc_retain(%x) 127 // stuff_that_cannot_release() 128 // objc_autorelease(%x) 129 // stuff_that_cannot_release() 130 // objc_retain(%x) 131 // stuff_that_cannot_release() 132 // objc_autorelease(%x) 133 // 134 // The second retain and autorelease can be deleted. 135 136 // TODO: It should be possible to delete 137 // objc_autoreleasePoolPush and objc_autoreleasePoolPop 138 // pairs if nothing is actually autoreleased between them. Also, autorelease 139 // calls followed by objc_autoreleasePoolPop calls (perhaps in ObjC++ code 140 // after inlining) can be turned into plain release calls. 141 142 // TODO: Critical-edge splitting. If the optimial insertion point is 143 // a critical edge, the current algorithm has to fail, because it doesn't 144 // know how to split edges. It should be possible to make the optimizer 145 // think in terms of edges, rather than blocks, and then split critical 146 // edges on demand. 147 148 // TODO: OptimizeSequences could generalized to be Interprocedural. 149 150 // TODO: Recognize that a bunch of other objc runtime calls have 151 // non-escaping arguments and non-releasing arguments, and may be 152 // non-autoreleasing. 153 154 // TODO: Sink autorelease calls as far as possible. Unfortunately we 155 // usually can't sink them past other calls, which would be the main 156 // case where it would be useful. 157 158 // TODO: The pointer returned from objc_loadWeakRetained is retained. 159 160 // TODO: Delete release+retain pairs (rare). 161 162 STATISTIC(NumNoops, "Number of no-op objc calls eliminated"); 163 STATISTIC(NumPartialNoops, "Number of partially no-op objc calls eliminated"); 164 STATISTIC(NumAutoreleases,"Number of autoreleases converted to releases"); 165 STATISTIC(NumRets, "Number of return value forwarding " 166 "retain+autoreleases eliminated"); 167 STATISTIC(NumRRs, "Number of retain+release paths eliminated"); 168 STATISTIC(NumPeeps, "Number of calls peephole-optimized"); 169 #ifndef NDEBUG 170 STATISTIC(NumRetainsBeforeOpt, 171 "Number of retains before optimization"); 172 STATISTIC(NumReleasesBeforeOpt, 173 "Number of releases before optimization"); 174 STATISTIC(NumRetainsAfterOpt, 175 "Number of retains after optimization"); 176 STATISTIC(NumReleasesAfterOpt, 177 "Number of releases after optimization"); 178 #endif 179 180 namespace { 181 182 /// Per-BasicBlock state. 183 class BBState { 184 /// The number of unique control paths from the entry which can reach this 185 /// block. 186 unsigned TopDownPathCount = 0; 187 188 /// The number of unique control paths to exits from this block. 189 unsigned BottomUpPathCount = 0; 190 191 /// The top-down traversal uses this to record information known about a 192 /// pointer at the bottom of each block. 193 BlotMapVector<const Value *, TopDownPtrState> PerPtrTopDown; 194 195 /// The bottom-up traversal uses this to record information known about a 196 /// pointer at the top of each block. 197 BlotMapVector<const Value *, BottomUpPtrState> PerPtrBottomUp; 198 199 /// Effective predecessors of the current block ignoring ignorable edges and 200 /// ignored backedges. 201 SmallVector<BasicBlock *, 2> Preds; 202 203 /// Effective successors of the current block ignoring ignorable edges and 204 /// ignored backedges. 205 SmallVector<BasicBlock *, 2> Succs; 206 207 public: 208 static const unsigned OverflowOccurredValue; 209 210 BBState() = default; 211 212 using top_down_ptr_iterator = decltype(PerPtrTopDown)::iterator; 213 using const_top_down_ptr_iterator = decltype(PerPtrTopDown)::const_iterator; 214 215 top_down_ptr_iterator top_down_ptr_begin() { return PerPtrTopDown.begin(); } 216 top_down_ptr_iterator top_down_ptr_end() { return PerPtrTopDown.end(); } 217 const_top_down_ptr_iterator top_down_ptr_begin() const { 218 return PerPtrTopDown.begin(); 219 } 220 const_top_down_ptr_iterator top_down_ptr_end() const { 221 return PerPtrTopDown.end(); 222 } 223 bool hasTopDownPtrs() const { 224 return !PerPtrTopDown.empty(); 225 } 226 227 unsigned top_down_ptr_list_size() const { 228 return std::distance(top_down_ptr_begin(), top_down_ptr_end()); 229 } 230 231 using bottom_up_ptr_iterator = decltype(PerPtrBottomUp)::iterator; 232 using const_bottom_up_ptr_iterator = 233 decltype(PerPtrBottomUp)::const_iterator; 234 235 bottom_up_ptr_iterator bottom_up_ptr_begin() { 236 return PerPtrBottomUp.begin(); 237 } 238 bottom_up_ptr_iterator bottom_up_ptr_end() { return PerPtrBottomUp.end(); } 239 const_bottom_up_ptr_iterator bottom_up_ptr_begin() const { 240 return PerPtrBottomUp.begin(); 241 } 242 const_bottom_up_ptr_iterator bottom_up_ptr_end() const { 243 return PerPtrBottomUp.end(); 244 } 245 bool hasBottomUpPtrs() const { 246 return !PerPtrBottomUp.empty(); 247 } 248 249 unsigned bottom_up_ptr_list_size() const { 250 return std::distance(bottom_up_ptr_begin(), bottom_up_ptr_end()); 251 } 252 253 /// Mark this block as being an entry block, which has one path from the 254 /// entry by definition. 255 void SetAsEntry() { TopDownPathCount = 1; } 256 257 /// Mark this block as being an exit block, which has one path to an exit by 258 /// definition. 259 void SetAsExit() { BottomUpPathCount = 1; } 260 261 /// Attempt to find the PtrState object describing the top down state for 262 /// pointer Arg. Return a new initialized PtrState describing the top down 263 /// state for Arg if we do not find one. 264 TopDownPtrState &getPtrTopDownState(const Value *Arg) { 265 return PerPtrTopDown[Arg]; 266 } 267 268 /// Attempt to find the PtrState object describing the bottom up state for 269 /// pointer Arg. Return a new initialized PtrState describing the bottom up 270 /// state for Arg if we do not find one. 271 BottomUpPtrState &getPtrBottomUpState(const Value *Arg) { 272 return PerPtrBottomUp[Arg]; 273 } 274 275 /// Attempt to find the PtrState object describing the bottom up state for 276 /// pointer Arg. 277 bottom_up_ptr_iterator findPtrBottomUpState(const Value *Arg) { 278 return PerPtrBottomUp.find(Arg); 279 } 280 281 void clearBottomUpPointers() { 282 PerPtrBottomUp.clear(); 283 } 284 285 void clearTopDownPointers() { 286 PerPtrTopDown.clear(); 287 } 288 289 void InitFromPred(const BBState &Other); 290 void InitFromSucc(const BBState &Other); 291 void MergePred(const BBState &Other); 292 void MergeSucc(const BBState &Other); 293 294 /// Compute the number of possible unique paths from an entry to an exit 295 /// which pass through this block. This is only valid after both the 296 /// top-down and bottom-up traversals are complete. 297 /// 298 /// Returns true if overflow occurred. Returns false if overflow did not 299 /// occur. 300 bool GetAllPathCountWithOverflow(unsigned &PathCount) const { 301 if (TopDownPathCount == OverflowOccurredValue || 302 BottomUpPathCount == OverflowOccurredValue) 303 return true; 304 unsigned long long Product = 305 (unsigned long long)TopDownPathCount*BottomUpPathCount; 306 // Overflow occurred if any of the upper bits of Product are set or if all 307 // the lower bits of Product are all set. 308 return (Product >> 32) || 309 ((PathCount = Product) == OverflowOccurredValue); 310 } 311 312 // Specialized CFG utilities. 313 using edge_iterator = SmallVectorImpl<BasicBlock *>::const_iterator; 314 315 edge_iterator pred_begin() const { return Preds.begin(); } 316 edge_iterator pred_end() const { return Preds.end(); } 317 edge_iterator succ_begin() const { return Succs.begin(); } 318 edge_iterator succ_end() const { return Succs.end(); } 319 320 void addSucc(BasicBlock *Succ) { Succs.push_back(Succ); } 321 void addPred(BasicBlock *Pred) { Preds.push_back(Pred); } 322 323 bool isExit() const { return Succs.empty(); } 324 }; 325 326 } // end anonymous namespace 327 328 const unsigned BBState::OverflowOccurredValue = 0xffffffff; 329 330 namespace llvm { 331 332 raw_ostream &operator<<(raw_ostream &OS, 333 BBState &BBState) LLVM_ATTRIBUTE_UNUSED; 334 335 } // end namespace llvm 336 337 void BBState::InitFromPred(const BBState &Other) { 338 PerPtrTopDown = Other.PerPtrTopDown; 339 TopDownPathCount = Other.TopDownPathCount; 340 } 341 342 void BBState::InitFromSucc(const BBState &Other) { 343 PerPtrBottomUp = Other.PerPtrBottomUp; 344 BottomUpPathCount = Other.BottomUpPathCount; 345 } 346 347 /// The top-down traversal uses this to merge information about predecessors to 348 /// form the initial state for a new block. 349 void BBState::MergePred(const BBState &Other) { 350 if (TopDownPathCount == OverflowOccurredValue) 351 return; 352 353 // Other.TopDownPathCount can be 0, in which case it is either dead or a 354 // loop backedge. Loop backedges are special. 355 TopDownPathCount += Other.TopDownPathCount; 356 357 // In order to be consistent, we clear the top down pointers when by adding 358 // TopDownPathCount becomes OverflowOccurredValue even though "true" overflow 359 // has not occurred. 360 if (TopDownPathCount == OverflowOccurredValue) { 361 clearTopDownPointers(); 362 return; 363 } 364 365 // Check for overflow. If we have overflow, fall back to conservative 366 // behavior. 367 if (TopDownPathCount < Other.TopDownPathCount) { 368 TopDownPathCount = OverflowOccurredValue; 369 clearTopDownPointers(); 370 return; 371 } 372 373 // For each entry in the other set, if our set has an entry with the same key, 374 // merge the entries. Otherwise, copy the entry and merge it with an empty 375 // entry. 376 for (auto MI = Other.top_down_ptr_begin(), ME = Other.top_down_ptr_end(); 377 MI != ME; ++MI) { 378 auto Pair = PerPtrTopDown.insert(*MI); 379 Pair.first->second.Merge(Pair.second ? TopDownPtrState() : MI->second, 380 /*TopDown=*/true); 381 } 382 383 // For each entry in our set, if the other set doesn't have an entry with the 384 // same key, force it to merge with an empty entry. 385 for (auto MI = top_down_ptr_begin(), ME = top_down_ptr_end(); MI != ME; ++MI) 386 if (Other.PerPtrTopDown.find(MI->first) == Other.PerPtrTopDown.end()) 387 MI->second.Merge(TopDownPtrState(), /*TopDown=*/true); 388 } 389 390 /// The bottom-up traversal uses this to merge information about successors to 391 /// form the initial state for a new block. 392 void BBState::MergeSucc(const BBState &Other) { 393 if (BottomUpPathCount == OverflowOccurredValue) 394 return; 395 396 // Other.BottomUpPathCount can be 0, in which case it is either dead or a 397 // loop backedge. Loop backedges are special. 398 BottomUpPathCount += Other.BottomUpPathCount; 399 400 // In order to be consistent, we clear the top down pointers when by adding 401 // BottomUpPathCount becomes OverflowOccurredValue even though "true" overflow 402 // has not occurred. 403 if (BottomUpPathCount == OverflowOccurredValue) { 404 clearBottomUpPointers(); 405 return; 406 } 407 408 // Check for overflow. If we have overflow, fall back to conservative 409 // behavior. 410 if (BottomUpPathCount < Other.BottomUpPathCount) { 411 BottomUpPathCount = OverflowOccurredValue; 412 clearBottomUpPointers(); 413 return; 414 } 415 416 // For each entry in the other set, if our set has an entry with the 417 // same key, merge the entries. Otherwise, copy the entry and merge 418 // it with an empty entry. 419 for (auto MI = Other.bottom_up_ptr_begin(), ME = Other.bottom_up_ptr_end(); 420 MI != ME; ++MI) { 421 auto Pair = PerPtrBottomUp.insert(*MI); 422 Pair.first->second.Merge(Pair.second ? BottomUpPtrState() : MI->second, 423 /*TopDown=*/false); 424 } 425 426 // For each entry in our set, if the other set doesn't have an entry 427 // with the same key, force it to merge with an empty entry. 428 for (auto MI = bottom_up_ptr_begin(), ME = bottom_up_ptr_end(); MI != ME; 429 ++MI) 430 if (Other.PerPtrBottomUp.find(MI->first) == Other.PerPtrBottomUp.end()) 431 MI->second.Merge(BottomUpPtrState(), /*TopDown=*/false); 432 } 433 434 raw_ostream &llvm::operator<<(raw_ostream &OS, BBState &BBInfo) { 435 // Dump the pointers we are tracking. 436 OS << " TopDown State:\n"; 437 if (!BBInfo.hasTopDownPtrs()) { 438 LLVM_DEBUG(dbgs() << " NONE!\n"); 439 } else { 440 for (auto I = BBInfo.top_down_ptr_begin(), E = BBInfo.top_down_ptr_end(); 441 I != E; ++I) { 442 const PtrState &P = I->second; 443 OS << " Ptr: " << *I->first 444 << "\n KnownSafe: " << (P.IsKnownSafe()?"true":"false") 445 << "\n ImpreciseRelease: " 446 << (P.IsTrackingImpreciseReleases()?"true":"false") << "\n" 447 << " HasCFGHazards: " 448 << (P.IsCFGHazardAfflicted()?"true":"false") << "\n" 449 << " KnownPositive: " 450 << (P.HasKnownPositiveRefCount()?"true":"false") << "\n" 451 << " Seq: " 452 << P.GetSeq() << "\n"; 453 } 454 } 455 456 OS << " BottomUp State:\n"; 457 if (!BBInfo.hasBottomUpPtrs()) { 458 LLVM_DEBUG(dbgs() << " NONE!\n"); 459 } else { 460 for (auto I = BBInfo.bottom_up_ptr_begin(), E = BBInfo.bottom_up_ptr_end(); 461 I != E; ++I) { 462 const PtrState &P = I->second; 463 OS << " Ptr: " << *I->first 464 << "\n KnownSafe: " << (P.IsKnownSafe()?"true":"false") 465 << "\n ImpreciseRelease: " 466 << (P.IsTrackingImpreciseReleases()?"true":"false") << "\n" 467 << " HasCFGHazards: " 468 << (P.IsCFGHazardAfflicted()?"true":"false") << "\n" 469 << " KnownPositive: " 470 << (P.HasKnownPositiveRefCount()?"true":"false") << "\n" 471 << " Seq: " 472 << P.GetSeq() << "\n"; 473 } 474 } 475 476 return OS; 477 } 478 479 namespace { 480 481 /// The main ARC optimization pass. 482 class ObjCARCOpt { 483 bool Changed = false; 484 bool CFGChanged = false; 485 ProvenanceAnalysis PA; 486 487 /// A cache of references to runtime entry point constants. 488 ARCRuntimeEntryPoints EP; 489 490 /// A cache of MDKinds that can be passed into other functions to propagate 491 /// MDKind identifiers. 492 ARCMDKindCache MDKindCache; 493 494 BundledRetainClaimRVs *BundledInsts = nullptr; 495 496 /// A flag indicating whether the optimization that removes or moves 497 /// retain/release pairs should be performed. 498 bool DisableRetainReleasePairing = false; 499 500 /// Flags which determine whether each of the interesting runtime functions 501 /// is in fact used in the current function. 502 unsigned UsedInThisFunction; 503 504 DenseMap<BasicBlock *, ColorVector> BlockEHColors; 505 506 bool OptimizeRetainRVCall(Function &F, Instruction *RetainRV); 507 void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV, 508 ARCInstKind &Class); 509 void OptimizeIndividualCalls(Function &F); 510 511 /// Optimize an individual call, optionally passing the 512 /// GetArgRCIdentityRoot if it has already been computed. 513 void OptimizeIndividualCallImpl(Function &F, Instruction *Inst, 514 ARCInstKind Class, const Value *Arg); 515 516 /// Try to optimize an AutoreleaseRV with a RetainRV or UnsafeClaimRV. If the 517 /// optimization occurs, returns true to indicate that the caller should 518 /// assume the instructions are dead. 519 bool OptimizeInlinedAutoreleaseRVCall(Function &F, Instruction *Inst, 520 const Value *&Arg, ARCInstKind Class, 521 Instruction *AutoreleaseRV, 522 const Value *&AutoreleaseRVArg); 523 524 void CheckForCFGHazards(const BasicBlock *BB, 525 DenseMap<const BasicBlock *, BBState> &BBStates, 526 BBState &MyStates) const; 527 bool VisitInstructionBottomUp(Instruction *Inst, BasicBlock *BB, 528 BlotMapVector<Value *, RRInfo> &Retains, 529 BBState &MyStates); 530 bool VisitBottomUp(BasicBlock *BB, 531 DenseMap<const BasicBlock *, BBState> &BBStates, 532 BlotMapVector<Value *, RRInfo> &Retains); 533 bool VisitInstructionTopDown( 534 Instruction *Inst, DenseMap<Value *, RRInfo> &Releases, BBState &MyStates, 535 const DenseMap<const Instruction *, SmallPtrSet<const Value *, 2>> 536 &ReleaseInsertPtToRCIdentityRoots); 537 bool VisitTopDown( 538 BasicBlock *BB, DenseMap<const BasicBlock *, BBState> &BBStates, 539 DenseMap<Value *, RRInfo> &Releases, 540 const DenseMap<const Instruction *, SmallPtrSet<const Value *, 2>> 541 &ReleaseInsertPtToRCIdentityRoots); 542 bool Visit(Function &F, DenseMap<const BasicBlock *, BBState> &BBStates, 543 BlotMapVector<Value *, RRInfo> &Retains, 544 DenseMap<Value *, RRInfo> &Releases); 545 546 void MoveCalls(Value *Arg, RRInfo &RetainsToMove, RRInfo &ReleasesToMove, 547 BlotMapVector<Value *, RRInfo> &Retains, 548 DenseMap<Value *, RRInfo> &Releases, 549 SmallVectorImpl<Instruction *> &DeadInsts, Module *M); 550 551 bool PairUpRetainsAndReleases(DenseMap<const BasicBlock *, BBState> &BBStates, 552 BlotMapVector<Value *, RRInfo> &Retains, 553 DenseMap<Value *, RRInfo> &Releases, Module *M, 554 Instruction *Retain, 555 SmallVectorImpl<Instruction *> &DeadInsts, 556 RRInfo &RetainsToMove, RRInfo &ReleasesToMove, 557 Value *Arg, bool KnownSafe, 558 bool &AnyPairsCompletelyEliminated); 559 560 bool PerformCodePlacement(DenseMap<const BasicBlock *, BBState> &BBStates, 561 BlotMapVector<Value *, RRInfo> &Retains, 562 DenseMap<Value *, RRInfo> &Releases, Module *M); 563 564 void OptimizeWeakCalls(Function &F); 565 566 bool OptimizeSequences(Function &F); 567 568 void OptimizeReturns(Function &F); 569 570 template <typename PredicateT> 571 static void cloneOpBundlesIf(CallBase *CI, 572 SmallVectorImpl<OperandBundleDef> &OpBundles, 573 PredicateT Predicate) { 574 for (unsigned I = 0, E = CI->getNumOperandBundles(); I != E; ++I) { 575 OperandBundleUse B = CI->getOperandBundleAt(I); 576 if (Predicate(B)) 577 OpBundles.emplace_back(B); 578 } 579 } 580 581 void addOpBundleForFunclet(BasicBlock *BB, 582 SmallVectorImpl<OperandBundleDef> &OpBundles) { 583 if (!BlockEHColors.empty()) { 584 const ColorVector &CV = BlockEHColors.find(BB)->second; 585 assert(CV.size() > 0 && "Uncolored block"); 586 for (BasicBlock *EHPadBB : CV) 587 if (auto *EHPad = dyn_cast<FuncletPadInst>(EHPadBB->getFirstNonPHI())) { 588 OpBundles.emplace_back("funclet", EHPad); 589 return; 590 } 591 } 592 } 593 594 #ifndef NDEBUG 595 void GatherStatistics(Function &F, bool AfterOptimization = false); 596 #endif 597 598 public: 599 void init(Function &F); 600 bool run(Function &F, AAResults &AA); 601 bool hasCFGChanged() const { return CFGChanged; } 602 }; 603 } // end anonymous namespace 604 605 /// Turn objc_retainAutoreleasedReturnValue into objc_retain if the operand is 606 /// not a return value. 607 bool 608 ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) { 609 // Check for the argument being from an immediately preceding call or invoke. 610 const Value *Arg = GetArgRCIdentityRoot(RetainRV); 611 if (const Instruction *Call = dyn_cast<CallBase>(Arg)) { 612 if (Call->getParent() == RetainRV->getParent()) { 613 BasicBlock::const_iterator I(Call); 614 ++I; 615 while (IsNoopInstruction(&*I)) 616 ++I; 617 if (&*I == RetainRV) 618 return false; 619 } else if (const InvokeInst *II = dyn_cast<InvokeInst>(Call)) { 620 BasicBlock *RetainRVParent = RetainRV->getParent(); 621 if (II->getNormalDest() == RetainRVParent) { 622 BasicBlock::const_iterator I = RetainRVParent->begin(); 623 while (IsNoopInstruction(&*I)) 624 ++I; 625 if (&*I == RetainRV) 626 return false; 627 } 628 } 629 } 630 631 assert(!BundledInsts->contains(RetainRV) && 632 "a bundled retainRV's argument should be a call"); 633 634 // Turn it to a plain objc_retain. 635 Changed = true; 636 ++NumPeeps; 637 638 LLVM_DEBUG(dbgs() << "Transforming objc_retainAutoreleasedReturnValue => " 639 "objc_retain since the operand is not a return value.\n" 640 "Old = " 641 << *RetainRV << "\n"); 642 643 Function *NewDecl = EP.get(ARCRuntimeEntryPointKind::Retain); 644 cast<CallInst>(RetainRV)->setCalledFunction(NewDecl); 645 646 LLVM_DEBUG(dbgs() << "New = " << *RetainRV << "\n"); 647 648 return false; 649 } 650 651 bool ObjCARCOpt::OptimizeInlinedAutoreleaseRVCall( 652 Function &F, Instruction *Inst, const Value *&Arg, ARCInstKind Class, 653 Instruction *AutoreleaseRV, const Value *&AutoreleaseRVArg) { 654 if (BundledInsts->contains(Inst)) 655 return false; 656 657 // Must be in the same basic block. 658 assert(Inst->getParent() == AutoreleaseRV->getParent()); 659 660 // Must operate on the same root. 661 Arg = GetArgRCIdentityRoot(Inst); 662 AutoreleaseRVArg = GetArgRCIdentityRoot(AutoreleaseRV); 663 if (Arg != AutoreleaseRVArg) { 664 // If there isn't an exact match, check if we have equivalent PHIs. 665 const PHINode *PN = dyn_cast<PHINode>(Arg); 666 if (!PN) 667 return false; 668 669 SmallVector<const Value *, 4> ArgUsers; 670 getEquivalentPHIs(*PN, ArgUsers); 671 if (!llvm::is_contained(ArgUsers, AutoreleaseRVArg)) 672 return false; 673 } 674 675 // Okay, this is a match. Merge them. 676 ++NumPeeps; 677 LLVM_DEBUG(dbgs() << "Found inlined objc_autoreleaseReturnValue '" 678 << *AutoreleaseRV << "' paired with '" << *Inst << "'\n"); 679 680 // Delete the RV pair, starting with the AutoreleaseRV. 681 AutoreleaseRV->replaceAllUsesWith( 682 cast<CallInst>(AutoreleaseRV)->getArgOperand(0)); 683 Changed = true; 684 EraseInstruction(AutoreleaseRV); 685 if (Class == ARCInstKind::RetainRV) { 686 // AutoreleaseRV and RetainRV cancel out. Delete the RetainRV. 687 Inst->replaceAllUsesWith(cast<CallInst>(Inst)->getArgOperand(0)); 688 EraseInstruction(Inst); 689 return true; 690 } 691 692 // UnsafeClaimRV is a frontend peephole for RetainRV + Release. Since the 693 // AutoreleaseRV and RetainRV cancel out, replace UnsafeClaimRV with Release. 694 assert(Class == ARCInstKind::UnsafeClaimRV); 695 Value *CallArg = cast<CallInst>(Inst)->getArgOperand(0); 696 CallInst *Release = CallInst::Create( 697 EP.get(ARCRuntimeEntryPointKind::Release), CallArg, "", Inst); 698 assert(IsAlwaysTail(ARCInstKind::UnsafeClaimRV) && 699 "Expected UnsafeClaimRV to be safe to tail call"); 700 Release->setTailCall(); 701 Inst->replaceAllUsesWith(CallArg); 702 EraseInstruction(Inst); 703 704 // Run the normal optimizations on Release. 705 OptimizeIndividualCallImpl(F, Release, ARCInstKind::Release, Arg); 706 return true; 707 } 708 709 /// Turn objc_autoreleaseReturnValue into objc_autorelease if the result is not 710 /// used as a return value. 711 void ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, 712 Instruction *AutoreleaseRV, 713 ARCInstKind &Class) { 714 // Check for a return of the pointer value. 715 const Value *Ptr = GetArgRCIdentityRoot(AutoreleaseRV); 716 717 // If the argument is ConstantPointerNull or UndefValue, its other users 718 // aren't actually interesting to look at. 719 if (isa<ConstantData>(Ptr)) 720 return; 721 722 SmallVector<const Value *, 2> Users; 723 Users.push_back(Ptr); 724 725 // Add PHIs that are equivalent to Ptr to Users. 726 if (const PHINode *PN = dyn_cast<PHINode>(Ptr)) 727 getEquivalentPHIs(*PN, Users); 728 729 do { 730 Ptr = Users.pop_back_val(); 731 for (const User *U : Ptr->users()) { 732 if (isa<ReturnInst>(U) || GetBasicARCInstKind(U) == ARCInstKind::RetainRV) 733 return; 734 if (isa<BitCastInst>(U)) 735 Users.push_back(U); 736 } 737 } while (!Users.empty()); 738 739 Changed = true; 740 ++NumPeeps; 741 742 LLVM_DEBUG( 743 dbgs() << "Transforming objc_autoreleaseReturnValue => " 744 "objc_autorelease since its operand is not used as a return " 745 "value.\n" 746 "Old = " 747 << *AutoreleaseRV << "\n"); 748 749 CallInst *AutoreleaseRVCI = cast<CallInst>(AutoreleaseRV); 750 Function *NewDecl = EP.get(ARCRuntimeEntryPointKind::Autorelease); 751 AutoreleaseRVCI->setCalledFunction(NewDecl); 752 AutoreleaseRVCI->setTailCall(false); // Never tail call objc_autorelease. 753 Class = ARCInstKind::Autorelease; 754 755 LLVM_DEBUG(dbgs() << "New: " << *AutoreleaseRV << "\n"); 756 } 757 758 /// Visit each call, one at a time, and make simplifications without doing any 759 /// additional analysis. 760 void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { 761 LLVM_DEBUG(dbgs() << "\n== ObjCARCOpt::OptimizeIndividualCalls ==\n"); 762 // Reset all the flags in preparation for recomputing them. 763 UsedInThisFunction = 0; 764 765 // Store any delayed AutoreleaseRV intrinsics, so they can be easily paired 766 // with RetainRV and UnsafeClaimRV. 767 Instruction *DelayedAutoreleaseRV = nullptr; 768 const Value *DelayedAutoreleaseRVArg = nullptr; 769 auto setDelayedAutoreleaseRV = [&](Instruction *AutoreleaseRV) { 770 assert(!DelayedAutoreleaseRV || !AutoreleaseRV); 771 DelayedAutoreleaseRV = AutoreleaseRV; 772 DelayedAutoreleaseRVArg = nullptr; 773 }; 774 auto optimizeDelayedAutoreleaseRV = [&]() { 775 if (!DelayedAutoreleaseRV) 776 return; 777 OptimizeIndividualCallImpl(F, DelayedAutoreleaseRV, 778 ARCInstKind::AutoreleaseRV, 779 DelayedAutoreleaseRVArg); 780 setDelayedAutoreleaseRV(nullptr); 781 }; 782 auto shouldDelayAutoreleaseRV = [&](Instruction *NonARCInst) { 783 // Nothing to delay, but we may as well skip the logic below. 784 if (!DelayedAutoreleaseRV) 785 return true; 786 787 // If we hit the end of the basic block we're not going to find an RV-pair. 788 // Stop delaying. 789 if (NonARCInst->isTerminator()) 790 return false; 791 792 // Given the frontend rules for emitting AutoreleaseRV, RetainRV, and 793 // UnsafeClaimRV, it's probably safe to skip over even opaque function calls 794 // here since OptimizeInlinedAutoreleaseRVCall will confirm that they 795 // have the same RCIdentityRoot. However, what really matters is 796 // skipping instructions or intrinsics that the inliner could leave behind; 797 // be conservative for now and don't skip over opaque calls, which could 798 // potentially include other ARC calls. 799 auto *CB = dyn_cast<CallBase>(NonARCInst); 800 if (!CB) 801 return true; 802 return CB->getIntrinsicID() != Intrinsic::not_intrinsic; 803 }; 804 805 // Visit all objc_* calls in F. 806 for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) { 807 Instruction *Inst = &*I++; 808 809 if (auto *CI = dyn_cast<CallInst>(Inst)) 810 if (objcarc::hasAttachedCallOpBundle(CI)) { 811 BundledInsts->insertRVCall(&*I, CI); 812 Changed = true; 813 } 814 815 ARCInstKind Class = GetBasicARCInstKind(Inst); 816 817 // Skip this loop if this instruction isn't itself an ARC intrinsic. 818 const Value *Arg = nullptr; 819 switch (Class) { 820 default: 821 optimizeDelayedAutoreleaseRV(); 822 break; 823 case ARCInstKind::CallOrUser: 824 case ARCInstKind::User: 825 case ARCInstKind::None: 826 // This is a non-ARC instruction. If we're delaying an AutoreleaseRV, 827 // check if it's safe to skip over it; if not, optimize the AutoreleaseRV 828 // now. 829 if (!shouldDelayAutoreleaseRV(Inst)) 830 optimizeDelayedAutoreleaseRV(); 831 continue; 832 case ARCInstKind::AutoreleaseRV: 833 optimizeDelayedAutoreleaseRV(); 834 setDelayedAutoreleaseRV(Inst); 835 continue; 836 case ARCInstKind::RetainRV: 837 case ARCInstKind::UnsafeClaimRV: 838 if (DelayedAutoreleaseRV) { 839 // We have a potential RV pair. Check if they cancel out. 840 if (OptimizeInlinedAutoreleaseRVCall(F, Inst, Arg, Class, 841 DelayedAutoreleaseRV, 842 DelayedAutoreleaseRVArg)) { 843 setDelayedAutoreleaseRV(nullptr); 844 continue; 845 } 846 optimizeDelayedAutoreleaseRV(); 847 } 848 break; 849 } 850 851 OptimizeIndividualCallImpl(F, Inst, Class, Arg); 852 } 853 854 // Catch the final delayed AutoreleaseRV. 855 optimizeDelayedAutoreleaseRV(); 856 } 857 858 /// This function returns true if the value is inert. An ObjC ARC runtime call 859 /// taking an inert operand can be safely deleted. 860 static bool isInertARCValue(Value *V, SmallPtrSet<Value *, 1> &VisitedPhis) { 861 V = V->stripPointerCasts(); 862 863 if (IsNullOrUndef(V)) 864 return true; 865 866 // See if this is a global attribute annotated with an 'objc_arc_inert'. 867 if (auto *GV = dyn_cast<GlobalVariable>(V)) 868 if (GV->hasAttribute("objc_arc_inert")) 869 return true; 870 871 if (auto PN = dyn_cast<PHINode>(V)) { 872 // Ignore this phi if it has already been discovered. 873 if (!VisitedPhis.insert(PN).second) 874 return true; 875 // Look through phis's operands. 876 for (Value *Opnd : PN->incoming_values()) 877 if (!isInertARCValue(Opnd, VisitedPhis)) 878 return false; 879 return true; 880 } 881 882 return false; 883 } 884 885 void ObjCARCOpt::OptimizeIndividualCallImpl(Function &F, Instruction *Inst, 886 ARCInstKind Class, 887 const Value *Arg) { 888 LLVM_DEBUG(dbgs() << "Visiting: Class: " << Class << "; " << *Inst << "\n"); 889 890 // We can delete this call if it takes an inert value. 891 SmallPtrSet<Value *, 1> VisitedPhis; 892 893 if (BundledInsts->contains(Inst)) { 894 UsedInThisFunction |= 1 << unsigned(Class); 895 return; 896 } 897 898 if (IsNoopOnGlobal(Class)) 899 if (isInertARCValue(Inst->getOperand(0), VisitedPhis)) { 900 if (!Inst->getType()->isVoidTy()) 901 Inst->replaceAllUsesWith(Inst->getOperand(0)); 902 Inst->eraseFromParent(); 903 Changed = true; 904 return; 905 } 906 907 switch (Class) { 908 default: 909 break; 910 911 // Delete no-op casts. These function calls have special semantics, but 912 // the semantics are entirely implemented via lowering in the front-end, 913 // so by the time they reach the optimizer, they are just no-op calls 914 // which return their argument. 915 // 916 // There are gray areas here, as the ability to cast reference-counted 917 // pointers to raw void* and back allows code to break ARC assumptions, 918 // however these are currently considered to be unimportant. 919 case ARCInstKind::NoopCast: 920 Changed = true; 921 ++NumNoops; 922 LLVM_DEBUG(dbgs() << "Erasing no-op cast: " << *Inst << "\n"); 923 EraseInstruction(Inst); 924 return; 925 926 // If the pointer-to-weak-pointer is null, it's undefined behavior. 927 case ARCInstKind::StoreWeak: 928 case ARCInstKind::LoadWeak: 929 case ARCInstKind::LoadWeakRetained: 930 case ARCInstKind::InitWeak: 931 case ARCInstKind::DestroyWeak: { 932 CallInst *CI = cast<CallInst>(Inst); 933 if (IsNullOrUndef(CI->getArgOperand(0))) { 934 Changed = true; 935 new StoreInst(ConstantInt::getTrue(CI->getContext()), 936 UndefValue::get(Type::getInt1PtrTy(CI->getContext())), CI); 937 Value *NewValue = UndefValue::get(CI->getType()); 938 LLVM_DEBUG( 939 dbgs() << "A null pointer-to-weak-pointer is undefined behavior." 940 "\nOld = " 941 << *CI << "\nNew = " << *NewValue << "\n"); 942 CI->replaceAllUsesWith(NewValue); 943 CI->eraseFromParent(); 944 return; 945 } 946 break; 947 } 948 case ARCInstKind::CopyWeak: 949 case ARCInstKind::MoveWeak: { 950 CallInst *CI = cast<CallInst>(Inst); 951 if (IsNullOrUndef(CI->getArgOperand(0)) || 952 IsNullOrUndef(CI->getArgOperand(1))) { 953 Changed = true; 954 new StoreInst(ConstantInt::getTrue(CI->getContext()), 955 UndefValue::get(Type::getInt1PtrTy(CI->getContext())), CI); 956 957 Value *NewValue = UndefValue::get(CI->getType()); 958 LLVM_DEBUG( 959 dbgs() << "A null pointer-to-weak-pointer is undefined behavior." 960 "\nOld = " 961 << *CI << "\nNew = " << *NewValue << "\n"); 962 963 CI->replaceAllUsesWith(NewValue); 964 CI->eraseFromParent(); 965 return; 966 } 967 break; 968 } 969 case ARCInstKind::RetainRV: 970 if (OptimizeRetainRVCall(F, Inst)) 971 return; 972 break; 973 case ARCInstKind::AutoreleaseRV: 974 OptimizeAutoreleaseRVCall(F, Inst, Class); 975 break; 976 } 977 978 // objc_autorelease(x) -> objc_release(x) if x is otherwise unused. 979 if (IsAutorelease(Class) && Inst->use_empty()) { 980 CallInst *Call = cast<CallInst>(Inst); 981 const Value *Arg = Call->getArgOperand(0); 982 Arg = FindSingleUseIdentifiedObject(Arg); 983 if (Arg) { 984 Changed = true; 985 ++NumAutoreleases; 986 987 // Create the declaration lazily. 988 LLVMContext &C = Inst->getContext(); 989 990 Function *Decl = EP.get(ARCRuntimeEntryPointKind::Release); 991 CallInst *NewCall = 992 CallInst::Create(Decl, Call->getArgOperand(0), "", Call); 993 NewCall->setMetadata(MDKindCache.get(ARCMDKindID::ImpreciseRelease), 994 MDNode::get(C, std::nullopt)); 995 996 LLVM_DEBUG(dbgs() << "Replacing autorelease{,RV}(x) with objc_release(x) " 997 "since x is otherwise unused.\nOld: " 998 << *Call << "\nNew: " << *NewCall << "\n"); 999 1000 EraseInstruction(Call); 1001 Inst = NewCall; 1002 Class = ARCInstKind::Release; 1003 } 1004 } 1005 1006 // For functions which can never be passed stack arguments, add 1007 // a tail keyword. 1008 if (IsAlwaysTail(Class) && !cast<CallInst>(Inst)->isNoTailCall()) { 1009 Changed = true; 1010 LLVM_DEBUG( 1011 dbgs() << "Adding tail keyword to function since it can never be " 1012 "passed stack args: " 1013 << *Inst << "\n"); 1014 cast<CallInst>(Inst)->setTailCall(); 1015 } 1016 1017 // Ensure that functions that can never have a "tail" keyword due to the 1018 // semantics of ARC truly do not do so. 1019 if (IsNeverTail(Class)) { 1020 Changed = true; 1021 LLVM_DEBUG(dbgs() << "Removing tail keyword from function: " << *Inst 1022 << "\n"); 1023 cast<CallInst>(Inst)->setTailCall(false); 1024 } 1025 1026 // Set nounwind as needed. 1027 if (IsNoThrow(Class)) { 1028 Changed = true; 1029 LLVM_DEBUG(dbgs() << "Found no throw class. Setting nounwind on: " << *Inst 1030 << "\n"); 1031 cast<CallInst>(Inst)->setDoesNotThrow(); 1032 } 1033 1034 // Note: This catches instructions unrelated to ARC. 1035 if (!IsNoopOnNull(Class)) { 1036 UsedInThisFunction |= 1 << unsigned(Class); 1037 return; 1038 } 1039 1040 // If we haven't already looked up the root, look it up now. 1041 if (!Arg) 1042 Arg = GetArgRCIdentityRoot(Inst); 1043 1044 // ARC calls with null are no-ops. Delete them. 1045 if (IsNullOrUndef(Arg)) { 1046 Changed = true; 1047 ++NumNoops; 1048 LLVM_DEBUG(dbgs() << "ARC calls with null are no-ops. Erasing: " << *Inst 1049 << "\n"); 1050 EraseInstruction(Inst); 1051 return; 1052 } 1053 1054 // Keep track of which of retain, release, autorelease, and retain_block 1055 // are actually present in this function. 1056 UsedInThisFunction |= 1 << unsigned(Class); 1057 1058 // If Arg is a PHI, and one or more incoming values to the 1059 // PHI are null, and the call is control-equivalent to the PHI, and there 1060 // are no relevant side effects between the PHI and the call, and the call 1061 // is not a release that doesn't have the clang.imprecise_release tag, the 1062 // call could be pushed up to just those paths with non-null incoming 1063 // values. For now, don't bother splitting critical edges for this. 1064 if (Class == ARCInstKind::Release && 1065 !Inst->getMetadata(MDKindCache.get(ARCMDKindID::ImpreciseRelease))) 1066 return; 1067 1068 SmallVector<std::pair<Instruction *, const Value *>, 4> Worklist; 1069 Worklist.push_back(std::make_pair(Inst, Arg)); 1070 do { 1071 std::pair<Instruction *, const Value *> Pair = Worklist.pop_back_val(); 1072 Inst = Pair.first; 1073 Arg = Pair.second; 1074 1075 const PHINode *PN = dyn_cast<PHINode>(Arg); 1076 if (!PN) 1077 continue; 1078 1079 // Determine if the PHI has any null operands, or any incoming 1080 // critical edges. 1081 bool HasNull = false; 1082 bool HasCriticalEdges = false; 1083 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { 1084 Value *Incoming = GetRCIdentityRoot(PN->getIncomingValue(i)); 1085 if (IsNullOrUndef(Incoming)) 1086 HasNull = true; 1087 else if (PN->getIncomingBlock(i)->getTerminator()->getNumSuccessors() != 1088 1) { 1089 HasCriticalEdges = true; 1090 break; 1091 } 1092 } 1093 // If we have null operands and no critical edges, optimize. 1094 if (HasCriticalEdges) 1095 continue; 1096 if (!HasNull) 1097 continue; 1098 1099 Instruction *DepInst = nullptr; 1100 1101 // Check that there is nothing that cares about the reference 1102 // count between the call and the phi. 1103 switch (Class) { 1104 case ARCInstKind::Retain: 1105 case ARCInstKind::RetainBlock: 1106 // These can always be moved up. 1107 break; 1108 case ARCInstKind::Release: 1109 // These can't be moved across things that care about the retain 1110 // count. 1111 DepInst = findSingleDependency(NeedsPositiveRetainCount, Arg, 1112 Inst->getParent(), Inst, PA); 1113 break; 1114 case ARCInstKind::Autorelease: 1115 // These can't be moved across autorelease pool scope boundaries. 1116 DepInst = findSingleDependency(AutoreleasePoolBoundary, Arg, 1117 Inst->getParent(), Inst, PA); 1118 break; 1119 case ARCInstKind::UnsafeClaimRV: 1120 case ARCInstKind::RetainRV: 1121 case ARCInstKind::AutoreleaseRV: 1122 // Don't move these; the RV optimization depends on the autoreleaseRV 1123 // being tail called, and the retainRV being immediately after a call 1124 // (which might still happen if we get lucky with codegen layout, but 1125 // it's not worth taking the chance). 1126 continue; 1127 default: 1128 llvm_unreachable("Invalid dependence flavor"); 1129 } 1130 1131 if (DepInst != PN) 1132 continue; 1133 1134 Changed = true; 1135 ++NumPartialNoops; 1136 // Clone the call into each predecessor that has a non-null value. 1137 CallInst *CInst = cast<CallInst>(Inst); 1138 Type *ParamTy = CInst->getArgOperand(0)->getType(); 1139 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { 1140 Value *Incoming = GetRCIdentityRoot(PN->getIncomingValue(i)); 1141 if (IsNullOrUndef(Incoming)) 1142 continue; 1143 Value *Op = PN->getIncomingValue(i); 1144 Instruction *InsertPos = &PN->getIncomingBlock(i)->back(); 1145 SmallVector<OperandBundleDef, 1> OpBundles; 1146 cloneOpBundlesIf(CInst, OpBundles, [](const OperandBundleUse &B) { 1147 return B.getTagID() != LLVMContext::OB_funclet; 1148 }); 1149 addOpBundleForFunclet(InsertPos->getParent(), OpBundles); 1150 CallInst *Clone = CallInst::Create(CInst, OpBundles); 1151 if (Op->getType() != ParamTy) 1152 Op = new BitCastInst(Op, ParamTy, "", InsertPos); 1153 Clone->setArgOperand(0, Op); 1154 Clone->insertBefore(InsertPos); 1155 1156 LLVM_DEBUG(dbgs() << "Cloning " << *CInst << "\n" 1157 "And inserting clone at " 1158 << *InsertPos << "\n"); 1159 Worklist.push_back(std::make_pair(Clone, Incoming)); 1160 } 1161 // Erase the original call. 1162 LLVM_DEBUG(dbgs() << "Erasing: " << *CInst << "\n"); 1163 EraseInstruction(CInst); 1164 } while (!Worklist.empty()); 1165 } 1166 1167 /// If we have a top down pointer in the S_Use state, make sure that there are 1168 /// no CFG hazards by checking the states of various bottom up pointers. 1169 static void CheckForUseCFGHazard(const Sequence SuccSSeq, 1170 const bool SuccSRRIKnownSafe, 1171 TopDownPtrState &S, 1172 bool &SomeSuccHasSame, 1173 bool &AllSuccsHaveSame, 1174 bool &NotAllSeqEqualButKnownSafe, 1175 bool &ShouldContinue) { 1176 switch (SuccSSeq) { 1177 case S_CanRelease: { 1178 if (!S.IsKnownSafe() && !SuccSRRIKnownSafe) { 1179 S.ClearSequenceProgress(); 1180 break; 1181 } 1182 S.SetCFGHazardAfflicted(true); 1183 ShouldContinue = true; 1184 break; 1185 } 1186 case S_Use: 1187 SomeSuccHasSame = true; 1188 break; 1189 case S_Stop: 1190 case S_MovableRelease: 1191 if (!S.IsKnownSafe() && !SuccSRRIKnownSafe) 1192 AllSuccsHaveSame = false; 1193 else 1194 NotAllSeqEqualButKnownSafe = true; 1195 break; 1196 case S_Retain: 1197 llvm_unreachable("bottom-up pointer in retain state!"); 1198 case S_None: 1199 llvm_unreachable("This should have been handled earlier."); 1200 } 1201 } 1202 1203 /// If we have a Top Down pointer in the S_CanRelease state, make sure that 1204 /// there are no CFG hazards by checking the states of various bottom up 1205 /// pointers. 1206 static void CheckForCanReleaseCFGHazard(const Sequence SuccSSeq, 1207 const bool SuccSRRIKnownSafe, 1208 TopDownPtrState &S, 1209 bool &SomeSuccHasSame, 1210 bool &AllSuccsHaveSame, 1211 bool &NotAllSeqEqualButKnownSafe) { 1212 switch (SuccSSeq) { 1213 case S_CanRelease: 1214 SomeSuccHasSame = true; 1215 break; 1216 case S_Stop: 1217 case S_MovableRelease: 1218 case S_Use: 1219 if (!S.IsKnownSafe() && !SuccSRRIKnownSafe) 1220 AllSuccsHaveSame = false; 1221 else 1222 NotAllSeqEqualButKnownSafe = true; 1223 break; 1224 case S_Retain: 1225 llvm_unreachable("bottom-up pointer in retain state!"); 1226 case S_None: 1227 llvm_unreachable("This should have been handled earlier."); 1228 } 1229 } 1230 1231 /// Check for critical edges, loop boundaries, irreducible control flow, or 1232 /// other CFG structures where moving code across the edge would result in it 1233 /// being executed more. 1234 void 1235 ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB, 1236 DenseMap<const BasicBlock *, BBState> &BBStates, 1237 BBState &MyStates) const { 1238 // If any top-down local-use or possible-dec has a succ which is earlier in 1239 // the sequence, forget it. 1240 for (auto I = MyStates.top_down_ptr_begin(), E = MyStates.top_down_ptr_end(); 1241 I != E; ++I) { 1242 TopDownPtrState &S = I->second; 1243 const Sequence Seq = I->second.GetSeq(); 1244 1245 // We only care about S_Retain, S_CanRelease, and S_Use. 1246 if (Seq == S_None) 1247 continue; 1248 1249 // Make sure that if extra top down states are added in the future that this 1250 // code is updated to handle it. 1251 assert((Seq == S_Retain || Seq == S_CanRelease || Seq == S_Use) && 1252 "Unknown top down sequence state."); 1253 1254 const Value *Arg = I->first; 1255 bool SomeSuccHasSame = false; 1256 bool AllSuccsHaveSame = true; 1257 bool NotAllSeqEqualButKnownSafe = false; 1258 1259 for (const BasicBlock *Succ : successors(BB)) { 1260 // If VisitBottomUp has pointer information for this successor, take 1261 // what we know about it. 1262 const DenseMap<const BasicBlock *, BBState>::iterator BBI = 1263 BBStates.find(Succ); 1264 assert(BBI != BBStates.end()); 1265 const BottomUpPtrState &SuccS = BBI->second.getPtrBottomUpState(Arg); 1266 const Sequence SuccSSeq = SuccS.GetSeq(); 1267 1268 // If bottom up, the pointer is in an S_None state, clear the sequence 1269 // progress since the sequence in the bottom up state finished 1270 // suggesting a mismatch in between retains/releases. This is true for 1271 // all three cases that we are handling here: S_Retain, S_Use, and 1272 // S_CanRelease. 1273 if (SuccSSeq == S_None) { 1274 S.ClearSequenceProgress(); 1275 continue; 1276 } 1277 1278 // If we have S_Use or S_CanRelease, perform our check for cfg hazard 1279 // checks. 1280 const bool SuccSRRIKnownSafe = SuccS.IsKnownSafe(); 1281 1282 // *NOTE* We do not use Seq from above here since we are allowing for 1283 // S.GetSeq() to change while we are visiting basic blocks. 1284 switch(S.GetSeq()) { 1285 case S_Use: { 1286 bool ShouldContinue = false; 1287 CheckForUseCFGHazard(SuccSSeq, SuccSRRIKnownSafe, S, SomeSuccHasSame, 1288 AllSuccsHaveSame, NotAllSeqEqualButKnownSafe, 1289 ShouldContinue); 1290 if (ShouldContinue) 1291 continue; 1292 break; 1293 } 1294 case S_CanRelease: 1295 CheckForCanReleaseCFGHazard(SuccSSeq, SuccSRRIKnownSafe, S, 1296 SomeSuccHasSame, AllSuccsHaveSame, 1297 NotAllSeqEqualButKnownSafe); 1298 break; 1299 case S_Retain: 1300 case S_None: 1301 case S_Stop: 1302 case S_MovableRelease: 1303 break; 1304 } 1305 } 1306 1307 // If the state at the other end of any of the successor edges 1308 // matches the current state, require all edges to match. This 1309 // guards against loops in the middle of a sequence. 1310 if (SomeSuccHasSame && !AllSuccsHaveSame) { 1311 S.ClearSequenceProgress(); 1312 } else if (NotAllSeqEqualButKnownSafe) { 1313 // If we would have cleared the state foregoing the fact that we are known 1314 // safe, stop code motion. This is because whether or not it is safe to 1315 // remove RR pairs via KnownSafe is an orthogonal concept to whether we 1316 // are allowed to perform code motion. 1317 S.SetCFGHazardAfflicted(true); 1318 } 1319 } 1320 } 1321 1322 bool ObjCARCOpt::VisitInstructionBottomUp( 1323 Instruction *Inst, BasicBlock *BB, BlotMapVector<Value *, RRInfo> &Retains, 1324 BBState &MyStates) { 1325 bool NestingDetected = false; 1326 ARCInstKind Class = GetARCInstKind(Inst); 1327 const Value *Arg = nullptr; 1328 1329 LLVM_DEBUG(dbgs() << " Class: " << Class << "\n"); 1330 1331 switch (Class) { 1332 case ARCInstKind::Release: { 1333 Arg = GetArgRCIdentityRoot(Inst); 1334 1335 BottomUpPtrState &S = MyStates.getPtrBottomUpState(Arg); 1336 NestingDetected |= S.InitBottomUp(MDKindCache, Inst); 1337 break; 1338 } 1339 case ARCInstKind::RetainBlock: 1340 // In OptimizeIndividualCalls, we have strength reduced all optimizable 1341 // objc_retainBlocks to objc_retains. Thus at this point any 1342 // objc_retainBlocks that we see are not optimizable. 1343 break; 1344 case ARCInstKind::Retain: 1345 case ARCInstKind::RetainRV: { 1346 Arg = GetArgRCIdentityRoot(Inst); 1347 BottomUpPtrState &S = MyStates.getPtrBottomUpState(Arg); 1348 if (S.MatchWithRetain()) { 1349 // Don't do retain+release tracking for ARCInstKind::RetainRV, because 1350 // it's better to let it remain as the first instruction after a call. 1351 if (Class != ARCInstKind::RetainRV) { 1352 LLVM_DEBUG(dbgs() << " Matching with: " << *Inst << "\n"); 1353 Retains[Inst] = S.GetRRInfo(); 1354 } 1355 S.ClearSequenceProgress(); 1356 } 1357 // A retain moving bottom up can be a use. 1358 break; 1359 } 1360 case ARCInstKind::AutoreleasepoolPop: 1361 // Conservatively, clear MyStates for all known pointers. 1362 MyStates.clearBottomUpPointers(); 1363 return NestingDetected; 1364 case ARCInstKind::AutoreleasepoolPush: 1365 case ARCInstKind::None: 1366 // These are irrelevant. 1367 return NestingDetected; 1368 default: 1369 break; 1370 } 1371 1372 // Consider any other possible effects of this instruction on each 1373 // pointer being tracked. 1374 for (auto MI = MyStates.bottom_up_ptr_begin(), 1375 ME = MyStates.bottom_up_ptr_end(); 1376 MI != ME; ++MI) { 1377 const Value *Ptr = MI->first; 1378 if (Ptr == Arg) 1379 continue; // Handled above. 1380 BottomUpPtrState &S = MI->second; 1381 1382 if (S.HandlePotentialAlterRefCount(Inst, Ptr, PA, Class)) 1383 continue; 1384 1385 S.HandlePotentialUse(BB, Inst, Ptr, PA, Class); 1386 } 1387 1388 return NestingDetected; 1389 } 1390 1391 bool ObjCARCOpt::VisitBottomUp(BasicBlock *BB, 1392 DenseMap<const BasicBlock *, BBState> &BBStates, 1393 BlotMapVector<Value *, RRInfo> &Retains) { 1394 LLVM_DEBUG(dbgs() << "\n== ObjCARCOpt::VisitBottomUp ==\n"); 1395 1396 bool NestingDetected = false; 1397 BBState &MyStates = BBStates[BB]; 1398 1399 // Merge the states from each successor to compute the initial state 1400 // for the current block. 1401 BBState::edge_iterator SI(MyStates.succ_begin()), 1402 SE(MyStates.succ_end()); 1403 if (SI != SE) { 1404 const BasicBlock *Succ = *SI; 1405 DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Succ); 1406 assert(I != BBStates.end()); 1407 MyStates.InitFromSucc(I->second); 1408 ++SI; 1409 for (; SI != SE; ++SI) { 1410 Succ = *SI; 1411 I = BBStates.find(Succ); 1412 assert(I != BBStates.end()); 1413 MyStates.MergeSucc(I->second); 1414 } 1415 } 1416 1417 LLVM_DEBUG(dbgs() << "Before:\n" 1418 << BBStates[BB] << "\n" 1419 << "Performing Dataflow:\n"); 1420 1421 // Visit all the instructions, bottom-up. 1422 for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; --I) { 1423 Instruction *Inst = &*std::prev(I); 1424 1425 // Invoke instructions are visited as part of their successors (below). 1426 if (isa<InvokeInst>(Inst)) 1427 continue; 1428 1429 LLVM_DEBUG(dbgs() << " Visiting " << *Inst << "\n"); 1430 1431 NestingDetected |= VisitInstructionBottomUp(Inst, BB, Retains, MyStates); 1432 1433 // Bail out if the number of pointers being tracked becomes too large so 1434 // that this pass can complete in a reasonable amount of time. 1435 if (MyStates.bottom_up_ptr_list_size() > MaxPtrStates) { 1436 DisableRetainReleasePairing = true; 1437 return false; 1438 } 1439 } 1440 1441 // If there's a predecessor with an invoke, visit the invoke as if it were 1442 // part of this block, since we can't insert code after an invoke in its own 1443 // block, and we don't want to split critical edges. 1444 for (BBState::edge_iterator PI(MyStates.pred_begin()), 1445 PE(MyStates.pred_end()); PI != PE; ++PI) { 1446 BasicBlock *Pred = *PI; 1447 if (InvokeInst *II = dyn_cast<InvokeInst>(&Pred->back())) 1448 NestingDetected |= VisitInstructionBottomUp(II, BB, Retains, MyStates); 1449 } 1450 1451 LLVM_DEBUG(dbgs() << "\nFinal State:\n" << BBStates[BB] << "\n"); 1452 1453 return NestingDetected; 1454 } 1455 1456 // Fill ReleaseInsertPtToRCIdentityRoots, which is a map from insertion points 1457 // to the set of RC identity roots that would be released by the release calls 1458 // moved to the insertion points. 1459 static void collectReleaseInsertPts( 1460 const BlotMapVector<Value *, RRInfo> &Retains, 1461 DenseMap<const Instruction *, SmallPtrSet<const Value *, 2>> 1462 &ReleaseInsertPtToRCIdentityRoots) { 1463 for (const auto &P : Retains) { 1464 // Retains is a map from an objc_retain call to a RRInfo of the RC identity 1465 // root of the call. Get the RC identity root of the objc_retain call. 1466 Instruction *Retain = cast<Instruction>(P.first); 1467 Value *Root = GetRCIdentityRoot(Retain->getOperand(0)); 1468 // Collect all the insertion points of the objc_release calls that release 1469 // the RC identity root of the objc_retain call. 1470 for (const Instruction *InsertPt : P.second.ReverseInsertPts) 1471 ReleaseInsertPtToRCIdentityRoots[InsertPt].insert(Root); 1472 } 1473 } 1474 1475 // Get the RC identity roots from an insertion point of an objc_release call. 1476 // Return nullptr if the passed instruction isn't an insertion point. 1477 static const SmallPtrSet<const Value *, 2> * 1478 getRCIdentityRootsFromReleaseInsertPt( 1479 const Instruction *InsertPt, 1480 const DenseMap<const Instruction *, SmallPtrSet<const Value *, 2>> 1481 &ReleaseInsertPtToRCIdentityRoots) { 1482 auto I = ReleaseInsertPtToRCIdentityRoots.find(InsertPt); 1483 if (I == ReleaseInsertPtToRCIdentityRoots.end()) 1484 return nullptr; 1485 return &I->second; 1486 } 1487 1488 bool ObjCARCOpt::VisitInstructionTopDown( 1489 Instruction *Inst, DenseMap<Value *, RRInfo> &Releases, BBState &MyStates, 1490 const DenseMap<const Instruction *, SmallPtrSet<const Value *, 2>> 1491 &ReleaseInsertPtToRCIdentityRoots) { 1492 bool NestingDetected = false; 1493 ARCInstKind Class = GetARCInstKind(Inst); 1494 const Value *Arg = nullptr; 1495 1496 // Make sure a call to objc_retain isn't moved past insertion points of calls 1497 // to objc_release. 1498 if (const SmallPtrSet<const Value *, 2> *Roots = 1499 getRCIdentityRootsFromReleaseInsertPt( 1500 Inst, ReleaseInsertPtToRCIdentityRoots)) 1501 for (const auto *Root : *Roots) { 1502 TopDownPtrState &S = MyStates.getPtrTopDownState(Root); 1503 // Disable code motion if the current position is S_Retain to prevent 1504 // moving the objc_retain call past objc_release calls. If it's 1505 // S_CanRelease or larger, it's not necessary to disable code motion as 1506 // the insertion points that prevent the objc_retain call from moving down 1507 // should have been set already. 1508 if (S.GetSeq() == S_Retain) 1509 S.SetCFGHazardAfflicted(true); 1510 } 1511 1512 LLVM_DEBUG(dbgs() << " Class: " << Class << "\n"); 1513 1514 switch (Class) { 1515 case ARCInstKind::RetainBlock: 1516 // In OptimizeIndividualCalls, we have strength reduced all optimizable 1517 // objc_retainBlocks to objc_retains. Thus at this point any 1518 // objc_retainBlocks that we see are not optimizable. We need to break since 1519 // a retain can be a potential use. 1520 break; 1521 case ARCInstKind::Retain: 1522 case ARCInstKind::RetainRV: { 1523 Arg = GetArgRCIdentityRoot(Inst); 1524 TopDownPtrState &S = MyStates.getPtrTopDownState(Arg); 1525 NestingDetected |= S.InitTopDown(Class, Inst); 1526 // A retain can be a potential use; proceed to the generic checking 1527 // code below. 1528 break; 1529 } 1530 case ARCInstKind::Release: { 1531 Arg = GetArgRCIdentityRoot(Inst); 1532 TopDownPtrState &S = MyStates.getPtrTopDownState(Arg); 1533 // Try to form a tentative pair in between this release instruction and the 1534 // top down pointers that we are tracking. 1535 if (S.MatchWithRelease(MDKindCache, Inst)) { 1536 // If we succeed, copy S's RRInfo into the Release -> {Retain Set 1537 // Map}. Then we clear S. 1538 LLVM_DEBUG(dbgs() << " Matching with: " << *Inst << "\n"); 1539 Releases[Inst] = S.GetRRInfo(); 1540 S.ClearSequenceProgress(); 1541 } 1542 break; 1543 } 1544 case ARCInstKind::AutoreleasepoolPop: 1545 // Conservatively, clear MyStates for all known pointers. 1546 MyStates.clearTopDownPointers(); 1547 return false; 1548 case ARCInstKind::AutoreleasepoolPush: 1549 case ARCInstKind::None: 1550 // These can not be uses of 1551 return false; 1552 default: 1553 break; 1554 } 1555 1556 // Consider any other possible effects of this instruction on each 1557 // pointer being tracked. 1558 for (auto MI = MyStates.top_down_ptr_begin(), 1559 ME = MyStates.top_down_ptr_end(); 1560 MI != ME; ++MI) { 1561 const Value *Ptr = MI->first; 1562 if (Ptr == Arg) 1563 continue; // Handled above. 1564 TopDownPtrState &S = MI->second; 1565 if (S.HandlePotentialAlterRefCount(Inst, Ptr, PA, Class, *BundledInsts)) 1566 continue; 1567 1568 S.HandlePotentialUse(Inst, Ptr, PA, Class); 1569 } 1570 1571 return NestingDetected; 1572 } 1573 1574 bool ObjCARCOpt::VisitTopDown( 1575 BasicBlock *BB, DenseMap<const BasicBlock *, BBState> &BBStates, 1576 DenseMap<Value *, RRInfo> &Releases, 1577 const DenseMap<const Instruction *, SmallPtrSet<const Value *, 2>> 1578 &ReleaseInsertPtToRCIdentityRoots) { 1579 LLVM_DEBUG(dbgs() << "\n== ObjCARCOpt::VisitTopDown ==\n"); 1580 bool NestingDetected = false; 1581 BBState &MyStates = BBStates[BB]; 1582 1583 // Merge the states from each predecessor to compute the initial state 1584 // for the current block. 1585 BBState::edge_iterator PI(MyStates.pred_begin()), 1586 PE(MyStates.pred_end()); 1587 if (PI != PE) { 1588 const BasicBlock *Pred = *PI; 1589 DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Pred); 1590 assert(I != BBStates.end()); 1591 MyStates.InitFromPred(I->second); 1592 ++PI; 1593 for (; PI != PE; ++PI) { 1594 Pred = *PI; 1595 I = BBStates.find(Pred); 1596 assert(I != BBStates.end()); 1597 MyStates.MergePred(I->second); 1598 } 1599 } 1600 1601 // Check that BB and MyStates have the same number of predecessors. This 1602 // prevents retain calls that live outside a loop from being moved into the 1603 // loop. 1604 if (!BB->hasNPredecessors(MyStates.pred_end() - MyStates.pred_begin())) 1605 for (auto I = MyStates.top_down_ptr_begin(), 1606 E = MyStates.top_down_ptr_end(); 1607 I != E; ++I) 1608 I->second.SetCFGHazardAfflicted(true); 1609 1610 LLVM_DEBUG(dbgs() << "Before:\n" 1611 << BBStates[BB] << "\n" 1612 << "Performing Dataflow:\n"); 1613 1614 // Visit all the instructions, top-down. 1615 for (Instruction &Inst : *BB) { 1616 LLVM_DEBUG(dbgs() << " Visiting " << Inst << "\n"); 1617 1618 NestingDetected |= VisitInstructionTopDown( 1619 &Inst, Releases, MyStates, ReleaseInsertPtToRCIdentityRoots); 1620 1621 // Bail out if the number of pointers being tracked becomes too large so 1622 // that this pass can complete in a reasonable amount of time. 1623 if (MyStates.top_down_ptr_list_size() > MaxPtrStates) { 1624 DisableRetainReleasePairing = true; 1625 return false; 1626 } 1627 } 1628 1629 LLVM_DEBUG(dbgs() << "\nState Before Checking for CFG Hazards:\n" 1630 << BBStates[BB] << "\n\n"); 1631 CheckForCFGHazards(BB, BBStates, MyStates); 1632 LLVM_DEBUG(dbgs() << "Final State:\n" << BBStates[BB] << "\n"); 1633 return NestingDetected; 1634 } 1635 1636 static void 1637 ComputePostOrders(Function &F, 1638 SmallVectorImpl<BasicBlock *> &PostOrder, 1639 SmallVectorImpl<BasicBlock *> &ReverseCFGPostOrder, 1640 unsigned NoObjCARCExceptionsMDKind, 1641 DenseMap<const BasicBlock *, BBState> &BBStates) { 1642 /// The visited set, for doing DFS walks. 1643 SmallPtrSet<BasicBlock *, 16> Visited; 1644 1645 // Do DFS, computing the PostOrder. 1646 SmallPtrSet<BasicBlock *, 16> OnStack; 1647 SmallVector<std::pair<BasicBlock *, succ_iterator>, 16> SuccStack; 1648 1649 // Functions always have exactly one entry block, and we don't have 1650 // any other block that we treat like an entry block. 1651 BasicBlock *EntryBB = &F.getEntryBlock(); 1652 BBState &MyStates = BBStates[EntryBB]; 1653 MyStates.SetAsEntry(); 1654 Instruction *EntryTI = EntryBB->getTerminator(); 1655 SuccStack.push_back(std::make_pair(EntryBB, succ_iterator(EntryTI))); 1656 Visited.insert(EntryBB); 1657 OnStack.insert(EntryBB); 1658 do { 1659 dfs_next_succ: 1660 BasicBlock *CurrBB = SuccStack.back().first; 1661 succ_iterator SE(CurrBB->getTerminator(), false); 1662 1663 while (SuccStack.back().second != SE) { 1664 BasicBlock *SuccBB = *SuccStack.back().second++; 1665 if (Visited.insert(SuccBB).second) { 1666 SuccStack.push_back( 1667 std::make_pair(SuccBB, succ_iterator(SuccBB->getTerminator()))); 1668 BBStates[CurrBB].addSucc(SuccBB); 1669 BBState &SuccStates = BBStates[SuccBB]; 1670 SuccStates.addPred(CurrBB); 1671 OnStack.insert(SuccBB); 1672 goto dfs_next_succ; 1673 } 1674 1675 if (!OnStack.count(SuccBB)) { 1676 BBStates[CurrBB].addSucc(SuccBB); 1677 BBStates[SuccBB].addPred(CurrBB); 1678 } 1679 } 1680 OnStack.erase(CurrBB); 1681 PostOrder.push_back(CurrBB); 1682 SuccStack.pop_back(); 1683 } while (!SuccStack.empty()); 1684 1685 Visited.clear(); 1686 1687 // Do reverse-CFG DFS, computing the reverse-CFG PostOrder. 1688 // Functions may have many exits, and there also blocks which we treat 1689 // as exits due to ignored edges. 1690 SmallVector<std::pair<BasicBlock *, BBState::edge_iterator>, 16> PredStack; 1691 for (BasicBlock &ExitBB : F) { 1692 BBState &MyStates = BBStates[&ExitBB]; 1693 if (!MyStates.isExit()) 1694 continue; 1695 1696 MyStates.SetAsExit(); 1697 1698 PredStack.push_back(std::make_pair(&ExitBB, MyStates.pred_begin())); 1699 Visited.insert(&ExitBB); 1700 while (!PredStack.empty()) { 1701 reverse_dfs_next_succ: 1702 BBState::edge_iterator PE = BBStates[PredStack.back().first].pred_end(); 1703 while (PredStack.back().second != PE) { 1704 BasicBlock *BB = *PredStack.back().second++; 1705 if (Visited.insert(BB).second) { 1706 PredStack.push_back(std::make_pair(BB, BBStates[BB].pred_begin())); 1707 goto reverse_dfs_next_succ; 1708 } 1709 } 1710 ReverseCFGPostOrder.push_back(PredStack.pop_back_val().first); 1711 } 1712 } 1713 } 1714 1715 // Visit the function both top-down and bottom-up. 1716 bool ObjCARCOpt::Visit(Function &F, 1717 DenseMap<const BasicBlock *, BBState> &BBStates, 1718 BlotMapVector<Value *, RRInfo> &Retains, 1719 DenseMap<Value *, RRInfo> &Releases) { 1720 // Use reverse-postorder traversals, because we magically know that loops 1721 // will be well behaved, i.e. they won't repeatedly call retain on a single 1722 // pointer without doing a release. We can't use the ReversePostOrderTraversal 1723 // class here because we want the reverse-CFG postorder to consider each 1724 // function exit point, and we want to ignore selected cycle edges. 1725 SmallVector<BasicBlock *, 16> PostOrder; 1726 SmallVector<BasicBlock *, 16> ReverseCFGPostOrder; 1727 ComputePostOrders(F, PostOrder, ReverseCFGPostOrder, 1728 MDKindCache.get(ARCMDKindID::NoObjCARCExceptions), 1729 BBStates); 1730 1731 // Use reverse-postorder on the reverse CFG for bottom-up. 1732 bool BottomUpNestingDetected = false; 1733 for (BasicBlock *BB : llvm::reverse(ReverseCFGPostOrder)) { 1734 BottomUpNestingDetected |= VisitBottomUp(BB, BBStates, Retains); 1735 if (DisableRetainReleasePairing) 1736 return false; 1737 } 1738 1739 DenseMap<const Instruction *, SmallPtrSet<const Value *, 2>> 1740 ReleaseInsertPtToRCIdentityRoots; 1741 collectReleaseInsertPts(Retains, ReleaseInsertPtToRCIdentityRoots); 1742 1743 // Use reverse-postorder for top-down. 1744 bool TopDownNestingDetected = false; 1745 for (BasicBlock *BB : llvm::reverse(PostOrder)) { 1746 TopDownNestingDetected |= 1747 VisitTopDown(BB, BBStates, Releases, ReleaseInsertPtToRCIdentityRoots); 1748 if (DisableRetainReleasePairing) 1749 return false; 1750 } 1751 1752 return TopDownNestingDetected && BottomUpNestingDetected; 1753 } 1754 1755 /// Move the calls in RetainsToMove and ReleasesToMove. 1756 void ObjCARCOpt::MoveCalls(Value *Arg, RRInfo &RetainsToMove, 1757 RRInfo &ReleasesToMove, 1758 BlotMapVector<Value *, RRInfo> &Retains, 1759 DenseMap<Value *, RRInfo> &Releases, 1760 SmallVectorImpl<Instruction *> &DeadInsts, 1761 Module *M) { 1762 Type *ArgTy = Arg->getType(); 1763 Type *ParamTy = PointerType::getUnqual(Type::getInt8Ty(ArgTy->getContext())); 1764 1765 LLVM_DEBUG(dbgs() << "== ObjCARCOpt::MoveCalls ==\n"); 1766 1767 // Insert the new retain and release calls. 1768 for (Instruction *InsertPt : ReleasesToMove.ReverseInsertPts) { 1769 Value *MyArg = ArgTy == ParamTy ? Arg : 1770 new BitCastInst(Arg, ParamTy, "", InsertPt); 1771 Function *Decl = EP.get(ARCRuntimeEntryPointKind::Retain); 1772 SmallVector<OperandBundleDef, 1> BundleList; 1773 addOpBundleForFunclet(InsertPt->getParent(), BundleList); 1774 CallInst *Call = CallInst::Create(Decl, MyArg, BundleList, "", InsertPt); 1775 Call->setDoesNotThrow(); 1776 Call->setTailCall(); 1777 1778 LLVM_DEBUG(dbgs() << "Inserting new Retain: " << *Call 1779 << "\n" 1780 "At insertion point: " 1781 << *InsertPt << "\n"); 1782 } 1783 for (Instruction *InsertPt : RetainsToMove.ReverseInsertPts) { 1784 Value *MyArg = ArgTy == ParamTy ? Arg : 1785 new BitCastInst(Arg, ParamTy, "", InsertPt); 1786 Function *Decl = EP.get(ARCRuntimeEntryPointKind::Release); 1787 SmallVector<OperandBundleDef, 1> BundleList; 1788 addOpBundleForFunclet(InsertPt->getParent(), BundleList); 1789 CallInst *Call = CallInst::Create(Decl, MyArg, BundleList, "", InsertPt); 1790 // Attach a clang.imprecise_release metadata tag, if appropriate. 1791 if (MDNode *M = ReleasesToMove.ReleaseMetadata) 1792 Call->setMetadata(MDKindCache.get(ARCMDKindID::ImpreciseRelease), M); 1793 Call->setDoesNotThrow(); 1794 if (ReleasesToMove.IsTailCallRelease) 1795 Call->setTailCall(); 1796 1797 LLVM_DEBUG(dbgs() << "Inserting new Release: " << *Call 1798 << "\n" 1799 "At insertion point: " 1800 << *InsertPt << "\n"); 1801 } 1802 1803 // Delete the original retain and release calls. 1804 for (Instruction *OrigRetain : RetainsToMove.Calls) { 1805 Retains.blot(OrigRetain); 1806 DeadInsts.push_back(OrigRetain); 1807 LLVM_DEBUG(dbgs() << "Deleting retain: " << *OrigRetain << "\n"); 1808 } 1809 for (Instruction *OrigRelease : ReleasesToMove.Calls) { 1810 Releases.erase(OrigRelease); 1811 DeadInsts.push_back(OrigRelease); 1812 LLVM_DEBUG(dbgs() << "Deleting release: " << *OrigRelease << "\n"); 1813 } 1814 } 1815 1816 bool ObjCARCOpt::PairUpRetainsAndReleases( 1817 DenseMap<const BasicBlock *, BBState> &BBStates, 1818 BlotMapVector<Value *, RRInfo> &Retains, 1819 DenseMap<Value *, RRInfo> &Releases, Module *M, 1820 Instruction *Retain, 1821 SmallVectorImpl<Instruction *> &DeadInsts, RRInfo &RetainsToMove, 1822 RRInfo &ReleasesToMove, Value *Arg, bool KnownSafe, 1823 bool &AnyPairsCompletelyEliminated) { 1824 // If a pair happens in a region where it is known that the reference count 1825 // is already incremented, we can similarly ignore possible decrements unless 1826 // we are dealing with a retainable object with multiple provenance sources. 1827 bool KnownSafeTD = true, KnownSafeBU = true; 1828 bool CFGHazardAfflicted = false; 1829 1830 // Connect the dots between the top-down-collected RetainsToMove and 1831 // bottom-up-collected ReleasesToMove to form sets of related calls. 1832 // This is an iterative process so that we connect multiple releases 1833 // to multiple retains if needed. 1834 unsigned OldDelta = 0; 1835 unsigned NewDelta = 0; 1836 unsigned OldCount = 0; 1837 unsigned NewCount = 0; 1838 bool FirstRelease = true; 1839 for (SmallVector<Instruction *, 4> NewRetains{Retain};;) { 1840 SmallVector<Instruction *, 4> NewReleases; 1841 for (Instruction *NewRetain : NewRetains) { 1842 auto It = Retains.find(NewRetain); 1843 assert(It != Retains.end()); 1844 const RRInfo &NewRetainRRI = It->second; 1845 KnownSafeTD &= NewRetainRRI.KnownSafe; 1846 CFGHazardAfflicted |= NewRetainRRI.CFGHazardAfflicted; 1847 for (Instruction *NewRetainRelease : NewRetainRRI.Calls) { 1848 auto Jt = Releases.find(NewRetainRelease); 1849 if (Jt == Releases.end()) 1850 return false; 1851 const RRInfo &NewRetainReleaseRRI = Jt->second; 1852 1853 // If the release does not have a reference to the retain as well, 1854 // something happened which is unaccounted for. Do not do anything. 1855 // 1856 // This can happen if we catch an additive overflow during path count 1857 // merging. 1858 if (!NewRetainReleaseRRI.Calls.count(NewRetain)) 1859 return false; 1860 1861 if (ReleasesToMove.Calls.insert(NewRetainRelease).second) { 1862 // If we overflow when we compute the path count, don't remove/move 1863 // anything. 1864 const BBState &NRRBBState = BBStates[NewRetainRelease->getParent()]; 1865 unsigned PathCount = BBState::OverflowOccurredValue; 1866 if (NRRBBState.GetAllPathCountWithOverflow(PathCount)) 1867 return false; 1868 assert(PathCount != BBState::OverflowOccurredValue && 1869 "PathCount at this point can not be " 1870 "OverflowOccurredValue."); 1871 OldDelta -= PathCount; 1872 1873 // Merge the ReleaseMetadata and IsTailCallRelease values. 1874 if (FirstRelease) { 1875 ReleasesToMove.ReleaseMetadata = 1876 NewRetainReleaseRRI.ReleaseMetadata; 1877 ReleasesToMove.IsTailCallRelease = 1878 NewRetainReleaseRRI.IsTailCallRelease; 1879 FirstRelease = false; 1880 } else { 1881 if (ReleasesToMove.ReleaseMetadata != 1882 NewRetainReleaseRRI.ReleaseMetadata) 1883 ReleasesToMove.ReleaseMetadata = nullptr; 1884 if (ReleasesToMove.IsTailCallRelease != 1885 NewRetainReleaseRRI.IsTailCallRelease) 1886 ReleasesToMove.IsTailCallRelease = false; 1887 } 1888 1889 // Collect the optimal insertion points. 1890 if (!KnownSafe) 1891 for (Instruction *RIP : NewRetainReleaseRRI.ReverseInsertPts) { 1892 if (ReleasesToMove.ReverseInsertPts.insert(RIP).second) { 1893 // If we overflow when we compute the path count, don't 1894 // remove/move anything. 1895 const BBState &RIPBBState = BBStates[RIP->getParent()]; 1896 PathCount = BBState::OverflowOccurredValue; 1897 if (RIPBBState.GetAllPathCountWithOverflow(PathCount)) 1898 return false; 1899 assert(PathCount != BBState::OverflowOccurredValue && 1900 "PathCount at this point can not be " 1901 "OverflowOccurredValue."); 1902 NewDelta -= PathCount; 1903 } 1904 } 1905 NewReleases.push_back(NewRetainRelease); 1906 } 1907 } 1908 } 1909 NewRetains.clear(); 1910 if (NewReleases.empty()) break; 1911 1912 // Back the other way. 1913 for (Instruction *NewRelease : NewReleases) { 1914 auto It = Releases.find(NewRelease); 1915 assert(It != Releases.end()); 1916 const RRInfo &NewReleaseRRI = It->second; 1917 KnownSafeBU &= NewReleaseRRI.KnownSafe; 1918 CFGHazardAfflicted |= NewReleaseRRI.CFGHazardAfflicted; 1919 for (Instruction *NewReleaseRetain : NewReleaseRRI.Calls) { 1920 auto Jt = Retains.find(NewReleaseRetain); 1921 if (Jt == Retains.end()) 1922 return false; 1923 const RRInfo &NewReleaseRetainRRI = Jt->second; 1924 1925 // If the retain does not have a reference to the release as well, 1926 // something happened which is unaccounted for. Do not do anything. 1927 // 1928 // This can happen if we catch an additive overflow during path count 1929 // merging. 1930 if (!NewReleaseRetainRRI.Calls.count(NewRelease)) 1931 return false; 1932 1933 if (RetainsToMove.Calls.insert(NewReleaseRetain).second) { 1934 // If we overflow when we compute the path count, don't remove/move 1935 // anything. 1936 const BBState &NRRBBState = BBStates[NewReleaseRetain->getParent()]; 1937 unsigned PathCount = BBState::OverflowOccurredValue; 1938 if (NRRBBState.GetAllPathCountWithOverflow(PathCount)) 1939 return false; 1940 assert(PathCount != BBState::OverflowOccurredValue && 1941 "PathCount at this point can not be " 1942 "OverflowOccurredValue."); 1943 OldDelta += PathCount; 1944 OldCount += PathCount; 1945 1946 // Collect the optimal insertion points. 1947 if (!KnownSafe) 1948 for (Instruction *RIP : NewReleaseRetainRRI.ReverseInsertPts) { 1949 if (RetainsToMove.ReverseInsertPts.insert(RIP).second) { 1950 // If we overflow when we compute the path count, don't 1951 // remove/move anything. 1952 const BBState &RIPBBState = BBStates[RIP->getParent()]; 1953 1954 PathCount = BBState::OverflowOccurredValue; 1955 if (RIPBBState.GetAllPathCountWithOverflow(PathCount)) 1956 return false; 1957 assert(PathCount != BBState::OverflowOccurredValue && 1958 "PathCount at this point can not be " 1959 "OverflowOccurredValue."); 1960 NewDelta += PathCount; 1961 NewCount += PathCount; 1962 } 1963 } 1964 NewRetains.push_back(NewReleaseRetain); 1965 } 1966 } 1967 } 1968 if (NewRetains.empty()) break; 1969 } 1970 1971 // We can only remove pointers if we are known safe in both directions. 1972 bool UnconditionallySafe = KnownSafeTD && KnownSafeBU; 1973 if (UnconditionallySafe) { 1974 RetainsToMove.ReverseInsertPts.clear(); 1975 ReleasesToMove.ReverseInsertPts.clear(); 1976 NewCount = 0; 1977 } else { 1978 // Determine whether the new insertion points we computed preserve the 1979 // balance of retain and release calls through the program. 1980 // TODO: If the fully aggressive solution isn't valid, try to find a 1981 // less aggressive solution which is. 1982 if (NewDelta != 0) 1983 return false; 1984 1985 // At this point, we are not going to remove any RR pairs, but we still are 1986 // able to move RR pairs. If one of our pointers is afflicted with 1987 // CFGHazards, we cannot perform such code motion so exit early. 1988 const bool WillPerformCodeMotion = 1989 !RetainsToMove.ReverseInsertPts.empty() || 1990 !ReleasesToMove.ReverseInsertPts.empty(); 1991 if (CFGHazardAfflicted && WillPerformCodeMotion) 1992 return false; 1993 } 1994 1995 // Determine whether the original call points are balanced in the retain and 1996 // release calls through the program. If not, conservatively don't touch 1997 // them. 1998 // TODO: It's theoretically possible to do code motion in this case, as 1999 // long as the existing imbalances are maintained. 2000 if (OldDelta != 0) 2001 return false; 2002 2003 Changed = true; 2004 assert(OldCount != 0 && "Unreachable code?"); 2005 NumRRs += OldCount - NewCount; 2006 // Set to true if we completely removed any RR pairs. 2007 AnyPairsCompletelyEliminated = NewCount == 0; 2008 2009 // We can move calls! 2010 return true; 2011 } 2012 2013 /// Identify pairings between the retains and releases, and delete and/or move 2014 /// them. 2015 bool ObjCARCOpt::PerformCodePlacement( 2016 DenseMap<const BasicBlock *, BBState> &BBStates, 2017 BlotMapVector<Value *, RRInfo> &Retains, 2018 DenseMap<Value *, RRInfo> &Releases, Module *M) { 2019 LLVM_DEBUG(dbgs() << "\n== ObjCARCOpt::PerformCodePlacement ==\n"); 2020 2021 bool AnyPairsCompletelyEliminated = false; 2022 SmallVector<Instruction *, 8> DeadInsts; 2023 2024 // Visit each retain. 2025 for (BlotMapVector<Value *, RRInfo>::const_iterator I = Retains.begin(), 2026 E = Retains.end(); 2027 I != E; ++I) { 2028 Value *V = I->first; 2029 if (!V) continue; // blotted 2030 2031 Instruction *Retain = cast<Instruction>(V); 2032 2033 LLVM_DEBUG(dbgs() << "Visiting: " << *Retain << "\n"); 2034 2035 Value *Arg = GetArgRCIdentityRoot(Retain); 2036 2037 // If the object being released is in static or stack storage, we know it's 2038 // not being managed by ObjC reference counting, so we can delete pairs 2039 // regardless of what possible decrements or uses lie between them. 2040 bool KnownSafe = isa<Constant>(Arg) || isa<AllocaInst>(Arg); 2041 2042 // A constant pointer can't be pointing to an object on the heap. It may 2043 // be reference-counted, but it won't be deleted. 2044 if (const LoadInst *LI = dyn_cast<LoadInst>(Arg)) 2045 if (const GlobalVariable *GV = 2046 dyn_cast<GlobalVariable>( 2047 GetRCIdentityRoot(LI->getPointerOperand()))) 2048 if (GV->isConstant()) 2049 KnownSafe = true; 2050 2051 // Connect the dots between the top-down-collected RetainsToMove and 2052 // bottom-up-collected ReleasesToMove to form sets of related calls. 2053 RRInfo RetainsToMove, ReleasesToMove; 2054 2055 bool PerformMoveCalls = PairUpRetainsAndReleases( 2056 BBStates, Retains, Releases, M, Retain, DeadInsts, 2057 RetainsToMove, ReleasesToMove, Arg, KnownSafe, 2058 AnyPairsCompletelyEliminated); 2059 2060 if (PerformMoveCalls) { 2061 // Ok, everything checks out and we're all set. Let's move/delete some 2062 // code! 2063 MoveCalls(Arg, RetainsToMove, ReleasesToMove, 2064 Retains, Releases, DeadInsts, M); 2065 } 2066 } 2067 2068 // Now that we're done moving everything, we can delete the newly dead 2069 // instructions, as we no longer need them as insert points. 2070 while (!DeadInsts.empty()) 2071 EraseInstruction(DeadInsts.pop_back_val()); 2072 2073 return AnyPairsCompletelyEliminated; 2074 } 2075 2076 /// Weak pointer optimizations. 2077 void ObjCARCOpt::OptimizeWeakCalls(Function &F) { 2078 LLVM_DEBUG(dbgs() << "\n== ObjCARCOpt::OptimizeWeakCalls ==\n"); 2079 2080 // First, do memdep-style RLE and S2L optimizations. We can't use memdep 2081 // itself because it uses AliasAnalysis and we need to do provenance 2082 // queries instead. 2083 for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) { 2084 Instruction *Inst = &*I++; 2085 2086 LLVM_DEBUG(dbgs() << "Visiting: " << *Inst << "\n"); 2087 2088 ARCInstKind Class = GetBasicARCInstKind(Inst); 2089 if (Class != ARCInstKind::LoadWeak && 2090 Class != ARCInstKind::LoadWeakRetained) 2091 continue; 2092 2093 // Delete objc_loadWeak calls with no users. 2094 if (Class == ARCInstKind::LoadWeak && Inst->use_empty()) { 2095 Inst->eraseFromParent(); 2096 Changed = true; 2097 continue; 2098 } 2099 2100 // TODO: For now, just look for an earlier available version of this value 2101 // within the same block. Theoretically, we could do memdep-style non-local 2102 // analysis too, but that would want caching. A better approach would be to 2103 // use the technique that EarlyCSE uses. 2104 inst_iterator Current = std::prev(I); 2105 BasicBlock *CurrentBB = &*Current.getBasicBlockIterator(); 2106 for (BasicBlock::iterator B = CurrentBB->begin(), 2107 J = Current.getInstructionIterator(); 2108 J != B; --J) { 2109 Instruction *EarlierInst = &*std::prev(J); 2110 ARCInstKind EarlierClass = GetARCInstKind(EarlierInst); 2111 switch (EarlierClass) { 2112 case ARCInstKind::LoadWeak: 2113 case ARCInstKind::LoadWeakRetained: { 2114 // If this is loading from the same pointer, replace this load's value 2115 // with that one. 2116 CallInst *Call = cast<CallInst>(Inst); 2117 CallInst *EarlierCall = cast<CallInst>(EarlierInst); 2118 Value *Arg = Call->getArgOperand(0); 2119 Value *EarlierArg = EarlierCall->getArgOperand(0); 2120 switch (PA.getAA()->alias(Arg, EarlierArg)) { 2121 case AliasResult::MustAlias: 2122 Changed = true; 2123 // If the load has a builtin retain, insert a plain retain for it. 2124 if (Class == ARCInstKind::LoadWeakRetained) { 2125 Function *Decl = EP.get(ARCRuntimeEntryPointKind::Retain); 2126 CallInst *CI = CallInst::Create(Decl, EarlierCall, "", Call); 2127 CI->setTailCall(); 2128 } 2129 // Zap the fully redundant load. 2130 Call->replaceAllUsesWith(EarlierCall); 2131 Call->eraseFromParent(); 2132 goto clobbered; 2133 case AliasResult::MayAlias: 2134 case AliasResult::PartialAlias: 2135 goto clobbered; 2136 case AliasResult::NoAlias: 2137 break; 2138 } 2139 break; 2140 } 2141 case ARCInstKind::StoreWeak: 2142 case ARCInstKind::InitWeak: { 2143 // If this is storing to the same pointer and has the same size etc. 2144 // replace this load's value with the stored value. 2145 CallInst *Call = cast<CallInst>(Inst); 2146 CallInst *EarlierCall = cast<CallInst>(EarlierInst); 2147 Value *Arg = Call->getArgOperand(0); 2148 Value *EarlierArg = EarlierCall->getArgOperand(0); 2149 switch (PA.getAA()->alias(Arg, EarlierArg)) { 2150 case AliasResult::MustAlias: 2151 Changed = true; 2152 // If the load has a builtin retain, insert a plain retain for it. 2153 if (Class == ARCInstKind::LoadWeakRetained) { 2154 Function *Decl = EP.get(ARCRuntimeEntryPointKind::Retain); 2155 CallInst *CI = CallInst::Create(Decl, EarlierCall, "", Call); 2156 CI->setTailCall(); 2157 } 2158 // Zap the fully redundant load. 2159 Call->replaceAllUsesWith(EarlierCall->getArgOperand(1)); 2160 Call->eraseFromParent(); 2161 goto clobbered; 2162 case AliasResult::MayAlias: 2163 case AliasResult::PartialAlias: 2164 goto clobbered; 2165 case AliasResult::NoAlias: 2166 break; 2167 } 2168 break; 2169 } 2170 case ARCInstKind::MoveWeak: 2171 case ARCInstKind::CopyWeak: 2172 // TOOD: Grab the copied value. 2173 goto clobbered; 2174 case ARCInstKind::AutoreleasepoolPush: 2175 case ARCInstKind::None: 2176 case ARCInstKind::IntrinsicUser: 2177 case ARCInstKind::User: 2178 // Weak pointers are only modified through the weak entry points 2179 // (and arbitrary calls, which could call the weak entry points). 2180 break; 2181 default: 2182 // Anything else could modify the weak pointer. 2183 goto clobbered; 2184 } 2185 } 2186 clobbered:; 2187 } 2188 2189 // Then, for each destroyWeak with an alloca operand, check to see if 2190 // the alloca and all its users can be zapped. 2191 for (Instruction &Inst : llvm::make_early_inc_range(instructions(F))) { 2192 ARCInstKind Class = GetBasicARCInstKind(&Inst); 2193 if (Class != ARCInstKind::DestroyWeak) 2194 continue; 2195 2196 CallInst *Call = cast<CallInst>(&Inst); 2197 Value *Arg = Call->getArgOperand(0); 2198 if (AllocaInst *Alloca = dyn_cast<AllocaInst>(Arg)) { 2199 for (User *U : Alloca->users()) { 2200 const Instruction *UserInst = cast<Instruction>(U); 2201 switch (GetBasicARCInstKind(UserInst)) { 2202 case ARCInstKind::InitWeak: 2203 case ARCInstKind::StoreWeak: 2204 case ARCInstKind::DestroyWeak: 2205 continue; 2206 default: 2207 goto done; 2208 } 2209 } 2210 Changed = true; 2211 for (User *U : llvm::make_early_inc_range(Alloca->users())) { 2212 CallInst *UserInst = cast<CallInst>(U); 2213 switch (GetBasicARCInstKind(UserInst)) { 2214 case ARCInstKind::InitWeak: 2215 case ARCInstKind::StoreWeak: 2216 // These functions return their second argument. 2217 UserInst->replaceAllUsesWith(UserInst->getArgOperand(1)); 2218 break; 2219 case ARCInstKind::DestroyWeak: 2220 // No return value. 2221 break; 2222 default: 2223 llvm_unreachable("alloca really is used!"); 2224 } 2225 UserInst->eraseFromParent(); 2226 } 2227 Alloca->eraseFromParent(); 2228 done:; 2229 } 2230 } 2231 } 2232 2233 /// Identify program paths which execute sequences of retains and releases which 2234 /// can be eliminated. 2235 bool ObjCARCOpt::OptimizeSequences(Function &F) { 2236 // Releases, Retains - These are used to store the results of the main flow 2237 // analysis. These use Value* as the key instead of Instruction* so that the 2238 // map stays valid when we get around to rewriting code and calls get 2239 // replaced by arguments. 2240 DenseMap<Value *, RRInfo> Releases; 2241 BlotMapVector<Value *, RRInfo> Retains; 2242 2243 // This is used during the traversal of the function to track the 2244 // states for each identified object at each block. 2245 DenseMap<const BasicBlock *, BBState> BBStates; 2246 2247 // Analyze the CFG of the function, and all instructions. 2248 bool NestingDetected = Visit(F, BBStates, Retains, Releases); 2249 2250 if (DisableRetainReleasePairing) 2251 return false; 2252 2253 // Transform. 2254 bool AnyPairsCompletelyEliminated = PerformCodePlacement(BBStates, Retains, 2255 Releases, 2256 F.getParent()); 2257 2258 return AnyPairsCompletelyEliminated && NestingDetected; 2259 } 2260 2261 /// Check if there is a dependent call earlier that does not have anything in 2262 /// between the Retain and the call that can affect the reference count of their 2263 /// shared pointer argument. Note that Retain need not be in BB. 2264 static CallInst *HasSafePathToPredecessorCall(const Value *Arg, 2265 Instruction *Retain, 2266 ProvenanceAnalysis &PA) { 2267 auto *Call = dyn_cast_or_null<CallInst>(findSingleDependency( 2268 CanChangeRetainCount, Arg, Retain->getParent(), Retain, PA)); 2269 2270 // Check that the pointer is the return value of the call. 2271 if (!Call || Arg != Call) 2272 return nullptr; 2273 2274 // Check that the call is a regular call. 2275 ARCInstKind Class = GetBasicARCInstKind(Call); 2276 return Class == ARCInstKind::CallOrUser || Class == ARCInstKind::Call 2277 ? Call 2278 : nullptr; 2279 } 2280 2281 /// Find a dependent retain that precedes the given autorelease for which there 2282 /// is nothing in between the two instructions that can affect the ref count of 2283 /// Arg. 2284 static CallInst * 2285 FindPredecessorRetainWithSafePath(const Value *Arg, BasicBlock *BB, 2286 Instruction *Autorelease, 2287 ProvenanceAnalysis &PA) { 2288 auto *Retain = dyn_cast_or_null<CallInst>( 2289 findSingleDependency(CanChangeRetainCount, Arg, BB, Autorelease, PA)); 2290 2291 // Check that we found a retain with the same argument. 2292 if (!Retain || !IsRetain(GetBasicARCInstKind(Retain)) || 2293 GetArgRCIdentityRoot(Retain) != Arg) { 2294 return nullptr; 2295 } 2296 2297 return Retain; 2298 } 2299 2300 /// Look for an ``autorelease'' instruction dependent on Arg such that there are 2301 /// no instructions dependent on Arg that need a positive ref count in between 2302 /// the autorelease and the ret. 2303 static CallInst * 2304 FindPredecessorAutoreleaseWithSafePath(const Value *Arg, BasicBlock *BB, 2305 ReturnInst *Ret, 2306 ProvenanceAnalysis &PA) { 2307 SmallPtrSet<Instruction *, 4> DepInsts; 2308 auto *Autorelease = dyn_cast_or_null<CallInst>( 2309 findSingleDependency(NeedsPositiveRetainCount, Arg, BB, Ret, PA)); 2310 2311 if (!Autorelease) 2312 return nullptr; 2313 ARCInstKind AutoreleaseClass = GetBasicARCInstKind(Autorelease); 2314 if (!IsAutorelease(AutoreleaseClass)) 2315 return nullptr; 2316 if (GetArgRCIdentityRoot(Autorelease) != Arg) 2317 return nullptr; 2318 2319 return Autorelease; 2320 } 2321 2322 /// Look for this pattern: 2323 /// \code 2324 /// %call = call i8* @something(...) 2325 /// %2 = call i8* @objc_retain(i8* %call) 2326 /// %3 = call i8* @objc_autorelease(i8* %2) 2327 /// ret i8* %3 2328 /// \endcode 2329 /// And delete the retain and autorelease. 2330 void ObjCARCOpt::OptimizeReturns(Function &F) { 2331 if (!F.getReturnType()->isPointerTy()) 2332 return; 2333 2334 LLVM_DEBUG(dbgs() << "\n== ObjCARCOpt::OptimizeReturns ==\n"); 2335 2336 for (BasicBlock &BB: F) { 2337 ReturnInst *Ret = dyn_cast<ReturnInst>(&BB.back()); 2338 if (!Ret) 2339 continue; 2340 2341 LLVM_DEBUG(dbgs() << "Visiting: " << *Ret << "\n"); 2342 2343 const Value *Arg = GetRCIdentityRoot(Ret->getOperand(0)); 2344 2345 // Look for an ``autorelease'' instruction that is a predecessor of Ret and 2346 // dependent on Arg such that there are no instructions dependent on Arg 2347 // that need a positive ref count in between the autorelease and Ret. 2348 CallInst *Autorelease = 2349 FindPredecessorAutoreleaseWithSafePath(Arg, &BB, Ret, PA); 2350 2351 if (!Autorelease) 2352 continue; 2353 2354 CallInst *Retain = FindPredecessorRetainWithSafePath( 2355 Arg, Autorelease->getParent(), Autorelease, PA); 2356 2357 if (!Retain) 2358 continue; 2359 2360 // Check that there is nothing that can affect the reference count 2361 // between the retain and the call. Note that Retain need not be in BB. 2362 CallInst *Call = HasSafePathToPredecessorCall(Arg, Retain, PA); 2363 2364 // Don't remove retainRV/autoreleaseRV pairs if the call isn't a tail call. 2365 if (!Call || 2366 (!Call->isTailCall() && 2367 GetBasicARCInstKind(Retain) == ARCInstKind::RetainRV && 2368 GetBasicARCInstKind(Autorelease) == ARCInstKind::AutoreleaseRV)) 2369 continue; 2370 2371 // If so, we can zap the retain and autorelease. 2372 Changed = true; 2373 ++NumRets; 2374 LLVM_DEBUG(dbgs() << "Erasing: " << *Retain << "\nErasing: " << *Autorelease 2375 << "\n"); 2376 BundledInsts->eraseInst(Retain); 2377 EraseInstruction(Autorelease); 2378 } 2379 } 2380 2381 #ifndef NDEBUG 2382 void 2383 ObjCARCOpt::GatherStatistics(Function &F, bool AfterOptimization) { 2384 Statistic &NumRetains = 2385 AfterOptimization ? NumRetainsAfterOpt : NumRetainsBeforeOpt; 2386 Statistic &NumReleases = 2387 AfterOptimization ? NumReleasesAfterOpt : NumReleasesBeforeOpt; 2388 2389 for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) { 2390 Instruction *Inst = &*I++; 2391 switch (GetBasicARCInstKind(Inst)) { 2392 default: 2393 break; 2394 case ARCInstKind::Retain: 2395 ++NumRetains; 2396 break; 2397 case ARCInstKind::Release: 2398 ++NumReleases; 2399 break; 2400 } 2401 } 2402 } 2403 #endif 2404 2405 void ObjCARCOpt::init(Function &F) { 2406 if (!EnableARCOpts) 2407 return; 2408 2409 // Intuitively, objc_retain and others are nocapture, however in practice 2410 // they are not, because they return their argument value. And objc_release 2411 // calls finalizers which can have arbitrary side effects. 2412 MDKindCache.init(F.getParent()); 2413 2414 // Initialize our runtime entry point cache. 2415 EP.init(F.getParent()); 2416 2417 // Compute which blocks are in which funclet. 2418 if (F.hasPersonalityFn() && 2419 isScopedEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) 2420 BlockEHColors = colorEHFunclets(F); 2421 } 2422 2423 bool ObjCARCOpt::run(Function &F, AAResults &AA) { 2424 if (!EnableARCOpts) 2425 return false; 2426 2427 Changed = CFGChanged = false; 2428 BundledRetainClaimRVs BRV(/*ContractPass=*/false); 2429 BundledInsts = &BRV; 2430 2431 LLVM_DEBUG(dbgs() << "<<< ObjCARCOpt: Visiting Function: " << F.getName() 2432 << " >>>" 2433 "\n"); 2434 2435 std::pair<bool, bool> R = BundledInsts->insertAfterInvokes(F, nullptr); 2436 Changed |= R.first; 2437 CFGChanged |= R.second; 2438 2439 PA.setAA(&AA); 2440 2441 #ifndef NDEBUG 2442 if (AreStatisticsEnabled()) { 2443 GatherStatistics(F, false); 2444 } 2445 #endif 2446 2447 // This pass performs several distinct transformations. As a compile-time aid 2448 // when compiling code that isn't ObjC, skip these if the relevant ObjC 2449 // library functions aren't declared. 2450 2451 // Preliminary optimizations. This also computes UsedInThisFunction. 2452 OptimizeIndividualCalls(F); 2453 2454 // Optimizations for weak pointers. 2455 if (UsedInThisFunction & ((1 << unsigned(ARCInstKind::LoadWeak)) | 2456 (1 << unsigned(ARCInstKind::LoadWeakRetained)) | 2457 (1 << unsigned(ARCInstKind::StoreWeak)) | 2458 (1 << unsigned(ARCInstKind::InitWeak)) | 2459 (1 << unsigned(ARCInstKind::CopyWeak)) | 2460 (1 << unsigned(ARCInstKind::MoveWeak)) | 2461 (1 << unsigned(ARCInstKind::DestroyWeak)))) 2462 OptimizeWeakCalls(F); 2463 2464 // Optimizations for retain+release pairs. 2465 if (UsedInThisFunction & ((1 << unsigned(ARCInstKind::Retain)) | 2466 (1 << unsigned(ARCInstKind::RetainRV)) | 2467 (1 << unsigned(ARCInstKind::RetainBlock)))) 2468 if (UsedInThisFunction & (1 << unsigned(ARCInstKind::Release))) 2469 // Run OptimizeSequences until it either stops making changes or 2470 // no retain+release pair nesting is detected. 2471 while (OptimizeSequences(F)) {} 2472 2473 // Optimizations if objc_autorelease is used. 2474 if (UsedInThisFunction & ((1 << unsigned(ARCInstKind::Autorelease)) | 2475 (1 << unsigned(ARCInstKind::AutoreleaseRV)))) 2476 OptimizeReturns(F); 2477 2478 // Gather statistics after optimization. 2479 #ifndef NDEBUG 2480 if (AreStatisticsEnabled()) { 2481 GatherStatistics(F, true); 2482 } 2483 #endif 2484 2485 LLVM_DEBUG(dbgs() << "\n"); 2486 2487 return Changed; 2488 } 2489 2490 /// @} 2491 /// 2492 2493 PreservedAnalyses ObjCARCOptPass::run(Function &F, 2494 FunctionAnalysisManager &AM) { 2495 ObjCARCOpt OCAO; 2496 OCAO.init(F); 2497 2498 bool Changed = OCAO.run(F, AM.getResult<AAManager>(F)); 2499 bool CFGChanged = OCAO.hasCFGChanged(); 2500 if (Changed) { 2501 PreservedAnalyses PA; 2502 if (!CFGChanged) 2503 PA.preserveSet<CFGAnalyses>(); 2504 return PA; 2505 } 2506 return PreservedAnalyses::all(); 2507 } 2508