1 //===- ObjCARCOpts.cpp - ObjC ARC Optimization ----------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// This file defines ObjC ARC optimizations. ARC stands for Automatic 11 /// Reference Counting and is a system for managing reference counts for objects 12 /// in Objective C. 13 /// 14 /// The optimizations performed include elimination of redundant, partially 15 /// redundant, and inconsequential reference count operations, elimination of 16 /// redundant weak pointer operations, and numerous minor simplifications. 17 /// 18 /// WARNING: This file knows about certain library functions. It recognizes them 19 /// by name, and hardwires knowledge of their semantics. 20 /// 21 /// WARNING: This file knows about how certain Objective-C library functions are 22 /// used. Naive LLVM IR transformations which would otherwise be 23 /// behavior-preserving may break these assumptions. 
//
//===----------------------------------------------------------------------===//

#include "ARCRuntimeEntryPoints.h"
#include "BlotMapVector.h"
#include "DependencyAnalysis.h"
#include "ObjCARC.h"
#include "ProvenanceAnalysis.h"
#include "PtrState.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ObjCARCAliasAnalysis.h"
#include "llvm/Analysis/ObjCARCAnalysisUtils.h"
#include "llvm/Analysis/ObjCARCInstKind.h"
#include "llvm/Analysis/ObjCARCUtil.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/ObjCARC.h"
#include <cassert>
#include <iterator>
#include <utility>

using namespace llvm;
using namespace llvm::objcarc;

#define DEBUG_TYPE "objc-arc-opts"

/// Command-line safety valve bounding how many per-pointer states the
/// optimizer tracks. Presumably this caps compile time on pathological
/// functions by making the pass bail out conservatively once exceeded —
/// TODO(review): confirm against the uses of MaxPtrStates later in the file.
static cl::opt<unsigned> MaxPtrStates("arc-opt-max-ptr-states",
    cl::Hidden,
    cl::desc("Maximum number of ptr states the optimizer keeps track of"),
    cl::init(4095));

/// \defgroup ARCUtilities Utility declarations/definitions specific to ARC.
82 /// @{ 83 84 /// This is similar to GetRCIdentityRoot but it stops as soon 85 /// as it finds a value with multiple uses. 86 static const Value *FindSingleUseIdentifiedObject(const Value *Arg) { 87 // ConstantData (like ConstantPointerNull and UndefValue) is used across 88 // modules. It's never a single-use value. 89 if (isa<ConstantData>(Arg)) 90 return nullptr; 91 92 if (Arg->hasOneUse()) { 93 if (const BitCastInst *BC = dyn_cast<BitCastInst>(Arg)) 94 return FindSingleUseIdentifiedObject(BC->getOperand(0)); 95 if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Arg)) 96 if (GEP->hasAllZeroIndices()) 97 return FindSingleUseIdentifiedObject(GEP->getPointerOperand()); 98 if (IsForwarding(GetBasicARCInstKind(Arg))) 99 return FindSingleUseIdentifiedObject( 100 cast<CallInst>(Arg)->getArgOperand(0)); 101 if (!IsObjCIdentifiedObject(Arg)) 102 return nullptr; 103 return Arg; 104 } 105 106 // If we found an identifiable object but it has multiple uses, but they are 107 // trivial uses, we can still consider this to be a single-use value. 108 if (IsObjCIdentifiedObject(Arg)) { 109 for (const User *U : Arg->users()) 110 if (!U->use_empty() || GetRCIdentityRoot(U) != Arg) 111 return nullptr; 112 113 return Arg; 114 } 115 116 return nullptr; 117 } 118 119 /// @} 120 /// 121 /// \defgroup ARCOpt ARC Optimization. 122 /// @{ 123 124 // TODO: On code like this: 125 // 126 // objc_retain(%x) 127 // stuff_that_cannot_release() 128 // objc_autorelease(%x) 129 // stuff_that_cannot_release() 130 // objc_retain(%x) 131 // stuff_that_cannot_release() 132 // objc_autorelease(%x) 133 // 134 // The second retain and autorelease can be deleted. 135 136 // TODO: It should be possible to delete 137 // objc_autoreleasePoolPush and objc_autoreleasePoolPop 138 // pairs if nothing is actually autoreleased between them. Also, autorelease 139 // calls followed by objc_autoreleasePoolPop calls (perhaps in ObjC++ code 140 // after inlining) can be turned into plain release calls. 
// TODO: Critical-edge splitting. If the optimal insertion point is
// a critical edge, the current algorithm has to fail, because it doesn't
// know how to split edges. It should be possible to make the optimizer
// think in terms of edges, rather than blocks, and then split critical
// edges on demand.

// TODO: OptimizeSequences could be generalized to be Interprocedural.

// TODO: Recognize that a bunch of other objc runtime calls have
// non-escaping arguments and non-releasing arguments, and may be
// non-autoreleasing.

// TODO: Sink autorelease calls as far as possible. Unfortunately we
// usually can't sink them past other calls, which would be the main
// case where it would be useful.

// TODO: The pointer returned from objc_loadWeakRetained is retained.

// TODO: Delete release+retain pairs (rare).

STATISTIC(NumNoops, "Number of no-op objc calls eliminated");
STATISTIC(NumPartialNoops, "Number of partially no-op objc calls eliminated");
STATISTIC(NumAutoreleases, "Number of autoreleases converted to releases");
STATISTIC(NumRets, "Number of return value forwarding "
                   "retain+autoreleases eliminated");
STATISTIC(NumRRs, "Number of retain+release paths eliminated");
STATISTIC(NumPeeps, "Number of calls peephole-optimized");
#ifndef NDEBUG
STATISTIC(NumRetainsBeforeOpt, "Number of retains before optimization");
STATISTIC(NumReleasesBeforeOpt, "Number of releases before optimization");
STATISTIC(NumRetainsAfterOpt, "Number of retains after optimization");
STATISTIC(NumReleasesAfterOpt, "Number of releases after optimization");
#endif

namespace {

/// Per-BasicBlock state.
///
/// Holds the data-flow facts the pass computes for one basic block during its
/// two traversals: per-pointer states for the top-down (entry-to-exit) and
/// bottom-up (exit-to-entry) walks, plus path counts used to bound the number
/// of distinct entry-to-exit paths through the block.
class BBState {
  /// The number of unique control paths from the entry which can reach this
  /// block.
  unsigned TopDownPathCount = 0;

  /// The number of unique control paths to exits from this block.
  unsigned BottomUpPathCount = 0;

  /// The top-down traversal uses this to record information known about a
  /// pointer at the bottom of each block.
  BlotMapVector<const Value *, TopDownPtrState> PerPtrTopDown;

  /// The bottom-up traversal uses this to record information known about a
  /// pointer at the top of each block.
  BlotMapVector<const Value *, BottomUpPtrState> PerPtrBottomUp;

  /// Effective predecessors of the current block ignoring ignorable edges and
  /// ignored backedges.
  SmallVector<BasicBlock *, 2> Preds;

  /// Effective successors of the current block ignoring ignorable edges and
  /// ignored backedges.
  SmallVector<BasicBlock *, 2> Succs;

public:
  /// Sentinel path count meaning "too many paths to count"; once a block hits
  /// this value the pass treats its per-pointer state conservatively.
  static const unsigned OverflowOccurredValue;

  BBState() = default;

  using top_down_ptr_iterator = decltype(PerPtrTopDown)::iterator;
  using const_top_down_ptr_iterator = decltype(PerPtrTopDown)::const_iterator;

  top_down_ptr_iterator top_down_ptr_begin() { return PerPtrTopDown.begin(); }
  top_down_ptr_iterator top_down_ptr_end() { return PerPtrTopDown.end(); }
  const_top_down_ptr_iterator top_down_ptr_begin() const {
    return PerPtrTopDown.begin();
  }
  const_top_down_ptr_iterator top_down_ptr_end() const {
    return PerPtrTopDown.end();
  }
  bool hasTopDownPtrs() const {
    return !PerPtrTopDown.empty();
  }

  /// Number of pointers currently tracked by the top-down traversal.
  unsigned top_down_ptr_list_size() const {
    return std::distance(top_down_ptr_begin(), top_down_ptr_end());
  }

  using bottom_up_ptr_iterator = decltype(PerPtrBottomUp)::iterator;
  using const_bottom_up_ptr_iterator =
      decltype(PerPtrBottomUp)::const_iterator;

  bottom_up_ptr_iterator bottom_up_ptr_begin() {
    return PerPtrBottomUp.begin();
  }
  bottom_up_ptr_iterator bottom_up_ptr_end() { return PerPtrBottomUp.end(); }
  const_bottom_up_ptr_iterator bottom_up_ptr_begin() const {
    return PerPtrBottomUp.begin();
  }
  const_bottom_up_ptr_iterator bottom_up_ptr_end() const {
    return PerPtrBottomUp.end();
  }
  bool hasBottomUpPtrs() const {
    return !PerPtrBottomUp.empty();
  }

  /// Number of pointers currently tracked by the bottom-up traversal.
  unsigned bottom_up_ptr_list_size() const {
    return std::distance(bottom_up_ptr_begin(), bottom_up_ptr_end());
  }

  /// Mark this block as being an entry block, which has one path from the
  /// entry by definition.
  void SetAsEntry() { TopDownPathCount = 1; }

  /// Mark this block as being an exit block, which has one path to an exit by
  /// definition.
  void SetAsExit() { BottomUpPathCount = 1; }

  /// Attempt to find the PtrState object describing the top down state for
  /// pointer Arg. Return a new initialized PtrState describing the top down
  /// state for Arg if we do not find one.
  TopDownPtrState &getPtrTopDownState(const Value *Arg) {
    return PerPtrTopDown[Arg];
  }

  /// Attempt to find the PtrState object describing the bottom up state for
  /// pointer Arg. Return a new initialized PtrState describing the bottom up
  /// state for Arg if we do not find one.
  BottomUpPtrState &getPtrBottomUpState(const Value *Arg) {
    return PerPtrBottomUp[Arg];
  }

  /// Attempt to find the PtrState object describing the bottom up state for
  /// pointer Arg. Unlike getPtrBottomUpState, this does not create an entry.
  bottom_up_ptr_iterator findPtrBottomUpState(const Value *Arg) {
    return PerPtrBottomUp.find(Arg);
  }

  void clearBottomUpPointers() {
    PerPtrBottomUp.clear();
  }

  void clearTopDownPointers() {
    PerPtrTopDown.clear();
  }

  void InitFromPred(const BBState &Other);
  void InitFromSucc(const BBState &Other);
  void MergePred(const BBState &Other);
  void MergeSucc(const BBState &Other);

  /// Compute the number of possible unique paths from an entry to an exit
  /// which pass through this block. This is only valid after both the
  /// top-down and bottom-up traversals are complete.
  ///
  /// Returns true if overflow occurred. Returns false if overflow did not
  /// occur.
  bool GetAllPathCountWithOverflow(unsigned &PathCount) const {
    if (TopDownPathCount == OverflowOccurredValue ||
        BottomUpPathCount == OverflowOccurredValue)
      return true;
    unsigned long long Product =
        (unsigned long long)TopDownPathCount*BottomUpPathCount;
    // Overflow occurred if any of the upper bits of Product are set or if all
    // the lower bits of Product are all set (i.e. the 32-bit result collides
    // with the OverflowOccurredValue sentinel itself).
    return (Product >> 32) ||
           ((PathCount = Product) == OverflowOccurredValue);
  }

  // Specialized CFG utilities.
  using edge_iterator = SmallVectorImpl<BasicBlock *>::const_iterator;

  edge_iterator pred_begin() const { return Preds.begin(); }
  edge_iterator pred_end() const { return Preds.end(); }
  edge_iterator succ_begin() const { return Succs.begin(); }
  edge_iterator succ_end() const { return Succs.end(); }

  void addSucc(BasicBlock *Succ) { Succs.push_back(Succ); }
  void addPred(BasicBlock *Pred) { Preds.push_back(Pred); }

  bool isExit() const { return Succs.empty(); }
};

} // end anonymous namespace

const unsigned BBState::OverflowOccurredValue = 0xffffffff;

namespace llvm {

raw_ostream &operator<<(raw_ostream &OS,
                        BBState &BBState) LLVM_ATTRIBUTE_UNUSED;

} // end namespace llvm

/// Initialize this block's top-down state by copying it from a predecessor.
void BBState::InitFromPred(const BBState &Other) {
  PerPtrTopDown = Other.PerPtrTopDown;
  TopDownPathCount = Other.TopDownPathCount;
}

/// Initialize this block's bottom-up state by copying it from a successor.
void BBState::InitFromSucc(const BBState &Other) {
  PerPtrBottomUp = Other.PerPtrBottomUp;
  BottomUpPathCount = Other.BottomUpPathCount;
}

/// The top-down traversal uses this to merge information about predecessors to
/// form the initial state for a new block.
void BBState::MergePred(const BBState &Other) {
  // Once overflow has been recorded, stay conservative; there is nothing
  // useful left to merge.
  if (TopDownPathCount == OverflowOccurredValue)
    return;

  // Other.TopDownPathCount can be 0, in which case it is either dead or a
  // loop backedge. Loop backedges are special.
  TopDownPathCount += Other.TopDownPathCount;

  // In order to be consistent, we clear the top down pointers when
  // TopDownPathCount becomes OverflowOccurredValue as a result of this
  // addition, even though "true" overflow has not occurred.
  if (TopDownPathCount == OverflowOccurredValue) {
    clearTopDownPointers();
    return;
  }

  // Check for overflow. If we have overflow, fall back to conservative
  // behavior. (Unsigned wraparound makes the sum smaller than the addend.)
  if (TopDownPathCount < Other.TopDownPathCount) {
    TopDownPathCount = OverflowOccurredValue;
    clearTopDownPointers();
    return;
  }

  // For each entry in the other set, if our set has an entry with the same key,
  // merge the entries. Otherwise, copy the entry and merge it with an empty
  // entry.
  for (auto MI = Other.top_down_ptr_begin(), ME = Other.top_down_ptr_end();
       MI != ME; ++MI) {
    auto Pair = PerPtrTopDown.insert(*MI);
    Pair.first->second.Merge(Pair.second ? TopDownPtrState() : MI->second,
                             /*TopDown=*/true);
  }

  // For each entry in our set, if the other set doesn't have an entry with the
  // same key, force it to merge with an empty entry.
  for (auto MI = top_down_ptr_begin(), ME = top_down_ptr_end(); MI != ME; ++MI)
    if (Other.PerPtrTopDown.find(MI->first) == Other.PerPtrTopDown.end())
      MI->second.Merge(TopDownPtrState(), /*TopDown=*/true);
}

/// The bottom-up traversal uses this to merge information about successors to
/// form the initial state for a new block.
void BBState::MergeSucc(const BBState &Other) {
  // Once overflow has been recorded, stay conservative; there is nothing
  // useful left to merge.
  if (BottomUpPathCount == OverflowOccurredValue)
    return;

  // Other.BottomUpPathCount can be 0, in which case it is either dead or a
  // loop backedge. Loop backedges are special.
  BottomUpPathCount += Other.BottomUpPathCount;

  // In order to be consistent, we clear the bottom up pointers when
  // BottomUpPathCount becomes OverflowOccurredValue as a result of this
  // addition, even though "true" overflow has not occurred.
  if (BottomUpPathCount == OverflowOccurredValue) {
    clearBottomUpPointers();
    return;
  }

  // Check for overflow. If we have overflow, fall back to conservative
  // behavior. (Unsigned wraparound makes the sum smaller than the addend.)
  if (BottomUpPathCount < Other.BottomUpPathCount) {
    BottomUpPathCount = OverflowOccurredValue;
    clearBottomUpPointers();
    return;
  }

  // For each entry in the other set, if our set has an entry with the
  // same key, merge the entries. Otherwise, copy the entry and merge
  // it with an empty entry.
  for (auto MI = Other.bottom_up_ptr_begin(), ME = Other.bottom_up_ptr_end();
       MI != ME; ++MI) {
    auto Pair = PerPtrBottomUp.insert(*MI);
    Pair.first->second.Merge(Pair.second ? BottomUpPtrState() : MI->second,
                             /*TopDown=*/false);
  }

  // For each entry in our set, if the other set doesn't have an entry
  // with the same key, force it to merge with an empty entry.
  for (auto MI = bottom_up_ptr_begin(), ME = bottom_up_ptr_end(); MI != ME;
       ++MI)
    if (Other.PerPtrBottomUp.find(MI->first) == Other.PerPtrBottomUp.end())
      MI->second.Merge(BottomUpPtrState(), /*TopDown=*/false);
}

raw_ostream &llvm::operator<<(raw_ostream &OS, BBState &BBInfo) {
  // Dump the pointers we are tracking.
  // NOTE(review): the "NONE!" markers go to dbgs() under LLVM_DEBUG rather
  // than to OS like everything else — looks intentional upstream; confirm
  // before "fixing".
  OS << " TopDown State:\n";
  if (!BBInfo.hasTopDownPtrs()) {
    LLVM_DEBUG(dbgs() << " NONE!\n");
  } else {
    for (auto I = BBInfo.top_down_ptr_begin(), E = BBInfo.top_down_ptr_end();
         I != E; ++I) {
      const PtrState &P = I->second;
      OS << " Ptr: " << *I->first
         << "\n KnownSafe: " << (P.IsKnownSafe()?"true":"false")
         << "\n ImpreciseRelease: "
         << (P.IsTrackingImpreciseReleases()?"true":"false") << "\n"
         << " HasCFGHazards: "
         << (P.IsCFGHazardAfflicted()?"true":"false") << "\n"
         << " KnownPositive: "
         << (P.HasKnownPositiveRefCount()?"true":"false") << "\n"
         << " Seq: "
         << P.GetSeq() << "\n";
    }
  }

  OS << " BottomUp State:\n";
  if (!BBInfo.hasBottomUpPtrs()) {
    LLVM_DEBUG(dbgs() << " NONE!\n");
  } else {
    for (auto I = BBInfo.bottom_up_ptr_begin(), E = BBInfo.bottom_up_ptr_end();
         I != E; ++I) {
      const PtrState &P = I->second;
      OS << " Ptr: " << *I->first
         << "\n KnownSafe: " << (P.IsKnownSafe()?"true":"false")
         << "\n ImpreciseRelease: "
         << (P.IsTrackingImpreciseReleases()?"true":"false") << "\n"
         << " HasCFGHazards: "
         << (P.IsCFGHazardAfflicted()?"true":"false") << "\n"
         << " KnownPositive: "
         << (P.HasKnownPositiveRefCount()?"true":"false") << "\n"
         << " Seq: "
         << P.GetSeq() << "\n";
    }
  }

  return OS;
}

namespace {

/// The main ARC optimization pass.
class ObjCARCOpt {
  /// Whether this pass changed the IR at all (reported to the pass manager).
  bool Changed = false;
  /// Whether this pass modified the CFG (stronger invalidation required).
  bool CFGChanged = false;
  ProvenanceAnalysis PA;

  /// A cache of references to runtime entry point constants.
  ARCRuntimeEntryPoints EP;

  /// A cache of MDKinds that can be passed into other functions to propagate
  /// MDKind identifiers.
  ARCMDKindCache MDKindCache;

  /// Calls carrying operand bundles that pair retain/claim RVs; not owned.
  BundledRetainClaimRVs *BundledInsts = nullptr;

  /// A flag indicating whether the optimization that removes or moves
  /// retain/release pairs should be performed.
  bool DisableRetainReleasePairing = false;

  /// Flags which determine whether each of the interesting runtime functions
  /// is in fact used in the current function. Bit index is the ARCInstKind.
  unsigned UsedInThisFunction;

  /// EH funclet coloring, used by addOpBundleForFunclet below.
  DenseMap<BasicBlock *, ColorVector> BlockEHColors;

  bool OptimizeRetainRVCall(Function &F, Instruction *RetainRV);
  void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV,
                                 ARCInstKind &Class);
  void OptimizeIndividualCalls(Function &F);

  /// Optimize an individual call, optionally passing the
  /// GetArgRCIdentityRoot if it has already been computed.
  void OptimizeIndividualCallImpl(Function &F, Instruction *Inst,
                                  ARCInstKind Class, const Value *Arg);

  /// Try to optimize an AutoreleaseRV with a RetainRV or UnsafeClaimRV. If the
  /// optimization occurs, returns true to indicate that the caller should
  /// assume the instructions are dead.
  bool OptimizeInlinedAutoreleaseRVCall(Function &F, Instruction *Inst,
                                        const Value *&Arg, ARCInstKind Class,
                                        Instruction *AutoreleaseRV,
                                        const Value *&AutoreleaseRVArg);

  void CheckForCFGHazards(const BasicBlock *BB,
                          DenseMap<const BasicBlock *, BBState> &BBStates,
                          BBState &MyStates) const;
  bool VisitInstructionBottomUp(Instruction *Inst, BasicBlock *BB,
                                BlotMapVector<Value *, RRInfo> &Retains,
                                BBState &MyStates);
  bool VisitBottomUp(BasicBlock *BB,
                     DenseMap<const BasicBlock *, BBState> &BBStates,
                     BlotMapVector<Value *, RRInfo> &Retains);
  bool VisitInstructionTopDown(
      Instruction *Inst, DenseMap<Value *, RRInfo> &Releases, BBState &MyStates,
      const DenseMap<const Instruction *, SmallPtrSet<const Value *, 2>>
          &ReleaseInsertPtToRCIdentityRoots);
  bool VisitTopDown(
      BasicBlock *BB, DenseMap<const BasicBlock *, BBState> &BBStates,
      DenseMap<Value *, RRInfo> &Releases,
      const DenseMap<const Instruction *, SmallPtrSet<const Value *, 2>>
          &ReleaseInsertPtToRCIdentityRoots);
  bool Visit(Function &F, DenseMap<const BasicBlock *, BBState> &BBStates,
             BlotMapVector<Value *, RRInfo> &Retains,
             DenseMap<Value *, RRInfo> &Releases);

  void MoveCalls(Value *Arg, RRInfo &RetainsToMove, RRInfo &ReleasesToMove,
                 BlotMapVector<Value *, RRInfo> &Retains,
                 DenseMap<Value *, RRInfo> &Releases,
                 SmallVectorImpl<Instruction *> &DeadInsts, Module *M);

  bool PairUpRetainsAndReleases(DenseMap<const BasicBlock *, BBState> &BBStates,
                                BlotMapVector<Value *, RRInfo> &Retains,
                                DenseMap<Value *, RRInfo> &Releases, Module *M,
                                Instruction *Retain,
                                SmallVectorImpl<Instruction *> &DeadInsts,
                                RRInfo &RetainsToMove, RRInfo &ReleasesToMove,
                                Value *Arg, bool KnownSafe,
                                bool &AnyPairsCompletelyEliminated);

  bool PerformCodePlacement(DenseMap<const BasicBlock *, BBState> &BBStates,
                            BlotMapVector<Value *, RRInfo> &Retains,
                            DenseMap<Value *, RRInfo> &Releases, Module *M);

  void OptimizeWeakCalls(Function &F);

  bool OptimizeSequences(Function &F);

  void OptimizeReturns(Function &F);

  /// Copy the operand bundles of CI that satisfy Predicate into OpBundles.
  template <typename PredicateT>
  static void cloneOpBundlesIf(CallBase *CI,
                               SmallVectorImpl<OperandBundleDef> &OpBundles,
                               PredicateT Predicate) {
    for (unsigned I = 0, E = CI->getNumOperandBundles(); I != E; ++I) {
      OperandBundleUse B = CI->getOperandBundleAt(I);
      if (Predicate(B))
        OpBundles.emplace_back(B);
    }
  }

  /// If BB is colored by a funclet pad, append a "funclet" operand bundle for
  /// it so calls created in BB stay attached to the right EH funclet.
  void addOpBundleForFunclet(BasicBlock *BB,
                             SmallVectorImpl<OperandBundleDef> &OpBundles) {
    if (!BlockEHColors.empty()) {
      const ColorVector &CV = BlockEHColors.find(BB)->second;
      assert(CV.size() > 0 && "Uncolored block");
      for (BasicBlock *EHPadBB : CV)
        if (auto *EHPad = dyn_cast<FuncletPadInst>(EHPadBB->getFirstNonPHI())) {
          OpBundles.emplace_back("funclet", EHPad);
          return;
        }
    }
  }

#ifndef NDEBUG
  void GatherStatistics(Function &F, bool AfterOptimization = false);
#endif

public:
  void init(Function &F);
  bool run(Function &F, AAResults &AA);
  bool hasCFGChanged() const { return CFGChanged; }
};
} // end anonymous namespace

/// Turn objc_retainAutoreleasedReturnValue into objc_retain if the operand is
/// not a return value.
bool
ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
  // Check for the argument being from an immediately preceding call or invoke.
  const Value *Arg = GetArgRCIdentityRoot(RetainRV);
  if (const Instruction *Call = dyn_cast<CallBase>(Arg)) {
    if (Call->getParent() == RetainRV->getParent()) {
      // Scan forward past no-op instructions; if the RetainRV immediately
      // follows the producing call, the RV pairing is valid — leave it alone.
      BasicBlock::const_iterator I(Call);
      ++I;
      while (IsNoopInstruction(&*I))
        ++I;
      if (&*I == RetainRV)
        return false;
    } else if (const InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
      // For invokes, the "immediately preceding" position is the top of the
      // normal destination block.
      BasicBlock *RetainRVParent = RetainRV->getParent();
      if (II->getNormalDest() == RetainRVParent) {
        BasicBlock::const_iterator I = RetainRVParent->begin();
        while (IsNoopInstruction(&*I))
          ++I;
        if (&*I == RetainRV)
          return false;
      }
    }
  }

  assert(!BundledInsts->contains(RetainRV) &&
         "a bundled retainRV's argument should be a call");

  // Turn it to a plain objc_retain.
  Changed = true;
  ++NumPeeps;

  LLVM_DEBUG(dbgs() << "Transforming objc_retainAutoreleasedReturnValue => "
                       "objc_retain since the operand is not a return value.\n"
                       "Old = "
                    << *RetainRV << "\n");

  Function *NewDecl = EP.get(ARCRuntimeEntryPointKind::Retain);
  cast<CallInst>(RetainRV)->setCalledFunction(NewDecl);

  LLVM_DEBUG(dbgs() << "New = " << *RetainRV << "\n");

  return false;
}

/// Try to pair an AutoreleaseRV left behind by inlining with a subsequent
/// RetainRV or UnsafeClaimRV on the same RC identity root, cancelling both.
/// Returns true if the caller should treat both instructions as dead.
bool ObjCARCOpt::OptimizeInlinedAutoreleaseRVCall(
    Function &F, Instruction *Inst, const Value *&Arg, ARCInstKind Class,
    Instruction *AutoreleaseRV, const Value *&AutoreleaseRVArg) {
  if (BundledInsts->contains(Inst))
    return false;

  // Must be in the same basic block.
  assert(Inst->getParent() == AutoreleaseRV->getParent());

  // Must operate on the same root.
  Arg = GetArgRCIdentityRoot(Inst);
  AutoreleaseRVArg = GetArgRCIdentityRoot(AutoreleaseRV);
  if (Arg != AutoreleaseRVArg) {
    // If there isn't an exact match, check if we have equivalent PHIs.
    const PHINode *PN = dyn_cast<PHINode>(Arg);
    if (!PN)
      return false;

    SmallVector<const Value *, 4> ArgUsers;
    getEquivalentPHIs(*PN, ArgUsers);
    if (!llvm::is_contained(ArgUsers, AutoreleaseRVArg))
      return false;
  }

  // Okay, this is a match. Merge them.
  ++NumPeeps;
  LLVM_DEBUG(dbgs() << "Found inlined objc_autoreleaseReturnValue '"
                    << *AutoreleaseRV << "' paired with '" << *Inst << "'\n");

  // Delete the RV pair, starting with the AutoreleaseRV.
  AutoreleaseRV->replaceAllUsesWith(
      cast<CallInst>(AutoreleaseRV)->getArgOperand(0));
  Changed = true;
  EraseInstruction(AutoreleaseRV);
  if (Class == ARCInstKind::RetainRV) {
    // AutoreleaseRV and RetainRV cancel out. Delete the RetainRV.
    Inst->replaceAllUsesWith(cast<CallInst>(Inst)->getArgOperand(0));
    EraseInstruction(Inst);
    return true;
  }

  // UnsafeClaimRV is a frontend peephole for RetainRV + Release. Since the
  // AutoreleaseRV and RetainRV cancel out, replace UnsafeClaimRV with Release.
  assert(Class == ARCInstKind::UnsafeClaimRV);
  Value *CallArg = cast<CallInst>(Inst)->getArgOperand(0);
  CallInst *Release =
      CallInst::Create(EP.get(ARCRuntimeEntryPointKind::Release), CallArg, "",
                       Inst->getIterator());
  assert(IsAlwaysTail(ARCInstKind::UnsafeClaimRV) &&
         "Expected UnsafeClaimRV to be safe to tail call");
  Release->setTailCall();
  Inst->replaceAllUsesWith(CallArg);
  EraseInstruction(Inst);

  // Run the normal optimizations on Release.
  OptimizeIndividualCallImpl(F, Release, ARCInstKind::Release, Arg);
  return true;
}

/// Turn objc_autoreleaseReturnValue into objc_autorelease if the result is not
/// used as a return value.
void ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F,
                                           Instruction *AutoreleaseRV,
                                           ARCInstKind &Class) {
  // Check for a return of the pointer value.
  const Value *Ptr = GetArgRCIdentityRoot(AutoreleaseRV);

  // If the argument is ConstantPointerNull or UndefValue, its other users
  // aren't actually interesting to look at.
  if (isa<ConstantData>(Ptr))
    return;

  SmallVector<const Value *, 2> Users;
  Users.push_back(Ptr);

  // Add PHIs that are equivalent to Ptr to Users.
  if (const PHINode *PN = dyn_cast<PHINode>(Ptr))
    getEquivalentPHIs(*PN, Users);

  // Worklist search through bitcasts for any return (or RetainRV) of the
  // pointer; if one exists, the RV form must be preserved.
  do {
    Ptr = Users.pop_back_val();
    for (const User *U : Ptr->users()) {
      if (isa<ReturnInst>(U) || GetBasicARCInstKind(U) == ARCInstKind::RetainRV)
        return;
      if (isa<BitCastInst>(U))
        Users.push_back(U);
    }
  } while (!Users.empty());

  Changed = true;
  ++NumPeeps;

  LLVM_DEBUG(
      dbgs() << "Transforming objc_autoreleaseReturnValue => "
                "objc_autorelease since its operand is not used as a return "
                "value.\n"
                "Old = "
             << *AutoreleaseRV << "\n");

  CallInst *AutoreleaseRVCI = cast<CallInst>(AutoreleaseRV);
  Function *NewDecl = EP.get(ARCRuntimeEntryPointKind::Autorelease);
  AutoreleaseRVCI->setCalledFunction(NewDecl);
  AutoreleaseRVCI->setTailCall(false); // Never tail call objc_autorelease.
  Class = ARCInstKind::Autorelease;

  LLVM_DEBUG(dbgs() << "New: " << *AutoreleaseRV << "\n");
}

/// Visit each call, one at a time, and make simplifications without doing any
/// additional analysis.
761 void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { 762 LLVM_DEBUG(dbgs() << "\n== ObjCARCOpt::OptimizeIndividualCalls ==\n"); 763 // Reset all the flags in preparation for recomputing them. 764 UsedInThisFunction = 0; 765 766 // Store any delayed AutoreleaseRV intrinsics, so they can be easily paired 767 // with RetainRV and UnsafeClaimRV. 768 Instruction *DelayedAutoreleaseRV = nullptr; 769 const Value *DelayedAutoreleaseRVArg = nullptr; 770 auto setDelayedAutoreleaseRV = [&](Instruction *AutoreleaseRV) { 771 assert(!DelayedAutoreleaseRV || !AutoreleaseRV); 772 DelayedAutoreleaseRV = AutoreleaseRV; 773 DelayedAutoreleaseRVArg = nullptr; 774 }; 775 auto optimizeDelayedAutoreleaseRV = [&]() { 776 if (!DelayedAutoreleaseRV) 777 return; 778 OptimizeIndividualCallImpl(F, DelayedAutoreleaseRV, 779 ARCInstKind::AutoreleaseRV, 780 DelayedAutoreleaseRVArg); 781 setDelayedAutoreleaseRV(nullptr); 782 }; 783 auto shouldDelayAutoreleaseRV = [&](Instruction *NonARCInst) { 784 // Nothing to delay, but we may as well skip the logic below. 785 if (!DelayedAutoreleaseRV) 786 return true; 787 788 // If we hit the end of the basic block we're not going to find an RV-pair. 789 // Stop delaying. 790 if (NonARCInst->isTerminator()) 791 return false; 792 793 // Given the frontend rules for emitting AutoreleaseRV, RetainRV, and 794 // UnsafeClaimRV, it's probably safe to skip over even opaque function calls 795 // here since OptimizeInlinedAutoreleaseRVCall will confirm that they 796 // have the same RCIdentityRoot. However, what really matters is 797 // skipping instructions or intrinsics that the inliner could leave behind; 798 // be conservative for now and don't skip over opaque calls, which could 799 // potentially include other ARC calls. 800 auto *CB = dyn_cast<CallBase>(NonARCInst); 801 if (!CB) 802 return true; 803 return CB->getIntrinsicID() != Intrinsic::not_intrinsic; 804 }; 805 806 // Visit all objc_* calls in F. 
  // Walk every instruction in the function. The iterator is advanced before
  // the instruction is processed so that erasing Inst does not invalidate it.
  for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
    Instruction *Inst = &*I++;

    // Record calls carrying a "clang.arc.attachedcall" operand bundle so the
    // bundled retainRV/claimRV bookkeeping stays consistent.
    if (auto *CI = dyn_cast<CallInst>(Inst))
      if (objcarc::hasAttachedCallOpBundle(CI)) {
        BundledInsts->insertRVCall(I->getIterator(), CI);
        Changed = true;
      }

    ARCInstKind Class = GetBasicARCInstKind(Inst);

    // Skip this loop if this instruction isn't itself an ARC intrinsic.
    const Value *Arg = nullptr;
    switch (Class) {
    default:
      optimizeDelayedAutoreleaseRV();
      break;
    case ARCInstKind::CallOrUser:
    case ARCInstKind::User:
    case ARCInstKind::None:
      // This is a non-ARC instruction. If we're delaying an AutoreleaseRV,
      // check if it's safe to skip over it; if not, optimize the AutoreleaseRV
      // now.
      if (!shouldDelayAutoreleaseRV(Inst))
        optimizeDelayedAutoreleaseRV();
      continue;
    case ARCInstKind::AutoreleaseRV:
      // Flush any previously delayed AutoreleaseRV before delaying this one.
      optimizeDelayedAutoreleaseRV();
      setDelayedAutoreleaseRV(Inst);
      continue;
    case ARCInstKind::RetainRV:
    case ARCInstKind::UnsafeClaimRV:
      if (DelayedAutoreleaseRV) {
        // We have a potential RV pair. Check if they cancel out.
        if (OptimizeInlinedAutoreleaseRVCall(F, Inst, Arg, Class,
                                             DelayedAutoreleaseRV,
                                             DelayedAutoreleaseRVArg)) {
          setDelayedAutoreleaseRV(nullptr);
          continue;
        }
        optimizeDelayedAutoreleaseRV();
      }
      break;
    }

    OptimizeIndividualCallImpl(F, Inst, Class, Arg);
  }

  // Catch the final delayed AutoreleaseRV.
  optimizeDelayedAutoreleaseRV();
}

/// This function returns true if the value is inert. An ObjC ARC runtime call
/// taking an inert operand can be safely deleted.
///
/// A value is inert when it is null/undef, a global variable carrying the
/// "objc_arc_inert" attribute, or a phi all of whose incoming values are
/// themselves inert. \p VisitedPhis breaks cycles among phis: a phi that has
/// already been visited is treated as inert.
static bool isInertARCValue(Value *V, SmallPtrSet<Value *, 1> &VisitedPhis) {
  V = V->stripPointerCasts();

  if (IsNullOrUndef(V))
    return true;

  // See if this is a global attribute annotated with an 'objc_arc_inert'.
  if (auto *GV = dyn_cast<GlobalVariable>(V))
    if (GV->hasAttribute("objc_arc_inert"))
      return true;

  if (auto PN = dyn_cast<PHINode>(V)) {
    // Ignore this phi if it has already been discovered.
    if (!VisitedPhis.insert(PN).second)
      return true;
    // Look through phis's operands.
    for (Value *Opnd : PN->incoming_values())
      if (!isInertARCValue(Opnd, VisitedPhis))
        return false;
    return true;
  }

  return false;
}

/// Optimize a single ARC runtime call:
///  - delete calls on inert or null arguments,
///  - erase no-op casts,
///  - turn weak-pointer operations on null into stores of poison (UB),
///  - strength-reduce unused autorelease{,RV} into release,
///  - normalize tail-call and nounwind markers,
///  - and hoist calls whose argument is a phi with null incoming values into
///    the predecessors with non-null values.
/// Updates UsedInThisFunction with the classes actually seen.
void ObjCARCOpt::OptimizeIndividualCallImpl(Function &F, Instruction *Inst,
                                            ARCInstKind Class,
                                            const Value *Arg) {
  LLVM_DEBUG(dbgs() << "Visiting: Class: " << Class << "; " << *Inst << "\n");

  // We can delete this call if it takes an inert value.
  SmallPtrSet<Value *, 1> VisitedPhis;

  // Calls tracked by the bundled-RV bookkeeping must not be touched here.
  if (BundledInsts->contains(Inst)) {
    UsedInThisFunction |= 1 << unsigned(Class);
    return;
  }

  if (IsNoopOnGlobal(Class))
    if (isInertARCValue(Inst->getOperand(0), VisitedPhis)) {
      if (!Inst->getType()->isVoidTy())
        Inst->replaceAllUsesWith(Inst->getOperand(0));
      Inst->eraseFromParent();
      Changed = true;
      return;
    }

  switch (Class) {
  default:
    break;

  // Delete no-op casts. These function calls have special semantics, but
  // the semantics are entirely implemented via lowering in the front-end,
  // so by the time they reach the optimizer, they are just no-op calls
  // which return their argument.
  //
  // There are gray areas here, as the ability to cast reference-counted
  // pointers to raw void* and back allows code to break ARC assumptions,
  // however these are currently considered to be unimportant.
  case ARCInstKind::NoopCast:
    Changed = true;
    ++NumNoops;
    LLVM_DEBUG(dbgs() << "Erasing no-op cast: " << *Inst << "\n");
    EraseInstruction(Inst);
    return;

  // If the pointer-to-weak-pointer is null, it's undefined behavior.
  case ARCInstKind::StoreWeak:
  case ARCInstKind::LoadWeak:
  case ARCInstKind::LoadWeakRetained:
  case ARCInstKind::InitWeak:
  case ARCInstKind::DestroyWeak: {
    CallInst *CI = cast<CallInst>(Inst);
    if (IsNullOrUndef(CI->getArgOperand(0))) {
      Changed = true;
      // Materialize the UB explicitly: store through a poison pointer, then
      // replace the call's result with poison.
      new StoreInst(ConstantInt::getTrue(CI->getContext()),
                    PoisonValue::get(PointerType::getUnqual(CI->getContext())),
                    CI->getIterator());
      Value *NewValue = PoisonValue::get(CI->getType());
      LLVM_DEBUG(
          dbgs() << "A null pointer-to-weak-pointer is undefined behavior."
                    "\nOld = "
                 << *CI << "\nNew = " << *NewValue << "\n");
      CI->replaceAllUsesWith(NewValue);
      CI->eraseFromParent();
      return;
    }
    break;
  }
  case ARCInstKind::CopyWeak:
  case ARCInstKind::MoveWeak: {
    // Same as above, but these take two pointer-to-weak operands.
    CallInst *CI = cast<CallInst>(Inst);
    if (IsNullOrUndef(CI->getArgOperand(0)) ||
        IsNullOrUndef(CI->getArgOperand(1))) {
      Changed = true;
      new StoreInst(ConstantInt::getTrue(CI->getContext()),
                    PoisonValue::get(PointerType::getUnqual(CI->getContext())),
                    CI->getIterator());

      Value *NewValue = PoisonValue::get(CI->getType());
      LLVM_DEBUG(
          dbgs() << "A null pointer-to-weak-pointer is undefined behavior."
                    "\nOld = "
                 << *CI << "\nNew = " << *NewValue << "\n");

      CI->replaceAllUsesWith(NewValue);
      CI->eraseFromParent();
      return;
    }
    break;
  }
  case ARCInstKind::RetainRV:
    if (OptimizeRetainRVCall(F, Inst))
      return;
    break;
  case ARCInstKind::AutoreleaseRV:
    OptimizeAutoreleaseRVCall(F, Inst, Class);
    break;
  }

  // objc_autorelease(x) -> objc_release(x) if x is otherwise unused.
  if (IsAutorelease(Class) && Inst->use_empty()) {
    CallInst *Call = cast<CallInst>(Inst);
    const Value *Arg = Call->getArgOperand(0);
    Arg = FindSingleUseIdentifiedObject(Arg);
    if (Arg) {
      Changed = true;
      ++NumAutoreleases;

      // Create the declaration lazily.
      LLVMContext &C = Inst->getContext();

      Function *Decl = EP.get(ARCRuntimeEntryPointKind::Release);
      CallInst *NewCall = CallInst::Create(Decl, Call->getArgOperand(0), "",
                                           Call->getIterator());
      NewCall->setMetadata(MDKindCache.get(ARCMDKindID::ImpreciseRelease),
                           MDNode::get(C, std::nullopt));

      LLVM_DEBUG(dbgs() << "Replacing autorelease{,RV}(x) with objc_release(x) "
                           "since x is otherwise unused.\nOld: "
                        << *Call << "\nNew: " << *NewCall << "\n");

      EraseInstruction(Call);
      // Continue processing the new release below (tail/nounwind/null
      // handling applies to it as well).
      Inst = NewCall;
      Class = ARCInstKind::Release;
    }
  }

  // For functions which can never be passed stack arguments, add
  // a tail keyword.
  if (IsAlwaysTail(Class) && !cast<CallInst>(Inst)->isNoTailCall()) {
    Changed = true;
    LLVM_DEBUG(
        dbgs() << "Adding tail keyword to function since it can never be "
                  "passed stack args: "
               << *Inst << "\n");
    cast<CallInst>(Inst)->setTailCall();
  }

  // Ensure that functions that can never have a "tail" keyword due to the
  // semantics of ARC truly do not do so.
  if (IsNeverTail(Class)) {
    Changed = true;
    LLVM_DEBUG(dbgs() << "Removing tail keyword from function: " << *Inst
                      << "\n");
    cast<CallInst>(Inst)->setTailCall(false);
  }

  // Set nounwind as needed.
  if (IsNoThrow(Class)) {
    Changed = true;
    LLVM_DEBUG(dbgs() << "Found no throw class. Setting nounwind on: " << *Inst
                      << "\n");
    cast<CallInst>(Inst)->setDoesNotThrow();
  }

  // Note: This catches instructions unrelated to ARC.
  if (!IsNoopOnNull(Class)) {
    UsedInThisFunction |= 1 << unsigned(Class);
    return;
  }

  // If we haven't already looked up the root, look it up now.
  if (!Arg)
    Arg = GetArgRCIdentityRoot(Inst);

  // ARC calls with null are no-ops. Delete them.
  if (IsNullOrUndef(Arg)) {
    Changed = true;
    ++NumNoops;
    LLVM_DEBUG(dbgs() << "ARC calls with null are no-ops. Erasing: " << *Inst
                      << "\n");
    EraseInstruction(Inst);
    return;
  }

  // Keep track of which of retain, release, autorelease, and retain_block
  // are actually present in this function.
  UsedInThisFunction |= 1 << unsigned(Class);

  // If Arg is a PHI, and one or more incoming values to the
  // PHI are null, and the call is control-equivalent to the PHI, and there
  // are no relevant side effects between the PHI and the call, and the call
  // is not a release that doesn't have the clang.imprecise_release tag, the
  // call could be pushed up to just those paths with non-null incoming
  // values. For now, don't bother splitting critical edges for this.
  if (Class == ARCInstKind::Release &&
      !Inst->getMetadata(MDKindCache.get(ARCMDKindID::ImpreciseRelease)))
    return;

  SmallVector<std::pair<Instruction *, const Value *>, 4> Worklist;
  Worklist.push_back(std::make_pair(Inst, Arg));
  do {
    std::pair<Instruction *, const Value *> Pair = Worklist.pop_back_val();
    Inst = Pair.first;
    Arg = Pair.second;

    const PHINode *PN = dyn_cast<PHINode>(Arg);
    if (!PN)
      continue;

    // Determine if the PHI has any null operands, or any incoming
    // critical edges.
    bool HasNull = false;
    bool HasCriticalEdges = false;
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
      Value *Incoming = GetRCIdentityRoot(PN->getIncomingValue(i));
      if (IsNullOrUndef(Incoming))
        HasNull = true;
      else if (PN->getIncomingBlock(i)->getTerminator()->getNumSuccessors() !=
               1) {
        HasCriticalEdges = true;
        break;
      }
    }
    // If we have null operands and no critical edges, optimize.
    if (HasCriticalEdges)
      continue;
    if (!HasNull)
      continue;

    Instruction *DepInst = nullptr;

    // Check that there is nothing that cares about the reference
    // count between the call and the phi.
    switch (Class) {
    case ARCInstKind::Retain:
    case ARCInstKind::RetainBlock:
      // These can always be moved up.
      break;
    case ARCInstKind::Release:
      // These can't be moved across things that care about the retain
      // count.
      DepInst = findSingleDependency(NeedsPositiveRetainCount, Arg,
                                     Inst->getParent(), Inst, PA);
      break;
    case ARCInstKind::Autorelease:
      // These can't be moved across autorelease pool scope boundaries.
      DepInst = findSingleDependency(AutoreleasePoolBoundary, Arg,
                                     Inst->getParent(), Inst, PA);
      break;
    case ARCInstKind::UnsafeClaimRV:
    case ARCInstKind::RetainRV:
    case ARCInstKind::AutoreleaseRV:
      // Don't move these; the RV optimization depends on the autoreleaseRV
      // being tail called, and the retainRV being immediately after a call
      // (which might still happen if we get lucky with codegen layout, but
      // it's not worth taking the chance).
      continue;
    default:
      llvm_unreachable("Invalid dependence flavor");
    }

    // Only move if the dependency analysis came back with exactly the phi
    // itself (i.e. nothing in between cares about the reference count).
    if (DepInst != PN)
      continue;

    Changed = true;
    ++NumPartialNoops;
    // Clone the call into each predecessor that has a non-null value.
    CallInst *CInst = cast<CallInst>(Inst);
    Type *ParamTy = CInst->getArgOperand(0)->getType();
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
      Value *Incoming = GetRCIdentityRoot(PN->getIncomingValue(i));
      if (IsNullOrUndef(Incoming))
        continue;
      Value *Op = PN->getIncomingValue(i);
      BasicBlock::iterator InsertPos =
          PN->getIncomingBlock(i)->back().getIterator();
      // Drop any funclet bundle from the original and re-derive it from the
      // insertion block, since the clone lives in a different block.
      SmallVector<OperandBundleDef, 1> OpBundles;
      cloneOpBundlesIf(CInst, OpBundles, [](const OperandBundleUse &B) {
        return B.getTagID() != LLVMContext::OB_funclet;
      });
      addOpBundleForFunclet(InsertPos->getParent(), OpBundles);
      CallInst *Clone = CallInst::Create(CInst, OpBundles);
      if (Op->getType() != ParamTy)
        Op = new BitCastInst(Op, ParamTy, "", InsertPos);
      Clone->setArgOperand(0, Op);
      Clone->insertBefore(*InsertPos->getParent(), InsertPos);

      LLVM_DEBUG(dbgs() << "Cloning " << *CInst << "\n"
                           "And inserting clone at "
                        << *InsertPos << "\n");
      // Clones may themselves have phi arguments; revisit them.
      Worklist.push_back(std::make_pair(Clone, Incoming));
    }
    // Erase the original call.
    LLVM_DEBUG(dbgs() << "Erasing: " << *CInst << "\n");
    EraseInstruction(CInst);
  } while (!Worklist.empty());
}

/// If we have a top down pointer in the S_Use state, make sure that there are
/// no CFG hazards by checking the states of various bottom up pointers.
1173 static void CheckForUseCFGHazard(const Sequence SuccSSeq, 1174 const bool SuccSRRIKnownSafe, 1175 TopDownPtrState &S, 1176 bool &SomeSuccHasSame, 1177 bool &AllSuccsHaveSame, 1178 bool &NotAllSeqEqualButKnownSafe, 1179 bool &ShouldContinue) { 1180 switch (SuccSSeq) { 1181 case S_CanRelease: { 1182 if (!S.IsKnownSafe() && !SuccSRRIKnownSafe) { 1183 S.ClearSequenceProgress(); 1184 break; 1185 } 1186 S.SetCFGHazardAfflicted(true); 1187 ShouldContinue = true; 1188 break; 1189 } 1190 case S_Use: 1191 SomeSuccHasSame = true; 1192 break; 1193 case S_Stop: 1194 case S_MovableRelease: 1195 if (!S.IsKnownSafe() && !SuccSRRIKnownSafe) 1196 AllSuccsHaveSame = false; 1197 else 1198 NotAllSeqEqualButKnownSafe = true; 1199 break; 1200 case S_Retain: 1201 llvm_unreachable("bottom-up pointer in retain state!"); 1202 case S_None: 1203 llvm_unreachable("This should have been handled earlier."); 1204 } 1205 } 1206 1207 /// If we have a Top Down pointer in the S_CanRelease state, make sure that 1208 /// there are no CFG hazards by checking the states of various bottom up 1209 /// pointers. 1210 static void CheckForCanReleaseCFGHazard(const Sequence SuccSSeq, 1211 const bool SuccSRRIKnownSafe, 1212 TopDownPtrState &S, 1213 bool &SomeSuccHasSame, 1214 bool &AllSuccsHaveSame, 1215 bool &NotAllSeqEqualButKnownSafe) { 1216 switch (SuccSSeq) { 1217 case S_CanRelease: 1218 SomeSuccHasSame = true; 1219 break; 1220 case S_Stop: 1221 case S_MovableRelease: 1222 case S_Use: 1223 if (!S.IsKnownSafe() && !SuccSRRIKnownSafe) 1224 AllSuccsHaveSame = false; 1225 else 1226 NotAllSeqEqualButKnownSafe = true; 1227 break; 1228 case S_Retain: 1229 llvm_unreachable("bottom-up pointer in retain state!"); 1230 case S_None: 1231 llvm_unreachable("This should have been handled earlier."); 1232 } 1233 } 1234 1235 /// Check for critical edges, loop boundaries, irreducible control flow, or 1236 /// other CFG structures where moving code across the edge would result in it 1237 /// being executed more. 
void
ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
                               DenseMap<const BasicBlock *, BBState> &BBStates,
                               BBState &MyStates) const {
  // If any top-down local-use or possible-dec has a succ which is earlier in
  // the sequence, forget it.
  for (auto I = MyStates.top_down_ptr_begin(), E = MyStates.top_down_ptr_end();
       I != E; ++I) {
    TopDownPtrState &S = I->second;
    const Sequence Seq = I->second.GetSeq();

    // We only care about S_Retain, S_CanRelease, and S_Use.
    if (Seq == S_None)
      continue;

    // Make sure that if extra top down states are added in the future that this
    // code is updated to handle it.
    assert((Seq == S_Retain || Seq == S_CanRelease || Seq == S_Use) &&
           "Unknown top down sequence state.");

    const Value *Arg = I->first;
    bool SomeSuccHasSame = false;
    bool AllSuccsHaveSame = true;
    bool NotAllSeqEqualButKnownSafe = false;

    for (const BasicBlock *Succ : successors(BB)) {
      // If VisitBottomUp has pointer information for this successor, take
      // what we know about it.
      const DenseMap<const BasicBlock *, BBState>::iterator BBI =
          BBStates.find(Succ);
      assert(BBI != BBStates.end());
      const BottomUpPtrState &SuccS = BBI->second.getPtrBottomUpState(Arg);
      const Sequence SuccSSeq = SuccS.GetSeq();

      // If bottom up, the pointer is in an S_None state, clear the sequence
      // progress since the sequence in the bottom up state finished
      // suggesting a mismatch in between retains/releases. This is true for
      // all three cases that we are handling here: S_Retain, S_Use, and
      // S_CanRelease.
      if (SuccSSeq == S_None) {
        S.ClearSequenceProgress();
        continue;
      }

      // If we have S_Use or S_CanRelease, perform our check for cfg hazard
      // checks.
      const bool SuccSRRIKnownSafe = SuccS.IsKnownSafe();

      // *NOTE* We do not use Seq from above here since we are allowing for
      // S.GetSeq() to change while we are visiting basic blocks.
      switch(S.GetSeq()) {
      case S_Use: {
        bool ShouldContinue = false;
        CheckForUseCFGHazard(SuccSSeq, SuccSRRIKnownSafe, S, SomeSuccHasSame,
                             AllSuccsHaveSame, NotAllSeqEqualButKnownSafe,
                             ShouldContinue);
        if (ShouldContinue)
          continue;
        break;
      }
      case S_CanRelease:
        CheckForCanReleaseCFGHazard(SuccSSeq, SuccSRRIKnownSafe, S,
                                    SomeSuccHasSame, AllSuccsHaveSame,
                                    NotAllSeqEqualButKnownSafe);
        break;
      case S_Retain:
      case S_None:
      case S_Stop:
      case S_MovableRelease:
        break;
      }
    }

    // If the state at the other end of any of the successor edges
    // matches the current state, require all edges to match. This
    // guards against loops in the middle of a sequence.
    if (SomeSuccHasSame && !AllSuccsHaveSame) {
      S.ClearSequenceProgress();
    } else if (NotAllSeqEqualButKnownSafe) {
      // If we would have cleared the state foregoing the fact that we are known
      // safe, stop code motion. This is because whether or not it is safe to
      // remove RR pairs via KnownSafe is an orthogonal concept to whether we
      // are allowed to perform code motion.
      S.SetCFGHazardAfflicted(true);
    }
  }
}

/// Apply the bottom-up dataflow transfer function for a single instruction,
/// updating the per-pointer states in \p MyStates and recording completed
/// retain/release candidates in \p Retains. Returns true if nested
/// retain+release pairs were detected.
bool ObjCARCOpt::VisitInstructionBottomUp(
    Instruction *Inst, BasicBlock *BB, BlotMapVector<Value *, RRInfo> &Retains,
    BBState &MyStates) {
  bool NestingDetected = false;
  ARCInstKind Class = GetARCInstKind(Inst);
  const Value *Arg = nullptr;

  LLVM_DEBUG(dbgs() << " Class: " << Class << "\n");

  switch (Class) {
  case ARCInstKind::Release: {
    Arg = GetArgRCIdentityRoot(Inst);

    BottomUpPtrState &S = MyStates.getPtrBottomUpState(Arg);
    NestingDetected |= S.InitBottomUp(MDKindCache, Inst);
    break;
  }
  case ARCInstKind::RetainBlock:
    // In OptimizeIndividualCalls, we have strength reduced all optimizable
    // objc_retainBlocks to objc_retains. Thus at this point any
    // objc_retainBlocks that we see are not optimizable.
    break;
  case ARCInstKind::Retain:
  case ARCInstKind::RetainRV: {
    Arg = GetArgRCIdentityRoot(Inst);
    BottomUpPtrState &S = MyStates.getPtrBottomUpState(Arg);
    if (S.MatchWithRetain()) {
      // Don't do retain+release tracking for ARCInstKind::RetainRV, because
      // it's better to let it remain as the first instruction after a call.
      if (Class != ARCInstKind::RetainRV) {
        LLVM_DEBUG(dbgs() << " Matching with: " << *Inst << "\n");
        Retains[Inst] = S.GetRRInfo();
      }
      S.ClearSequenceProgress();
    }
    // A retain moving bottom up can be a use.
    break;
  }
  case ARCInstKind::AutoreleasepoolPop:
    // Conservatively, clear MyStates for all known pointers.
    MyStates.clearBottomUpPointers();
    return NestingDetected;
  case ARCInstKind::AutoreleasepoolPush:
  case ARCInstKind::None:
    // These are irrelevant.
    return NestingDetected;
  default:
    break;
  }

  // Consider any other possible effects of this instruction on each
  // pointer being tracked.
  for (auto MI = MyStates.bottom_up_ptr_begin(),
            ME = MyStates.bottom_up_ptr_end();
       MI != ME; ++MI) {
    const Value *Ptr = MI->first;
    if (Ptr == Arg)
      continue; // Handled above.
    BottomUpPtrState &S = MI->second;

    if (S.HandlePotentialAlterRefCount(Inst, Ptr, PA, Class))
      continue;

    S.HandlePotentialUse(BB, Inst, Ptr, PA, Class);
  }

  return NestingDetected;
}

/// Run the bottom-up dataflow over basic block \p BB: merge successor states,
/// then visit the block's instructions in reverse. Returns true if nesting
/// was detected; sets DisableRetainReleasePairing and returns false if the
/// number of tracked pointer states exceeds MaxPtrStates.
bool ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
                               DenseMap<const BasicBlock *, BBState> &BBStates,
                               BlotMapVector<Value *, RRInfo> &Retains) {
  LLVM_DEBUG(dbgs() << "\n== ObjCARCOpt::VisitBottomUp ==\n");

  bool NestingDetected = false;
  BBState &MyStates = BBStates[BB];

  // Merge the states from each successor to compute the initial state
  // for the current block.
  BBState::edge_iterator SI(MyStates.succ_begin()),
                         SE(MyStates.succ_end());
  if (SI != SE) {
    const BasicBlock *Succ = *SI;
    DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Succ);
    assert(I != BBStates.end());
    MyStates.InitFromSucc(I->second);
    ++SI;
    for (; SI != SE; ++SI) {
      Succ = *SI;
      I = BBStates.find(Succ);
      assert(I != BBStates.end());
      MyStates.MergeSucc(I->second);
    }
  }

  LLVM_DEBUG(dbgs() << "Before:\n"
                    << BBStates[BB] << "\n"
                    << "Performing Dataflow:\n");

  // Visit all the instructions, bottom-up.
  for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; --I) {
    Instruction *Inst = &*std::prev(I);

    // Invoke instructions are visited as part of their successors (below).
    if (isa<InvokeInst>(Inst))
      continue;

    LLVM_DEBUG(dbgs() << " Visiting " << *Inst << "\n");

    NestingDetected |= VisitInstructionBottomUp(Inst, BB, Retains, MyStates);

    // Bail out if the number of pointers being tracked becomes too large so
    // that this pass can complete in a reasonable amount of time.
    if (MyStates.bottom_up_ptr_list_size() > MaxPtrStates) {
      DisableRetainReleasePairing = true;
      return false;
    }
  }

  // If there's a predecessor with an invoke, visit the invoke as if it were
  // part of this block, since we can't insert code after an invoke in its own
  // block, and we don't want to split critical edges.
  for (BBState::edge_iterator PI(MyStates.pred_begin()),
       PE(MyStates.pred_end()); PI != PE; ++PI) {
    BasicBlock *Pred = *PI;
    if (InvokeInst *II = dyn_cast<InvokeInst>(&Pred->back()))
      NestingDetected |= VisitInstructionBottomUp(II, BB, Retains, MyStates);
  }

  LLVM_DEBUG(dbgs() << "\nFinal State:\n" << BBStates[BB] << "\n");

  return NestingDetected;
}

// Fill ReleaseInsertPtToRCIdentityRoots, which is a map from insertion points
// to the set of RC identity roots that would be released by the release calls
// moved to the insertion points.
static void collectReleaseInsertPts(
    const BlotMapVector<Value *, RRInfo> &Retains,
    DenseMap<const Instruction *, SmallPtrSet<const Value *, 2>>
        &ReleaseInsertPtToRCIdentityRoots) {
  for (const auto &P : Retains) {
    // Retains is a map from an objc_retain call to a RRInfo of the RC identity
    // root of the call. Get the RC identity root of the objc_retain call.
    Instruction *Retain = cast<Instruction>(P.first);
    Value *Root = GetRCIdentityRoot(Retain->getOperand(0));
    // Collect all the insertion points of the objc_release calls that release
    // the RC identity root of the objc_retain call.
    for (const Instruction *InsertPt : P.second.ReverseInsertPts)
      ReleaseInsertPtToRCIdentityRoots[InsertPt].insert(Root);
  }
}

// Get the RC identity roots from an insertion point of an objc_release call.
// Return nullptr if the passed instruction isn't an insertion point.
static const SmallPtrSet<const Value *, 2> *
getRCIdentityRootsFromReleaseInsertPt(
    const Instruction *InsertPt,
    const DenseMap<const Instruction *, SmallPtrSet<const Value *, 2>>
        &ReleaseInsertPtToRCIdentityRoots) {
  auto I = ReleaseInsertPtToRCIdentityRoots.find(InsertPt);
  if (I == ReleaseInsertPtToRCIdentityRoots.end())
    return nullptr;
  return &I->second;
}

/// Apply the top-down dataflow transfer function for a single instruction,
/// updating per-pointer states in \p MyStates and recording matched releases
/// in \p Releases. Returns true if nested retain+release pairs were detected.
bool ObjCARCOpt::VisitInstructionTopDown(
    Instruction *Inst, DenseMap<Value *, RRInfo> &Releases, BBState &MyStates,
    const DenseMap<const Instruction *, SmallPtrSet<const Value *, 2>>
        &ReleaseInsertPtToRCIdentityRoots) {
  bool NestingDetected = false;
  ARCInstKind Class = GetARCInstKind(Inst);
  const Value *Arg = nullptr;

  // Make sure a call to objc_retain isn't moved past insertion points of calls
  // to objc_release.
  if (const SmallPtrSet<const Value *, 2> *Roots =
          getRCIdentityRootsFromReleaseInsertPt(
              Inst, ReleaseInsertPtToRCIdentityRoots))
    for (const auto *Root : *Roots) {
      TopDownPtrState &S = MyStates.getPtrTopDownState(Root);
      // Disable code motion if the current position is S_Retain to prevent
      // moving the objc_retain call past objc_release calls. If it's
      // S_CanRelease or larger, it's not necessary to disable code motion as
      // the insertion points that prevent the objc_retain call from moving down
      // should have been set already.
      if (S.GetSeq() == S_Retain)
        S.SetCFGHazardAfflicted(true);
    }

  LLVM_DEBUG(dbgs() << " Class: " << Class << "\n");

  switch (Class) {
  case ARCInstKind::RetainBlock:
    // In OptimizeIndividualCalls, we have strength reduced all optimizable
    // objc_retainBlocks to objc_retains. Thus at this point any
    // objc_retainBlocks that we see are not optimizable. We need to break since
    // a retain can be a potential use.
    break;
  case ARCInstKind::Retain:
  case ARCInstKind::RetainRV: {
    Arg = GetArgRCIdentityRoot(Inst);
    TopDownPtrState &S = MyStates.getPtrTopDownState(Arg);
    NestingDetected |= S.InitTopDown(Class, Inst);
    // A retain can be a potential use; proceed to the generic checking
    // code below.
    break;
  }
  case ARCInstKind::Release: {
    Arg = GetArgRCIdentityRoot(Inst);
    TopDownPtrState &S = MyStates.getPtrTopDownState(Arg);
    // Try to form a tentative pair in between this release instruction and the
    // top down pointers that we are tracking.
    if (S.MatchWithRelease(MDKindCache, Inst)) {
      // If we succeed, copy S's RRInfo into the Release -> {Retain Set
      // Map}. Then we clear S.
      LLVM_DEBUG(dbgs() << " Matching with: " << *Inst << "\n");
      Releases[Inst] = S.GetRRInfo();
      S.ClearSequenceProgress();
    }
    break;
  }
  case ARCInstKind::AutoreleasepoolPop:
    // Conservatively, clear MyStates for all known pointers.
    MyStates.clearTopDownPointers();
    return false;
  case ARCInstKind::AutoreleasepoolPush:
  case ARCInstKind::None:
    // These cannot be uses of tracked pointers; they are irrelevant here.
    return false;
  default:
    break;
  }

  // Consider any other possible effects of this instruction on each
  // pointer being tracked.
  for (auto MI = MyStates.top_down_ptr_begin(),
            ME = MyStates.top_down_ptr_end();
       MI != ME; ++MI) {
    const Value *Ptr = MI->first;
    if (Ptr == Arg)
      continue; // Handled above.
    TopDownPtrState &S = MI->second;
    if (S.HandlePotentialAlterRefCount(Inst, Ptr, PA, Class, *BundledInsts))
      continue;

    S.HandlePotentialUse(Inst, Ptr, PA, Class);
  }

  return NestingDetected;
}

/// Run the top-down dataflow over basic block \p BB: merge predecessor
/// states, visit instructions in order, then check for CFG hazards. Returns
/// true if nesting was detected; sets DisableRetainReleasePairing and returns
/// false if the number of tracked pointer states exceeds MaxPtrStates.
bool ObjCARCOpt::VisitTopDown(
    BasicBlock *BB, DenseMap<const BasicBlock *, BBState> &BBStates,
    DenseMap<Value *, RRInfo> &Releases,
    const DenseMap<const Instruction *, SmallPtrSet<const Value *, 2>>
        &ReleaseInsertPtToRCIdentityRoots) {
  LLVM_DEBUG(dbgs() << "\n== ObjCARCOpt::VisitTopDown ==\n");
  bool NestingDetected = false;
  BBState &MyStates = BBStates[BB];

  // Merge the states from each predecessor to compute the initial state
  // for the current block.
  BBState::edge_iterator PI(MyStates.pred_begin()),
                         PE(MyStates.pred_end());
  if (PI != PE) {
    const BasicBlock *Pred = *PI;
    DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Pred);
    assert(I != BBStates.end());
    MyStates.InitFromPred(I->second);
    ++PI;
    for (; PI != PE; ++PI) {
      Pred = *PI;
      I = BBStates.find(Pred);
      assert(I != BBStates.end());
      MyStates.MergePred(I->second);
    }
  }

  // Check that BB and MyStates have the same number of predecessors. This
  // prevents retain calls that live outside a loop from being moved into the
  // loop.
  if (!BB->hasNPredecessors(MyStates.pred_end() - MyStates.pred_begin()))
    for (auto I = MyStates.top_down_ptr_begin(),
              E = MyStates.top_down_ptr_end();
         I != E; ++I)
      I->second.SetCFGHazardAfflicted(true);

  LLVM_DEBUG(dbgs() << "Before:\n"
                    << BBStates[BB] << "\n"
                    << "Performing Dataflow:\n");

  // Visit all the instructions, top-down.
  for (Instruction &Inst : *BB) {
    LLVM_DEBUG(dbgs() << " Visiting " << Inst << "\n");

    NestingDetected |= VisitInstructionTopDown(
        &Inst, Releases, MyStates, ReleaseInsertPtToRCIdentityRoots);

    // Bail out if the number of pointers being tracked becomes too large so
    // that this pass can complete in a reasonable amount of time.
    if (MyStates.top_down_ptr_list_size() > MaxPtrStates) {
      DisableRetainReleasePairing = true;
      return false;
    }
  }

  LLVM_DEBUG(dbgs() << "\nState Before Checking for CFG Hazards:\n"
                    << BBStates[BB] << "\n\n");
  CheckForCFGHazards(BB, BBStates, MyStates);
  LLVM_DEBUG(dbgs() << "Final State:\n" << BBStates[BB] << "\n");
  return NestingDetected;
}

/// Compute both a forward-CFG post-order and a reverse-CFG post-order of \p F
/// via iterative DFS walks, populating pred/succ edge lists in \p BBStates as
/// a side effect.
// NOTE(review): NoObjCARCExceptionsMDKind is not referenced in this body —
// confirm whether the parameter is vestigial.
static void
ComputePostOrders(Function &F,
                  SmallVectorImpl<BasicBlock *> &PostOrder,
                  SmallVectorImpl<BasicBlock *> &ReverseCFGPostOrder,
                  unsigned NoObjCARCExceptionsMDKind,
                  DenseMap<const BasicBlock *, BBState> &BBStates) {
  /// The visited set, for doing DFS walks.
  SmallPtrSet<BasicBlock *, 16> Visited;

  // Do DFS, computing the PostOrder.
  SmallPtrSet<BasicBlock *, 16> OnStack;
  SmallVector<std::pair<BasicBlock *, succ_iterator>, 16> SuccStack;

  // Functions always have exactly one entry block, and we don't have
  // any other block that we treat like an entry block.
  BasicBlock *EntryBB = &F.getEntryBlock();
  BBState &MyStates = BBStates[EntryBB];
  MyStates.SetAsEntry();
  Instruction *EntryTI = EntryBB->getTerminator();
  SuccStack.push_back(std::make_pair(EntryBB, succ_iterator(EntryTI)));
  Visited.insert(EntryBB);
  OnStack.insert(EntryBB);
  do {
  dfs_next_succ:
    BasicBlock *CurrBB = SuccStack.back().first;
    succ_iterator SE(CurrBB->getTerminator(), false);

    while (SuccStack.back().second != SE) {
      BasicBlock *SuccBB = *SuccStack.back().second++;
      if (Visited.insert(SuccBB).second) {
        SuccStack.push_back(
            std::make_pair(SuccBB, succ_iterator(SuccBB->getTerminator())));
        BBStates[CurrBB].addSucc(SuccBB);
        BBState &SuccStates = BBStates[SuccBB];
        SuccStates.addPred(CurrBB);
        OnStack.insert(SuccBB);
        // Descend into the newly discovered successor.
        goto dfs_next_succ;
      }

      // Already-visited successor not on the DFS stack: a cross/forward edge;
      // record it (back edges to blocks still on the stack are ignored).
      if (!OnStack.count(SuccBB)) {
        BBStates[CurrBB].addSucc(SuccBB);
        BBStates[SuccBB].addPred(CurrBB);
      }
    }
    OnStack.erase(CurrBB);
    PostOrder.push_back(CurrBB);
    SuccStack.pop_back();
  } while (!SuccStack.empty());

  Visited.clear();

  // Do reverse-CFG DFS, computing the reverse-CFG PostOrder.
  // Functions may have many exits, and there also blocks which we treat
  // as exits due to ignored edges.
  SmallVector<std::pair<BasicBlock *, BBState::edge_iterator>, 16> PredStack;
  for (BasicBlock &ExitBB : F) {
    BBState &MyStates = BBStates[&ExitBB];
    if (!MyStates.isExit())
      continue;

    MyStates.SetAsExit();

    PredStack.push_back(std::make_pair(&ExitBB, MyStates.pred_begin()));
    Visited.insert(&ExitBB);
    while (!PredStack.empty()) {
    reverse_dfs_next_succ:
      BBState::edge_iterator PE = BBStates[PredStack.back().first].pred_end();
      while (PredStack.back().second != PE) {
        BasicBlock *BB = *PredStack.back().second++;
        if (Visited.insert(BB).second) {
          PredStack.push_back(std::make_pair(BB, BBStates[BB].pred_begin()));
          goto reverse_dfs_next_succ;
        }
      }
      ReverseCFGPostOrder.push_back(PredStack.pop_back_val().first);
    }
  }
}

// Visit the function both top-down and bottom-up.
bool ObjCARCOpt::Visit(Function &F,
                       DenseMap<const BasicBlock *, BBState> &BBStates,
                       BlotMapVector<Value *, RRInfo> &Retains,
                       DenseMap<Value *, RRInfo> &Releases) {
  // Use reverse-postorder traversals, because we magically know that loops
  // will be well behaved, i.e. they won't repeatedly call retain on a single
  // pointer without doing a release. We can't use the ReversePostOrderTraversal
  // class here because we want the reverse-CFG postorder to consider each
  // function exit point, and we want to ignore selected cycle edges.
  SmallVector<BasicBlock *, 16> PostOrder;
  SmallVector<BasicBlock *, 16> ReverseCFGPostOrder;
  ComputePostOrders(F, PostOrder, ReverseCFGPostOrder,
                    MDKindCache.get(ARCMDKindID::NoObjCARCExceptions),
                    BBStates);

  // Use reverse-postorder on the reverse CFG for bottom-up.
  bool BottomUpNestingDetected = false;
  for (BasicBlock *BB : llvm::reverse(ReverseCFGPostOrder)) {
    BottomUpNestingDetected |= VisitBottomUp(BB, BBStates, Retains);
    if (DisableRetainReleasePairing)
      return false;
  }

  DenseMap<const Instruction *, SmallPtrSet<const Value *, 2>>
      ReleaseInsertPtToRCIdentityRoots;
  collectReleaseInsertPts(Retains, ReleaseInsertPtToRCIdentityRoots);

  // Use reverse-postorder for top-down.
  bool TopDownNestingDetected = false;
  for (BasicBlock *BB : llvm::reverse(PostOrder)) {
    TopDownNestingDetected |=
        VisitTopDown(BB, BBStates, Releases, ReleaseInsertPtToRCIdentityRoots);
    if (DisableRetainReleasePairing)
      return false;
  }

  // Nesting counts only if both directions observed it.
  return TopDownNestingDetected && BottomUpNestingDetected;
}

/// Move the calls in RetainsToMove and ReleasesToMove.
void ObjCARCOpt::MoveCalls(Value *Arg, RRInfo &RetainsToMove,
                           RRInfo &ReleasesToMove,
                           BlotMapVector<Value *, RRInfo> &Retains,
                           DenseMap<Value *, RRInfo> &Releases,
                           SmallVectorImpl<Instruction *> &DeadInsts,
                           Module *M) {
  Type *ArgTy = Arg->getType();
  Type *ParamTy = PointerType::getUnqual(Type::getInt8Ty(ArgTy->getContext()));

  LLVM_DEBUG(dbgs() << "== ObjCARCOpt::MoveCalls ==\n");

  // Insert the new retain and release calls.
  for (Instruction *InsertPt : ReleasesToMove.ReverseInsertPts) {
    // Bitcast the argument to i8* if needed at each insertion point.
    Value *MyArg = ArgTy == ParamTy ? Arg
                                    : new BitCastInst(Arg, ParamTy, "",
                                                      InsertPt->getIterator());
    Function *Decl = EP.get(ARCRuntimeEntryPointKind::Retain);
    SmallVector<OperandBundleDef, 1> BundleList;
    addOpBundleForFunclet(InsertPt->getParent(), BundleList);
    CallInst *Call =
        CallInst::Create(Decl, MyArg, BundleList, "", InsertPt->getIterator());
    Call->setDoesNotThrow();
    Call->setTailCall();

    LLVM_DEBUG(dbgs() << "Inserting new Retain: " << *Call
                      << "\n"
                         "At insertion point: "
                      << *InsertPt << "\n");
  }
  for (Instruction *InsertPt : RetainsToMove.ReverseInsertPts) {
    Value *MyArg = ArgTy == ParamTy ? Arg
                                    : new BitCastInst(Arg, ParamTy, "",
                                                      InsertPt->getIterator());
    Function *Decl = EP.get(ARCRuntimeEntryPointKind::Release);
    SmallVector<OperandBundleDef, 1> BundleList;
    addOpBundleForFunclet(InsertPt->getParent(), BundleList);
    CallInst *Call =
        CallInst::Create(Decl, MyArg, BundleList, "", InsertPt->getIterator());
    // Attach a clang.imprecise_release metadata tag, if appropriate.
    if (MDNode *M = ReleasesToMove.ReleaseMetadata)
      Call->setMetadata(MDKindCache.get(ARCMDKindID::ImpreciseRelease), M);
    Call->setDoesNotThrow();
    if (ReleasesToMove.IsTailCallRelease)
      Call->setTailCall();

    LLVM_DEBUG(dbgs() << "Inserting new Release: " << *Call
                      << "\n"
                         "At insertion point: "
                      << *InsertPt << "\n");
  }

  // Delete the original retain and release calls.
  for (Instruction *OrigRetain : RetainsToMove.Calls) {
    Retains.blot(OrigRetain);
    DeadInsts.push_back(OrigRetain);
    LLVM_DEBUG(dbgs() << "Deleting retain: " << *OrigRetain << "\n");
  }
  for (Instruction *OrigRelease : ReleasesToMove.Calls) {
    Releases.erase(OrigRelease);
    DeadInsts.push_back(OrigRelease);
    LLVM_DEBUG(dbgs() << "Deleting release: " << *OrigRelease << "\n");
  }
}

/// Connect retain candidates with their matching release candidates,
/// iterating until a closed set of related calls is found, and validate that
/// path counts balance so the pair can be removed or moved safely.
bool ObjCARCOpt::PairUpRetainsAndReleases(
    DenseMap<const BasicBlock *, BBState> &BBStates,
    BlotMapVector<Value *, RRInfo> &Retains,
    DenseMap<Value *, RRInfo> &Releases, Module *M,
    Instruction *Retain,
    SmallVectorImpl<Instruction *> &DeadInsts, RRInfo &RetainsToMove,
    RRInfo &ReleasesToMove, Value *Arg, bool KnownSafe,
    bool &AnyPairsCompletelyEliminated) {
  // If a pair happens in a region where it is known that the reference count
  // is already incremented, we can similarly ignore possible decrements unless
  // we are dealing with a retainable object with multiple provenance sources.
  bool KnownSafeTD = true, KnownSafeBU = true;
  bool CFGHazardAfflicted = false;

  // Connect the dots between the top-down-collected RetainsToMove and
  // bottom-up-collected ReleasesToMove to form sets of related calls.
  // This is an iterative process so that we connect multiple releases
  // to multiple retains if needed.
  unsigned OldDelta = 0;
  unsigned NewDelta = 0;
  unsigned OldCount = 0;
  unsigned NewCount = 0;
  bool FirstRelease = true;
  for (SmallVector<Instruction *, 4> NewRetains{Retain};;) {
    SmallVector<Instruction *, 4> NewReleases;
    for (Instruction *NewRetain : NewRetains) {
      auto It = Retains.find(NewRetain);
      assert(It != Retains.end());
      const RRInfo &NewRetainRRI = It->second;
      KnownSafeTD &= NewRetainRRI.KnownSafe;
      CFGHazardAfflicted |= NewRetainRRI.CFGHazardAfflicted;
      for (Instruction *NewRetainRelease : NewRetainRRI.Calls) {
        auto Jt = Releases.find(NewRetainRelease);
        if (Jt == Releases.end())
          return false;
        const RRInfo &NewRetainReleaseRRI = Jt->second;

        // If the release does not have a reference to the retain as well,
        // something happened which is unaccounted for. Do not do anything.
        //
        // This can happen if we catch an additive overflow during path count
        // merging.
        if (!NewRetainReleaseRRI.Calls.count(NewRetain))
          return false;

        if (ReleasesToMove.Calls.insert(NewRetainRelease).second) {
          // If we overflow when we compute the path count, don't remove/move
          // anything.
          const BBState &NRRBBState = BBStates[NewRetainRelease->getParent()];
          unsigned PathCount = BBState::OverflowOccurredValue;
          if (NRRBBState.GetAllPathCountWithOverflow(PathCount))
            return false;
          assert(PathCount != BBState::OverflowOccurredValue &&
                 "PathCount at this point can not be "
                 "OverflowOccurredValue.");
          OldDelta -= PathCount;

          // Merge the ReleaseMetadata and IsTailCallRelease values.
1882 if (FirstRelease) { 1883 ReleasesToMove.ReleaseMetadata = 1884 NewRetainReleaseRRI.ReleaseMetadata; 1885 ReleasesToMove.IsTailCallRelease = 1886 NewRetainReleaseRRI.IsTailCallRelease; 1887 FirstRelease = false; 1888 } else { 1889 if (ReleasesToMove.ReleaseMetadata != 1890 NewRetainReleaseRRI.ReleaseMetadata) 1891 ReleasesToMove.ReleaseMetadata = nullptr; 1892 if (ReleasesToMove.IsTailCallRelease != 1893 NewRetainReleaseRRI.IsTailCallRelease) 1894 ReleasesToMove.IsTailCallRelease = false; 1895 } 1896 1897 // Collect the optimal insertion points. 1898 if (!KnownSafe) 1899 for (Instruction *RIP : NewRetainReleaseRRI.ReverseInsertPts) { 1900 if (ReleasesToMove.ReverseInsertPts.insert(RIP).second) { 1901 // If we overflow when we compute the path count, don't 1902 // remove/move anything. 1903 const BBState &RIPBBState = BBStates[RIP->getParent()]; 1904 PathCount = BBState::OverflowOccurredValue; 1905 if (RIPBBState.GetAllPathCountWithOverflow(PathCount)) 1906 return false; 1907 assert(PathCount != BBState::OverflowOccurredValue && 1908 "PathCount at this point can not be " 1909 "OverflowOccurredValue."); 1910 NewDelta -= PathCount; 1911 } 1912 } 1913 NewReleases.push_back(NewRetainRelease); 1914 } 1915 } 1916 } 1917 NewRetains.clear(); 1918 if (NewReleases.empty()) break; 1919 1920 // Back the other way. 1921 for (Instruction *NewRelease : NewReleases) { 1922 auto It = Releases.find(NewRelease); 1923 assert(It != Releases.end()); 1924 const RRInfo &NewReleaseRRI = It->second; 1925 KnownSafeBU &= NewReleaseRRI.KnownSafe; 1926 CFGHazardAfflicted |= NewReleaseRRI.CFGHazardAfflicted; 1927 for (Instruction *NewReleaseRetain : NewReleaseRRI.Calls) { 1928 auto Jt = Retains.find(NewReleaseRetain); 1929 if (Jt == Retains.end()) 1930 return false; 1931 const RRInfo &NewReleaseRetainRRI = Jt->second; 1932 1933 // If the retain does not have a reference to the release as well, 1934 // something happened which is unaccounted for. Do not do anything. 
1935 // 1936 // This can happen if we catch an additive overflow during path count 1937 // merging. 1938 if (!NewReleaseRetainRRI.Calls.count(NewRelease)) 1939 return false; 1940 1941 if (RetainsToMove.Calls.insert(NewReleaseRetain).second) { 1942 // If we overflow when we compute the path count, don't remove/move 1943 // anything. 1944 const BBState &NRRBBState = BBStates[NewReleaseRetain->getParent()]; 1945 unsigned PathCount = BBState::OverflowOccurredValue; 1946 if (NRRBBState.GetAllPathCountWithOverflow(PathCount)) 1947 return false; 1948 assert(PathCount != BBState::OverflowOccurredValue && 1949 "PathCount at this point can not be " 1950 "OverflowOccurredValue."); 1951 OldDelta += PathCount; 1952 OldCount += PathCount; 1953 1954 // Collect the optimal insertion points. 1955 if (!KnownSafe) 1956 for (Instruction *RIP : NewReleaseRetainRRI.ReverseInsertPts) { 1957 if (RetainsToMove.ReverseInsertPts.insert(RIP).second) { 1958 // If we overflow when we compute the path count, don't 1959 // remove/move anything. 1960 const BBState &RIPBBState = BBStates[RIP->getParent()]; 1961 1962 PathCount = BBState::OverflowOccurredValue; 1963 if (RIPBBState.GetAllPathCountWithOverflow(PathCount)) 1964 return false; 1965 assert(PathCount != BBState::OverflowOccurredValue && 1966 "PathCount at this point can not be " 1967 "OverflowOccurredValue."); 1968 NewDelta += PathCount; 1969 NewCount += PathCount; 1970 } 1971 } 1972 NewRetains.push_back(NewReleaseRetain); 1973 } 1974 } 1975 } 1976 if (NewRetains.empty()) break; 1977 } 1978 1979 // We can only remove pointers if we are known safe in both directions. 1980 bool UnconditionallySafe = KnownSafeTD && KnownSafeBU; 1981 if (UnconditionallySafe) { 1982 RetainsToMove.ReverseInsertPts.clear(); 1983 ReleasesToMove.ReverseInsertPts.clear(); 1984 NewCount = 0; 1985 } else { 1986 // Determine whether the new insertion points we computed preserve the 1987 // balance of retain and release calls through the program. 
1988 // TODO: If the fully aggressive solution isn't valid, try to find a 1989 // less aggressive solution which is. 1990 if (NewDelta != 0) 1991 return false; 1992 1993 // At this point, we are not going to remove any RR pairs, but we still are 1994 // able to move RR pairs. If one of our pointers is afflicted with 1995 // CFGHazards, we cannot perform such code motion so exit early. 1996 const bool WillPerformCodeMotion = 1997 !RetainsToMove.ReverseInsertPts.empty() || 1998 !ReleasesToMove.ReverseInsertPts.empty(); 1999 if (CFGHazardAfflicted && WillPerformCodeMotion) 2000 return false; 2001 } 2002 2003 // Determine whether the original call points are balanced in the retain and 2004 // release calls through the program. If not, conservatively don't touch 2005 // them. 2006 // TODO: It's theoretically possible to do code motion in this case, as 2007 // long as the existing imbalances are maintained. 2008 if (OldDelta != 0) 2009 return false; 2010 2011 Changed = true; 2012 assert(OldCount != 0 && "Unreachable code?"); 2013 NumRRs += OldCount - NewCount; 2014 // Set to true if we completely removed any RR pairs. 2015 AnyPairsCompletelyEliminated = NewCount == 0; 2016 2017 // We can move calls! 2018 return true; 2019 } 2020 2021 /// Identify pairings between the retains and releases, and delete and/or move 2022 /// them. 2023 bool ObjCARCOpt::PerformCodePlacement( 2024 DenseMap<const BasicBlock *, BBState> &BBStates, 2025 BlotMapVector<Value *, RRInfo> &Retains, 2026 DenseMap<Value *, RRInfo> &Releases, Module *M) { 2027 LLVM_DEBUG(dbgs() << "\n== ObjCARCOpt::PerformCodePlacement ==\n"); 2028 2029 bool AnyPairsCompletelyEliminated = false; 2030 SmallVector<Instruction *, 8> DeadInsts; 2031 2032 // Visit each retain. 
2033 for (BlotMapVector<Value *, RRInfo>::const_iterator I = Retains.begin(), 2034 E = Retains.end(); 2035 I != E; ++I) { 2036 Value *V = I->first; 2037 if (!V) continue; // blotted 2038 2039 Instruction *Retain = cast<Instruction>(V); 2040 2041 LLVM_DEBUG(dbgs() << "Visiting: " << *Retain << "\n"); 2042 2043 Value *Arg = GetArgRCIdentityRoot(Retain); 2044 2045 // If the object being released is in static or stack storage, we know it's 2046 // not being managed by ObjC reference counting, so we can delete pairs 2047 // regardless of what possible decrements or uses lie between them. 2048 bool KnownSafe = isa<Constant>(Arg) || isa<AllocaInst>(Arg); 2049 2050 // A constant pointer can't be pointing to an object on the heap. It may 2051 // be reference-counted, but it won't be deleted. 2052 if (const LoadInst *LI = dyn_cast<LoadInst>(Arg)) 2053 if (const GlobalVariable *GV = 2054 dyn_cast<GlobalVariable>( 2055 GetRCIdentityRoot(LI->getPointerOperand()))) 2056 if (GV->isConstant()) 2057 KnownSafe = true; 2058 2059 // Connect the dots between the top-down-collected RetainsToMove and 2060 // bottom-up-collected ReleasesToMove to form sets of related calls. 2061 RRInfo RetainsToMove, ReleasesToMove; 2062 2063 bool PerformMoveCalls = PairUpRetainsAndReleases( 2064 BBStates, Retains, Releases, M, Retain, DeadInsts, 2065 RetainsToMove, ReleasesToMove, Arg, KnownSafe, 2066 AnyPairsCompletelyEliminated); 2067 2068 if (PerformMoveCalls) { 2069 // Ok, everything checks out and we're all set. Let's move/delete some 2070 // code! 2071 MoveCalls(Arg, RetainsToMove, ReleasesToMove, 2072 Retains, Releases, DeadInsts, M); 2073 } 2074 } 2075 2076 // Now that we're done moving everything, we can delete the newly dead 2077 // instructions, as we no longer need them as insert points. 2078 while (!DeadInsts.empty()) 2079 EraseInstruction(DeadInsts.pop_back_val()); 2080 2081 return AnyPairsCompletelyEliminated; 2082 } 2083 2084 /// Weak pointer optimizations. 
/// Weak-pointer optimizations: forward earlier weak loads/stores to later
/// weak loads in the same block, and delete allocas used only by weak entry
/// points.
void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
  LLVM_DEBUG(dbgs() << "\n== ObjCARCOpt::OptimizeWeakCalls ==\n");

  // First, do memdep-style RLE and S2L optimizations. We can't use memdep
  // itself because it uses AliasAnalysis and we need to do provenance
  // queries instead.
  for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
    // Advance before any possible erasure of Inst below.
    Instruction *Inst = &*I++;

    LLVM_DEBUG(dbgs() << "Visiting: " << *Inst << "\n");

    ARCInstKind Class = GetBasicARCInstKind(Inst);
    if (Class != ARCInstKind::LoadWeak &&
        Class != ARCInstKind::LoadWeakRetained)
      continue;

    // Delete objc_loadWeak calls with no users.
    if (Class == ARCInstKind::LoadWeak && Inst->use_empty()) {
      Inst->eraseFromParent();
      Changed = true;
      continue;
    }

    // TODO: For now, just look for an earlier available version of this value
    // within the same block. Theoretically, we could do memdep-style non-local
    // analysis too, but that would want caching. A better approach would be to
    // use the technique that EarlyCSE uses.
    inst_iterator Current = std::prev(I);
    BasicBlock *CurrentBB = &*Current.getBasicBlockIterator();
    // Walk backwards from Inst to the start of its block looking for an
    // earlier instruction that makes this weak load redundant — or one that
    // clobbers the weak slot, which ends the search.
    for (BasicBlock::iterator B = CurrentBB->begin(),
                              J = Current.getInstructionIterator();
         J != B; --J) {
      Instruction *EarlierInst = &*std::prev(J);
      ARCInstKind EarlierClass = GetARCInstKind(EarlierInst);
      switch (EarlierClass) {
      case ARCInstKind::LoadWeak:
      case ARCInstKind::LoadWeakRetained: {
        // If this is loading from the same pointer, replace this load's value
        // with that one.
        CallInst *Call = cast<CallInst>(Inst);
        CallInst *EarlierCall = cast<CallInst>(EarlierInst);
        Value *Arg = Call->getArgOperand(0);
        Value *EarlierArg = EarlierCall->getArgOperand(0);
        switch (PA.getAA()->alias(Arg, EarlierArg)) {
        case AliasResult::MustAlias:
          Changed = true;
          // If the load has a builtin retain, insert a plain retain for it.
          if (Class == ARCInstKind::LoadWeakRetained) {
            Function *Decl = EP.get(ARCRuntimeEntryPointKind::Retain);
            CallInst *CI =
                CallInst::Create(Decl, EarlierCall, "", Call->getIterator());
            CI->setTailCall();
          }
          // Zap the fully redundant load.
          Call->replaceAllUsesWith(EarlierCall);
          Call->eraseFromParent();
          goto clobbered;
        case AliasResult::MayAlias:
        case AliasResult::PartialAlias:
          goto clobbered;
        case AliasResult::NoAlias:
          break;
        }
        break;
      }
      case ARCInstKind::StoreWeak:
      case ARCInstKind::InitWeak: {
        // If this is storing to the same pointer and has the same size etc.
        // replace this load's value with the stored value.
        CallInst *Call = cast<CallInst>(Inst);
        CallInst *EarlierCall = cast<CallInst>(EarlierInst);
        Value *Arg = Call->getArgOperand(0);
        Value *EarlierArg = EarlierCall->getArgOperand(0);
        switch (PA.getAA()->alias(Arg, EarlierArg)) {
        case AliasResult::MustAlias:
          Changed = true;
          // If the load has a builtin retain, insert a plain retain for it.
          if (Class == ARCInstKind::LoadWeakRetained) {
            Function *Decl = EP.get(ARCRuntimeEntryPointKind::Retain);
            CallInst *CI =
                CallInst::Create(Decl, EarlierCall, "", Call->getIterator());
            CI->setTailCall();
          }
          // Zap the fully redundant load.
          Call->replaceAllUsesWith(EarlierCall->getArgOperand(1));
          Call->eraseFromParent();
          goto clobbered;
        case AliasResult::MayAlias:
        case AliasResult::PartialAlias:
          goto clobbered;
        case AliasResult::NoAlias:
          break;
        }
        break;
      }
      case ARCInstKind::MoveWeak:
      case ARCInstKind::CopyWeak:
        // TODO: Grab the copied value.
        goto clobbered;
      case ARCInstKind::AutoreleasepoolPush:
      case ARCInstKind::None:
      case ARCInstKind::IntrinsicUser:
      case ARCInstKind::User:
        // Weak pointers are only modified through the weak entry points
        // (and arbitrary calls, which could call the weak entry points).
        break;
      default:
        // Anything else could modify the weak pointer.
        goto clobbered;
      }
    }
    clobbered:;
  }

  // Then, for each destroyWeak with an alloca operand, check to see if
  // the alloca and all its users can be zapped.
  for (Instruction &Inst : llvm::make_early_inc_range(instructions(F))) {
    ARCInstKind Class = GetBasicARCInstKind(&Inst);
    if (Class != ARCInstKind::DestroyWeak)
      continue;

    CallInst *Call = cast<CallInst>(&Inst);
    Value *Arg = Call->getArgOperand(0);
    if (AllocaInst *Alloca = dyn_cast<AllocaInst>(Arg)) {
      // First pass: verify the alloca is ONLY used by weak init/store/destroy
      // calls; any other user means the slot's value may be observed.
      for (User *U : Alloca->users()) {
        const Instruction *UserInst = cast<Instruction>(U);
        switch (GetBasicARCInstKind(UserInst)) {
        case ARCInstKind::InitWeak:
        case ARCInstKind::StoreWeak:
        case ARCInstKind::DestroyWeak:
          continue;
        default:
          goto done;
        }
      }
      Changed = true;
      // Second pass: rewrite/erase every user, then the alloca itself.
      // early_inc_range keeps iteration valid while erasing.
      for (User *U : llvm::make_early_inc_range(Alloca->users())) {
        CallInst *UserInst = cast<CallInst>(U);
        switch (GetBasicARCInstKind(UserInst)) {
        case ARCInstKind::InitWeak:
        case ARCInstKind::StoreWeak:
          // These functions return their second argument.
          UserInst->replaceAllUsesWith(UserInst->getArgOperand(1));
          break;
        case ARCInstKind::DestroyWeak:
          // No return value.
          break;
        default:
          llvm_unreachable("alloca really is used!");
        }
        UserInst->eraseFromParent();
      }
      Alloca->eraseFromParent();
    done:;
    }
  }
}

/// Identify program paths which execute sequences of retains and releases which
/// can be eliminated.
bool ObjCARCOpt::OptimizeSequences(Function &F) {
  // Releases, Retains - These are used to store the results of the main flow
  // analysis. These use Value* as the key instead of Instruction* so that the
  // map stays valid when we get around to rewriting code and calls get
  // replaced by arguments.
  DenseMap<Value *, RRInfo> Releases;
  BlotMapVector<Value *, RRInfo> Retains;

  // This is used during the traversal of the function to track the
  // states for each identified object at each block.
  DenseMap<const BasicBlock *, BBState> BBStates;

  // Analyze the CFG of the function, and all instructions.
  bool NestingDetected = Visit(F, BBStates, Retains, Releases);

  if (DisableRetainReleasePairing)
    return false;

  // Transform.
  bool AnyPairsCompletelyEliminated = PerformCodePlacement(BBStates, Retains,
                                                           Releases,
                                                           F.getParent());

  // Report "keep iterating" only when pairs were eliminated AND nesting was
  // seen, since nesting can expose further pairs on the next pass.
  return AnyPairsCompletelyEliminated && NestingDetected;
}

/// Check if there is a dependent call earlier that does not have anything in
/// between the Retain and the call that can affect the reference count of their
/// shared pointer argument. Note that Retain need not be in BB.
2274 static CallInst *HasSafePathToPredecessorCall(const Value *Arg, 2275 Instruction *Retain, 2276 ProvenanceAnalysis &PA) { 2277 auto *Call = dyn_cast_or_null<CallInst>(findSingleDependency( 2278 CanChangeRetainCount, Arg, Retain->getParent(), Retain, PA)); 2279 2280 // Check that the pointer is the return value of the call. 2281 if (!Call || Arg != Call) 2282 return nullptr; 2283 2284 // Check that the call is a regular call. 2285 ARCInstKind Class = GetBasicARCInstKind(Call); 2286 return Class == ARCInstKind::CallOrUser || Class == ARCInstKind::Call 2287 ? Call 2288 : nullptr; 2289 } 2290 2291 /// Find a dependent retain that precedes the given autorelease for which there 2292 /// is nothing in between the two instructions that can affect the ref count of 2293 /// Arg. 2294 static CallInst * 2295 FindPredecessorRetainWithSafePath(const Value *Arg, BasicBlock *BB, 2296 Instruction *Autorelease, 2297 ProvenanceAnalysis &PA) { 2298 auto *Retain = dyn_cast_or_null<CallInst>( 2299 findSingleDependency(CanChangeRetainCount, Arg, BB, Autorelease, PA)); 2300 2301 // Check that we found a retain with the same argument. 2302 if (!Retain || !IsRetain(GetBasicARCInstKind(Retain)) || 2303 GetArgRCIdentityRoot(Retain) != Arg) { 2304 return nullptr; 2305 } 2306 2307 return Retain; 2308 } 2309 2310 /// Look for an ``autorelease'' instruction dependent on Arg such that there are 2311 /// no instructions dependent on Arg that need a positive ref count in between 2312 /// the autorelease and the ret. 
2313 static CallInst * 2314 FindPredecessorAutoreleaseWithSafePath(const Value *Arg, BasicBlock *BB, 2315 ReturnInst *Ret, 2316 ProvenanceAnalysis &PA) { 2317 SmallPtrSet<Instruction *, 4> DepInsts; 2318 auto *Autorelease = dyn_cast_or_null<CallInst>( 2319 findSingleDependency(NeedsPositiveRetainCount, Arg, BB, Ret, PA)); 2320 2321 if (!Autorelease) 2322 return nullptr; 2323 ARCInstKind AutoreleaseClass = GetBasicARCInstKind(Autorelease); 2324 if (!IsAutorelease(AutoreleaseClass)) 2325 return nullptr; 2326 if (GetArgRCIdentityRoot(Autorelease) != Arg) 2327 return nullptr; 2328 2329 return Autorelease; 2330 } 2331 2332 /// Look for this pattern: 2333 /// \code 2334 /// %call = call i8* @something(...) 2335 /// %2 = call i8* @objc_retain(i8* %call) 2336 /// %3 = call i8* @objc_autorelease(i8* %2) 2337 /// ret i8* %3 2338 /// \endcode 2339 /// And delete the retain and autorelease. 2340 void ObjCARCOpt::OptimizeReturns(Function &F) { 2341 if (!F.getReturnType()->isPointerTy()) 2342 return; 2343 2344 LLVM_DEBUG(dbgs() << "\n== ObjCARCOpt::OptimizeReturns ==\n"); 2345 2346 for (BasicBlock &BB: F) { 2347 ReturnInst *Ret = dyn_cast<ReturnInst>(&BB.back()); 2348 if (!Ret) 2349 continue; 2350 2351 LLVM_DEBUG(dbgs() << "Visiting: " << *Ret << "\n"); 2352 2353 const Value *Arg = GetRCIdentityRoot(Ret->getOperand(0)); 2354 2355 // Look for an ``autorelease'' instruction that is a predecessor of Ret and 2356 // dependent on Arg such that there are no instructions dependent on Arg 2357 // that need a positive ref count in between the autorelease and Ret. 2358 CallInst *Autorelease = 2359 FindPredecessorAutoreleaseWithSafePath(Arg, &BB, Ret, PA); 2360 2361 if (!Autorelease) 2362 continue; 2363 2364 CallInst *Retain = FindPredecessorRetainWithSafePath( 2365 Arg, Autorelease->getParent(), Autorelease, PA); 2366 2367 if (!Retain) 2368 continue; 2369 2370 // Check that there is nothing that can affect the reference count 2371 // between the retain and the call. 
Note that Retain need not be in BB. 2372 CallInst *Call = HasSafePathToPredecessorCall(Arg, Retain, PA); 2373 2374 // Don't remove retainRV/autoreleaseRV pairs if the call isn't a tail call. 2375 if (!Call || 2376 (!Call->isTailCall() && 2377 GetBasicARCInstKind(Retain) == ARCInstKind::RetainRV && 2378 GetBasicARCInstKind(Autorelease) == ARCInstKind::AutoreleaseRV)) 2379 continue; 2380 2381 // If so, we can zap the retain and autorelease. 2382 Changed = true; 2383 ++NumRets; 2384 LLVM_DEBUG(dbgs() << "Erasing: " << *Retain << "\nErasing: " << *Autorelease 2385 << "\n"); 2386 BundledInsts->eraseInst(Retain); 2387 EraseInstruction(Autorelease); 2388 } 2389 } 2390 2391 #ifndef NDEBUG 2392 void 2393 ObjCARCOpt::GatherStatistics(Function &F, bool AfterOptimization) { 2394 Statistic &NumRetains = 2395 AfterOptimization ? NumRetainsAfterOpt : NumRetainsBeforeOpt; 2396 Statistic &NumReleases = 2397 AfterOptimization ? NumReleasesAfterOpt : NumReleasesBeforeOpt; 2398 2399 for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) { 2400 Instruction *Inst = &*I++; 2401 switch (GetBasicARCInstKind(Inst)) { 2402 default: 2403 break; 2404 case ARCInstKind::Retain: 2405 ++NumRetains; 2406 break; 2407 case ARCInstKind::Release: 2408 ++NumReleases; 2409 break; 2410 } 2411 } 2412 } 2413 #endif 2414 2415 void ObjCARCOpt::init(Function &F) { 2416 if (!EnableARCOpts) 2417 return; 2418 2419 // Intuitively, objc_retain and others are nocapture, however in practice 2420 // they are not, because they return their argument value. And objc_release 2421 // calls finalizers which can have arbitrary side effects. 2422 MDKindCache.init(F.getParent()); 2423 2424 // Initialize our runtime entry point cache. 2425 EP.init(F.getParent()); 2426 2427 // Compute which blocks are in which funclet. 
2428 if (F.hasPersonalityFn() && 2429 isScopedEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) 2430 BlockEHColors = colorEHFunclets(F); 2431 } 2432 2433 bool ObjCARCOpt::run(Function &F, AAResults &AA) { 2434 if (!EnableARCOpts) 2435 return false; 2436 2437 Changed = CFGChanged = false; 2438 BundledRetainClaimRVs BRV(/*ContractPass=*/false); 2439 BundledInsts = &BRV; 2440 2441 LLVM_DEBUG(dbgs() << "<<< ObjCARCOpt: Visiting Function: " << F.getName() 2442 << " >>>" 2443 "\n"); 2444 2445 std::pair<bool, bool> R = BundledInsts->insertAfterInvokes(F, nullptr); 2446 Changed |= R.first; 2447 CFGChanged |= R.second; 2448 2449 PA.setAA(&AA); 2450 2451 #ifndef NDEBUG 2452 if (AreStatisticsEnabled()) { 2453 GatherStatistics(F, false); 2454 } 2455 #endif 2456 2457 // This pass performs several distinct transformations. As a compile-time aid 2458 // when compiling code that isn't ObjC, skip these if the relevant ObjC 2459 // library functions aren't declared. 2460 2461 // Preliminary optimizations. This also computes UsedInThisFunction. 2462 OptimizeIndividualCalls(F); 2463 2464 // Optimizations for weak pointers. 2465 if (UsedInThisFunction & ((1 << unsigned(ARCInstKind::LoadWeak)) | 2466 (1 << unsigned(ARCInstKind::LoadWeakRetained)) | 2467 (1 << unsigned(ARCInstKind::StoreWeak)) | 2468 (1 << unsigned(ARCInstKind::InitWeak)) | 2469 (1 << unsigned(ARCInstKind::CopyWeak)) | 2470 (1 << unsigned(ARCInstKind::MoveWeak)) | 2471 (1 << unsigned(ARCInstKind::DestroyWeak)))) 2472 OptimizeWeakCalls(F); 2473 2474 // Optimizations for retain+release pairs. 2475 if (UsedInThisFunction & ((1 << unsigned(ARCInstKind::Retain)) | 2476 (1 << unsigned(ARCInstKind::RetainRV)) | 2477 (1 << unsigned(ARCInstKind::RetainBlock)))) 2478 if (UsedInThisFunction & (1 << unsigned(ARCInstKind::Release))) 2479 // Run OptimizeSequences until it either stops making changes or 2480 // no retain+release pair nesting is detected. 
2481 while (OptimizeSequences(F)) {} 2482 2483 // Optimizations if objc_autorelease is used. 2484 if (UsedInThisFunction & ((1 << unsigned(ARCInstKind::Autorelease)) | 2485 (1 << unsigned(ARCInstKind::AutoreleaseRV)))) 2486 OptimizeReturns(F); 2487 2488 // Gather statistics after optimization. 2489 #ifndef NDEBUG 2490 if (AreStatisticsEnabled()) { 2491 GatherStatistics(F, true); 2492 } 2493 #endif 2494 2495 LLVM_DEBUG(dbgs() << "\n"); 2496 2497 return Changed; 2498 } 2499 2500 /// @} 2501 /// 2502 2503 PreservedAnalyses ObjCARCOptPass::run(Function &F, 2504 FunctionAnalysisManager &AM) { 2505 ObjCARCOpt OCAO; 2506 OCAO.init(F); 2507 2508 bool Changed = OCAO.run(F, AM.getResult<AAManager>(F)); 2509 bool CFGChanged = OCAO.hasCFGChanged(); 2510 if (Changed) { 2511 PreservedAnalyses PA; 2512 if (!CFGChanged) 2513 PA.preserveSet<CFGAnalyses>(); 2514 return PA; 2515 } 2516 return PreservedAnalyses::all(); 2517 } 2518