1 //===- ObjCARCOpts.cpp - ObjC ARC Optimization ----------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// This file defines ObjC ARC optimizations. ARC stands for Automatic 11 /// Reference Counting and is a system for managing reference counts for objects 12 /// in Objective C. 13 /// 14 /// The optimizations performed include elimination of redundant, partially 15 /// redundant, and inconsequential reference count operations, elimination of 16 /// redundant weak pointer operations, and numerous minor simplifications. 17 /// 18 /// WARNING: This file knows about certain library functions. It recognizes them 19 /// by name, and hardwires knowledge of their semantics. 20 /// 21 /// WARNING: This file knows about how certain Objective-C library functions are 22 /// used. Naive LLVM IR transformations which would otherwise be 23 /// behavior-preserving may break these assumptions. 24 // 25 //===----------------------------------------------------------------------===// 26 27 #include "ARCRuntimeEntryPoints.h" 28 #include "BlotMapVector.h" 29 #include "DependencyAnalysis.h" 30 #include "ObjCARC.h" 31 #include "ProvenanceAnalysis.h" 32 #include "PtrState.h" 33 #include "llvm/ADT/DenseMap.h" 34 #include "llvm/ADT/STLExtras.h" 35 #include "llvm/ADT/SmallPtrSet.h" 36 #include "llvm/ADT/SmallVector.h" 37 #include "llvm/ADT/Statistic.h" 38 #include "llvm/Analysis/AliasAnalysis.h" 39 #include "llvm/Analysis/ObjCARCAliasAnalysis.h" 40 #include "llvm/Analysis/ObjCARCAnalysisUtils.h" 41 #include "llvm/Analysis/ObjCARCInstKind.h" 42 #include "llvm/Analysis/ObjCARCUtil.h" 43 #include "llvm/IR/BasicBlock.h" 44 #include "llvm/IR/CFG.h" 45 #include "llvm/IR/Constant.h" 46 #include "llvm/IR/Constants.h" 47 #include "llvm/IR/DerivedTypes.h" 48 #include "llvm/IR/EHPersonalities.h" 49 #include "llvm/IR/Function.h" 50 #include "llvm/IR/GlobalVariable.h" 51 #include "llvm/IR/InstIterator.h" 52 #include "llvm/IR/InstrTypes.h" 53 #include "llvm/IR/Instruction.h" 54 #include "llvm/IR/Instructions.h" 55 #include "llvm/IR/LLVMContext.h" 56 #include "llvm/IR/Metadata.h" 57 #include "llvm/IR/Type.h" 58 #include "llvm/IR/User.h" 59 #include "llvm/IR/Value.h" 60 #include "llvm/Support/Casting.h" 61 #include "llvm/Support/CommandLine.h" 62 #include "llvm/Support/Compiler.h" 63 #include "llvm/Support/Debug.h" 64 #include "llvm/Support/ErrorHandling.h" 65 #include "llvm/Support/raw_ostream.h" 66 #include "llvm/Transforms/ObjCARC.h" 67 #include <cassert> 68 #include <iterator> 69 #include <utility> 70 71 using namespace llvm; 72 using namespace llvm::objcarc; 73 74 #define DEBUG_TYPE "objc-arc-opts" 75 76 static cl::opt<unsigned> MaxPtrStates("arc-opt-max-ptr-states", 77 cl::Hidden, 78 cl::desc("Maximum number of ptr states the optimizer keeps track of"), 79 cl::init(4095)); 80 81 /// \defgroup ARCUtilities Utility declarations/definitions specific to ARC. 82 /// @{ 83 84 /// This is similar to GetRCIdentityRoot but it stops as soon 85 /// as it finds a value with multiple uses. 86 static const Value *FindSingleUseIdentifiedObject(const Value *Arg) { 87 // ConstantData (like ConstantPointerNull and UndefValue) is used across 88 // modules. It's never a single-use value. 
  if (isa<ConstantData>(Arg))
    return nullptr;

  if (Arg->hasOneUse()) {
    if (const BitCastInst *BC = dyn_cast<BitCastInst>(Arg))
      return FindSingleUseIdentifiedObject(BC->getOperand(0));
    if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Arg))
      if (GEP->hasAllZeroIndices())
        return FindSingleUseIdentifiedObject(GEP->getPointerOperand());
    if (IsForwarding(GetBasicARCInstKind(Arg)))
      return FindSingleUseIdentifiedObject(
               cast<CallInst>(Arg)->getArgOperand(0));
    if (!IsObjCIdentifiedObject(Arg))
      return nullptr;
    return Arg;
  }

  // If we found an identifiable object but it has multiple uses, all of which
  // are trivial, we can still consider this to be a single-use value.
  if (IsObjCIdentifiedObject(Arg)) {
    for (const User *U : Arg->users())
      if (!U->use_empty() || GetRCIdentityRoot(U) != Arg)
        return nullptr;

    return Arg;
  }

  return nullptr;
}

/// @}
///
/// \defgroup ARCOpt ARC Optimization.
/// @{

// TODO: On code like this:
//
// objc_retain(%x)
// stuff_that_cannot_release()
// objc_autorelease(%x)
// stuff_that_cannot_release()
// objc_retain(%x)
// stuff_that_cannot_release()
// objc_autorelease(%x)
//
// The second retain and autorelease can be deleted.

// TODO: It should be possible to delete
// objc_autoreleasePoolPush and objc_autoreleasePoolPop
// pairs if nothing is actually autoreleased between them. Also, autorelease
// calls followed by objc_autoreleasePoolPop calls (perhaps in ObjC++ code
// after inlining) can be turned into plain release calls.

// TODO: Critical-edge splitting. If the optimal insertion point is
// a critical edge, the current algorithm has to fail, because it doesn't
// know how to split edges. It should be possible to make the optimizer
// think in terms of edges, rather than blocks, and then split critical
// edges on demand.

// TODO: OptimizeSequences could be generalized to be interprocedural.

// TODO: Recognize that a bunch of other objc runtime calls have
// non-escaping arguments and non-releasing arguments, and may be
// non-autoreleasing.

// TODO: Sink autorelease calls as far as possible. Unfortunately we
// usually can't sink them past other calls, which would be the main
// case where it would be useful.

// TODO: The pointer returned from objc_loadWeakRetained is retained.

// TODO: Delete release+retain pairs (rare).

STATISTIC(NumNoops, "Number of no-op objc calls eliminated");
STATISTIC(NumPartialNoops, "Number of partially no-op objc calls eliminated");
STATISTIC(NumAutoreleases, "Number of autoreleases converted to releases");
STATISTIC(NumRets, "Number of return value forwarding "
                   "retain+autoreleases eliminated");
STATISTIC(NumRRs, "Number of retain+release paths eliminated");
STATISTIC(NumPeeps, "Number of calls peephole-optimized");
#ifndef NDEBUG
STATISTIC(NumRetainsBeforeOpt, "Number of retains before optimization");
STATISTIC(NumReleasesBeforeOpt, "Number of releases before optimization");
STATISTIC(NumRetainsAfterOpt, "Number of retains after optimization");
STATISTIC(NumReleasesAfterOpt, "Number of releases after optimization");
#endif

namespace {

/// Per-BasicBlock state.
class BBState {
  /// The number of unique control paths from the entry which can reach this
  /// block.
186 unsigned TopDownPathCount = 0; 187 188 /// The number of unique control paths to exits from this block. 189 unsigned BottomUpPathCount = 0; 190 191 /// The top-down traversal uses this to record information known about a 192 /// pointer at the bottom of each block. 193 BlotMapVector<const Value *, TopDownPtrState> PerPtrTopDown; 194 195 /// The bottom-up traversal uses this to record information known about a 196 /// pointer at the top of each block. 197 BlotMapVector<const Value *, BottomUpPtrState> PerPtrBottomUp; 198 199 /// Effective predecessors of the current block ignoring ignorable edges and 200 /// ignored backedges. 201 SmallVector<BasicBlock *, 2> Preds; 202 203 /// Effective successors of the current block ignoring ignorable edges and 204 /// ignored backedges. 205 SmallVector<BasicBlock *, 2> Succs; 206 207 public: 208 static const unsigned OverflowOccurredValue; 209 210 BBState() = default; 211 212 using top_down_ptr_iterator = decltype(PerPtrTopDown)::iterator; 213 using const_top_down_ptr_iterator = decltype(PerPtrTopDown)::const_iterator; 214 215 top_down_ptr_iterator top_down_ptr_begin() { return PerPtrTopDown.begin(); } 216 top_down_ptr_iterator top_down_ptr_end() { return PerPtrTopDown.end(); } 217 const_top_down_ptr_iterator top_down_ptr_begin() const { 218 return PerPtrTopDown.begin(); 219 } 220 const_top_down_ptr_iterator top_down_ptr_end() const { 221 return PerPtrTopDown.end(); 222 } 223 bool hasTopDownPtrs() const { 224 return !PerPtrTopDown.empty(); 225 } 226 227 unsigned top_down_ptr_list_size() const { 228 return std::distance(top_down_ptr_begin(), top_down_ptr_end()); 229 } 230 231 using bottom_up_ptr_iterator = decltype(PerPtrBottomUp)::iterator; 232 using const_bottom_up_ptr_iterator = 233 decltype(PerPtrBottomUp)::const_iterator; 234 235 bottom_up_ptr_iterator bottom_up_ptr_begin() { 236 return PerPtrBottomUp.begin(); 237 } 238 bottom_up_ptr_iterator bottom_up_ptr_end() { return PerPtrBottomUp.end(); } 239 const_bottom_up_ptr_iterator bottom_up_ptr_begin() const { 240 return PerPtrBottomUp.begin(); 241 } 242 const_bottom_up_ptr_iterator bottom_up_ptr_end() const { 243 return PerPtrBottomUp.end(); 244 } 245 bool hasBottomUpPtrs() const { 246 return !PerPtrBottomUp.empty(); 247 } 248 249 unsigned bottom_up_ptr_list_size() const { 250 return std::distance(bottom_up_ptr_begin(), bottom_up_ptr_end()); 251 } 252 253 /// Mark this block as being an entry block, which has one path from the 254 /// entry by definition. 255 void SetAsEntry() { TopDownPathCount = 1; } 256 257 /// Mark this block as being an exit block, which has one path to an exit by 258 /// definition. 259 void SetAsExit() { BottomUpPathCount = 1; } 260 261 /// Attempt to find the PtrState object describing the top down state for 262 /// pointer Arg. Return a new initialized PtrState describing the top down 263 /// state for Arg if we do not find one. 264 TopDownPtrState &getPtrTopDownState(const Value *Arg) { 265 return PerPtrTopDown[Arg]; 266 } 267 268 /// Attempt to find the PtrState object describing the bottom up state for 269 /// pointer Arg. Return a new initialized PtrState describing the bottom up 270 /// state for Arg if we do not find one. 271 BottomUpPtrState &getPtrBottomUpState(const Value *Arg) { 272 return PerPtrBottomUp[Arg]; 273 } 274 275 /// Attempt to find the PtrState object describing the bottom up state for 276 /// pointer Arg. 
  bottom_up_ptr_iterator findPtrBottomUpState(const Value *Arg) {
    return PerPtrBottomUp.find(Arg);
  }

  void clearBottomUpPointers() {
    PerPtrBottomUp.clear();
  }

  void clearTopDownPointers() {
    PerPtrTopDown.clear();
  }

  void InitFromPred(const BBState &Other);
  void InitFromSucc(const BBState &Other);
  void MergePred(const BBState &Other);
  void MergeSucc(const BBState &Other);

  /// Compute the number of possible unique paths from an entry to an exit
  /// which pass through this block. This is only valid after both the
  /// top-down and bottom-up traversals are complete.
  ///
  /// Returns true if overflow occurred. Returns false if overflow did not
  /// occur.
  bool GetAllPathCountWithOverflow(unsigned &PathCount) const {
    if (TopDownPathCount == OverflowOccurredValue ||
        BottomUpPathCount == OverflowOccurredValue)
      return true;
    unsigned long long Product =
      (unsigned long long)TopDownPathCount*BottomUpPathCount;
    // Overflow occurred if any of the upper bits of Product are set or if all
    // the lower bits of Product are set.
    return (Product >> 32) ||
           ((PathCount = Product) == OverflowOccurredValue);
  }

  // Specialized CFG utilities.
  using edge_iterator = SmallVectorImpl<BasicBlock *>::const_iterator;

  edge_iterator pred_begin() const { return Preds.begin(); }
  edge_iterator pred_end() const { return Preds.end(); }
  edge_iterator succ_begin() const { return Succs.begin(); }
  edge_iterator succ_end() const { return Succs.end(); }

  void addSucc(BasicBlock *Succ) { Succs.push_back(Succ); }
  void addPred(BasicBlock *Pred) { Preds.push_back(Pred); }

  bool isExit() const { return Succs.empty(); }
};

} // end anonymous namespace

const unsigned BBState::OverflowOccurredValue = 0xffffffff;

namespace llvm {

raw_ostream &operator<<(raw_ostream &OS,
                        BBState &BBState) LLVM_ATTRIBUTE_UNUSED;

} // end namespace llvm

void BBState::InitFromPred(const BBState &Other) {
  PerPtrTopDown = Other.PerPtrTopDown;
  TopDownPathCount = Other.TopDownPathCount;
}

void BBState::InitFromSucc(const BBState &Other) {
  PerPtrBottomUp = Other.PerPtrBottomUp;
  BottomUpPathCount = Other.BottomUpPathCount;
}

/// The top-down traversal uses this to merge information about predecessors to
/// form the initial state for a new block.
void BBState::MergePred(const BBState &Other) {
  if (TopDownPathCount == OverflowOccurredValue)
    return;

  // Other.TopDownPathCount can be 0, in which case it is either dead or a
  // loop backedge. Loop backedges are special.
  TopDownPathCount += Other.TopDownPathCount;

  // In order to be consistent, we clear the top down pointers when the
  // addition makes TopDownPathCount equal to OverflowOccurredValue, even
  // though "true" overflow has not occurred.
  if (TopDownPathCount == OverflowOccurredValue) {
    clearTopDownPointers();
    return;
  }

  // Check for overflow. If we have overflow, fall back to conservative
  // behavior.
  if (TopDownPathCount < Other.TopDownPathCount) {
    TopDownPathCount = OverflowOccurredValue;
    clearTopDownPointers();
    return;
  }

  // For each entry in the other set, if our set has an entry with the same key,
  // merge the entries. Otherwise, copy the entry and merge it with an empty
  // entry.
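  //
  // For illustration (hypothetical pointers %a and %b): if this block already
  // tracks %a and the predecessor tracks both %a and %b, the first loop merges
  // the two states for %a and merges %b against a fresh TopDownPtrState; the
  // second loop then merges any pointer only this block knows about against a
  // fresh state as well.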
  for (auto MI = Other.top_down_ptr_begin(), ME = Other.top_down_ptr_end();
       MI != ME; ++MI) {
    auto Pair = PerPtrTopDown.insert(*MI);
    Pair.first->second.Merge(Pair.second ? TopDownPtrState() : MI->second,
                             /*TopDown=*/true);
  }

  // For each entry in our set, if the other set doesn't have an entry with the
  // same key, force it to merge with an empty entry.
  for (auto MI = top_down_ptr_begin(), ME = top_down_ptr_end(); MI != ME; ++MI)
    if (Other.PerPtrTopDown.find(MI->first) == Other.PerPtrTopDown.end())
      MI->second.Merge(TopDownPtrState(), /*TopDown=*/true);
}

/// The bottom-up traversal uses this to merge information about successors to
/// form the initial state for a new block.
void BBState::MergeSucc(const BBState &Other) {
  if (BottomUpPathCount == OverflowOccurredValue)
    return;

  // Other.BottomUpPathCount can be 0, in which case it is either dead or a
  // loop backedge. Loop backedges are special.
  BottomUpPathCount += Other.BottomUpPathCount;

  // In order to be consistent, we clear the bottom-up pointers when the
  // addition makes BottomUpPathCount equal to OverflowOccurredValue, even
  // though "true" overflow has not occurred.
  if (BottomUpPathCount == OverflowOccurredValue) {
    clearBottomUpPointers();
    return;
  }

  // Check for overflow. If we have overflow, fall back to conservative
  // behavior.
  if (BottomUpPathCount < Other.BottomUpPathCount) {
    BottomUpPathCount = OverflowOccurredValue;
    clearBottomUpPointers();
    return;
  }

  // For each entry in the other set, if our set has an entry with the
  // same key, merge the entries. Otherwise, copy the entry and merge
  // it with an empty entry.
  for (auto MI = Other.bottom_up_ptr_begin(), ME = Other.bottom_up_ptr_end();
       MI != ME; ++MI) {
    auto Pair = PerPtrBottomUp.insert(*MI);
    Pair.first->second.Merge(Pair.second ? BottomUpPtrState() : MI->second,
                             /*TopDown=*/false);
  }

  // For each entry in our set, if the other set doesn't have an entry
  // with the same key, force it to merge with an empty entry.
  for (auto MI = bottom_up_ptr_begin(), ME = bottom_up_ptr_end(); MI != ME;
       ++MI)
    if (Other.PerPtrBottomUp.find(MI->first) == Other.PerPtrBottomUp.end())
      MI->second.Merge(BottomUpPtrState(), /*TopDown=*/false);
}

raw_ostream &llvm::operator<<(raw_ostream &OS, BBState &BBInfo) {
  // Dump the pointers we are tracking.
436 OS << " TopDown State:\n"; 437 if (!BBInfo.hasTopDownPtrs()) { 438 LLVM_DEBUG(dbgs() << " NONE!\n"); 439 } else { 440 for (auto I = BBInfo.top_down_ptr_begin(), E = BBInfo.top_down_ptr_end(); 441 I != E; ++I) { 442 const PtrState &P = I->second; 443 OS << " Ptr: " << *I->first 444 << "\n KnownSafe: " << (P.IsKnownSafe()?"true":"false") 445 << "\n ImpreciseRelease: " 446 << (P.IsTrackingImpreciseReleases()?"true":"false") << "\n" 447 << " HasCFGHazards: " 448 << (P.IsCFGHazardAfflicted()?"true":"false") << "\n" 449 << " KnownPositive: " 450 << (P.HasKnownPositiveRefCount()?"true":"false") << "\n" 451 << " Seq: " 452 << P.GetSeq() << "\n"; 453 } 454 } 455 456 OS << " BottomUp State:\n"; 457 if (!BBInfo.hasBottomUpPtrs()) { 458 LLVM_DEBUG(dbgs() << " NONE!\n"); 459 } else { 460 for (auto I = BBInfo.bottom_up_ptr_begin(), E = BBInfo.bottom_up_ptr_end(); 461 I != E; ++I) { 462 const PtrState &P = I->second; 463 OS << " Ptr: " << *I->first 464 << "\n KnownSafe: " << (P.IsKnownSafe()?"true":"false") 465 << "\n ImpreciseRelease: " 466 << (P.IsTrackingImpreciseReleases()?"true":"false") << "\n" 467 << " HasCFGHazards: " 468 << (P.IsCFGHazardAfflicted()?"true":"false") << "\n" 469 << " KnownPositive: " 470 << (P.HasKnownPositiveRefCount()?"true":"false") << "\n" 471 << " Seq: " 472 << P.GetSeq() << "\n"; 473 } 474 } 475 476 return OS; 477 } 478 479 namespace { 480 481 /// The main ARC optimization pass. 482 class ObjCARCOpt { 483 bool Changed = false; 484 bool CFGChanged = false; 485 ProvenanceAnalysis PA; 486 487 /// A cache of references to runtime entry point constants. 488 ARCRuntimeEntryPoints EP; 489 490 /// A cache of MDKinds that can be passed into other functions to propagate 491 /// MDKind identifiers. 492 ARCMDKindCache MDKindCache; 493 494 BundledRetainClaimRVs *BundledInsts = nullptr; 495 496 /// A flag indicating whether the optimization that removes or moves 497 /// retain/release pairs should be performed. 498 bool DisableRetainReleasePairing = false; 499 500 /// Flags which determine whether each of the interesting runtime functions 501 /// is in fact used in the current function. 502 unsigned UsedInThisFunction; 503 504 DenseMap<BasicBlock *, ColorVector> BlockEHColors; 505 506 bool OptimizeRetainRVCall(Function &F, Instruction *RetainRV); 507 void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV, 508 ARCInstKind &Class); 509 void OptimizeIndividualCalls(Function &F); 510 511 /// Optimize an individual call, optionally passing the 512 /// GetArgRCIdentityRoot if it has already been computed. 513 void OptimizeIndividualCallImpl(Function &F, Instruction *Inst, 514 ARCInstKind Class, const Value *Arg); 515 516 /// Try to optimize an AutoreleaseRV with a RetainRV or UnsafeClaimRV. If the 517 /// optimization occurs, returns true to indicate that the caller should 518 /// assume the instructions are dead. 
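  ///
  /// Illustrative sketch only (hypothetical IR): after inlining, the pattern
  /// being matched is
  ///
  ///   %v = call ptr @llvm.objc.autoreleaseReturnValue(ptr %x)
  ///   %r = call ptr @llvm.objc.retainAutoreleasedReturnValue(ptr %v)
  ///
  /// where the two calls cancel out; if the second call is instead
  /// unsafeClaimAutoreleasedReturnValue, the pair is rewritten into a plain
  /// objc_release.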
519 bool OptimizeInlinedAutoreleaseRVCall(Function &F, Instruction *Inst, 520 const Value *&Arg, ARCInstKind Class, 521 Instruction *AutoreleaseRV, 522 const Value *&AutoreleaseRVArg); 523 524 void CheckForCFGHazards(const BasicBlock *BB, 525 DenseMap<const BasicBlock *, BBState> &BBStates, 526 BBState &MyStates) const; 527 bool VisitInstructionBottomUp(Instruction *Inst, BasicBlock *BB, 528 BlotMapVector<Value *, RRInfo> &Retains, 529 BBState &MyStates); 530 bool VisitBottomUp(BasicBlock *BB, 531 DenseMap<const BasicBlock *, BBState> &BBStates, 532 BlotMapVector<Value *, RRInfo> &Retains); 533 bool VisitInstructionTopDown( 534 Instruction *Inst, DenseMap<Value *, RRInfo> &Releases, BBState &MyStates, 535 const DenseMap<const Instruction *, SmallPtrSet<const Value *, 2>> 536 &ReleaseInsertPtToRCIdentityRoots); 537 bool VisitTopDown( 538 BasicBlock *BB, DenseMap<const BasicBlock *, BBState> &BBStates, 539 DenseMap<Value *, RRInfo> &Releases, 540 const DenseMap<const Instruction *, SmallPtrSet<const Value *, 2>> 541 &ReleaseInsertPtToRCIdentityRoots); 542 bool Visit(Function &F, DenseMap<const BasicBlock *, BBState> &BBStates, 543 BlotMapVector<Value *, RRInfo> &Retains, 544 DenseMap<Value *, RRInfo> &Releases); 545 546 void MoveCalls(Value *Arg, RRInfo &RetainsToMove, RRInfo &ReleasesToMove, 547 BlotMapVector<Value *, RRInfo> &Retains, 548 DenseMap<Value *, RRInfo> &Releases, 549 SmallVectorImpl<Instruction *> &DeadInsts, Module *M); 550 551 bool PairUpRetainsAndReleases(DenseMap<const BasicBlock *, BBState> &BBStates, 552 BlotMapVector<Value *, RRInfo> &Retains, 553 DenseMap<Value *, RRInfo> &Releases, Module *M, 554 Instruction *Retain, 555 SmallVectorImpl<Instruction *> &DeadInsts, 556 RRInfo &RetainsToMove, RRInfo &ReleasesToMove, 557 Value *Arg, bool KnownSafe, 558 bool &AnyPairsCompletelyEliminated); 559 560 bool PerformCodePlacement(DenseMap<const BasicBlock *, BBState> &BBStates, 561 BlotMapVector<Value *, RRInfo> &Retains, 562 DenseMap<Value *, RRInfo> &Releases, Module *M); 563 564 void OptimizeWeakCalls(Function &F); 565 566 bool OptimizeSequences(Function &F); 567 568 void OptimizeReturns(Function &F); 569 570 template <typename PredicateT> 571 static void cloneOpBundlesIf(CallBase *CI, 572 SmallVectorImpl<OperandBundleDef> &OpBundles, 573 PredicateT Predicate) { 574 for (unsigned I = 0, E = CI->getNumOperandBundles(); I != E; ++I) { 575 OperandBundleUse B = CI->getOperandBundleAt(I); 576 if (Predicate(B)) 577 OpBundles.emplace_back(B); 578 } 579 } 580 581 void addOpBundleForFunclet(BasicBlock *BB, 582 SmallVectorImpl<OperandBundleDef> &OpBundles) { 583 if (!BlockEHColors.empty()) { 584 const ColorVector &CV = BlockEHColors.find(BB)->second; 585 assert(CV.size() > 0 && "Uncolored block"); 586 for (BasicBlock *EHPadBB : CV) 587 if (auto *EHPad = dyn_cast<FuncletPadInst>(EHPadBB->getFirstNonPHI())) { 588 OpBundles.emplace_back("funclet", EHPad); 589 return; 590 } 591 } 592 } 593 594 #ifndef NDEBUG 595 void GatherStatistics(Function &F, bool AfterOptimization = false); 596 #endif 597 598 public: 599 void init(Function &F); 600 bool run(Function &F, AAResults &AA); 601 bool hasCFGChanged() const { return CFGChanged; } 602 }; 603 } // end anonymous namespace 604 605 /// Turn objc_retainAutoreleasedReturnValue into objc_retain if the operand is 606 /// not a return value. 607 bool 608 ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) { 609 // Check for the argument being from an immediately preceding call or invoke. 
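  // Illustrative sketch (hypothetical IR) of the case that is left alone:
  //
  //   %call = call ptr @foo()
  //   %0 = call ptr @llvm.objc.retainAutoreleasedReturnValue(ptr %call)
  //
  // possibly with no-op instructions in between. If the argument is not such
  // a return value, the retainRV is rewritten below into a plain objc_retain.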
610 const Value *Arg = GetArgRCIdentityRoot(RetainRV); 611 if (const Instruction *Call = dyn_cast<CallBase>(Arg)) { 612 if (Call->getParent() == RetainRV->getParent()) { 613 BasicBlock::const_iterator I(Call); 614 ++I; 615 while (IsNoopInstruction(&*I)) 616 ++I; 617 if (&*I == RetainRV) 618 return false; 619 } else if (const InvokeInst *II = dyn_cast<InvokeInst>(Call)) { 620 BasicBlock *RetainRVParent = RetainRV->getParent(); 621 if (II->getNormalDest() == RetainRVParent) { 622 BasicBlock::const_iterator I = RetainRVParent->begin(); 623 while (IsNoopInstruction(&*I)) 624 ++I; 625 if (&*I == RetainRV) 626 return false; 627 } 628 } 629 } 630 631 assert(!BundledInsts->contains(RetainRV) && 632 "a bundled retainRV's argument should be a call"); 633 634 // Turn it to a plain objc_retain. 635 Changed = true; 636 ++NumPeeps; 637 638 LLVM_DEBUG(dbgs() << "Transforming objc_retainAutoreleasedReturnValue => " 639 "objc_retain since the operand is not a return value.\n" 640 "Old = " 641 << *RetainRV << "\n"); 642 643 Function *NewDecl = EP.get(ARCRuntimeEntryPointKind::Retain); 644 cast<CallInst>(RetainRV)->setCalledFunction(NewDecl); 645 646 LLVM_DEBUG(dbgs() << "New = " << *RetainRV << "\n"); 647 648 return false; 649 } 650 651 bool ObjCARCOpt::OptimizeInlinedAutoreleaseRVCall( 652 Function &F, Instruction *Inst, const Value *&Arg, ARCInstKind Class, 653 Instruction *AutoreleaseRV, const Value *&AutoreleaseRVArg) { 654 if (BundledInsts->contains(Inst)) 655 return false; 656 657 // Must be in the same basic block. 658 assert(Inst->getParent() == AutoreleaseRV->getParent()); 659 660 // Must operate on the same root. 661 Arg = GetArgRCIdentityRoot(Inst); 662 AutoreleaseRVArg = GetArgRCIdentityRoot(AutoreleaseRV); 663 if (Arg != AutoreleaseRVArg) { 664 // If there isn't an exact match, check if we have equivalent PHIs. 665 const PHINode *PN = dyn_cast<PHINode>(Arg); 666 if (!PN) 667 return false; 668 669 SmallVector<const Value *, 4> ArgUsers; 670 getEquivalentPHIs(*PN, ArgUsers); 671 if (!llvm::is_contained(ArgUsers, AutoreleaseRVArg)) 672 return false; 673 } 674 675 // Okay, this is a match. Merge them. 676 ++NumPeeps; 677 LLVM_DEBUG(dbgs() << "Found inlined objc_autoreleaseReturnValue '" 678 << *AutoreleaseRV << "' paired with '" << *Inst << "'\n"); 679 680 // Delete the RV pair, starting with the AutoreleaseRV. 681 AutoreleaseRV->replaceAllUsesWith( 682 cast<CallInst>(AutoreleaseRV)->getArgOperand(0)); 683 Changed = true; 684 EraseInstruction(AutoreleaseRV); 685 if (Class == ARCInstKind::RetainRV) { 686 // AutoreleaseRV and RetainRV cancel out. Delete the RetainRV. 687 Inst->replaceAllUsesWith(cast<CallInst>(Inst)->getArgOperand(0)); 688 EraseInstruction(Inst); 689 return true; 690 } 691 692 // UnsafeClaimRV is a frontend peephole for RetainRV + Release. Since the 693 // AutoreleaseRV and RetainRV cancel out, replace UnsafeClaimRV with Release. 694 assert(Class == ARCInstKind::UnsafeClaimRV); 695 Value *CallArg = cast<CallInst>(Inst)->getArgOperand(0); 696 CallInst *Release = CallInst::Create( 697 EP.get(ARCRuntimeEntryPointKind::Release), CallArg, "", Inst); 698 assert(IsAlwaysTail(ARCInstKind::UnsafeClaimRV) && 699 "Expected UnsafeClaimRV to be safe to tail call"); 700 Release->setTailCall(); 701 Inst->replaceAllUsesWith(CallArg); 702 EraseInstruction(Inst); 703 704 // Run the normal optimizations on Release. 
705 OptimizeIndividualCallImpl(F, Release, ARCInstKind::Release, Arg); 706 return true; 707 } 708 709 /// Turn objc_autoreleaseReturnValue into objc_autorelease if the result is not 710 /// used as a return value. 711 void ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, 712 Instruction *AutoreleaseRV, 713 ARCInstKind &Class) { 714 // Check for a return of the pointer value. 715 const Value *Ptr = GetArgRCIdentityRoot(AutoreleaseRV); 716 717 // If the argument is ConstantPointerNull or UndefValue, its other users 718 // aren't actually interesting to look at. 719 if (isa<ConstantData>(Ptr)) 720 return; 721 722 SmallVector<const Value *, 2> Users; 723 Users.push_back(Ptr); 724 725 // Add PHIs that are equivalent to Ptr to Users. 726 if (const PHINode *PN = dyn_cast<PHINode>(Ptr)) 727 getEquivalentPHIs(*PN, Users); 728 729 do { 730 Ptr = Users.pop_back_val(); 731 for (const User *U : Ptr->users()) { 732 if (isa<ReturnInst>(U) || GetBasicARCInstKind(U) == ARCInstKind::RetainRV) 733 return; 734 if (isa<BitCastInst>(U)) 735 Users.push_back(U); 736 } 737 } while (!Users.empty()); 738 739 Changed = true; 740 ++NumPeeps; 741 742 LLVM_DEBUG( 743 dbgs() << "Transforming objc_autoreleaseReturnValue => " 744 "objc_autorelease since its operand is not used as a return " 745 "value.\n" 746 "Old = " 747 << *AutoreleaseRV << "\n"); 748 749 CallInst *AutoreleaseRVCI = cast<CallInst>(AutoreleaseRV); 750 Function *NewDecl = EP.get(ARCRuntimeEntryPointKind::Autorelease); 751 AutoreleaseRVCI->setCalledFunction(NewDecl); 752 AutoreleaseRVCI->setTailCall(false); // Never tail call objc_autorelease. 753 Class = ARCInstKind::Autorelease; 754 755 LLVM_DEBUG(dbgs() << "New: " << *AutoreleaseRV << "\n"); 756 } 757 758 /// Visit each call, one at a time, and make simplifications without doing any 759 /// additional analysis. 760 void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { 761 LLVM_DEBUG(dbgs() << "\n== ObjCARCOpt::OptimizeIndividualCalls ==\n"); 762 // Reset all the flags in preparation for recomputing them. 763 UsedInThisFunction = 0; 764 765 // Store any delayed AutoreleaseRV intrinsics, so they can be easily paired 766 // with RetainRV and UnsafeClaimRV. 767 Instruction *DelayedAutoreleaseRV = nullptr; 768 const Value *DelayedAutoreleaseRVArg = nullptr; 769 auto setDelayedAutoreleaseRV = [&](Instruction *AutoreleaseRV) { 770 assert(!DelayedAutoreleaseRV || !AutoreleaseRV); 771 DelayedAutoreleaseRV = AutoreleaseRV; 772 DelayedAutoreleaseRVArg = nullptr; 773 }; 774 auto optimizeDelayedAutoreleaseRV = [&]() { 775 if (!DelayedAutoreleaseRV) 776 return; 777 OptimizeIndividualCallImpl(F, DelayedAutoreleaseRV, 778 ARCInstKind::AutoreleaseRV, 779 DelayedAutoreleaseRVArg); 780 setDelayedAutoreleaseRV(nullptr); 781 }; 782 auto shouldDelayAutoreleaseRV = [&](Instruction *NonARCInst) { 783 // Nothing to delay, but we may as well skip the logic below. 784 if (!DelayedAutoreleaseRV) 785 return true; 786 787 // If we hit the end of the basic block we're not going to find an RV-pair. 788 // Stop delaying. 789 if (NonARCInst->isTerminator()) 790 return false; 791 792 // Given the frontend rules for emitting AutoreleaseRV, RetainRV, and 793 // UnsafeClaimRV, it's probably safe to skip over even opaque function calls 794 // here since OptimizeInlinedAutoreleaseRVCall will confirm that they 795 // have the same RCIdentityRoot. 
However, what really matters is 796 // skipping instructions or intrinsics that the inliner could leave behind; 797 // be conservative for now and don't skip over opaque calls, which could 798 // potentially include other ARC calls. 799 auto *CB = dyn_cast<CallBase>(NonARCInst); 800 if (!CB) 801 return true; 802 return CB->getIntrinsicID() != Intrinsic::not_intrinsic; 803 }; 804 805 // Visit all objc_* calls in F. 806 for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) { 807 Instruction *Inst = &*I++; 808 809 if (auto *CI = dyn_cast<CallInst>(Inst)) 810 if (objcarc::hasAttachedCallOpBundle(CI)) { 811 BundledInsts->insertRVCall(&*I, CI); 812 Changed = true; 813 } 814 815 ARCInstKind Class = GetBasicARCInstKind(Inst); 816 817 // Skip this loop if this instruction isn't itself an ARC intrinsic. 818 const Value *Arg = nullptr; 819 switch (Class) { 820 default: 821 optimizeDelayedAutoreleaseRV(); 822 break; 823 case ARCInstKind::CallOrUser: 824 case ARCInstKind::User: 825 case ARCInstKind::None: 826 // This is a non-ARC instruction. If we're delaying an AutoreleaseRV, 827 // check if it's safe to skip over it; if not, optimize the AutoreleaseRV 828 // now. 829 if (!shouldDelayAutoreleaseRV(Inst)) 830 optimizeDelayedAutoreleaseRV(); 831 continue; 832 case ARCInstKind::AutoreleaseRV: 833 optimizeDelayedAutoreleaseRV(); 834 setDelayedAutoreleaseRV(Inst); 835 continue; 836 case ARCInstKind::RetainRV: 837 case ARCInstKind::UnsafeClaimRV: 838 if (DelayedAutoreleaseRV) { 839 // We have a potential RV pair. Check if they cancel out. 840 if (OptimizeInlinedAutoreleaseRVCall(F, Inst, Arg, Class, 841 DelayedAutoreleaseRV, 842 DelayedAutoreleaseRVArg)) { 843 setDelayedAutoreleaseRV(nullptr); 844 continue; 845 } 846 optimizeDelayedAutoreleaseRV(); 847 } 848 break; 849 } 850 851 OptimizeIndividualCallImpl(F, Inst, Class, Arg); 852 } 853 854 // Catch the final delayed AutoreleaseRV. 855 optimizeDelayedAutoreleaseRV(); 856 } 857 858 /// This function returns true if the value is inert. An ObjC ARC runtime call 859 /// taking an inert operand can be safely deleted. 860 static bool isInertARCValue(Value *V, SmallPtrSet<Value *, 1> &VisitedPhis) { 861 V = V->stripPointerCasts(); 862 863 if (IsNullOrUndef(V)) 864 return true; 865 866 // See if this is a global attribute annotated with an 'objc_arc_inert'. 867 if (auto *GV = dyn_cast<GlobalVariable>(V)) 868 if (GV->hasAttribute("objc_arc_inert")) 869 return true; 870 871 if (auto PN = dyn_cast<PHINode>(V)) { 872 // Ignore this phi if it has already been discovered. 873 if (!VisitedPhis.insert(PN).second) 874 return true; 875 // Look through phis's operands. 876 for (Value *Opnd : PN->incoming_values()) 877 if (!isInertARCValue(Opnd, VisitedPhis)) 878 return false; 879 return true; 880 } 881 882 return false; 883 } 884 885 void ObjCARCOpt::OptimizeIndividualCallImpl(Function &F, Instruction *Inst, 886 ARCInstKind Class, 887 const Value *Arg) { 888 LLVM_DEBUG(dbgs() << "Visiting: Class: " << Class << "; " << *Inst << "\n"); 889 890 // We can delete this call if it takes an inert value. 
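  // For example (illustrative IR): a retain or release of a null pointer, or
  // of a global annotated with the "objc_arc_inert" attribute, has no effect,
  // so the IsNoopOnGlobal check below allows it to be erased:
  //
  //   call void @llvm.objc.release(ptr @g)   ; @g carries "objc_arc_inert"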
891 SmallPtrSet<Value *, 1> VisitedPhis; 892 893 if (BundledInsts->contains(Inst)) { 894 UsedInThisFunction |= 1 << unsigned(Class); 895 return; 896 } 897 898 if (IsNoopOnGlobal(Class)) 899 if (isInertARCValue(Inst->getOperand(0), VisitedPhis)) { 900 if (!Inst->getType()->isVoidTy()) 901 Inst->replaceAllUsesWith(Inst->getOperand(0)); 902 Inst->eraseFromParent(); 903 Changed = true; 904 return; 905 } 906 907 switch (Class) { 908 default: 909 break; 910 911 // Delete no-op casts. These function calls have special semantics, but 912 // the semantics are entirely implemented via lowering in the front-end, 913 // so by the time they reach the optimizer, they are just no-op calls 914 // which return their argument. 915 // 916 // There are gray areas here, as the ability to cast reference-counted 917 // pointers to raw void* and back allows code to break ARC assumptions, 918 // however these are currently considered to be unimportant. 919 case ARCInstKind::NoopCast: 920 Changed = true; 921 ++NumNoops; 922 LLVM_DEBUG(dbgs() << "Erasing no-op cast: " << *Inst << "\n"); 923 EraseInstruction(Inst); 924 return; 925 926 // If the pointer-to-weak-pointer is null, it's undefined behavior. 927 case ARCInstKind::StoreWeak: 928 case ARCInstKind::LoadWeak: 929 case ARCInstKind::LoadWeakRetained: 930 case ARCInstKind::InitWeak: 931 case ARCInstKind::DestroyWeak: { 932 CallInst *CI = cast<CallInst>(Inst); 933 if (IsNullOrUndef(CI->getArgOperand(0))) { 934 Changed = true; 935 new StoreInst(ConstantInt::getTrue(CI->getContext()), 936 PoisonValue::get(PointerType::getUnqual(CI->getContext())), 937 CI); 938 Value *NewValue = PoisonValue::get(CI->getType()); 939 LLVM_DEBUG( 940 dbgs() << "A null pointer-to-weak-pointer is undefined behavior." 941 "\nOld = " 942 << *CI << "\nNew = " << *NewValue << "\n"); 943 CI->replaceAllUsesWith(NewValue); 944 CI->eraseFromParent(); 945 return; 946 } 947 break; 948 } 949 case ARCInstKind::CopyWeak: 950 case ARCInstKind::MoveWeak: { 951 CallInst *CI = cast<CallInst>(Inst); 952 if (IsNullOrUndef(CI->getArgOperand(0)) || 953 IsNullOrUndef(CI->getArgOperand(1))) { 954 Changed = true; 955 new StoreInst(ConstantInt::getTrue(CI->getContext()), 956 PoisonValue::get(PointerType::getUnqual(CI->getContext())), 957 CI); 958 959 Value *NewValue = PoisonValue::get(CI->getType()); 960 LLVM_DEBUG( 961 dbgs() << "A null pointer-to-weak-pointer is undefined behavior." 962 "\nOld = " 963 << *CI << "\nNew = " << *NewValue << "\n"); 964 965 CI->replaceAllUsesWith(NewValue); 966 CI->eraseFromParent(); 967 return; 968 } 969 break; 970 } 971 case ARCInstKind::RetainRV: 972 if (OptimizeRetainRVCall(F, Inst)) 973 return; 974 break; 975 case ARCInstKind::AutoreleaseRV: 976 OptimizeAutoreleaseRVCall(F, Inst, Class); 977 break; 978 } 979 980 // objc_autorelease(x) -> objc_release(x) if x is otherwise unused. 981 if (IsAutorelease(Class) && Inst->use_empty()) { 982 CallInst *Call = cast<CallInst>(Inst); 983 const Value *Arg = Call->getArgOperand(0); 984 Arg = FindSingleUseIdentifiedObject(Arg); 985 if (Arg) { 986 Changed = true; 987 ++NumAutoreleases; 988 989 // Create the declaration lazily. 
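      // Illustrative effect (hypothetical IR), when %x is a single-use
      // identified object:
      //   %y = call ptr @llvm.objc.autorelease(ptr %x)   ; %y unused
      // becomes
      //   call void @llvm.objc.release(ptr %x), !clang.imprecise_release !N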
990 LLVMContext &C = Inst->getContext(); 991 992 Function *Decl = EP.get(ARCRuntimeEntryPointKind::Release); 993 CallInst *NewCall = 994 CallInst::Create(Decl, Call->getArgOperand(0), "", Call); 995 NewCall->setMetadata(MDKindCache.get(ARCMDKindID::ImpreciseRelease), 996 MDNode::get(C, std::nullopt)); 997 998 LLVM_DEBUG(dbgs() << "Replacing autorelease{,RV}(x) with objc_release(x) " 999 "since x is otherwise unused.\nOld: " 1000 << *Call << "\nNew: " << *NewCall << "\n"); 1001 1002 EraseInstruction(Call); 1003 Inst = NewCall; 1004 Class = ARCInstKind::Release; 1005 } 1006 } 1007 1008 // For functions which can never be passed stack arguments, add 1009 // a tail keyword. 1010 if (IsAlwaysTail(Class) && !cast<CallInst>(Inst)->isNoTailCall()) { 1011 Changed = true; 1012 LLVM_DEBUG( 1013 dbgs() << "Adding tail keyword to function since it can never be " 1014 "passed stack args: " 1015 << *Inst << "\n"); 1016 cast<CallInst>(Inst)->setTailCall(); 1017 } 1018 1019 // Ensure that functions that can never have a "tail" keyword due to the 1020 // semantics of ARC truly do not do so. 1021 if (IsNeverTail(Class)) { 1022 Changed = true; 1023 LLVM_DEBUG(dbgs() << "Removing tail keyword from function: " << *Inst 1024 << "\n"); 1025 cast<CallInst>(Inst)->setTailCall(false); 1026 } 1027 1028 // Set nounwind as needed. 1029 if (IsNoThrow(Class)) { 1030 Changed = true; 1031 LLVM_DEBUG(dbgs() << "Found no throw class. Setting nounwind on: " << *Inst 1032 << "\n"); 1033 cast<CallInst>(Inst)->setDoesNotThrow(); 1034 } 1035 1036 // Note: This catches instructions unrelated to ARC. 1037 if (!IsNoopOnNull(Class)) { 1038 UsedInThisFunction |= 1 << unsigned(Class); 1039 return; 1040 } 1041 1042 // If we haven't already looked up the root, look it up now. 1043 if (!Arg) 1044 Arg = GetArgRCIdentityRoot(Inst); 1045 1046 // ARC calls with null are no-ops. Delete them. 1047 if (IsNullOrUndef(Arg)) { 1048 Changed = true; 1049 ++NumNoops; 1050 LLVM_DEBUG(dbgs() << "ARC calls with null are no-ops. Erasing: " << *Inst 1051 << "\n"); 1052 EraseInstruction(Inst); 1053 return; 1054 } 1055 1056 // Keep track of which of retain, release, autorelease, and retain_block 1057 // are actually present in this function. 1058 UsedInThisFunction |= 1 << unsigned(Class); 1059 1060 // If Arg is a PHI, and one or more incoming values to the 1061 // PHI are null, and the call is control-equivalent to the PHI, and there 1062 // are no relevant side effects between the PHI and the call, and the call 1063 // is not a release that doesn't have the clang.imprecise_release tag, the 1064 // call could be pushed up to just those paths with non-null incoming 1065 // values. For now, don't bother splitting critical edges for this. 1066 if (Class == ARCInstKind::Release && 1067 !Inst->getMetadata(MDKindCache.get(ARCMDKindID::ImpreciseRelease))) 1068 return; 1069 1070 SmallVector<std::pair<Instruction *, const Value *>, 4> Worklist; 1071 Worklist.push_back(std::make_pair(Inst, Arg)); 1072 do { 1073 std::pair<Instruction *, const Value *> Pair = Worklist.pop_back_val(); 1074 Inst = Pair.first; 1075 Arg = Pair.second; 1076 1077 const PHINode *PN = dyn_cast<PHINode>(Arg); 1078 if (!PN) 1079 continue; 1080 1081 // Determine if the PHI has any null operands, or any incoming 1082 // critical edges. 
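    //
    // Illustrative case (hypothetical IR):
    //
    //   %p = phi ptr [ null, %bb1 ], [ %obj, %bb2 ]
    //   call void @llvm.objc.release(ptr %p), !clang.imprecise_release !N
    //
    // The release is a no-op along the null-incoming path, so if there are no
    // critical edges it can be cloned into the non-null predecessors and the
    // original call erased.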
1083 bool HasNull = false; 1084 bool HasCriticalEdges = false; 1085 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { 1086 Value *Incoming = GetRCIdentityRoot(PN->getIncomingValue(i)); 1087 if (IsNullOrUndef(Incoming)) 1088 HasNull = true; 1089 else if (PN->getIncomingBlock(i)->getTerminator()->getNumSuccessors() != 1090 1) { 1091 HasCriticalEdges = true; 1092 break; 1093 } 1094 } 1095 // If we have null operands and no critical edges, optimize. 1096 if (HasCriticalEdges) 1097 continue; 1098 if (!HasNull) 1099 continue; 1100 1101 Instruction *DepInst = nullptr; 1102 1103 // Check that there is nothing that cares about the reference 1104 // count between the call and the phi. 1105 switch (Class) { 1106 case ARCInstKind::Retain: 1107 case ARCInstKind::RetainBlock: 1108 // These can always be moved up. 1109 break; 1110 case ARCInstKind::Release: 1111 // These can't be moved across things that care about the retain 1112 // count. 1113 DepInst = findSingleDependency(NeedsPositiveRetainCount, Arg, 1114 Inst->getParent(), Inst, PA); 1115 break; 1116 case ARCInstKind::Autorelease: 1117 // These can't be moved across autorelease pool scope boundaries. 1118 DepInst = findSingleDependency(AutoreleasePoolBoundary, Arg, 1119 Inst->getParent(), Inst, PA); 1120 break; 1121 case ARCInstKind::UnsafeClaimRV: 1122 case ARCInstKind::RetainRV: 1123 case ARCInstKind::AutoreleaseRV: 1124 // Don't move these; the RV optimization depends on the autoreleaseRV 1125 // being tail called, and the retainRV being immediately after a call 1126 // (which might still happen if we get lucky with codegen layout, but 1127 // it's not worth taking the chance). 1128 continue; 1129 default: 1130 llvm_unreachable("Invalid dependence flavor"); 1131 } 1132 1133 if (DepInst != PN) 1134 continue; 1135 1136 Changed = true; 1137 ++NumPartialNoops; 1138 // Clone the call into each predecessor that has a non-null value. 1139 CallInst *CInst = cast<CallInst>(Inst); 1140 Type *ParamTy = CInst->getArgOperand(0)->getType(); 1141 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { 1142 Value *Incoming = GetRCIdentityRoot(PN->getIncomingValue(i)); 1143 if (IsNullOrUndef(Incoming)) 1144 continue; 1145 Value *Op = PN->getIncomingValue(i); 1146 Instruction *InsertPos = &PN->getIncomingBlock(i)->back(); 1147 SmallVector<OperandBundleDef, 1> OpBundles; 1148 cloneOpBundlesIf(CInst, OpBundles, [](const OperandBundleUse &B) { 1149 return B.getTagID() != LLVMContext::OB_funclet; 1150 }); 1151 addOpBundleForFunclet(InsertPos->getParent(), OpBundles); 1152 CallInst *Clone = CallInst::Create(CInst, OpBundles); 1153 if (Op->getType() != ParamTy) 1154 Op = new BitCastInst(Op, ParamTy, "", InsertPos); 1155 Clone->setArgOperand(0, Op); 1156 Clone->insertBefore(InsertPos); 1157 1158 LLVM_DEBUG(dbgs() << "Cloning " << *CInst << "\n" 1159 "And inserting clone at " 1160 << *InsertPos << "\n"); 1161 Worklist.push_back(std::make_pair(Clone, Incoming)); 1162 } 1163 // Erase the original call. 1164 LLVM_DEBUG(dbgs() << "Erasing: " << *CInst << "\n"); 1165 EraseInstruction(CInst); 1166 } while (!Worklist.empty()); 1167 } 1168 1169 /// If we have a top down pointer in the S_Use state, make sure that there are 1170 /// no CFG hazards by checking the states of various bottom up pointers. 
1171 static void CheckForUseCFGHazard(const Sequence SuccSSeq, 1172 const bool SuccSRRIKnownSafe, 1173 TopDownPtrState &S, 1174 bool &SomeSuccHasSame, 1175 bool &AllSuccsHaveSame, 1176 bool &NotAllSeqEqualButKnownSafe, 1177 bool &ShouldContinue) { 1178 switch (SuccSSeq) { 1179 case S_CanRelease: { 1180 if (!S.IsKnownSafe() && !SuccSRRIKnownSafe) { 1181 S.ClearSequenceProgress(); 1182 break; 1183 } 1184 S.SetCFGHazardAfflicted(true); 1185 ShouldContinue = true; 1186 break; 1187 } 1188 case S_Use: 1189 SomeSuccHasSame = true; 1190 break; 1191 case S_Stop: 1192 case S_MovableRelease: 1193 if (!S.IsKnownSafe() && !SuccSRRIKnownSafe) 1194 AllSuccsHaveSame = false; 1195 else 1196 NotAllSeqEqualButKnownSafe = true; 1197 break; 1198 case S_Retain: 1199 llvm_unreachable("bottom-up pointer in retain state!"); 1200 case S_None: 1201 llvm_unreachable("This should have been handled earlier."); 1202 } 1203 } 1204 1205 /// If we have a Top Down pointer in the S_CanRelease state, make sure that 1206 /// there are no CFG hazards by checking the states of various bottom up 1207 /// pointers. 1208 static void CheckForCanReleaseCFGHazard(const Sequence SuccSSeq, 1209 const bool SuccSRRIKnownSafe, 1210 TopDownPtrState &S, 1211 bool &SomeSuccHasSame, 1212 bool &AllSuccsHaveSame, 1213 bool &NotAllSeqEqualButKnownSafe) { 1214 switch (SuccSSeq) { 1215 case S_CanRelease: 1216 SomeSuccHasSame = true; 1217 break; 1218 case S_Stop: 1219 case S_MovableRelease: 1220 case S_Use: 1221 if (!S.IsKnownSafe() && !SuccSRRIKnownSafe) 1222 AllSuccsHaveSame = false; 1223 else 1224 NotAllSeqEqualButKnownSafe = true; 1225 break; 1226 case S_Retain: 1227 llvm_unreachable("bottom-up pointer in retain state!"); 1228 case S_None: 1229 llvm_unreachable("This should have been handled earlier."); 1230 } 1231 } 1232 1233 /// Check for critical edges, loop boundaries, irreducible control flow, or 1234 /// other CFG structures where moving code across the edge would result in it 1235 /// being executed more. 1236 void 1237 ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB, 1238 DenseMap<const BasicBlock *, BBState> &BBStates, 1239 BBState &MyStates) const { 1240 // If any top-down local-use or possible-dec has a succ which is earlier in 1241 // the sequence, forget it. 1242 for (auto I = MyStates.top_down_ptr_begin(), E = MyStates.top_down_ptr_end(); 1243 I != E; ++I) { 1244 TopDownPtrState &S = I->second; 1245 const Sequence Seq = I->second.GetSeq(); 1246 1247 // We only care about S_Retain, S_CanRelease, and S_Use. 1248 if (Seq == S_None) 1249 continue; 1250 1251 // Make sure that if extra top down states are added in the future that this 1252 // code is updated to handle it. 1253 assert((Seq == S_Retain || Seq == S_CanRelease || Seq == S_Use) && 1254 "Unknown top down sequence state."); 1255 1256 const Value *Arg = I->first; 1257 bool SomeSuccHasSame = false; 1258 bool AllSuccsHaveSame = true; 1259 bool NotAllSeqEqualButKnownSafe = false; 1260 1261 for (const BasicBlock *Succ : successors(BB)) { 1262 // If VisitBottomUp has pointer information for this successor, take 1263 // what we know about it. 
      const DenseMap<const BasicBlock *, BBState>::iterator BBI =
        BBStates.find(Succ);
      assert(BBI != BBStates.end());
      const BottomUpPtrState &SuccS = BBI->second.getPtrBottomUpState(Arg);
      const Sequence SuccSSeq = SuccS.GetSeq();

      // If, bottom up, the pointer is in an S_None state, clear the sequence
      // progress, since the bottom-up sequence has finished, suggesting a
      // mismatch between retains and releases. This is true for all three
      // cases that we are handling here: S_Retain, S_Use, and S_CanRelease.
      if (SuccSSeq == S_None) {
        S.ClearSequenceProgress();
        continue;
      }

      // If we have S_Use or S_CanRelease, perform the CFG hazard checks.
      const bool SuccSRRIKnownSafe = SuccS.IsKnownSafe();

      // *NOTE* We do not use Seq from above here since we are allowing for
      // S.GetSeq() to change while we are visiting basic blocks.
      switch(S.GetSeq()) {
      case S_Use: {
        bool ShouldContinue = false;
        CheckForUseCFGHazard(SuccSSeq, SuccSRRIKnownSafe, S, SomeSuccHasSame,
                             AllSuccsHaveSame, NotAllSeqEqualButKnownSafe,
                             ShouldContinue);
        if (ShouldContinue)
          continue;
        break;
      }
      case S_CanRelease:
        CheckForCanReleaseCFGHazard(SuccSSeq, SuccSRRIKnownSafe, S,
                                    SomeSuccHasSame, AllSuccsHaveSame,
                                    NotAllSeqEqualButKnownSafe);
        break;
      case S_Retain:
      case S_None:
      case S_Stop:
      case S_MovableRelease:
        break;
      }
    }

    // If the state at the other end of any of the successor edges
    // matches the current state, require all edges to match. This
    // guards against loops in the middle of a sequence.
    if (SomeSuccHasSame && !AllSuccsHaveSame) {
      S.ClearSequenceProgress();
    } else if (NotAllSeqEqualButKnownSafe) {
      // If we would have cleared the state but for the fact that we are known
      // safe, stop code motion. This is because whether or not it is safe to
      // remove RR pairs via KnownSafe is an orthogonal concept to whether we
      // are allowed to perform code motion.
      S.SetCFGHazardAfflicted(true);
    }
  }
}

bool ObjCARCOpt::VisitInstructionBottomUp(
    Instruction *Inst, BasicBlock *BB, BlotMapVector<Value *, RRInfo> &Retains,
    BBState &MyStates) {
  bool NestingDetected = false;
  ARCInstKind Class = GetARCInstKind(Inst);
  const Value *Arg = nullptr;

  LLVM_DEBUG(dbgs() << " Class: " << Class << "\n");

  switch (Class) {
  case ARCInstKind::Release: {
    Arg = GetArgRCIdentityRoot(Inst);

    BottomUpPtrState &S = MyStates.getPtrBottomUpState(Arg);
    NestingDetected |= S.InitBottomUp(MDKindCache, Inst);
    break;
  }
  case ARCInstKind::RetainBlock:
    // In OptimizeIndividualCalls, we have strength reduced all optimizable
    // objc_retainBlocks to objc_retains. Thus at this point any
    // objc_retainBlocks that we see are not optimizable.
    break;
  case ARCInstKind::Retain:
  case ARCInstKind::RetainRV: {
    Arg = GetArgRCIdentityRoot(Inst);
    BottomUpPtrState &S = MyStates.getPtrBottomUpState(Arg);
    if (S.MatchWithRetain()) {
      // Don't do retain+release tracking for ARCInstKind::RetainRV, because
      // it's better to let it remain as the first instruction after a call.
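      //
      // Illustrative bottom-up match (hypothetical IR):
      //
      //   %1 = call ptr @llvm.objc.retain(ptr %x)
      //   ...                        ; nothing that can release %x
      //   call void @llvm.objc.release(ptr %x)
      //
      // The release seeded the bottom-up state for %x; reaching the retain
      // here records the candidate pair in Retains for later pairing.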
1353 if (Class != ARCInstKind::RetainRV) { 1354 LLVM_DEBUG(dbgs() << " Matching with: " << *Inst << "\n"); 1355 Retains[Inst] = S.GetRRInfo(); 1356 } 1357 S.ClearSequenceProgress(); 1358 } 1359 // A retain moving bottom up can be a use. 1360 break; 1361 } 1362 case ARCInstKind::AutoreleasepoolPop: 1363 // Conservatively, clear MyStates for all known pointers. 1364 MyStates.clearBottomUpPointers(); 1365 return NestingDetected; 1366 case ARCInstKind::AutoreleasepoolPush: 1367 case ARCInstKind::None: 1368 // These are irrelevant. 1369 return NestingDetected; 1370 default: 1371 break; 1372 } 1373 1374 // Consider any other possible effects of this instruction on each 1375 // pointer being tracked. 1376 for (auto MI = MyStates.bottom_up_ptr_begin(), 1377 ME = MyStates.bottom_up_ptr_end(); 1378 MI != ME; ++MI) { 1379 const Value *Ptr = MI->first; 1380 if (Ptr == Arg) 1381 continue; // Handled above. 1382 BottomUpPtrState &S = MI->second; 1383 1384 if (S.HandlePotentialAlterRefCount(Inst, Ptr, PA, Class)) 1385 continue; 1386 1387 S.HandlePotentialUse(BB, Inst, Ptr, PA, Class); 1388 } 1389 1390 return NestingDetected; 1391 } 1392 1393 bool ObjCARCOpt::VisitBottomUp(BasicBlock *BB, 1394 DenseMap<const BasicBlock *, BBState> &BBStates, 1395 BlotMapVector<Value *, RRInfo> &Retains) { 1396 LLVM_DEBUG(dbgs() << "\n== ObjCARCOpt::VisitBottomUp ==\n"); 1397 1398 bool NestingDetected = false; 1399 BBState &MyStates = BBStates[BB]; 1400 1401 // Merge the states from each successor to compute the initial state 1402 // for the current block. 1403 BBState::edge_iterator SI(MyStates.succ_begin()), 1404 SE(MyStates.succ_end()); 1405 if (SI != SE) { 1406 const BasicBlock *Succ = *SI; 1407 DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Succ); 1408 assert(I != BBStates.end()); 1409 MyStates.InitFromSucc(I->second); 1410 ++SI; 1411 for (; SI != SE; ++SI) { 1412 Succ = *SI; 1413 I = BBStates.find(Succ); 1414 assert(I != BBStates.end()); 1415 MyStates.MergeSucc(I->second); 1416 } 1417 } 1418 1419 LLVM_DEBUG(dbgs() << "Before:\n" 1420 << BBStates[BB] << "\n" 1421 << "Performing Dataflow:\n"); 1422 1423 // Visit all the instructions, bottom-up. 1424 for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; --I) { 1425 Instruction *Inst = &*std::prev(I); 1426 1427 // Invoke instructions are visited as part of their successors (below). 1428 if (isa<InvokeInst>(Inst)) 1429 continue; 1430 1431 LLVM_DEBUG(dbgs() << " Visiting " << *Inst << "\n"); 1432 1433 NestingDetected |= VisitInstructionBottomUp(Inst, BB, Retains, MyStates); 1434 1435 // Bail out if the number of pointers being tracked becomes too large so 1436 // that this pass can complete in a reasonable amount of time. 1437 if (MyStates.bottom_up_ptr_list_size() > MaxPtrStates) { 1438 DisableRetainReleasePairing = true; 1439 return false; 1440 } 1441 } 1442 1443 // If there's a predecessor with an invoke, visit the invoke as if it were 1444 // part of this block, since we can't insert code after an invoke in its own 1445 // block, and we don't want to split critical edges. 
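  //
  // For example (illustrative CFG), given a predecessor ending in
  //
  //   invoke void @foo() to label %this_bb unwind label %lpad
  //
  // the invoke is scanned here as if it were the last instruction of this
  // block, since nothing can be inserted after it in its own block.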
1446 for (BBState::edge_iterator PI(MyStates.pred_begin()), 1447 PE(MyStates.pred_end()); PI != PE; ++PI) { 1448 BasicBlock *Pred = *PI; 1449 if (InvokeInst *II = dyn_cast<InvokeInst>(&Pred->back())) 1450 NestingDetected |= VisitInstructionBottomUp(II, BB, Retains, MyStates); 1451 } 1452 1453 LLVM_DEBUG(dbgs() << "\nFinal State:\n" << BBStates[BB] << "\n"); 1454 1455 return NestingDetected; 1456 } 1457 1458 // Fill ReleaseInsertPtToRCIdentityRoots, which is a map from insertion points 1459 // to the set of RC identity roots that would be released by the release calls 1460 // moved to the insertion points. 1461 static void collectReleaseInsertPts( 1462 const BlotMapVector<Value *, RRInfo> &Retains, 1463 DenseMap<const Instruction *, SmallPtrSet<const Value *, 2>> 1464 &ReleaseInsertPtToRCIdentityRoots) { 1465 for (const auto &P : Retains) { 1466 // Retains is a map from an objc_retain call to a RRInfo of the RC identity 1467 // root of the call. Get the RC identity root of the objc_retain call. 1468 Instruction *Retain = cast<Instruction>(P.first); 1469 Value *Root = GetRCIdentityRoot(Retain->getOperand(0)); 1470 // Collect all the insertion points of the objc_release calls that release 1471 // the RC identity root of the objc_retain call. 1472 for (const Instruction *InsertPt : P.second.ReverseInsertPts) 1473 ReleaseInsertPtToRCIdentityRoots[InsertPt].insert(Root); 1474 } 1475 } 1476 1477 // Get the RC identity roots from an insertion point of an objc_release call. 1478 // Return nullptr if the passed instruction isn't an insertion point. 1479 static const SmallPtrSet<const Value *, 2> * 1480 getRCIdentityRootsFromReleaseInsertPt( 1481 const Instruction *InsertPt, 1482 const DenseMap<const Instruction *, SmallPtrSet<const Value *, 2>> 1483 &ReleaseInsertPtToRCIdentityRoots) { 1484 auto I = ReleaseInsertPtToRCIdentityRoots.find(InsertPt); 1485 if (I == ReleaseInsertPtToRCIdentityRoots.end()) 1486 return nullptr; 1487 return &I->second; 1488 } 1489 1490 bool ObjCARCOpt::VisitInstructionTopDown( 1491 Instruction *Inst, DenseMap<Value *, RRInfo> &Releases, BBState &MyStates, 1492 const DenseMap<const Instruction *, SmallPtrSet<const Value *, 2>> 1493 &ReleaseInsertPtToRCIdentityRoots) { 1494 bool NestingDetected = false; 1495 ARCInstKind Class = GetARCInstKind(Inst); 1496 const Value *Arg = nullptr; 1497 1498 // Make sure a call to objc_retain isn't moved past insertion points of calls 1499 // to objc_release. 1500 if (const SmallPtrSet<const Value *, 2> *Roots = 1501 getRCIdentityRootsFromReleaseInsertPt( 1502 Inst, ReleaseInsertPtToRCIdentityRoots)) 1503 for (const auto *Root : *Roots) { 1504 TopDownPtrState &S = MyStates.getPtrTopDownState(Root); 1505 // Disable code motion if the current position is S_Retain to prevent 1506 // moving the objc_retain call past objc_release calls. If it's 1507 // S_CanRelease or larger, it's not necessary to disable code motion as 1508 // the insertion points that prevent the objc_retain call from moving down 1509 // should have been set already. 1510 if (S.GetSeq() == S_Retain) 1511 S.SetCFGHazardAfflicted(true); 1512 } 1513 1514 LLVM_DEBUG(dbgs() << " Class: " << Class << "\n"); 1515 1516 switch (Class) { 1517 case ARCInstKind::RetainBlock: 1518 // In OptimizeIndividualCalls, we have strength reduced all optimizable 1519 // objc_retainBlocks to objc_retains. Thus at this point any 1520 // objc_retainBlocks that we see are not optimizable. We need to break since 1521 // a retain can be a potential use. 
    break;
  case ARCInstKind::Retain:
  case ARCInstKind::RetainRV: {
    Arg = GetArgRCIdentityRoot(Inst);
    TopDownPtrState &S = MyStates.getPtrTopDownState(Arg);
    NestingDetected |= S.InitTopDown(Class, Inst);
    // A retain can be a potential use; proceed to the generic checking
    // code below.
    break;
  }
  case ARCInstKind::Release: {
    Arg = GetArgRCIdentityRoot(Inst);
    TopDownPtrState &S = MyStates.getPtrTopDownState(Arg);
    // Try to form a tentative pair between this release instruction and the
    // top down pointers that we are tracking.
    if (S.MatchWithRelease(MDKindCache, Inst)) {
      // If we succeed, copy S's RRInfo into the Release -> {Retain Set
      // Map}. Then we clear S.
      LLVM_DEBUG(dbgs() << " Matching with: " << *Inst << "\n");
      Releases[Inst] = S.GetRRInfo();
      S.ClearSequenceProgress();
    }
    break;
  }
  case ARCInstKind::AutoreleasepoolPop:
    // Conservatively, clear MyStates for all known pointers.
    MyStates.clearTopDownPointers();
    return false;
  case ARCInstKind::AutoreleasepoolPush:
  case ARCInstKind::None:
    // These cannot be uses of the pointers we are tracking.
    return false;
  default:
    break;
  }

  // Consider any other possible effects of this instruction on each
  // pointer being tracked.
  for (auto MI = MyStates.top_down_ptr_begin(),
            ME = MyStates.top_down_ptr_end();
       MI != ME; ++MI) {
    const Value *Ptr = MI->first;
    if (Ptr == Arg)
      continue; // Handled above.
    TopDownPtrState &S = MI->second;
    if (S.HandlePotentialAlterRefCount(Inst, Ptr, PA, Class, *BundledInsts))
      continue;

    S.HandlePotentialUse(Inst, Ptr, PA, Class);
  }

  return NestingDetected;
}

bool ObjCARCOpt::VisitTopDown(
    BasicBlock *BB, DenseMap<const BasicBlock *, BBState> &BBStates,
    DenseMap<Value *, RRInfo> &Releases,
    const DenseMap<const Instruction *, SmallPtrSet<const Value *, 2>>
        &ReleaseInsertPtToRCIdentityRoots) {
  LLVM_DEBUG(dbgs() << "\n== ObjCARCOpt::VisitTopDown ==\n");
  bool NestingDetected = false;
  BBState &MyStates = BBStates[BB];

  // Merge the states from each predecessor to compute the initial state
  // for the current block.
  BBState::edge_iterator PI(MyStates.pred_begin()),
                         PE(MyStates.pred_end());
  if (PI != PE) {
    const BasicBlock *Pred = *PI;
    DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Pred);
    assert(I != BBStates.end());
    MyStates.InitFromPred(I->second);
    ++PI;
    for (; PI != PE; ++PI) {
      Pred = *PI;
      I = BBStates.find(Pred);
      assert(I != BBStates.end());
      MyStates.MergePred(I->second);
    }
  }

  // Check that BB and MyStates have the same number of predecessors. This
  // prevents retain calls that live outside a loop from being moved into the
  // loop.
  if (!BB->hasNPredecessors(MyStates.pred_end() - MyStates.pred_begin()))
    for (auto I = MyStates.top_down_ptr_begin(),
              E = MyStates.top_down_ptr_end();
         I != E; ++I)
      I->second.SetCFGHazardAfflicted(true);

  LLVM_DEBUG(dbgs() << "Before:\n"
                    << BBStates[BB] << "\n"
                    << "Performing Dataflow:\n");

  // Visit all the instructions, top-down.
  for (Instruction &Inst : *BB) {
    LLVM_DEBUG(dbgs() << " Visiting " << Inst << "\n");

    NestingDetected |= VisitInstructionTopDown(
        &Inst, Releases, MyStates, ReleaseInsertPtToRCIdentityRoots);

    // Bail out if the number of pointers being tracked becomes too large, so
    // that this pass can complete in a reasonable amount of time.
    if (MyStates.top_down_ptr_list_size() > MaxPtrStates) {
      DisableRetainReleasePairing = true;
      return false;
    }
  }

  LLVM_DEBUG(dbgs() << "\nState Before Checking for CFG Hazards:\n"
                    << BBStates[BB] << "\n\n");
  CheckForCFGHazards(BB, BBStates, MyStates);
  LLVM_DEBUG(dbgs() << "Final State:\n" << BBStates[BB] << "\n");
  return NestingDetected;
}

static void
ComputePostOrders(Function &F,
                  SmallVectorImpl<BasicBlock *> &PostOrder,
                  SmallVectorImpl<BasicBlock *> &ReverseCFGPostOrder,
                  unsigned NoObjCARCExceptionsMDKind,
                  DenseMap<const BasicBlock *, BBState> &BBStates) {
  /// The visited set, for doing DFS walks.
  SmallPtrSet<BasicBlock *, 16> Visited;

  // Do DFS, computing the PostOrder.
  SmallPtrSet<BasicBlock *, 16> OnStack;
  SmallVector<std::pair<BasicBlock *, succ_iterator>, 16> SuccStack;

  // Functions always have exactly one entry block, and we don't have
  // any other block that we treat like an entry block.
  BasicBlock *EntryBB = &F.getEntryBlock();
  BBState &MyStates = BBStates[EntryBB];
  MyStates.SetAsEntry();
  Instruction *EntryTI = EntryBB->getTerminator();
  SuccStack.push_back(std::make_pair(EntryBB, succ_iterator(EntryTI)));
  Visited.insert(EntryBB);
  OnStack.insert(EntryBB);
  do {
  dfs_next_succ:
    BasicBlock *CurrBB = SuccStack.back().first;
    succ_iterator SE(CurrBB->getTerminator(), false);

    while (SuccStack.back().second != SE) {
      BasicBlock *SuccBB = *SuccStack.back().second++;
      if (Visited.insert(SuccBB).second) {
        SuccStack.push_back(
            std::make_pair(SuccBB, succ_iterator(SuccBB->getTerminator())));
        BBStates[CurrBB].addSucc(SuccBB);
        BBState &SuccStates = BBStates[SuccBB];
        SuccStates.addPred(CurrBB);
        OnStack.insert(SuccBB);
        goto dfs_next_succ;
      }

      if (!OnStack.count(SuccBB)) {
        BBStates[CurrBB].addSucc(SuccBB);
        BBStates[SuccBB].addPred(CurrBB);
      }
    }
    OnStack.erase(CurrBB);
    PostOrder.push_back(CurrBB);
    SuccStack.pop_back();
  } while (!SuccStack.empty());

  Visited.clear();

  // Do reverse-CFG DFS, computing the reverse-CFG PostOrder.
  // Functions may have many exits, and there are also blocks which we treat
  // as exits due to ignored edges.
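  // (For example, a reachable block whose terminator is 'unreachable' has no
  // CFG successors recorded above, so it is treated as an exit for this
  // reverse walk.)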
  SmallVector<std::pair<BasicBlock *, BBState::edge_iterator>, 16> PredStack;
  for (BasicBlock &ExitBB : F) {
    BBState &MyStates = BBStates[&ExitBB];
    if (!MyStates.isExit())
      continue;

    MyStates.SetAsExit();

    PredStack.push_back(std::make_pair(&ExitBB, MyStates.pred_begin()));
    Visited.insert(&ExitBB);
    while (!PredStack.empty()) {
    reverse_dfs_next_succ:
      BBState::edge_iterator PE = BBStates[PredStack.back().first].pred_end();
      while (PredStack.back().second != PE) {
        BasicBlock *BB = *PredStack.back().second++;
        if (Visited.insert(BB).second) {
          PredStack.push_back(std::make_pair(BB, BBStates[BB].pred_begin()));
          goto reverse_dfs_next_succ;
        }
      }
      ReverseCFGPostOrder.push_back(PredStack.pop_back_val().first);
    }
  }
}

// Visit the function both top-down and bottom-up.
bool ObjCARCOpt::Visit(Function &F,
                       DenseMap<const BasicBlock *, BBState> &BBStates,
                       BlotMapVector<Value *, RRInfo> &Retains,
                       DenseMap<Value *, RRInfo> &Releases) {
  // Use reverse-postorder traversals, because we magically know that loops
  // will be well behaved, i.e. they won't repeatedly call retain on a single
  // pointer without doing a release. We can't use the ReversePostOrderTraversal
  // class here because we want the reverse-CFG postorder to consider each
  // function exit point, and we want to ignore selected cycle edges.
  SmallVector<BasicBlock *, 16> PostOrder;
  SmallVector<BasicBlock *, 16> ReverseCFGPostOrder;
  ComputePostOrders(F, PostOrder, ReverseCFGPostOrder,
                    MDKindCache.get(ARCMDKindID::NoObjCARCExceptions),
                    BBStates);

  // Use reverse-postorder on the reverse CFG for bottom-up.
  bool BottomUpNestingDetected = false;
  for (BasicBlock *BB : llvm::reverse(ReverseCFGPostOrder)) {
    BottomUpNestingDetected |= VisitBottomUp(BB, BBStates, Retains);
    if (DisableRetainReleasePairing)
      return false;
  }

  DenseMap<const Instruction *, SmallPtrSet<const Value *, 2>>
      ReleaseInsertPtToRCIdentityRoots;
  collectReleaseInsertPts(Retains, ReleaseInsertPtToRCIdentityRoots);

  // Use reverse-postorder for top-down.
  bool TopDownNestingDetected = false;
  for (BasicBlock *BB : llvm::reverse(PostOrder)) {
    TopDownNestingDetected |=
        VisitTopDown(BB, BBStates, Releases, ReleaseInsertPtToRCIdentityRoots);
    if (DisableRetainReleasePairing)
      return false;
  }

  return TopDownNestingDetected && BottomUpNestingDetected;
}

/// Move the calls in RetainsToMove and ReleasesToMove.
void ObjCARCOpt::MoveCalls(Value *Arg, RRInfo &RetainsToMove,
                           RRInfo &ReleasesToMove,
                           BlotMapVector<Value *, RRInfo> &Retains,
                           DenseMap<Value *, RRInfo> &Releases,
                           SmallVectorImpl<Instruction *> &DeadInsts,
                           Module *M) {
  Type *ArgTy = Arg->getType();
  Type *ParamTy = PointerType::getUnqual(Type::getInt8Ty(ArgTy->getContext()));

  LLVM_DEBUG(dbgs() << "== ObjCARCOpt::MoveCalls ==\n");

  // Insert the new retain and release calls.
  for (Instruction *InsertPt : ReleasesToMove.ReverseInsertPts) {
    Value *MyArg = ArgTy == ParamTy ?
                   Arg : new BitCastInst(Arg, ParamTy, "", InsertPt);
    Function *Decl = EP.get(ARCRuntimeEntryPointKind::Retain);
    SmallVector<OperandBundleDef, 1> BundleList;
    addOpBundleForFunclet(InsertPt->getParent(), BundleList);
    CallInst *Call = CallInst::Create(Decl, MyArg, BundleList, "", InsertPt);
    Call->setDoesNotThrow();
    Call->setTailCall();

    LLVM_DEBUG(dbgs() << "Inserting new Retain: " << *Call
                      << "\n"
                         "At insertion point: "
                      << *InsertPt << "\n");
  }
  for (Instruction *InsertPt : RetainsToMove.ReverseInsertPts) {
    Value *MyArg = ArgTy == ParamTy ? Arg :
                   new BitCastInst(Arg, ParamTy, "", InsertPt);
    Function *Decl = EP.get(ARCRuntimeEntryPointKind::Release);
    SmallVector<OperandBundleDef, 1> BundleList;
    addOpBundleForFunclet(InsertPt->getParent(), BundleList);
    CallInst *Call = CallInst::Create(Decl, MyArg, BundleList, "", InsertPt);
    // Attach a clang.imprecise_release metadata tag, if appropriate.
    if (MDNode *M = ReleasesToMove.ReleaseMetadata)
      Call->setMetadata(MDKindCache.get(ARCMDKindID::ImpreciseRelease), M);
    Call->setDoesNotThrow();
    if (ReleasesToMove.IsTailCallRelease)
      Call->setTailCall();

    LLVM_DEBUG(dbgs() << "Inserting new Release: " << *Call
                      << "\n"
                         "At insertion point: "
                      << *InsertPt << "\n");
  }

  // Delete the original retain and release calls.
  for (Instruction *OrigRetain : RetainsToMove.Calls) {
    Retains.blot(OrigRetain);
    DeadInsts.push_back(OrigRetain);
    LLVM_DEBUG(dbgs() << "Deleting retain: " << *OrigRetain << "\n");
  }
  for (Instruction *OrigRelease : ReleasesToMove.Calls) {
    Releases.erase(OrigRelease);
    DeadInsts.push_back(OrigRelease);
    LLVM_DEBUG(dbgs() << "Deleting release: " << *OrigRelease << "\n");
  }
}

bool ObjCARCOpt::PairUpRetainsAndReleases(
    DenseMap<const BasicBlock *, BBState> &BBStates,
    BlotMapVector<Value *, RRInfo> &Retains,
    DenseMap<Value *, RRInfo> &Releases, Module *M,
    Instruction *Retain,
    SmallVectorImpl<Instruction *> &DeadInsts, RRInfo &RetainsToMove,
    RRInfo &ReleasesToMove, Value *Arg, bool KnownSafe,
    bool &AnyPairsCompletelyEliminated) {
  // If a pair happens in a region where it is known that the reference count
  // is already incremented, we can similarly ignore possible decrements unless
  // we are dealing with a retainable object with multiple provenance sources.
  bool KnownSafeTD = true, KnownSafeBU = true;
  bool CFGHazardAfflicted = false;

  // Connect the dots between the top-down-collected RetainsToMove and
  // bottom-up-collected ReleasesToMove to form sets of related calls.
  // This is an iterative process so that we connect multiple releases
  // to multiple retains if needed.
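  //
  // Illustrative example (names are hypothetical): if the retain R1 we start
  // from pairs with releases A and B, and B was also paired bottom-up with a
  // second retain R2, then R1, R2, A and B all end up in the same
  // RetainsToMove/ReleasesToMove sets, and any further releases reachable from
  // R2 are pulled in on the next iteration.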
  unsigned OldDelta = 0;
  unsigned NewDelta = 0;
  unsigned OldCount = 0;
  unsigned NewCount = 0;
  bool FirstRelease = true;
  for (SmallVector<Instruction *, 4> NewRetains{Retain};;) {
    SmallVector<Instruction *, 4> NewReleases;
    for (Instruction *NewRetain : NewRetains) {
      auto It = Retains.find(NewRetain);
      assert(It != Retains.end());
      const RRInfo &NewRetainRRI = It->second;
      KnownSafeTD &= NewRetainRRI.KnownSafe;
      CFGHazardAfflicted |= NewRetainRRI.CFGHazardAfflicted;
      for (Instruction *NewRetainRelease : NewRetainRRI.Calls) {
        auto Jt = Releases.find(NewRetainRelease);
        if (Jt == Releases.end())
          return false;
        const RRInfo &NewRetainReleaseRRI = Jt->second;

        // If the release does not have a reference to the retain as well,
        // something happened which is unaccounted for. Do not do anything.
        //
        // This can happen if we catch an additive overflow during path count
        // merging.
        if (!NewRetainReleaseRRI.Calls.count(NewRetain))
          return false;

        if (ReleasesToMove.Calls.insert(NewRetainRelease).second) {
          // If we overflow when we compute the path count, don't remove/move
          // anything.
          const BBState &NRRBBState = BBStates[NewRetainRelease->getParent()];
          unsigned PathCount = BBState::OverflowOccurredValue;
          if (NRRBBState.GetAllPathCountWithOverflow(PathCount))
            return false;
          assert(PathCount != BBState::OverflowOccurredValue &&
                 "PathCount at this point can not be "
                 "OverflowOccurredValue.");
          OldDelta -= PathCount;

          // Merge the ReleaseMetadata and IsTailCallRelease values.
          if (FirstRelease) {
            ReleasesToMove.ReleaseMetadata =
                NewRetainReleaseRRI.ReleaseMetadata;
            ReleasesToMove.IsTailCallRelease =
                NewRetainReleaseRRI.IsTailCallRelease;
            FirstRelease = false;
          } else {
            if (ReleasesToMove.ReleaseMetadata !=
                NewRetainReleaseRRI.ReleaseMetadata)
              ReleasesToMove.ReleaseMetadata = nullptr;
            if (ReleasesToMove.IsTailCallRelease !=
                NewRetainReleaseRRI.IsTailCallRelease)
              ReleasesToMove.IsTailCallRelease = false;
          }

          // Collect the optimal insertion points.
          if (!KnownSafe)
            for (Instruction *RIP : NewRetainReleaseRRI.ReverseInsertPts) {
              if (ReleasesToMove.ReverseInsertPts.insert(RIP).second) {
                // If we overflow when we compute the path count, don't
                // remove/move anything.
                const BBState &RIPBBState = BBStates[RIP->getParent()];
                PathCount = BBState::OverflowOccurredValue;
                if (RIPBBState.GetAllPathCountWithOverflow(PathCount))
                  return false;
                assert(PathCount != BBState::OverflowOccurredValue &&
                       "PathCount at this point can not be "
                       "OverflowOccurredValue.");
                NewDelta -= PathCount;
              }
            }
          NewReleases.push_back(NewRetainRelease);
        }
      }
    }
    NewRetains.clear();
    if (NewReleases.empty()) break;

    // Back the other way.
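    // That is, for each release discovered above, pull in every retain it was
    // paired with bottom-up; the outer loop terminates once an iteration adds
    // no new releases or no new retains.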
    for (Instruction *NewRelease : NewReleases) {
      auto It = Releases.find(NewRelease);
      assert(It != Releases.end());
      const RRInfo &NewReleaseRRI = It->second;
      KnownSafeBU &= NewReleaseRRI.KnownSafe;
      CFGHazardAfflicted |= NewReleaseRRI.CFGHazardAfflicted;
      for (Instruction *NewReleaseRetain : NewReleaseRRI.Calls) {
        auto Jt = Retains.find(NewReleaseRetain);
        if (Jt == Retains.end())
          return false;
        const RRInfo &NewReleaseRetainRRI = Jt->second;

        // If the retain does not have a reference to the release as well,
        // something happened which is unaccounted for. Do not do anything.
        //
        // This can happen if we catch an additive overflow during path count
        // merging.
        if (!NewReleaseRetainRRI.Calls.count(NewRelease))
          return false;

        if (RetainsToMove.Calls.insert(NewReleaseRetain).second) {
          // If we overflow when we compute the path count, don't remove/move
          // anything.
          const BBState &NRRBBState = BBStates[NewReleaseRetain->getParent()];
          unsigned PathCount = BBState::OverflowOccurredValue;
          if (NRRBBState.GetAllPathCountWithOverflow(PathCount))
            return false;
          assert(PathCount != BBState::OverflowOccurredValue &&
                 "PathCount at this point can not be "
                 "OverflowOccurredValue.");
          OldDelta += PathCount;
          OldCount += PathCount;

          // Collect the optimal insertion points.
          if (!KnownSafe)
            for (Instruction *RIP : NewReleaseRetainRRI.ReverseInsertPts) {
              if (RetainsToMove.ReverseInsertPts.insert(RIP).second) {
                // If we overflow when we compute the path count, don't
                // remove/move anything.
                const BBState &RIPBBState = BBStates[RIP->getParent()];

                PathCount = BBState::OverflowOccurredValue;
                if (RIPBBState.GetAllPathCountWithOverflow(PathCount))
                  return false;
                assert(PathCount != BBState::OverflowOccurredValue &&
                       "PathCount at this point can not be "
                       "OverflowOccurredValue.");
                NewDelta += PathCount;
                NewCount += PathCount;
              }
            }
          NewRetains.push_back(NewReleaseRetain);
        }
      }
    }
    if (NewRetains.empty()) break;
  }

  // We can only remove pointers if we are known safe in both directions.
  bool UnconditionallySafe = KnownSafeTD && KnownSafeBU;
  if (UnconditionallySafe) {
    RetainsToMove.ReverseInsertPts.clear();
    ReleasesToMove.ReverseInsertPts.clear();
    NewCount = 0;
  } else {
    // Determine whether the new insertion points we computed preserve the
    // balance of retain and release calls through the program.
    // TODO: If the fully aggressive solution isn't valid, try to find a
    // less aggressive solution which is.
    if (NewDelta != 0)
      return false;

    // At this point, we are not going to remove any RR pairs, but we are still
    // able to move RR pairs. If one of our pointers is afflicted with
    // CFGHazards, we cannot perform such code motion, so exit early.
    const bool WillPerformCodeMotion =
        !RetainsToMove.ReverseInsertPts.empty() ||
        !ReleasesToMove.ReverseInsertPts.empty();
    if (CFGHazardAfflicted && WillPerformCodeMotion)
      return false;
  }

  // Determine whether the original call points are balanced in the retain and
  // release calls through the program. If not, conservatively don't touch
  // them.
  // TODO: It's theoretically possible to do code motion in this case, as
  // long as the existing imbalances are maintained.
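  //
  // (Roughly: OldDelta/OldCount accumulate the per-block path counts of the
  // original retain and release positions, while NewDelta/NewCount do the same
  // for the chosen insertion points; a nonzero delta means some path through
  // the function would execute a different number of retains than releases,
  // so we conservatively bail out.)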
  if (OldDelta != 0)
    return false;

  Changed = true;
  assert(OldCount != 0 && "Unreachable code?");
  NumRRs += OldCount - NewCount;
  // Set to true if we completely removed any RR pairs.
  AnyPairsCompletelyEliminated = NewCount == 0;

  // We can move calls!
  return true;
}

/// Identify pairings between the retains and releases, and delete and/or move
/// them.
bool ObjCARCOpt::PerformCodePlacement(
    DenseMap<const BasicBlock *, BBState> &BBStates,
    BlotMapVector<Value *, RRInfo> &Retains,
    DenseMap<Value *, RRInfo> &Releases, Module *M) {
  LLVM_DEBUG(dbgs() << "\n== ObjCARCOpt::PerformCodePlacement ==\n");

  bool AnyPairsCompletelyEliminated = false;
  SmallVector<Instruction *, 8> DeadInsts;

  // Visit each retain.
  for (BlotMapVector<Value *, RRInfo>::const_iterator I = Retains.begin(),
                                                      E = Retains.end();
       I != E; ++I) {
    Value *V = I->first;
    if (!V) continue; // blotted

    Instruction *Retain = cast<Instruction>(V);

    LLVM_DEBUG(dbgs() << "Visiting: " << *Retain << "\n");

    Value *Arg = GetArgRCIdentityRoot(Retain);

    // If the object being released is in static or stack storage, we know it's
    // not being managed by ObjC reference counting, so we can delete pairs
    // regardless of what possible decrements or uses lie between them.
    bool KnownSafe = isa<Constant>(Arg) || isa<AllocaInst>(Arg);

    // A constant pointer can't point to an object on the heap. It may
    // be reference-counted, but it won't be deleted.
    if (const LoadInst *LI = dyn_cast<LoadInst>(Arg))
      if (const GlobalVariable *GV =
              dyn_cast<GlobalVariable>(
                  GetRCIdentityRoot(LI->getPointerOperand())))
        if (GV->isConstant())
          KnownSafe = true;

    // Connect the dots between the top-down-collected RetainsToMove and
    // bottom-up-collected ReleasesToMove to form sets of related calls.
    RRInfo RetainsToMove, ReleasesToMove;

    bool PerformMoveCalls = PairUpRetainsAndReleases(
        BBStates, Retains, Releases, M, Retain, DeadInsts,
        RetainsToMove, ReleasesToMove, Arg, KnownSafe,
        AnyPairsCompletelyEliminated);

    if (PerformMoveCalls) {
      // Ok, everything checks out and we're all set. Let's move/delete some
      // code!
      MoveCalls(Arg, RetainsToMove, ReleasesToMove,
                Retains, Releases, DeadInsts, M);
    }
  }

  // Now that we're done moving everything, we can delete the newly dead
  // instructions, as we no longer need them as insert points.
  while (!DeadInsts.empty())
    EraseInstruction(DeadInsts.pop_back_val());

  return AnyPairsCompletelyEliminated;
}

/// Weak pointer optimizations.
void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
  LLVM_DEBUG(dbgs() << "\n== ObjCARCOpt::OptimizeWeakCalls ==\n");

  // First, do memdep-style RLE (redundant load elimination) and S2L
  // (store-to-load forwarding) optimizations. We can't use memdep itself
  // because it uses AliasAnalysis and we need to do provenance queries
  // instead.
  for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
    Instruction *Inst = &*I++;

    LLVM_DEBUG(dbgs() << "Visiting: " << *Inst << "\n");

    ARCInstKind Class = GetBasicARCInstKind(Inst);
    if (Class != ARCInstKind::LoadWeak &&
        Class != ARCInstKind::LoadWeakRetained)
      continue;

    // Delete objc_loadWeak calls with no users.
    if (Class == ARCInstKind::LoadWeak && Inst->use_empty()) {
      Inst->eraseFromParent();
      Changed = true;
      continue;
    }

    // TODO: For now, just look for an earlier available version of this value
    // within the same block. Theoretically, we could do memdep-style non-local
    // analysis too, but that would want caching. A better approach would be to
    // use the technique that EarlyCSE uses.
    inst_iterator Current = std::prev(I);
    BasicBlock *CurrentBB = &*Current.getBasicBlockIterator();
    for (BasicBlock::iterator B = CurrentBB->begin(),
                              J = Current.getInstructionIterator();
         J != B; --J) {
      Instruction *EarlierInst = &*std::prev(J);
      ARCInstKind EarlierClass = GetARCInstKind(EarlierInst);
      switch (EarlierClass) {
      case ARCInstKind::LoadWeak:
      case ARCInstKind::LoadWeakRetained: {
        // If this is loading from the same pointer, replace this load's value
        // with that one.
        CallInst *Call = cast<CallInst>(Inst);
        CallInst *EarlierCall = cast<CallInst>(EarlierInst);
        Value *Arg = Call->getArgOperand(0);
        Value *EarlierArg = EarlierCall->getArgOperand(0);
        switch (PA.getAA()->alias(Arg, EarlierArg)) {
        case AliasResult::MustAlias:
          Changed = true;
          // If the load has a builtin retain, insert a plain retain for it.
          if (Class == ARCInstKind::LoadWeakRetained) {
            Function *Decl = EP.get(ARCRuntimeEntryPointKind::Retain);
            CallInst *CI = CallInst::Create(Decl, EarlierCall, "", Call);
            CI->setTailCall();
          }
          // Zap the fully redundant load.
          Call->replaceAllUsesWith(EarlierCall);
          Call->eraseFromParent();
          goto clobbered;
        case AliasResult::MayAlias:
        case AliasResult::PartialAlias:
          goto clobbered;
        case AliasResult::NoAlias:
          break;
        }
        break;
      }
      case ARCInstKind::StoreWeak:
      case ARCInstKind::InitWeak: {
        // If this is storing to the same pointer and has the same size, etc.,
        // replace this load's value with the stored value.
        CallInst *Call = cast<CallInst>(Inst);
        CallInst *EarlierCall = cast<CallInst>(EarlierInst);
        Value *Arg = Call->getArgOperand(0);
        Value *EarlierArg = EarlierCall->getArgOperand(0);
        switch (PA.getAA()->alias(Arg, EarlierArg)) {
        case AliasResult::MustAlias:
          Changed = true;
          // If the load has a builtin retain, insert a plain retain for it.
          if (Class == ARCInstKind::LoadWeakRetained) {
            Function *Decl = EP.get(ARCRuntimeEntryPointKind::Retain);
            CallInst *CI = CallInst::Create(Decl, EarlierCall, "", Call);
            CI->setTailCall();
          }
          // Zap the fully redundant load.
          Call->replaceAllUsesWith(EarlierCall->getArgOperand(1));
          Call->eraseFromParent();
          goto clobbered;
        case AliasResult::MayAlias:
        case AliasResult::PartialAlias:
          goto clobbered;
        case AliasResult::NoAlias:
          break;
        }
        break;
      }
      case ARCInstKind::MoveWeak:
      case ARCInstKind::CopyWeak:
        // TODO: Grab the copied value.
        goto clobbered;
      case ARCInstKind::AutoreleasepoolPush:
      case ARCInstKind::None:
      case ARCInstKind::IntrinsicUser:
      case ARCInstKind::User:
        // Weak pointers are only modified through the weak entry points
        // (and arbitrary calls, which could call the weak entry points).
        break;
      default:
        // Anything else could modify the weak pointer.
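        // (For instance, an arbitrary call could reach objc_storeWeak or
        // objc_destroyWeak on the same location, so forget what we knew.)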
        goto clobbered;
      }
    }
  clobbered:;
  }

  // Then, for each destroyWeak with an alloca operand, check to see if
  // the alloca and all its users can be zapped.
  for (Instruction &Inst : llvm::make_early_inc_range(instructions(F))) {
    ARCInstKind Class = GetBasicARCInstKind(&Inst);
    if (Class != ARCInstKind::DestroyWeak)
      continue;

    CallInst *Call = cast<CallInst>(&Inst);
    Value *Arg = Call->getArgOperand(0);
    if (AllocaInst *Alloca = dyn_cast<AllocaInst>(Arg)) {
      for (User *U : Alloca->users()) {
        const Instruction *UserInst = cast<Instruction>(U);
        switch (GetBasicARCInstKind(UserInst)) {
        case ARCInstKind::InitWeak:
        case ARCInstKind::StoreWeak:
        case ARCInstKind::DestroyWeak:
          continue;
        default:
          goto done;
        }
      }
      Changed = true;
      for (User *U : llvm::make_early_inc_range(Alloca->users())) {
        CallInst *UserInst = cast<CallInst>(U);
        switch (GetBasicARCInstKind(UserInst)) {
        case ARCInstKind::InitWeak:
        case ARCInstKind::StoreWeak:
          // These functions return their second argument.
          UserInst->replaceAllUsesWith(UserInst->getArgOperand(1));
          break;
        case ARCInstKind::DestroyWeak:
          // No return value.
          break;
        default:
          llvm_unreachable("alloca really is used!");
        }
        UserInst->eraseFromParent();
      }
      Alloca->eraseFromParent();
    done:;
    }
  }
}

/// Identify program paths which execute sequences of retains and releases that
/// can be eliminated.
bool ObjCARCOpt::OptimizeSequences(Function &F) {
  // Releases, Retains - These are used to store the results of the main flow
  // analysis. These use Value* as the key instead of Instruction* so that the
  // map stays valid when we get around to rewriting code and calls get
  // replaced by arguments.
  DenseMap<Value *, RRInfo> Releases;
  BlotMapVector<Value *, RRInfo> Retains;

  // This is used during the traversal of the function to track the
  // states for each identified object at each block.
  DenseMap<const BasicBlock *, BBState> BBStates;

  // Analyze the CFG of the function, and all instructions.
  bool NestingDetected = Visit(F, BBStates, Retains, Releases);

  if (DisableRetainReleasePairing)
    return false;

  // Transform.
  bool AnyPairsCompletelyEliminated = PerformCodePlacement(BBStates, Retains,
                                                           Releases,
                                                           F.getParent());

  return AnyPairsCompletelyEliminated && NestingDetected;
}

/// Check if there is a dependent call earlier that does not have anything in
/// between the Retain and the call that can affect the reference count of their
/// shared pointer argument. Note that Retain need not be in BB.
static CallInst *HasSafePathToPredecessorCall(const Value *Arg,
                                              Instruction *Retain,
                                              ProvenanceAnalysis &PA) {
  auto *Call = dyn_cast_or_null<CallInst>(findSingleDependency(
      CanChangeRetainCount, Arg, Retain->getParent(), Retain, PA));

  // Check that the pointer is the return value of the call.
  if (!Call || Arg != Call)
    return nullptr;

  // Check that the call is a regular call.
  ARCInstKind Class = GetBasicARCInstKind(Call);
  return Class == ARCInstKind::CallOrUser || Class == ARCInstKind::Call ?
             Call : nullptr;
}

/// Find a dependent retain that precedes the given autorelease for which there
/// is nothing in between the two instructions that can affect the ref count of
/// Arg.
static CallInst *
FindPredecessorRetainWithSafePath(const Value *Arg, BasicBlock *BB,
                                  Instruction *Autorelease,
                                  ProvenanceAnalysis &PA) {
  auto *Retain = dyn_cast_or_null<CallInst>(
      findSingleDependency(CanChangeRetainCount, Arg, BB, Autorelease, PA));

  // Check that we found a retain with the same argument.
  if (!Retain || !IsRetain(GetBasicARCInstKind(Retain)) ||
      GetArgRCIdentityRoot(Retain) != Arg) {
    return nullptr;
  }

  return Retain;
}

/// Look for an ``autorelease'' instruction dependent on Arg such that there are
/// no instructions dependent on Arg that need a positive ref count in between
/// the autorelease and the ret.
static CallInst *
FindPredecessorAutoreleaseWithSafePath(const Value *Arg, BasicBlock *BB,
                                       ReturnInst *Ret,
                                       ProvenanceAnalysis &PA) {
  SmallPtrSet<Instruction *, 4> DepInsts;
  auto *Autorelease = dyn_cast_or_null<CallInst>(
      findSingleDependency(NeedsPositiveRetainCount, Arg, BB, Ret, PA));

  if (!Autorelease)
    return nullptr;
  ARCInstKind AutoreleaseClass = GetBasicARCInstKind(Autorelease);
  if (!IsAutorelease(AutoreleaseClass))
    return nullptr;
  if (GetArgRCIdentityRoot(Autorelease) != Arg)
    return nullptr;

  return Autorelease;
}

/// Look for this pattern:
/// \code
///    %call = call i8* @something(...)
///    %2 = call i8* @objc_retain(i8* %call)
///    %3 = call i8* @objc_autorelease(i8* %2)
///    ret i8* %3
/// \endcode
/// And delete the retain and autorelease.
void ObjCARCOpt::OptimizeReturns(Function &F) {
  if (!F.getReturnType()->isPointerTy())
    return;

  LLVM_DEBUG(dbgs() << "\n== ObjCARCOpt::OptimizeReturns ==\n");

  for (BasicBlock &BB : F) {
    ReturnInst *Ret = dyn_cast<ReturnInst>(&BB.back());
    if (!Ret)
      continue;

    LLVM_DEBUG(dbgs() << "Visiting: " << *Ret << "\n");

    const Value *Arg = GetRCIdentityRoot(Ret->getOperand(0));

    // Look for an ``autorelease'' instruction that is a predecessor of Ret and
    // dependent on Arg such that there are no instructions dependent on Arg
    // that need a positive ref count in between the autorelease and Ret.
    CallInst *Autorelease =
        FindPredecessorAutoreleaseWithSafePath(Arg, &BB, Ret, PA);

    if (!Autorelease)
      continue;

    CallInst *Retain = FindPredecessorRetainWithSafePath(
        Arg, Autorelease->getParent(), Autorelease, PA);

    if (!Retain)
      continue;

    // Check that there is nothing that can affect the reference count
    // between the retain and the call. Note that Retain need not be in BB.
    CallInst *Call = HasSafePathToPredecessorCall(Arg, Retain, PA);

    // Don't remove retainRV/autoreleaseRV pairs if the call isn't a tail call.
    if (!Call ||
        (!Call->isTailCall() &&
         GetBasicARCInstKind(Retain) == ARCInstKind::RetainRV &&
         GetBasicARCInstKind(Autorelease) == ARCInstKind::AutoreleaseRV))
      continue;

    // If so, we can zap the retain and autorelease.
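    // (Illustratively, for the pattern documented above OptimizeReturns: both
    // runtime calls forward their argument, so erasing them leaves the block
    // returning %call directly, i.e. just
    //   %call = call i8* @something(...)
    //   ret i8* %call
    // modulo any intervening bitcasts.)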
    Changed = true;
    ++NumRets;
    LLVM_DEBUG(dbgs() << "Erasing: " << *Retain << "\nErasing: " << *Autorelease
                      << "\n");
    BundledInsts->eraseInst(Retain);
    EraseInstruction(Autorelease);
  }
}

#ifndef NDEBUG
void
ObjCARCOpt::GatherStatistics(Function &F, bool AfterOptimization) {
  Statistic &NumRetains =
      AfterOptimization ? NumRetainsAfterOpt : NumRetainsBeforeOpt;
  Statistic &NumReleases =
      AfterOptimization ? NumReleasesAfterOpt : NumReleasesBeforeOpt;

  for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
    Instruction *Inst = &*I++;
    switch (GetBasicARCInstKind(Inst)) {
    default:
      break;
    case ARCInstKind::Retain:
      ++NumRetains;
      break;
    case ARCInstKind::Release:
      ++NumReleases;
      break;
    }
  }
}
#endif

void ObjCARCOpt::init(Function &F) {
  if (!EnableARCOpts)
    return;

  // Intuitively, objc_retain and others are nocapture; however, in practice
  // they are not, because they return their argument value. And objc_release
  // calls finalizers which can have arbitrary side effects.
  MDKindCache.init(F.getParent());

  // Initialize our runtime entry point cache.
  EP.init(F.getParent());

  // Compute which blocks are in which funclet.
  if (F.hasPersonalityFn() &&
      isScopedEHPersonality(classifyEHPersonality(F.getPersonalityFn())))
    BlockEHColors = colorEHFunclets(F);
}

bool ObjCARCOpt::run(Function &F, AAResults &AA) {
  if (!EnableARCOpts)
    return false;

  Changed = CFGChanged = false;
  BundledRetainClaimRVs BRV(/*ContractPass=*/false);
  BundledInsts = &BRV;

  LLVM_DEBUG(dbgs() << "<<< ObjCARCOpt: Visiting Function: " << F.getName()
                    << " >>>"
                       "\n");

  std::pair<bool, bool> R = BundledInsts->insertAfterInvokes(F, nullptr);
  Changed |= R.first;
  CFGChanged |= R.second;

  PA.setAA(&AA);

#ifndef NDEBUG
  if (AreStatisticsEnabled()) {
    GatherStatistics(F, false);
  }
#endif

  // This pass performs several distinct transformations. As a compile-time aid
  // when compiling code that isn't ObjC, skip these if the relevant ObjC
  // library functions aren't declared.

  // Preliminary optimizations. This also computes UsedInThisFunction.
  OptimizeIndividualCalls(F);

  // Optimizations for weak pointers.
  if (UsedInThisFunction & ((1 << unsigned(ARCInstKind::LoadWeak)) |
                            (1 << unsigned(ARCInstKind::LoadWeakRetained)) |
                            (1 << unsigned(ARCInstKind::StoreWeak)) |
                            (1 << unsigned(ARCInstKind::InitWeak)) |
                            (1 << unsigned(ARCInstKind::CopyWeak)) |
                            (1 << unsigned(ARCInstKind::MoveWeak)) |
                            (1 << unsigned(ARCInstKind::DestroyWeak))))
    OptimizeWeakCalls(F);

  // Optimizations for retain+release pairs.
  if (UsedInThisFunction & ((1 << unsigned(ARCInstKind::Retain)) |
                            (1 << unsigned(ARCInstKind::RetainRV)) |
                            (1 << unsigned(ARCInstKind::RetainBlock))))
    if (UsedInThisFunction & (1 << unsigned(ARCInstKind::Release)))
      // Run OptimizeSequences until it either stops making changes or
      // no retain+release pair nesting is detected.
      while (OptimizeSequences(F)) {}

  // Optimizations if objc_autorelease is used.
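  // (UsedInThisFunction is a bitmask indexed by ARCInstKind, populated by
  // OptimizeIndividualCalls above, so each phase is skipped when the
  // corresponding runtime calls never appear in the function.)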
  if (UsedInThisFunction & ((1 << unsigned(ARCInstKind::Autorelease)) |
                            (1 << unsigned(ARCInstKind::AutoreleaseRV))))
    OptimizeReturns(F);

  // Gather statistics after optimization.
#ifndef NDEBUG
  if (AreStatisticsEnabled()) {
    GatherStatistics(F, true);
  }
#endif

  LLVM_DEBUG(dbgs() << "\n");

  return Changed;
}

/// @}
///

PreservedAnalyses ObjCARCOptPass::run(Function &F,
                                      FunctionAnalysisManager &AM) {
  ObjCARCOpt OCAO;
  OCAO.init(F);

  bool Changed = OCAO.run(F, AM.getResult<AAManager>(F));
  bool CFGChanged = OCAO.hasCFGChanged();
  if (Changed) {
    PreservedAnalyses PA;
    if (!CFGChanged)
      PA.preserveSet<CFGAnalyses>();
    return PA;
  }
  return PreservedAnalyses::all();
}
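// Usage sketch (assuming the pass keeps its usual "objc-arc" registration in
// the new pass manager): the optimization can be exercised in isolation with
//   opt -passes=objc-arc input.ll -S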