//===- MemorySSA.cpp - Memory SSA Builder ---------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the MemorySSA class.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/MemorySSA.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFGPrinter.h"
#include "llvm/Analysis/IteratedDominanceFrontier.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/AssemblyAnnotationWriter.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Use.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdlib>
#include <iterator>
#include <memory>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "memoryssa"

static cl::opt<std::string>
    DotCFGMSSA("dot-cfg-mssa",
               cl::value_desc("file name for generated dot file"),
               cl::desc("file name for generated dot file"), cl::init(""));

INITIALIZE_PASS_BEGIN(MemorySSAWrapperPass, "memoryssa", "Memory SSA", false,
                      true)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(MemorySSAWrapperPass, "memoryssa", "Memory SSA", false,
                    true)

INITIALIZE_PASS_BEGIN(MemorySSAPrinterLegacyPass, "print-memoryssa",
                      "Memory SSA Printer", false, false)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_END(MemorySSAPrinterLegacyPass, "print-memoryssa",
                    "Memory SSA Printer", false, false)

static cl::opt<unsigned> MaxCheckLimit(
    "memssa-check-limit", cl::Hidden, cl::init(100),
    cl::desc("The maximum number of stores/phis MemorySSA "
             "will consider trying to walk past (default = 100)"));

// Always verify MemorySSA if expensive checking is enabled.
#ifdef EXPENSIVE_CHECKS
bool llvm::VerifyMemorySSA = true;
#else
bool llvm::VerifyMemorySSA = false;
#endif

/// Enables MemorySSA as a dependency for loop passes in the legacy pass
/// manager.
cl::opt<bool> llvm::EnableMSSALoopDependency(
    "enable-mssa-loop-dependency", cl::Hidden, cl::init(true),
    cl::desc("Enable MemorySSA dependency for loop pass manager"));

static cl::opt<bool, true>
    VerifyMemorySSAX("verify-memoryssa", cl::location(VerifyMemorySSA),
                     cl::Hidden, cl::desc("Enable verification of MemorySSA."));

namespace llvm {

/// An assembly annotator class to print Memory SSA information in
/// comments.
class MemorySSAAnnotatedWriter : public AssemblyAnnotationWriter {
  friend class MemorySSA;

  const MemorySSA *MSSA;

public:
  MemorySSAAnnotatedWriter(const MemorySSA *M) : MSSA(M) {}

  void emitBasicBlockStartAnnot(const BasicBlock *BB,
                                formatted_raw_ostream &OS) override {
    if (MemoryAccess *MA = MSSA->getMemoryAccess(BB))
      OS << "; " << *MA << "\n";
  }

  void emitInstructionAnnot(const Instruction *I,
                            formatted_raw_ostream &OS) override {
    if (MemoryAccess *MA = MSSA->getMemoryAccess(I))
      OS << "; " << *MA << "\n";
  }
};

} // end namespace llvm

namespace {

/// Our current alias analysis API differentiates heavily between calls and
/// non-calls, and functions called on one usually assert on the other.
/// This class encapsulates the distinction to simplify other code that wants
/// "Memory affecting instructions and related data" to use as a key.
/// For example, this class is used as a DenseMap key in the use optimizer.
class MemoryLocOrCall {
public:
  bool IsCall = false;

  MemoryLocOrCall(MemoryUseOrDef *MUD)
      : MemoryLocOrCall(MUD->getMemoryInst()) {}
  MemoryLocOrCall(const MemoryUseOrDef *MUD)
      : MemoryLocOrCall(MUD->getMemoryInst()) {}

  MemoryLocOrCall(Instruction *Inst) {
    if (auto *C = dyn_cast<CallBase>(Inst)) {
      IsCall = true;
      Call = C;
    } else {
      IsCall = false;
      // There is no such thing as a MemoryLocation for a fence inst, and it is
      // unique in that regard.
      if (!isa<FenceInst>(Inst))
        Loc = MemoryLocation::get(Inst);
    }
  }

  explicit MemoryLocOrCall(const MemoryLocation &Loc) : Loc(Loc) {}

  const CallBase *getCall() const {
    assert(IsCall);
    return Call;
  }

  MemoryLocation getLoc() const {
    assert(!IsCall);
    return Loc;
  }

  bool operator==(const MemoryLocOrCall &Other) const {
    if (IsCall != Other.IsCall)
      return false;

    if (!IsCall)
      return Loc == Other.Loc;

    if (Call->getCalledOperand() != Other.Call->getCalledOperand())
      return false;

    return Call->arg_size() == Other.Call->arg_size() &&
           std::equal(Call->arg_begin(), Call->arg_end(),
                      Other.Call->arg_begin());
  }

private:
  union {
    const CallBase *Call;
    MemoryLocation Loc;
  };
};

} // end anonymous namespace

namespace llvm {

template <> struct DenseMapInfo<MemoryLocOrCall> {
  static inline MemoryLocOrCall getEmptyKey() {
    return MemoryLocOrCall(DenseMapInfo<MemoryLocation>::getEmptyKey());
  }

  static inline MemoryLocOrCall getTombstoneKey() {
    return MemoryLocOrCall(DenseMapInfo<MemoryLocation>::getTombstoneKey());
  }

  static unsigned getHashValue(const MemoryLocOrCall &MLOC) {
    if (!MLOC.IsCall)
      return hash_combine(
          MLOC.IsCall,
          DenseMapInfo<MemoryLocation>::getHashValue(MLOC.getLoc()));

    hash_code hash =
        hash_combine(MLOC.IsCall, DenseMapInfo<const Value *>::getHashValue(
                                      MLOC.getCall()->getCalledOperand()));

    for (const Value *Arg : MLOC.getCall()->args())
      hash = hash_combine(hash, DenseMapInfo<const Value *>::getHashValue(Arg));
    return hash;
  }

  static bool isEqual(const MemoryLocOrCall &LHS, const MemoryLocOrCall &RHS) {
    return LHS == RHS;
  }
};

} // end namespace llvm

/// This does one-way checks to see if Use could theoretically be hoisted above
/// MayClobber. This will not check the other way around.
///
/// This assumes that, for the purposes of MemorySSA, Use comes directly after
/// MayClobber, with no potentially clobbering operations in between them.
/// (Where potentially clobbering ops are memory barriers, aliased stores, etc.)
static bool areLoadsReorderable(const LoadInst *Use,
                                const LoadInst *MayClobber) {
  bool VolatileUse = Use->isVolatile();
  bool VolatileClobber = MayClobber->isVolatile();
  // Volatile operations may never be reordered with other volatile operations.
  if (VolatileUse && VolatileClobber)
    return false;
  // Otherwise, volatile doesn't matter here. From the language reference:
  // 'optimizers may change the order of volatile operations relative to
  // non-volatile operations.'

  // If a load is seq_cst, it cannot be moved above other loads. If its ordering
  // is weaker, it can be moved above other loads. We just need to be sure that
  // MayClobber isn't an acquire load, because loads can't be moved above
  // acquire loads.
  //
  // Note that this explicitly *does* allow the free reordering of monotonic (or
  // weaker) loads of the same address.
  bool SeqCstUse = Use->getOrdering() == AtomicOrdering::SequentiallyConsistent;
  bool MayClobberIsAcquire = isAtLeastOrStrongerThan(MayClobber->getOrdering(),
                                                     AtomicOrdering::Acquire);
  return !(SeqCstUse || MayClobberIsAcquire);
}

namespace {

struct ClobberAlias {
  bool IsClobber;
  Optional<AliasResult> AR;
};

} // end anonymous namespace
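
// Editor's note (illustrative sketch, not part of the pass): the query below
// asks alias analysis whether a defining instruction can modify the location
// that a use reads. For example, given
//
//   store i8 0, i8* %p        ; 1 = MemoryDef(liveOnEntry)
//   %v = load i8, i8* %q      ; MemoryUse(?)
//
// the store is reported as a clobber for the load only if %p and %q may
// alias; otherwise the load's defining access can stay at liveOnEntry (or at
// some earlier def).
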
// Return a pair of {IsClobber (bool), AR (AliasResult)}. It relies on AR being
// ignored if IsClobber = false.
template <typename AliasAnalysisType>
static ClobberAlias
instructionClobbersQuery(const MemoryDef *MD, const MemoryLocation &UseLoc,
                         const Instruction *UseInst, AliasAnalysisType &AA) {
  Instruction *DefInst = MD->getMemoryInst();
  assert(DefInst && "Defining instruction not actually an instruction");
  Optional<AliasResult> AR;

  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) {
    // These intrinsics will show up as affecting memory, but they are just
    // markers, mostly.
    //
    // FIXME: We probably don't actually want MemorySSA to model these at all
    // (including creating MemoryAccesses for them): we just end up inventing
    // clobbers where they don't really exist at all. Please see D43269 for
    // context.
    switch (II->getIntrinsicID()) {
    case Intrinsic::invariant_start:
    case Intrinsic::invariant_end:
    case Intrinsic::assume:
    case Intrinsic::experimental_noalias_scope_decl:
      return {false, NoAlias};
    case Intrinsic::dbg_addr:
    case Intrinsic::dbg_declare:
    case Intrinsic::dbg_label:
    case Intrinsic::dbg_value:
      llvm_unreachable("debuginfo shouldn't have associated defs!");
    default:
      break;
    }
  }

  if (auto *CB = dyn_cast_or_null<CallBase>(UseInst)) {
    ModRefInfo I = AA.getModRefInfo(DefInst, CB);
    AR = isMustSet(I) ? MustAlias : MayAlias;
    return {isModOrRefSet(I), AR};
  }

  if (auto *DefLoad = dyn_cast<LoadInst>(DefInst))
    if (auto *UseLoad = dyn_cast_or_null<LoadInst>(UseInst))
      return {!areLoadsReorderable(UseLoad, DefLoad), MayAlias};

  ModRefInfo I = AA.getModRefInfo(DefInst, UseLoc);
  AR = isMustSet(I) ? MustAlias : MayAlias;
  return {isModSet(I), AR};
}

template <typename AliasAnalysisType>
static ClobberAlias instructionClobbersQuery(MemoryDef *MD,
                                             const MemoryUseOrDef *MU,
                                             const MemoryLocOrCall &UseMLOC,
                                             AliasAnalysisType &AA) {
  // FIXME: This is a temporary hack to allow a single instructionClobbersQuery
  // to exist while MemoryLocOrCall is pushed through places.
  if (UseMLOC.IsCall)
    return instructionClobbersQuery(MD, MemoryLocation(), MU->getMemoryInst(),
                                    AA);
  return instructionClobbersQuery(MD, UseMLOC.getLoc(), MU->getMemoryInst(),
                                  AA);
}

// Return true when MD may alias MU, return false otherwise.
bool MemorySSAUtil::defClobbersUseOrDef(MemoryDef *MD, const MemoryUseOrDef *MU,
                                        AliasAnalysis &AA) {
  return instructionClobbersQuery(MD, MU, MemoryLocOrCall(MU), AA).IsClobber;
}
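
// Editor's sketch of how a client might use the helper above. MSSA, AA,
// StoreI and LoadI are hypothetical objects in the caller's scope:
//
//   auto *MD = cast<MemoryDef>(MSSA.getMemoryAccess(&StoreI));
//   MemoryUseOrDef *MU = MSSA.getMemoryAccess(&LoadI);
//   bool Clobbers = MemorySSAUtil::defClobbersUseOrDef(MD, MU, AA);
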
namespace {

struct UpwardsMemoryQuery {
  // True if our original query started off as a call
  bool IsCall = false;
  // The pointer location we started the query with. This will be empty if
  // IsCall is true.
  MemoryLocation StartingLoc;
  // This is the instruction we were querying about.
  const Instruction *Inst = nullptr;
  // The MemoryAccess we actually got called with, used to test local domination
  const MemoryAccess *OriginalAccess = nullptr;
  Optional<AliasResult> AR = MayAlias;
  bool SkipSelfAccess = false;

  UpwardsMemoryQuery() = default;

  UpwardsMemoryQuery(const Instruction *Inst, const MemoryAccess *Access)
      : IsCall(isa<CallBase>(Inst)), Inst(Inst), OriginalAccess(Access) {
    if (!IsCall)
      StartingLoc = MemoryLocation::get(Inst);
  }
};

} // end anonymous namespace

template <typename AliasAnalysisType>
static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysisType &AA,
                                                   const Instruction *I) {
  // If the memory can't be changed, then loads of the memory can't be
  // clobbered.
  if (auto *LI = dyn_cast<LoadInst>(I))
    return I->hasMetadata(LLVMContext::MD_invariant_load) ||
           AA.pointsToConstantMemory(MemoryLocation::get(LI));
  return false;
}

/// Verifies that `Start` is clobbered by `ClobberAt`, and that nothing
/// in between `Start` and `ClobberAt` can clobber `Start`.
///
/// This is meant to be as simple and self-contained as possible. Because it
/// uses no cache, etc., it can be relatively expensive.
///
/// \param Start The MemoryAccess that we want to walk from.
/// \param ClobberAt A clobber for Start.
/// \param StartLoc The MemoryLocation for Start.
/// \param MSSA The MemorySSA instance that Start and ClobberAt belong to.
/// \param Query The UpwardsMemoryQuery we used for our search.
/// \param AA The AliasAnalysis we used for our search.
/// \param AllowImpreciseClobber Always false, unless we do relaxed verify.
template <typename AliasAnalysisType>
LLVM_ATTRIBUTE_UNUSED static void
checkClobberSanity(const MemoryAccess *Start, MemoryAccess *ClobberAt,
                   const MemoryLocation &StartLoc, const MemorySSA &MSSA,
                   const UpwardsMemoryQuery &Query, AliasAnalysisType &AA,
                   bool AllowImpreciseClobber = false) {
  assert(MSSA.dominates(ClobberAt, Start) && "Clobber doesn't dominate start?");

  if (MSSA.isLiveOnEntryDef(Start)) {
    assert(MSSA.isLiveOnEntryDef(ClobberAt) &&
           "liveOnEntry must clobber itself");
    return;
  }

  bool FoundClobber = false;
  DenseSet<ConstMemoryAccessPair> VisitedPhis;
  SmallVector<ConstMemoryAccessPair, 8> Worklist;
  Worklist.emplace_back(Start, StartLoc);
  // Walk all paths from Start to ClobberAt, while looking for clobbers. If one
  // is found, complain.
  while (!Worklist.empty()) {
    auto MAP = Worklist.pop_back_val();
    // All we care about is that nothing from Start to ClobberAt clobbers Start.
    // We learn nothing from revisiting nodes.
    if (!VisitedPhis.insert(MAP).second)
      continue;

    for (const auto *MA : def_chain(MAP.first)) {
      if (MA == ClobberAt) {
        if (const auto *MD = dyn_cast<MemoryDef>(MA)) {
          // instructionClobbersQuery isn't essentially free, so don't use `|=`,
          // since it won't let us short-circuit.
          //
          // Also, note that this can't be hoisted out of the `Worklist` loop,
          // since MD may only act as a clobber for 1 of N MemoryLocations.
          FoundClobber = FoundClobber || MSSA.isLiveOnEntryDef(MD);
          if (!FoundClobber) {
            ClobberAlias CA =
                instructionClobbersQuery(MD, MAP.second, Query.Inst, AA);
            if (CA.IsClobber) {
              FoundClobber = true;
              // Not used: CA.AR;
            }
          }
        }
        break;
      }

      // We should never hit liveOnEntry, unless it's the clobber.
      assert(!MSSA.isLiveOnEntryDef(MA) && "Hit liveOnEntry before clobber?");

      if (const auto *MD = dyn_cast<MemoryDef>(MA)) {
        // If Start is a Def, skip self.
        if (MD == Start)
          continue;

        assert(!instructionClobbersQuery(MD, MAP.second, Query.Inst, AA)
                    .IsClobber &&
               "Found clobber before reaching ClobberAt!");
        continue;
      }

      if (const auto *MU = dyn_cast<MemoryUse>(MA)) {
        (void)MU;
        assert(MU == Start &&
               "Can only find use in def chain if Start is a use");
        continue;
      }

      assert(isa<MemoryPhi>(MA));

      // Add reachable phi predecessors.
      for (auto ItB = upward_defs_begin(
                    {const_cast<MemoryAccess *>(MA), MAP.second},
                    MSSA.getDomTree()),
                ItE = upward_defs_end();
           ItB != ItE; ++ItB)
        if (MSSA.getDomTree().isReachableFromEntry(ItB.getPhiArgBlock()))
          Worklist.emplace_back(*ItB);
    }
  }

  // If the verify is done following an optimization, it's possible that
  // ClobberAt was a conservative clobber that we can now infer is not a
  // true clobbering access. Don't fail the verify if that's the case.
  // We do have accesses that claim they're optimized, but could be optimized
  // further. Updating all these can be expensive, so allow it for now (FIXME).
  if (AllowImpreciseClobber)
    return;

  // If ClobberAt is a MemoryPhi, we can assume something above it acted as a
  // clobber. Otherwise, `ClobberAt` should've acted as a clobber at some point.
  assert((isa<MemoryPhi>(ClobberAt) || FoundClobber) &&
         "ClobberAt never acted as a clobber");
}

namespace {

/// Our algorithm for walking (and trying to optimize) clobbers, all wrapped up
/// in one class.
template <class AliasAnalysisType> class ClobberWalker {
  /// Save a few bytes by using unsigned instead of size_t.
  using ListIndex = unsigned;

  /// Represents a span of contiguous MemoryDefs, potentially ending in a
  /// MemoryPhi.
  struct DefPath {
    MemoryLocation Loc;
    // Note that, because we always walk in reverse, Last will always dominate
    // First. Also note that First and Last are inclusive.
    MemoryAccess *First;
    MemoryAccess *Last;
    Optional<ListIndex> Previous;

    DefPath(const MemoryLocation &Loc, MemoryAccess *First, MemoryAccess *Last,
            Optional<ListIndex> Previous)
        : Loc(Loc), First(First), Last(Last), Previous(Previous) {}

    DefPath(const MemoryLocation &Loc, MemoryAccess *Init,
            Optional<ListIndex> Previous)
        : DefPath(Loc, Init, Init, Previous) {}
  };

  const MemorySSA &MSSA;
  AliasAnalysisType &AA;
  DominatorTree &DT;
  UpwardsMemoryQuery *Query;
  unsigned *UpwardWalkLimit;

  // Phi optimization bookkeeping:
  // List of DefPath to process during the current phi optimization walk.
  SmallVector<DefPath, 32> Paths;
  // List of visited <Access, Location> pairs; we can skip paths already
  // visited with the same memory location.
  DenseSet<ConstMemoryAccessPair> VisitedPhis;
  // Record if phi translation has been performed during the current phi
  // optimization walk, as merging alias results after phi translation can
  // yield incorrect results. Context in PR46156.
  bool PerformedPhiTranslation = false;

  /// Find the nearest def or phi that `From` can legally be optimized to.
  const MemoryAccess *getWalkTarget(const MemoryPhi *From) const {
    assert(From->getNumOperands() && "Phi with no operands?");

    BasicBlock *BB = From->getBlock();
    MemoryAccess *Result = MSSA.getLiveOnEntryDef();
    DomTreeNode *Node = DT.getNode(BB);
    while ((Node = Node->getIDom())) {
      auto *Defs = MSSA.getBlockDefs(Node->getBlock());
      if (Defs)
        return &*Defs->rbegin();
    }
    return Result;
  }

  /// Result of calling walkToPhiOrClobber.
  struct UpwardsWalkResult {
    /// The "Result" of the walk. Either a clobber, the last thing we walked, or
    /// both. Include alias info when clobber found.
    MemoryAccess *Result;
    bool IsKnownClobber;
    Optional<AliasResult> AR;
  };

  /// Walk to the next Phi or Clobber in the def chain starting at Desc.Last.
  /// This will update Desc.Last as it walks. It will (optionally) also stop at
  /// StopAt.
  ///
  /// This does not test for whether StopAt is a clobber.
  UpwardsWalkResult
  walkToPhiOrClobber(DefPath &Desc, const MemoryAccess *StopAt = nullptr,
                     const MemoryAccess *SkipStopAt = nullptr) const {
    assert(!isa<MemoryUse>(Desc.Last) && "Uses don't exist in my world");
    assert(UpwardWalkLimit && "Need a valid walk limit");
    bool LimitAlreadyReached = false;
    // (*UpwardWalkLimit) may be 0 here, due to the loop in tryOptimizePhi. Set
    // it to 1. This will not do any alias() calls. It either returns in the
    // first iteration in the loop below, or is set back to 0 if all def chains
    // are free of MemoryDefs.
    if (!*UpwardWalkLimit) {
      *UpwardWalkLimit = 1;
      LimitAlreadyReached = true;
    }

    for (MemoryAccess *Current : def_chain(Desc.Last)) {
      Desc.Last = Current;
      if (Current == StopAt || Current == SkipStopAt)
        return {Current, false, MayAlias};

      if (auto *MD = dyn_cast<MemoryDef>(Current)) {
        if (MSSA.isLiveOnEntryDef(MD))
          return {MD, true, MustAlias};

        if (!--*UpwardWalkLimit)
          return {Current, true, MayAlias};

        ClobberAlias CA =
            instructionClobbersQuery(MD, Desc.Loc, Query->Inst, AA);
        if (CA.IsClobber)
          return {MD, true, CA.AR};
      }
    }

    if (LimitAlreadyReached)
      *UpwardWalkLimit = 0;

    assert(isa<MemoryPhi>(Desc.Last) &&
           "Ended at a non-clobber that's not a phi?");
    return {Desc.Last, false, MayAlias};
  }

  void addSearches(MemoryPhi *Phi, SmallVectorImpl<ListIndex> &PausedSearches,
                   ListIndex PriorNode) {
    auto UpwardDefsBegin = upward_defs_begin({Phi, Paths[PriorNode].Loc}, DT,
                                             &PerformedPhiTranslation);
    auto UpwardDefs = make_range(UpwardDefsBegin, upward_defs_end());
    for (const MemoryAccessPair &P : UpwardDefs) {
      PausedSearches.push_back(Paths.size());
      Paths.emplace_back(P.second, P.first, PriorNode);
    }
  }

  /// Represents a search that terminated after finding a clobber. This clobber
  /// may or may not be present in the path of defs from LastNode..SearchStart,
  /// since it may have been retrieved from cache.
  struct TerminatedPath {
    MemoryAccess *Clobber;
    ListIndex LastNode;
  };

  /// Get an access that keeps us from optimizing to the given phi.
  ///
  /// PausedSearches is an array of indices into the Paths array. Its incoming
  /// value is the indices of searches that stopped at the last phi optimization
  /// target. It's left in an unspecified state.
  ///
  /// If this returns None, NewPaused is a vector of searches that terminated
  /// at StopWhere. Otherwise, NewPaused is left in an unspecified state.
  Optional<TerminatedPath>
  getBlockingAccess(const MemoryAccess *StopWhere,
                    SmallVectorImpl<ListIndex> &PausedSearches,
                    SmallVectorImpl<ListIndex> &NewPaused,
                    SmallVectorImpl<TerminatedPath> &Terminated) {
    assert(!PausedSearches.empty() && "No searches to continue?");

    // BFS vs DFS really doesn't make a difference here, so just do a DFS with
    // PausedSearches as our stack.
    while (!PausedSearches.empty()) {
      ListIndex PathIndex = PausedSearches.pop_back_val();
      DefPath &Node = Paths[PathIndex];

      // If we've already visited this path with this MemoryLocation, we don't
      // need to do so again.
      //
      // NOTE: That we just drop these paths on the ground makes caching
      // behavior sporadic. e.g. given a diamond:
      //  A
      // B C
      //  D
      //
      // ...If we walk D, B, A, C, we'll only cache the result of phi
      // optimization for A, B, and D; C will be skipped because it dies here.
      // This arguably isn't the worst thing ever, since:
      //   - We generally query things in a top-down order, so if we got below D
      //     without needing cache entries for {C, MemLoc}, then chances are
      //     that those cache entries would end up ultimately unused.
      //   - We still cache things for A, so C only needs to walk up a bit.
      // If this behavior becomes problematic, we can fix without a ton of extra
      // work.
      if (!VisitedPhis.insert({Node.Last, Node.Loc}).second) {
        if (PerformedPhiTranslation) {
          // If visiting this path performed Phi translation, don't continue,
          // since it may not be correct to merge results from two paths if one
          // relies on the phi translation.
          TerminatedPath Term{Node.Last, PathIndex};
          return Term;
        }
        continue;
      }

      const MemoryAccess *SkipStopWhere = nullptr;
      if (Query->SkipSelfAccess && Node.Loc == Query->StartingLoc) {
        assert(isa<MemoryDef>(Query->OriginalAccess));
        SkipStopWhere = Query->OriginalAccess;
      }

      UpwardsWalkResult Res = walkToPhiOrClobber(Node,
                                                 /*StopAt=*/StopWhere,
                                                 /*SkipStopAt=*/SkipStopWhere);
      if (Res.IsKnownClobber) {
        assert(Res.Result != StopWhere && Res.Result != SkipStopWhere);

        // If this wasn't a cache hit, we hit a clobber when walking. That's a
        // failure.
        TerminatedPath Term{Res.Result, PathIndex};
        if (!MSSA.dominates(Res.Result, StopWhere))
          return Term;

        // Otherwise, it's a valid thing to potentially optimize to.
        Terminated.push_back(Term);
        continue;
      }

      if (Res.Result == StopWhere || Res.Result == SkipStopWhere) {
        // We've hit our target. Save this path off for if we want to continue
        // walking. If we are in the mode of skipping the OriginalAccess, and
        // we've reached back to the OriginalAccess, do not save path, we've
        // just looped back to self.
        if (Res.Result != SkipStopWhere)
          NewPaused.push_back(PathIndex);
        continue;
      }

      assert(!MSSA.isLiveOnEntryDef(Res.Result) && "liveOnEntry is a clobber");
      addSearches(cast<MemoryPhi>(Res.Result), PausedSearches, PathIndex);
    }

    return None;
  }

  template <typename T, typename Walker>
  struct generic_def_path_iterator
      : public iterator_facade_base<generic_def_path_iterator<T, Walker>,
                                    std::forward_iterator_tag, T *> {
    generic_def_path_iterator() {}
    generic_def_path_iterator(Walker *W, ListIndex N) : W(W), N(N) {}

    T &operator*() const { return curNode(); }

    generic_def_path_iterator &operator++() {
      N = curNode().Previous;
      return *this;
    }

    bool operator==(const generic_def_path_iterator &O) const {
      if (N.hasValue() != O.N.hasValue())
        return false;
      return !N.hasValue() || *N == *O.N;
    }

  private:
    T &curNode() const { return W->Paths[*N]; }

    Walker *W = nullptr;
    Optional<ListIndex> N = None;
  };

  using def_path_iterator = generic_def_path_iterator<DefPath, ClobberWalker>;
  using const_def_path_iterator =
      generic_def_path_iterator<const DefPath, const ClobberWalker>;

  iterator_range<def_path_iterator> def_path(ListIndex From) {
    return make_range(def_path_iterator(this, From), def_path_iterator());
  }

  iterator_range<const_def_path_iterator> const_def_path(ListIndex From) const {
    return make_range(const_def_path_iterator(this, From),
                      const_def_path_iterator());
  }

  struct OptznResult {
    /// The path that contains our result.
    TerminatedPath PrimaryClobber;
    /// The paths that we can legally cache back from, but that aren't
    /// necessarily the result of the Phi optimization.
    SmallVector<TerminatedPath, 4> OtherClobbers;
  };

  ListIndex defPathIndex(const DefPath &N) const {
    // The assert looks nicer if we don't need to do &N
    const DefPath *NP = &N;
    assert(!Paths.empty() && NP >= &Paths.front() && NP <= &Paths.back() &&
           "Out of bounds DefPath!");
    return NP - &Paths.front();
  }

  /// Try to optimize a phi as best as we can. Returns a SmallVector of Paths
  /// that act as legal clobbers. Note that this won't return *all* clobbers.
  ///
  /// Phi optimization algorithm tl;dr:
  ///   - Find the earliest def/phi, A, we can optimize to
  ///   - Find if all paths from the starting memory access ultimately reach A
  ///   - If not, optimization isn't possible.
  ///   - Otherwise, walk from A to another clobber or phi, A'.
  ///   - If A' is a def, we're done.
  ///   - If A' is a phi, try to optimize it.
  ///
  /// A path is a series of {MemoryAccess, MemoryLocation} pairs. A path
  /// terminates when a MemoryAccess that clobbers said MemoryLocation is found.
  OptznResult tryOptimizePhi(MemoryPhi *Phi, MemoryAccess *Start,
                             const MemoryLocation &Loc) {
    assert(Paths.empty() && VisitedPhis.empty() && !PerformedPhiTranslation &&
           "Reset the optimization state.");

    Paths.emplace_back(Loc, Start, Phi, None);
    // Stores how many "valid" optimization nodes we had prior to calling
    // addSearches/getBlockingAccess. Necessary for caching if we had a blocker.
    auto PriorPathsSize = Paths.size();

    SmallVector<ListIndex, 16> PausedSearches;
    SmallVector<ListIndex, 8> NewPaused;
    SmallVector<TerminatedPath, 4> TerminatedPaths;

    addSearches(Phi, PausedSearches, 0);

    // Moves the TerminatedPath with the "most dominated" Clobber to the end of
    // Paths.
    auto MoveDominatedPathToEnd = [&](SmallVectorImpl<TerminatedPath> &Paths) {
      assert(!Paths.empty() && "Need a path to move");
      auto Dom = Paths.begin();
      for (auto I = std::next(Dom), E = Paths.end(); I != E; ++I)
        if (!MSSA.dominates(I->Clobber, Dom->Clobber))
          Dom = I;
      auto Last = Paths.end() - 1;
      if (Last != Dom)
        std::iter_swap(Last, Dom);
    };

    MemoryPhi *Current = Phi;
    while (true) {
      assert(!MSSA.isLiveOnEntryDef(Current) &&
             "liveOnEntry wasn't treated as a clobber?");

      const auto *Target = getWalkTarget(Current);
      // If a TerminatedPath doesn't dominate Target, then it wasn't a legal
      // optimization for the prior phi.
      assert(all_of(TerminatedPaths, [&](const TerminatedPath &P) {
        return MSSA.dominates(P.Clobber, Target);
      }));

      // FIXME: This is broken, because the Blocker may be reported to be
      // liveOnEntry, and we'll happily wait for that to disappear (read: never)
      // For the moment, this is fine, since we do nothing with blocker info.
      if (Optional<TerminatedPath> Blocker = getBlockingAccess(
              Target, PausedSearches, NewPaused, TerminatedPaths)) {

        // Find the node we started at. We can't search based on N->Last, since
        // we may have gone around a loop with a different MemoryLocation.
        auto Iter = find_if(def_path(Blocker->LastNode), [&](const DefPath &N) {
          return defPathIndex(N) < PriorPathsSize;
        });
        assert(Iter != def_path_iterator());

        DefPath &CurNode = *Iter;
        assert(CurNode.Last == Current);

        // Two things:
        // A. We can't reliably cache all of NewPaused back. Consider a case
        //    where we have two paths in NewPaused; one of which can't optimize
        //    above this phi, whereas the other can. If we cache the second path
        //    back, we'll end up with suboptimal cache entries. We can handle
        //    cases like this a bit better when we either try to find all
        //    clobbers that block phi optimization, or when our cache starts
        //    supporting unfinished searches.
        // B. We can't reliably cache TerminatedPaths back here without doing
        //    extra checks; consider a case like:
        //       T
        //      / \
        //     D   C
        //      \ /
        //       S
        //    Where T is our target, C is a node with a clobber on it, D is a
        //    diamond (with a clobber *only* on the left or right node, N), and
        //    S is our start. Say we walk to D, through the node opposite N
        //    (read: ignoring the clobber), and see a cache entry in the top
        //    node of D. That cache entry gets put into TerminatedPaths. We then
        //    walk up to C (N is later in our worklist), find the clobber, and
        //    quit. If we append TerminatedPaths to OtherClobbers, we'll cache
        //    the bottom part of D to the cached clobber, ignoring the clobber
        //    in N. Again, this problem goes away if we start tracking all
        //    blockers for a given phi optimization.
        TerminatedPath Result{CurNode.Last, defPathIndex(CurNode)};
        return {Result, {}};
      }

      // If there's nothing left to search, then all paths led to valid clobbers
      // that we got from our cache; pick the nearest to the start, and allow
      // the rest to be cached back.
      if (NewPaused.empty()) {
        MoveDominatedPathToEnd(TerminatedPaths);
        TerminatedPath Result = TerminatedPaths.pop_back_val();
        return {Result, std::move(TerminatedPaths)};
      }

      MemoryAccess *DefChainEnd = nullptr;
      SmallVector<TerminatedPath, 4> Clobbers;
      for (ListIndex Paused : NewPaused) {
        UpwardsWalkResult WR = walkToPhiOrClobber(Paths[Paused]);
        if (WR.IsKnownClobber)
          Clobbers.push_back({WR.Result, Paused});
        else
          // Micro-opt: If we hit the end of the chain, save it.
          DefChainEnd = WR.Result;
      }

      if (!TerminatedPaths.empty()) {
        // If we couldn't find the dominating phi/liveOnEntry in the above loop,
        // do it now.
        if (!DefChainEnd)
          for (auto *MA : def_chain(const_cast<MemoryAccess *>(Target)))
            DefChainEnd = MA;
        assert(DefChainEnd && "Failed to find dominating phi/liveOnEntry");

        // If any of the terminated paths don't dominate the phi we'll try to
        // optimize, we need to figure out what they are and quit.
        const BasicBlock *ChainBB = DefChainEnd->getBlock();
        for (const TerminatedPath &TP : TerminatedPaths) {
          // Because we know that DefChainEnd is as "high" as we can go, we
          // don't need local dominance checks; BB dominance is sufficient.
          if (DT.dominates(ChainBB, TP.Clobber->getBlock()))
            Clobbers.push_back(TP);
        }
      }

      // If we have clobbers in the def chain, find the one closest to Current
      // and quit.
      if (!Clobbers.empty()) {
        MoveDominatedPathToEnd(Clobbers);
        TerminatedPath Result = Clobbers.pop_back_val();
        return {Result, std::move(Clobbers)};
      }

      assert(all_of(NewPaused,
                    [&](ListIndex I) { return Paths[I].Last == DefChainEnd; }));

      // Because liveOnEntry is a clobber, this must be a phi.
      auto *DefChainPhi = cast<MemoryPhi>(DefChainEnd);

      PriorPathsSize = Paths.size();
      PausedSearches.clear();
      for (ListIndex I : NewPaused)
        addSearches(DefChainPhi, PausedSearches, I);
      NewPaused.clear();

      Current = DefChainPhi;
    }
  }

  void verifyOptResult(const OptznResult &R) const {
    assert(all_of(R.OtherClobbers, [&](const TerminatedPath &P) {
      return MSSA.dominates(P.Clobber, R.PrimaryClobber.Clobber);
    }));
  }

  void resetPhiOptznState() {
    Paths.clear();
    VisitedPhis.clear();
    PerformedPhiTranslation = false;
  }

public:
  ClobberWalker(const MemorySSA &MSSA, AliasAnalysisType &AA, DominatorTree &DT)
      : MSSA(MSSA), AA(AA), DT(DT) {}

  AliasAnalysisType *getAA() { return &AA; }
  /// Finds the nearest clobber for the given query, optimizing phis if
  /// possible.
  MemoryAccess *findClobber(MemoryAccess *Start, UpwardsMemoryQuery &Q,
                            unsigned &UpWalkLimit) {
    Query = &Q;
    UpwardWalkLimit = &UpWalkLimit;
    // Starting limit must be > 0.
    if (!UpWalkLimit)
      UpWalkLimit++;

    MemoryAccess *Current = Start;
    // This walker pretends uses don't exist. If we're handed one, silently grab
    // its def. (This has the nice side-effect of ensuring we never cache uses.)
    if (auto *MU = dyn_cast<MemoryUse>(Start))
      Current = MU->getDefiningAccess();

    DefPath FirstDesc(Q.StartingLoc, Current, Current, None);
    // Fast path for the overly-common case (no crazy phi optimization
    // necessary).
    UpwardsWalkResult WalkResult = walkToPhiOrClobber(FirstDesc);
    MemoryAccess *Result;
    if (WalkResult.IsKnownClobber) {
      Result = WalkResult.Result;
      Q.AR = WalkResult.AR;
    } else {
      OptznResult OptRes = tryOptimizePhi(cast<MemoryPhi>(FirstDesc.Last),
                                          Current, Q.StartingLoc);
      verifyOptResult(OptRes);
      resetPhiOptznState();
      Result = OptRes.PrimaryClobber.Clobber;
    }

#ifdef EXPENSIVE_CHECKS
    if (!Q.SkipSelfAccess && *UpwardWalkLimit > 0)
      checkClobberSanity(Current, Result, Q.StartingLoc, MSSA, Q, AA);
#endif
    return Result;
  }
};

struct RenamePassData {
  DomTreeNode *DTN;
  DomTreeNode::const_iterator ChildIt;
  MemoryAccess *IncomingVal;

  RenamePassData(DomTreeNode *D, DomTreeNode::const_iterator It,
                 MemoryAccess *M)
      : DTN(D), ChildIt(It), IncomingVal(M) {}

  void swap(RenamePassData &RHS) {
    std::swap(DTN, RHS.DTN);
    std::swap(ChildIt, RHS.ChildIt);
    std::swap(IncomingVal, RHS.IncomingVal);
  }
};

} // end anonymous namespace

namespace llvm {

template <class AliasAnalysisType> class MemorySSA::ClobberWalkerBase {
  ClobberWalker<AliasAnalysisType> Walker;
  MemorySSA *MSSA;

public:
  ClobberWalkerBase(MemorySSA *M, AliasAnalysisType *A, DominatorTree *D)
      : Walker(*M, *A, *D), MSSA(M) {}

  MemoryAccess *getClobberingMemoryAccessBase(MemoryAccess *,
                                              const MemoryLocation &,
                                              unsigned &);
  // The third argument (bool) defines whether the clobber search should skip
  // the original queried access. If true, there will be a follow-up query
  // searching for a clobber access past "self". Note that the Optimized access
  // is not updated if a new clobber is found by this SkipSelf search. If this
  // additional query becomes heavily used we may decide to cache the result.
  // Walker instantiations will decide how to set the SkipSelf bool.
  MemoryAccess *getClobberingMemoryAccessBase(MemoryAccess *, unsigned &, bool);
};
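
// Editor's note (usage sketch, assuming an MSSA instance is in scope): client
// passes normally reach the walkers defined below through MemorySSA itself,
// e.g.
//
//   MemorySSAWalker *W = MSSA.getWalker();
//   MemoryAccess *Clobber = W->getClobberingMemoryAccess(MA);
//   if (MSSA.isLiveOnEntryDef(Clobber))
//     ; // Nothing in this function clobbers MA's location.
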
/// A MemorySSAWalker that does AA walks to disambiguate accesses. It no
/// longer does caching on its own, but the name has been retained for the
/// moment.
template <class AliasAnalysisType>
class MemorySSA::CachingWalker final : public MemorySSAWalker {
  ClobberWalkerBase<AliasAnalysisType> *Walker;

public:
  CachingWalker(MemorySSA *M, ClobberWalkerBase<AliasAnalysisType> *W)
      : MemorySSAWalker(M), Walker(W) {}
  ~CachingWalker() override = default;

  using MemorySSAWalker::getClobberingMemoryAccess;

  MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA, unsigned &UWL) {
    return Walker->getClobberingMemoryAccessBase(MA, UWL, false);
  }
  MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA,
                                          const MemoryLocation &Loc,
                                          unsigned &UWL) {
    return Walker->getClobberingMemoryAccessBase(MA, Loc, UWL);
  }

  MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA) override {
    unsigned UpwardWalkLimit = MaxCheckLimit;
    return getClobberingMemoryAccess(MA, UpwardWalkLimit);
  }
  MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA,
                                          const MemoryLocation &Loc) override {
    unsigned UpwardWalkLimit = MaxCheckLimit;
    return getClobberingMemoryAccess(MA, Loc, UpwardWalkLimit);
  }

  void invalidateInfo(MemoryAccess *MA) override {
    if (auto *MUD = dyn_cast<MemoryUseOrDef>(MA))
      MUD->resetOptimized();
  }
};

template <class AliasAnalysisType>
class MemorySSA::SkipSelfWalker final : public MemorySSAWalker {
  ClobberWalkerBase<AliasAnalysisType> *Walker;

public:
  SkipSelfWalker(MemorySSA *M, ClobberWalkerBase<AliasAnalysisType> *W)
      : MemorySSAWalker(M), Walker(W) {}
  ~SkipSelfWalker() override = default;

  using MemorySSAWalker::getClobberingMemoryAccess;

  MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA, unsigned &UWL) {
    return Walker->getClobberingMemoryAccessBase(MA, UWL, true);
  }
  MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA,
                                          const MemoryLocation &Loc,
                                          unsigned &UWL) {
    return Walker->getClobberingMemoryAccessBase(MA, Loc, UWL);
  }

  MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA) override {
    unsigned UpwardWalkLimit = MaxCheckLimit;
    return getClobberingMemoryAccess(MA, UpwardWalkLimit);
  }
  MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA,
                                          const MemoryLocation &Loc) override {
    unsigned UpwardWalkLimit = MaxCheckLimit;
    return getClobberingMemoryAccess(MA, Loc, UpwardWalkLimit);
  }

  void invalidateInfo(MemoryAccess *MA) override {
    if (auto *MUD = dyn_cast<MemoryUseOrDef>(MA))
      MUD->resetOptimized();
  }
};

} // end namespace llvm

void MemorySSA::renameSuccessorPhis(BasicBlock *BB, MemoryAccess *IncomingVal,
                                    bool RenameAllUses) {
  // Pass through values to our successors.
  for (const BasicBlock *S : successors(BB)) {
    auto It = PerBlockAccesses.find(S);
    // Rename the phi nodes in our successor block.
    if (It == PerBlockAccesses.end() || !isa<MemoryPhi>(It->second->front()))
      continue;
    AccessList *Accesses = It->second.get();
    auto *Phi = cast<MemoryPhi>(&Accesses->front());
    if (RenameAllUses) {
      bool ReplacementDone = false;
      for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I)
        if (Phi->getIncomingBlock(I) == BB) {
          Phi->setIncomingValue(I, IncomingVal);
          ReplacementDone = true;
        }
      (void)ReplacementDone;
      assert(ReplacementDone && "Incomplete phi during partial rename");
    } else
      Phi->addIncoming(IncomingVal, BB);
  }
}

/// Rename a single basic block into MemorySSA form.
/// Uses the standard SSA renaming algorithm.
/// \returns The new incoming value.
MemoryAccess *MemorySSA::renameBlock(BasicBlock *BB, MemoryAccess *IncomingVal,
                                     bool RenameAllUses) {
  auto It = PerBlockAccesses.find(BB);
  // Skip most processing if the list is empty.
  if (It != PerBlockAccesses.end()) {
    AccessList *Accesses = It->second.get();
    for (MemoryAccess &L : *Accesses) {
      if (MemoryUseOrDef *MUD = dyn_cast<MemoryUseOrDef>(&L)) {
        if (MUD->getDefiningAccess() == nullptr || RenameAllUses)
          MUD->setDefiningAccess(IncomingVal);
        if (isa<MemoryDef>(&L))
          IncomingVal = &L;
      } else {
        IncomingVal = &L;
      }
    }
  }
  return IncomingVal;
}

/// This is the standard SSA renaming algorithm.
///
/// We walk the dominator tree in preorder, renaming accesses, and then filling
/// in phi nodes in our successors.
void MemorySSA::renamePass(DomTreeNode *Root, MemoryAccess *IncomingVal,
                           SmallPtrSetImpl<BasicBlock *> &Visited,
                           bool SkipVisited, bool RenameAllUses) {
  assert(Root && "Trying to rename accesses in an unreachable block");

  SmallVector<RenamePassData, 32> WorkStack;
  // Skip everything if we already renamed this block and we are skipping.
  // Note: You can't sink this into the if, because we need it to occur
  // regardless of whether we skip blocks or not.
  bool AlreadyVisited = !Visited.insert(Root->getBlock()).second;
  if (SkipVisited && AlreadyVisited)
    return;

  IncomingVal = renameBlock(Root->getBlock(), IncomingVal, RenameAllUses);
  renameSuccessorPhis(Root->getBlock(), IncomingVal, RenameAllUses);
  WorkStack.push_back({Root, Root->begin(), IncomingVal});

  while (!WorkStack.empty()) {
    DomTreeNode *Node = WorkStack.back().DTN;
    DomTreeNode::const_iterator ChildIt = WorkStack.back().ChildIt;
    IncomingVal = WorkStack.back().IncomingVal;

    if (ChildIt == Node->end()) {
      WorkStack.pop_back();
    } else {
      DomTreeNode *Child = *ChildIt;
      ++WorkStack.back().ChildIt;
      BasicBlock *BB = Child->getBlock();
      // Note: You can't sink this into the if, because we need it to occur
      // regardless of whether we skip blocks or not.
      AlreadyVisited = !Visited.insert(BB).second;
      if (SkipVisited && AlreadyVisited) {
        // We already visited this during our renaming, which can happen when
        // being asked to rename multiple blocks. Figure out the incoming val,
        // which is the last def.
        // Incoming value can only change if there is a block def, and in that
        // case, it's the last block def in the list.
        if (auto *BlockDefs = getWritableBlockDefs(BB))
          IncomingVal = &*BlockDefs->rbegin();
      } else
        IncomingVal = renameBlock(BB, IncomingVal, RenameAllUses);
      renameSuccessorPhis(BB, IncomingVal, RenameAllUses);
      WorkStack.push_back({Child, Child->begin(), IncomingVal});
    }
  }
}
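
// Editor's worked example (illustrative, not from this file): for a diamond
// CFG where only B writes memory,
//
//        A                A: (no accesses)
//       / \               B: 1 = MemoryDef(liveOnEntry)
//      B   C              D: 2 = MemoryPhi({B, 1}, {C, liveOnEntry})
//       \ /
//        D
//
// renamePass produces the operands of D's MemoryPhi by pushing the incoming
// def along each CFG edge, exactly as in scalar SSA renaming.
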
/// This handles unreachable block accesses by deleting phi nodes in
/// unreachable blocks, and marking all other unreachable MemoryAccess's as
/// being uses of the live on entry definition.
void MemorySSA::markUnreachableAsLiveOnEntry(BasicBlock *BB) {
  assert(!DT->isReachableFromEntry(BB) &&
         "Reachable block found while handling unreachable blocks");

  // Make sure phi nodes in our reachable successors end up with a
  // LiveOnEntryDef for our incoming edge, even though our block is forward
  // unreachable. We could just disconnect these blocks from the CFG fully,
  // but we do not right now.
  for (const BasicBlock *S : successors(BB)) {
    if (!DT->isReachableFromEntry(S))
      continue;
    auto It = PerBlockAccesses.find(S);
    // Rename the phi nodes in our successor block.
    if (It == PerBlockAccesses.end() || !isa<MemoryPhi>(It->second->front()))
      continue;
    AccessList *Accesses = It->second.get();
    auto *Phi = cast<MemoryPhi>(&Accesses->front());
    Phi->addIncoming(LiveOnEntryDef.get(), BB);
  }

  auto It = PerBlockAccesses.find(BB);
  if (It == PerBlockAccesses.end())
    return;

  auto &Accesses = It->second;
  for (auto AI = Accesses->begin(), AE = Accesses->end(); AI != AE;) {
    auto Next = std::next(AI);
    // If we have a phi, just remove it. We are going to replace all
    // users with live on entry.
    if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(AI))
      UseOrDef->setDefiningAccess(LiveOnEntryDef.get());
    else
      Accesses->erase(AI);
    AI = Next;
  }
}

MemorySSA::MemorySSA(Function &Func, AliasAnalysis *AA, DominatorTree *DT)
    : AA(nullptr), DT(DT), F(Func), LiveOnEntryDef(nullptr), Walker(nullptr),
      SkipWalker(nullptr), NextID(0) {
  // Build MemorySSA using a batch alias analysis. This reuses the internal
  // state that AA collects during an alias()/getModRefInfo() call. This is
  // safe because there are no CFG changes while building MemorySSA and can
  // significantly reduce the time spent by the compiler in AA, because we will
  // make queries about all the instructions in the Function.
  assert(AA && "No alias analysis?");
  BatchAAResults BatchAA(*AA);
  buildMemorySSA(BatchAA);
  // Intentionally leave AA as nullptr while building so we don't accidentally
  // use the non-batch AliasAnalysis.
  this->AA = AA;
  // Also create the walker here.
  getWalker();
}

MemorySSA::~MemorySSA() {
  // Drop all our references.
  for (const auto &Pair : PerBlockAccesses)
    for (MemoryAccess &MA : *Pair.second)
      MA.dropAllReferences();
}

MemorySSA::AccessList *MemorySSA::getOrCreateAccessList(const BasicBlock *BB) {
  auto Res = PerBlockAccesses.insert(std::make_pair(BB, nullptr));

  if (Res.second)
    Res.first->second = std::make_unique<AccessList>();
  return Res.first->second.get();
}

MemorySSA::DefsList *MemorySSA::getOrCreateDefsList(const BasicBlock *BB) {
  auto Res = PerBlockDefs.insert(std::make_pair(BB, nullptr));

  if (Res.second)
    Res.first->second = std::make_unique<DefsList>();
  return Res.first->second.get();
}

namespace llvm {

/// This class is a batch walker of all MemoryUse's in the program, and points
/// their defining access at the thing that actually clobbers them. Because it
/// is a batch walker that touches everything, it does not operate like the
/// other walkers. This walker is basically performing a top-down SSA renaming
/// pass, where the version stack is used as the cache. This enables it to be
/// significantly more time and memory efficient than using the regular walker,
/// which is walking bottom-up.
class MemorySSA::OptimizeUses {
public:
  OptimizeUses(MemorySSA *MSSA, CachingWalker<BatchAAResults> *Walker,
               BatchAAResults *BAA, DominatorTree *DT)
      : MSSA(MSSA), Walker(Walker), AA(BAA), DT(DT) {}

  void optimizeUses();

private:
  /// This represents where a given MemoryLocation is in the stack.
  struct MemlocStackInfo {
    // This essentially is keeping track of versions of the stack. Whenever
    // the stack changes due to pushes or pops, these versions increase.
    unsigned long StackEpoch;
    unsigned long PopEpoch;
    // This is the lower bound of places on the stack to check. It is equal to
    // the place the last stack walk ended.
    // Note: Correctness depends on this being initialized to 0, which DenseMap
    // does.
    unsigned long LowerBound;
    const BasicBlock *LowerBoundBlock;
    // This is where the last walk for this memory location ended.
    unsigned long LastKill;
    bool LastKillValid;
    Optional<AliasResult> AR;
  };

  void optimizeUsesInBlock(const BasicBlock *, unsigned long &, unsigned long &,
                           SmallVectorImpl<MemoryAccess *> &,
                           DenseMap<MemoryLocOrCall, MemlocStackInfo> &);

  MemorySSA *MSSA;
  CachingWalker<BatchAAResults> *Walker;
  BatchAAResults *AA;
  DominatorTree *DT;
};

} // end namespace llvm
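
// Editor's sketch of the version-stack idea used below: as the dominator tree
// is walked top-down, every MemoryDef and MemoryPhi encountered is pushed onto
// one shared stack, so at any block the stack is exactly the chain of defs
// that dominate that block. A MemoryUse then only scans downward from the top
// of the stack until it finds its clobber, and the per-location bookkeeping
// (LowerBound/LastKill) remembers how far previous scans already got, so the
// same entries are not re-checked.
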
/// Optimize the uses in a given block. This is basically the SSA renaming
/// algorithm, with one caveat: We are able to use a single stack for all
/// MemoryUses. This is because the set of *possible* reaching MemoryDefs is
/// the same for every MemoryUse. The *actual* clobbering MemoryDef is just
/// going to be some position in that stack of possible ones.
///
/// We track the stack positions that each MemoryLocation needs
/// to check, and where its last walk ended. This is because we only want to
/// check the things that changed since last time. The same MemoryLocation
/// should get clobbered by the same store (getModRefInfo does not use
/// invariantness or things like this, and if they start, we can modify
/// MemoryLocOrCall to include relevant data).
void MemorySSA::OptimizeUses::optimizeUsesInBlock(
    const BasicBlock *BB, unsigned long &StackEpoch, unsigned long &PopEpoch,
    SmallVectorImpl<MemoryAccess *> &VersionStack,
    DenseMap<MemoryLocOrCall, MemlocStackInfo> &LocStackInfo) {

  /// If no accesses, nothing to do.
  MemorySSA::AccessList *Accesses = MSSA->getWritableBlockAccesses(BB);
  if (Accesses == nullptr)
    return;

  // Pop everything that doesn't dominate the current block off the stack,
  // increment the PopEpoch to account for this.
  while (true) {
    assert(
        !VersionStack.empty() &&
        "Version stack should have liveOnEntry sentinel dominating everything");
    BasicBlock *BackBlock = VersionStack.back()->getBlock();
    if (DT->dominates(BackBlock, BB))
      break;
    while (VersionStack.back()->getBlock() == BackBlock)
      VersionStack.pop_back();
    ++PopEpoch;
  }

  for (MemoryAccess &MA : *Accesses) {
    auto *MU = dyn_cast<MemoryUse>(&MA);
    if (!MU) {
      VersionStack.push_back(&MA);
      ++StackEpoch;
      continue;
    }

    if (isUseTriviallyOptimizableToLiveOnEntry(*AA, MU->getMemoryInst())) {
      MU->setDefiningAccess(MSSA->getLiveOnEntryDef(), true, None);
      continue;
    }

    MemoryLocOrCall UseMLOC(MU);
    auto &LocInfo = LocStackInfo[UseMLOC];
    // If the pop epoch changed, it means we've removed stuff from top of
    // stack due to changing blocks. We may have to reset the lower bound or
    // last kill info.
    if (LocInfo.PopEpoch != PopEpoch) {
      LocInfo.PopEpoch = PopEpoch;
      LocInfo.StackEpoch = StackEpoch;
      // If the lower bound was in something that no longer dominates us, we
      // have to reset it.
      // We can't simply track stack size, because the stack may have had
      // pushes/pops in the meantime.
      // XXX: This is non-optimal, but is only slower in cases with heavily
      // branching dominator trees. Getting the optimal number of queries would
      // require making LowerBound and LastKill a per-loc stack, and popping it
      // until the top of that stack dominates us. This does not seem worth it
      // ATM.
      // A much cheaper optimization would be to always explore the deepest
      // branch of the dominator tree first. This will guarantee this resets on
      // the smallest set of blocks.
      if (LocInfo.LowerBoundBlock && LocInfo.LowerBoundBlock != BB &&
          !DT->dominates(LocInfo.LowerBoundBlock, BB)) {
        // Reset the lower bound of things to check.
        // TODO: Some day we should be able to reset to last kill, rather than
        // 0.
        LocInfo.LowerBound = 0;
        LocInfo.LowerBoundBlock = VersionStack[0]->getBlock();
        LocInfo.LastKillValid = false;
      }
    } else if (LocInfo.StackEpoch != StackEpoch) {
      // If all that has changed is the StackEpoch, we only have to check the
      // new things on the stack, because we've checked everything before. In
      // this case, the lower bound of things to check remains the same.
      LocInfo.PopEpoch = PopEpoch;
      LocInfo.StackEpoch = StackEpoch;
    }
    if (!LocInfo.LastKillValid) {
      LocInfo.LastKill = VersionStack.size() - 1;
      LocInfo.LastKillValid = true;
      LocInfo.AR = MayAlias;
    }

    // At this point, we should have corrected last kill and LowerBound to be
    // in bounds.
    assert(LocInfo.LowerBound < VersionStack.size() &&
           "Lower bound out of range");
    assert(LocInfo.LastKill < VersionStack.size() &&
           "Last kill info out of range");
    // In any case, the new upper bound is the top of the stack.
    unsigned long UpperBound = VersionStack.size() - 1;

    if (UpperBound - LocInfo.LowerBound > MaxCheckLimit) {
      LLVM_DEBUG(dbgs() << "MemorySSA skipping optimization of " << *MU << " ("
                        << *(MU->getMemoryInst()) << ")"
                        << " because there are "
                        << UpperBound - LocInfo.LowerBound
                        << " stores to disambiguate\n");
      // Because we did not walk, LastKill is no longer valid, as this may
      // have been a kill.
      LocInfo.LastKillValid = false;
      continue;
    }

    bool FoundClobberResult = false;
    unsigned UpwardWalkLimit = MaxCheckLimit;
    while (UpperBound > LocInfo.LowerBound) {
      if (isa<MemoryPhi>(VersionStack[UpperBound])) {
        // For phis, use the walker, see where we ended up, go there.
        MemoryAccess *Result =
            Walker->getClobberingMemoryAccess(MU, UpwardWalkLimit);
        // We are guaranteed to find it or something is wrong.
        while (VersionStack[UpperBound] != Result) {
          assert(UpperBound != 0);
          --UpperBound;
        }
        FoundClobberResult = true;
        break;
      }

      MemoryDef *MD = cast<MemoryDef>(VersionStack[UpperBound]);
      ClobberAlias CA = instructionClobbersQuery(MD, MU, UseMLOC, *AA);
      if (CA.IsClobber) {
        FoundClobberResult = true;
        LocInfo.AR = CA.AR;
        break;
      }
      --UpperBound;
    }

    // Note: Phis always have AliasResult AR set to MayAlias ATM.

    // At the end of this loop, UpperBound is either a clobber or the lower
    // bound. PHI walking may cause it to be < LowerBound, and in fact it may
    // even be < LastKill.
    if (FoundClobberResult || UpperBound < LocInfo.LastKill) {
      // We were last killed now by where we got to.
      if (MSSA->isLiveOnEntryDef(VersionStack[UpperBound]))
        LocInfo.AR = None;
      MU->setDefiningAccess(VersionStack[UpperBound], true, LocInfo.AR);
      LocInfo.LastKill = UpperBound;
    } else {
      // Otherwise, we checked all the new ones, and now we know we can get to
      // LastKill.
      MU->setDefiningAccess(VersionStack[LocInfo.LastKill], true, LocInfo.AR);
    }
    LocInfo.LowerBound = VersionStack.size() - 1;
    LocInfo.LowerBoundBlock = BB;
  }
}

/// Optimize uses to point to their actual clobbering definitions.
void MemorySSA::OptimizeUses::optimizeUses() {
  SmallVector<MemoryAccess *, 16> VersionStack;
  DenseMap<MemoryLocOrCall, MemlocStackInfo> LocStackInfo;
  VersionStack.push_back(MSSA->getLiveOnEntryDef());

  unsigned long StackEpoch = 1;
  unsigned long PopEpoch = 1;
  // We perform a non-recursive top-down dominator tree walk.
  for (const auto *DomNode : depth_first(DT->getRootNode()))
    optimizeUsesInBlock(DomNode->getBlock(), StackEpoch, PopEpoch, VersionStack,
                        LocStackInfo);
}

void MemorySSA::placePHINodes(
    const SmallPtrSetImpl<BasicBlock *> &DefiningBlocks) {
  // Determine where our MemoryPhi's should go.
  ForwardIDFCalculator IDFs(*DT);
  IDFs.setDefiningBlocks(DefiningBlocks);
  SmallVector<BasicBlock *, 32> IDFBlocks;
  IDFs.calculate(IDFBlocks);

  // Now place MemoryPhi nodes.
  for (auto &BB : IDFBlocks)
    createMemoryPhi(BB);
}

void MemorySSA::buildMemorySSA(BatchAAResults &BAA) {
  // We create an access to represent "live on entry", for things like
  // arguments or users of globals, where the memory they use is defined before
  // the beginning of the function. We do not actually insert it into the IR.
  // We do not define a live on exit for the immediate uses, and thus our
  // semantics do *not* imply that something with no immediate uses can simply
  // be removed.
  BasicBlock &StartingPoint = F.getEntryBlock();
  LiveOnEntryDef.reset(new MemoryDef(F.getContext(), nullptr, nullptr,
                                     &StartingPoint, NextID++));
  // could just look up the memory access for every possible instruction in the
  // stream.
  SmallPtrSet<BasicBlock *, 32> DefiningBlocks;
  // Go through each block, figure out where defs occur, and chain together all
  // the accesses.
  for (BasicBlock &B : F) {
    bool InsertIntoDef = false;
    AccessList *Accesses = nullptr;
    DefsList *Defs = nullptr;
    for (Instruction &I : B) {
      MemoryUseOrDef *MUD = createNewAccess(&I, &BAA);
      if (!MUD)
        continue;

      if (!Accesses)
        Accesses = getOrCreateAccessList(&B);
      Accesses->push_back(MUD);
      if (isa<MemoryDef>(MUD)) {
        InsertIntoDef = true;
        if (!Defs)
          Defs = getOrCreateDefsList(&B);
        Defs->push_back(*MUD);
      }
    }
    if (InsertIntoDef)
      DefiningBlocks.insert(&B);
  }
  placePHINodes(DefiningBlocks);

  // Now do regular SSA renaming on the MemoryDef/MemoryUse. Visited will get
  // filled in with all blocks.
  SmallPtrSet<BasicBlock *, 16> Visited;
  renamePass(DT->getRootNode(), LiveOnEntryDef.get(), Visited);

  ClobberWalkerBase<BatchAAResults> WalkerBase(this, &BAA, DT);
  CachingWalker<BatchAAResults> WalkerLocal(this, &WalkerBase);
  OptimizeUses(this, &WalkerLocal, &BAA, DT).optimizeUses();

  // Mark the uses in unreachable blocks as live on entry, so that they go
  // somewhere.
  for (auto &BB : F)
    if (!Visited.count(&BB))
      markUnreachableAsLiveOnEntry(&BB);
}

MemorySSAWalker *MemorySSA::getWalker() { return getWalkerImpl(); }

MemorySSA::CachingWalker<AliasAnalysis> *MemorySSA::getWalkerImpl() {
  if (Walker)
    return Walker.get();

  if (!WalkerBase)
    WalkerBase =
        std::make_unique<ClobberWalkerBase<AliasAnalysis>>(this, AA, DT);

  Walker =
      std::make_unique<CachingWalker<AliasAnalysis>>(this, WalkerBase.get());
  return Walker.get();
}

MemorySSAWalker *MemorySSA::getSkipSelfWalker() {
  if (SkipWalker)
    return SkipWalker.get();

  if (!WalkerBase)
    WalkerBase =
        std::make_unique<ClobberWalkerBase<AliasAnalysis>>(this, AA, DT);

  SkipWalker =
      std::make_unique<SkipSelfWalker<AliasAnalysis>>(this, WalkerBase.get());
  return SkipWalker.get();
}


// This is a helper function used by the creation routines. It places NewAccess
// into the access and defs lists for a given basic block, at the given
// insertion point.
void MemorySSA::insertIntoListsForBlock(MemoryAccess *NewAccess,
                                        const BasicBlock *BB,
                                        InsertionPlace Point) {
  auto *Accesses = getOrCreateAccessList(BB);
  if (Point == Beginning) {
    // If it's a phi node, it goes first; otherwise, it goes after any phi
    // nodes.
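    // For example (hypothetical block contents), a block whose access list is
    //   0 = MemoryPhi(...), 1 = MemoryDef(0), MemoryUse(1)
    // keeps that shape after an insertion at Beginning: a new MemoryPhi is
    // pushed to the very front, while a new Def or Use lands right after the
    // existing phi but before the other accesses.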
    if (isa<MemoryPhi>(NewAccess)) {
      Accesses->push_front(NewAccess);
      auto *Defs = getOrCreateDefsList(BB);
      Defs->push_front(*NewAccess);
    } else {
      auto AI = find_if_not(
          *Accesses, [](const MemoryAccess &MA) { return isa<MemoryPhi>(MA); });
      Accesses->insert(AI, NewAccess);
      if (!isa<MemoryUse>(NewAccess)) {
        auto *Defs = getOrCreateDefsList(BB);
        auto DI = find_if_not(
            *Defs, [](const MemoryAccess &MA) { return isa<MemoryPhi>(MA); });
        Defs->insert(DI, *NewAccess);
      }
    }
  } else {
    Accesses->push_back(NewAccess);
    if (!isa<MemoryUse>(NewAccess)) {
      auto *Defs = getOrCreateDefsList(BB);
      Defs->push_back(*NewAccess);
    }
  }
  BlockNumberingValid.erase(BB);
}

void MemorySSA::insertIntoListsBefore(MemoryAccess *What, const BasicBlock *BB,
                                      AccessList::iterator InsertPt) {
  auto *Accesses = getWritableBlockAccesses(BB);
  bool WasEnd = InsertPt == Accesses->end();
  Accesses->insert(AccessList::iterator(InsertPt), What);
  if (!isa<MemoryUse>(What)) {
    auto *Defs = getOrCreateDefsList(BB);
    // If we got asked to insert at the end, we have an easy job, just shove it
    // at the end. If we got asked to insert before an existing def, we also get
    // an iterator. If we got asked to insert before a use, we have to hunt for
    // the next def.
    if (WasEnd) {
      Defs->push_back(*What);
    } else if (isa<MemoryDef>(InsertPt)) {
      Defs->insert(InsertPt->getDefsIterator(), *What);
    } else {
      while (InsertPt != Accesses->end() && !isa<MemoryDef>(InsertPt))
        ++InsertPt;
      // Either we found a def, or we are inserting at the end.
      if (InsertPt == Accesses->end())
        Defs->push_back(*What);
      else
        Defs->insert(InsertPt->getDefsIterator(), *What);
    }
  }
  BlockNumberingValid.erase(BB);
}

void MemorySSA::prepareForMoveTo(MemoryAccess *What, BasicBlock *BB) {
  // Keep it in the lookup tables, remove it from the lists.
  removeFromLists(What, false);

  // Note that moving should implicitly invalidate the optimized state of a
  // MemoryUse (and Phis can't be optimized). However, it doesn't do so for a
  // MemoryDef.
  if (auto *MD = dyn_cast<MemoryDef>(What))
    MD->resetOptimized();
  What->setBlock(BB);
}

// Move What before Where in the IR. The end result is that What will belong to
// the right lists and have the right Block set, but will not otherwise be
// correct. It will not have the right defining access, and if it is a def,
// things below it will not properly be updated.
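// A minimal usage sketch (assuming an updater-style caller; the fix-up work is
// the caller's responsibility and is not shown here):
//   MSSA->moveTo(MA, NewBB, MemorySSA::Beginning);
//   // ...then recompute MA's defining access and repair any accesses that
//   // were defined below MA, e.g. via the MemorySSAUpdater utilities.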
void MemorySSA::moveTo(MemoryUseOrDef *What, BasicBlock *BB,
                       AccessList::iterator Where) {
  prepareForMoveTo(What, BB);
  insertIntoListsBefore(What, BB, Where);
}

void MemorySSA::moveTo(MemoryAccess *What, BasicBlock *BB,
                       InsertionPlace Point) {
  if (isa<MemoryPhi>(What)) {
    assert(Point == Beginning &&
           "Can only move a Phi at the beginning of the block");
    // Update lookup table entry.
    ValueToMemoryAccess.erase(What->getBlock());
    bool Inserted = ValueToMemoryAccess.insert({BB, What}).second;
    (void)Inserted;
    assert(Inserted && "Cannot move a Phi to a block that already has one");
  }

  prepareForMoveTo(What, BB);
  insertIntoListsForBlock(What, BB, Point);
}

MemoryPhi *MemorySSA::createMemoryPhi(BasicBlock *BB) {
  assert(!getMemoryAccess(BB) && "MemoryPhi already exists for this BB");
  MemoryPhi *Phi = new MemoryPhi(BB->getContext(), BB, NextID++);
  // Phis are always placed at the front of the block.
  insertIntoListsForBlock(Phi, BB, Beginning);
  ValueToMemoryAccess[BB] = Phi;
  return Phi;
}

MemoryUseOrDef *MemorySSA::createDefinedAccess(Instruction *I,
                                               MemoryAccess *Definition,
                                               const MemoryUseOrDef *Template,
                                               bool CreationMustSucceed) {
  assert(!isa<PHINode>(I) && "Cannot create a defined access for a PHI");
  MemoryUseOrDef *NewAccess = createNewAccess(I, AA, Template);
  if (CreationMustSucceed)
    assert(NewAccess != nullptr && "Tried to create a memory access for a "
                                   "non-memory touching instruction");
  if (NewAccess) {
    assert((!Definition || !isa<MemoryUse>(Definition)) &&
           "A use cannot be a defining access");
    NewAccess->setDefiningAccess(Definition);
  }
  return NewAccess;
}

// Return true if the instruction has ordering constraints.
// Note specifically that this only considers stores and loads
// because others are still considered ModRef by getModRefInfo.
static inline bool isOrdered(const Instruction *I) {
  if (auto *SI = dyn_cast<StoreInst>(I)) {
    if (!SI->isUnordered())
      return true;
  } else if (auto *LI = dyn_cast<LoadInst>(I)) {
    if (!LI->isUnordered())
      return true;
  }
  return false;
}

/// Helper function to create new memory accesses.
template <typename AliasAnalysisType>
MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I,
                                           AliasAnalysisType *AAP,
                                           const MemoryUseOrDef *Template) {
  // The assume intrinsic has a control dependency which we model by claiming
  // that it writes arbitrarily. Debuginfo intrinsics may be considered
  // clobbers when we have a nonstandard AA pipeline. Ignore these fake memory
  // dependencies here.
  // FIXME: Replace this special casing with a more accurate modelling of
  // assume's control dependency.
  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
    switch (II->getIntrinsicID()) {
    default:
      break;
    case Intrinsic::assume:
    case Intrinsic::experimental_noalias_scope_decl:
      return nullptr;
    }
  }

  // Using a nonstandard AA pipeline might leave us with unexpected modref
  // results for I, so add a check to not model instructions that may not read
  // from or write to memory. This is necessary for correctness.
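  // For instance, a plain `add` instruction, or a call to a function marked
  // `readnone`, reports neither mayReadFromMemory() nor mayWriteToMemory(),
  // so it never receives a MemoryAccess here regardless of what the AA
  // pipeline claims.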
  if (!I->mayReadFromMemory() && !I->mayWriteToMemory())
    return nullptr;

  bool Def, Use;
  if (Template) {
    Def = isa<MemoryDef>(Template);
    Use = isa<MemoryUse>(Template);
#if !defined(NDEBUG)
    ModRefInfo ModRef = AAP->getModRefInfo(I, None);
    bool DefCheck, UseCheck;
    DefCheck = isModSet(ModRef) || isOrdered(I);
    UseCheck = isRefSet(ModRef);
    assert(Def == DefCheck && (Def || Use == UseCheck) && "Invalid template");
#endif
  } else {
    // Find out what effect this instruction has on memory.
    ModRefInfo ModRef = AAP->getModRefInfo(I, None);
    // The isOrdered check is used to ensure that volatiles end up as defs
    // (atomics end up as ModRef right now anyway). Until we separate the
    // ordering chain from the memory chain, this enables people to see at
    // least some relative ordering to volatiles. Note that
    // getClobberingMemoryAccess will still give an answer that bypasses other
    // volatile loads. TODO: Separate memory aliasing and ordering into two
    // different chains so that we can precisely represent both "what memory
    // will this read/write/is clobbered by" and "what instructions can I move
    // this past".
    Def = isModSet(ModRef) || isOrdered(I);
    Use = isRefSet(ModRef);
  }

  // It's possible for an instruction to not modify memory at all. During
  // construction, we ignore such instructions.
  if (!Def && !Use)
    return nullptr;

  MemoryUseOrDef *MUD;
  if (Def)
    MUD = new MemoryDef(I->getContext(), nullptr, I, I->getParent(), NextID++);
  else
    MUD = new MemoryUse(I->getContext(), nullptr, I, I->getParent());
  ValueToMemoryAccess[I] = MUD;
  return MUD;
}

/// Properly remove \p MA from all of MemorySSA's lookup tables.
void MemorySSA::removeFromLookups(MemoryAccess *MA) {
  assert(MA->use_empty() &&
         "Trying to remove memory access that still has uses");
  BlockNumbering.erase(MA);
  if (auto *MUD = dyn_cast<MemoryUseOrDef>(MA))
    MUD->setDefiningAccess(nullptr);
  // Invalidate our walker's cache if necessary.
  if (!isa<MemoryUse>(MA))
    getWalker()->invalidateInfo(MA);

  Value *MemoryInst;
  if (const auto *MUD = dyn_cast<MemoryUseOrDef>(MA))
    MemoryInst = MUD->getMemoryInst();
  else
    MemoryInst = MA->getBlock();

  auto VMA = ValueToMemoryAccess.find(MemoryInst);
  if (VMA->second == MA)
    ValueToMemoryAccess.erase(VMA);
}

/// Properly remove \p MA from all of MemorySSA's lists.
///
/// Because of the way the intrusive list and use lists work, it is important
/// to do removal in the right order.
/// ShouldDelete defaults to true, and will cause the memory access to also be
/// deleted, not just removed.
void MemorySSA::removeFromLists(MemoryAccess *MA, bool ShouldDelete) {
  BasicBlock *BB = MA->getBlock();
  // The access list owns the reference, so we erase it from the non-owning
  // list first.
  if (!isa<MemoryUse>(MA)) {
    auto DefsIt = PerBlockDefs.find(BB);
    std::unique_ptr<DefsList> &Defs = DefsIt->second;
    Defs->remove(*MA);
    if (Defs->empty())
      PerBlockDefs.erase(DefsIt);
  }

  // The erase call here will delete it. If we don't want it deleted, we call
  // remove instead.
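  // For example, prepareForMoveTo() above calls
  //   removeFromLists(What, /*ShouldDelete=*/false);
  // because the access is about to be re-inserted into another block's lists
  // and must stay alive (and keep its lookup-table entries).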
  auto AccessIt = PerBlockAccesses.find(BB);
  std::unique_ptr<AccessList> &Accesses = AccessIt->second;
  if (ShouldDelete)
    Accesses->erase(MA);
  else
    Accesses->remove(MA);

  if (Accesses->empty()) {
    PerBlockAccesses.erase(AccessIt);
    BlockNumberingValid.erase(BB);
  }
}

void MemorySSA::print(raw_ostream &OS) const {
  MemorySSAAnnotatedWriter Writer(this);
  F.print(OS, &Writer);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void MemorySSA::dump() const { print(dbgs()); }
#endif

void MemorySSA::verifyMemorySSA() const {
  verifyOrderingDominationAndDefUses(F);
  verifyDominationNumbers(F);
  verifyPrevDefInPhis(F);
  // Previously, the verification used to also check that the clobberingAccess
  // cached by MemorySSA is the same as the clobberingAccess found at a later
  // query to AA. This does not hold true in general due to the current
  // fragility of BasicAA, which has arbitrary caps on the things it analyzes
  // before giving up. As a result, transformations that are correct will lead
  // to BasicAA returning different alias answers before and after that
  // transformation. Invalidating MemorySSA is not an option, because the
  // results in BasicAA can be so unpredictable that, in the worst case, we'd
  // need to rebuild MemorySSA from scratch after every transformation, which
  // defeats the purpose of using it. For such an example, see test4 added in
  // D51960.
}

void MemorySSA::verifyPrevDefInPhis(Function &F) const {
#if !defined(NDEBUG) && defined(EXPENSIVE_CHECKS)
  for (const BasicBlock &BB : F) {
    if (MemoryPhi *Phi = getMemoryAccess(&BB)) {
      for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
        auto *Pred = Phi->getIncomingBlock(I);
        auto *IncAcc = Phi->getIncomingValue(I);
        // If Pred has no unreachable predecessors, get the last def looking at
        // IDoms. If, while walking IDoms, any of these has an unreachable
        // predecessor, then the incoming def can be any access.
        if (auto *DTNode = DT->getNode(Pred)) {
          while (DTNode) {
            if (auto *DefList = getBlockDefs(DTNode->getBlock())) {
              auto *LastAcc = &*(--DefList->end());
              assert(LastAcc == IncAcc &&
                     "Incorrect incoming access into phi.");
              break;
            }
            DTNode = DTNode->getIDom();
          }
        } else {
          // If Pred has unreachable predecessors, but has at least a Def, the
          // incoming access can be the last Def in Pred, or it could have been
          // optimized to LoE. After an update, though, the LoE may have been
          // replaced by another access, so IncAcc may be any access.
          // If Pred has unreachable predecessors and no Defs, the incoming
          // access should be LoE; however, after an update, it may be any
          // access.
        }
      }
    }
  }
#endif
}

/// Verify that all of the blocks we believe to have valid domination numbers
/// actually have valid domination numbers.
void MemorySSA::verifyDominationNumbers(const Function &F) const {
#ifndef NDEBUG
  if (BlockNumberingValid.empty())
    return;

  SmallPtrSet<const BasicBlock *, 16> ValidBlocks = BlockNumberingValid;
  for (const BasicBlock &BB : F) {
    if (!ValidBlocks.count(&BB))
      continue;

    ValidBlocks.erase(&BB);

    const AccessList *Accesses = getBlockAccesses(&BB);
    // It's correct to say an empty block has valid numbering.
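    // As a concrete (hypothetical) example, a block whose access list is
    //   [MemoryPhi, MemoryDef, MemoryUse]
    // would carry the numbers 1, 2, 3 in BlockNumbering (see renumberBlock()
    // below); the loop below only checks that the numbers it finds are
    // present and strictly increasing.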
    if (!Accesses)
      continue;

    // Block numbering starts at 1.
    unsigned long LastNumber = 0;
    for (const MemoryAccess &MA : *Accesses) {
      auto ThisNumberIter = BlockNumbering.find(&MA);
      assert(ThisNumberIter != BlockNumbering.end() &&
             "MemoryAccess has no domination number in a valid block!");

      unsigned long ThisNumber = ThisNumberIter->second;
      assert(ThisNumber > LastNumber &&
             "Domination numbers should be strictly increasing!");
      LastNumber = ThisNumber;
    }
  }

  assert(ValidBlocks.empty() &&
         "All valid BasicBlocks should exist in F -- dangling pointers?");
#endif
}

/// Verify ordering: the order and existence of MemoryAccesses matches the
/// order and existence of memory affecting instructions.
/// Verify domination: each definition dominates all of its uses.
/// Verify def-uses: the immediate use information - walk all the memory
/// accesses and verify that, for each use, it appears in the appropriate
/// def's use list.
void MemorySSA::verifyOrderingDominationAndDefUses(Function &F) const {
#if !defined(NDEBUG)
  // Walk all the blocks, comparing what the lookups think and what the access
  // lists think, as well as the order in the blocks vs the order in the access
  // lists.
  SmallVector<MemoryAccess *, 32> ActualAccesses;
  SmallVector<MemoryAccess *, 32> ActualDefs;
  for (BasicBlock &B : F) {
    const AccessList *AL = getBlockAccesses(&B);
    const auto *DL = getBlockDefs(&B);
    MemoryPhi *Phi = getMemoryAccess(&B);
    if (Phi) {
      // Verify ordering.
      ActualAccesses.push_back(Phi);
      ActualDefs.push_back(Phi);
      // Verify domination.
      for (const Use &U : Phi->uses())
        assert(dominates(Phi, U) && "Memory PHI does not dominate its uses");
#if defined(EXPENSIVE_CHECKS)
      // Verify def-uses.
      assert(Phi->getNumOperands() == static_cast<unsigned>(std::distance(
                                          pred_begin(&B), pred_end(&B))) &&
             "Incomplete MemoryPhi Node");
      for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
        verifyUseInDefs(Phi->getIncomingValue(I), Phi);
        assert(is_contained(predecessors(&B), Phi->getIncomingBlock(I)) &&
               "Incoming phi block not a block predecessor");
      }
#endif
    }

    for (Instruction &I : B) {
      MemoryUseOrDef *MA = getMemoryAccess(&I);
      assert((!MA || (AL && (isa<MemoryUse>(MA) || DL))) &&
             "We have memory affecting instructions "
             "in this block but they are not in the "
             "access list or defs list");
      if (MA) {
        // Verify ordering.
        ActualAccesses.push_back(MA);
        if (MemoryAccess *MD = dyn_cast<MemoryDef>(MA)) {
          // Verify ordering.
          ActualDefs.push_back(MA);
          // Verify domination.
          for (const Use &U : MD->uses())
            assert(dominates(MD, U) &&
                   "Memory Def does not dominate its uses");
        }
#if defined(EXPENSIVE_CHECKS)
        // Verify def-uses.
        verifyUseInDefs(MA->getDefiningAccess(), MA);
#endif
      }
    }
    // Either we hit the assert, really have no accesses, or we have both
    // accesses and an access list. Same with defs.
    if (!AL && !DL)
      continue;
    // Verify ordering.
    assert(AL->size() == ActualAccesses.size() &&
           "We don't have the same number of accesses in the block as on the "
           "access list");
    assert((DL || ActualDefs.size() == 0) &&
           "Either we should have a defs list, or we should have no defs");
    assert((!DL || DL->size() == ActualDefs.size()) &&
           "We don't have the same number of defs in the block as on the "
           "def list");
    auto ALI = AL->begin();
    auto AAI = ActualAccesses.begin();
    while (ALI != AL->end() && AAI != ActualAccesses.end()) {
      assert(&*ALI == *AAI && "Not the same accesses in the same order");
      ++ALI;
      ++AAI;
    }
    ActualAccesses.clear();
    if (DL) {
      auto DLI = DL->begin();
      auto ADI = ActualDefs.begin();
      while (DLI != DL->end() && ADI != ActualDefs.end()) {
        assert(&*DLI == *ADI && "Not the same defs in the same order");
        ++DLI;
        ++ADI;
      }
    }
    ActualDefs.clear();
  }
#endif
}

/// Verify the def-use lists in MemorySSA, by verifying that \p Use
/// appears in the use list of \p Def.
void MemorySSA::verifyUseInDefs(MemoryAccess *Def, MemoryAccess *Use) const {
#ifndef NDEBUG
  // The live on entry use may cause us to get a NULL def here.
  if (!Def)
    assert(isLiveOnEntryDef(Use) &&
           "Null def but use does not point to live on entry def");
  else
    assert(is_contained(Def->users(), Use) &&
           "Did not find use in def's use list");
#endif
}

/// Perform a local numbering on blocks so that instruction ordering can be
/// determined in constant time.
/// TODO: We currently just number in order. If we numbered by N, we could
/// allow at least N-1 sequences of insertBefore or insertAfter (and at least
/// log2(N) sequences of mixed before and after) without needing to invalidate
/// the numbering.
void MemorySSA::renumberBlock(const BasicBlock *B) const {
  // The pre-increment ensures the numbers really start at 1.
  unsigned long CurrentNumber = 0;
  const AccessList *AL = getBlockAccesses(B);
  assert(AL != nullptr && "Asking to renumber an empty block");
  for (const auto &I : *AL)
    BlockNumbering[&I] = ++CurrentNumber;
  BlockNumberingValid.insert(B);
}

/// Determine, for two memory accesses in the same block,
/// whether \p Dominator dominates \p Dominatee.
/// \returns True if \p Dominator dominates \p Dominatee.
bool MemorySSA::locallyDominates(const MemoryAccess *Dominator,
                                 const MemoryAccess *Dominatee) const {
  const BasicBlock *DominatorBlock = Dominator->getBlock();

  assert((DominatorBlock == Dominatee->getBlock()) &&
         "Asking for local domination when accesses are in different blocks!");
  // A node dominates itself.
  if (Dominatee == Dominator)
    return true;

  // When Dominatee is defined on function entry, it is not dominated by
  // another memory access.
  if (isLiveOnEntryDef(Dominatee))
    return false;

  // When Dominator is defined on function entry, it dominates the other
  // memory access.
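  // Worked example (hypothetical numbering): if renumberBlock() assigned
  // {Phi: 1, Def: 2, Use: 3}, then locallyDominates(Def, Use) is true because
  // 2 < 3, while locallyDominates(Use, Def) is false.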
  if (isLiveOnEntryDef(Dominator))
    return true;

  if (!BlockNumberingValid.count(DominatorBlock))
    renumberBlock(DominatorBlock);

  unsigned long DominatorNum = BlockNumbering.lookup(Dominator);
  // All numbers start at 1.
  assert(DominatorNum != 0 && "Block was not numbered properly");
  unsigned long DominateeNum = BlockNumbering.lookup(Dominatee);
  assert(DominateeNum != 0 && "Block was not numbered properly");
  return DominatorNum < DominateeNum;
}

bool MemorySSA::dominates(const MemoryAccess *Dominator,
                          const MemoryAccess *Dominatee) const {
  if (Dominator == Dominatee)
    return true;

  if (isLiveOnEntryDef(Dominatee))
    return false;

  if (Dominator->getBlock() != Dominatee->getBlock())
    return DT->dominates(Dominator->getBlock(), Dominatee->getBlock());
  return locallyDominates(Dominator, Dominatee);
}

bool MemorySSA::dominates(const MemoryAccess *Dominator,
                          const Use &Dominatee) const {
  if (MemoryPhi *MP = dyn_cast<MemoryPhi>(Dominatee.getUser())) {
    BasicBlock *UseBB = MP->getIncomingBlock(Dominatee);
    // The def must dominate the incoming block of the phi.
    if (UseBB != Dominator->getBlock())
      return DT->dominates(Dominator->getBlock(), UseBB);
    // If the UseBB and the DefBB are the same, compare locally.
    return locallyDominates(Dominator, cast<MemoryAccess>(Dominatee));
  }
  // If it's not a PHI node use, the normal dominates can already handle it.
  return dominates(Dominator, cast<MemoryAccess>(Dominatee.getUser()));
}

const static char LiveOnEntryStr[] = "liveOnEntry";

void MemoryAccess::print(raw_ostream &OS) const {
  switch (getValueID()) {
  case MemoryPhiVal: return static_cast<const MemoryPhi *>(this)->print(OS);
  case MemoryDefVal: return static_cast<const MemoryDef *>(this)->print(OS);
  case MemoryUseVal: return static_cast<const MemoryUse *>(this)->print(OS);
  }
  llvm_unreachable("invalid value id");
}

void MemoryDef::print(raw_ostream &OS) const {
  MemoryAccess *UO = getDefiningAccess();

  auto printID = [&OS](MemoryAccess *A) {
    if (A && A->getID())
      OS << A->getID();
    else
      OS << LiveOnEntryStr;
  };

  OS << getID() << " = MemoryDef(";
  printID(UO);
  OS << ")";

  if (isOptimized()) {
    OS << "->";
    printID(getOptimized());

    if (Optional<AliasResult> AR = getOptimizedAccessType())
      OS << " " << *AR;
  }
}

void MemoryPhi::print(raw_ostream &OS) const {
  bool First = true;
  OS << getID() << " = MemoryPhi(";
  for (const auto &Op : operands()) {
    BasicBlock *BB = getIncomingBlock(Op);
    MemoryAccess *MA = cast<MemoryAccess>(Op);
    if (!First)
      OS << ',';
    else
      First = false;

    OS << '{';
    if (BB->hasName())
      OS << BB->getName();
    else
      BB->printAsOperand(OS, false);
    OS << ',';
    if (unsigned ID = MA->getID())
      OS << ID;
    else
      OS << LiveOnEntryStr;
    OS << '}';
  }
  OS << ')';
}

void MemoryUse::print(raw_ostream &OS) const {
  MemoryAccess *UO = getDefiningAccess();
  OS << "MemoryUse(";
  if (UO && UO->getID())
    OS << UO->getID();
  else
    OS << LiveOnEntryStr;
  OS << ')';

  if (Optional<AliasResult> AR = getOptimizedAccessType())
    OS << " " << *AR;
}

void MemoryAccess::dump() const {
// Cannot completely remove virtual function even in release mode.
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  print(dbgs());
  dbgs() << "\n";
#endif
}

char MemorySSAPrinterLegacyPass::ID = 0;

MemorySSAPrinterLegacyPass::MemorySSAPrinterLegacyPass() : FunctionPass(ID) {
  initializeMemorySSAPrinterLegacyPassPass(*PassRegistry::getPassRegistry());
}

void MemorySSAPrinterLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesAll();
  AU.addRequired<MemorySSAWrapperPass>();
}

class DOTFuncMSSAInfo {
private:
  const Function &F;
  MemorySSAAnnotatedWriter MSSAWriter;

public:
  DOTFuncMSSAInfo(const Function &F, MemorySSA &MSSA)
      : F(F), MSSAWriter(&MSSA) {}

  const Function *getFunction() { return &F; }
  MemorySSAAnnotatedWriter &getWriter() { return MSSAWriter; }
};

namespace llvm {

template <>
struct GraphTraits<DOTFuncMSSAInfo *> : public GraphTraits<const BasicBlock *> {
  static NodeRef getEntryNode(DOTFuncMSSAInfo *CFGInfo) {
    return &(CFGInfo->getFunction()->getEntryBlock());
  }

  // nodes_iterator/begin/end - Allow iteration over all nodes in the graph.
  using nodes_iterator = pointer_iterator<Function::const_iterator>;

  static nodes_iterator nodes_begin(DOTFuncMSSAInfo *CFGInfo) {
    return nodes_iterator(CFGInfo->getFunction()->begin());
  }

  static nodes_iterator nodes_end(DOTFuncMSSAInfo *CFGInfo) {
    return nodes_iterator(CFGInfo->getFunction()->end());
  }

  static size_t size(DOTFuncMSSAInfo *CFGInfo) {
    return CFGInfo->getFunction()->size();
  }
};

template <>
struct DOTGraphTraits<DOTFuncMSSAInfo *> : public DefaultDOTGraphTraits {

  DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {}

  static std::string getGraphName(DOTFuncMSSAInfo *CFGInfo) {
    return "MSSA CFG for '" + CFGInfo->getFunction()->getName().str() +
           "' function";
  }

  std::string getNodeLabel(const BasicBlock *Node, DOTFuncMSSAInfo *CFGInfo) {
    return DOTGraphTraits<DOTFuncInfo *>::getCompleteNodeLabel(
        Node, nullptr,
        [CFGInfo](raw_string_ostream &OS, const BasicBlock &BB) -> void {
          BB.print(OS, &CFGInfo->getWriter(), true, true);
        },
        [](std::string &S, unsigned &I, unsigned Idx) -> void {
          std::string Str = S.substr(I, Idx - I);
          StringRef SR = Str;
          if (SR.count(" = MemoryDef(") || SR.count(" = MemoryPhi(") ||
              SR.count("MemoryUse("))
            return;
          DOTGraphTraits<DOTFuncInfo *>::eraseComment(S, I, Idx);
        });
  }

  static std::string getEdgeSourceLabel(const BasicBlock *Node,
                                        const_succ_iterator I) {
    return DOTGraphTraits<DOTFuncInfo *>::getEdgeSourceLabel(Node, I);
  }

  /// Display the raw branch weights from PGO.
  std::string getEdgeAttributes(const BasicBlock *Node, const_succ_iterator I,
                                DOTFuncMSSAInfo *CFGInfo) {
    return "";
  }

  std::string getNodeAttributes(const BasicBlock *Node,
                                DOTFuncMSSAInfo *CFGInfo) {
    return getNodeLabel(Node, CFGInfo).find(';') != std::string::npos
"style=filled, fillcolor=lightpink" 2309 : ""; 2310 } 2311 }; 2312 2313 } // namespace llvm 2314 2315 bool MemorySSAPrinterLegacyPass::runOnFunction(Function &F) { 2316 auto &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA(); 2317 if (DotCFGMSSA != "") { 2318 DOTFuncMSSAInfo CFGInfo(F, MSSA); 2319 WriteGraph(&CFGInfo, "", false, "MSSA", DotCFGMSSA); 2320 } else 2321 MSSA.print(dbgs()); 2322 2323 if (VerifyMemorySSA) 2324 MSSA.verifyMemorySSA(); 2325 return false; 2326 } 2327 2328 AnalysisKey MemorySSAAnalysis::Key; 2329 2330 MemorySSAAnalysis::Result MemorySSAAnalysis::run(Function &F, 2331 FunctionAnalysisManager &AM) { 2332 auto &DT = AM.getResult<DominatorTreeAnalysis>(F); 2333 auto &AA = AM.getResult<AAManager>(F); 2334 return MemorySSAAnalysis::Result(std::make_unique<MemorySSA>(F, &AA, &DT)); 2335 } 2336 2337 bool MemorySSAAnalysis::Result::invalidate( 2338 Function &F, const PreservedAnalyses &PA, 2339 FunctionAnalysisManager::Invalidator &Inv) { 2340 auto PAC = PA.getChecker<MemorySSAAnalysis>(); 2341 return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>()) || 2342 Inv.invalidate<AAManager>(F, PA) || 2343 Inv.invalidate<DominatorTreeAnalysis>(F, PA); 2344 } 2345 2346 PreservedAnalyses MemorySSAPrinterPass::run(Function &F, 2347 FunctionAnalysisManager &AM) { 2348 auto &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA(); 2349 if (DotCFGMSSA != "") { 2350 DOTFuncMSSAInfo CFGInfo(F, MSSA); 2351 WriteGraph(&CFGInfo, "", false, "MSSA", DotCFGMSSA); 2352 } else { 2353 OS << "MemorySSA for function: " << F.getName() << "\n"; 2354 MSSA.print(OS); 2355 } 2356 2357 return PreservedAnalyses::all(); 2358 } 2359 2360 PreservedAnalyses MemorySSAVerifierPass::run(Function &F, 2361 FunctionAnalysisManager &AM) { 2362 AM.getResult<MemorySSAAnalysis>(F).getMSSA().verifyMemorySSA(); 2363 2364 return PreservedAnalyses::all(); 2365 } 2366 2367 char MemorySSAWrapperPass::ID = 0; 2368 2369 MemorySSAWrapperPass::MemorySSAWrapperPass() : FunctionPass(ID) { 2370 initializeMemorySSAWrapperPassPass(*PassRegistry::getPassRegistry()); 2371 } 2372 2373 void MemorySSAWrapperPass::releaseMemory() { MSSA.reset(); } 2374 2375 void MemorySSAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { 2376 AU.setPreservesAll(); 2377 AU.addRequiredTransitive<DominatorTreeWrapperPass>(); 2378 AU.addRequiredTransitive<AAResultsWrapperPass>(); 2379 } 2380 2381 bool MemorySSAWrapperPass::runOnFunction(Function &F) { 2382 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 2383 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); 2384 MSSA.reset(new MemorySSA(F, &AA, &DT)); 2385 return false; 2386 } 2387 2388 void MemorySSAWrapperPass::verifyAnalysis() const { 2389 if (VerifyMemorySSA) 2390 MSSA->verifyMemorySSA(); 2391 } 2392 2393 void MemorySSAWrapperPass::print(raw_ostream &OS, const Module *M) const { 2394 MSSA->print(OS); 2395 } 2396 2397 MemorySSAWalker::MemorySSAWalker(MemorySSA *M) : MSSA(M) {} 2398 2399 /// Walk the use-def chains starting at \p StartingAccess and find 2400 /// the MemoryAccess that actually clobbers Loc. 
///
/// \returns our clobbering memory access
template <typename AliasAnalysisType>
MemoryAccess *
MemorySSA::ClobberWalkerBase<AliasAnalysisType>::getClobberingMemoryAccessBase(
    MemoryAccess *StartingAccess, const MemoryLocation &Loc,
    unsigned &UpwardWalkLimit) {
  if (isa<MemoryPhi>(StartingAccess))
    return StartingAccess;

  auto *StartingUseOrDef = cast<MemoryUseOrDef>(StartingAccess);
  if (MSSA->isLiveOnEntryDef(StartingUseOrDef))
    return StartingUseOrDef;

  Instruction *I = StartingUseOrDef->getMemoryInst();

  // Conservatively, fences are always clobbers, so don't perform the walk if
  // we hit a fence.
  if (!isa<CallBase>(I) && I->isFenceLike())
    return StartingUseOrDef;

  UpwardsMemoryQuery Q;
  Q.OriginalAccess = StartingUseOrDef;
  Q.StartingLoc = Loc;
  Q.Inst = nullptr;
  Q.IsCall = false;

  // Unlike the other function, do not walk to the def of a def, because we are
  // handed something we already believe is the clobbering access.
  // We never set SkipSelf to true in Q in this method.
  MemoryAccess *DefiningAccess = isa<MemoryUse>(StartingUseOrDef)
                                     ? StartingUseOrDef->getDefiningAccess()
                                     : StartingUseOrDef;

  MemoryAccess *Clobber =
      Walker.findClobber(DefiningAccess, Q, UpwardWalkLimit);
  LLVM_DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is ");
  LLVM_DEBUG(dbgs() << *StartingUseOrDef << "\n");
  LLVM_DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is ");
  LLVM_DEBUG(dbgs() << *Clobber << "\n");
  return Clobber;
}

template <typename AliasAnalysisType>
MemoryAccess *
MemorySSA::ClobberWalkerBase<AliasAnalysisType>::getClobberingMemoryAccessBase(
    MemoryAccess *MA, unsigned &UpwardWalkLimit, bool SkipSelf) {
  auto *StartingAccess = dyn_cast<MemoryUseOrDef>(MA);
  // If this is a MemoryPhi, we can't do anything.
  if (!StartingAccess)
    return MA;

  bool IsOptimized = false;

  // If this is an already optimized use or def, return the optimized result.
  // Note: Currently, we store the optimized def result in a separate field,
  // since we can't use the defining access.
  if (StartingAccess->isOptimized()) {
    if (!SkipSelf || !isa<MemoryDef>(StartingAccess))
      return StartingAccess->getOptimized();
    IsOptimized = true;
  }

  const Instruction *I = StartingAccess->getMemoryInst();
  // We can't sanely do anything with a fence, since fences conservatively
  // clobber all memory and have no locations to get pointers from to try to
  // disambiguate.
  if (!isa<CallBase>(I) && I->isFenceLike())
    return StartingAccess;

  UpwardsMemoryQuery Q(I, StartingAccess);

  if (isUseTriviallyOptimizableToLiveOnEntry(*Walker.getAA(), I)) {
    MemoryAccess *LiveOnEntry = MSSA->getLiveOnEntryDef();
    StartingAccess->setOptimized(LiveOnEntry);
    StartingAccess->setOptimizedAccessType(None);
    return LiveOnEntry;
  }

  MemoryAccess *OptimizedAccess;
  if (!IsOptimized) {
    // Start with the thing we already think clobbers this location.
    MemoryAccess *DefiningAccess = StartingAccess->getDefiningAccess();

    // At this point, DefiningAccess may be the live on entry def.
    // If it is, we will not get a better result.
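    // For example, a load whose nearest MemoryDef is already liveOnEntry
    // (e.g. a read through a function argument with no stores above it) is
    // cached and returned immediately; no upward walk can improve on that.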
    if (MSSA->isLiveOnEntryDef(DefiningAccess)) {
      StartingAccess->setOptimized(DefiningAccess);
      StartingAccess->setOptimizedAccessType(None);
      return DefiningAccess;
    }

    OptimizedAccess = Walker.findClobber(DefiningAccess, Q, UpwardWalkLimit);
    StartingAccess->setOptimized(OptimizedAccess);
    if (MSSA->isLiveOnEntryDef(OptimizedAccess))
      StartingAccess->setOptimizedAccessType(None);
    else if (Q.AR == MustAlias)
      StartingAccess->setOptimizedAccessType(MustAlias);
  } else
    OptimizedAccess = StartingAccess->getOptimized();

  LLVM_DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is ");
  LLVM_DEBUG(dbgs() << *StartingAccess << "\n");
  LLVM_DEBUG(dbgs() << "Optimized Memory SSA clobber for " << *I << " is ");
  LLVM_DEBUG(dbgs() << *OptimizedAccess << "\n");

  MemoryAccess *Result;
  if (SkipSelf && isa<MemoryPhi>(OptimizedAccess) &&
      isa<MemoryDef>(StartingAccess) && UpwardWalkLimit) {
    assert(isa<MemoryDef>(Q.OriginalAccess));
    Q.SkipSelfAccess = true;
    Result = Walker.findClobber(OptimizedAccess, Q, UpwardWalkLimit);
  } else
    Result = OptimizedAccess;

  LLVM_DEBUG(dbgs() << "Result Memory SSA clobber [SkipSelf = " << SkipSelf);
  LLVM_DEBUG(dbgs() << "] for " << *I << " is " << *Result << "\n");

  return Result;
}

MemoryAccess *
DoNothingMemorySSAWalker::getClobberingMemoryAccess(MemoryAccess *MA) {
  if (auto *Use = dyn_cast<MemoryUseOrDef>(MA))
    return Use->getDefiningAccess();
  return MA;
}

MemoryAccess *DoNothingMemorySSAWalker::getClobberingMemoryAccess(
    MemoryAccess *StartingAccess, const MemoryLocation &) {
  if (auto *Use = dyn_cast<MemoryUseOrDef>(StartingAccess))
    return Use->getDefiningAccess();
  return StartingAccess;
}

void MemoryPhi::deleteMe(DerivedUser *Self) {
  delete static_cast<MemoryPhi *>(Self);
}

void MemoryDef::deleteMe(DerivedUser *Self) {
  delete static_cast<MemoryDef *>(Self);
}

void MemoryUse::deleteMe(DerivedUser *Self) {
  delete static_cast<MemoryUse *>(Self);
}

bool upward_defs_iterator::IsGuaranteedLoopInvariant(Value *Ptr) const {
  auto IsGuaranteedLoopInvariantBase = [](Value *Ptr) {
    Ptr = Ptr->stripPointerCasts();
    if (!isa<Instruction>(Ptr))
      return true;
    return isa<AllocaInst>(Ptr);
  };

  Ptr = Ptr->stripPointerCasts();
  if (auto *GEP = dyn_cast<GEPOperator>(Ptr)) {
    return IsGuaranteedLoopInvariantBase(GEP->getPointerOperand()) &&
           GEP->hasAllConstantIndices();
  }
  return IsGuaranteedLoopInvariantBase(Ptr);
}
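
// Example client (sketch): how a pass under the new pass manager typically
// consumes this analysis. This is illustrative only and not part of this
// file's build; the pass name is hypothetical, and the loop assumes
// llvm/IR/InstIterator.h for instructions().
//
//   PreservedAnalyses ExampleMSSAQueryPass::run(Function &F,
//                                               FunctionAnalysisManager &AM) {
//     MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
//     MemorySSAWalker *Walker = MSSA.getWalker();
//     for (Instruction &I : instructions(F))
//       if (MemoryUseOrDef *MA = MSSA.getMemoryAccess(&I))
//         LLVM_DEBUG(dbgs() << "clobber: "
//                           << *Walker->getClobberingMemoryAccess(MA) << "\n");
//     return PreservedAnalyses::all();
//   }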