xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
10b57cec5SDimitry Andric //===- MemCpyOptimizer.cpp - Optimize use of memcpy and friends -----------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This pass performs various transformations related to eliminating memcpy
100b57cec5SDimitry Andric // calls, or transforming sets of stores into memset's.
110b57cec5SDimitry Andric //
120b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
130b57cec5SDimitry Andric 
140b57cec5SDimitry Andric #include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
150b57cec5SDimitry Andric #include "llvm/ADT/DenseSet.h"
160b57cec5SDimitry Andric #include "llvm/ADT/STLExtras.h"
17*0fca6ea1SDimitry Andric #include "llvm/ADT/ScopeExit.h"
180b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h"
190b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h"
200b57cec5SDimitry Andric #include "llvm/ADT/iterator_range.h"
210b57cec5SDimitry Andric #include "llvm/Analysis/AliasAnalysis.h"
220b57cec5SDimitry Andric #include "llvm/Analysis/AssumptionCache.h"
235f757f3fSDimitry Andric #include "llvm/Analysis/CFG.h"
2404eeddc0SDimitry Andric #include "llvm/Analysis/CaptureTracking.h"
250b57cec5SDimitry Andric #include "llvm/Analysis/GlobalsModRef.h"
265f757f3fSDimitry Andric #include "llvm/Analysis/InstructionSimplify.h"
27e8d8bef9SDimitry Andric #include "llvm/Analysis/Loads.h"
280b57cec5SDimitry Andric #include "llvm/Analysis/MemoryLocation.h"
29e8d8bef9SDimitry Andric #include "llvm/Analysis/MemorySSA.h"
30e8d8bef9SDimitry Andric #include "llvm/Analysis/MemorySSAUpdater.h"
315f757f3fSDimitry Andric #include "llvm/Analysis/PostDominators.h"
320b57cec5SDimitry Andric #include "llvm/Analysis/TargetLibraryInfo.h"
330b57cec5SDimitry Andric #include "llvm/Analysis/ValueTracking.h"
340b57cec5SDimitry Andric #include "llvm/IR/BasicBlock.h"
350b57cec5SDimitry Andric #include "llvm/IR/Constants.h"
360b57cec5SDimitry Andric #include "llvm/IR/DataLayout.h"
370b57cec5SDimitry Andric #include "llvm/IR/DerivedTypes.h"
380b57cec5SDimitry Andric #include "llvm/IR/Dominators.h"
390b57cec5SDimitry Andric #include "llvm/IR/Function.h"
400b57cec5SDimitry Andric #include "llvm/IR/GlobalVariable.h"
410b57cec5SDimitry Andric #include "llvm/IR/IRBuilder.h"
420b57cec5SDimitry Andric #include "llvm/IR/InstrTypes.h"
430b57cec5SDimitry Andric #include "llvm/IR/Instruction.h"
440b57cec5SDimitry Andric #include "llvm/IR/Instructions.h"
450b57cec5SDimitry Andric #include "llvm/IR/IntrinsicInst.h"
460b57cec5SDimitry Andric #include "llvm/IR/Intrinsics.h"
470b57cec5SDimitry Andric #include "llvm/IR/LLVMContext.h"
480b57cec5SDimitry Andric #include "llvm/IR/Module.h"
490b57cec5SDimitry Andric #include "llvm/IR/PassManager.h"
500b57cec5SDimitry Andric #include "llvm/IR/Type.h"
510b57cec5SDimitry Andric #include "llvm/IR/User.h"
520b57cec5SDimitry Andric #include "llvm/IR/Value.h"
530b57cec5SDimitry Andric #include "llvm/Support/Casting.h"
540b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
550b57cec5SDimitry Andric #include "llvm/Support/MathExtras.h"
560b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
57480093f4SDimitry Andric #include "llvm/Transforms/Utils/Local.h"
580b57cec5SDimitry Andric #include <algorithm>
590b57cec5SDimitry Andric #include <cassert>
600b57cec5SDimitry Andric #include <cstdint>
61bdd1243dSDimitry Andric #include <optional>
620b57cec5SDimitry Andric 
630b57cec5SDimitry Andric using namespace llvm;
640b57cec5SDimitry Andric 
650b57cec5SDimitry Andric #define DEBUG_TYPE "memcpyopt"
660b57cec5SDimitry Andric 
67349cc55cSDimitry Andric static cl::opt<bool> EnableMemCpyOptWithoutLibcalls(
6881ad6265SDimitry Andric     "enable-memcpyopt-without-libcalls", cl::Hidden,
69349cc55cSDimitry Andric     cl::desc("Enable memcpyopt even when libcalls are disabled"));
70e8d8bef9SDimitry Andric 
710b57cec5SDimitry Andric STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
720b57cec5SDimitry Andric STATISTIC(NumMemSetInfer, "Number of memsets inferred");
730b57cec5SDimitry Andric STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
740b57cec5SDimitry Andric STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
75e8d8bef9SDimitry Andric STATISTIC(NumCallSlot, "Number of call slot optimizations performed");
765f757f3fSDimitry Andric STATISTIC(NumStackMove, "Number of stack-move optimizations performed");
770b57cec5SDimitry Andric 
780b57cec5SDimitry Andric namespace {
790b57cec5SDimitry Andric 
800b57cec5SDimitry Andric /// Represents a range of memset'd bytes with the ByteVal value.
810b57cec5SDimitry Andric /// This allows us to analyze stores like:
820b57cec5SDimitry Andric ///   store 0 -> P+1
830b57cec5SDimitry Andric ///   store 0 -> P+0
840b57cec5SDimitry Andric ///   store 0 -> P+3
850b57cec5SDimitry Andric ///   store 0 -> P+2
860b57cec5SDimitry Andric /// which sometimes happens with stores to arrays of structs etc.  When we see
870b57cec5SDimitry Andric /// the first store, we make a range [1, 2).  The second store extends the range
880b57cec5SDimitry Andric /// to [0, 2).  The third makes a new range [2, 3).  The fourth store joins the
890b57cec5SDimitry Andric /// two ranges into [0, 3) which is memset'able.
900b57cec5SDimitry Andric struct MemsetRange {
910b57cec5SDimitry Andric   // Start/End - A semi range that describes the span that this range covers.
920b57cec5SDimitry Andric   // The range is closed at the start and open at the end: [Start, End).
930b57cec5SDimitry Andric   int64_t Start, End;
940b57cec5SDimitry Andric 
950b57cec5SDimitry Andric   /// StartPtr - The getelementptr instruction that points to the start of the
960b57cec5SDimitry Andric   /// range.
970b57cec5SDimitry Andric   Value *StartPtr;
980b57cec5SDimitry Andric 
990b57cec5SDimitry Andric   /// Alignment - The known alignment of the first store.
10081ad6265SDimitry Andric   MaybeAlign Alignment;
1010b57cec5SDimitry Andric 
1020b57cec5SDimitry Andric   /// TheStores - The actual stores that make up this range.
1030b57cec5SDimitry Andric   SmallVector<Instruction *, 16> TheStores;
1040b57cec5SDimitry Andric 
1050b57cec5SDimitry Andric   bool isProfitableToUseMemset(const DataLayout &DL) const;
1060b57cec5SDimitry Andric };
1070b57cec5SDimitry Andric 
1080b57cec5SDimitry Andric } // end anonymous namespace
1090b57cec5SDimitry Andric 
isProfitableToUseMemset(const DataLayout & DL) const1100b57cec5SDimitry Andric bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
1110b57cec5SDimitry Andric   // If we found more than 4 stores to merge or 16 bytes, use memset.
112*0fca6ea1SDimitry Andric   if (TheStores.size() >= 4 || End - Start >= 16)
113*0fca6ea1SDimitry Andric     return true;
1140b57cec5SDimitry Andric 
1150b57cec5SDimitry Andric   // If there is nothing to merge, don't do anything.
116*0fca6ea1SDimitry Andric   if (TheStores.size() < 2)
117*0fca6ea1SDimitry Andric     return false;
1180b57cec5SDimitry Andric 
1190b57cec5SDimitry Andric   // If any of the stores are a memset, then it is always good to extend the
1200b57cec5SDimitry Andric   // memset.
1210b57cec5SDimitry Andric   for (Instruction *SI : TheStores)
1220b57cec5SDimitry Andric     if (!isa<StoreInst>(SI))
1230b57cec5SDimitry Andric       return true;
1240b57cec5SDimitry Andric 
1250b57cec5SDimitry Andric   // Assume that the code generator is capable of merging pairs of stores
1260b57cec5SDimitry Andric   // together if it wants to.
127*0fca6ea1SDimitry Andric   if (TheStores.size() == 2)
128*0fca6ea1SDimitry Andric     return false;
1290b57cec5SDimitry Andric 
1300b57cec5SDimitry Andric   // If we have fewer than 8 stores, it can still be worthwhile to do this.
1310b57cec5SDimitry Andric   // For example, merging 4 i8 stores into an i32 store is useful almost always.
1320b57cec5SDimitry Andric   // However, merging 2 32-bit stores isn't useful on a 32-bit architecture (the
1330b57cec5SDimitry Andric   // memset will be split into 2 32-bit stores anyway) and doing so can
1340b57cec5SDimitry Andric   // pessimize the llvm optimizer.
1350b57cec5SDimitry Andric   //
1360b57cec5SDimitry Andric   // Since we don't have perfect knowledge here, make some assumptions: assume
1370b57cec5SDimitry Andric   // the maximum GPR width is the same size as the largest legal integer
1380b57cec5SDimitry Andric   // size. If so, check to see whether we will end up actually reducing the
1390b57cec5SDimitry Andric   // number of stores used.
1400b57cec5SDimitry Andric   unsigned Bytes = unsigned(End - Start);
1410b57cec5SDimitry Andric   unsigned MaxIntSize = DL.getLargestLegalIntTypeSizeInBits() / 8;
1420b57cec5SDimitry Andric   if (MaxIntSize == 0)
1430b57cec5SDimitry Andric     MaxIntSize = 1;
1440b57cec5SDimitry Andric   unsigned NumPointerStores = Bytes / MaxIntSize;
1450b57cec5SDimitry Andric 
1460b57cec5SDimitry Andric   // Assume the remaining bytes if any are done a byte at a time.
1470b57cec5SDimitry Andric   unsigned NumByteStores = Bytes % MaxIntSize;
1480b57cec5SDimitry Andric 
1490b57cec5SDimitry Andric   // If we will reduce the # stores (according to this heuristic), do the
1500b57cec5SDimitry Andric   // transformation.  This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
1510b57cec5SDimitry Andric   // etc.
1520b57cec5SDimitry Andric   return TheStores.size() > NumPointerStores + NumByteStores;
1530b57cec5SDimitry Andric }
1540b57cec5SDimitry Andric 
1550b57cec5SDimitry Andric namespace {
1560b57cec5SDimitry Andric 
1570b57cec5SDimitry Andric class MemsetRanges {
1580b57cec5SDimitry Andric   using range_iterator = SmallVectorImpl<MemsetRange>::iterator;
1590b57cec5SDimitry Andric 
1600b57cec5SDimitry Andric   /// A sorted list of the memset ranges.
1610b57cec5SDimitry Andric   SmallVector<MemsetRange, 8> Ranges;
1620b57cec5SDimitry Andric 
1630b57cec5SDimitry Andric   const DataLayout &DL;
1640b57cec5SDimitry Andric 
1650b57cec5SDimitry Andric public:
MemsetRanges(const DataLayout & DL)1660b57cec5SDimitry Andric   MemsetRanges(const DataLayout &DL) : DL(DL) {}
1670b57cec5SDimitry Andric 
1680b57cec5SDimitry Andric   using const_iterator = SmallVectorImpl<MemsetRange>::const_iterator;
1690b57cec5SDimitry Andric 
begin() const1700b57cec5SDimitry Andric   const_iterator begin() const { return Ranges.begin(); }
end() const1710b57cec5SDimitry Andric   const_iterator end() const { return Ranges.end(); }
empty() const1720b57cec5SDimitry Andric   bool empty() const { return Ranges.empty(); }
1730b57cec5SDimitry Andric 
addInst(int64_t OffsetFromFirst,Instruction * Inst)1740b57cec5SDimitry Andric   void addInst(int64_t OffsetFromFirst, Instruction *Inst) {
17504eeddc0SDimitry Andric     if (auto *SI = dyn_cast<StoreInst>(Inst))
1760b57cec5SDimitry Andric       addStore(OffsetFromFirst, SI);
1770b57cec5SDimitry Andric     else
1780b57cec5SDimitry Andric       addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst));
1790b57cec5SDimitry Andric   }
1800b57cec5SDimitry Andric 
addStore(int64_t OffsetFromFirst,StoreInst * SI)1810b57cec5SDimitry Andric   void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
1828c6f6c0cSDimitry Andric     TypeSize StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType());
1838c6f6c0cSDimitry Andric     assert(!StoreSize.isScalable() && "Can't track scalable-typed stores");
184bdd1243dSDimitry Andric     addRange(OffsetFromFirst, StoreSize.getFixedValue(),
185bdd1243dSDimitry Andric              SI->getPointerOperand(), SI->getAlign(), SI);
1860b57cec5SDimitry Andric   }
1870b57cec5SDimitry Andric 
addMemSet(int64_t OffsetFromFirst,MemSetInst * MSI)1880b57cec5SDimitry Andric   void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
1890b57cec5SDimitry Andric     int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue();
19081ad6265SDimitry Andric     addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getDestAlign(), MSI);
1910b57cec5SDimitry Andric   }
1920b57cec5SDimitry Andric 
19381ad6265SDimitry Andric   void addRange(int64_t Start, int64_t Size, Value *Ptr, MaybeAlign Alignment,
19481ad6265SDimitry Andric                 Instruction *Inst);
1950b57cec5SDimitry Andric };
1960b57cec5SDimitry Andric 
1970b57cec5SDimitry Andric } // end anonymous namespace
1980b57cec5SDimitry Andric 
1990b57cec5SDimitry Andric /// Add a new store to the MemsetRanges data structure.  This adds a
2000b57cec5SDimitry Andric /// new range for the specified store at the specified offset, merging into
2010b57cec5SDimitry Andric /// existing ranges as appropriate.
addRange(int64_t Start,int64_t Size,Value * Ptr,MaybeAlign Alignment,Instruction * Inst)2020b57cec5SDimitry Andric void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
20381ad6265SDimitry Andric                             MaybeAlign Alignment, Instruction *Inst) {
2040b57cec5SDimitry Andric   int64_t End = Start + Size;
2050b57cec5SDimitry Andric 
2060b57cec5SDimitry Andric   range_iterator I = partition_point(
2070b57cec5SDimitry Andric       Ranges, [=](const MemsetRange &O) { return O.End < Start; });
2080b57cec5SDimitry Andric 
2090b57cec5SDimitry Andric   // We now know that I == E, in which case we didn't find anything to merge
2100b57cec5SDimitry Andric   // with, or that Start <= I->End.  If End < I->Start or I == E, then we need
2110b57cec5SDimitry Andric   // to insert a new range.  Handle this now.
2120b57cec5SDimitry Andric   if (I == Ranges.end() || End < I->Start) {
2130b57cec5SDimitry Andric     MemsetRange &R = *Ranges.insert(I, MemsetRange());
2140b57cec5SDimitry Andric     R.Start = Start;
2150b57cec5SDimitry Andric     R.End = End;
2160b57cec5SDimitry Andric     R.StartPtr = Ptr;
2170b57cec5SDimitry Andric     R.Alignment = Alignment;
2180b57cec5SDimitry Andric     R.TheStores.push_back(Inst);
2190b57cec5SDimitry Andric     return;
2200b57cec5SDimitry Andric   }
2210b57cec5SDimitry Andric 
2220b57cec5SDimitry Andric   // This store overlaps with I, add it.
2230b57cec5SDimitry Andric   I->TheStores.push_back(Inst);
2240b57cec5SDimitry Andric 
2250b57cec5SDimitry Andric   // At this point, we may have an interval that completely contains our store.
2260b57cec5SDimitry Andric   // If so, just add it to the interval and return.
2270b57cec5SDimitry Andric   if (I->Start <= Start && I->End >= End)
2280b57cec5SDimitry Andric     return;
2290b57cec5SDimitry Andric 
2300b57cec5SDimitry Andric   // Now we know that Start <= I->End and End >= I->Start so the range overlaps
2310b57cec5SDimitry Andric   // but is not entirely contained within the range.
2320b57cec5SDimitry Andric 
2330b57cec5SDimitry Andric   // See if the range extends the start of the range.  In this case, it couldn't
2340b57cec5SDimitry Andric   // possibly cause it to join the prior range, because otherwise we would have
2350b57cec5SDimitry Andric   // stopped on *it*.
2360b57cec5SDimitry Andric   if (Start < I->Start) {
2370b57cec5SDimitry Andric     I->Start = Start;
2380b57cec5SDimitry Andric     I->StartPtr = Ptr;
2390b57cec5SDimitry Andric     I->Alignment = Alignment;
2400b57cec5SDimitry Andric   }
2410b57cec5SDimitry Andric 
2420b57cec5SDimitry Andric   // Now we know that Start <= I->End and Start >= I->Start (so the startpoint
2430b57cec5SDimitry Andric   // is in or right at the end of I), and that End >= I->Start.  Extend I out to
2440b57cec5SDimitry Andric   // End.
2450b57cec5SDimitry Andric   if (End > I->End) {
2460b57cec5SDimitry Andric     I->End = End;
2470b57cec5SDimitry Andric     range_iterator NextI = I;
2480b57cec5SDimitry Andric     while (++NextI != Ranges.end() && End >= NextI->Start) {
2490b57cec5SDimitry Andric       // Merge the range in.
2500b57cec5SDimitry Andric       I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
2510b57cec5SDimitry Andric       if (NextI->End > I->End)
2520b57cec5SDimitry Andric         I->End = NextI->End;
2530b57cec5SDimitry Andric       Ranges.erase(NextI);
2540b57cec5SDimitry Andric       NextI = I;
2550b57cec5SDimitry Andric     }
2560b57cec5SDimitry Andric   }
2570b57cec5SDimitry Andric }
2580b57cec5SDimitry Andric 
//===----------------------------------------------------------------------===//
//                         MemCpyOptLegacyPass Pass
//===----------------------------------------------------------------------===//
2620b57cec5SDimitry Andric 
263e8d8bef9SDimitry Andric // Check that V is either not accessible by the caller, or unwinding cannot
264e8d8bef9SDimitry Andric // occur between Start and End.
mayBeVisibleThroughUnwinding(Value * V,Instruction * Start,Instruction * End)265e8d8bef9SDimitry Andric static bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start,
266e8d8bef9SDimitry Andric                                          Instruction *End) {
267e8d8bef9SDimitry Andric   assert(Start->getParent() == End->getParent() && "Must be in same block");
26804eeddc0SDimitry Andric   // Function can't unwind, so it also can't be visible through unwinding.
26904eeddc0SDimitry Andric   if (Start->getFunction()->doesNotThrow())
270e8d8bef9SDimitry Andric     return false;
27104eeddc0SDimitry Andric 
27204eeddc0SDimitry Andric   // Object is not visible on unwind.
27304eeddc0SDimitry Andric   // TODO: Support RequiresNoCaptureBeforeUnwind case.
27404eeddc0SDimitry Andric   bool RequiresNoCaptureBeforeUnwind;
27504eeddc0SDimitry Andric   if (isNotVisibleOnUnwind(getUnderlyingObject(V),
27604eeddc0SDimitry Andric                            RequiresNoCaptureBeforeUnwind) &&
27704eeddc0SDimitry Andric       !RequiresNoCaptureBeforeUnwind)
27804eeddc0SDimitry Andric     return false;
27904eeddc0SDimitry Andric 
28004eeddc0SDimitry Andric   // Check whether there are any unwinding instructions in the range.
28104eeddc0SDimitry Andric   return any_of(make_range(Start->getIterator(), End->getIterator()),
28204eeddc0SDimitry Andric                 [](const Instruction &I) { return I.mayThrow(); });
283e8d8bef9SDimitry Andric }
284e8d8bef9SDimitry Andric 
eraseInstruction(Instruction * I)285e8d8bef9SDimitry Andric void MemCpyOptPass::eraseInstruction(Instruction *I) {
286e8d8bef9SDimitry Andric   MSSAU->removeMemoryAccess(I);
287e8d8bef9SDimitry Andric   I->eraseFromParent();
288e8d8bef9SDimitry Andric }
289e8d8bef9SDimitry Andric 
290e8d8bef9SDimitry Andric // Check for mod or ref of Loc between Start and End, excluding both boundaries.
291bdd1243dSDimitry Andric // Start and End must be in the same block.
292bdd1243dSDimitry Andric // If SkippedLifetimeStart is provided, skip over one clobbering lifetime.start
293bdd1243dSDimitry Andric // intrinsic and store it inside SkippedLifetimeStart.
accessedBetween(BatchAAResults & AA,MemoryLocation Loc,const MemoryUseOrDef * Start,const MemoryUseOrDef * End,Instruction ** SkippedLifetimeStart=nullptr)294bdd1243dSDimitry Andric static bool accessedBetween(BatchAAResults &AA, MemoryLocation Loc,
295e8d8bef9SDimitry Andric                             const MemoryUseOrDef *Start,
296bdd1243dSDimitry Andric                             const MemoryUseOrDef *End,
297bdd1243dSDimitry Andric                             Instruction **SkippedLifetimeStart = nullptr) {
298e8d8bef9SDimitry Andric   assert(Start->getBlock() == End->getBlock() && "Only local supported");
299e8d8bef9SDimitry Andric   for (const MemoryAccess &MA :
300e8d8bef9SDimitry Andric        make_range(++Start->getIterator(), End->getIterator())) {
301bdd1243dSDimitry Andric     Instruction *I = cast<MemoryUseOrDef>(MA).getMemoryInst();
302bdd1243dSDimitry Andric     if (isModOrRefSet(AA.getModRefInfo(I, Loc))) {
303bdd1243dSDimitry Andric       auto *II = dyn_cast<IntrinsicInst>(I);
304bdd1243dSDimitry Andric       if (II && II->getIntrinsicID() == Intrinsic::lifetime_start &&
305bdd1243dSDimitry Andric           SkippedLifetimeStart && !*SkippedLifetimeStart) {
306bdd1243dSDimitry Andric         *SkippedLifetimeStart = I;
307bdd1243dSDimitry Andric         continue;
308bdd1243dSDimitry Andric       }
309bdd1243dSDimitry Andric 
310e8d8bef9SDimitry Andric       return true;
311e8d8bef9SDimitry Andric     }
312bdd1243dSDimitry Andric   }
313e8d8bef9SDimitry Andric   return false;
314e8d8bef9SDimitry Andric }
315e8d8bef9SDimitry Andric 
316e8d8bef9SDimitry Andric // Check for mod of Loc between Start and End, excluding both boundaries.
317e8d8bef9SDimitry Andric // Start and End can be in different blocks.
writtenBetween(MemorySSA * MSSA,BatchAAResults & AA,MemoryLocation Loc,const MemoryUseOrDef * Start,const MemoryUseOrDef * End)318bdd1243dSDimitry Andric static bool writtenBetween(MemorySSA *MSSA, BatchAAResults &AA,
31981ad6265SDimitry Andric                            MemoryLocation Loc, const MemoryUseOrDef *Start,
320e8d8bef9SDimitry Andric                            const MemoryUseOrDef *End) {
32181ad6265SDimitry Andric   if (isa<MemoryUse>(End)) {
32281ad6265SDimitry Andric     // For MemoryUses, getClobberingMemoryAccess may skip non-clobbering writes.
32381ad6265SDimitry Andric     // Manually check read accesses between Start and End, if they are in the
32481ad6265SDimitry Andric     // same block, for clobbers. Otherwise assume Loc is clobbered.
32581ad6265SDimitry Andric     return Start->getBlock() != End->getBlock() ||
32681ad6265SDimitry Andric            any_of(
32781ad6265SDimitry Andric                make_range(std::next(Start->getIterator()), End->getIterator()),
32881ad6265SDimitry Andric                [&AA, Loc](const MemoryAccess &Acc) {
32981ad6265SDimitry Andric                  if (isa<MemoryUse>(&Acc))
33081ad6265SDimitry Andric                    return false;
33181ad6265SDimitry Andric                  Instruction *AccInst =
33281ad6265SDimitry Andric                      cast<MemoryUseOrDef>(&Acc)->getMemoryInst();
33381ad6265SDimitry Andric                  return isModSet(AA.getModRefInfo(AccInst, Loc));
33481ad6265SDimitry Andric                });
33581ad6265SDimitry Andric   }
33681ad6265SDimitry Andric 
337e8d8bef9SDimitry Andric   // TODO: Only walk until we hit Start.
338e8d8bef9SDimitry Andric   MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
339bdd1243dSDimitry Andric       End->getDefiningAccess(), Loc, AA);
340e8d8bef9SDimitry Andric   return !MSSA->dominates(Clobber, Start);
341e8d8bef9SDimitry Andric }
342e8d8bef9SDimitry Andric 
3434542f901SDimitry Andric // Update AA metadata
combineAAMetadata(Instruction * ReplInst,Instruction * I)3444542f901SDimitry Andric static void combineAAMetadata(Instruction *ReplInst, Instruction *I) {
3454542f901SDimitry Andric   // FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be
3464542f901SDimitry Andric   // handled here, but combineMetadata doesn't support them yet
3474542f901SDimitry Andric   unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
3484542f901SDimitry Andric                          LLVMContext::MD_noalias,
3494542f901SDimitry Andric                          LLVMContext::MD_invariant_group,
3504542f901SDimitry Andric                          LLVMContext::MD_access_group};
3514542f901SDimitry Andric   combineMetadata(ReplInst, I, KnownIDs, true);
3524542f901SDimitry Andric }
3534542f901SDimitry Andric 
3540b57cec5SDimitry Andric /// When scanning forward over instructions, we look for some other patterns to
3550b57cec5SDimitry Andric /// fold away. In particular, this looks for stores to neighboring locations of
3560b57cec5SDimitry Andric /// memory. If it sees enough consecutive ones, it attempts to merge them
3570b57cec5SDimitry Andric /// together into a memcpy/memset.
tryMergingIntoMemset(Instruction * StartInst,Value * StartPtr,Value * ByteVal)3580b57cec5SDimitry Andric Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
3590b57cec5SDimitry Andric                                                  Value *StartPtr,
3600b57cec5SDimitry Andric                                                  Value *ByteVal) {
361*0fca6ea1SDimitry Andric   const DataLayout &DL = StartInst->getDataLayout();
3620b57cec5SDimitry Andric 
3638c6f6c0cSDimitry Andric   // We can't track scalable types
36404eeddc0SDimitry Andric   if (auto *SI = dyn_cast<StoreInst>(StartInst))
3658c6f6c0cSDimitry Andric     if (DL.getTypeStoreSize(SI->getOperand(0)->getType()).isScalable())
3668c6f6c0cSDimitry Andric       return nullptr;
3678c6f6c0cSDimitry Andric 
3680b57cec5SDimitry Andric   // Okay, so we now have a single store that can be splatable.  Scan to find
3690b57cec5SDimitry Andric   // all subsequent stores of the same value to offset from the same pointer.
3700b57cec5SDimitry Andric   // Join these together into ranges, so we can decide whether contiguous blocks
3710b57cec5SDimitry Andric   // are stored.
3720b57cec5SDimitry Andric   MemsetRanges Ranges(DL);
3730b57cec5SDimitry Andric 
3740b57cec5SDimitry Andric   BasicBlock::iterator BI(StartInst);
375e8d8bef9SDimitry Andric 
376e8d8bef9SDimitry Andric   // Keeps track of the last memory use or def before the insertion point for
377e8d8bef9SDimitry Andric   // the new memset. The new MemoryDef for the inserted memsets will be inserted
3785f757f3fSDimitry Andric   // after MemInsertPoint.
379e8d8bef9SDimitry Andric   MemoryUseOrDef *MemInsertPoint = nullptr;
3800b57cec5SDimitry Andric   for (++BI; !BI->isTerminator(); ++BI) {
381e8d8bef9SDimitry Andric     auto *CurrentAcc = cast_or_null<MemoryUseOrDef>(
382e8d8bef9SDimitry Andric         MSSAU->getMemorySSA()->getMemoryAccess(&*BI));
3835f757f3fSDimitry Andric     if (CurrentAcc)
384e8d8bef9SDimitry Andric       MemInsertPoint = CurrentAcc;
385e8d8bef9SDimitry Andric 
386fe6060f1SDimitry Andric     // Calls that only access inaccessible memory do not block merging
387fe6060f1SDimitry Andric     // accessible stores.
388fe6060f1SDimitry Andric     if (auto *CB = dyn_cast<CallBase>(BI)) {
389fe6060f1SDimitry Andric       if (CB->onlyAccessesInaccessibleMemory())
390fe6060f1SDimitry Andric         continue;
391fe6060f1SDimitry Andric     }
392fe6060f1SDimitry Andric 
3930b57cec5SDimitry Andric     if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
3940b57cec5SDimitry Andric       // If the instruction is readnone, ignore it, otherwise bail out.  We
3950b57cec5SDimitry Andric       // don't even allow readonly here because we don't want something like:
3960b57cec5SDimitry Andric       // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
3970b57cec5SDimitry Andric       if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
3980b57cec5SDimitry Andric         break;
3990b57cec5SDimitry Andric       continue;
4000b57cec5SDimitry Andric     }
4010b57cec5SDimitry Andric 
40204eeddc0SDimitry Andric     if (auto *NextStore = dyn_cast<StoreInst>(BI)) {
4030b57cec5SDimitry Andric       // If this is a store, see if we can merge it in.
404*0fca6ea1SDimitry Andric       if (!NextStore->isSimple())
405*0fca6ea1SDimitry Andric         break;
4060b57cec5SDimitry Andric 
407e8d8bef9SDimitry Andric       Value *StoredVal = NextStore->getValueOperand();
408e8d8bef9SDimitry Andric 
409e8d8bef9SDimitry Andric       // Don't convert stores of non-integral pointer types to memsets (which
410e8d8bef9SDimitry Andric       // stores integers).
411e8d8bef9SDimitry Andric       if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
412e8d8bef9SDimitry Andric         break;
413e8d8bef9SDimitry Andric 
4148c6f6c0cSDimitry Andric       // We can't track ranges involving scalable types.
4158c6f6c0cSDimitry Andric       if (DL.getTypeStoreSize(StoredVal->getType()).isScalable())
4168c6f6c0cSDimitry Andric         break;
4178c6f6c0cSDimitry Andric 
4180b57cec5SDimitry Andric       // Check to see if this stored value is of the same byte-splattable value.
419e8d8bef9SDimitry Andric       Value *StoredByte = isBytewiseValue(StoredVal, DL);
4200b57cec5SDimitry Andric       if (isa<UndefValue>(ByteVal) && StoredByte)
4210b57cec5SDimitry Andric         ByteVal = StoredByte;
4220b57cec5SDimitry Andric       if (ByteVal != StoredByte)
4230b57cec5SDimitry Andric         break;
4240b57cec5SDimitry Andric 
4250b57cec5SDimitry Andric       // Check to see if this store is to a constant offset from the start ptr.
426bdd1243dSDimitry Andric       std::optional<int64_t> Offset =
42706c3fb27SDimitry Andric           NextStore->getPointerOperand()->getPointerOffsetFrom(StartPtr, DL);
4288bcb0991SDimitry Andric       if (!Offset)
4290b57cec5SDimitry Andric         break;
4300b57cec5SDimitry Andric 
4318bcb0991SDimitry Andric       Ranges.addStore(*Offset, NextStore);
4320b57cec5SDimitry Andric     } else {
43304eeddc0SDimitry Andric       auto *MSI = cast<MemSetInst>(BI);
4340b57cec5SDimitry Andric 
4350b57cec5SDimitry Andric       if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
4360b57cec5SDimitry Andric           !isa<ConstantInt>(MSI->getLength()))
4370b57cec5SDimitry Andric         break;
4380b57cec5SDimitry Andric 
4390b57cec5SDimitry Andric       // Check to see if this store is to a constant offset from the start ptr.
440bdd1243dSDimitry Andric       std::optional<int64_t> Offset =
44106c3fb27SDimitry Andric           MSI->getDest()->getPointerOffsetFrom(StartPtr, DL);
4428bcb0991SDimitry Andric       if (!Offset)
4430b57cec5SDimitry Andric         break;
4440b57cec5SDimitry Andric 
4458bcb0991SDimitry Andric       Ranges.addMemSet(*Offset, MSI);
4460b57cec5SDimitry Andric     }
4470b57cec5SDimitry Andric   }
4480b57cec5SDimitry Andric 
4490b57cec5SDimitry Andric   // If we have no ranges, then we just had a single store with nothing that
4500b57cec5SDimitry Andric   // could be merged in.  This is a very common case of course.
4510b57cec5SDimitry Andric   if (Ranges.empty())
4520b57cec5SDimitry Andric     return nullptr;
4530b57cec5SDimitry Andric 
4540b57cec5SDimitry Andric   // If we had at least one store that could be merged in, add the starting
4550b57cec5SDimitry Andric   // store as well.  We try to avoid this unless there is at least something
4560b57cec5SDimitry Andric   // interesting as a small compile-time optimization.
4570b57cec5SDimitry Andric   Ranges.addInst(0, StartInst);
4580b57cec5SDimitry Andric 
4590b57cec5SDimitry Andric   // If we create any memsets, we put it right before the first instruction that
4600b57cec5SDimitry Andric   // isn't part of the memset block.  This ensure that the memset is dominated
4610b57cec5SDimitry Andric   // by any addressing instruction needed by the start of the block.
4620b57cec5SDimitry Andric   IRBuilder<> Builder(&*BI);
4630b57cec5SDimitry Andric 
4640b57cec5SDimitry Andric   // Now that we have full information about ranges, loop over the ranges and
4650b57cec5SDimitry Andric   // emit memset's for anything big enough to be worthwhile.
4660b57cec5SDimitry Andric   Instruction *AMemSet = nullptr;
4670b57cec5SDimitry Andric   for (const MemsetRange &Range : Ranges) {
468*0fca6ea1SDimitry Andric     if (Range.TheStores.size() == 1)
469*0fca6ea1SDimitry Andric       continue;
4700b57cec5SDimitry Andric 
4710b57cec5SDimitry Andric     // If it is profitable to lower this range to memset, do so now.
4720b57cec5SDimitry Andric     if (!Range.isProfitableToUseMemset(DL))
4730b57cec5SDimitry Andric       continue;
4740b57cec5SDimitry Andric 
4750b57cec5SDimitry Andric     // Otherwise, we do want to transform this!  Create a new memset.
4760b57cec5SDimitry Andric     // Get the starting pointer of the block.
4770b57cec5SDimitry Andric     StartPtr = Range.StartPtr;
4780b57cec5SDimitry Andric 
479480093f4SDimitry Andric     AMemSet = Builder.CreateMemSet(StartPtr, ByteVal, Range.End - Range.Start,
48081ad6265SDimitry Andric                                    Range.Alignment);
481bdd1243dSDimitry Andric     AMemSet->mergeDIAssignID(Range.TheStores);
482bdd1243dSDimitry Andric 
4830b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "Replace stores:\n"; for (Instruction *SI
4840b57cec5SDimitry Andric                                                    : Range.TheStores) dbgs()
4850b57cec5SDimitry Andric                                               << *SI << '\n';
4860b57cec5SDimitry Andric                dbgs() << "With: " << *AMemSet << '\n');
4870b57cec5SDimitry Andric     if (!Range.TheStores.empty())
4880b57cec5SDimitry Andric       AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
4890b57cec5SDimitry Andric 
490*0fca6ea1SDimitry Andric     auto *NewDef = cast<MemoryDef>(
491*0fca6ea1SDimitry Andric         MemInsertPoint->getMemoryInst() == &*BI
492*0fca6ea1SDimitry Andric             ? MSSAU->createMemoryAccessBefore(AMemSet, nullptr, MemInsertPoint)
493*0fca6ea1SDimitry Andric             : MSSAU->createMemoryAccessAfter(AMemSet, nullptr, MemInsertPoint));
494e8d8bef9SDimitry Andric     MSSAU->insertDef(NewDef, /*RenameUses=*/true);
495e8d8bef9SDimitry Andric     MemInsertPoint = NewDef;
496e8d8bef9SDimitry Andric 
497e8d8bef9SDimitry Andric     // Zap all the stores.
498e8d8bef9SDimitry Andric     for (Instruction *SI : Range.TheStores)
499e8d8bef9SDimitry Andric       eraseInstruction(SI);
500e8d8bef9SDimitry Andric 
5010b57cec5SDimitry Andric     ++NumMemSetInfer;
5020b57cec5SDimitry Andric   }
5030b57cec5SDimitry Andric 
5040b57cec5SDimitry Andric   return AMemSet;
5050b57cec5SDimitry Andric }
5060b57cec5SDimitry Andric 
5070b57cec5SDimitry Andric // This method try to lift a store instruction before position P.
5080b57cec5SDimitry Andric // It will lift the store and its argument + that anything that
5090b57cec5SDimitry Andric // may alias with these.
5100b57cec5SDimitry Andric // The method returns true if it was successful.
bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) {
  // If the store alias this position, early bail out.
  MemoryLocation StoreLoc = MemoryLocation::get(SI);
  if (isModOrRefSet(AA->getModRefInfo(P, StoreLoc)))
    return false;

  // Keep track of the arguments of all instruction we plan to lift
  // so we can make sure to lift them as well if appropriate.
  DenseSet<Instruction *> Args;
  // Record Arg as something that must be lifted if it is an instruction in
  // the same block. Returns false if lifting it would be impossible (it is P
  // itself, which cannot be hoisted above itself).
  auto AddArg = [&](Value *Arg) {
    auto *I = dyn_cast<Instruction>(Arg);
    if (I && I->getParent() == SI->getParent()) {
      // Cannot hoist user of P above P
      if (I == P)
        return false;
      Args.insert(I);
    }
    return true;
  };
  if (!AddArg(SI->getPointerOperand()))
    return false;

  // Instruction to lift before P.
  SmallVector<Instruction *, 8> ToLift{SI};

  // Memory locations of lifted instructions.
  SmallVector<MemoryLocation, 8> MemLocs{StoreLoc};

  // Lifted calls.
  SmallVector<const CallBase *, 8> Calls;

  const MemoryLocation LoadLoc = MemoryLocation::get(LI);

  // Walk backwards from just above SI to (but not including) P, deciding for
  // each instruction whether it must be lifted along with the store.
  for (auto I = --SI->getIterator(), E = P->getIterator(); I != E; --I) {
    auto *C = &*I;

    // Make sure hoisting does not perform a store that was not guaranteed to
    // happen.
    if (!isGuaranteedToTransferExecutionToSuccessor(C))
      return false;

    bool MayAlias = isModOrRefSet(AA->getModRefInfo(C, std::nullopt));

    bool NeedLift = false;
    if (Args.erase(C))
      NeedLift = true;
    else if (MayAlias) {
      // C must also move if it may access memory touched by anything already
      // scheduled to be lifted; otherwise their relative order would change.
      NeedLift = llvm::any_of(MemLocs, [C, this](const MemoryLocation &ML) {
        return isModOrRefSet(AA->getModRefInfo(C, ML));
      });

      if (!NeedLift)
        NeedLift = llvm::any_of(Calls, [C, this](const CallBase *Call) {
          return isModOrRefSet(AA->getModRefInfo(C, Call));
        });
    }

    if (!NeedLift)
      continue;

    if (MayAlias) {
      // Since LI is implicitly moved downwards past the lifted instructions,
      // none of them may modify its source.
      if (isModSet(AA->getModRefInfo(C, LoadLoc)))
        return false;
      else if (const auto *Call = dyn_cast<CallBase>(C)) {
        // If we can't lift this before P, it's game over.
        if (isModOrRefSet(AA->getModRefInfo(P, Call)))
          return false;

        Calls.push_back(Call);
      } else if (isa<LoadInst>(C) || isa<StoreInst>(C) || isa<VAArgInst>(C)) {
        // If we can't lift this before P, it's game over.
        auto ML = MemoryLocation::get(C);
        if (isModOrRefSet(AA->getModRefInfo(P, ML)))
          return false;

        MemLocs.push_back(ML);
      } else
        // We don't know how to lift this instruction.
        return false;
    }

    ToLift.push_back(C);
    // Operands of a lifted instruction must be lifted too (when they are
    // instructions in this block), or the lifted use would precede its def.
    for (Value *Op : C->operands())
      if (!AddArg(Op))
        return false;
  }

  // Find MSSA insertion point. Normally P will always have a corresponding
  // memory access before which we can insert. However, with non-standard AA
  // pipelines, there may be a mismatch between AA and MSSA, in which case we
  // will scan for a memory access before P. In either case, we know for sure
  // that at least the load will have a memory access.
  // TODO: Simplify this once P will be determined by MSSA, in which case the
  // discrepancy can no longer occur.
  MemoryUseOrDef *MemInsertPoint = nullptr;
  if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(P)) {
    MemInsertPoint = cast<MemoryUseOrDef>(--MA->getIterator());
  } else {
    const Instruction *ConstP = P;
    for (const Instruction &I : make_range(++ConstP->getReverseIterator(),
                                           ++LI->getReverseIterator())) {
      if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(&I)) {
        MemInsertPoint = MA;
        break;
      }
    }
  }

  // We made it, we need to lift.
  // ToLift was built in reverse program order, so iterate it backwards to
  // move the instructions (and their MemorySSA accesses) before P while
  // preserving their original relative order.
  for (auto *I : llvm::reverse(ToLift)) {
    LLVM_DEBUG(dbgs() << "Lifting " << *I << " before " << *P << "\n");
    I->moveBefore(P);
    assert(MemInsertPoint && "Must have found insert point");
    if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(I)) {
      MSSAU->moveAfter(MA, MemInsertPoint);
      MemInsertPoint = MA;
    }
  }

  return true;
}
6340b57cec5SDimitry Andric 
// Try to transform a store of a loaded value into a more powerful form:
// an aggregate load/store pair becomes a memcpy/memmove; a load-store pair
// implementing call slot forwarding is folded into the call; and an
// alloca-to-alloca copy may be turned into a stack move. Returns true (and
// updates BBI to a safe iterator) if any transformation was performed.
bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI,
                                       const DataLayout &DL,
                                       BasicBlock::iterator &BBI) {
  // Only simple (non-volatile, non-atomic) single-use loads in the same
  // block as the store are candidates.
  if (!LI->isSimple() || !LI->hasOneUse() || LI->getParent() != SI->getParent())
    return false;

  auto *T = LI->getType();
  // Don't introduce calls to memcpy/memmove intrinsics out of thin air if
  // the corresponding libcalls are not available.
  // TODO: We should really distinguish between libcall availability and
  // our ability to introduce intrinsics.
  if (T->isAggregateType() &&
      (EnableMemCpyOptWithoutLibcalls ||
       (TLI->has(LibFunc_memcpy) && TLI->has(LibFunc_memmove)))) {
    MemoryLocation LoadLoc = MemoryLocation::get(LI);

    // We use alias analysis to check if an instruction may store to
    // the memory we load from in between the load and the store. If
    // such an instruction is found, we try to promote there instead
    // of at the store position.
    // TODO: Can use MSSA for this.
    Instruction *P = SI;
    for (auto &I : make_range(++LI->getIterator(), SI->getIterator())) {
      if (isModSet(AA->getModRefInfo(&I, LoadLoc))) {
        P = &I;
        break;
      }
    }

    // We found an instruction that may write to the loaded memory.
    // We can try to promote at this position instead of the store
    // position if nothing aliases the store memory after this and the store
    // destination is not in the range.
    if (P && P != SI) {
      // moveUp lifts the store (and its dependencies) above P; if that is
      // not possible, promotion at P is abandoned entirely.
      if (!moveUp(SI, P, LI))
        P = nullptr;
    }

    // If a valid insertion position is found, then we can promote
    // the load/store pair to a memcpy.
    if (P) {
      // If we load from memory that may alias the memory we store to,
      // memmove must be used to preserve semantic. If not, memcpy can
      // be used. Also, if we load from constant memory, memcpy can be used
      // as the constant memory won't be modified.
      bool UseMemMove = false;
      if (isModSet(AA->getModRefInfo(SI, LoadLoc)))
        UseMemMove = true;

      IRBuilder<> Builder(P);
      Value *Size =
          Builder.CreateTypeSize(Builder.getInt64Ty(), DL.getTypeStoreSize(T));
      Instruction *M;
      if (UseMemMove)
        M = Builder.CreateMemMove(SI->getPointerOperand(), SI->getAlign(),
                                  LI->getPointerOperand(), LI->getAlign(),
                                  Size);
      else
        M = Builder.CreateMemCpy(SI->getPointerOperand(), SI->getAlign(),
                                 LI->getPointerOperand(), LI->getAlign(), Size);
      // Preserve the DIAssignID so debug-info assignment tracking still
      // associates the new intrinsic with the original store.
      M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);

      LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => " << *M
                        << "\n");

      // Keep MemorySSA in sync: the new intrinsic defines memory right after
      // the store's old defining access.
      auto *LastDef =
          cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI));
      auto *NewAccess = MSSAU->createMemoryAccessAfter(M, nullptr, LastDef);
      MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);

      eraseInstruction(SI);
      eraseInstruction(LI);
      ++NumMemCpyInstr;

      // Make sure we do not invalidate the iterator.
      BBI = M->getIterator();
      return true;
    }
  }

  // Detect cases where we're performing call slot forwarding, but
  // happen to be using a load-store pair to implement it, rather than
  // a memcpy.
  BatchAAResults BAA(*AA);
  auto GetCall = [&]() -> CallInst * {
    // We defer this expensive clobber walk until the cheap checks
    // have been done on the source inside performCallSlotOptzn.
    if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>(
            MSSA->getWalker()->getClobberingMemoryAccess(LI, BAA)))
      return dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
    return nullptr;
  };

  bool Changed = performCallSlotOptzn(
      LI, SI, SI->getPointerOperand()->stripPointerCasts(),
      LI->getPointerOperand()->stripPointerCasts(),
      DL.getTypeStoreSize(SI->getOperand(0)->getType()),
      std::min(SI->getAlign(), LI->getAlign()), BAA, GetCall);
  if (Changed) {
    eraseInstruction(SI);
    eraseInstruction(LI);
    ++NumMemCpyInstr;
    return true;
  }

  // If this is a load-store pair from a stack slot to a stack slot, we
  // might be able to perform the stack-move optimization just as we do for
  // memcpys from an alloca to an alloca.
  if (auto *DestAlloca = dyn_cast<AllocaInst>(SI->getPointerOperand())) {
    if (auto *SrcAlloca = dyn_cast<AllocaInst>(LI->getPointerOperand())) {
      if (performStackMoveOptzn(LI, SI, DestAlloca, SrcAlloca,
                                DL.getTypeStoreSize(T), BAA)) {
        // Avoid invalidating the iterator.
        BBI = SI->getNextNonDebugInstruction()->getIterator();
        eraseInstruction(SI);
        eraseInstruction(LI);
        ++NumMemCpyInstr;
        return true;
      }
    }
  }

  return false;
}
759bdd1243dSDimitry Andric 
// Visit a store instruction and attempt to turn it (possibly together with
// neighboring stores) into a memcpy/memmove (store of a load) or a memset
// (store of a bytewise-splattable value, or of an aggregate). Returns true
// and updates BBI if the IR was changed.
bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
  // Volatile/atomic stores are not candidates for any of these transforms.
  if (!SI->isSimple())
    return false;

  // Avoid merging nontemporal stores since the resulting
  // memcpy/memset would not be able to preserve the nontemporal hint.
  // In theory we could teach how to propagate the !nontemporal metadata to
  // memset calls. However, that change would force the backend to
  // conservatively expand !nontemporal memset calls back to sequences of
  // store instructions (effectively undoing the merging).
  if (SI->getMetadata(LLVMContext::MD_nontemporal))
    return false;

  const DataLayout &DL = SI->getDataLayout();

  Value *StoredVal = SI->getValueOperand();

  // Not all the transforms below are correct for non-integral pointers, bail
  // until we've audited the individual pieces.
  if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
    return false;

  // Load to store forwarding can be interpreted as memcpy.
  if (auto *LI = dyn_cast<LoadInst>(StoredVal))
    return processStoreOfLoad(SI, LI, DL, BBI);

  // The following code creates memset intrinsics out of thin air. Don't do
  // this if the corresponding libfunc is not available.
  // TODO: We should really distinguish between libcall availability and
  // our ability to introduce intrinsics.
  if (!(TLI->has(LibFunc_memset) || EnableMemCpyOptWithoutLibcalls))
    return false;

  // There are two cases that are interesting for this code to handle: memcpy
  // and memset.  Right now we only handle memset.

  // Ensure that the value being stored is something that can be memset'able a
  // byte at a time like "0" or "-1" or any width, as well as things like
  // 0xA0A0A0A0 and 0.0.
  auto *V = SI->getOperand(0);
  if (Value *ByteVal = isBytewiseValue(V, DL)) {
    // First preference: merge this store with neighboring stores/memsets
    // into one wider memset.
    if (Instruction *I =
            tryMergingIntoMemset(SI, SI->getPointerOperand(), ByteVal)) {
      BBI = I->getIterator(); // Don't invalidate iterator.
      return true;
    }

    // If we have an aggregate, we try to promote it to memset regardless
    // of opportunity for merging as it can expose optimization opportunities
    // in subsequent passes.
    auto *T = V->getType();
    if (T->isAggregateType()) {
      uint64_t Size = DL.getTypeStoreSize(T);
      IRBuilder<> Builder(SI);
      auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, Size,
                                     SI->getAlign());
      // Preserve debug-info assignment tracking metadata on the new memset.
      M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);

      LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n");

      // The newly inserted memset is immediately overwritten by the original
      // store, so we do not need to rename uses.
      auto *StoreDef = cast<MemoryDef>(MSSA->getMemoryAccess(SI));
      auto *NewAccess = MSSAU->createMemoryAccessBefore(M, nullptr, StoreDef);
      MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/false);

      eraseInstruction(SI);
      NumMemSetInfer++;

      // Make sure we do not invalidate the iterator.
      BBI = M->getIterator();
      return true;
    }
  }

  return false;
}
8370b57cec5SDimitry Andric 
processMemSet(MemSetInst * MSI,BasicBlock::iterator & BBI)8380b57cec5SDimitry Andric bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
8390b57cec5SDimitry Andric   // See if there is another memset or store neighboring this memset which
8400b57cec5SDimitry Andric   // allows us to widen out the memset to do a single larger store.
8410b57cec5SDimitry Andric   if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile())
842*0fca6ea1SDimitry Andric     if (Instruction *I =
843*0fca6ea1SDimitry Andric             tryMergingIntoMemset(MSI, MSI->getDest(), MSI->getValue())) {
8440b57cec5SDimitry Andric       BBI = I->getIterator(); // Don't invalidate iterator.
8450b57cec5SDimitry Andric       return true;
8460b57cec5SDimitry Andric     }
8470b57cec5SDimitry Andric   return false;
8480b57cec5SDimitry Andric }
8490b57cec5SDimitry Andric 
8500b57cec5SDimitry Andric /// Takes a memcpy and a call that it depends on,
8510b57cec5SDimitry Andric /// and checks for the possibility of a call slot optimization by having
8520b57cec5SDimitry Andric /// the call write its result directly into the destination of the memcpy.
performCallSlotOptzn(Instruction * cpyLoad,Instruction * cpyStore,Value * cpyDest,Value * cpySrc,TypeSize cpySize,Align cpyDestAlign,BatchAAResults & BAA,std::function<CallInst * ()> GetC)853e8d8bef9SDimitry Andric bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
854e8d8bef9SDimitry Andric                                          Instruction *cpyStore, Value *cpyDest,
8558c6f6c0cSDimitry Andric                                          Value *cpySrc, TypeSize cpySize,
856*0fca6ea1SDimitry Andric                                          Align cpyDestAlign,
857*0fca6ea1SDimitry Andric                                          BatchAAResults &BAA,
85881ad6265SDimitry Andric                                          std::function<CallInst *()> GetC) {
8590b57cec5SDimitry Andric   // The general transformation to keep in mind is
8600b57cec5SDimitry Andric   //
8610b57cec5SDimitry Andric   //   call @func(..., src, ...)
8620b57cec5SDimitry Andric   //   memcpy(dest, src, ...)
8630b57cec5SDimitry Andric   //
8640b57cec5SDimitry Andric   // ->
8650b57cec5SDimitry Andric   //
8660b57cec5SDimitry Andric   //   memcpy(dest, src, ...)
8670b57cec5SDimitry Andric   //   call @func(..., dest, ...)
8680b57cec5SDimitry Andric   //
8690b57cec5SDimitry Andric   // Since moving the memcpy is technically awkward, we additionally check that
8700b57cec5SDimitry Andric   // src only holds uninitialized values at the moment of the call, meaning that
8710b57cec5SDimitry Andric   // the memcpy can be discarded rather than moved.
8720b57cec5SDimitry Andric 
8738c6f6c0cSDimitry Andric   // We can't optimize scalable types.
8748c6f6c0cSDimitry Andric   if (cpySize.isScalable())
8758c6f6c0cSDimitry Andric     return false;
8768c6f6c0cSDimitry Andric 
8770b57cec5SDimitry Andric   // Require that src be an alloca.  This simplifies the reasoning considerably.
87804eeddc0SDimitry Andric   auto *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
8790b57cec5SDimitry Andric   if (!srcAlloca)
8800b57cec5SDimitry Andric     return false;
8810b57cec5SDimitry Andric 
8820b57cec5SDimitry Andric   ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
8830b57cec5SDimitry Andric   if (!srcArraySize)
8840b57cec5SDimitry Andric     return false;
8850b57cec5SDimitry Andric 
886*0fca6ea1SDimitry Andric   const DataLayout &DL = cpyLoad->getDataLayout();
8875f757f3fSDimitry Andric   TypeSize SrcAllocaSize = DL.getTypeAllocSize(srcAlloca->getAllocatedType());
8885f757f3fSDimitry Andric   // We can't optimize scalable types.
8895f757f3fSDimitry Andric   if (SrcAllocaSize.isScalable())
8905f757f3fSDimitry Andric     return false;
8915f757f3fSDimitry Andric   uint64_t srcSize = SrcAllocaSize * srcArraySize->getZExtValue();
8920b57cec5SDimitry Andric 
8938c6f6c0cSDimitry Andric   if (cpySize < srcSize)
8940b57cec5SDimitry Andric     return false;
8950b57cec5SDimitry Andric 
89681ad6265SDimitry Andric   CallInst *C = GetC();
89781ad6265SDimitry Andric   if (!C)
89881ad6265SDimitry Andric     return false;
89981ad6265SDimitry Andric 
90081ad6265SDimitry Andric   // Lifetime marks shouldn't be operated on.
90181ad6265SDimitry Andric   if (Function *F = C->getCalledFunction())
90281ad6265SDimitry Andric     if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::lifetime_start)
90381ad6265SDimitry Andric       return false;
90481ad6265SDimitry Andric 
90581ad6265SDimitry Andric   if (C->getParent() != cpyStore->getParent()) {
90681ad6265SDimitry Andric     LLVM_DEBUG(dbgs() << "Call Slot: block local restriction\n");
90781ad6265SDimitry Andric     return false;
90881ad6265SDimitry Andric   }
90981ad6265SDimitry Andric 
910*0fca6ea1SDimitry Andric   MemoryLocation DestLoc =
911*0fca6ea1SDimitry Andric       isa<StoreInst>(cpyStore)
912*0fca6ea1SDimitry Andric           ? MemoryLocation::get(cpyStore)
913*0fca6ea1SDimitry Andric           : MemoryLocation::getForDest(cast<MemCpyInst>(cpyStore));
91481ad6265SDimitry Andric 
91581ad6265SDimitry Andric   // Check that nothing touches the dest of the copy between
91681ad6265SDimitry Andric   // the call and the store/memcpy.
917bdd1243dSDimitry Andric   Instruction *SkippedLifetimeStart = nullptr;
918bdd1243dSDimitry Andric   if (accessedBetween(BAA, DestLoc, MSSA->getMemoryAccess(C),
919bdd1243dSDimitry Andric                       MSSA->getMemoryAccess(cpyStore), &SkippedLifetimeStart)) {
92081ad6265SDimitry Andric     LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer modified after call\n");
92181ad6265SDimitry Andric     return false;
92281ad6265SDimitry Andric   }
92381ad6265SDimitry Andric 
924bdd1243dSDimitry Andric   // If we need to move a lifetime.start above the call, make sure that we can
925bdd1243dSDimitry Andric   // actually do so. If the argument is bitcasted for example, we would have to
926bdd1243dSDimitry Andric   // move the bitcast as well, which we don't handle.
927bdd1243dSDimitry Andric   if (SkippedLifetimeStart) {
928bdd1243dSDimitry Andric     auto *LifetimeArg =
929bdd1243dSDimitry Andric         dyn_cast<Instruction>(SkippedLifetimeStart->getOperand(1));
930bdd1243dSDimitry Andric     if (LifetimeArg && LifetimeArg->getParent() == C->getParent() &&
931bdd1243dSDimitry Andric         C->comesBefore(LifetimeArg))
932bdd1243dSDimitry Andric       return false;
933bdd1243dSDimitry Andric   }
934bdd1243dSDimitry Andric 
9355f757f3fSDimitry Andric   // Check that storing to the first srcSize bytes of dest will not cause a
9365f757f3fSDimitry Andric   // trap or data race.
9375f757f3fSDimitry Andric   bool ExplicitlyDereferenceableOnly;
9385f757f3fSDimitry Andric   if (!isWritableObject(getUnderlyingObject(cpyDest),
9395f757f3fSDimitry Andric                         ExplicitlyDereferenceableOnly) ||
9405f757f3fSDimitry Andric       !isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpySize),
941bdd1243dSDimitry Andric                                           DL, C, AC, DT)) {
94204eeddc0SDimitry Andric     LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer not dereferenceable\n");
9430b57cec5SDimitry Andric     return false;
94404eeddc0SDimitry Andric   }
9450b57cec5SDimitry Andric 
946e8d8bef9SDimitry Andric   // Make sure that nothing can observe cpyDest being written early. There are
947e8d8bef9SDimitry Andric   // a number of cases to consider:
948e8d8bef9SDimitry Andric   //  1. cpyDest cannot be accessed between C and cpyStore as a precondition of
949e8d8bef9SDimitry Andric   //     the transform.
950e8d8bef9SDimitry Andric   //  2. C itself may not access cpyDest (prior to the transform). This is
951e8d8bef9SDimitry Andric   //     checked further below.
952e8d8bef9SDimitry Andric   //  3. If cpyDest is accessible to the caller of this function (potentially
953e8d8bef9SDimitry Andric   //     captured and not based on an alloca), we need to ensure that we cannot
954e8d8bef9SDimitry Andric   //     unwind between C and cpyStore. This is checked here.
955e8d8bef9SDimitry Andric   //  4. If cpyDest is potentially captured, there may be accesses to it from
956e8d8bef9SDimitry Andric   //     another thread. In this case, we need to check that cpyStore is
957e8d8bef9SDimitry Andric   //     guaranteed to be executed if C is. As it is a non-atomic access, it
958e8d8bef9SDimitry Andric   //     renders accesses from other threads undefined.
959e8d8bef9SDimitry Andric   //     TODO: This is currently not checked.
96004eeddc0SDimitry Andric   if (mayBeVisibleThroughUnwinding(cpyDest, C, cpyStore)) {
961bdd1243dSDimitry Andric     LLVM_DEBUG(dbgs() << "Call Slot: Dest may be visible through unwinding\n");
9620b57cec5SDimitry Andric     return false;
96304eeddc0SDimitry Andric   }
9640b57cec5SDimitry Andric 
9650b57cec5SDimitry Andric   // Check that dest points to memory that is at least as aligned as src.
9665ffd83dbSDimitry Andric   Align srcAlign = srcAlloca->getAlign();
967bdd1243dSDimitry Andric   bool isDestSufficientlyAligned = srcAlign <= cpyDestAlign;
9680b57cec5SDimitry Andric   // If dest is not aligned enough and we can't increase its alignment then
9690b57cec5SDimitry Andric   // bail out.
970bdd1243dSDimitry Andric   if (!isDestSufficientlyAligned && !isa<AllocaInst>(cpyDest)) {
971bdd1243dSDimitry Andric     LLVM_DEBUG(dbgs() << "Call Slot: Dest not sufficiently aligned\n");
9720b57cec5SDimitry Andric     return false;
973bdd1243dSDimitry Andric   }
9740b57cec5SDimitry Andric 
9750b57cec5SDimitry Andric   // Check that src is not accessed except via the call and the memcpy.  This
9760b57cec5SDimitry Andric   // guarantees that it holds only undefined values when passed in (so the final
9770b57cec5SDimitry Andric   // memcpy can be dropped), that it is not read or written between the call and
9780b57cec5SDimitry Andric   // the memcpy, and that writing beyond the end of it is undefined.
979e8d8bef9SDimitry Andric   SmallVector<User *, 8> srcUseList(srcAlloca->users());
9800b57cec5SDimitry Andric   while (!srcUseList.empty()) {
9810b57cec5SDimitry Andric     User *U = srcUseList.pop_back_val();
9820b57cec5SDimitry Andric 
9830b57cec5SDimitry Andric     if (isa<BitCastInst>(U) || isa<AddrSpaceCastInst>(U)) {
984e8d8bef9SDimitry Andric       append_range(srcUseList, U->users());
9850b57cec5SDimitry Andric       continue;
9860b57cec5SDimitry Andric     }
987*0fca6ea1SDimitry Andric     if (const auto *G = dyn_cast<GetElementPtrInst>(U);
988*0fca6ea1SDimitry Andric         G && G->hasAllZeroIndices()) {
989e8d8bef9SDimitry Andric       append_range(srcUseList, U->users());
9900b57cec5SDimitry Andric       continue;
9910b57cec5SDimitry Andric     }
99204eeddc0SDimitry Andric     if (const auto *IT = dyn_cast<IntrinsicInst>(U))
9930b57cec5SDimitry Andric       if (IT->isLifetimeStartOrEnd())
9940b57cec5SDimitry Andric         continue;
9950b57cec5SDimitry Andric 
996*0fca6ea1SDimitry Andric     if (U != C && U != cpyLoad) {
997*0fca6ea1SDimitry Andric       LLVM_DEBUG(dbgs() << "Call slot: Source accessed by " << *U << "\n");
9980b57cec5SDimitry Andric       return false;
9990b57cec5SDimitry Andric     }
1000*0fca6ea1SDimitry Andric   }
10010b57cec5SDimitry Andric 
100204eeddc0SDimitry Andric   // Check whether src is captured by the called function, in which case there
100304eeddc0SDimitry Andric   // may be further indirect uses of src.
100404eeddc0SDimitry Andric   bool SrcIsCaptured = any_of(C->args(), [&](Use &U) {
100504eeddc0SDimitry Andric     return U->stripPointerCasts() == cpySrc &&
100604eeddc0SDimitry Andric            !C->doesNotCapture(C->getArgOperandNo(&U));
100704eeddc0SDimitry Andric   });
100804eeddc0SDimitry Andric 
100904eeddc0SDimitry Andric   // If src is captured, then check whether there are any potential uses of
101004eeddc0SDimitry Andric   // src through the captured pointer before the lifetime of src ends, either
101104eeddc0SDimitry Andric   // due to a lifetime.end or a return from the function.
101204eeddc0SDimitry Andric   if (SrcIsCaptured) {
101304eeddc0SDimitry Andric     // Check that dest is not captured before/at the call. We have already
101404eeddc0SDimitry Andric     // checked that src is not captured before it. If either had been captured,
101504eeddc0SDimitry Andric     // then the call might be comparing the argument against the captured dest
101604eeddc0SDimitry Andric     // or src pointer.
101704eeddc0SDimitry Andric     Value *DestObj = getUnderlyingObject(cpyDest);
101804eeddc0SDimitry Andric     if (!isIdentifiedFunctionLocal(DestObj) ||
101904eeddc0SDimitry Andric         PointerMayBeCapturedBefore(DestObj, /* ReturnCaptures */ true,
102004eeddc0SDimitry Andric                                    /* StoreCaptures */ true, C, DT,
102104eeddc0SDimitry Andric                                    /* IncludeI */ true))
10220b57cec5SDimitry Andric       return false;
10230b57cec5SDimitry Andric 
102404eeddc0SDimitry Andric     MemoryLocation SrcLoc =
102504eeddc0SDimitry Andric         MemoryLocation(srcAlloca, LocationSize::precise(srcSize));
102604eeddc0SDimitry Andric     for (Instruction &I :
102704eeddc0SDimitry Andric          make_range(++C->getIterator(), C->getParent()->end())) {
102804eeddc0SDimitry Andric       // Lifetime of srcAlloca ends at lifetime.end.
102904eeddc0SDimitry Andric       if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
103004eeddc0SDimitry Andric         if (II->getIntrinsicID() == Intrinsic::lifetime_end &&
103104eeddc0SDimitry Andric             II->getArgOperand(1)->stripPointerCasts() == srcAlloca &&
103204eeddc0SDimitry Andric             cast<ConstantInt>(II->getArgOperand(0))->uge(srcSize))
103304eeddc0SDimitry Andric           break;
103404eeddc0SDimitry Andric       }
103504eeddc0SDimitry Andric 
103604eeddc0SDimitry Andric       // Lifetime of srcAlloca ends at return.
103704eeddc0SDimitry Andric       if (isa<ReturnInst>(&I))
103804eeddc0SDimitry Andric         break;
103904eeddc0SDimitry Andric 
104004eeddc0SDimitry Andric       // Ignore the direct read of src in the load.
104104eeddc0SDimitry Andric       if (&I == cpyLoad)
104204eeddc0SDimitry Andric         continue;
104304eeddc0SDimitry Andric 
104404eeddc0SDimitry Andric       // Check whether this instruction may mod/ref src through the captured
104504eeddc0SDimitry Andric       // pointer (we have already any direct mod/refs in the loop above).
104604eeddc0SDimitry Andric       // Also bail if we hit a terminator, as we don't want to scan into other
104704eeddc0SDimitry Andric       // blocks.
1048bdd1243dSDimitry Andric       if (isModOrRefSet(BAA.getModRefInfo(&I, SrcLoc)) || I.isTerminator())
104904eeddc0SDimitry Andric         return false;
105004eeddc0SDimitry Andric     }
105104eeddc0SDimitry Andric   }
105204eeddc0SDimitry Andric 
10530b57cec5SDimitry Andric   // Since we're changing the parameter to the callsite, we need to make sure
10540b57cec5SDimitry Andric   // that what would be the new parameter dominates the callsite.
10555f757f3fSDimitry Andric   bool NeedMoveGEP = false;
1056e8d8bef9SDimitry Andric   if (!DT->dominates(cpyDest, C)) {
1057e8d8bef9SDimitry Andric     // Support moving a constant index GEP before the call.
1058e8d8bef9SDimitry Andric     auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest);
1059e8d8bef9SDimitry Andric     if (GEP && GEP->hasAllConstantIndices() &&
1060e8d8bef9SDimitry Andric         DT->dominates(GEP->getPointerOperand(), C))
10615f757f3fSDimitry Andric       NeedMoveGEP = true;
1062e8d8bef9SDimitry Andric     else
10630b57cec5SDimitry Andric       return false;
1064e8d8bef9SDimitry Andric   }
10650b57cec5SDimitry Andric 
10660b57cec5SDimitry Andric   // In addition to knowing that the call does not access src in some
10670b57cec5SDimitry Andric   // unexpected manner, for example via a global, which we deduce from
10680b57cec5SDimitry Andric   // the use analysis, we also need to know that it does not sneakily
10690b57cec5SDimitry Andric   // access dest.  We rely on AA to figure this out for us.
1070bdd1243dSDimitry Andric   MemoryLocation DestWithSrcSize(cpyDest, LocationSize::precise(srcSize));
1071bdd1243dSDimitry Andric   ModRefInfo MR = BAA.getModRefInfo(C, DestWithSrcSize);
10720b57cec5SDimitry Andric   // If necessary, perform additional analysis.
10730b57cec5SDimitry Andric   if (isModOrRefSet(MR))
1074bdd1243dSDimitry Andric     MR = BAA.callCapturesBefore(C, DestWithSrcSize, DT);
10750b57cec5SDimitry Andric   if (isModOrRefSet(MR))
10760b57cec5SDimitry Andric     return false;
10770b57cec5SDimitry Andric 
10780b57cec5SDimitry Andric   // We can't create address space casts here because we don't know if they're
10790b57cec5SDimitry Andric   // safe for the target.
10805f757f3fSDimitry Andric   if (cpySrc->getType() != cpyDest->getType())
10810b57cec5SDimitry Andric     return false;
10825ffd83dbSDimitry Andric   for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
10835ffd83dbSDimitry Andric     if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc &&
10845f757f3fSDimitry Andric         cpySrc->getType() != C->getArgOperand(ArgI)->getType())
10850b57cec5SDimitry Andric       return false;
10860b57cec5SDimitry Andric 
10870b57cec5SDimitry Andric   // All the checks have passed, so do the transformation.
10880b57cec5SDimitry Andric   bool changedArgument = false;
10895ffd83dbSDimitry Andric   for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
10905ffd83dbSDimitry Andric     if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc) {
10910b57cec5SDimitry Andric       changedArgument = true;
10925f757f3fSDimitry Andric       C->setArgOperand(ArgI, cpyDest);
10930b57cec5SDimitry Andric     }
10940b57cec5SDimitry Andric 
10950b57cec5SDimitry Andric   if (!changedArgument)
10960b57cec5SDimitry Andric     return false;
10970b57cec5SDimitry Andric 
10980b57cec5SDimitry Andric   // If the destination wasn't sufficiently aligned then increase its alignment.
10990b57cec5SDimitry Andric   if (!isDestSufficientlyAligned) {
11000b57cec5SDimitry Andric     assert(isa<AllocaInst>(cpyDest) && "Can only increase alloca alignment!");
11015ffd83dbSDimitry Andric     cast<AllocaInst>(cpyDest)->setAlignment(srcAlign);
11020b57cec5SDimitry Andric   }
11030b57cec5SDimitry Andric 
11045f757f3fSDimitry Andric   if (NeedMoveGEP) {
11055f757f3fSDimitry Andric     auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest);
11065f757f3fSDimitry Andric     GEP->moveBefore(C);
11075f757f3fSDimitry Andric   }
11085f757f3fSDimitry Andric 
1109bdd1243dSDimitry Andric   if (SkippedLifetimeStart) {
1110bdd1243dSDimitry Andric     SkippedLifetimeStart->moveBefore(C);
1111bdd1243dSDimitry Andric     MSSAU->moveBefore(MSSA->getMemoryAccess(SkippedLifetimeStart),
1112bdd1243dSDimitry Andric                       MSSA->getMemoryAccess(C));
1113bdd1243dSDimitry Andric   }
1114bdd1243dSDimitry Andric 
11154542f901SDimitry Andric   combineAAMetadata(C, cpyLoad);
111604eeddc0SDimitry Andric   if (cpyLoad != cpyStore)
11174542f901SDimitry Andric     combineAAMetadata(C, cpyStore);
11180b57cec5SDimitry Andric 
1119e8d8bef9SDimitry Andric   ++NumCallSlot;
11200b57cec5SDimitry Andric   return true;
11210b57cec5SDimitry Andric }
11220b57cec5SDimitry Andric 
/// We've found that the (upward scanning) memory dependence of memcpy 'M' is
/// the memcpy 'MDep'. Try to simplify M to copy from MDep's input if we can.
/// I.e. rewrite
///    memcpy(d1 <- s1)            ; MDep
///    memcpy(d2 <- d1 [+ o])      ; M
/// into
///    memcpy(d2 <- s1 [+ o])
/// so the intermediate buffer d1 is no longer read by M (and may later become
/// dead). Returns true if M was transformed or erased.
bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
                                                  MemCpyInst *MDep,
                                                  BatchAAResults &BAA) {
  // If dep instruction is reading from our current input, then it is a noop
  // transfer and substituting the input won't change this instruction. Just
  // ignore the input and let someone else zap MDep. This handles cases like:
  //    memcpy(a <- a)
  //    memcpy(b <- a)
  if (M->getSource() == MDep->getSource())
    return false;

  // We can only optimize non-volatile memcpy's.
  if (MDep->isVolatile())
    return false;

  int64_t MForwardOffset = 0;
  const DataLayout &DL = M->getModule()->getDataLayout();
  // We can only transforms memcpy's where the dest of one is the source of the
  // other, or they have an offset in a range. If M reads from MDep's dest at a
  // constant non-negative byte offset, remember it so we can forward from
  // MDep's source at the same offset.
  if (M->getSource() != MDep->getDest()) {
    std::optional<int64_t> Offset =
        M->getSource()->getPointerOffsetFrom(MDep->getDest(), DL);
    if (!Offset || *Offset < 0)
      return false;
    MForwardOffset = *Offset;
  }

  // The length of the memcpy's must be the same, or the preceding one
  // must be larger than the following one, i.e. every byte M reads
  // (offset + M's length) must have been written by MDep. With equal
  // lengths and no offset this holds trivially; otherwise both lengths
  // must be known constants.
  if (MForwardOffset != 0 || MDep->getLength() != M->getLength()) {
    auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
    auto *MLen = dyn_cast<ConstantInt>(M->getLength());
    if (!MDepLen || !MLen ||
        MDepLen->getZExtValue() < MLen->getZExtValue() + MForwardOffset)
      return false;
  }

  IRBuilder<> Builder(M);
  auto *CopySource = MDep->getSource();
  Instruction *NewCopySource = nullptr;
  // If we materialize a GEP for the forwarded source below but then bail out
  // (or end up not using it), erase it again on every exit path.
  auto CleanupOnRet = llvm::make_scope_exit([&NewCopySource] {
    if (NewCopySource && NewCopySource->use_empty())
      // Safety: It's safe here because we will only allocate more instructions
      // after finishing all BatchAA queries, but we have to be careful if we
      // want to do something like this in another place. Then we'd probably
      // have to delay instruction removal until all transforms on an
      // instruction finished.
      NewCopySource->eraseFromParent();
  });
  MaybeAlign CopySourceAlign = MDep->getSourceAlign();
  // We just need to calculate the actual size of the copy: MDep's source
  // location, clipped to the number of bytes M actually reads.
  auto MCopyLoc = MemoryLocation::getForSource(MDep).getWithNewSize(
      MemoryLocation::getForSource(M).Size);

  // When the forwarding offset is greater than 0, we transform
  //    memcpy(d1 <- s1)
  //    memcpy(d2 <- d1+o)
  // to
  //    memcpy(d2 <- s1+o)
  if (MForwardOffset > 0) {
    // The copy destination of `M` maybe can serve as the source of copying:
    // if d2 happens to sit at exactly s1+o, reuse it instead of emitting a
    // new GEP (the must-alias check below then erases M as a self-copy).
    std::optional<int64_t> MDestOffset =
        M->getRawDest()->getPointerOffsetFrom(MDep->getRawSource(), DL);
    if (MDestOffset == MForwardOffset)
      CopySource = M->getDest();
    else {
      CopySource = Builder.CreateInBoundsPtrAdd(
          CopySource, Builder.getInt64(MForwardOffset));
      NewCopySource = dyn_cast<Instruction>(CopySource);
    }
    // We need to update `MCopyLoc` if an offset exists.
    MCopyLoc = MCopyLoc.getWithNewPtr(CopySource);
    // The offset may reduce the provable alignment of the source.
    if (CopySourceAlign)
      CopySourceAlign = commonAlignment(*CopySourceAlign, MForwardOffset);
  }

  // Verify that the copied-from memory doesn't change in between the two
  // transfers.  For example, in:
  //    memcpy(a <- b)
  //    *b = 42;
  //    memcpy(c <- a)
  // It would be invalid to transform the second memcpy into memcpy(c <- b).
  //
  // TODO: If the code between M and MDep is transparent to the destination "c",
  // then we could still perform the xform by moving M up to the first memcpy.
  if (writtenBetween(MSSA, BAA, MCopyLoc, MSSA->getMemoryAccess(MDep),
                     MSSA->getMemoryAccess(M)))
    return false;

  // No need to create `memcpy(a <- a)`: if the forwarded source must-aliases
  // M's dest, M is a no-op after forwarding and can simply be removed.
  if (BAA.isMustAlias(M->getDest(), CopySource)) {
    // Remove the instruction we're replacing.
    eraseInstruction(M);
    ++NumMemCpyInstr;
    return true;
  }

  // If the dest of the second might alias the source of the first, then the
  // source and dest might overlap. In addition, if the source of the first
  // points to constant memory, they won't overlap by definition. Otherwise, we
  // still want to eliminate the intermediate value, but we have to generate a
  // memmove instead of memcpy.
  bool UseMemMove = false;
  if (isModSet(BAA.getModRefInfo(M, MemoryLocation::getForSource(MDep)))) {
    // Don't convert llvm.memcpy.inline into memmove because memmove can be
    // lowered as a call, and that is not allowed for llvm.memcpy.inline (and
    // there is no inline version of llvm.memmove)
    if (isa<MemCpyInlineInst>(M))
      return false;
    UseMemMove = true;
  }

  // If all checks passed, then we can transform M.
  LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy->memcpy src:\n"
                    << *MDep << '\n'
                    << *M << '\n');

  // TODO: Is this worth it if we're creating a less aligned memcpy? For
  // example we could be moving from movaps -> movq on x86.
  Instruction *NewM;
  if (UseMemMove)
    NewM =
        Builder.CreateMemMove(M->getDest(), M->getDestAlign(), CopySource,
                              CopySourceAlign, M->getLength(), M->isVolatile());
  else if (isa<MemCpyInlineInst>(M)) {
    // llvm.memcpy may be promoted to llvm.memcpy.inline, but the converse is
    // never allowed since that would allow the latter to be lowered as a call
    // to an external function.
    NewM = Builder.CreateMemCpyInline(M->getDest(), M->getDestAlign(),
                                      CopySource, CopySourceAlign,
                                      M->getLength(), M->isVolatile());
  } else
    NewM =
        Builder.CreateMemCpy(M->getDest(), M->getDestAlign(), CopySource,
                             CopySourceAlign, M->getLength(), M->isVolatile());
  // Keep assignment-tracking metadata attached to the replacement copy.
  NewM->copyMetadata(*M, LLVMContext::MD_DIAssignID);

  // Register the replacement copy with MemorySSA as a new def placed right
  // after M's def, then rename uses so they pick up the new access.
  assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M)));
  auto *LastDef = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
  auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);

  // Remove the instruction we're replacing.
  eraseInstruction(M);
  ++NumMemCpyInstr;
  return true;
}
12720b57cec5SDimitry Andric 
/// We've found that the (upward scanning) memory dependence of \p MemCpy is
/// \p MemSet.  Try to simplify \p MemSet to only set the trailing bytes that
/// weren't copied over by \p MemCpy.
///
/// In other words, transform:
/// \code
///   memset(dst, c, dst_size);
///   ...
///   memcpy(dst, src, src_size);
/// \endcode
/// into:
/// \code
///   ...
///   memset(dst + src_size, c, dst_size <= src_size ? 0 : dst_size - src_size);
///   memcpy(dst, src, src_size);
/// \endcode
///
/// The memset is sunk to just before the memcpy to ensure that src_size is
/// present when emitting the simplified memset.
bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
                                                  MemSetInst *MemSet,
                                                  BatchAAResults &BAA) {
  // We can only transform memset/memcpy with the same destination.
  if (!BAA.isMustAlias(MemSet->getDest(), MemCpy->getDest()))
    return false;

  // Don't perform the transform if src_size may be zero. In that case, the
  // transform is essentially a complex no-op and may lead to an infinite
  // loop if BasicAA is smart enough to understand that dst and dst + src_size
  // are still MustAlias after the transform.
  Value *SrcSize = MemCpy->getLength();
  if (!isKnownNonZero(SrcSize,
                      SimplifyQuery(MemCpy->getDataLayout(), DT, AC, MemCpy)))
    return false;

  // Check that src and dst of the memcpy aren't the same. While memcpy
  // operands cannot partially overlap, exact equality is allowed.
  if (isModSet(BAA.getModRefInfo(MemCpy, MemoryLocation::getForSource(MemCpy))))
    return false;

  // We know that dst up to src_size is not written. We now need to make sure
  // that dst up to dst_size is not accessed. (If we did not move the memset,
  // checking for reads would be sufficient.)
  if (accessedBetween(BAA, MemoryLocation::getForDest(MemSet),
                      MSSA->getMemoryAccess(MemSet),
                      MSSA->getMemoryAccess(MemCpy)))
    return false;

  // Use the same i8* dest as the memcpy, killing the memset dest if different.
  Value *Dest = MemCpy->getRawDest();
  Value *DestSize = MemSet->getLength();

  // Sinking the memset past potential unwind points would make a partially
  // initialized dst observable; bail out in that case.
  if (mayBeVisibleThroughUnwinding(Dest, MemSet, MemCpy))
    return false;

  // If the sizes are the same, simply drop the memset instead of generating
  // a replacement with zero size.
  if (DestSize == SrcSize) {
    eraseInstruction(MemSet);
    return true;
  }

  // By default, create an unaligned memset.
  Align Alignment = Align(1);
  // If Dest is aligned, and SrcSize is constant, use the minimum alignment
  // of the sum (the replacement memset writes at Dest + SrcSize).
  const Align DestAlign = std::max(MemSet->getDestAlign().valueOrOne(),
                                   MemCpy->getDestAlign().valueOrOne());
  if (DestAlign > 1)
    if (auto *SrcSizeC = dyn_cast<ConstantInt>(SrcSize))
      Alignment = commonAlignment(DestAlign, SrcSizeC->getZExtValue());

  IRBuilder<> Builder(MemCpy);

  // Preserve the debug location of the old memset for the code emitted here
  // related to the new memset. This is correct according to the rules in
  // https://llvm.org/docs/HowToUpdateDebugInfo.html about "when to preserve an
  // instruction location", given that we move the memset within the basic
  // block.
  assert(MemSet->getParent() == MemCpy->getParent() &&
         "Preserving debug location based on moving memset within BB.");
  Builder.SetCurrentDebugLocation(MemSet->getDebugLoc());

  // If the sizes have different types, zext the smaller one so the
  // subtraction below operates on a common width.
  if (DestSize->getType() != SrcSize->getType()) {
    if (DestSize->getType()->getIntegerBitWidth() >
        SrcSize->getType()->getIntegerBitWidth())
      SrcSize = Builder.CreateZExt(SrcSize, DestSize->getType());
    else
      DestSize = Builder.CreateZExt(DestSize, SrcSize->getType());
  }

  // New memset length: dst_size <= src_size ? 0 : dst_size - src_size.
  // The select guards against the subtraction wrapping when the memcpy
  // covers the whole memset region.
  Value *Ule = Builder.CreateICmpULE(DestSize, SrcSize);
  Value *SizeDiff = Builder.CreateSub(DestSize, SrcSize);
  Value *MemsetLen = Builder.CreateSelect(
      Ule, ConstantInt::getNullValue(DestSize->getType()), SizeDiff);
  Instruction *NewMemSet =
      Builder.CreateMemSet(Builder.CreatePtrAdd(Dest, SrcSize),
                           MemSet->getOperand(1), MemsetLen, Alignment);

  assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)) &&
         "MemCpy must be a MemoryDef");
  // The new memset is inserted before the memcpy, and it is known that the
  // memcpy's defining access is the memset about to be removed.
  auto *LastDef =
      cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
  auto *NewAccess =
      MSSAU->createMemoryAccessBefore(NewMemSet, nullptr, LastDef);
  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);

  eraseInstruction(MemSet);
  return true;
}
13860b57cec5SDimitry Andric 
13870b57cec5SDimitry Andric /// Determine whether the instruction has undefined content for the given Size,
13880b57cec5SDimitry Andric /// either because it was freshly alloca'd or started its lifetime.
hasUndefContents(MemorySSA * MSSA,BatchAAResults & AA,Value * V,MemoryDef * Def,Value * Size)1389bdd1243dSDimitry Andric static bool hasUndefContents(MemorySSA *MSSA, BatchAAResults &AA, Value *V,
1390fe6060f1SDimitry Andric                              MemoryDef *Def, Value *Size) {
1391e8d8bef9SDimitry Andric   if (MSSA->isLiveOnEntryDef(Def))
1392e8d8bef9SDimitry Andric     return isa<AllocaInst>(getUnderlyingObject(V));
1393e8d8bef9SDimitry Andric 
139404eeddc0SDimitry Andric   if (auto *II = dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst())) {
1395e8d8bef9SDimitry Andric     if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
139604eeddc0SDimitry Andric       auto *LTSize = cast<ConstantInt>(II->getArgOperand(0));
1397fe6060f1SDimitry Andric 
139804eeddc0SDimitry Andric       if (auto *CSize = dyn_cast<ConstantInt>(Size)) {
1399bdd1243dSDimitry Andric         if (AA.isMustAlias(V, II->getArgOperand(1)) &&
1400fe6060f1SDimitry Andric             LTSize->getZExtValue() >= CSize->getZExtValue())
1401e8d8bef9SDimitry Andric           return true;
1402e8d8bef9SDimitry Andric       }
1403fe6060f1SDimitry Andric 
1404fe6060f1SDimitry Andric       // If the lifetime.start covers a whole alloca (as it almost always
1405fe6060f1SDimitry Andric       // does) and we're querying a pointer based on that alloca, then we know
1406fe6060f1SDimitry Andric       // the memory is definitely undef, regardless of how exactly we alias.
1407fe6060f1SDimitry Andric       // The size also doesn't matter, as an out-of-bounds access would be UB.
140804eeddc0SDimitry Andric       if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(V))) {
1409fe6060f1SDimitry Andric         if (getUnderlyingObject(II->getArgOperand(1)) == Alloca) {
1410*0fca6ea1SDimitry Andric           const DataLayout &DL = Alloca->getDataLayout();
1411bdd1243dSDimitry Andric           if (std::optional<TypeSize> AllocaSize =
1412bdd1243dSDimitry Andric                   Alloca->getAllocationSize(DL))
1413bdd1243dSDimitry Andric             if (*AllocaSize == LTSize->getValue())
1414fe6060f1SDimitry Andric               return true;
1415fe6060f1SDimitry Andric         }
1416fe6060f1SDimitry Andric       }
1417e8d8bef9SDimitry Andric     }
141804eeddc0SDimitry Andric   }
1419e8d8bef9SDimitry Andric 
1420e8d8bef9SDimitry Andric   return false;
1421e8d8bef9SDimitry Andric }
1422e8d8bef9SDimitry Andric 
/// Transform memcpy to memset when its source was just memset.
/// In other words, turn:
/// \code
///   memset(dst1, c, dst1_size);
///   memcpy(dst2, dst1, dst2_size);
/// \endcode
/// into:
/// \code
///   memset(dst1, c, dst1_size);
///   memset(dst2, c, dst2_size);
/// \endcode
/// When dst2_size <= dst1_size.
bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
                                               MemSetInst *MemSet,
                                               BatchAAResults &BAA) {
  // Make sure that memcpy(..., memset(...), ...), that is we are memsetting and
  // memcpying from the same address. Otherwise it is hard to reason about.
  if (!BAA.isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource()))
    return false;

  Value *MemSetSize = MemSet->getLength();
  Value *CopySize = MemCpy->getLength();

  // If the two length values are the same SSA value, the sizes trivially
  // match and no further size reasoning is needed.
  if (MemSetSize != CopySize) {
    // Make sure the memcpy doesn't read any more than what the memset wrote.
    // Don't worry about sizes larger than i64.

    // A known memset size is required.
    auto *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize);
    if (!CMemSetSize)
      return false;

    // A known memcpy size is also required.
    auto *CCopySize = dyn_cast<ConstantInt>(CopySize);
    if (!CCopySize)
      return false;
    if (CCopySize->getZExtValue() > CMemSetSize->getZExtValue()) {
      // If the memcpy is larger than the memset, but the memory was undef prior
      // to the memset, we can just ignore the tail. Technically we're only
      // interested in the bytes from MemSetSize..CopySize here, but as we can't
      // easily represent this location, we use the full 0..CopySize range.
      MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
      bool CanReduceSize = false;
      MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet);
      MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
          MemSetAccess->getDefiningAccess(), MemCpyLoc, BAA);
      if (auto *MD = dyn_cast<MemoryDef>(Clobber))
        if (hasUndefContents(MSSA, BAA, MemCpy->getSource(), MD, CopySize))
          CanReduceSize = true;

      if (!CanReduceSize)
        return false;
      // Shrink the replacement memset to the portion the original memset
      // actually defined; the undef tail may be dropped.
      CopySize = MemSetSize;
    }
  }

  // Emit the replacement memset over the memcpy's destination, reusing the
  // byte value from the dependent memset.
  IRBuilder<> Builder(MemCpy);
  Instruction *NewM =
      Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
                           CopySize, MemCpy->getDestAlign());
  // Register the new instruction with MemorySSA so the analysis remains
  // valid; the caller is responsible for erasing the original memcpy.
  auto *LastDef =
      cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
  auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);

  return true;
}
14900b57cec5SDimitry Andric 
14915f757f3fSDimitry Andric // Attempts to optimize the pattern whereby memory is copied from an alloca to
14925f757f3fSDimitry Andric // another alloca, where the two allocas don't have conflicting mod/ref. If
14935f757f3fSDimitry Andric // successful, the two allocas can be merged into one and the transfer can be
14945f757f3fSDimitry Andric // deleted. This pattern is generated frequently in Rust, due to the ubiquity of
14955f757f3fSDimitry Andric // move operations in that language.
14965f757f3fSDimitry Andric //
14975f757f3fSDimitry Andric // Once we determine that the optimization is safe to perform, we replace all
14985f757f3fSDimitry Andric // uses of the destination alloca with the source alloca. We also "shrink wrap"
14995f757f3fSDimitry Andric // the lifetime markers of the single merged alloca to before the first use
15005f757f3fSDimitry Andric // and after the last use. Note that the "shrink wrapping" procedure is a safe
15015f757f3fSDimitry Andric // transformation only because we restrict the scope of this optimization to
15025f757f3fSDimitry Andric // allocas that aren't captured.
performStackMoveOptzn(Instruction * Load,Instruction * Store,AllocaInst * DestAlloca,AllocaInst * SrcAlloca,TypeSize Size,BatchAAResults & BAA)15035f757f3fSDimitry Andric bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
15045f757f3fSDimitry Andric                                           AllocaInst *DestAlloca,
15055f757f3fSDimitry Andric                                           AllocaInst *SrcAlloca, TypeSize Size,
15065f757f3fSDimitry Andric                                           BatchAAResults &BAA) {
15075f757f3fSDimitry Andric   LLVM_DEBUG(dbgs() << "Stack Move: Attempting to optimize:\n"
15085f757f3fSDimitry Andric                     << *Store << "\n");
15095f757f3fSDimitry Andric 
15105f757f3fSDimitry Andric   // Make sure the two allocas are in the same address space.
15115f757f3fSDimitry Andric   if (SrcAlloca->getAddressSpace() != DestAlloca->getAddressSpace()) {
15125f757f3fSDimitry Andric     LLVM_DEBUG(dbgs() << "Stack Move: Address space mismatch\n");
15135f757f3fSDimitry Andric     return false;
15145f757f3fSDimitry Andric   }
15155f757f3fSDimitry Andric 
15165f757f3fSDimitry Andric   // Check that copy is full with static size.
1517*0fca6ea1SDimitry Andric   const DataLayout &DL = DestAlloca->getDataLayout();
15185f757f3fSDimitry Andric   std::optional<TypeSize> SrcSize = SrcAlloca->getAllocationSize(DL);
15195f757f3fSDimitry Andric   if (!SrcSize || Size != *SrcSize) {
15205f757f3fSDimitry Andric     LLVM_DEBUG(dbgs() << "Stack Move: Source alloca size mismatch\n");
15215f757f3fSDimitry Andric     return false;
15225f757f3fSDimitry Andric   }
15235f757f3fSDimitry Andric   std::optional<TypeSize> DestSize = DestAlloca->getAllocationSize(DL);
15245f757f3fSDimitry Andric   if (!DestSize || Size != *DestSize) {
15255f757f3fSDimitry Andric     LLVM_DEBUG(dbgs() << "Stack Move: Destination alloca size mismatch\n");
15265f757f3fSDimitry Andric     return false;
15275f757f3fSDimitry Andric   }
15285f757f3fSDimitry Andric 
15295f757f3fSDimitry Andric   if (!SrcAlloca->isStaticAlloca() || !DestAlloca->isStaticAlloca())
15305f757f3fSDimitry Andric     return false;
15315f757f3fSDimitry Andric 
15325f757f3fSDimitry Andric   // Check that src and dest are never captured, unescaped allocas. Also
15335f757f3fSDimitry Andric   // find the nearest common dominator and postdominator for all users in
15345f757f3fSDimitry Andric   // order to shrink wrap the lifetimes, and instructions with noalias metadata
15355f757f3fSDimitry Andric   // to remove them.
15365f757f3fSDimitry Andric 
15375f757f3fSDimitry Andric   SmallVector<Instruction *, 4> LifetimeMarkers;
15385f757f3fSDimitry Andric   SmallSet<Instruction *, 4> NoAliasInstrs;
15395f757f3fSDimitry Andric   bool SrcNotDom = false;
15405f757f3fSDimitry Andric 
15415f757f3fSDimitry Andric   // Recursively track the user and check whether modified alias exist.
15425f757f3fSDimitry Andric   auto IsDereferenceableOrNull = [](Value *V, const DataLayout &DL) -> bool {
15435f757f3fSDimitry Andric     bool CanBeNull, CanBeFreed;
15445f757f3fSDimitry Andric     return V->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed);
15455f757f3fSDimitry Andric   };
15465f757f3fSDimitry Andric 
15475f757f3fSDimitry Andric   auto CaptureTrackingWithModRef =
15485f757f3fSDimitry Andric       [&](Instruction *AI,
15495f757f3fSDimitry Andric           function_ref<bool(Instruction *)> ModRefCallback) -> bool {
15505f757f3fSDimitry Andric     SmallVector<Instruction *, 8> Worklist;
15515f757f3fSDimitry Andric     Worklist.push_back(AI);
15525f757f3fSDimitry Andric     unsigned MaxUsesToExplore = getDefaultMaxUsesToExploreForCaptureTracking();
15535f757f3fSDimitry Andric     Worklist.reserve(MaxUsesToExplore);
15545f757f3fSDimitry Andric     SmallSet<const Use *, 20> Visited;
15555f757f3fSDimitry Andric     while (!Worklist.empty()) {
15565f757f3fSDimitry Andric       Instruction *I = Worklist.back();
15575f757f3fSDimitry Andric       Worklist.pop_back();
15585f757f3fSDimitry Andric       for (const Use &U : I->uses()) {
15595f757f3fSDimitry Andric         auto *UI = cast<Instruction>(U.getUser());
15605f757f3fSDimitry Andric         // If any use that isn't dominated by SrcAlloca exists, we move src
15615f757f3fSDimitry Andric         // alloca to the entry before the transformation.
15625f757f3fSDimitry Andric         if (!DT->dominates(SrcAlloca, UI))
15635f757f3fSDimitry Andric           SrcNotDom = true;
15645f757f3fSDimitry Andric 
15655f757f3fSDimitry Andric         if (Visited.size() >= MaxUsesToExplore) {
15665f757f3fSDimitry Andric           LLVM_DEBUG(
15675f757f3fSDimitry Andric               dbgs()
15685f757f3fSDimitry Andric               << "Stack Move: Exceeded max uses to see ModRef, bailing\n");
15695f757f3fSDimitry Andric           return false;
15705f757f3fSDimitry Andric         }
15715f757f3fSDimitry Andric         if (!Visited.insert(&U).second)
15725f757f3fSDimitry Andric           continue;
15735f757f3fSDimitry Andric         switch (DetermineUseCaptureKind(U, IsDereferenceableOrNull)) {
15745f757f3fSDimitry Andric         case UseCaptureKind::MAY_CAPTURE:
15755f757f3fSDimitry Andric           return false;
15765f757f3fSDimitry Andric         case UseCaptureKind::PASSTHROUGH:
15775f757f3fSDimitry Andric           // Instructions cannot have non-instruction users.
15785f757f3fSDimitry Andric           Worklist.push_back(UI);
15795f757f3fSDimitry Andric           continue;
15805f757f3fSDimitry Andric         case UseCaptureKind::NO_CAPTURE: {
15815f757f3fSDimitry Andric           if (UI->isLifetimeStartOrEnd()) {
15825f757f3fSDimitry Andric             // We note the locations of these intrinsic calls so that we can
15835f757f3fSDimitry Andric             // delete them later if the optimization succeeds, this is safe
15845f757f3fSDimitry Andric             // since both llvm.lifetime.start and llvm.lifetime.end intrinsics
15855f757f3fSDimitry Andric             // practically fill all the bytes of the alloca with an undefined
15865f757f3fSDimitry Andric             // value, although conceptually marked as alive/dead.
15875f757f3fSDimitry Andric             int64_t Size = cast<ConstantInt>(UI->getOperand(0))->getSExtValue();
15885f757f3fSDimitry Andric             if (Size < 0 || Size == DestSize) {
15895f757f3fSDimitry Andric               LifetimeMarkers.push_back(UI);
15905f757f3fSDimitry Andric               continue;
15915f757f3fSDimitry Andric             }
15925f757f3fSDimitry Andric           }
15935f757f3fSDimitry Andric           if (UI->hasMetadata(LLVMContext::MD_noalias))
15945f757f3fSDimitry Andric             NoAliasInstrs.insert(UI);
15955f757f3fSDimitry Andric           if (!ModRefCallback(UI))
15965f757f3fSDimitry Andric             return false;
15975f757f3fSDimitry Andric         }
15985f757f3fSDimitry Andric         }
15995f757f3fSDimitry Andric       }
16005f757f3fSDimitry Andric     }
16015f757f3fSDimitry Andric     return true;
16025f757f3fSDimitry Andric   };
16035f757f3fSDimitry Andric 
16045f757f3fSDimitry Andric   // Check that dest has no Mod/Ref, from the alloca to the Store, except full
16055f757f3fSDimitry Andric   // size lifetime intrinsics. And collect modref inst for the reachability
16065f757f3fSDimitry Andric   // check.
16075f757f3fSDimitry Andric   ModRefInfo DestModRef = ModRefInfo::NoModRef;
16085f757f3fSDimitry Andric   MemoryLocation DestLoc(DestAlloca, LocationSize::precise(Size));
16095f757f3fSDimitry Andric   SmallVector<BasicBlock *, 8> ReachabilityWorklist;
16105f757f3fSDimitry Andric   auto DestModRefCallback = [&](Instruction *UI) -> bool {
16115f757f3fSDimitry Andric     // We don't care about the store itself.
16125f757f3fSDimitry Andric     if (UI == Store)
16135f757f3fSDimitry Andric       return true;
16145f757f3fSDimitry Andric     ModRefInfo Res = BAA.getModRefInfo(UI, DestLoc);
16155f757f3fSDimitry Andric     DestModRef |= Res;
16165f757f3fSDimitry Andric     if (isModOrRefSet(Res)) {
16175f757f3fSDimitry Andric       // Instructions reachability checks.
16185f757f3fSDimitry Andric       // FIXME: adding the Instruction version isPotentiallyReachableFromMany on
16195f757f3fSDimitry Andric       // lib/Analysis/CFG.cpp (currently only for BasicBlocks) might be helpful.
16205f757f3fSDimitry Andric       if (UI->getParent() == Store->getParent()) {
16215f757f3fSDimitry Andric         // The same block case is special because it's the only time we're
16225f757f3fSDimitry Andric         // looking within a single block to see which instruction comes first.
16235f757f3fSDimitry Andric         // Once we start looking at multiple blocks, the first instruction of
16245f757f3fSDimitry Andric         // the block is reachable, so we only need to determine reachability
16255f757f3fSDimitry Andric         // between whole blocks.
16265f757f3fSDimitry Andric         BasicBlock *BB = UI->getParent();
16275f757f3fSDimitry Andric 
16285f757f3fSDimitry Andric         // If A comes before B, then B is definitively reachable from A.
16295f757f3fSDimitry Andric         if (UI->comesBefore(Store))
16305f757f3fSDimitry Andric           return false;
16315f757f3fSDimitry Andric 
16325f757f3fSDimitry Andric         // If the user's parent block is entry, no predecessor exists.
16335f757f3fSDimitry Andric         if (BB->isEntryBlock())
16345f757f3fSDimitry Andric           return true;
16355f757f3fSDimitry Andric 
16365f757f3fSDimitry Andric         // Otherwise, continue doing the normal per-BB CFG walk.
16375f757f3fSDimitry Andric         ReachabilityWorklist.append(succ_begin(BB), succ_end(BB));
16385f757f3fSDimitry Andric       } else {
16395f757f3fSDimitry Andric         ReachabilityWorklist.push_back(UI->getParent());
16405f757f3fSDimitry Andric       }
16415f757f3fSDimitry Andric     }
16425f757f3fSDimitry Andric     return true;
16435f757f3fSDimitry Andric   };
16445f757f3fSDimitry Andric 
16455f757f3fSDimitry Andric   if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback))
16465f757f3fSDimitry Andric     return false;
16475f757f3fSDimitry Andric   // Bailout if Dest may have any ModRef before Store.
16485f757f3fSDimitry Andric   if (!ReachabilityWorklist.empty() &&
16495f757f3fSDimitry Andric       isPotentiallyReachableFromMany(ReachabilityWorklist, Store->getParent(),
16505f757f3fSDimitry Andric                                      nullptr, DT, nullptr))
16515f757f3fSDimitry Andric     return false;
16525f757f3fSDimitry Andric 
16535f757f3fSDimitry Andric   // Check that, from after the Load to the end of the BB,
16545f757f3fSDimitry Andric   //   - if the dest has any Mod, src has no Ref, and
16555f757f3fSDimitry Andric   //   - if the dest has any Ref, src has no Mod except full-sized lifetimes.
16565f757f3fSDimitry Andric   MemoryLocation SrcLoc(SrcAlloca, LocationSize::precise(Size));
16575f757f3fSDimitry Andric 
16585f757f3fSDimitry Andric   auto SrcModRefCallback = [&](Instruction *UI) -> bool {
16595f757f3fSDimitry Andric     // Any ModRef post-dominated by Load doesn't matter, also Load and Store
16605f757f3fSDimitry Andric     // themselves can be ignored.
16615f757f3fSDimitry Andric     if (PDT->dominates(Load, UI) || UI == Load || UI == Store)
16625f757f3fSDimitry Andric       return true;
16635f757f3fSDimitry Andric     ModRefInfo Res = BAA.getModRefInfo(UI, SrcLoc);
16645f757f3fSDimitry Andric     if ((isModSet(DestModRef) && isRefSet(Res)) ||
16655f757f3fSDimitry Andric         (isRefSet(DestModRef) && isModSet(Res)))
16665f757f3fSDimitry Andric       return false;
16675f757f3fSDimitry Andric 
16685f757f3fSDimitry Andric     return true;
16695f757f3fSDimitry Andric   };
16705f757f3fSDimitry Andric 
16715f757f3fSDimitry Andric   if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback))
16725f757f3fSDimitry Andric     return false;
16735f757f3fSDimitry Andric 
16745f757f3fSDimitry Andric   // We can do the transformation. First, move the SrcAlloca to the start of the
16755f757f3fSDimitry Andric   // BB.
16765f757f3fSDimitry Andric   if (SrcNotDom)
16775f757f3fSDimitry Andric     SrcAlloca->moveBefore(*SrcAlloca->getParent(),
16785f757f3fSDimitry Andric                           SrcAlloca->getParent()->getFirstInsertionPt());
16795f757f3fSDimitry Andric   // Align the allocas appropriately.
16805f757f3fSDimitry Andric   SrcAlloca->setAlignment(
16815f757f3fSDimitry Andric       std::max(SrcAlloca->getAlign(), DestAlloca->getAlign()));
16825f757f3fSDimitry Andric 
16835f757f3fSDimitry Andric   // Merge the two allocas.
16845f757f3fSDimitry Andric   DestAlloca->replaceAllUsesWith(SrcAlloca);
16855f757f3fSDimitry Andric   eraseInstruction(DestAlloca);
16865f757f3fSDimitry Andric 
16875f757f3fSDimitry Andric   // Drop metadata on the source alloca.
16885f757f3fSDimitry Andric   SrcAlloca->dropUnknownNonDebugMetadata();
16895f757f3fSDimitry Andric 
16905f757f3fSDimitry Andric   // TODO: Reconstruct merged lifetime markers.
16915f757f3fSDimitry Andric   // Remove all other lifetime markers. if the original lifetime intrinsics
16925f757f3fSDimitry Andric   // exists.
16935f757f3fSDimitry Andric   if (!LifetimeMarkers.empty()) {
16945f757f3fSDimitry Andric     for (Instruction *I : LifetimeMarkers)
16955f757f3fSDimitry Andric       eraseInstruction(I);
16965f757f3fSDimitry Andric   }
16975f757f3fSDimitry Andric 
16985f757f3fSDimitry Andric   // As this transformation can cause memory accesses that didn't previously
16995f757f3fSDimitry Andric   // alias to begin to alias one another, we remove !noalias metadata from any
17005f757f3fSDimitry Andric   // uses of either alloca. This is conservative, but more precision doesn't
17015f757f3fSDimitry Andric   // seem worthwhile right now.
17025f757f3fSDimitry Andric   for (Instruction *I : NoAliasInstrs)
17035f757f3fSDimitry Andric     I->setMetadata(LLVMContext::MD_noalias, nullptr);
17045f757f3fSDimitry Andric 
17055f757f3fSDimitry Andric   LLVM_DEBUG(dbgs() << "Stack Move: Performed staack-move optimization\n");
17065f757f3fSDimitry Andric   NumStackMove++;
17075f757f3fSDimitry Andric   return true;
17085f757f3fSDimitry Andric }
17095f757f3fSDimitry Andric 
isZeroSize(Value * Size)17105f757f3fSDimitry Andric static bool isZeroSize(Value *Size) {
17115f757f3fSDimitry Andric   if (auto *I = dyn_cast<Instruction>(Size))
1712*0fca6ea1SDimitry Andric     if (auto *Res = simplifyInstruction(I, I->getDataLayout()))
17135f757f3fSDimitry Andric       Size = Res;
17145f757f3fSDimitry Andric   // Treat undef/poison size like zero.
17155f757f3fSDimitry Andric   if (auto *C = dyn_cast<Constant>(Size))
17165f757f3fSDimitry Andric     return isa<UndefValue>(C) || C->isNullValue();
17175f757f3fSDimitry Andric   return false;
17185f757f3fSDimitry Andric }
17195f757f3fSDimitry Andric 
/// Perform simplification of memcpy's.  If we have memcpy A
/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
/// B to be a memcpy from X to Z (or potentially a memmove, depending on
/// circumstances). This allows later passes to remove the first memcpy
/// altogether.
bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
  // We can only optimize non-volatile memcpy's.
  if (M->isVolatile())
    return false;

  // If the source and destination of the memcpy are the same, then zap it.
  // Advance BBI first so the caller's iteration is not invalidated by the
  // erase.
  if (M->getSource() == M->getDest()) {
    ++BBI;
    eraseInstruction(M);
    return true;
  }

  // If the size is zero, remove the memcpy.
  if (isZeroSize(M->getLength())) {
    ++BBI;
    eraseInstruction(M);
    return true;
  }

  MemoryUseOrDef *MA = MSSA->getMemoryAccess(M);
  if (!MA)
    // Degenerate case: memcpy marked as not accessing memory.
    return false;

  // If copying from a constant, try to turn the memcpy into a memset.
  if (auto *GV = dyn_cast<GlobalVariable>(M->getSource()))
    if (GV->isConstant() && GV->hasDefinitiveInitializer())
      if (Value *ByteVal = isBytewiseValue(GV->getInitializer(),
                                           M->getDataLayout())) {
        // The initializer is a splat of one byte value, so a memset of that
        // byte over the destination is equivalent.
        IRBuilder<> Builder(M);
        Instruction *NewM = Builder.CreateMemSet(
            M->getRawDest(), ByteVal, M->getLength(), M->getDestAlign(), false);
        auto *LastDef = cast<MemoryDef>(MA);
        auto *NewAccess =
            MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
        MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);

        eraseInstruction(M);
        ++NumCpyToSet;
        return true;
      }

  BatchAAResults BAA(*AA);
  // FIXME: Not using getClobberingMemoryAccess() here due to PR54682.
  MemoryAccess *AnyClobber = MA->getDefiningAccess();
  MemoryLocation DestLoc = MemoryLocation::getForDest(M);
  const MemoryAccess *DestClobber =
      MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc, BAA);

  // Try to turn a partially redundant memset + memcpy into
  // smaller memset + memcpy.  We don't need the memcpy size for this.
  // The memcpy must post-dom the memset, so limit this to the same basic
  // block. A non-local generalization is likely not worthwhile.
  if (auto *MD = dyn_cast<MemoryDef>(DestClobber))
    if (auto *MDep = dyn_cast_or_null<MemSetInst>(MD->getMemoryInst()))
      if (DestClobber->getBlock() == M->getParent())
        if (processMemSetMemCpyDependence(M, MDep, BAA))
          return true;

  // Walk to the instruction that last clobbered the memcpy's *source*; the
  // remaining transforms key off what that clobbering instruction is.
  MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
      AnyClobber, MemoryLocation::getForSource(M), BAA);

  // There are five possible optimizations we can do for memcpy:
  //   a) memcpy-memcpy xform which exposes redundancy for DSE.
  //   b) call-memcpy xform for return slot optimization.
  //   c) memcpy from freshly alloca'd space or space that has just started
  //      its lifetime copies undefined data, and we can therefore eliminate
  //      the memcpy in favor of the data that was already at the destination.
  //   d) memcpy from a just-memset'd source can be turned into memset.
  //   e) elimination of memcpy via stack-move optimization.
  if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
    if (Instruction *MI = MD->getMemoryInst()) {
      if (auto *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
        if (auto *C = dyn_cast<CallInst>(MI)) {
          // (b) The source was last written by a call: try the return slot
          // (call slot) optimization.
          if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
                                   TypeSize::getFixed(CopySize->getZExtValue()),
                                   M->getDestAlign().valueOrOne(), BAA,
                                   [C]() -> CallInst * { return C; })) {
            LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
                              << "    call: " << *C << "\n"
                              << "    memcpy: " << *M << "\n");
            eraseInstruction(M);
            ++NumMemCpyInstr;
            return true;
          }
        }
      }
      // (a) The source was last written by another memcpy.
      if (auto *MDep = dyn_cast<MemCpyInst>(MI))
        if (processMemCpyMemCpyDependence(M, MDep, BAA))
          return true;
      // (d) The source was last written by a memset.
      if (auto *MDep = dyn_cast<MemSetInst>(MI)) {
        if (performMemCpyToMemSetOptzn(M, MDep, BAA)) {
          LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n");
          eraseInstruction(M);
          ++NumCpyToSet;
          return true;
        }
      }
    }

    // (c) The source holds undefined data, so the copy is a no-op.
    if (hasUndefContents(MSSA, BAA, M->getSource(), MD, M->getLength())) {
      LLVM_DEBUG(dbgs() << "Removed memcpy from undef\n");
      eraseInstruction(M);
      ++NumMemCpyInstr;
      return true;
    }
  }

  // (e) If the transfer is from a stack slot to a stack slot, then we may be
  // able to perform the stack-move optimization. See the comments in
  // performStackMoveOptzn() for more details.
  auto *DestAlloca = dyn_cast<AllocaInst>(M->getDest());
  if (!DestAlloca)
    return false;
  auto *SrcAlloca = dyn_cast<AllocaInst>(M->getSource());
  if (!SrcAlloca)
    return false;
  ConstantInt *Len = dyn_cast<ConstantInt>(M->getLength());
  if (Len == nullptr)
    return false;
  if (performStackMoveOptzn(M, M, DestAlloca, SrcAlloca,
                            TypeSize::getFixed(Len->getZExtValue()), BAA)) {
    // Avoid invalidating the iterator.
    BBI = M->getNextNonDebugInstruction()->getIterator();
    eraseInstruction(M);
    ++NumMemCpyInstr;
    return true;
  }

  return false;
}
18560b57cec5SDimitry Andric 
18570b57cec5SDimitry Andric /// Transforms memmove calls to memcpy calls when the src/dst are guaranteed
18580b57cec5SDimitry Andric /// not to alias.
processMemMove(MemMoveInst * M)18590b57cec5SDimitry Andric bool MemCpyOptPass::processMemMove(MemMoveInst *M) {
1860349cc55cSDimitry Andric   // See if the source could be modified by this memmove potentially.
1861349cc55cSDimitry Andric   if (isModSet(AA->getModRefInfo(M, MemoryLocation::getForSource(M))))
18620b57cec5SDimitry Andric     return false;
18630b57cec5SDimitry Andric 
18640b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "MemCpyOptPass: Optimizing memmove -> memcpy: " << *M
18650b57cec5SDimitry Andric                     << "\n");
18660b57cec5SDimitry Andric 
18670b57cec5SDimitry Andric   // If not, then we know we can transform this.
1868*0fca6ea1SDimitry Andric   Type *ArgTys[3] = {M->getRawDest()->getType(), M->getRawSource()->getType(),
18690b57cec5SDimitry Andric                      M->getLength()->getType()};
1870*0fca6ea1SDimitry Andric   M->setCalledFunction(
1871*0fca6ea1SDimitry Andric       Intrinsic::getDeclaration(M->getModule(), Intrinsic::memcpy, ArgTys));
18720b57cec5SDimitry Andric 
1873e8d8bef9SDimitry Andric   // For MemorySSA nothing really changes (except that memcpy may imply stricter
1874e8d8bef9SDimitry Andric   // aliasing guarantees).
1875e8d8bef9SDimitry Andric 
18760b57cec5SDimitry Andric   ++NumMoveToCpy;
18770b57cec5SDimitry Andric   return true;
18780b57cec5SDimitry Andric }
18790b57cec5SDimitry Andric 
/// This is called on every byval argument in call sites.
///
/// Given
///    memcpy(a <- b)
///    foo(byval a)
/// try to rewrite the call to pass the memcpy's source instead:
///    foo(byval b)
/// which may later allow the memcpy (and the temporary) to be eliminated.
/// Returns true if the call was changed.
bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
  const DataLayout &DL = CB.getDataLayout();
  // Find out what feeds this byval argument.
  Value *ByValArg = CB.getArgOperand(ArgNo);
  Type *ByValTy = CB.getParamByValType(ArgNo);
  TypeSize ByValSize = DL.getTypeAllocSize(ByValTy);
  MemoryLocation Loc(ByValArg, LocationSize::precise(ByValSize));
  MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
  if (!CallAccess)
    return false;
  // Walk MemorySSA upwards from the call to find the access that last wrote
  // the byval memory; the transform only applies when that clobber is a
  // memcpy whose dest is the byval argument.
  MemCpyInst *MDep = nullptr;
  BatchAAResults BAA(*AA);
  MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
      CallAccess->getDefiningAccess(), Loc, BAA);
  if (auto *MD = dyn_cast<MemoryDef>(Clobber))
    MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());

  // If the byval argument isn't fed by a memcpy, ignore it.  If it is fed by
  // a memcpy, see if we can byval from the source of the memcpy instead of the
  // result.
  if (!MDep || MDep->isVolatile() ||
      ByValArg->stripPointerCasts() != MDep->getDest())
    return false;

  // The length of the memcpy must be larger or equal to the size of the byval.
  auto *C1 = dyn_cast<ConstantInt>(MDep->getLength());
  if (!C1 || !TypeSize::isKnownGE(
                 TypeSize::getFixed(C1->getValue().getZExtValue()), ByValSize))
    return false;

  // Get the alignment of the byval.  If the call doesn't specify the alignment,
  // then it is some target specific value that we can't know.
  MaybeAlign ByValAlign = CB.getParamAlign(ArgNo);
  if (!ByValAlign)
    return false;

  // If it is greater than the memcpy, then we check to see if we can force the
  // source of the memcpy to the alignment we need.  If we fail, we bail out.
  // Note: getOrEnforceKnownAlignment may increase the alignment of the source
  // alloca as a side effect.
  MaybeAlign MemDepAlign = MDep->getSourceAlign();
  if ((!MemDepAlign || *MemDepAlign < *ByValAlign) &&
      getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, &CB, AC,
                                 DT) < *ByValAlign)
    return false;

  // The type of the memcpy source must match the byval argument
  if (MDep->getSource()->getType() != ByValArg->getType())
    return false;

  // Verify that the copied-from memory doesn't change in between the memcpy and
  // the byval call.
  //    memcpy(a <- b)
  //    *b = 42;
  //    foo(*a)
  // It would be invalid to transform the second memcpy into foo(*b).
  if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
                     MSSA->getMemoryAccess(MDep), CallAccess))
    return false;

  LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to byval:\n"
                    << "  " << *MDep << "\n"
                    << "  " << CB << "\n");

  // Otherwise we're good!  Update the byval argument.
  combineAAMetadata(&CB, MDep);
  CB.setArgOperand(ArgNo, MDep->getSource());
  ++NumMemCpyInstr;
  return true;
}
194906c3fb27SDimitry Andric 
/// This is called on memcpy dest pointer arguments attributed as immutable
/// during call. Try to use memcpy source directly if all of the following
/// conditions are satisfied.
/// 1. The memcpy dst is neither modified during the call nor captured by the
/// call. (if readonly, noalias, nocapture attributes on call-site.)
/// 2. The memcpy dst is an alloca with known alignment & size.
///     2-1. The memcpy length == the alloca size which ensures that the new
///     pointer is dereferenceable for the required range
///     2-2. The src pointer has alignment >= the alloca alignment or can be
///     enforced so.
/// 3. The memcpy dst and src is not modified between the memcpy and the call.
/// (if MSSA clobber check is safe.)
/// 4. The memcpy src is not modified during the call. (ModRef check shows no
/// Mod.)
///
/// Returns true if the call argument was rewritten to the memcpy source.
bool MemCpyOptPass::processImmutArgument(CallBase &CB, unsigned ArgNo) {
  // 1. Ensure passed argument is immutable during call.
  // (The readonly part of condition 1 is checked by the caller via
  // CB.onlyReadsMemory(ArgNo); only noalias/nocapture are checked here.)
  if (!(CB.paramHasAttr(ArgNo, Attribute::NoAlias) &&
        CB.paramHasAttr(ArgNo, Attribute::NoCapture)))
    return false;
  const DataLayout &DL = CB.getDataLayout();
  Value *ImmutArg = CB.getArgOperand(ArgNo);

  // 2. Check that arg is alloca
  // TODO: Even if the arg gets back to branches, we can remove memcpy if all
  // the alloca alignments can be enforced to source alignment.
  auto *AI = dyn_cast<AllocaInst>(ImmutArg->stripPointerCasts());
  if (!AI)
    return false;

  std::optional<TypeSize> AllocaSize = AI->getAllocationSize(DL);
  // Can't handle unknown size alloca.
  // (e.g. Variable Length Array, Scalable Vector)
  if (!AllocaSize || AllocaSize->isScalable())
    return false;
  MemoryLocation Loc(ImmutArg, LocationSize::precise(*AllocaSize));
  MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
  if (!CallAccess)
    return false;

  // Walk MemorySSA upwards from the call to find the access that last wrote
  // the argument memory; the transform only applies when it is a memcpy.
  MemCpyInst *MDep = nullptr;
  BatchAAResults BAA(*AA);
  MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
      CallAccess->getDefiningAccess(), Loc, BAA);
  if (auto *MD = dyn_cast<MemoryDef>(Clobber))
    MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());

  // If the immut argument isn't fed by a memcpy, ignore it.  If it is fed by
  // a memcpy, check that the arg equals the memcpy dest.
  if (!MDep || MDep->isVolatile() || AI != MDep->getDest())
    return false;

  // The type of the memcpy source must match the immut argument
  if (MDep->getSource()->getType() != ImmutArg->getType())
    return false;

  // 2-1. The length of the memcpy must be equal to the size of the alloca.
  auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
  if (!MDepLen || AllocaSize != MDepLen->getValue())
    return false;

  // 2-2. the memcpy source align must be larger than or equal the alloca's
  // align. If not so, we check to see if we can force the source of the memcpy
  // to the alignment we need. If we fail, we bail out.
  Align MemDepAlign = MDep->getSourceAlign().valueOrOne();
  Align AllocaAlign = AI->getAlign();
  if (MemDepAlign < AllocaAlign &&
      getOrEnforceKnownAlignment(MDep->getSource(), AllocaAlign, DL, &CB, AC,
                                 DT) < AllocaAlign)
    return false;

  // 3. Verify that the source doesn't change in between the memcpy and
  // the call.
  //    memcpy(a <- b)
  //    *b = 42;
  //    foo(*a)
  // It would be invalid to transform the second memcpy into foo(*b).
  if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
                     MSSA->getMemoryAccess(MDep), CallAccess))
    return false;

  // 4. The memcpy src must not be modified during the call.
  if (isModSet(AA->getModRefInfo(&CB, MemoryLocation::getForSource(MDep))))
    return false;

  LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to Immut src:\n"
                    << "  " << *MDep << "\n"
                    << "  " << CB << "\n");

  // Otherwise we're good!  Update the immut argument.
  combineAAMetadata(&CB, MDep);
  CB.setArgOperand(ArgNo, MDep->getSource());
  ++NumMemCpyInstr;
  return true;
}
20440b57cec5SDimitry Andric 
20450b57cec5SDimitry Andric /// Executes one iteration of MemCpyOptPass.
iterateOnFunction(Function & F)20460b57cec5SDimitry Andric bool MemCpyOptPass::iterateOnFunction(Function &F) {
20470b57cec5SDimitry Andric   bool MadeChange = false;
20480b57cec5SDimitry Andric 
20490b57cec5SDimitry Andric   // Walk all instruction in the function.
20500b57cec5SDimitry Andric   for (BasicBlock &BB : F) {
20510b57cec5SDimitry Andric     // Skip unreachable blocks. For example processStore assumes that an
20520b57cec5SDimitry Andric     // instruction in a BB can't be dominated by a later instruction in the
20530b57cec5SDimitry Andric     // same BB (which is a scenario that can happen for an unreachable BB that
20540b57cec5SDimitry Andric     // has itself as a predecessor).
2055e8d8bef9SDimitry Andric     if (!DT->isReachableFromEntry(&BB))
20560b57cec5SDimitry Andric       continue;
20570b57cec5SDimitry Andric 
20580b57cec5SDimitry Andric     for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
20590b57cec5SDimitry Andric       // Avoid invalidating the iterator.
20600b57cec5SDimitry Andric       Instruction *I = &*BI++;
20610b57cec5SDimitry Andric 
20620b57cec5SDimitry Andric       bool RepeatInstruction = false;
20630b57cec5SDimitry Andric 
206404eeddc0SDimitry Andric       if (auto *SI = dyn_cast<StoreInst>(I))
20650b57cec5SDimitry Andric         MadeChange |= processStore(SI, BI);
206604eeddc0SDimitry Andric       else if (auto *M = dyn_cast<MemSetInst>(I))
20670b57cec5SDimitry Andric         RepeatInstruction = processMemSet(M, BI);
206804eeddc0SDimitry Andric       else if (auto *M = dyn_cast<MemCpyInst>(I))
20695ffd83dbSDimitry Andric         RepeatInstruction = processMemCpy(M, BI);
207004eeddc0SDimitry Andric       else if (auto *M = dyn_cast<MemMoveInst>(I))
20710b57cec5SDimitry Andric         RepeatInstruction = processMemMove(M);
20725ffd83dbSDimitry Andric       else if (auto *CB = dyn_cast<CallBase>(I)) {
207306c3fb27SDimitry Andric         for (unsigned i = 0, e = CB->arg_size(); i != e; ++i) {
20745ffd83dbSDimitry Andric           if (CB->isByValArgument(i))
20755ffd83dbSDimitry Andric             MadeChange |= processByValArgument(*CB, i);
207606c3fb27SDimitry Andric           else if (CB->onlyReadsMemory(i))
207706c3fb27SDimitry Andric             MadeChange |= processImmutArgument(*CB, i);
207806c3fb27SDimitry Andric         }
20790b57cec5SDimitry Andric       }
20800b57cec5SDimitry Andric 
20810b57cec5SDimitry Andric       // Reprocess the instruction if desired.
20820b57cec5SDimitry Andric       if (RepeatInstruction) {
20830b57cec5SDimitry Andric         if (BI != BB.begin())
20840b57cec5SDimitry Andric           --BI;
20850b57cec5SDimitry Andric         MadeChange = true;
20860b57cec5SDimitry Andric       }
20870b57cec5SDimitry Andric     }
20880b57cec5SDimitry Andric   }
20890b57cec5SDimitry Andric 
20900b57cec5SDimitry Andric   return MadeChange;
20910b57cec5SDimitry Andric }
20920b57cec5SDimitry Andric 
run(Function & F,FunctionAnalysisManager & AM)20930b57cec5SDimitry Andric PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
20940b57cec5SDimitry Andric   auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
2095e8d8bef9SDimitry Andric   auto *AA = &AM.getResult<AAManager>(F);
2096e8d8bef9SDimitry Andric   auto *AC = &AM.getResult<AssumptionAnalysis>(F);
2097e8d8bef9SDimitry Andric   auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
20985f757f3fSDimitry Andric   auto *PDT = &AM.getResult<PostDominatorTreeAnalysis>(F);
2099349cc55cSDimitry Andric   auto *MSSA = &AM.getResult<MemorySSAAnalysis>(F);
21000b57cec5SDimitry Andric 
21015f757f3fSDimitry Andric   bool MadeChange = runImpl(F, &TLI, AA, AC, DT, PDT, &MSSA->getMSSA());
21020b57cec5SDimitry Andric   if (!MadeChange)
21030b57cec5SDimitry Andric     return PreservedAnalyses::all();
21040b57cec5SDimitry Andric 
21050b57cec5SDimitry Andric   PreservedAnalyses PA;
21060b57cec5SDimitry Andric   PA.preserveSet<CFGAnalyses>();
2107e8d8bef9SDimitry Andric   PA.preserve<MemorySSAAnalysis>();
21080b57cec5SDimitry Andric   return PA;
21090b57cec5SDimitry Andric }
21100b57cec5SDimitry Andric 
runImpl(Function & F,TargetLibraryInfo * TLI_,AliasAnalysis * AA_,AssumptionCache * AC_,DominatorTree * DT_,PostDominatorTree * PDT_,MemorySSA * MSSA_)2111349cc55cSDimitry Andric bool MemCpyOptPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
2112349cc55cSDimitry Andric                             AliasAnalysis *AA_, AssumptionCache *AC_,
21135f757f3fSDimitry Andric                             DominatorTree *DT_, PostDominatorTree *PDT_,
21145f757f3fSDimitry Andric                             MemorySSA *MSSA_) {
21150b57cec5SDimitry Andric   bool MadeChange = false;
21160b57cec5SDimitry Andric   TLI = TLI_;
2117e8d8bef9SDimitry Andric   AA = AA_;
2118e8d8bef9SDimitry Andric   AC = AC_;
2119e8d8bef9SDimitry Andric   DT = DT_;
21205f757f3fSDimitry Andric   PDT = PDT_;
2121e8d8bef9SDimitry Andric   MSSA = MSSA_;
2122e8d8bef9SDimitry Andric   MemorySSAUpdater MSSAU_(MSSA_);
2123349cc55cSDimitry Andric   MSSAU = &MSSAU_;
21240b57cec5SDimitry Andric 
21250b57cec5SDimitry Andric   while (true) {
21260b57cec5SDimitry Andric     if (!iterateOnFunction(F))
21270b57cec5SDimitry Andric       break;
21280b57cec5SDimitry Andric     MadeChange = true;
21290b57cec5SDimitry Andric   }
21300b57cec5SDimitry Andric 
2131349cc55cSDimitry Andric   if (VerifyMemorySSA)
2132e8d8bef9SDimitry Andric     MSSA_->verifyMemorySSA();
2133e8d8bef9SDimitry Andric 
21340b57cec5SDimitry Andric   return MadeChange;
21350b57cec5SDimitry Andric }
2136