10b57cec5SDimitry Andric //===- MemCpyOptimizer.cpp - Optimize use of memcpy and friends -----------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This pass performs various transformations related to eliminating memcpy
100b57cec5SDimitry Andric // calls, or transforming sets of stores into memset's.
110b57cec5SDimitry Andric //
120b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
130b57cec5SDimitry Andric
140b57cec5SDimitry Andric #include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
150b57cec5SDimitry Andric #include "llvm/ADT/DenseSet.h"
160b57cec5SDimitry Andric #include "llvm/ADT/STLExtras.h"
17*0fca6ea1SDimitry Andric #include "llvm/ADT/ScopeExit.h"
180b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h"
190b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h"
200b57cec5SDimitry Andric #include "llvm/ADT/iterator_range.h"
210b57cec5SDimitry Andric #include "llvm/Analysis/AliasAnalysis.h"
220b57cec5SDimitry Andric #include "llvm/Analysis/AssumptionCache.h"
235f757f3fSDimitry Andric #include "llvm/Analysis/CFG.h"
2404eeddc0SDimitry Andric #include "llvm/Analysis/CaptureTracking.h"
250b57cec5SDimitry Andric #include "llvm/Analysis/GlobalsModRef.h"
265f757f3fSDimitry Andric #include "llvm/Analysis/InstructionSimplify.h"
27e8d8bef9SDimitry Andric #include "llvm/Analysis/Loads.h"
280b57cec5SDimitry Andric #include "llvm/Analysis/MemoryLocation.h"
29e8d8bef9SDimitry Andric #include "llvm/Analysis/MemorySSA.h"
30e8d8bef9SDimitry Andric #include "llvm/Analysis/MemorySSAUpdater.h"
315f757f3fSDimitry Andric #include "llvm/Analysis/PostDominators.h"
320b57cec5SDimitry Andric #include "llvm/Analysis/TargetLibraryInfo.h"
330b57cec5SDimitry Andric #include "llvm/Analysis/ValueTracking.h"
340b57cec5SDimitry Andric #include "llvm/IR/BasicBlock.h"
350b57cec5SDimitry Andric #include "llvm/IR/Constants.h"
360b57cec5SDimitry Andric #include "llvm/IR/DataLayout.h"
370b57cec5SDimitry Andric #include "llvm/IR/DerivedTypes.h"
380b57cec5SDimitry Andric #include "llvm/IR/Dominators.h"
390b57cec5SDimitry Andric #include "llvm/IR/Function.h"
400b57cec5SDimitry Andric #include "llvm/IR/GlobalVariable.h"
410b57cec5SDimitry Andric #include "llvm/IR/IRBuilder.h"
420b57cec5SDimitry Andric #include "llvm/IR/InstrTypes.h"
430b57cec5SDimitry Andric #include "llvm/IR/Instruction.h"
440b57cec5SDimitry Andric #include "llvm/IR/Instructions.h"
450b57cec5SDimitry Andric #include "llvm/IR/IntrinsicInst.h"
460b57cec5SDimitry Andric #include "llvm/IR/Intrinsics.h"
470b57cec5SDimitry Andric #include "llvm/IR/LLVMContext.h"
480b57cec5SDimitry Andric #include "llvm/IR/Module.h"
490b57cec5SDimitry Andric #include "llvm/IR/PassManager.h"
500b57cec5SDimitry Andric #include "llvm/IR/Type.h"
510b57cec5SDimitry Andric #include "llvm/IR/User.h"
520b57cec5SDimitry Andric #include "llvm/IR/Value.h"
530b57cec5SDimitry Andric #include "llvm/Support/Casting.h"
540b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
550b57cec5SDimitry Andric #include "llvm/Support/MathExtras.h"
560b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
57480093f4SDimitry Andric #include "llvm/Transforms/Utils/Local.h"
580b57cec5SDimitry Andric #include <algorithm>
590b57cec5SDimitry Andric #include <cassert>
600b57cec5SDimitry Andric #include <cstdint>
61bdd1243dSDimitry Andric #include <optional>
620b57cec5SDimitry Andric
630b57cec5SDimitry Andric using namespace llvm;
640b57cec5SDimitry Andric
650b57cec5SDimitry Andric #define DEBUG_TYPE "memcpyopt"
660b57cec5SDimitry Andric
// Escape hatch: allow memcpyopt to run (and synthesize memset/memcpy
// intrinsics) even when the target reports that the corresponding libcalls
// are unavailable. Hidden, off by default.
static cl::opt<bool> EnableMemCpyOptWithoutLibcalls(
    "enable-memcpyopt-without-libcalls", cl::Hidden,
    cl::desc("Enable memcpyopt even when libcalls are disabled"));

// Pass-wide transformation counters, reported under -stats.
STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
STATISTIC(NumMemSetInfer, "Number of memsets inferred");
STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
STATISTIC(NumCallSlot, "Number of call slot optimizations performed");
STATISTIC(NumStackMove, "Number of stack-move optimizations performed");
770b57cec5SDimitry Andric
780b57cec5SDimitry Andric namespace {
790b57cec5SDimitry Andric
800b57cec5SDimitry Andric /// Represents a range of memset'd bytes with the ByteVal value.
810b57cec5SDimitry Andric /// This allows us to analyze stores like:
820b57cec5SDimitry Andric /// store 0 -> P+1
830b57cec5SDimitry Andric /// store 0 -> P+0
840b57cec5SDimitry Andric /// store 0 -> P+3
850b57cec5SDimitry Andric /// store 0 -> P+2
860b57cec5SDimitry Andric /// which sometimes happens with stores to arrays of structs etc. When we see
870b57cec5SDimitry Andric /// the first store, we make a range [1, 2). The second store extends the range
880b57cec5SDimitry Andric /// to [0, 2). The third makes a new range [2, 3). The fourth store joins the
890b57cec5SDimitry Andric /// two ranges into [0, 3) which is memset'able.
struct MemsetRange {
  // Start/End - A semi range that describes the span that this range covers.
  // The range is closed at the start and open at the end: [Start, End).
  int64_t Start, End;

  /// StartPtr - The getelementptr instruction that points to the start of the
  /// range.
  Value *StartPtr;

  /// Alignment - The known alignment of the first store.
  MaybeAlign Alignment;

  /// TheStores - The actual stores that make up this range.
  /// May contain MemSetInst entries as well as plain StoreInsts.
  SmallVector<Instruction *, 16> TheStores;

  /// Heuristic: returns true if collapsing this range into a single memset
  /// call is expected to reduce the number of emitted stores.
  bool isProfitableToUseMemset(const DataLayout &DL) const;
};
1070b57cec5SDimitry Andric
1080b57cec5SDimitry Andric } // end anonymous namespace
1090b57cec5SDimitry Andric
isProfitableToUseMemset(const DataLayout & DL) const1100b57cec5SDimitry Andric bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
1110b57cec5SDimitry Andric // If we found more than 4 stores to merge or 16 bytes, use memset.
112*0fca6ea1SDimitry Andric if (TheStores.size() >= 4 || End - Start >= 16)
113*0fca6ea1SDimitry Andric return true;
1140b57cec5SDimitry Andric
1150b57cec5SDimitry Andric // If there is nothing to merge, don't do anything.
116*0fca6ea1SDimitry Andric if (TheStores.size() < 2)
117*0fca6ea1SDimitry Andric return false;
1180b57cec5SDimitry Andric
1190b57cec5SDimitry Andric // If any of the stores are a memset, then it is always good to extend the
1200b57cec5SDimitry Andric // memset.
1210b57cec5SDimitry Andric for (Instruction *SI : TheStores)
1220b57cec5SDimitry Andric if (!isa<StoreInst>(SI))
1230b57cec5SDimitry Andric return true;
1240b57cec5SDimitry Andric
1250b57cec5SDimitry Andric // Assume that the code generator is capable of merging pairs of stores
1260b57cec5SDimitry Andric // together if it wants to.
127*0fca6ea1SDimitry Andric if (TheStores.size() == 2)
128*0fca6ea1SDimitry Andric return false;
1290b57cec5SDimitry Andric
1300b57cec5SDimitry Andric // If we have fewer than 8 stores, it can still be worthwhile to do this.
1310b57cec5SDimitry Andric // For example, merging 4 i8 stores into an i32 store is useful almost always.
1320b57cec5SDimitry Andric // However, merging 2 32-bit stores isn't useful on a 32-bit architecture (the
1330b57cec5SDimitry Andric // memset will be split into 2 32-bit stores anyway) and doing so can
1340b57cec5SDimitry Andric // pessimize the llvm optimizer.
1350b57cec5SDimitry Andric //
1360b57cec5SDimitry Andric // Since we don't have perfect knowledge here, make some assumptions: assume
1370b57cec5SDimitry Andric // the maximum GPR width is the same size as the largest legal integer
1380b57cec5SDimitry Andric // size. If so, check to see whether we will end up actually reducing the
1390b57cec5SDimitry Andric // number of stores used.
1400b57cec5SDimitry Andric unsigned Bytes = unsigned(End - Start);
1410b57cec5SDimitry Andric unsigned MaxIntSize = DL.getLargestLegalIntTypeSizeInBits() / 8;
1420b57cec5SDimitry Andric if (MaxIntSize == 0)
1430b57cec5SDimitry Andric MaxIntSize = 1;
1440b57cec5SDimitry Andric unsigned NumPointerStores = Bytes / MaxIntSize;
1450b57cec5SDimitry Andric
1460b57cec5SDimitry Andric // Assume the remaining bytes if any are done a byte at a time.
1470b57cec5SDimitry Andric unsigned NumByteStores = Bytes % MaxIntSize;
1480b57cec5SDimitry Andric
1490b57cec5SDimitry Andric // If we will reduce the # stores (according to this heuristic), do the
1500b57cec5SDimitry Andric // transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
1510b57cec5SDimitry Andric // etc.
1520b57cec5SDimitry Andric return TheStores.size() > NumPointerStores + NumByteStores;
1530b57cec5SDimitry Andric }
1540b57cec5SDimitry Andric
1550b57cec5SDimitry Andric namespace {
1560b57cec5SDimitry Andric
1570b57cec5SDimitry Andric class MemsetRanges {
1580b57cec5SDimitry Andric using range_iterator = SmallVectorImpl<MemsetRange>::iterator;
1590b57cec5SDimitry Andric
1600b57cec5SDimitry Andric /// A sorted list of the memset ranges.
1610b57cec5SDimitry Andric SmallVector<MemsetRange, 8> Ranges;
1620b57cec5SDimitry Andric
1630b57cec5SDimitry Andric const DataLayout &DL;
1640b57cec5SDimitry Andric
1650b57cec5SDimitry Andric public:
MemsetRanges(const DataLayout & DL)1660b57cec5SDimitry Andric MemsetRanges(const DataLayout &DL) : DL(DL) {}
1670b57cec5SDimitry Andric
1680b57cec5SDimitry Andric using const_iterator = SmallVectorImpl<MemsetRange>::const_iterator;
1690b57cec5SDimitry Andric
begin() const1700b57cec5SDimitry Andric const_iterator begin() const { return Ranges.begin(); }
end() const1710b57cec5SDimitry Andric const_iterator end() const { return Ranges.end(); }
empty() const1720b57cec5SDimitry Andric bool empty() const { return Ranges.empty(); }
1730b57cec5SDimitry Andric
addInst(int64_t OffsetFromFirst,Instruction * Inst)1740b57cec5SDimitry Andric void addInst(int64_t OffsetFromFirst, Instruction *Inst) {
17504eeddc0SDimitry Andric if (auto *SI = dyn_cast<StoreInst>(Inst))
1760b57cec5SDimitry Andric addStore(OffsetFromFirst, SI);
1770b57cec5SDimitry Andric else
1780b57cec5SDimitry Andric addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst));
1790b57cec5SDimitry Andric }
1800b57cec5SDimitry Andric
addStore(int64_t OffsetFromFirst,StoreInst * SI)1810b57cec5SDimitry Andric void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
1828c6f6c0cSDimitry Andric TypeSize StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType());
1838c6f6c0cSDimitry Andric assert(!StoreSize.isScalable() && "Can't track scalable-typed stores");
184bdd1243dSDimitry Andric addRange(OffsetFromFirst, StoreSize.getFixedValue(),
185bdd1243dSDimitry Andric SI->getPointerOperand(), SI->getAlign(), SI);
1860b57cec5SDimitry Andric }
1870b57cec5SDimitry Andric
addMemSet(int64_t OffsetFromFirst,MemSetInst * MSI)1880b57cec5SDimitry Andric void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
1890b57cec5SDimitry Andric int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue();
19081ad6265SDimitry Andric addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getDestAlign(), MSI);
1910b57cec5SDimitry Andric }
1920b57cec5SDimitry Andric
19381ad6265SDimitry Andric void addRange(int64_t Start, int64_t Size, Value *Ptr, MaybeAlign Alignment,
19481ad6265SDimitry Andric Instruction *Inst);
1950b57cec5SDimitry Andric };
1960b57cec5SDimitry Andric
1970b57cec5SDimitry Andric } // end anonymous namespace
1980b57cec5SDimitry Andric
1990b57cec5SDimitry Andric /// Add a new store to the MemsetRanges data structure. This adds a
2000b57cec5SDimitry Andric /// new range for the specified store at the specified offset, merging into
2010b57cec5SDimitry Andric /// existing ranges as appropriate.
addRange(int64_t Start,int64_t Size,Value * Ptr,MaybeAlign Alignment,Instruction * Inst)2020b57cec5SDimitry Andric void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
20381ad6265SDimitry Andric MaybeAlign Alignment, Instruction *Inst) {
2040b57cec5SDimitry Andric int64_t End = Start + Size;
2050b57cec5SDimitry Andric
2060b57cec5SDimitry Andric range_iterator I = partition_point(
2070b57cec5SDimitry Andric Ranges, [=](const MemsetRange &O) { return O.End < Start; });
2080b57cec5SDimitry Andric
2090b57cec5SDimitry Andric // We now know that I == E, in which case we didn't find anything to merge
2100b57cec5SDimitry Andric // with, or that Start <= I->End. If End < I->Start or I == E, then we need
2110b57cec5SDimitry Andric // to insert a new range. Handle this now.
2120b57cec5SDimitry Andric if (I == Ranges.end() || End < I->Start) {
2130b57cec5SDimitry Andric MemsetRange &R = *Ranges.insert(I, MemsetRange());
2140b57cec5SDimitry Andric R.Start = Start;
2150b57cec5SDimitry Andric R.End = End;
2160b57cec5SDimitry Andric R.StartPtr = Ptr;
2170b57cec5SDimitry Andric R.Alignment = Alignment;
2180b57cec5SDimitry Andric R.TheStores.push_back(Inst);
2190b57cec5SDimitry Andric return;
2200b57cec5SDimitry Andric }
2210b57cec5SDimitry Andric
2220b57cec5SDimitry Andric // This store overlaps with I, add it.
2230b57cec5SDimitry Andric I->TheStores.push_back(Inst);
2240b57cec5SDimitry Andric
2250b57cec5SDimitry Andric // At this point, we may have an interval that completely contains our store.
2260b57cec5SDimitry Andric // If so, just add it to the interval and return.
2270b57cec5SDimitry Andric if (I->Start <= Start && I->End >= End)
2280b57cec5SDimitry Andric return;
2290b57cec5SDimitry Andric
2300b57cec5SDimitry Andric // Now we know that Start <= I->End and End >= I->Start so the range overlaps
2310b57cec5SDimitry Andric // but is not entirely contained within the range.
2320b57cec5SDimitry Andric
2330b57cec5SDimitry Andric // See if the range extends the start of the range. In this case, it couldn't
2340b57cec5SDimitry Andric // possibly cause it to join the prior range, because otherwise we would have
2350b57cec5SDimitry Andric // stopped on *it*.
2360b57cec5SDimitry Andric if (Start < I->Start) {
2370b57cec5SDimitry Andric I->Start = Start;
2380b57cec5SDimitry Andric I->StartPtr = Ptr;
2390b57cec5SDimitry Andric I->Alignment = Alignment;
2400b57cec5SDimitry Andric }
2410b57cec5SDimitry Andric
2420b57cec5SDimitry Andric // Now we know that Start <= I->End and Start >= I->Start (so the startpoint
2430b57cec5SDimitry Andric // is in or right at the end of I), and that End >= I->Start. Extend I out to
2440b57cec5SDimitry Andric // End.
2450b57cec5SDimitry Andric if (End > I->End) {
2460b57cec5SDimitry Andric I->End = End;
2470b57cec5SDimitry Andric range_iterator NextI = I;
2480b57cec5SDimitry Andric while (++NextI != Ranges.end() && End >= NextI->Start) {
2490b57cec5SDimitry Andric // Merge the range in.
2500b57cec5SDimitry Andric I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
2510b57cec5SDimitry Andric if (NextI->End > I->End)
2520b57cec5SDimitry Andric I->End = NextI->End;
2530b57cec5SDimitry Andric Ranges.erase(NextI);
2540b57cec5SDimitry Andric NextI = I;
2550b57cec5SDimitry Andric }
2560b57cec5SDimitry Andric }
2570b57cec5SDimitry Andric }
2580b57cec5SDimitry Andric
2590b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
2600b57cec5SDimitry Andric // MemCpyOptLegacyPass Pass
2610b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
2620b57cec5SDimitry Andric
263e8d8bef9SDimitry Andric // Check that V is either not accessible by the caller, or unwinding cannot
264e8d8bef9SDimitry Andric // occur between Start and End.
mayBeVisibleThroughUnwinding(Value * V,Instruction * Start,Instruction * End)265e8d8bef9SDimitry Andric static bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start,
266e8d8bef9SDimitry Andric Instruction *End) {
267e8d8bef9SDimitry Andric assert(Start->getParent() == End->getParent() && "Must be in same block");
26804eeddc0SDimitry Andric // Function can't unwind, so it also can't be visible through unwinding.
26904eeddc0SDimitry Andric if (Start->getFunction()->doesNotThrow())
270e8d8bef9SDimitry Andric return false;
27104eeddc0SDimitry Andric
27204eeddc0SDimitry Andric // Object is not visible on unwind.
27304eeddc0SDimitry Andric // TODO: Support RequiresNoCaptureBeforeUnwind case.
27404eeddc0SDimitry Andric bool RequiresNoCaptureBeforeUnwind;
27504eeddc0SDimitry Andric if (isNotVisibleOnUnwind(getUnderlyingObject(V),
27604eeddc0SDimitry Andric RequiresNoCaptureBeforeUnwind) &&
27704eeddc0SDimitry Andric !RequiresNoCaptureBeforeUnwind)
27804eeddc0SDimitry Andric return false;
27904eeddc0SDimitry Andric
28004eeddc0SDimitry Andric // Check whether there are any unwinding instructions in the range.
28104eeddc0SDimitry Andric return any_of(make_range(Start->getIterator(), End->getIterator()),
28204eeddc0SDimitry Andric [](const Instruction &I) { return I.mayThrow(); });
283e8d8bef9SDimitry Andric }
284e8d8bef9SDimitry Andric
/// Erase \p I from its function while keeping MemorySSA consistent.
void MemCpyOptPass::eraseInstruction(Instruction *I) {
  // Drop the MemorySSA access first: it refers to I, so it must go before
  // the instruction itself is destroyed.
  MSSAU->removeMemoryAccess(I);
  I->eraseFromParent();
}
289e8d8bef9SDimitry Andric
290e8d8bef9SDimitry Andric // Check for mod or ref of Loc between Start and End, excluding both boundaries.
291bdd1243dSDimitry Andric // Start and End must be in the same block.
292bdd1243dSDimitry Andric // If SkippedLifetimeStart is provided, skip over one clobbering lifetime.start
293bdd1243dSDimitry Andric // intrinsic and store it inside SkippedLifetimeStart.
accessedBetween(BatchAAResults & AA,MemoryLocation Loc,const MemoryUseOrDef * Start,const MemoryUseOrDef * End,Instruction ** SkippedLifetimeStart=nullptr)294bdd1243dSDimitry Andric static bool accessedBetween(BatchAAResults &AA, MemoryLocation Loc,
295e8d8bef9SDimitry Andric const MemoryUseOrDef *Start,
296bdd1243dSDimitry Andric const MemoryUseOrDef *End,
297bdd1243dSDimitry Andric Instruction **SkippedLifetimeStart = nullptr) {
298e8d8bef9SDimitry Andric assert(Start->getBlock() == End->getBlock() && "Only local supported");
299e8d8bef9SDimitry Andric for (const MemoryAccess &MA :
300e8d8bef9SDimitry Andric make_range(++Start->getIterator(), End->getIterator())) {
301bdd1243dSDimitry Andric Instruction *I = cast<MemoryUseOrDef>(MA).getMemoryInst();
302bdd1243dSDimitry Andric if (isModOrRefSet(AA.getModRefInfo(I, Loc))) {
303bdd1243dSDimitry Andric auto *II = dyn_cast<IntrinsicInst>(I);
304bdd1243dSDimitry Andric if (II && II->getIntrinsicID() == Intrinsic::lifetime_start &&
305bdd1243dSDimitry Andric SkippedLifetimeStart && !*SkippedLifetimeStart) {
306bdd1243dSDimitry Andric *SkippedLifetimeStart = I;
307bdd1243dSDimitry Andric continue;
308bdd1243dSDimitry Andric }
309bdd1243dSDimitry Andric
310e8d8bef9SDimitry Andric return true;
311e8d8bef9SDimitry Andric }
312bdd1243dSDimitry Andric }
313e8d8bef9SDimitry Andric return false;
314e8d8bef9SDimitry Andric }
315e8d8bef9SDimitry Andric
316e8d8bef9SDimitry Andric // Check for mod of Loc between Start and End, excluding both boundaries.
317e8d8bef9SDimitry Andric // Start and End can be in different blocks.
writtenBetween(MemorySSA * MSSA,BatchAAResults & AA,MemoryLocation Loc,const MemoryUseOrDef * Start,const MemoryUseOrDef * End)318bdd1243dSDimitry Andric static bool writtenBetween(MemorySSA *MSSA, BatchAAResults &AA,
31981ad6265SDimitry Andric MemoryLocation Loc, const MemoryUseOrDef *Start,
320e8d8bef9SDimitry Andric const MemoryUseOrDef *End) {
32181ad6265SDimitry Andric if (isa<MemoryUse>(End)) {
32281ad6265SDimitry Andric // For MemoryUses, getClobberingMemoryAccess may skip non-clobbering writes.
32381ad6265SDimitry Andric // Manually check read accesses between Start and End, if they are in the
32481ad6265SDimitry Andric // same block, for clobbers. Otherwise assume Loc is clobbered.
32581ad6265SDimitry Andric return Start->getBlock() != End->getBlock() ||
32681ad6265SDimitry Andric any_of(
32781ad6265SDimitry Andric make_range(std::next(Start->getIterator()), End->getIterator()),
32881ad6265SDimitry Andric [&AA, Loc](const MemoryAccess &Acc) {
32981ad6265SDimitry Andric if (isa<MemoryUse>(&Acc))
33081ad6265SDimitry Andric return false;
33181ad6265SDimitry Andric Instruction *AccInst =
33281ad6265SDimitry Andric cast<MemoryUseOrDef>(&Acc)->getMemoryInst();
33381ad6265SDimitry Andric return isModSet(AA.getModRefInfo(AccInst, Loc));
33481ad6265SDimitry Andric });
33581ad6265SDimitry Andric }
33681ad6265SDimitry Andric
337e8d8bef9SDimitry Andric // TODO: Only walk until we hit Start.
338e8d8bef9SDimitry Andric MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
339bdd1243dSDimitry Andric End->getDefiningAccess(), Loc, AA);
340e8d8bef9SDimitry Andric return !MSSA->dominates(Clobber, Start);
341e8d8bef9SDimitry Andric }
342e8d8bef9SDimitry Andric
3434542f901SDimitry Andric // Update AA metadata
combineAAMetadata(Instruction * ReplInst,Instruction * I)3444542f901SDimitry Andric static void combineAAMetadata(Instruction *ReplInst, Instruction *I) {
3454542f901SDimitry Andric // FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be
3464542f901SDimitry Andric // handled here, but combineMetadata doesn't support them yet
3474542f901SDimitry Andric unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
3484542f901SDimitry Andric LLVMContext::MD_noalias,
3494542f901SDimitry Andric LLVMContext::MD_invariant_group,
3504542f901SDimitry Andric LLVMContext::MD_access_group};
3514542f901SDimitry Andric combineMetadata(ReplInst, I, KnownIDs, true);
3524542f901SDimitry Andric }
3534542f901SDimitry Andric
3540b57cec5SDimitry Andric /// When scanning forward over instructions, we look for some other patterns to
3550b57cec5SDimitry Andric /// fold away. In particular, this looks for stores to neighboring locations of
3560b57cec5SDimitry Andric /// memory. If it sees enough consecutive ones, it attempts to merge them
3570b57cec5SDimitry Andric /// together into a memcpy/memset.
Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
                                                 Value *StartPtr,
                                                 Value *ByteVal) {
  const DataLayout &DL = StartInst->getDataLayout();

  // We can't track scalable types
  if (auto *SI = dyn_cast<StoreInst>(StartInst))
    if (DL.getTypeStoreSize(SI->getOperand(0)->getType()).isScalable())
      return nullptr;

  // Okay, so we now have a single store that can be splatable.  Scan to find
  // all subsequent stores of the same value to offset from the same pointer.
  // Join these together into ranges, so we can decide whether contiguous blocks
  // are stored.
  MemsetRanges Ranges(DL);

  BasicBlock::iterator BI(StartInst);

  // Keeps track of the last memory use or def before the insertion point for
  // the new memset. The new MemoryDef for the inserted memsets will be inserted
  // after MemInsertPoint.
  MemoryUseOrDef *MemInsertPoint = nullptr;
  for (++BI; !BI->isTerminator(); ++BI) {
    // Remember the most recent instruction with a MemorySSA access; the new
    // memset's MemoryDef will be placed relative to it.
    auto *CurrentAcc = cast_or_null<MemoryUseOrDef>(
        MSSAU->getMemorySSA()->getMemoryAccess(&*BI));
    if (CurrentAcc)
      MemInsertPoint = CurrentAcc;

    // Calls that only access inaccessible memory do not block merging
    // accessible stores.
    if (auto *CB = dyn_cast<CallBase>(BI)) {
      if (CB->onlyAccessesInaccessibleMemory())
        continue;
    }

    if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
      // If the instruction is readnone, ignore it, otherwise bail out.  We
      // don't even allow readonly here because we don't want something like:
      // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
      if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
        break;
      continue;
    }

    if (auto *NextStore = dyn_cast<StoreInst>(BI)) {
      // If this is a store, see if we can merge it in.
      if (!NextStore->isSimple())
        break;

      Value *StoredVal = NextStore->getValueOperand();

      // Don't convert stores of non-integral pointer types to memsets (which
      // stores integers).
      if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
        break;

      // We can't track ranges involving scalable types.
      if (DL.getTypeStoreSize(StoredVal->getType()).isScalable())
        break;

      // Check to see if this stored value is of the same byte-splattable value.
      // An undef starting value can adopt the first concrete splat byte seen.
      Value *StoredByte = isBytewiseValue(StoredVal, DL);
      if (isa<UndefValue>(ByteVal) && StoredByte)
        ByteVal = StoredByte;
      if (ByteVal != StoredByte)
        break;

      // Check to see if this store is to a constant offset from the start ptr.
      std::optional<int64_t> Offset =
          NextStore->getPointerOperand()->getPointerOffsetFrom(StartPtr, DL);
      if (!Offset)
        break;

      Ranges.addStore(*Offset, NextStore);
    } else {
      auto *MSI = cast<MemSetInst>(BI);

      if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
          !isa<ConstantInt>(MSI->getLength()))
        break;

      // Check to see if this store is to a constant offset from the start ptr.
      std::optional<int64_t> Offset =
          MSI->getDest()->getPointerOffsetFrom(StartPtr, DL);
      if (!Offset)
        break;

      Ranges.addMemSet(*Offset, MSI);
    }
  }

  // If we have no ranges, then we just had a single store with nothing that
  // could be merged in.  This is a very common case of course.
  if (Ranges.empty())
    return nullptr;

  // If we had at least one store that could be merged in, add the starting
  // store as well.  We try to avoid this unless there is at least something
  // interesting as a small compile-time optimization.
  Ranges.addInst(0, StartInst);

  // If we create any memsets, we put it right before the first instruction that
  // isn't part of the memset block.  This ensure that the memset is dominated
  // by any addressing instruction needed by the start of the block.
  IRBuilder<> Builder(&*BI);

  // Now that we have full information about ranges, loop over the ranges and
  // emit memset's for anything big enough to be worthwhile.
  Instruction *AMemSet = nullptr;
  for (const MemsetRange &Range : Ranges) {
    // A range holding only the starting store has nothing to merge.
    if (Range.TheStores.size() == 1)
      continue;

    // If it is profitable to lower this range to memset, do so now.
    if (!Range.isProfitableToUseMemset(DL))
      continue;

    // Otherwise, we do want to transform this!  Create a new memset.
    // Get the starting pointer of the block.
    StartPtr = Range.StartPtr;

    AMemSet = Builder.CreateMemSet(StartPtr, ByteVal, Range.End - Range.Start,
                                   Range.Alignment);
    // Carry over debug-assignment tracking info from the replaced stores.
    AMemSet->mergeDIAssignID(Range.TheStores);

    LLVM_DEBUG(dbgs() << "Replace stores:\n"; for (Instruction *SI
                                                   : Range.TheStores) dbgs()
                                              << *SI << '\n';
               dbgs() << "With: " << *AMemSet << '\n');
    if (!Range.TheStores.empty())
      AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());

    // Insert the new MemoryDef for the memset: before MemInsertPoint if that
    // access belongs to the instruction we are inserting in front of,
    // otherwise after it.
    auto *NewDef = cast<MemoryDef>(
        MemInsertPoint->getMemoryInst() == &*BI
            ? MSSAU->createMemoryAccessBefore(AMemSet, nullptr, MemInsertPoint)
            : MSSAU->createMemoryAccessAfter(AMemSet, nullptr, MemInsertPoint));
    MSSAU->insertDef(NewDef, /*RenameUses=*/true);
    MemInsertPoint = NewDef;

    // Zap all the stores.
    for (Instruction *SI : Range.TheStores)
      eraseInstruction(SI);

    ++NumMemSetInfer;
  }

  // Returns the last memset created, or nullptr if nothing was transformed.
  return AMemSet;
}
5060b57cec5SDimitry Andric
// This method tries to lift the store SI to just before position P, so that
// the load/store pair can later be promoted to a memcpy at P. It lifts the
// store together with its operands, plus anything in between that may alias
// with the lifted instructions.
// Returns true if the lifting was successful.
bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) {
  // If the store alias this position, early bail out.
  MemoryLocation StoreLoc = MemoryLocation::get(SI);
  if (isModOrRefSet(AA->getModRefInfo(P, StoreLoc)))
    return false;

  // Keep track of the arguments of all instruction we plan to lift
  // so we can make sure to lift them as well if appropriate.
  DenseSet<Instruction *> Args;
  // Queue an operand for lifting if it is an instruction in the same block.
  // Returns false if lifting it is impossible (it is P itself).
  auto AddArg = [&](Value *Arg) {
    auto *I = dyn_cast<Instruction>(Arg);
    if (I && I->getParent() == SI->getParent()) {
      // Cannot hoist user of P above P
      if (I == P)
        return false;
      Args.insert(I);
    }
    return true;
  };
  if (!AddArg(SI->getPointerOperand()))
    return false;

  // Instruction to lift before P.
  SmallVector<Instruction *, 8> ToLift{SI};

  // Memory locations of lifted instructions.
  SmallVector<MemoryLocation, 8> MemLocs{StoreLoc};

  // Lifted calls.
  SmallVector<const CallBase *, 8> Calls;

  const MemoryLocation LoadLoc = MemoryLocation::get(LI);

  // Walk backwards from the store to P, deciding for each instruction in
  // between whether it must be lifted along with the store, and whether
  // doing so is legal.
  for (auto I = --SI->getIterator(), E = P->getIterator(); I != E; --I) {
    auto *C = &*I;

    // Make sure hoisting does not perform a store that was not guaranteed to
    // happen.
    if (!isGuaranteedToTransferExecutionToSuccessor(C))
      return false;

    bool MayAlias = isModOrRefSet(AA->getModRefInfo(C, std::nullopt));

    bool NeedLift = false;
    if (Args.erase(C))
      NeedLift = true;
    else if (MayAlias) {
      // C must also move if it mod/refs any memory location (or call) that
      // is already scheduled to be lifted; otherwise the reordering would
      // change observable memory behavior.
      NeedLift = llvm::any_of(MemLocs, [C, this](const MemoryLocation &ML) {
        return isModOrRefSet(AA->getModRefInfo(C, ML));
      });

      if (!NeedLift)
        NeedLift = llvm::any_of(Calls, [C, this](const CallBase *Call) {
          return isModOrRefSet(AA->getModRefInfo(C, Call));
        });
    }

    if (!NeedLift)
      continue;

    if (MayAlias) {
      // Since LI is implicitly moved downwards past the lifted instructions,
      // none of them may modify its source.
      if (isModSet(AA->getModRefInfo(C, LoadLoc)))
        return false;
      else if (const auto *Call = dyn_cast<CallBase>(C)) {
        // If we can't lift this before P, it's game over.
        if (isModOrRefSet(AA->getModRefInfo(P, Call)))
          return false;

        Calls.push_back(Call);
      } else if (isa<LoadInst>(C) || isa<StoreInst>(C) || isa<VAArgInst>(C)) {
        // If we can't lift this before P, it's game over.
        auto ML = MemoryLocation::get(C);
        if (isModOrRefSet(AA->getModRefInfo(P, ML)))
          return false;

        MemLocs.push_back(ML);
      } else
        // We don't know how to lift this instruction.
        return false;
    }

    ToLift.push_back(C);
    // Operands of a lifted instruction that live in this block must be
    // lifted too, so queue them for the remaining iterations of the scan.
    for (Value *Op : C->operands())
      if (!AddArg(Op))
        return false;
  }

  // Find MSSA insertion point. Normally P will always have a corresponding
  // memory access before which we can insert. However, with non-standard AA
  // pipelines, there may be a mismatch between AA and MSSA, in which case we
  // will scan for a memory access before P. In either case, we know for sure
  // that at least the load will have a memory access.
  // TODO: Simplify this once P will be determined by MSSA, in which case the
  // discrepancy can no longer occur.
  MemoryUseOrDef *MemInsertPoint = nullptr;
  if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(P)) {
    MemInsertPoint = cast<MemoryUseOrDef>(--MA->getIterator());
  } else {
    const Instruction *ConstP = P;
    for (const Instruction &I : make_range(++ConstP->getReverseIterator(),
                                           ++LI->getReverseIterator())) {
      if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(&I)) {
        MemInsertPoint = MA;
        break;
      }
    }
  }

  // We made it, we need to lift.
  // ToLift was built top-down, so iterate in reverse to preserve the
  // original relative order of the lifted instructions.
  for (auto *I : llvm::reverse(ToLift)) {
    LLVM_DEBUG(dbgs() << "Lifting " << *I << " before " << *P << "\n");
    I->moveBefore(P);
    assert(MemInsertPoint && "Must have found insert point");
    if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(I)) {
      MSSAU->moveAfter(MA, MemInsertPoint);
      MemInsertPoint = MA;
    }
  }

  return true;
}
6340b57cec5SDimitry Andric
// Try to optimize a store of a previously loaded value (a load/store pair).
// Three transforms are attempted, in order:
//   1. Promote an aggregate-typed load/store pair into a memcpy/memmove.
//   2. Call slot forwarding: let the call that produced the loaded value
//      write directly into the store's destination.
//   3. Stack-move optimization for alloca-to-alloca copies.
// On success, BBI is updated so the caller's block iterator stays valid.
bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI,
                                       const DataLayout &DL,
                                       BasicBlock::iterator &BBI) {
  // Only handle a simple (non-atomic, non-volatile) single-use load that
  // lives in the same block as the store.
  if (!LI->isSimple() || !LI->hasOneUse() || LI->getParent() != SI->getParent())
    return false;

  auto *T = LI->getType();
  // Don't introduce calls to memcpy/memmove intrinsics out of thin air if
  // the corresponding libcalls are not available.
  // TODO: We should really distinguish between libcall availability and
  // our ability to introduce intrinsics.
  if (T->isAggregateType() &&
      (EnableMemCpyOptWithoutLibcalls ||
       (TLI->has(LibFunc_memcpy) && TLI->has(LibFunc_memmove)))) {
    MemoryLocation LoadLoc = MemoryLocation::get(LI);

    // We use alias analysis to check if an instruction may store to
    // the memory we load from in between the load and the store. If
    // such an instruction is found, we try to promote there instead
    // of at the store position.
    // TODO: Can use MSSA for this.
    Instruction *P = SI;
    for (auto &I : make_range(++LI->getIterator(), SI->getIterator())) {
      if (isModSet(AA->getModRefInfo(&I, LoadLoc))) {
        P = &I;
        break;
      }
    }

    // We found an instruction that may write to the loaded memory.
    // We can try to promote at this position instead of the store
    // position if nothing aliases the store memory after this and the store
    // destination is not in the range.
    if (P && P != SI) {
      if (!moveUp(SI, P, LI))
        P = nullptr;
    }

    // If a valid insertion position is found, then we can promote
    // the load/store pair to a memcpy.
    if (P) {
      // If we load from memory that may alias the memory we store to,
      // memmove must be used to preserve semantic. If not, memcpy can
      // be used. Also, if we load from constant memory, memcpy can be used
      // as the constant memory won't be modified.
      bool UseMemMove = false;
      if (isModSet(AA->getModRefInfo(SI, LoadLoc)))
        UseMemMove = true;

      IRBuilder<> Builder(P);
      Value *Size =
          Builder.CreateTypeSize(Builder.getInt64Ty(), DL.getTypeStoreSize(T));
      Instruction *M;
      if (UseMemMove)
        M = Builder.CreateMemMove(SI->getPointerOperand(), SI->getAlign(),
                                  LI->getPointerOperand(), LI->getAlign(),
                                  Size);
      else
        M = Builder.CreateMemCpy(SI->getPointerOperand(), SI->getAlign(),
                                 LI->getPointerOperand(), LI->getAlign(), Size);
      // Preserve debug-info assignment tracking on the replacement.
      M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);

      LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => " << *M
                        << "\n");

      // Register the new memcpy/memmove with MemorySSA, placing its access
      // immediately after the store's defining access.
      auto *LastDef =
          cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI));
      auto *NewAccess = MSSAU->createMemoryAccessAfter(M, nullptr, LastDef);
      MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);

      eraseInstruction(SI);
      eraseInstruction(LI);
      ++NumMemCpyInstr;

      // Make sure we do not invalidate the iterator.
      BBI = M->getIterator();
      return true;
    }
  }

  // Detect cases where we're performing call slot forwarding, but
  // happen to be using a load-store pair to implement it, rather than
  // a memcpy.
  BatchAAResults BAA(*AA);
  auto GetCall = [&]() -> CallInst * {
    // We defer this expensive clobber walk until the cheap checks
    // have been done on the source inside performCallSlotOptzn.
    if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>(
            MSSA->getWalker()->getClobberingMemoryAccess(LI, BAA)))
      return dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
    return nullptr;
  };

  bool Changed = performCallSlotOptzn(
      LI, SI, SI->getPointerOperand()->stripPointerCasts(),
      LI->getPointerOperand()->stripPointerCasts(),
      DL.getTypeStoreSize(SI->getOperand(0)->getType()),
      std::min(SI->getAlign(), LI->getAlign()), BAA, GetCall);
  if (Changed) {
    eraseInstruction(SI);
    eraseInstruction(LI);
    ++NumMemCpyInstr;
    return true;
  }

  // If this is a load-store pair from a stack slot to a stack slot, we
  // might be able to perform the stack-move optimization just as we do for
  // memcpys from an alloca to an alloca.
  if (auto *DestAlloca = dyn_cast<AllocaInst>(SI->getPointerOperand())) {
    if (auto *SrcAlloca = dyn_cast<AllocaInst>(LI->getPointerOperand())) {
      if (performStackMoveOptzn(LI, SI, DestAlloca, SrcAlloca,
                                DL.getTypeStoreSize(T), BAA)) {
        // Avoid invalidating the iterator.
        BBI = SI->getNextNonDebugInstruction()->getIterator();
        eraseInstruction(SI);
        eraseInstruction(LI);
        ++NumMemCpyInstr;
        return true;
      }
    }
  }

  return false;
}
759bdd1243dSDimitry Andric
processStore(StoreInst * SI,BasicBlock::iterator & BBI)760bdd1243dSDimitry Andric bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
761*0fca6ea1SDimitry Andric if (!SI->isSimple())
762*0fca6ea1SDimitry Andric return false;
763bdd1243dSDimitry Andric
764bdd1243dSDimitry Andric // Avoid merging nontemporal stores since the resulting
765bdd1243dSDimitry Andric // memcpy/memset would not be able to preserve the nontemporal hint.
766bdd1243dSDimitry Andric // In theory we could teach how to propagate the !nontemporal metadata to
767bdd1243dSDimitry Andric // memset calls. However, that change would force the backend to
768bdd1243dSDimitry Andric // conservatively expand !nontemporal memset calls back to sequences of
769bdd1243dSDimitry Andric // store instructions (effectively undoing the merging).
770bdd1243dSDimitry Andric if (SI->getMetadata(LLVMContext::MD_nontemporal))
771bdd1243dSDimitry Andric return false;
772bdd1243dSDimitry Andric
773*0fca6ea1SDimitry Andric const DataLayout &DL = SI->getDataLayout();
774bdd1243dSDimitry Andric
775bdd1243dSDimitry Andric Value *StoredVal = SI->getValueOperand();
776bdd1243dSDimitry Andric
777bdd1243dSDimitry Andric // Not all the transforms below are correct for non-integral pointers, bail
778bdd1243dSDimitry Andric // until we've audited the individual pieces.
779bdd1243dSDimitry Andric if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
780bdd1243dSDimitry Andric return false;
781bdd1243dSDimitry Andric
782bdd1243dSDimitry Andric // Load to store forwarding can be interpreted as memcpy.
783bdd1243dSDimitry Andric if (auto *LI = dyn_cast<LoadInst>(StoredVal))
784bdd1243dSDimitry Andric return processStoreOfLoad(SI, LI, DL, BBI);
7850b57cec5SDimitry Andric
786349cc55cSDimitry Andric // The following code creates memset intrinsics out of thin air. Don't do
787349cc55cSDimitry Andric // this if the corresponding libfunc is not available.
788349cc55cSDimitry Andric // TODO: We should really distinguish between libcall availability and
789349cc55cSDimitry Andric // our ability to introduce intrinsics.
790349cc55cSDimitry Andric if (!(TLI->has(LibFunc_memset) || EnableMemCpyOptWithoutLibcalls))
791349cc55cSDimitry Andric return false;
792349cc55cSDimitry Andric
7930b57cec5SDimitry Andric // There are two cases that are interesting for this code to handle: memcpy
7940b57cec5SDimitry Andric // and memset. Right now we only handle memset.
7950b57cec5SDimitry Andric
7960b57cec5SDimitry Andric // Ensure that the value being stored is something that can be memset'able a
7970b57cec5SDimitry Andric // byte at a time like "0" or "-1" or any width, as well as things like
7980b57cec5SDimitry Andric // 0xA0A0A0A0 and 0.0.
7990b57cec5SDimitry Andric auto *V = SI->getOperand(0);
8000b57cec5SDimitry Andric if (Value *ByteVal = isBytewiseValue(V, DL)) {
801*0fca6ea1SDimitry Andric if (Instruction *I =
802*0fca6ea1SDimitry Andric tryMergingIntoMemset(SI, SI->getPointerOperand(), ByteVal)) {
8030b57cec5SDimitry Andric BBI = I->getIterator(); // Don't invalidate iterator.
8040b57cec5SDimitry Andric return true;
8050b57cec5SDimitry Andric }
8060b57cec5SDimitry Andric
8070b57cec5SDimitry Andric // If we have an aggregate, we try to promote it to memset regardless
8080b57cec5SDimitry Andric // of opportunity for merging as it can expose optimization opportunities
8090b57cec5SDimitry Andric // in subsequent passes.
8100b57cec5SDimitry Andric auto *T = V->getType();
8110b57cec5SDimitry Andric if (T->isAggregateType()) {
8120b57cec5SDimitry Andric uint64_t Size = DL.getTypeStoreSize(T);
8130b57cec5SDimitry Andric IRBuilder<> Builder(SI);
8145ffd83dbSDimitry Andric auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, Size,
8155ffd83dbSDimitry Andric SI->getAlign());
816bdd1243dSDimitry Andric M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);
8170b57cec5SDimitry Andric
8180b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n");
8190b57cec5SDimitry Andric
820349cc55cSDimitry Andric // The newly inserted memset is immediately overwritten by the original
821349cc55cSDimitry Andric // store, so we do not need to rename uses.
822349cc55cSDimitry Andric auto *StoreDef = cast<MemoryDef>(MSSA->getMemoryAccess(SI));
823*0fca6ea1SDimitry Andric auto *NewAccess = MSSAU->createMemoryAccessBefore(M, nullptr, StoreDef);
824349cc55cSDimitry Andric MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/false);
825e8d8bef9SDimitry Andric
826e8d8bef9SDimitry Andric eraseInstruction(SI);
8270b57cec5SDimitry Andric NumMemSetInfer++;
8280b57cec5SDimitry Andric
8290b57cec5SDimitry Andric // Make sure we do not invalidate the iterator.
8300b57cec5SDimitry Andric BBI = M->getIterator();
8310b57cec5SDimitry Andric return true;
8320b57cec5SDimitry Andric }
8330b57cec5SDimitry Andric }
8340b57cec5SDimitry Andric
8350b57cec5SDimitry Andric return false;
8360b57cec5SDimitry Andric }
8370b57cec5SDimitry Andric
processMemSet(MemSetInst * MSI,BasicBlock::iterator & BBI)8380b57cec5SDimitry Andric bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
8390b57cec5SDimitry Andric // See if there is another memset or store neighboring this memset which
8400b57cec5SDimitry Andric // allows us to widen out the memset to do a single larger store.
8410b57cec5SDimitry Andric if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile())
842*0fca6ea1SDimitry Andric if (Instruction *I =
843*0fca6ea1SDimitry Andric tryMergingIntoMemset(MSI, MSI->getDest(), MSI->getValue())) {
8440b57cec5SDimitry Andric BBI = I->getIterator(); // Don't invalidate iterator.
8450b57cec5SDimitry Andric return true;
8460b57cec5SDimitry Andric }
8470b57cec5SDimitry Andric return false;
8480b57cec5SDimitry Andric }
8490b57cec5SDimitry Andric
8500b57cec5SDimitry Andric /// Takes a memcpy and a call that it depends on,
8510b57cec5SDimitry Andric /// and checks for the possibility of a call slot optimization by having
8520b57cec5SDimitry Andric /// the call write its result directly into the destination of the memcpy.
performCallSlotOptzn(Instruction * cpyLoad,Instruction * cpyStore,Value * cpyDest,Value * cpySrc,TypeSize cpySize,Align cpyDestAlign,BatchAAResults & BAA,std::function<CallInst * ()> GetC)853e8d8bef9SDimitry Andric bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
854e8d8bef9SDimitry Andric Instruction *cpyStore, Value *cpyDest,
8558c6f6c0cSDimitry Andric Value *cpySrc, TypeSize cpySize,
856*0fca6ea1SDimitry Andric Align cpyDestAlign,
857*0fca6ea1SDimitry Andric BatchAAResults &BAA,
85881ad6265SDimitry Andric std::function<CallInst *()> GetC) {
8590b57cec5SDimitry Andric // The general transformation to keep in mind is
8600b57cec5SDimitry Andric //
8610b57cec5SDimitry Andric // call @func(..., src, ...)
8620b57cec5SDimitry Andric // memcpy(dest, src, ...)
8630b57cec5SDimitry Andric //
8640b57cec5SDimitry Andric // ->
8650b57cec5SDimitry Andric //
8660b57cec5SDimitry Andric // memcpy(dest, src, ...)
8670b57cec5SDimitry Andric // call @func(..., dest, ...)
8680b57cec5SDimitry Andric //
8690b57cec5SDimitry Andric // Since moving the memcpy is technically awkward, we additionally check that
8700b57cec5SDimitry Andric // src only holds uninitialized values at the moment of the call, meaning that
8710b57cec5SDimitry Andric // the memcpy can be discarded rather than moved.
8720b57cec5SDimitry Andric
8738c6f6c0cSDimitry Andric // We can't optimize scalable types.
8748c6f6c0cSDimitry Andric if (cpySize.isScalable())
8758c6f6c0cSDimitry Andric return false;
8768c6f6c0cSDimitry Andric
8770b57cec5SDimitry Andric // Require that src be an alloca. This simplifies the reasoning considerably.
87804eeddc0SDimitry Andric auto *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
8790b57cec5SDimitry Andric if (!srcAlloca)
8800b57cec5SDimitry Andric return false;
8810b57cec5SDimitry Andric
8820b57cec5SDimitry Andric ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
8830b57cec5SDimitry Andric if (!srcArraySize)
8840b57cec5SDimitry Andric return false;
8850b57cec5SDimitry Andric
886*0fca6ea1SDimitry Andric const DataLayout &DL = cpyLoad->getDataLayout();
8875f757f3fSDimitry Andric TypeSize SrcAllocaSize = DL.getTypeAllocSize(srcAlloca->getAllocatedType());
8885f757f3fSDimitry Andric // We can't optimize scalable types.
8895f757f3fSDimitry Andric if (SrcAllocaSize.isScalable())
8905f757f3fSDimitry Andric return false;
8915f757f3fSDimitry Andric uint64_t srcSize = SrcAllocaSize * srcArraySize->getZExtValue();
8920b57cec5SDimitry Andric
8938c6f6c0cSDimitry Andric if (cpySize < srcSize)
8940b57cec5SDimitry Andric return false;
8950b57cec5SDimitry Andric
89681ad6265SDimitry Andric CallInst *C = GetC();
89781ad6265SDimitry Andric if (!C)
89881ad6265SDimitry Andric return false;
89981ad6265SDimitry Andric
90081ad6265SDimitry Andric // Lifetime marks shouldn't be operated on.
90181ad6265SDimitry Andric if (Function *F = C->getCalledFunction())
90281ad6265SDimitry Andric if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::lifetime_start)
90381ad6265SDimitry Andric return false;
90481ad6265SDimitry Andric
90581ad6265SDimitry Andric if (C->getParent() != cpyStore->getParent()) {
90681ad6265SDimitry Andric LLVM_DEBUG(dbgs() << "Call Slot: block local restriction\n");
90781ad6265SDimitry Andric return false;
90881ad6265SDimitry Andric }
90981ad6265SDimitry Andric
910*0fca6ea1SDimitry Andric MemoryLocation DestLoc =
911*0fca6ea1SDimitry Andric isa<StoreInst>(cpyStore)
912*0fca6ea1SDimitry Andric ? MemoryLocation::get(cpyStore)
913*0fca6ea1SDimitry Andric : MemoryLocation::getForDest(cast<MemCpyInst>(cpyStore));
91481ad6265SDimitry Andric
91581ad6265SDimitry Andric // Check that nothing touches the dest of the copy between
91681ad6265SDimitry Andric // the call and the store/memcpy.
917bdd1243dSDimitry Andric Instruction *SkippedLifetimeStart = nullptr;
918bdd1243dSDimitry Andric if (accessedBetween(BAA, DestLoc, MSSA->getMemoryAccess(C),
919bdd1243dSDimitry Andric MSSA->getMemoryAccess(cpyStore), &SkippedLifetimeStart)) {
92081ad6265SDimitry Andric LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer modified after call\n");
92181ad6265SDimitry Andric return false;
92281ad6265SDimitry Andric }
92381ad6265SDimitry Andric
924bdd1243dSDimitry Andric // If we need to move a lifetime.start above the call, make sure that we can
925bdd1243dSDimitry Andric // actually do so. If the argument is bitcasted for example, we would have to
926bdd1243dSDimitry Andric // move the bitcast as well, which we don't handle.
927bdd1243dSDimitry Andric if (SkippedLifetimeStart) {
928bdd1243dSDimitry Andric auto *LifetimeArg =
929bdd1243dSDimitry Andric dyn_cast<Instruction>(SkippedLifetimeStart->getOperand(1));
930bdd1243dSDimitry Andric if (LifetimeArg && LifetimeArg->getParent() == C->getParent() &&
931bdd1243dSDimitry Andric C->comesBefore(LifetimeArg))
932bdd1243dSDimitry Andric return false;
933bdd1243dSDimitry Andric }
934bdd1243dSDimitry Andric
9355f757f3fSDimitry Andric // Check that storing to the first srcSize bytes of dest will not cause a
9365f757f3fSDimitry Andric // trap or data race.
9375f757f3fSDimitry Andric bool ExplicitlyDereferenceableOnly;
9385f757f3fSDimitry Andric if (!isWritableObject(getUnderlyingObject(cpyDest),
9395f757f3fSDimitry Andric ExplicitlyDereferenceableOnly) ||
9405f757f3fSDimitry Andric !isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpySize),
941bdd1243dSDimitry Andric DL, C, AC, DT)) {
94204eeddc0SDimitry Andric LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer not dereferenceable\n");
9430b57cec5SDimitry Andric return false;
94404eeddc0SDimitry Andric }
9450b57cec5SDimitry Andric
946e8d8bef9SDimitry Andric // Make sure that nothing can observe cpyDest being written early. There are
947e8d8bef9SDimitry Andric // a number of cases to consider:
948e8d8bef9SDimitry Andric // 1. cpyDest cannot be accessed between C and cpyStore as a precondition of
949e8d8bef9SDimitry Andric // the transform.
950e8d8bef9SDimitry Andric // 2. C itself may not access cpyDest (prior to the transform). This is
951e8d8bef9SDimitry Andric // checked further below.
952e8d8bef9SDimitry Andric // 3. If cpyDest is accessible to the caller of this function (potentially
953e8d8bef9SDimitry Andric // captured and not based on an alloca), we need to ensure that we cannot
954e8d8bef9SDimitry Andric // unwind between C and cpyStore. This is checked here.
955e8d8bef9SDimitry Andric // 4. If cpyDest is potentially captured, there may be accesses to it from
956e8d8bef9SDimitry Andric // another thread. In this case, we need to check that cpyStore is
957e8d8bef9SDimitry Andric // guaranteed to be executed if C is. As it is a non-atomic access, it
958e8d8bef9SDimitry Andric // renders accesses from other threads undefined.
959e8d8bef9SDimitry Andric // TODO: This is currently not checked.
96004eeddc0SDimitry Andric if (mayBeVisibleThroughUnwinding(cpyDest, C, cpyStore)) {
961bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << "Call Slot: Dest may be visible through unwinding\n");
9620b57cec5SDimitry Andric return false;
96304eeddc0SDimitry Andric }
9640b57cec5SDimitry Andric
9650b57cec5SDimitry Andric // Check that dest points to memory that is at least as aligned as src.
9665ffd83dbSDimitry Andric Align srcAlign = srcAlloca->getAlign();
967bdd1243dSDimitry Andric bool isDestSufficientlyAligned = srcAlign <= cpyDestAlign;
9680b57cec5SDimitry Andric // If dest is not aligned enough and we can't increase its alignment then
9690b57cec5SDimitry Andric // bail out.
970bdd1243dSDimitry Andric if (!isDestSufficientlyAligned && !isa<AllocaInst>(cpyDest)) {
971bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << "Call Slot: Dest not sufficiently aligned\n");
9720b57cec5SDimitry Andric return false;
973bdd1243dSDimitry Andric }
9740b57cec5SDimitry Andric
9750b57cec5SDimitry Andric // Check that src is not accessed except via the call and the memcpy. This
9760b57cec5SDimitry Andric // guarantees that it holds only undefined values when passed in (so the final
9770b57cec5SDimitry Andric // memcpy can be dropped), that it is not read or written between the call and
9780b57cec5SDimitry Andric // the memcpy, and that writing beyond the end of it is undefined.
979e8d8bef9SDimitry Andric SmallVector<User *, 8> srcUseList(srcAlloca->users());
9800b57cec5SDimitry Andric while (!srcUseList.empty()) {
9810b57cec5SDimitry Andric User *U = srcUseList.pop_back_val();
9820b57cec5SDimitry Andric
9830b57cec5SDimitry Andric if (isa<BitCastInst>(U) || isa<AddrSpaceCastInst>(U)) {
984e8d8bef9SDimitry Andric append_range(srcUseList, U->users());
9850b57cec5SDimitry Andric continue;
9860b57cec5SDimitry Andric }
987*0fca6ea1SDimitry Andric if (const auto *G = dyn_cast<GetElementPtrInst>(U);
988*0fca6ea1SDimitry Andric G && G->hasAllZeroIndices()) {
989e8d8bef9SDimitry Andric append_range(srcUseList, U->users());
9900b57cec5SDimitry Andric continue;
9910b57cec5SDimitry Andric }
99204eeddc0SDimitry Andric if (const auto *IT = dyn_cast<IntrinsicInst>(U))
9930b57cec5SDimitry Andric if (IT->isLifetimeStartOrEnd())
9940b57cec5SDimitry Andric continue;
9950b57cec5SDimitry Andric
996*0fca6ea1SDimitry Andric if (U != C && U != cpyLoad) {
997*0fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << "Call slot: Source accessed by " << *U << "\n");
9980b57cec5SDimitry Andric return false;
9990b57cec5SDimitry Andric }
1000*0fca6ea1SDimitry Andric }
10010b57cec5SDimitry Andric
100204eeddc0SDimitry Andric // Check whether src is captured by the called function, in which case there
100304eeddc0SDimitry Andric // may be further indirect uses of src.
100404eeddc0SDimitry Andric bool SrcIsCaptured = any_of(C->args(), [&](Use &U) {
100504eeddc0SDimitry Andric return U->stripPointerCasts() == cpySrc &&
100604eeddc0SDimitry Andric !C->doesNotCapture(C->getArgOperandNo(&U));
100704eeddc0SDimitry Andric });
100804eeddc0SDimitry Andric
100904eeddc0SDimitry Andric // If src is captured, then check whether there are any potential uses of
101004eeddc0SDimitry Andric // src through the captured pointer before the lifetime of src ends, either
101104eeddc0SDimitry Andric // due to a lifetime.end or a return from the function.
101204eeddc0SDimitry Andric if (SrcIsCaptured) {
101304eeddc0SDimitry Andric // Check that dest is not captured before/at the call. We have already
101404eeddc0SDimitry Andric // checked that src is not captured before it. If either had been captured,
101504eeddc0SDimitry Andric // then the call might be comparing the argument against the captured dest
101604eeddc0SDimitry Andric // or src pointer.
101704eeddc0SDimitry Andric Value *DestObj = getUnderlyingObject(cpyDest);
101804eeddc0SDimitry Andric if (!isIdentifiedFunctionLocal(DestObj) ||
101904eeddc0SDimitry Andric PointerMayBeCapturedBefore(DestObj, /* ReturnCaptures */ true,
102004eeddc0SDimitry Andric /* StoreCaptures */ true, C, DT,
102104eeddc0SDimitry Andric /* IncludeI */ true))
10220b57cec5SDimitry Andric return false;
10230b57cec5SDimitry Andric
102404eeddc0SDimitry Andric MemoryLocation SrcLoc =
102504eeddc0SDimitry Andric MemoryLocation(srcAlloca, LocationSize::precise(srcSize));
102604eeddc0SDimitry Andric for (Instruction &I :
102704eeddc0SDimitry Andric make_range(++C->getIterator(), C->getParent()->end())) {
102804eeddc0SDimitry Andric // Lifetime of srcAlloca ends at lifetime.end.
102904eeddc0SDimitry Andric if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
103004eeddc0SDimitry Andric if (II->getIntrinsicID() == Intrinsic::lifetime_end &&
103104eeddc0SDimitry Andric II->getArgOperand(1)->stripPointerCasts() == srcAlloca &&
103204eeddc0SDimitry Andric cast<ConstantInt>(II->getArgOperand(0))->uge(srcSize))
103304eeddc0SDimitry Andric break;
103404eeddc0SDimitry Andric }
103504eeddc0SDimitry Andric
103604eeddc0SDimitry Andric // Lifetime of srcAlloca ends at return.
103704eeddc0SDimitry Andric if (isa<ReturnInst>(&I))
103804eeddc0SDimitry Andric break;
103904eeddc0SDimitry Andric
104004eeddc0SDimitry Andric // Ignore the direct read of src in the load.
104104eeddc0SDimitry Andric if (&I == cpyLoad)
104204eeddc0SDimitry Andric continue;
104304eeddc0SDimitry Andric
104404eeddc0SDimitry Andric // Check whether this instruction may mod/ref src through the captured
104504eeddc0SDimitry Andric // pointer (we have already any direct mod/refs in the loop above).
104604eeddc0SDimitry Andric // Also bail if we hit a terminator, as we don't want to scan into other
104704eeddc0SDimitry Andric // blocks.
1048bdd1243dSDimitry Andric if (isModOrRefSet(BAA.getModRefInfo(&I, SrcLoc)) || I.isTerminator())
104904eeddc0SDimitry Andric return false;
105004eeddc0SDimitry Andric }
105104eeddc0SDimitry Andric }
105204eeddc0SDimitry Andric
10530b57cec5SDimitry Andric // Since we're changing the parameter to the callsite, we need to make sure
10540b57cec5SDimitry Andric // that what would be the new parameter dominates the callsite.
10555f757f3fSDimitry Andric bool NeedMoveGEP = false;
1056e8d8bef9SDimitry Andric if (!DT->dominates(cpyDest, C)) {
1057e8d8bef9SDimitry Andric // Support moving a constant index GEP before the call.
1058e8d8bef9SDimitry Andric auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest);
1059e8d8bef9SDimitry Andric if (GEP && GEP->hasAllConstantIndices() &&
1060e8d8bef9SDimitry Andric DT->dominates(GEP->getPointerOperand(), C))
10615f757f3fSDimitry Andric NeedMoveGEP = true;
1062e8d8bef9SDimitry Andric else
10630b57cec5SDimitry Andric return false;
1064e8d8bef9SDimitry Andric }
10650b57cec5SDimitry Andric
10660b57cec5SDimitry Andric // In addition to knowing that the call does not access src in some
10670b57cec5SDimitry Andric // unexpected manner, for example via a global, which we deduce from
10680b57cec5SDimitry Andric // the use analysis, we also need to know that it does not sneakily
10690b57cec5SDimitry Andric // access dest. We rely on AA to figure this out for us.
1070bdd1243dSDimitry Andric MemoryLocation DestWithSrcSize(cpyDest, LocationSize::precise(srcSize));
1071bdd1243dSDimitry Andric ModRefInfo MR = BAA.getModRefInfo(C, DestWithSrcSize);
10720b57cec5SDimitry Andric // If necessary, perform additional analysis.
10730b57cec5SDimitry Andric if (isModOrRefSet(MR))
1074bdd1243dSDimitry Andric MR = BAA.callCapturesBefore(C, DestWithSrcSize, DT);
10750b57cec5SDimitry Andric if (isModOrRefSet(MR))
10760b57cec5SDimitry Andric return false;
10770b57cec5SDimitry Andric
10780b57cec5SDimitry Andric // We can't create address space casts here because we don't know if they're
10790b57cec5SDimitry Andric // safe for the target.
10805f757f3fSDimitry Andric if (cpySrc->getType() != cpyDest->getType())
10810b57cec5SDimitry Andric return false;
10825ffd83dbSDimitry Andric for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
10835ffd83dbSDimitry Andric if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc &&
10845f757f3fSDimitry Andric cpySrc->getType() != C->getArgOperand(ArgI)->getType())
10850b57cec5SDimitry Andric return false;
10860b57cec5SDimitry Andric
10870b57cec5SDimitry Andric // All the checks have passed, so do the transformation.
10880b57cec5SDimitry Andric bool changedArgument = false;
10895ffd83dbSDimitry Andric for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
10905ffd83dbSDimitry Andric if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc) {
10910b57cec5SDimitry Andric changedArgument = true;
10925f757f3fSDimitry Andric C->setArgOperand(ArgI, cpyDest);
10930b57cec5SDimitry Andric }
10940b57cec5SDimitry Andric
10950b57cec5SDimitry Andric if (!changedArgument)
10960b57cec5SDimitry Andric return false;
10970b57cec5SDimitry Andric
10980b57cec5SDimitry Andric // If the destination wasn't sufficiently aligned then increase its alignment.
10990b57cec5SDimitry Andric if (!isDestSufficientlyAligned) {
11000b57cec5SDimitry Andric assert(isa<AllocaInst>(cpyDest) && "Can only increase alloca alignment!");
11015ffd83dbSDimitry Andric cast<AllocaInst>(cpyDest)->setAlignment(srcAlign);
11020b57cec5SDimitry Andric }
11030b57cec5SDimitry Andric
11045f757f3fSDimitry Andric if (NeedMoveGEP) {
11055f757f3fSDimitry Andric auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest);
11065f757f3fSDimitry Andric GEP->moveBefore(C);
11075f757f3fSDimitry Andric }
11085f757f3fSDimitry Andric
1109bdd1243dSDimitry Andric if (SkippedLifetimeStart) {
1110bdd1243dSDimitry Andric SkippedLifetimeStart->moveBefore(C);
1111bdd1243dSDimitry Andric MSSAU->moveBefore(MSSA->getMemoryAccess(SkippedLifetimeStart),
1112bdd1243dSDimitry Andric MSSA->getMemoryAccess(C));
1113bdd1243dSDimitry Andric }
1114bdd1243dSDimitry Andric
11154542f901SDimitry Andric combineAAMetadata(C, cpyLoad);
111604eeddc0SDimitry Andric if (cpyLoad != cpyStore)
11174542f901SDimitry Andric combineAAMetadata(C, cpyStore);
11180b57cec5SDimitry Andric
1119e8d8bef9SDimitry Andric ++NumCallSlot;
11200b57cec5SDimitry Andric return true;
11210b57cec5SDimitry Andric }
11220b57cec5SDimitry Andric
/// We've found that the (upward scanning) memory dependence of memcpy 'M' is
/// the memcpy 'MDep'. Try to simplify M to copy from MDep's input if we can.
///
/// I.e. rewrite
///   memcpy(b <- a)        // MDep
///   memcpy(c <- b)        // M
/// into
///   memcpy(b <- a)
///   memcpy(c <- a)
/// possibly with a constant forwarding offset into MDep's source. The
/// now-unused intermediate copy MDep is left for other transforms to remove.
///
/// \returns true if M was rewritten (or erased outright as a self-copy).
bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
                                                  MemCpyInst *MDep,
                                                  BatchAAResults &BAA) {
  // If dep instruction is reading from our current input, then it is a noop
  // transfer and substituting the input won't change this instruction. Just
  // ignore the input and let someone else zap MDep. This handles cases like:
  //    memcpy(a <- a)
  //    memcpy(b <- a)
  if (M->getSource() == MDep->getSource())
    return false;

  // We can only optimize non-volatile memcpy's.
  if (MDep->isVolatile())
    return false;

  int64_t MForwardOffset = 0;
  const DataLayout &DL = M->getModule()->getDataLayout();
  // We can only transforms memcpy's where the dest of one is the source of the
  // other, or they have an offset in a range. A non-negative constant offset
  // (M reads from MDep's dest plus MForwardOffset bytes) is also accepted.
  if (M->getSource() != MDep->getDest()) {
    std::optional<int64_t> Offset =
        M->getSource()->getPointerOffsetFrom(MDep->getDest(), DL);
    if (!Offset || *Offset < 0)
      return false;
    MForwardOffset = *Offset;
  }

  // The length of the memcpy's must be the same, or the preceding one
  // must be larger than the following one. With an offset, MDep must cover
  // M's length starting at that offset, so both lengths must be constants.
  if (MForwardOffset != 0 || MDep->getLength() != M->getLength()) {
    auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
    auto *MLen = dyn_cast<ConstantInt>(M->getLength());
    if (!MDepLen || !MLen ||
        MDepLen->getZExtValue() < MLen->getZExtValue() + MForwardOffset)
      return false;
  }

  IRBuilder<> Builder(M);
  auto *CopySource = MDep->getSource();
  Instruction *NewCopySource = nullptr;
  // If we materialize a GEP for the forwarded source below but end up bailing
  // out, erase it again on every exit path so we don't leave dead IR behind.
  auto CleanupOnRet = llvm::make_scope_exit([&NewCopySource] {
    if (NewCopySource && NewCopySource->use_empty())
      // Safety: It's safe here because we will only allocate more instructions
      // after finishing all BatchAA queries, but we have to be careful if we
      // want to do something like this in another place. Then we'd probably
      // have to delay instruction removal until all transforms on an
      // instruction finished.
      NewCopySource->eraseFromParent();
  });
  MaybeAlign CopySourceAlign = MDep->getSourceAlign();
  // We just need to calculate the actual size of the copy: MDep's source
  // location, narrowed to the number of bytes M actually reads.
  auto MCopyLoc = MemoryLocation::getForSource(MDep).getWithNewSize(
      MemoryLocation::getForSource(M).Size);

  // When the forwarding offset is greater than 0, we transform
  //    memcpy(d1 <- s1)
  //    memcpy(d2 <- d1+o)
  // to
  //    memcpy(d2 <- s1+o)
  if (MForwardOffset > 0) {
    // The copy destination of `M` maybe can serve as the source of copying.
    // If M's dest is already at exactly s1+o, reuse it instead of emitting a
    // new GEP.
    std::optional<int64_t> MDestOffset =
        M->getRawDest()->getPointerOffsetFrom(MDep->getRawSource(), DL);
    if (MDestOffset == MForwardOffset)
      CopySource = M->getDest();
    else {
      CopySource = Builder.CreateInBoundsPtrAdd(
          CopySource, Builder.getInt64(MForwardOffset));
      NewCopySource = dyn_cast<Instruction>(CopySource);
    }
    // We need to update `MCopyLoc` if an offset exists.
    MCopyLoc = MCopyLoc.getWithNewPtr(CopySource);
    // The offset may reduce the provable alignment of the source.
    if (CopySourceAlign)
      CopySourceAlign = commonAlignment(*CopySourceAlign, MForwardOffset);
  }

  // Verify that the copied-from memory doesn't change in between the two
  // transfers.  For example, in:
  //    memcpy(a <- b)
  //    *b = 42;
  //    memcpy(c <- a)
  // It would be invalid to transform the second memcpy into memcpy(c <- b).
  //
  // TODO: If the code between M and MDep is transparent to the destination "c",
  // then we could still perform the xform by moving M up to the first memcpy.
  if (writtenBetween(MSSA, BAA, MCopyLoc, MSSA->getMemoryAccess(MDep),
                     MSSA->getMemoryAccess(M)))
    return false;

  // No need to create `memcpy(a <- a)`.
  if (BAA.isMustAlias(M->getDest(), CopySource)) {
    // Remove the instruction we're replacing.
    eraseInstruction(M);
    ++NumMemCpyInstr;
    return true;
  }

  // If the dest of the second might alias the source of the first, then the
  // source and dest might overlap. In addition, if the source of the first
  // points to constant memory, they won't overlap by definition. Otherwise, we
  // still want to eliminate the intermediate value, but we have to generate a
  // memmove instead of memcpy.
  bool UseMemMove = false;
  if (isModSet(BAA.getModRefInfo(M, MemoryLocation::getForSource(MDep)))) {
    // Don't convert llvm.memcpy.inline into memmove because memmove can be
    // lowered as a call, and that is not allowed for llvm.memcpy.inline (and
    // there is no inline version of llvm.memmove)
    if (isa<MemCpyInlineInst>(M))
      return false;
    UseMemMove = true;
  }

  // If all checks passed, then we can transform M.
  LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy->memcpy src:\n"
                    << *MDep << '\n'
                    << *M << '\n');

  // TODO: Is this worth it if we're creating a less aligned memcpy? For
  // example we could be moving from movaps -> movq on x86.
  Instruction *NewM;
  if (UseMemMove)
    NewM =
        Builder.CreateMemMove(M->getDest(), M->getDestAlign(), CopySource,
                              CopySourceAlign, M->getLength(), M->isVolatile());
  else if (isa<MemCpyInlineInst>(M)) {
    // llvm.memcpy may be promoted to llvm.memcpy.inline, but the converse is
    // never allowed since that would allow the latter to be lowered as a call
    // to an external function.
    NewM = Builder.CreateMemCpyInline(M->getDest(), M->getDestAlign(),
                                      CopySource, CopySourceAlign,
                                      M->getLength(), M->isVolatile());
  } else
    NewM =
        Builder.CreateMemCpy(M->getDest(), M->getDestAlign(), CopySource,
                             CopySourceAlign, M->getLength(), M->isVolatile());
  // Keep debug-info assignment tracking attached to the replacement copy.
  NewM->copyMetadata(*M, LLVMContext::MD_DIAssignID);

  // Keep MemorySSA in sync: the replacement writes memory, so it gets a
  // MemoryDef placed right after M's def before M itself is erased.
  assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M)));
  auto *LastDef = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
  auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);

  // Remove the instruction we're replacing.
  eraseInstruction(M);
  ++NumMemCpyInstr;
  return true;
}
12720b57cec5SDimitry Andric
/// We've found that the (upward scanning) memory dependence of \p MemCpy is
/// \p MemSet. Try to simplify \p MemSet to only set the trailing bytes that
/// weren't copied over by \p MemCpy.
///
/// In other words, transform:
/// \code
///   memset(dst, c, dst_size);
///   ...
///   memcpy(dst, src, src_size);
/// \endcode
/// into:
/// \code
///   ...
///   memset(dst + src_size, c, dst_size <= src_size ? 0 : dst_size - src_size);
///   memcpy(dst, src, src_size);
/// \endcode
///
/// The memset is sunk to just before the memcpy to ensure that src_size is
/// present when emitting the simplified memset.
///
/// \returns true if the memset was shrunk (or erased when fully covered).
bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
                                                  MemSetInst *MemSet,
                                                  BatchAAResults &BAA) {
  // We can only transform memset/memcpy with the same destination.
  if (!BAA.isMustAlias(MemSet->getDest(), MemCpy->getDest()))
    return false;

  // Don't perform the transform if src_size may be zero. In that case, the
  // transform is essentially a complex no-op and may lead to an infinite
  // loop if BasicAA is smart enough to understand that dst and dst + src_size
  // are still MustAlias after the transform.
  Value *SrcSize = MemCpy->getLength();
  if (!isKnownNonZero(SrcSize,
                      SimplifyQuery(MemCpy->getDataLayout(), DT, AC, MemCpy)))
    return false;

  // Check that src and dst of the memcpy aren't the same. While memcpy
  // operands cannot partially overlap, exact equality is allowed.
  if (isModSet(BAA.getModRefInfo(MemCpy, MemoryLocation::getForSource(MemCpy))))
    return false;

  // We know that dst up to src_size is not written. We now need to make sure
  // that dst up to dst_size is not accessed. (If we did not move the memset,
  // checking for reads would be sufficient.)
  if (accessedBetween(BAA, MemoryLocation::getForDest(MemSet),
                      MSSA->getMemoryAccess(MemSet),
                      MSSA->getMemoryAccess(MemCpy)))
    return false;

  // Use the same i8* dest as the memcpy, killing the memset dest if different.
  Value *Dest = MemCpy->getRawDest();
  Value *DestSize = MemSet->getLength();

  // If unwinding between the two instructions could observe dst, sinking the
  // memset would change what an exception handler sees.
  if (mayBeVisibleThroughUnwinding(Dest, MemSet, MemCpy))
    return false;

  // If the sizes are the same, simply drop the memset instead of generating
  // a replacement with zero size.
  if (DestSize == SrcSize) {
    eraseInstruction(MemSet);
    return true;
  }

  // By default, create an unaligned memset.
  Align Alignment = Align(1);
  // If Dest is aligned, and SrcSize is constant, use the minimum alignment
  // of the sum.
  const Align DestAlign = std::max(MemSet->getDestAlign().valueOrOne(),
                                   MemCpy->getDestAlign().valueOrOne());
  if (DestAlign > 1)
    if (auto *SrcSizeC = dyn_cast<ConstantInt>(SrcSize))
      Alignment = commonAlignment(DestAlign, SrcSizeC->getZExtValue());

  IRBuilder<> Builder(MemCpy);

  // Preserve the debug location of the old memset for the code emitted here
  // related to the new memset. This is correct according to the rules in
  // https://llvm.org/docs/HowToUpdateDebugInfo.html about "when to preserve an
  // instruction location", given that we move the memset within the basic
  // block.
  assert(MemSet->getParent() == MemCpy->getParent() &&
         "Preserving debug location based on moving memset within BB.");
  Builder.SetCurrentDebugLocation(MemSet->getDebugLoc());

  // If the sizes have different types, zext the smaller one.
  if (DestSize->getType() != SrcSize->getType()) {
    if (DestSize->getType()->getIntegerBitWidth() >
        SrcSize->getType()->getIntegerBitWidth())
      SrcSize = Builder.CreateZExt(SrcSize, DestSize->getType());
    else
      DestSize = Builder.CreateZExt(DestSize, SrcSize->getType());
  }

  // memset_len = (dst_size <= src_size) ? 0 : dst_size - src_size. The select
  // guards against using the result of an unsigned sub that wrapped.
  Value *Ule = Builder.CreateICmpULE(DestSize, SrcSize);
  Value *SizeDiff = Builder.CreateSub(DestSize, SrcSize);
  Value *MemsetLen = Builder.CreateSelect(
      Ule, ConstantInt::getNullValue(DestSize->getType()), SizeDiff);
  Instruction *NewMemSet =
      Builder.CreateMemSet(Builder.CreatePtrAdd(Dest, SrcSize),
                           MemSet->getOperand(1), MemsetLen, Alignment);

  assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)) &&
         "MemCpy must be a MemoryDef");
  // The new memset is inserted before the memcpy, and it is known that the
  // memcpy's defining access is the memset about to be removed.
  auto *LastDef =
      cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
  auto *NewAccess =
      MSSAU->createMemoryAccessBefore(NewMemSet, nullptr, LastDef);
  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);

  eraseInstruction(MemSet);
  return true;
}
13860b57cec5SDimitry Andric
13870b57cec5SDimitry Andric /// Determine whether the instruction has undefined content for the given Size,
13880b57cec5SDimitry Andric /// either because it was freshly alloca'd or started its lifetime.
hasUndefContents(MemorySSA * MSSA,BatchAAResults & AA,Value * V,MemoryDef * Def,Value * Size)1389bdd1243dSDimitry Andric static bool hasUndefContents(MemorySSA *MSSA, BatchAAResults &AA, Value *V,
1390fe6060f1SDimitry Andric MemoryDef *Def, Value *Size) {
1391e8d8bef9SDimitry Andric if (MSSA->isLiveOnEntryDef(Def))
1392e8d8bef9SDimitry Andric return isa<AllocaInst>(getUnderlyingObject(V));
1393e8d8bef9SDimitry Andric
139404eeddc0SDimitry Andric if (auto *II = dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst())) {
1395e8d8bef9SDimitry Andric if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
139604eeddc0SDimitry Andric auto *LTSize = cast<ConstantInt>(II->getArgOperand(0));
1397fe6060f1SDimitry Andric
139804eeddc0SDimitry Andric if (auto *CSize = dyn_cast<ConstantInt>(Size)) {
1399bdd1243dSDimitry Andric if (AA.isMustAlias(V, II->getArgOperand(1)) &&
1400fe6060f1SDimitry Andric LTSize->getZExtValue() >= CSize->getZExtValue())
1401e8d8bef9SDimitry Andric return true;
1402e8d8bef9SDimitry Andric }
1403fe6060f1SDimitry Andric
1404fe6060f1SDimitry Andric // If the lifetime.start covers a whole alloca (as it almost always
1405fe6060f1SDimitry Andric // does) and we're querying a pointer based on that alloca, then we know
1406fe6060f1SDimitry Andric // the memory is definitely undef, regardless of how exactly we alias.
1407fe6060f1SDimitry Andric // The size also doesn't matter, as an out-of-bounds access would be UB.
140804eeddc0SDimitry Andric if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(V))) {
1409fe6060f1SDimitry Andric if (getUnderlyingObject(II->getArgOperand(1)) == Alloca) {
1410*0fca6ea1SDimitry Andric const DataLayout &DL = Alloca->getDataLayout();
1411bdd1243dSDimitry Andric if (std::optional<TypeSize> AllocaSize =
1412bdd1243dSDimitry Andric Alloca->getAllocationSize(DL))
1413bdd1243dSDimitry Andric if (*AllocaSize == LTSize->getValue())
1414fe6060f1SDimitry Andric return true;
1415fe6060f1SDimitry Andric }
1416fe6060f1SDimitry Andric }
1417e8d8bef9SDimitry Andric }
141804eeddc0SDimitry Andric }
1419e8d8bef9SDimitry Andric
1420e8d8bef9SDimitry Andric return false;
1421e8d8bef9SDimitry Andric }
1422e8d8bef9SDimitry Andric
/// Transform memcpy to memset when its source was just memset.
/// In other words, turn:
/// \code
///   memset(dst1, c, dst1_size);
///   memcpy(dst2, dst1, dst2_size);
/// \endcode
/// into:
/// \code
///   memset(dst1, c, dst1_size);
///   memset(dst2, c, dst2_size);
/// \endcode
/// When dst2_size <= dst1_size.
///
/// \returns true if \p MemCpy was replaced with a new memset (the original
/// memcpy is left for the caller to clean up).
bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
                                               MemSetInst *MemSet,
                                               BatchAAResults &BAA) {
  // Make sure that memcpy(..., memset(...), ...), that is we are memsetting and
  // memcpying from the same address. Otherwise it is hard to reason about.
  if (!BAA.isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource()))
    return false;

  Value *MemSetSize = MemSet->getLength();
  Value *CopySize = MemCpy->getLength();

  if (MemSetSize != CopySize) {
    // Make sure the memcpy doesn't read any more than what the memset wrote.
    // Don't worry about sizes larger than i64.

    // A known memset size is required.
    auto *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize);
    if (!CMemSetSize)
      return false;

    // A known memcpy size is also required.
    auto *CCopySize = dyn_cast<ConstantInt>(CopySize);
    if (!CCopySize)
      return false;
    if (CCopySize->getZExtValue() > CMemSetSize->getZExtValue()) {
      // If the memcpy is larger than the memset, but the memory was undef prior
      // to the memset, we can just ignore the tail. Technically we're only
      // interested in the bytes from MemSetSize..CopySize here, but as we can't
      // easily represent this location, we use the full 0..CopySize range.
      MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
      bool CanReduceSize = false;
      MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet);
      // Walk upwards from just above the memset to find what last clobbered
      // the copied-from location; if that is a def with undef contents, the
      // tail bytes are undef and the copy can shrink to the memset's size.
      MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
          MemSetAccess->getDefiningAccess(), MemCpyLoc, BAA);
      if (auto *MD = dyn_cast<MemoryDef>(Clobber))
        if (hasUndefContents(MSSA, BAA, MemCpy->getSource(), MD, CopySize))
          CanReduceSize = true;

      if (!CanReduceSize)
        return false;
      CopySize = MemSetSize;
    }
  }

  IRBuilder<> Builder(MemCpy);
  // Emit the replacement memset at the memcpy's dest, reusing the memset's
  // fill value and the (possibly reduced) copy size.
  Instruction *NewM =
      Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
                           CopySize, MemCpy->getDestAlign());
  // Keep MemorySSA in sync: the new memset writes memory, so insert a
  // MemoryDef for it directly after the memcpy's def.
  auto *LastDef =
      cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
  auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);

  return true;
}
14900b57cec5SDimitry Andric
14915f757f3fSDimitry Andric // Attempts to optimize the pattern whereby memory is copied from an alloca to
14925f757f3fSDimitry Andric // another alloca, where the two allocas don't have conflicting mod/ref. If
14935f757f3fSDimitry Andric // successful, the two allocas can be merged into one and the transfer can be
14945f757f3fSDimitry Andric // deleted. This pattern is generated frequently in Rust, due to the ubiquity of
14955f757f3fSDimitry Andric // move operations in that language.
14965f757f3fSDimitry Andric //
14975f757f3fSDimitry Andric // Once we determine that the optimization is safe to perform, we replace all
14985f757f3fSDimitry Andric // uses of the destination alloca with the source alloca. We also "shrink wrap"
14995f757f3fSDimitry Andric // the lifetime markers of the single merged alloca to before the first use
15005f757f3fSDimitry Andric // and after the last use. Note that the "shrink wrapping" procedure is a safe
15015f757f3fSDimitry Andric // transformation only because we restrict the scope of this optimization to
15025f757f3fSDimitry Andric // allocas that aren't captured.
performStackMoveOptzn(Instruction * Load,Instruction * Store,AllocaInst * DestAlloca,AllocaInst * SrcAlloca,TypeSize Size,BatchAAResults & BAA)15035f757f3fSDimitry Andric bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
15045f757f3fSDimitry Andric AllocaInst *DestAlloca,
15055f757f3fSDimitry Andric AllocaInst *SrcAlloca, TypeSize Size,
15065f757f3fSDimitry Andric BatchAAResults &BAA) {
15075f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << "Stack Move: Attempting to optimize:\n"
15085f757f3fSDimitry Andric << *Store << "\n");
15095f757f3fSDimitry Andric
15105f757f3fSDimitry Andric // Make sure the two allocas are in the same address space.
15115f757f3fSDimitry Andric if (SrcAlloca->getAddressSpace() != DestAlloca->getAddressSpace()) {
15125f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << "Stack Move: Address space mismatch\n");
15135f757f3fSDimitry Andric return false;
15145f757f3fSDimitry Andric }
15155f757f3fSDimitry Andric
15165f757f3fSDimitry Andric // Check that copy is full with static size.
1517*0fca6ea1SDimitry Andric const DataLayout &DL = DestAlloca->getDataLayout();
15185f757f3fSDimitry Andric std::optional<TypeSize> SrcSize = SrcAlloca->getAllocationSize(DL);
15195f757f3fSDimitry Andric if (!SrcSize || Size != *SrcSize) {
15205f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << "Stack Move: Source alloca size mismatch\n");
15215f757f3fSDimitry Andric return false;
15225f757f3fSDimitry Andric }
15235f757f3fSDimitry Andric std::optional<TypeSize> DestSize = DestAlloca->getAllocationSize(DL);
15245f757f3fSDimitry Andric if (!DestSize || Size != *DestSize) {
15255f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << "Stack Move: Destination alloca size mismatch\n");
15265f757f3fSDimitry Andric return false;
15275f757f3fSDimitry Andric }
15285f757f3fSDimitry Andric
15295f757f3fSDimitry Andric if (!SrcAlloca->isStaticAlloca() || !DestAlloca->isStaticAlloca())
15305f757f3fSDimitry Andric return false;
15315f757f3fSDimitry Andric
15325f757f3fSDimitry Andric // Check that src and dest are never captured, unescaped allocas. Also
15335f757f3fSDimitry Andric // find the nearest common dominator and postdominator for all users in
15345f757f3fSDimitry Andric // order to shrink wrap the lifetimes, and instructions with noalias metadata
15355f757f3fSDimitry Andric // to remove them.
15365f757f3fSDimitry Andric
15375f757f3fSDimitry Andric SmallVector<Instruction *, 4> LifetimeMarkers;
15385f757f3fSDimitry Andric SmallSet<Instruction *, 4> NoAliasInstrs;
15395f757f3fSDimitry Andric bool SrcNotDom = false;
15405f757f3fSDimitry Andric
15415f757f3fSDimitry Andric // Recursively track the user and check whether modified alias exist.
15425f757f3fSDimitry Andric auto IsDereferenceableOrNull = [](Value *V, const DataLayout &DL) -> bool {
15435f757f3fSDimitry Andric bool CanBeNull, CanBeFreed;
15445f757f3fSDimitry Andric return V->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed);
15455f757f3fSDimitry Andric };
15465f757f3fSDimitry Andric
15475f757f3fSDimitry Andric auto CaptureTrackingWithModRef =
15485f757f3fSDimitry Andric [&](Instruction *AI,
15495f757f3fSDimitry Andric function_ref<bool(Instruction *)> ModRefCallback) -> bool {
15505f757f3fSDimitry Andric SmallVector<Instruction *, 8> Worklist;
15515f757f3fSDimitry Andric Worklist.push_back(AI);
15525f757f3fSDimitry Andric unsigned MaxUsesToExplore = getDefaultMaxUsesToExploreForCaptureTracking();
15535f757f3fSDimitry Andric Worklist.reserve(MaxUsesToExplore);
15545f757f3fSDimitry Andric SmallSet<const Use *, 20> Visited;
15555f757f3fSDimitry Andric while (!Worklist.empty()) {
15565f757f3fSDimitry Andric Instruction *I = Worklist.back();
15575f757f3fSDimitry Andric Worklist.pop_back();
15585f757f3fSDimitry Andric for (const Use &U : I->uses()) {
15595f757f3fSDimitry Andric auto *UI = cast<Instruction>(U.getUser());
15605f757f3fSDimitry Andric // If any use that isn't dominated by SrcAlloca exists, we move src
15615f757f3fSDimitry Andric // alloca to the entry before the transformation.
15625f757f3fSDimitry Andric if (!DT->dominates(SrcAlloca, UI))
15635f757f3fSDimitry Andric SrcNotDom = true;
15645f757f3fSDimitry Andric
15655f757f3fSDimitry Andric if (Visited.size() >= MaxUsesToExplore) {
15665f757f3fSDimitry Andric LLVM_DEBUG(
15675f757f3fSDimitry Andric dbgs()
15685f757f3fSDimitry Andric << "Stack Move: Exceeded max uses to see ModRef, bailing\n");
15695f757f3fSDimitry Andric return false;
15705f757f3fSDimitry Andric }
15715f757f3fSDimitry Andric if (!Visited.insert(&U).second)
15725f757f3fSDimitry Andric continue;
15735f757f3fSDimitry Andric switch (DetermineUseCaptureKind(U, IsDereferenceableOrNull)) {
15745f757f3fSDimitry Andric case UseCaptureKind::MAY_CAPTURE:
15755f757f3fSDimitry Andric return false;
15765f757f3fSDimitry Andric case UseCaptureKind::PASSTHROUGH:
15775f757f3fSDimitry Andric // Instructions cannot have non-instruction users.
15785f757f3fSDimitry Andric Worklist.push_back(UI);
15795f757f3fSDimitry Andric continue;
15805f757f3fSDimitry Andric case UseCaptureKind::NO_CAPTURE: {
15815f757f3fSDimitry Andric if (UI->isLifetimeStartOrEnd()) {
15825f757f3fSDimitry Andric // We note the locations of these intrinsic calls so that we can
15835f757f3fSDimitry Andric // delete them later if the optimization succeeds, this is safe
15845f757f3fSDimitry Andric // since both llvm.lifetime.start and llvm.lifetime.end intrinsics
15855f757f3fSDimitry Andric // practically fill all the bytes of the alloca with an undefined
15865f757f3fSDimitry Andric // value, although conceptually marked as alive/dead.
15875f757f3fSDimitry Andric int64_t Size = cast<ConstantInt>(UI->getOperand(0))->getSExtValue();
15885f757f3fSDimitry Andric if (Size < 0 || Size == DestSize) {
15895f757f3fSDimitry Andric LifetimeMarkers.push_back(UI);
15905f757f3fSDimitry Andric continue;
15915f757f3fSDimitry Andric }
15925f757f3fSDimitry Andric }
15935f757f3fSDimitry Andric if (UI->hasMetadata(LLVMContext::MD_noalias))
15945f757f3fSDimitry Andric NoAliasInstrs.insert(UI);
15955f757f3fSDimitry Andric if (!ModRefCallback(UI))
15965f757f3fSDimitry Andric return false;
15975f757f3fSDimitry Andric }
15985f757f3fSDimitry Andric }
15995f757f3fSDimitry Andric }
16005f757f3fSDimitry Andric }
16015f757f3fSDimitry Andric return true;
16025f757f3fSDimitry Andric };
16035f757f3fSDimitry Andric
16045f757f3fSDimitry Andric // Check that dest has no Mod/Ref, from the alloca to the Store, except full
16055f757f3fSDimitry Andric // size lifetime intrinsics. And collect modref inst for the reachability
16065f757f3fSDimitry Andric // check.
16075f757f3fSDimitry Andric ModRefInfo DestModRef = ModRefInfo::NoModRef;
16085f757f3fSDimitry Andric MemoryLocation DestLoc(DestAlloca, LocationSize::precise(Size));
16095f757f3fSDimitry Andric SmallVector<BasicBlock *, 8> ReachabilityWorklist;
16105f757f3fSDimitry Andric auto DestModRefCallback = [&](Instruction *UI) -> bool {
16115f757f3fSDimitry Andric // We don't care about the store itself.
16125f757f3fSDimitry Andric if (UI == Store)
16135f757f3fSDimitry Andric return true;
16145f757f3fSDimitry Andric ModRefInfo Res = BAA.getModRefInfo(UI, DestLoc);
16155f757f3fSDimitry Andric DestModRef |= Res;
16165f757f3fSDimitry Andric if (isModOrRefSet(Res)) {
16175f757f3fSDimitry Andric // Instructions reachability checks.
16185f757f3fSDimitry Andric // FIXME: adding the Instruction version isPotentiallyReachableFromMany on
16195f757f3fSDimitry Andric // lib/Analysis/CFG.cpp (currently only for BasicBlocks) might be helpful.
16205f757f3fSDimitry Andric if (UI->getParent() == Store->getParent()) {
16215f757f3fSDimitry Andric // The same block case is special because it's the only time we're
16225f757f3fSDimitry Andric // looking within a single block to see which instruction comes first.
16235f757f3fSDimitry Andric // Once we start looking at multiple blocks, the first instruction of
16245f757f3fSDimitry Andric // the block is reachable, so we only need to determine reachability
16255f757f3fSDimitry Andric // between whole blocks.
16265f757f3fSDimitry Andric BasicBlock *BB = UI->getParent();
16275f757f3fSDimitry Andric
16285f757f3fSDimitry Andric // If A comes before B, then B is definitively reachable from A.
16295f757f3fSDimitry Andric if (UI->comesBefore(Store))
16305f757f3fSDimitry Andric return false;
16315f757f3fSDimitry Andric
16325f757f3fSDimitry Andric // If the user's parent block is entry, no predecessor exists.
16335f757f3fSDimitry Andric if (BB->isEntryBlock())
16345f757f3fSDimitry Andric return true;
16355f757f3fSDimitry Andric
16365f757f3fSDimitry Andric // Otherwise, continue doing the normal per-BB CFG walk.
16375f757f3fSDimitry Andric ReachabilityWorklist.append(succ_begin(BB), succ_end(BB));
16385f757f3fSDimitry Andric } else {
16395f757f3fSDimitry Andric ReachabilityWorklist.push_back(UI->getParent());
16405f757f3fSDimitry Andric }
16415f757f3fSDimitry Andric }
16425f757f3fSDimitry Andric return true;
16435f757f3fSDimitry Andric };
16445f757f3fSDimitry Andric
16455f757f3fSDimitry Andric if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback))
16465f757f3fSDimitry Andric return false;
16475f757f3fSDimitry Andric // Bailout if Dest may have any ModRef before Store.
16485f757f3fSDimitry Andric if (!ReachabilityWorklist.empty() &&
16495f757f3fSDimitry Andric isPotentiallyReachableFromMany(ReachabilityWorklist, Store->getParent(),
16505f757f3fSDimitry Andric nullptr, DT, nullptr))
16515f757f3fSDimitry Andric return false;
16525f757f3fSDimitry Andric
16535f757f3fSDimitry Andric // Check that, from after the Load to the end of the BB,
16545f757f3fSDimitry Andric // - if the dest has any Mod, src has no Ref, and
16555f757f3fSDimitry Andric // - if the dest has any Ref, src has no Mod except full-sized lifetimes.
16565f757f3fSDimitry Andric MemoryLocation SrcLoc(SrcAlloca, LocationSize::precise(Size));
16575f757f3fSDimitry Andric
16585f757f3fSDimitry Andric auto SrcModRefCallback = [&](Instruction *UI) -> bool {
16595f757f3fSDimitry Andric // Any ModRef post-dominated by Load doesn't matter, also Load and Store
16605f757f3fSDimitry Andric // themselves can be ignored.
16615f757f3fSDimitry Andric if (PDT->dominates(Load, UI) || UI == Load || UI == Store)
16625f757f3fSDimitry Andric return true;
16635f757f3fSDimitry Andric ModRefInfo Res = BAA.getModRefInfo(UI, SrcLoc);
16645f757f3fSDimitry Andric if ((isModSet(DestModRef) && isRefSet(Res)) ||
16655f757f3fSDimitry Andric (isRefSet(DestModRef) && isModSet(Res)))
16665f757f3fSDimitry Andric return false;
16675f757f3fSDimitry Andric
16685f757f3fSDimitry Andric return true;
16695f757f3fSDimitry Andric };
16705f757f3fSDimitry Andric
16715f757f3fSDimitry Andric if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback))
16725f757f3fSDimitry Andric return false;
16735f757f3fSDimitry Andric
16745f757f3fSDimitry Andric // We can do the transformation. First, move the SrcAlloca to the start of the
16755f757f3fSDimitry Andric // BB.
16765f757f3fSDimitry Andric if (SrcNotDom)
16775f757f3fSDimitry Andric SrcAlloca->moveBefore(*SrcAlloca->getParent(),
16785f757f3fSDimitry Andric SrcAlloca->getParent()->getFirstInsertionPt());
16795f757f3fSDimitry Andric // Align the allocas appropriately.
16805f757f3fSDimitry Andric SrcAlloca->setAlignment(
16815f757f3fSDimitry Andric std::max(SrcAlloca->getAlign(), DestAlloca->getAlign()));
16825f757f3fSDimitry Andric
16835f757f3fSDimitry Andric // Merge the two allocas.
16845f757f3fSDimitry Andric DestAlloca->replaceAllUsesWith(SrcAlloca);
16855f757f3fSDimitry Andric eraseInstruction(DestAlloca);
16865f757f3fSDimitry Andric
16875f757f3fSDimitry Andric // Drop metadata on the source alloca.
16885f757f3fSDimitry Andric SrcAlloca->dropUnknownNonDebugMetadata();
16895f757f3fSDimitry Andric
16905f757f3fSDimitry Andric // TODO: Reconstruct merged lifetime markers.
16915f757f3fSDimitry Andric // Remove all other lifetime markers. if the original lifetime intrinsics
16925f757f3fSDimitry Andric // exists.
16935f757f3fSDimitry Andric if (!LifetimeMarkers.empty()) {
16945f757f3fSDimitry Andric for (Instruction *I : LifetimeMarkers)
16955f757f3fSDimitry Andric eraseInstruction(I);
16965f757f3fSDimitry Andric }
16975f757f3fSDimitry Andric
16985f757f3fSDimitry Andric // As this transformation can cause memory accesses that didn't previously
16995f757f3fSDimitry Andric // alias to begin to alias one another, we remove !noalias metadata from any
17005f757f3fSDimitry Andric // uses of either alloca. This is conservative, but more precision doesn't
17015f757f3fSDimitry Andric // seem worthwhile right now.
17025f757f3fSDimitry Andric for (Instruction *I : NoAliasInstrs)
17035f757f3fSDimitry Andric I->setMetadata(LLVMContext::MD_noalias, nullptr);
17045f757f3fSDimitry Andric
17055f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << "Stack Move: Performed staack-move optimization\n");
17065f757f3fSDimitry Andric NumStackMove++;
17075f757f3fSDimitry Andric return true;
17085f757f3fSDimitry Andric }
17095f757f3fSDimitry Andric
isZeroSize(Value * Size)17105f757f3fSDimitry Andric static bool isZeroSize(Value *Size) {
17115f757f3fSDimitry Andric if (auto *I = dyn_cast<Instruction>(Size))
1712*0fca6ea1SDimitry Andric if (auto *Res = simplifyInstruction(I, I->getDataLayout()))
17135f757f3fSDimitry Andric Size = Res;
17145f757f3fSDimitry Andric // Treat undef/poison size like zero.
17155f757f3fSDimitry Andric if (auto *C = dyn_cast<Constant>(Size))
17165f757f3fSDimitry Andric return isa<UndefValue>(C) || C->isNullValue();
17175f757f3fSDimitry Andric return false;
17185f757f3fSDimitry Andric }
17195f757f3fSDimitry Andric
/// Perform simplification of memcpy's. If we have memcpy A
/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
/// B to be a memcpy from X to Z (or potentially a memmove, depending on
/// circumstances). This allows later passes to remove the first memcpy
/// altogether.
///
/// \param M the memcpy being visited.
/// \param BBI the iterator of the enclosing basic-block walk; advanced here
///        when \p M is erased so the caller's iteration stays valid.
/// \returns true if any transformation was performed.
bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
  // We can only optimize non-volatile memcpy's.
  if (M->isVolatile())
    return false;

  // If the source and destination of the memcpy are the same, then zap it.
  if (M->getSource() == M->getDest()) {
    ++BBI;
    eraseInstruction(M);
    return true;
  }

  // If the size is zero, remove the memcpy.
  if (isZeroSize(M->getLength())) {
    ++BBI;
    eraseInstruction(M);
    return true;
  }

  MemoryUseOrDef *MA = MSSA->getMemoryAccess(M);
  if (!MA)
    // Degenerate case: memcpy marked as not accessing memory.
    return false;

  // If copying from a constant, try to turn the memcpy into a memset when the
  // initializer is a splat of a single byte value.
  if (auto *GV = dyn_cast<GlobalVariable>(M->getSource()))
    if (GV->isConstant() && GV->hasDefinitiveInitializer())
      if (Value *ByteVal = isBytewiseValue(GV->getInitializer(),
                                           M->getDataLayout())) {
        IRBuilder<> Builder(M);
        Instruction *NewM = Builder.CreateMemSet(
            M->getRawDest(), ByteVal, M->getLength(), M->getDestAlign(), false);
        // Keep MemorySSA in sync: the new memset defines memory right after
        // the old memcpy's def.
        auto *LastDef = cast<MemoryDef>(MA);
        auto *NewAccess =
            MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
        MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);

        eraseInstruction(M);
        ++NumCpyToSet;
        return true;
      }

  BatchAAResults BAA(*AA);
  // FIXME: Not using getClobberingMemoryAccess() here due to PR54682.
  MemoryAccess *AnyClobber = MA->getDefiningAccess();
  MemoryLocation DestLoc = MemoryLocation::getForDest(M);
  const MemoryAccess *DestClobber =
      MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc, BAA);

  // Try to turn a partially redundant memset + memcpy into
  // smaller memset + memcpy.  We don't need the memcpy size for this.
  // The memcpy must post-dom the memset, so limit this to the same basic
  // block. A non-local generalization is likely not worthwhile.
  if (auto *MD = dyn_cast<MemoryDef>(DestClobber))
    if (auto *MDep = dyn_cast_or_null<MemSetInst>(MD->getMemoryInst()))
      if (DestClobber->getBlock() == M->getParent())
        if (processMemSetMemCpyDependence(M, MDep, BAA))
          return true;

  MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
      AnyClobber, MemoryLocation::getForSource(M), BAA);

  // There are five possible optimizations we can do for memcpy:
  //   a) memcpy-memcpy xform which exposes redundance for DSE.
  //   b) call-memcpy xform for return slot optimization.
  //   c) memcpy from freshly alloca'd space or space that has just started
  //      its lifetime copies undefined data, and we can therefore eliminate
  //      the memcpy in favor of the data that was already at the destination.
  //   d) memcpy from a just-memset'd source can be turned into memset.
  //   e) elimination of memcpy via stack-move optimization.
  if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
    if (Instruction *MI = MD->getMemoryInst()) {
      if (auto *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
        if (auto *C = dyn_cast<CallInst>(MI)) {
          if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
                                   TypeSize::getFixed(CopySize->getZExtValue()),
                                   M->getDestAlign().valueOrOne(), BAA,
                                   [C]() -> CallInst * { return C; })) {
            LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
                              << "    call: " << *C << "\n"
                              << "    memcpy: " << *M << "\n");
            eraseInstruction(M);
            ++NumMemCpyInstr;
            return true;
          }
        }
      }
      if (auto *MDep = dyn_cast<MemCpyInst>(MI))
        if (processMemCpyMemCpyDependence(M, MDep, BAA))
          return true;
      if (auto *MDep = dyn_cast<MemSetInst>(MI)) {
        if (performMemCpyToMemSetOptzn(M, MDep, BAA)) {
          LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n");
          eraseInstruction(M);
          ++NumCpyToSet;
          return true;
        }
      }
    }

    if (hasUndefContents(MSSA, BAA, M->getSource(), MD, M->getLength())) {
      LLVM_DEBUG(dbgs() << "Removed memcpy from undef\n");
      eraseInstruction(M);
      ++NumMemCpyInstr;
      return true;
    }
  }

  // If the transfer is from a stack slot to a stack slot, then we may be able
  // to perform the stack-move optimization. See the comments in
  // performStackMoveOptzn() for more details.
  auto *DestAlloca = dyn_cast<AllocaInst>(M->getDest());
  if (!DestAlloca)
    return false;
  auto *SrcAlloca = dyn_cast<AllocaInst>(M->getSource());
  if (!SrcAlloca)
    return false;
  ConstantInt *Len = dyn_cast<ConstantInt>(M->getLength());
  if (Len == nullptr)
    return false;
  if (performStackMoveOptzn(M, M, DestAlloca, SrcAlloca,
                            TypeSize::getFixed(Len->getZExtValue()), BAA)) {
    // Avoid invalidating the iterator.
    BBI = M->getNextNonDebugInstruction()->getIterator();
    eraseInstruction(M);
    ++NumMemCpyInstr;
    return true;
  }

  return false;
}
18560b57cec5SDimitry Andric
18570b57cec5SDimitry Andric /// Transforms memmove calls to memcpy calls when the src/dst are guaranteed
18580b57cec5SDimitry Andric /// not to alias.
processMemMove(MemMoveInst * M)18590b57cec5SDimitry Andric bool MemCpyOptPass::processMemMove(MemMoveInst *M) {
1860349cc55cSDimitry Andric // See if the source could be modified by this memmove potentially.
1861349cc55cSDimitry Andric if (isModSet(AA->getModRefInfo(M, MemoryLocation::getForSource(M))))
18620b57cec5SDimitry Andric return false;
18630b57cec5SDimitry Andric
18640b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "MemCpyOptPass: Optimizing memmove -> memcpy: " << *M
18650b57cec5SDimitry Andric << "\n");
18660b57cec5SDimitry Andric
18670b57cec5SDimitry Andric // If not, then we know we can transform this.
1868*0fca6ea1SDimitry Andric Type *ArgTys[3] = {M->getRawDest()->getType(), M->getRawSource()->getType(),
18690b57cec5SDimitry Andric M->getLength()->getType()};
1870*0fca6ea1SDimitry Andric M->setCalledFunction(
1871*0fca6ea1SDimitry Andric Intrinsic::getDeclaration(M->getModule(), Intrinsic::memcpy, ArgTys));
18720b57cec5SDimitry Andric
1873e8d8bef9SDimitry Andric // For MemorySSA nothing really changes (except that memcpy may imply stricter
1874e8d8bef9SDimitry Andric // aliasing guarantees).
1875e8d8bef9SDimitry Andric
18760b57cec5SDimitry Andric ++NumMoveToCpy;
18770b57cec5SDimitry Andric return true;
18780b57cec5SDimitry Andric }
18790b57cec5SDimitry Andric
/// This is called on every byval argument in call sites.
///
/// If the byval temporary is filled by a memcpy whose source is unchanged
/// between the copy and the call, forward the memcpy's source directly as the
/// byval argument, making the intervening copy dead.
/// \returns true if the argument was rewritten.
bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
  const DataLayout &DL = CB.getDataLayout();
  // Find out what feeds this byval argument.
  Value *ByValArg = CB.getArgOperand(ArgNo);
  Type *ByValTy = CB.getParamByValType(ArgNo);
  TypeSize ByValSize = DL.getTypeAllocSize(ByValTy);
  MemoryLocation Loc(ByValArg, LocationSize::precise(ByValSize));
  MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
  if (!CallAccess)
    return false;
  // Walk MemorySSA to the clobber of the byval location; only a memcpy
  // feeding the argument is of interest.
  MemCpyInst *MDep = nullptr;
  BatchAAResults BAA(*AA);
  MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
      CallAccess->getDefiningAccess(), Loc, BAA);
  if (auto *MD = dyn_cast<MemoryDef>(Clobber))
    MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());

  // If the byval argument isn't fed by a memcpy, ignore it.  If it is fed by
  // a memcpy, see if we can byval from the source of the memcpy instead of the
  // result.
  if (!MDep || MDep->isVolatile() ||
      ByValArg->stripPointerCasts() != MDep->getDest())
    return false;

  // The length of the memcpy must be larger or equal to the size of the byval.
  auto *C1 = dyn_cast<ConstantInt>(MDep->getLength());
  if (!C1 || !TypeSize::isKnownGE(
                 TypeSize::getFixed(C1->getValue().getZExtValue()), ByValSize))
    return false;

  // Get the alignment of the byval.  If the call doesn't specify the alignment,
  // then it is some target specific value that we can't know.
  MaybeAlign ByValAlign = CB.getParamAlign(ArgNo);
  if (!ByValAlign)
    return false;

  // If it is greater than the memcpy, then we check to see if we can force the
  // source of the memcpy to the alignment we need.  If we fail, we bail out.
  MaybeAlign MemDepAlign = MDep->getSourceAlign();
  if ((!MemDepAlign || *MemDepAlign < *ByValAlign) &&
      getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, &CB, AC,
                                 DT) < *ByValAlign)
    return false;

  // The type of the memcpy source must match the byval argument
  if (MDep->getSource()->getType() != ByValArg->getType())
    return false;

  // Verify that the copied-from memory doesn't change in between the memcpy and
  // the byval call.
  //    memcpy(a <- b)
  //    *b = 42;
  //    foo(*a)
  // It would be invalid to transform the second memcpy into foo(*b).
  if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
                     MSSA->getMemoryAccess(MDep), CallAccess))
    return false;

  LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to byval:\n"
                    << "  " << *MDep << "\n"
                    << "  " << CB << "\n");

  // Otherwise we're good!  Update the byval argument.
  combineAAMetadata(&CB, MDep);
  CB.setArgOperand(ArgNo, MDep->getSource());
  ++NumMemCpyInstr;
  return true;
}
194906c3fb27SDimitry Andric
/// This is called on memcpy dest pointer arguments attributed as immutable
/// during call. Try to use memcpy source directly if all of the following
/// conditions are satisfied.
/// 1. The memcpy dst is neither modified during the call nor captured by the
///    call. (if readonly, noalias, nocapture attributes on call-site.)
/// 2. The memcpy dst is an alloca with known alignment & size.
///     2-1. The memcpy length == the alloca size which ensures that the new
///     pointer is dereferenceable for the required range
///     2-2. The src pointer has alignment >= the alloca alignment or can be
///     enforced so.
/// 3. The memcpy dst and src is not modified between the memcpy and the call.
///    (if MSSA clobber check is safe.)
/// 4. The memcpy src is not modified during the call. (ModRef check shows no
///    Mod.)
bool MemCpyOptPass::processImmutArgument(CallBase &CB, unsigned ArgNo) {
  // 1. Ensure passed argument is immutable during call.
  if (!(CB.paramHasAttr(ArgNo, Attribute::NoAlias) &&
        CB.paramHasAttr(ArgNo, Attribute::NoCapture)))
    return false;
  const DataLayout &DL = CB.getDataLayout();
  Value *ImmutArg = CB.getArgOperand(ArgNo);

  // 2. Check that arg is alloca
  // TODO: Even if the arg gets back to branches, we can remove memcpy if all
  // the alloca alignments can be enforced to source alignment.
  auto *AI = dyn_cast<AllocaInst>(ImmutArg->stripPointerCasts());
  if (!AI)
    return false;

  std::optional<TypeSize> AllocaSize = AI->getAllocationSize(DL);
  // Can't handle unknown size alloca.
  // (e.g. Variable Length Array, Scalable Vector)
  if (!AllocaSize || AllocaSize->isScalable())
    return false;
  MemoryLocation Loc(ImmutArg, LocationSize::precise(*AllocaSize));
  MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
  if (!CallAccess)
    return false;

  // Walk MemorySSA to the clobber of the argument's location; only a memcpy
  // writing the alloca is of interest.
  MemCpyInst *MDep = nullptr;
  BatchAAResults BAA(*AA);
  MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
      CallAccess->getDefiningAccess(), Loc, BAA);
  if (auto *MD = dyn_cast<MemoryDef>(Clobber))
    MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());

  // If the immut argument isn't fed by a memcpy, ignore it. If it is fed by
  // a memcpy, check that the arg equals the memcpy dest.
  if (!MDep || MDep->isVolatile() || AI != MDep->getDest())
    return false;

  // The type of the memcpy source must match the immut argument
  if (MDep->getSource()->getType() != ImmutArg->getType())
    return false;

  // 2-1. The length of the memcpy must be equal to the size of the alloca.
  auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
  if (!MDepLen || AllocaSize != MDepLen->getValue())
    return false;

  // 2-2. the memcpy source align must be larger than or equal the alloca's
  // align. If not so, we check to see if we can force the source of the memcpy
  // to the alignment we need. If we fail, we bail out.
  Align MemDepAlign = MDep->getSourceAlign().valueOrOne();
  Align AllocaAlign = AI->getAlign();
  if (MemDepAlign < AllocaAlign &&
      getOrEnforceKnownAlignment(MDep->getSource(), AllocaAlign, DL, &CB, AC,
                                 DT) < AllocaAlign)
    return false;

  // 3. Verify that the source doesn't change in between the memcpy and
  // the call.
  //    memcpy(a <- b)
  //    *b = 42;
  //    foo(*a)
  // It would be invalid to transform the second memcpy into foo(*b).
  if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
                     MSSA->getMemoryAccess(MDep), CallAccess))
    return false;

  // 4. The memcpy src must not be modified during the call.
  if (isModSet(AA->getModRefInfo(&CB, MemoryLocation::getForSource(MDep))))
    return false;

  LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to Immut src:\n"
                    << "  " << *MDep << "\n"
                    << "  " << CB << "\n");

  // Otherwise we're good!  Update the immut argument.
  combineAAMetadata(&CB, MDep);
  CB.setArgOperand(ArgNo, MDep->getSource());
  ++NumMemCpyInstr;
  return true;
}
20440b57cec5SDimitry Andric
20450b57cec5SDimitry Andric /// Executes one iteration of MemCpyOptPass.
iterateOnFunction(Function & F)20460b57cec5SDimitry Andric bool MemCpyOptPass::iterateOnFunction(Function &F) {
20470b57cec5SDimitry Andric bool MadeChange = false;
20480b57cec5SDimitry Andric
20490b57cec5SDimitry Andric // Walk all instruction in the function.
20500b57cec5SDimitry Andric for (BasicBlock &BB : F) {
20510b57cec5SDimitry Andric // Skip unreachable blocks. For example processStore assumes that an
20520b57cec5SDimitry Andric // instruction in a BB can't be dominated by a later instruction in the
20530b57cec5SDimitry Andric // same BB (which is a scenario that can happen for an unreachable BB that
20540b57cec5SDimitry Andric // has itself as a predecessor).
2055e8d8bef9SDimitry Andric if (!DT->isReachableFromEntry(&BB))
20560b57cec5SDimitry Andric continue;
20570b57cec5SDimitry Andric
20580b57cec5SDimitry Andric for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
20590b57cec5SDimitry Andric // Avoid invalidating the iterator.
20600b57cec5SDimitry Andric Instruction *I = &*BI++;
20610b57cec5SDimitry Andric
20620b57cec5SDimitry Andric bool RepeatInstruction = false;
20630b57cec5SDimitry Andric
206404eeddc0SDimitry Andric if (auto *SI = dyn_cast<StoreInst>(I))
20650b57cec5SDimitry Andric MadeChange |= processStore(SI, BI);
206604eeddc0SDimitry Andric else if (auto *M = dyn_cast<MemSetInst>(I))
20670b57cec5SDimitry Andric RepeatInstruction = processMemSet(M, BI);
206804eeddc0SDimitry Andric else if (auto *M = dyn_cast<MemCpyInst>(I))
20695ffd83dbSDimitry Andric RepeatInstruction = processMemCpy(M, BI);
207004eeddc0SDimitry Andric else if (auto *M = dyn_cast<MemMoveInst>(I))
20710b57cec5SDimitry Andric RepeatInstruction = processMemMove(M);
20725ffd83dbSDimitry Andric else if (auto *CB = dyn_cast<CallBase>(I)) {
207306c3fb27SDimitry Andric for (unsigned i = 0, e = CB->arg_size(); i != e; ++i) {
20745ffd83dbSDimitry Andric if (CB->isByValArgument(i))
20755ffd83dbSDimitry Andric MadeChange |= processByValArgument(*CB, i);
207606c3fb27SDimitry Andric else if (CB->onlyReadsMemory(i))
207706c3fb27SDimitry Andric MadeChange |= processImmutArgument(*CB, i);
207806c3fb27SDimitry Andric }
20790b57cec5SDimitry Andric }
20800b57cec5SDimitry Andric
20810b57cec5SDimitry Andric // Reprocess the instruction if desired.
20820b57cec5SDimitry Andric if (RepeatInstruction) {
20830b57cec5SDimitry Andric if (BI != BB.begin())
20840b57cec5SDimitry Andric --BI;
20850b57cec5SDimitry Andric MadeChange = true;
20860b57cec5SDimitry Andric }
20870b57cec5SDimitry Andric }
20880b57cec5SDimitry Andric }
20890b57cec5SDimitry Andric
20900b57cec5SDimitry Andric return MadeChange;
20910b57cec5SDimitry Andric }
20920b57cec5SDimitry Andric
run(Function & F,FunctionAnalysisManager & AM)20930b57cec5SDimitry Andric PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
20940b57cec5SDimitry Andric auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
2095e8d8bef9SDimitry Andric auto *AA = &AM.getResult<AAManager>(F);
2096e8d8bef9SDimitry Andric auto *AC = &AM.getResult<AssumptionAnalysis>(F);
2097e8d8bef9SDimitry Andric auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
20985f757f3fSDimitry Andric auto *PDT = &AM.getResult<PostDominatorTreeAnalysis>(F);
2099349cc55cSDimitry Andric auto *MSSA = &AM.getResult<MemorySSAAnalysis>(F);
21000b57cec5SDimitry Andric
21015f757f3fSDimitry Andric bool MadeChange = runImpl(F, &TLI, AA, AC, DT, PDT, &MSSA->getMSSA());
21020b57cec5SDimitry Andric if (!MadeChange)
21030b57cec5SDimitry Andric return PreservedAnalyses::all();
21040b57cec5SDimitry Andric
21050b57cec5SDimitry Andric PreservedAnalyses PA;
21060b57cec5SDimitry Andric PA.preserveSet<CFGAnalyses>();
2107e8d8bef9SDimitry Andric PA.preserve<MemorySSAAnalysis>();
21080b57cec5SDimitry Andric return PA;
21090b57cec5SDimitry Andric }
21100b57cec5SDimitry Andric
runImpl(Function & F,TargetLibraryInfo * TLI_,AliasAnalysis * AA_,AssumptionCache * AC_,DominatorTree * DT_,PostDominatorTree * PDT_,MemorySSA * MSSA_)2111349cc55cSDimitry Andric bool MemCpyOptPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
2112349cc55cSDimitry Andric AliasAnalysis *AA_, AssumptionCache *AC_,
21135f757f3fSDimitry Andric DominatorTree *DT_, PostDominatorTree *PDT_,
21145f757f3fSDimitry Andric MemorySSA *MSSA_) {
21150b57cec5SDimitry Andric bool MadeChange = false;
21160b57cec5SDimitry Andric TLI = TLI_;
2117e8d8bef9SDimitry Andric AA = AA_;
2118e8d8bef9SDimitry Andric AC = AC_;
2119e8d8bef9SDimitry Andric DT = DT_;
21205f757f3fSDimitry Andric PDT = PDT_;
2121e8d8bef9SDimitry Andric MSSA = MSSA_;
2122e8d8bef9SDimitry Andric MemorySSAUpdater MSSAU_(MSSA_);
2123349cc55cSDimitry Andric MSSAU = &MSSAU_;
21240b57cec5SDimitry Andric
21250b57cec5SDimitry Andric while (true) {
21260b57cec5SDimitry Andric if (!iterateOnFunction(F))
21270b57cec5SDimitry Andric break;
21280b57cec5SDimitry Andric MadeChange = true;
21290b57cec5SDimitry Andric }
21300b57cec5SDimitry Andric
2131349cc55cSDimitry Andric if (VerifyMemorySSA)
2132e8d8bef9SDimitry Andric MSSA_->verifyMemorySSA();
2133e8d8bef9SDimitry Andric
21340b57cec5SDimitry Andric return MadeChange;
21350b57cec5SDimitry Andric }
2136