//===- DeadStoreElimination.cpp - MemorySSA Backed Dead Store Elimination -===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// The code below implements dead store elimination using MemorySSA. It uses
// the following general approach: given a MemoryDef, walk upwards to find
// clobbering MemoryDefs that may be killed by the starting def. Then check
// that there are no uses that may read the location of the original MemoryDef
// in between both MemoryDefs. A bit more concretely:
//
// For all MemoryDefs StartDef:
// 1. Get the next dominating clobbering MemoryDef (MaybeDeadAccess) by walking
//    upwards.
// 2. Check that there are no reads between MaybeDeadAccess and the StartDef by
//    checking all uses starting at MaybeDeadAccess and walking until we see
//    StartDef.
// 3. For each found CurrentDef, check that:
//   1. There are no barrier instructions between CurrentDef and StartDef (like
//      throws or stores with ordering constraints).
//   2. StartDef is executed whenever CurrentDef is executed.
//   3. StartDef completely overwrites CurrentDef.
// 4. Erase CurrentDef from the function and MemorySSA.
//
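// For example (illustrative IR, not taken from a test):
//
//   store i32 0, ptr %p          ; CurrentDef, dead
//   call void @g() memory(none)  ; no read of %p in between
//   store i32 1, ptr %p          ; StartDef, completely overwrites CurrentDef
//
// Here the first store can be erased because the second store overwrites it
// and nothing between the two defs reads %p.
//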
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/DeadStoreElimination.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRangeList.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <map>
#include <optional>
#include <utility>

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "dse"

STATISTIC(NumRemainingStores, "Number of stores remaining after DSE");
STATISTIC(NumRedundantStores, "Number of redundant stores deleted");
STATISTIC(NumFastStores, "Number of stores deleted");
STATISTIC(NumFastOther, "Number of other instrs removed");
STATISTIC(NumCompletePartials, "Number of stores dead by later partials");
STATISTIC(NumModifiedStores, "Number of stores modified");
STATISTIC(NumCFGChecks, "Number of blocks checked in CFG-based proofs");
STATISTIC(NumCFGTries, "Number of attempted CFG-based dead store proofs");
STATISTIC(NumCFGSuccess, "Number of successful CFG-based dead store proofs");
STATISTIC(NumGetDomMemoryDefPassed,
          "Number of times a valid candidate is returned from getDomMemoryDef");
STATISTIC(NumDomMemDefChecks,
          "Number of iterations checking for reads in getDomMemoryDef");

DEBUG_COUNTER(MemorySSACounter, "dse-memoryssa",
              "Controls which MemoryDefs are eliminated.");

static cl::opt<bool>
EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking",
  cl::init(true), cl::Hidden,
  cl::desc("Enable partial-overwrite tracking in DSE"));

static cl::opt<bool>
EnablePartialStoreMerging("enable-dse-partial-store-merging",
  cl::init(true), cl::Hidden,
  cl::desc("Enable partial store merging in DSE"));

static cl::opt<unsigned>
    MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(150), cl::Hidden,
                       cl::desc("The number of memory instructions to scan for "
                                "dead store elimination (default = 150)"));
static cl::opt<unsigned> MemorySSAUpwardsStepLimit(
    "dse-memoryssa-walklimit", cl::init(90), cl::Hidden,
    cl::desc("The maximum number of steps while walking upwards to find "
             "MemoryDefs that may be killed (default = 90)"));

static cl::opt<unsigned> MemorySSAPartialStoreLimit(
    "dse-memoryssa-partial-store-limit", cl::init(5), cl::Hidden,
    cl::desc("The maximum number of candidates that only partially overwrite "
             "the killing MemoryDef to consider (default = 5)"));

static cl::opt<unsigned> MemorySSADefsPerBlockLimit(
    "dse-memoryssa-defs-per-block-limit", cl::init(5000), cl::Hidden,
    cl::desc("The number of MemoryDefs we consider as candidates to eliminate "
             "other stores per basic block (default = 5000)"));

static cl::opt<unsigned> MemorySSASameBBStepCost(
    "dse-memoryssa-samebb-cost", cl::init(1), cl::Hidden,
    cl::desc(
        "The cost of a step in the same basic block as the killing MemoryDef "
        "(default = 1)"));

static cl::opt<unsigned>
    MemorySSAOtherBBStepCost("dse-memoryssa-otherbb-cost", cl::init(5),
                             cl::Hidden,
                             cl::desc("The cost of a step in a different basic "
                                      "block than the killing MemoryDef "
                                      "(default = 5)"));
static cl::opt<unsigned> MemorySSAPathCheckLimit(
    "dse-memoryssa-path-check-limit", cl::init(50), cl::Hidden,
    cl::desc("The maximum number of blocks to check when trying to prove that "
             "all paths to an exit go through a killing block (default = 50)"));
// This flag allows or disallows DSE to optimize MemorySSA during its
// traversal. Note that DSE optimizing MemorySSA may impact other passes
// downstream of the DSE invocation and can lead to issues not being
// reproducible in isolation (i.e. when MemorySSA is built from scratch). In
// those cases, the flag can be used to check if DSE's MemorySSA optimizations
// impact follow-up passes.
static cl::opt<bool>
    OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(true), cl::Hidden,
                      cl::desc("Allow DSE to optimize memory accesses."));

// TODO: remove this flag.
static cl::opt<bool> EnableInitializesImprovement(
    "enable-dse-initializes-attr-improvement", cl::init(true), cl::Hidden,
    cl::desc("Enable the initializes attr improvement in DSE"));

//===----------------------------------------------------------------------===//
// Helper functions
//===----------------------------------------------------------------------===//
using OverlapIntervalsTy = std::map<int64_t, int64_t>;
using InstOverlapIntervalsTy = MapVector<Instruction *, OverlapIntervalsTy>;

/// Returns true if the end of this instruction can be safely shortened in
/// length.
static bool isShortenableAtTheEnd(Instruction *I) {
  // Don't shorten stores for now
  if (isa<StoreInst>(I))
    return false;

  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
    switch (II->getIntrinsicID()) {
      default: return false;
      case Intrinsic::memset:
      case Intrinsic::memcpy:
      case Intrinsic::memcpy_element_unordered_atomic:
      case Intrinsic::memset_element_unordered_atomic:
        // Do shorten memory intrinsics.
        // FIXME: Add memmove if it's also safe to transform.
        return true;
    }
  }

  // Don't shorten libcall calls for now.

  return false;
}

/// Returns true if the beginning of this instruction can be safely shortened
/// in length.
static bool isShortenableAtTheBeginning(Instruction *I) {
  // FIXME: Handle only memset for now. Supporting memcpy/memmove should be
  // easily done by offsetting the source address.
  return isa<AnyMemSetInst>(I);
}

static std::optional<TypeSize> getPointerSize(const Value *V,
                                              const DataLayout &DL,
                                              const TargetLibraryInfo &TLI,
                                              const Function *F) {
  uint64_t Size;
  ObjectSizeOpts Opts;
  Opts.NullIsUnknownSize = NullPointerIsDefined(F);

  if (getObjectSize(V, Size, DL, &TLI, Opts))
    return TypeSize::getFixed(Size);
  return std::nullopt;
}

namespace {

enum OverwriteResult {
  OW_Begin,
  OW_Complete,
  OW_End,
  OW_PartialEarlierWithFullLater,
  OW_MaybePartial,
  OW_None,
  OW_Unknown
};

} // end anonymous namespace

/// Check if two instructions are masked stores that completely
/// overwrite one another. More specifically, \p KillingI has to
/// overwrite \p DeadI.
static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI,
                                              const Instruction *DeadI,
                                              BatchAAResults &AA) {
  const auto *KillingII = dyn_cast<IntrinsicInst>(KillingI);
  const auto *DeadII = dyn_cast<IntrinsicInst>(DeadI);
  if (KillingII == nullptr || DeadII == nullptr)
    return OW_Unknown;
  if (KillingII->getIntrinsicID() != DeadII->getIntrinsicID())
    return OW_Unknown;

  switch (KillingII->getIntrinsicID()) {
  case Intrinsic::masked_store:
  case Intrinsic::vp_store: {
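    // Operand layout assumed below: llvm.masked.store(value, ptr, align, mask)
    // and llvm.vp.store(value, ptr, mask, vlen), which is why the mask/length
    // operand indices differ between the two intrinsics.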
    const DataLayout &DL = KillingII->getDataLayout();
    auto *KillingTy = KillingII->getArgOperand(0)->getType();
    auto *DeadTy = DeadII->getArgOperand(0)->getType();
    if (DL.getTypeSizeInBits(KillingTy) != DL.getTypeSizeInBits(DeadTy))
      return OW_Unknown;
    // Element count.
    if (cast<VectorType>(KillingTy)->getElementCount() !=
        cast<VectorType>(DeadTy)->getElementCount())
      return OW_Unknown;
    // Pointers.
    Value *KillingPtr = KillingII->getArgOperand(1);
    Value *DeadPtr = DeadII->getArgOperand(1);
    if (KillingPtr != DeadPtr && !AA.isMustAlias(KillingPtr, DeadPtr))
      return OW_Unknown;
    if (KillingII->getIntrinsicID() == Intrinsic::masked_store) {
      // Masks.
      // TODO: check that KillingII's mask is a superset of the DeadII's mask.
      if (KillingII->getArgOperand(3) != DeadII->getArgOperand(3))
        return OW_Unknown;
    } else if (KillingII->getIntrinsicID() == Intrinsic::vp_store) {
      // Masks.
      // TODO: check that KillingII's mask is a superset of the DeadII's mask.
      if (KillingII->getArgOperand(2) != DeadII->getArgOperand(2))
        return OW_Unknown;
      // Lengths.
      if (KillingII->getArgOperand(3) != DeadII->getArgOperand(3))
        return OW_Unknown;
    }
    return OW_Complete;
  }
  default:
    return OW_Unknown;
  }
}

/// Return 'OW_Complete' if a store to the 'KillingLoc' location completely
/// overwrites a store to the 'DeadLoc' location, 'OW_End' if the end of the
/// 'DeadLoc' location is completely overwritten by 'KillingLoc', 'OW_Begin'
/// if the beginning of the 'DeadLoc' location is overwritten by 'KillingLoc'.
/// 'OW_PartialEarlierWithFullLater' means that a dead (big) store was
/// overwritten by a killing (smaller) store which doesn't write outside the big
/// store's memory locations. Returns 'OW_Unknown' if nothing can be determined.
/// NOTE: This function must only be called if both \p KillingLoc and \p
/// DeadLoc belong to the same underlying object with valid \p KillingOff and
/// \p DeadOff.
static OverwriteResult isPartialOverwrite(const MemoryLocation &KillingLoc,
                                          const MemoryLocation &DeadLoc,
                                          int64_t KillingOff, int64_t DeadOff,
                                          Instruction *DeadI,
                                          InstOverlapIntervalsTy &IOL) {
  const uint64_t KillingSize = KillingLoc.Size.getValue();
  const uint64_t DeadSize = DeadLoc.Size.getValue();
  // We may now overlap, although the overlap is not complete. There might also
  // be other incomplete overlaps, and together, they might cover the complete
  // dead store.
  // Note: The correctness of this logic depends on the fact that this function
  // is not even called for DeadI when there are any intervening reads.
  if (EnablePartialOverwriteTracking &&
      KillingOff < int64_t(DeadOff + DeadSize) &&
      int64_t(KillingOff + KillingSize) >= DeadOff) {

    // Insert our part of the overlap into the map.
    auto &IM = IOL[DeadI];
    LLVM_DEBUG(dbgs() << "DSE: Partial overwrite: DeadLoc [" << DeadOff << ", "
                      << int64_t(DeadOff + DeadSize) << ") KillingLoc ["
                      << KillingOff << ", " << int64_t(KillingOff + KillingSize)
                      << ")\n");

    // Make sure that we only insert non-overlapping intervals and combine
    // adjacent intervals. The intervals are stored in the map with the ending
    // offset as the key (in the half-open sense) and the starting offset as
    // the value.
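    // For example, a killing write covering bytes [4, 12) is recorded as
    // IM[12] = 4, so lower_bound(KillingIntStart) below finds the first
    // existing interval whose end is at or past the new interval's start.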
    int64_t KillingIntStart = KillingOff;
    int64_t KillingIntEnd = KillingOff + KillingSize;

    // Find any intervals ending at, or after, KillingIntStart which start
    // before KillingIntEnd.
    auto ILI = IM.lower_bound(KillingIntStart);
    if (ILI != IM.end() && ILI->second <= KillingIntEnd) {
      // This existing interval is overlapped with the current store somewhere
      // in [KillingIntStart, KillingIntEnd]. Merge them by erasing the existing
      // intervals and adjusting our start and end.
      KillingIntStart = std::min(KillingIntStart, ILI->second);
      KillingIntEnd = std::max(KillingIntEnd, ILI->first);
      ILI = IM.erase(ILI);

      // Continue erasing and adjusting our end in case other previous
      // intervals are also overlapped with the current store.
      //
      // |--- dead 1 ---|  |--- dead 2 ---|
      //     |------- killing---------|
      //
      while (ILI != IM.end() && ILI->second <= KillingIntEnd) {
        assert(ILI->second > KillingIntStart && "Unexpected interval");
        KillingIntEnd = std::max(KillingIntEnd, ILI->first);
        ILI = IM.erase(ILI);
      }
    }

    IM[KillingIntEnd] = KillingIntStart;

    ILI = IM.begin();
    if (ILI->second <= DeadOff && ILI->first >= int64_t(DeadOff + DeadSize)) {
      LLVM_DEBUG(dbgs() << "DSE: Full overwrite from partials: DeadLoc ["
                        << DeadOff << ", " << int64_t(DeadOff + DeadSize)
                        << ") Composite KillingLoc [" << ILI->second << ", "
                        << ILI->first << ")\n");
      ++NumCompletePartials;
      return OW_Complete;
    }
  }

  // Check for a dead store which writes to all the memory locations that
  // the killing store writes to.
  if (EnablePartialStoreMerging && KillingOff >= DeadOff &&
      int64_t(DeadOff + DeadSize) > KillingOff &&
      uint64_t(KillingOff - DeadOff) + KillingSize <= DeadSize) {
    LLVM_DEBUG(dbgs() << "DSE: Partial overwrite of a dead store [" << DeadOff
                      << ", " << int64_t(DeadOff + DeadSize)
                      << ") by a killing store [" << KillingOff << ", "
                      << int64_t(KillingOff + KillingSize) << ")\n");
    // TODO: Maybe come up with a better name?
    return OW_PartialEarlierWithFullLater;
  }

  // Another interesting case is if the killing store overwrites the end of the
  // dead store.
  //
  //      |--dead--|
  //                |--   killing   --|
  //
  // In this case we may want to trim the size of the dead store to avoid
  // generating stores to addresses which will definitely be overwritten by the
  // killing store.
  if (!EnablePartialOverwriteTracking &&
      (KillingOff > DeadOff && KillingOff < int64_t(DeadOff + DeadSize) &&
       int64_t(KillingOff + KillingSize) >= int64_t(DeadOff + DeadSize)))
    return OW_End;

  // Finally, we also need to check if the killing store overwrites the
  // beginning of the dead store.
  //
  //                |--dead--|
  //      |--  killing  --|
  //
  // In this case we may want to move the destination address and trim the size
  // of the dead store to avoid generating stores to addresses which will
  // definitely be overwritten by the killing store.
  if (!EnablePartialOverwriteTracking &&
      (KillingOff <= DeadOff && int64_t(KillingOff + KillingSize) > DeadOff)) {
    assert(int64_t(KillingOff + KillingSize) < int64_t(DeadOff + DeadSize) &&
           "Expect to be handled as OW_Complete");
    return OW_Begin;
  }
  // Otherwise, they don't completely overlap.
  return OW_Unknown;
}

/// Returns true if the memory which is accessed by the second instruction is
/// not modified between the first and the second instruction.
/// Precondition: Second instruction must be dominated by the first
/// instruction.
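/// For example, partial store merging below relies on this to prove that no
/// other write touches the merged location between the two stores it combines.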
static bool
memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI,
                           BatchAAResults &AA, const DataLayout &DL,
                           DominatorTree *DT) {
  // Do a backwards scan through the CFG from SecondI to FirstI. Look for
  // instructions which can modify the memory location accessed by SecondI.
  //
  // While doing the walk keep track of the address to check. It might be
  // different in different basic blocks due to PHI translation.
  using BlockAddressPair = std::pair<BasicBlock *, PHITransAddr>;
  SmallVector<BlockAddressPair, 16> WorkList;
  // Keep track of the address we visited each block with. Bail out if we
  // visit a block with different addresses.
  DenseMap<BasicBlock *, Value *> Visited;

  BasicBlock::iterator FirstBBI(FirstI);
  ++FirstBBI;
  BasicBlock::iterator SecondBBI(SecondI);
  BasicBlock *FirstBB = FirstI->getParent();
  BasicBlock *SecondBB = SecondI->getParent();
  MemoryLocation MemLoc;
  if (auto *MemSet = dyn_cast<MemSetInst>(SecondI))
    MemLoc = MemoryLocation::getForDest(MemSet);
  else
    MemLoc = MemoryLocation::get(SecondI);

  auto *MemLocPtr = const_cast<Value *>(MemLoc.Ptr);

  // Start checking the SecondBB.
  WorkList.push_back(
      std::make_pair(SecondBB, PHITransAddr(MemLocPtr, DL, nullptr)));
  bool isFirstBlock = true;

  // Check all blocks going backward until we reach the FirstBB.
  while (!WorkList.empty()) {
    BlockAddressPair Current = WorkList.pop_back_val();
    BasicBlock *B = Current.first;
    PHITransAddr &Addr = Current.second;
    Value *Ptr = Addr.getAddr();

    // Ignore instructions before FirstI if this is the FirstBB.
    BasicBlock::iterator BI = (B == FirstBB ? FirstBBI : B->begin());

    BasicBlock::iterator EI;
    if (isFirstBlock) {
      // Ignore instructions after SecondI if this is the first visit of
      // SecondBB.
      assert(B == SecondBB && "first block is not the store block");
      EI = SecondBBI;
      isFirstBlock = false;
    } else {
      // It's not SecondBB or (in case of a loop) the second visit of SecondBB.
      // In this case we also have to look at instructions after SecondI.
      EI = B->end();
    }
    for (; BI != EI; ++BI) {
      Instruction *I = &*BI;
      if (I->mayWriteToMemory() && I != SecondI)
        if (isModSet(AA.getModRefInfo(I, MemLoc.getWithNewPtr(Ptr))))
          return false;
    }
    if (B != FirstBB) {
      assert(B != &FirstBB->getParent()->getEntryBlock() &&
             "Should not hit the entry block because SecondI must be "
             "dominated by FirstI");
      for (BasicBlock *Pred : predecessors(B)) {
        PHITransAddr PredAddr = Addr;
        if (PredAddr.needsPHITranslationFromBlock(B)) {
          if (!PredAddr.isPotentiallyPHITranslatable())
            return false;
          if (!PredAddr.translateValue(B, Pred, DT, false))
            return false;
        }
        Value *TranslatedPtr = PredAddr.getAddr();
        auto Inserted = Visited.insert(std::make_pair(Pred, TranslatedPtr));
        if (!Inserted.second) {
          // We already visited this block before. If it was with a different
          // address - bail out!
          if (TranslatedPtr != Inserted.first->second)
            return false;
          // ... otherwise just skip it.
          continue;
        }
        WorkList.push_back(std::make_pair(Pred, PredAddr));
      }
    }
  }
  return true;
}

static void shortenAssignment(Instruction *Inst, Value *OriginalDest,
                              uint64_t OldOffsetInBits, uint64_t OldSizeInBits,
                              uint64_t NewSizeInBits, bool IsOverwriteEnd) {
  const DataLayout &DL = Inst->getDataLayout();
  uint64_t DeadSliceSizeInBits = OldSizeInBits - NewSizeInBits;
  uint64_t DeadSliceOffsetInBits =
      OldOffsetInBits + (IsOverwriteEnd ? NewSizeInBits : 0);
  auto SetDeadFragExpr = [](auto *Assign,
                            DIExpression::FragmentInfo DeadFragment) {
    // createFragmentExpression expects an offset relative to the existing
    // fragment offset if there is one.
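    // For example (illustrative numbers): if the variable's existing fragment
    // starts at bit 32 and the dead slice starts at bit 48, RelativeOffset is
    // 16 bits into that fragment.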
    uint64_t RelativeOffset = DeadFragment.OffsetInBits -
                              Assign->getExpression()
                                  ->getFragmentInfo()
                                  .value_or(DIExpression::FragmentInfo(0, 0))
                                  .OffsetInBits;
    if (auto NewExpr = DIExpression::createFragmentExpression(
            Assign->getExpression(), RelativeOffset, DeadFragment.SizeInBits)) {
      Assign->setExpression(*NewExpr);
      return;
    }
    // Failed to create a fragment expression for this so discard the value,
    // making this a kill location.
    auto *Expr = *DIExpression::createFragmentExpression(
        DIExpression::get(Assign->getContext(), {}), DeadFragment.OffsetInBits,
        DeadFragment.SizeInBits);
    Assign->setExpression(Expr);
    Assign->setKillLocation();
  };

  // A DIAssignID to use so that the inserted dbg.assign intrinsics do not
  // link to any instructions. Created in the loop below (once).
  DIAssignID *LinkToNothing = nullptr;
  LLVMContext &Ctx = Inst->getContext();
  auto GetDeadLink = [&Ctx, &LinkToNothing]() {
    if (!LinkToNothing)
      LinkToNothing = DIAssignID::getDistinct(Ctx);
    return LinkToNothing;
  };

  // Insert an unlinked dbg.assign intrinsic for the dead fragment after each
  // overlapping dbg.assign intrinsic. The loop invalidates the iterators
  // returned by getAssignmentMarkers so save a copy of the markers to iterate
  // over.
  auto LinkedRange = at::getAssignmentMarkers(Inst);
  SmallVector<DbgVariableRecord *> LinkedDVRAssigns =
      at::getDVRAssignmentMarkers(Inst);
  SmallVector<DbgAssignIntrinsic *> Linked(LinkedRange.begin(),
                                           LinkedRange.end());
  auto InsertAssignForOverlap = [&](auto *Assign) {
    std::optional<DIExpression::FragmentInfo> NewFragment;
    if (!at::calculateFragmentIntersect(DL, OriginalDest, DeadSliceOffsetInBits,
                                        DeadSliceSizeInBits, Assign,
                                        NewFragment) ||
        !NewFragment) {
      // We couldn't calculate the intersecting fragment for some reason. Be
      // cautious and unlink the whole assignment from the store.
      Assign->setKillAddress();
      Assign->setAssignId(GetDeadLink());
      return;
    }
    // No intersect.
    if (NewFragment->SizeInBits == 0)
      return;

    // Fragments overlap: insert a new dbg.assign for this dead part.
    auto *NewAssign = static_cast<decltype(Assign)>(Assign->clone());
    NewAssign->insertAfter(Assign->getIterator());
    NewAssign->setAssignId(GetDeadLink());
    if (NewFragment)
      SetDeadFragExpr(NewAssign, *NewFragment);
    NewAssign->setKillAddress();
  };
  for_each(Linked, InsertAssignForOverlap);
  for_each(LinkedDVRAssigns, InsertAssignForOverlap);
}

/// Update the attributes given that a memory access is updated (the
/// dereferenced pointer could be moved forward when shortening a
/// mem intrinsic).
static void adjustArgAttributes(AnyMemIntrinsic *Intrinsic, unsigned ArgNo,
                                uint64_t PtrOffset) {
  // Remember old attributes.
  AttributeSet OldAttrs = Intrinsic->getParamAttributes(ArgNo);

  // Find attributes that should be kept, and remove the rest.
  AttributeMask AttrsToRemove;
  for (auto &Attr : OldAttrs) {
    if (Attr.hasKindAsEnum()) {
      switch (Attr.getKindAsEnum()) {
      default:
        break;
      case Attribute::Alignment:
        // Only keep alignment if PtrOffset satisfies the alignment.
        if (isAligned(Attr.getAlignment().valueOrOne(), PtrOffset))
          continue;
        break;
      case Attribute::Dereferenceable:
      case Attribute::DereferenceableOrNull:
        // We could reduce the size of these attributes according to
        // PtrOffset. But we simply drop these for now.
        break;
      case Attribute::NonNull:
      case Attribute::NoUndef:
        continue;
      }
    }
    AttrsToRemove.addAttribute(Attr);
  }

  // Remove the attributes that should be dropped.
  Intrinsic->removeParamAttrs(ArgNo, AttrsToRemove);
}

static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
                         uint64_t &DeadSize, int64_t KillingStart,
                         uint64_t KillingSize, bool IsOverwriteEnd) {
  auto *DeadIntrinsic = cast<AnyMemIntrinsic>(DeadI);
  Align PrefAlign = DeadIntrinsic->getDestAlign().valueOrOne();

  // We assume that memset/memcpy operates in chunks of the "largest" native
  // type size, aligned on the same value. That means the optimal start and
  // size of memset/memcpy should be a multiple of the preferred alignment of
  // that type, i.e. there is no sense in trying to reduce the store size any
  // further, since any "extra" stores come for free anyway.
  // On the other hand, the maximum alignment we can achieve is limited by the
  // alignment of the initial store.

  // TODO: Limit maximum alignment by preferred (or abi?) alignment of the
  // "largest" native type.
  // Note: What is the proper way to get that value?
  // Should TargetTransformInfo::getRegisterBitWidth be used or anything else?
  // PrefAlign = std::min(DL.getPrefTypeAlign(LargestType), PrefAlign);
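  //
  // Worked example (end overwrite, illustrative numbers): DeadStart = 0,
  // DeadSize = 64, KillingStart = 20, KillingSize = 44, PrefAlign = 16.
  // offsetToAlignment(20, 16) == 12, so ToRemoveStart = 32 and ToRemoveSize =
  // 32: the dead store keeps bytes [0, 32) (a size still a multiple of
  // PrefAlign) and the killing store covers the removed tail.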

  int64_t ToRemoveStart = 0;
  uint64_t ToRemoveSize = 0;
  // Compute start and size of the region to remove. Make sure 'PrefAlign' is
  // maintained on the remaining store.
  if (IsOverwriteEnd) {
    // Calculate required adjustment for 'KillingStart' in order to keep the
    // remaining store size aligned on 'PrefAlign'.
    uint64_t Off =
        offsetToAlignment(uint64_t(KillingStart - DeadStart), PrefAlign);
    ToRemoveStart = KillingStart + Off;
    if (DeadSize <= uint64_t(ToRemoveStart - DeadStart))
      return false;
    ToRemoveSize = DeadSize - uint64_t(ToRemoveStart - DeadStart);
  } else {
    ToRemoveStart = DeadStart;
    assert(KillingSize >= uint64_t(DeadStart - KillingStart) &&
           "Not overlapping accesses?");
    ToRemoveSize = KillingSize - uint64_t(DeadStart - KillingStart);
    // Calculate required adjustment for 'ToRemoveSize' in order to keep the
    // start of the remaining store aligned on 'PrefAlign'.
    uint64_t Off = offsetToAlignment(ToRemoveSize, PrefAlign);
    if (Off != 0) {
      if (ToRemoveSize <= (PrefAlign.value() - Off))
        return false;
      ToRemoveSize -= PrefAlign.value() - Off;
    }
    assert(isAligned(PrefAlign, ToRemoveSize) &&
           "Should preserve selected alignment");
  }

  assert(ToRemoveSize > 0 && "Shouldn't reach here if nothing to remove");
  assert(DeadSize > ToRemoveSize && "Can't remove more than original size");

  uint64_t NewSize = DeadSize - ToRemoveSize;
  if (DeadIntrinsic->isAtomic()) {
    // When shortening an atomic memory intrinsic, the newly shortened
    // length must remain an integer multiple of the element size.
    const uint32_t ElementSize = DeadIntrinsic->getElementSizeInBytes();
    if (0 != NewSize % ElementSize)
      return false;
  }

  LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n  OW "
                    << (IsOverwriteEnd ? "END" : "BEGIN") << ": " << *DeadI
                    << "\n  KILLER [" << ToRemoveStart << ", "
                    << int64_t(ToRemoveStart + ToRemoveSize) << ")\n");

  Value *DeadWriteLength = DeadIntrinsic->getLength();
  Value *TrimmedLength = ConstantInt::get(DeadWriteLength->getType(), NewSize);
  DeadIntrinsic->setLength(TrimmedLength);
  DeadIntrinsic->setDestAlignment(PrefAlign);

  Value *OrigDest = DeadIntrinsic->getRawDest();
  if (!IsOverwriteEnd) {
    Value *Indices[1] = {
        ConstantInt::get(DeadWriteLength->getType(), ToRemoveSize)};
    Instruction *NewDestGEP = GetElementPtrInst::CreateInBounds(
        Type::getInt8Ty(DeadIntrinsic->getContext()), OrigDest, Indices, "",
        DeadI->getIterator());
    NewDestGEP->setDebugLoc(DeadIntrinsic->getDebugLoc());
    DeadIntrinsic->setDest(NewDestGEP);
    adjustArgAttributes(DeadIntrinsic, 0, ToRemoveSize);
  }

  // Update attached dbg.assign intrinsics. Assume 8-bit byte.
  shortenAssignment(DeadI, OrigDest, DeadStart * 8, DeadSize * 8, NewSize * 8,
                    IsOverwriteEnd);

  // Finally update start and size of dead access.
  if (!IsOverwriteEnd)
    DeadStart += ToRemoveSize;
  DeadSize = NewSize;

  return true;
}

static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap,
                            int64_t &DeadStart, uint64_t &DeadSize) {
  if (IntervalMap.empty() || !isShortenableAtTheEnd(DeadI))
    return false;

  OverlapIntervalsTy::iterator OII = --IntervalMap.end();
  int64_t KillingStart = OII->second;
  uint64_t KillingSize = OII->first - KillingStart;

  assert(OII->first - KillingStart >= 0 && "Size expected to be positive");

  if (KillingStart > DeadStart &&
      // Note: "KillingStart - DeadStart" is known to be positive due to
      // preceding check.
      (uint64_t)(KillingStart - DeadStart) < DeadSize &&
      // Note: "DeadSize - (uint64_t)(KillingStart - DeadStart)" is known to
      // be non negative due to preceding checks.
      KillingSize >= DeadSize - (uint64_t)(KillingStart - DeadStart)) {
    if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
                     true)) {
      IntervalMap.erase(OII);
      return true;
    }
  }
  return false;
}

static bool tryToShortenBegin(Instruction *DeadI,
                              OverlapIntervalsTy &IntervalMap,
                              int64_t &DeadStart, uint64_t &DeadSize) {
  if (IntervalMap.empty() || !isShortenableAtTheBeginning(DeadI))
    return false;

  OverlapIntervalsTy::iterator OII = IntervalMap.begin();
  int64_t KillingStart = OII->second;
  uint64_t KillingSize = OII->first - KillingStart;

  assert(OII->first - KillingStart >= 0 && "Size expected to be positive");

  if (KillingStart <= DeadStart &&
      // Note: "DeadStart - KillingStart" is known to be non negative due to
      // preceding check.
      KillingSize > (uint64_t)(DeadStart - KillingStart)) {
    // Note: "KillingSize - (uint64_t)(DeadStart - KillingStart)" is known to
    // be positive due to preceding checks.
    assert(KillingSize - (uint64_t)(DeadStart - KillingStart) < DeadSize &&
           "Should have been handled as OW_Complete");
    if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
                     false)) {
      IntervalMap.erase(OII);
      return true;
    }
  }
  return false;
}

static Constant *
tryToMergePartialOverlappingStores(StoreInst *KillingI, StoreInst *DeadI,
                                   int64_t KillingOffset, int64_t DeadOffset,
                                   const DataLayout &DL, BatchAAResults &AA,
                                   DominatorTree *DT) {

  if (DeadI && isa<ConstantInt>(DeadI->getValueOperand()) &&
      DL.typeSizeEqualsStoreSize(DeadI->getValueOperand()->getType()) &&
      KillingI && isa<ConstantInt>(KillingI->getValueOperand()) &&
      DL.typeSizeEqualsStoreSize(KillingI->getValueOperand()->getType()) &&
      memoryIsNotModifiedBetween(DeadI, KillingI, AA, DL, DT)) {
    // If the store we find is:
    //   a) partially overwritten by the store to 'Loc'
    //   b) the killing store is fully contained in the dead one and
    //   c) they both have a constant value
    //   d) none of the two stores need padding
    // Merge the two stores, replacing the dead store's value with a
    // merge of both values.
    // TODO: Deal with other constant types (vectors, etc), and probably
    // some mem intrinsics (if needed)

    APInt DeadValue = cast<ConstantInt>(DeadI->getValueOperand())->getValue();
    APInt KillingValue =
        cast<ConstantInt>(KillingI->getValueOperand())->getValue();
    unsigned KillingBits = KillingValue.getBitWidth();
    assert(DeadValue.getBitWidth() > KillingValue.getBitWidth());
    KillingValue = KillingValue.zext(DeadValue.getBitWidth());

    // Offset of the smaller store inside the larger store
    unsigned BitOffsetDiff = (KillingOffset - DeadOffset) * 8;
    unsigned LShiftAmount =
        DL.isBigEndian() ? DeadValue.getBitWidth() - BitOffsetDiff - KillingBits
                         : BitOffsetDiff;
    APInt Mask = APInt::getBitsSet(DeadValue.getBitWidth(), LShiftAmount,
                                   LShiftAmount + KillingBits);
    // Clear the bits we'll be replacing, then OR with the smaller
    // store, shifted appropriately.
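    // Worked example (little-endian, illustrative values): a dead
    // `store i32 0xAABBCCDD` at offset 0 and a killing `store i16 0x1122` at
    // offset 1 give BitOffsetDiff = 8, LShiftAmount = 8, and a Mask over bits
    // [8, 24), so Merged = 0xAA0000DD | 0x00112200 = 0xAA1122DD.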
    APInt Merged = (DeadValue & ~Mask) | (KillingValue << LShiftAmount);
    LLVM_DEBUG(dbgs() << "DSE: Merge Stores:\n  Dead: " << *DeadI
                      << "\n  Killing: " << *KillingI
                      << "\n  Merged Value: " << Merged << '\n');
    return ConstantInt::get(DeadI->getValueOperand()->getType(), Merged);
  }
  return nullptr;
}

namespace {
// Returns true if \p I is an intrinsic that does not read or write memory.
bool isNoopIntrinsic(Instruction *I) {
  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
    switch (II->getIntrinsicID()) {
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
    case Intrinsic::invariant_end:
    case Intrinsic::launder_invariant_group:
    case Intrinsic::assume:
      return true;
    case Intrinsic::dbg_declare:
    case Intrinsic::dbg_label:
    case Intrinsic::dbg_value:
      llvm_unreachable("Intrinsic should not be modeled in MemorySSA");
    default:
      return false;
    }
  }
  return false;
}

// Check if we can ignore \p D for DSE.
bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
  Instruction *DI = D->getMemoryInst();
  // Calls that only access inaccessible memory cannot read or write any memory
  // locations we consider for elimination.
  if (auto *CB = dyn_cast<CallBase>(DI))
    if (CB->onlyAccessesInaccessibleMemory())
      return true;

  // We can eliminate stores to locations not visible to the caller across
  // throwing instructions.
  if (DI->mayThrow() && !DefVisibleToCaller)
    return true;

  // We can remove dead stores irrespective of the fence and its ordering
  // (release/acquire/seq_cst). Fences only constrain the ordering of
  // already visible stores; they do not make a store visible to other
  // threads. So, skipping over a fence does not change a store from being
  // dead.
  if (isa<FenceInst>(DI))
    return true;

  // Skip intrinsics that do not really read or modify memory.
  if (isNoopIntrinsic(DI))
    return true;

  return false;
}

// A memory location wrapper that represents a MemoryLocation, `MemLoc`,
// defined by `MemDef`.
struct MemoryLocationWrapper {
  MemoryLocationWrapper(MemoryLocation MemLoc, MemoryDef *MemDef,
                        bool DefByInitializesAttr)
      : MemLoc(MemLoc), MemDef(MemDef),
        DefByInitializesAttr(DefByInitializesAttr) {
    assert(MemLoc.Ptr && "MemLoc should not be null");
    UnderlyingObject = getUnderlyingObject(MemLoc.Ptr);
    DefInst = MemDef->getMemoryInst();
  }

  MemoryLocation MemLoc;
  const Value *UnderlyingObject;
  MemoryDef *MemDef;
  Instruction *DefInst;
  bool DefByInitializesAttr = false;
};

// A memory def wrapper that represents a MemoryDef and the MemoryLocation(s)
// defined by this MemoryDef.
struct MemoryDefWrapper {
  MemoryDefWrapper(MemoryDef *MemDef,
                   ArrayRef<std::pair<MemoryLocation, bool>> MemLocations) {
    DefInst = MemDef->getMemoryInst();
    for (auto &[MemLoc, DefByInitializesAttr] : MemLocations)
      DefinedLocations.push_back(
          MemoryLocationWrapper(MemLoc, MemDef, DefByInitializesAttr));
  }
  Instruction *DefInst;
  SmallVector<MemoryLocationWrapper, 1> DefinedLocations;
};

bool hasInitializesAttr(Instruction *I) {
  CallBase *CB = dyn_cast<CallBase>(I);
  return CB && CB->getArgOperandWithAttribute(Attribute::Initializes);
}

struct ArgumentInitInfo {
  unsigned Idx;
  bool IsDeadOrInvisibleOnUnwind;
  ConstantRangeList Inits;
};

// Return the intersected range list of the initializes attributes of "Args".
// "Args" are call arguments that alias to each other.
// If any argument in "Args" doesn't have the dead_on_unwind attr and
// "CallHasNoUnwindAttr" is false, return empty.
ConstantRangeList getIntersectedInitRangeList(ArrayRef<ArgumentInitInfo> Args,
                                              bool CallHasNoUnwindAttr) {
  if (Args.empty())
    return {};

  // To address unwind, the function should have the nounwind attribute or the
  // arguments must be dead or invisible on unwind. Otherwise, return empty.
  for (const auto &Arg : Args) {
    if (!CallHasNoUnwindAttr && !Arg.IsDeadOrInvisibleOnUnwind)
      return {};
    if (Arg.Inits.empty())
      return {};
  }

  ConstantRangeList IntersectedIntervals = Args.front().Inits;
  for (auto &Arg : Args.drop_front())
    IntersectedIntervals = IntersectedIntervals.intersectWith(Arg.Inits);

  return IntersectedIntervals;
}

struct DSEState {
  Function &F;
  AliasAnalysis &AA;
  EarliestEscapeAnalysis EA;

  /// The single BatchAA instance that is used to cache AA queries. It will
  /// not be invalidated over the whole run. This is safe, because:
  /// 1. Only memory writes are removed, so the alias cache for memory
  ///    locations remains valid.
  /// 2. No new instructions are added (only instructions removed), so cached
  ///    information for a deleted value cannot be accessed by a re-used new
  ///    value pointer.
  BatchAAResults BatchAA;

  MemorySSA &MSSA;
  DominatorTree &DT;
  PostDominatorTree &PDT;
  const TargetLibraryInfo &TLI;
  const DataLayout &DL;
  const LoopInfo &LI;

  // Whether the function contains any irreducible control flow, useful for
  // being able to accurately detect loops.
  bool ContainsIrreducibleLoops;

  // All MemoryDefs that potentially could kill other MemDefs.
  SmallVector<MemoryDef *, 64> MemDefs;
  // Any that should be skipped as they are already deleted.
  SmallPtrSet<MemoryAccess *, 4> SkipStores;
  // Keep track whether a given object is captured before return or not.
  DenseMap<const Value *, bool> CapturedBeforeReturn;
  // Keep track of all of the objects that are invisible to the caller after
  // the function returns.
  DenseMap<const Value *, bool> InvisibleToCallerAfterRet;
  // Keep track of blocks with throwing instructions not modeled in MemorySSA.
  SmallPtrSet<BasicBlock *, 16> ThrowingBlocks;
  // Post-order numbers for each basic block. Used to figure out if memory
  // accesses are executed before another access.
  DenseMap<BasicBlock *, unsigned> PostOrderNumbers;

  /// Keep track of instructions (partly) overlapping with killing MemoryDefs
  /// per basic block.
  MapVector<BasicBlock *, InstOverlapIntervalsTy> IOLs;
  // Check if there are root nodes that are terminated by UnreachableInst.
  // Those roots pessimize post-dominance queries. If there are such roots,
  // fall back to CFG scan starting from all non-unreachable roots.
  bool AnyUnreachableExit;

  // Whether or not we should iterate on removing dead stores at the end of the
  // function due to removing a store causing a previously captured pointer to
  // no longer be captured.
  bool ShouldIterateEndOfFunctionDSE;

  /// Dead instructions to be removed at the end of DSE.
  SmallVector<Instruction *> ToRemove;

  // Class contains self-reference, make sure it's not copied/moved.
  DSEState(const DSEState &) = delete;
  DSEState &operator=(const DSEState &) = delete;

  DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
           PostDominatorTree &PDT, const TargetLibraryInfo &TLI,
           const LoopInfo &LI)
      : F(F), AA(AA), EA(DT, &LI), BatchAA(AA, &EA), MSSA(MSSA), DT(DT),
        PDT(PDT), TLI(TLI), DL(F.getDataLayout()), LI(LI) {
    // Collect blocks with throwing instructions not modeled in MemorySSA and
    // alloc-like objects.
    unsigned PO = 0;
    for (BasicBlock *BB : post_order(&F)) {
      PostOrderNumbers[BB] = PO++;
      for (Instruction &I : *BB) {
        MemoryAccess *MA = MSSA.getMemoryAccess(&I);
        if (I.mayThrow() && !MA)
          ThrowingBlocks.insert(I.getParent());

        auto *MD = dyn_cast_or_null<MemoryDef>(MA);
        if (MD && MemDefs.size() < MemorySSADefsPerBlockLimit &&
            (getLocForWrite(&I) || isMemTerminatorInst(&I) ||
             (EnableInitializesImprovement && hasInitializesAttr(&I))))
          MemDefs.push_back(MD);
      }
    }

    // Treat byval, inalloca or dead-on-return arguments the same as allocas:
    // stores to them are dead at the end of the function.
    for (Argument &AI : F.args())
      if (AI.hasPassPointeeByValueCopyAttr() || AI.hasDeadOnReturnAttr())
        InvisibleToCallerAfterRet.insert({&AI, true});

    // Collect whether there is any irreducible control flow in the function.
    ContainsIrreducibleLoops = mayContainIrreducibleControl(F, &LI);

    AnyUnreachableExit = any_of(PDT.roots(), [](const BasicBlock *E) {
      return isa<UnreachableInst>(E->getTerminator());
    });
  }

  static void pushMemUses(MemoryAccess *Acc,
                          SmallVectorImpl<MemoryAccess *> &WorkList,
                          SmallPtrSetImpl<MemoryAccess *> &Visited) {
    for (Use &U : Acc->uses()) {
      auto *MA = cast<MemoryAccess>(U.getUser());
      if (Visited.insert(MA).second)
        WorkList.push_back(MA);
    }
  };

  LocationSize strengthenLocationSize(const Instruction *I,
                                      LocationSize Size) const {
    if (auto *CB = dyn_cast<CallBase>(I)) {
      LibFunc F;
      if (TLI.getLibFunc(*CB, F) && TLI.has(F) &&
          (F == LibFunc_memset_chk || F == LibFunc_memcpy_chk)) {
        // Use the precise location size specified by the 3rd argument
        // for determining whether KillingI overwrites DeadLoc if it is a
        // memset_chk instruction. memset_chk will write either the amount
        // specified as the 3rd argument or the function will immediately
        // abort and exit the program.
        // NOTE: AA may determine NoAlias if it can prove that the access size
        // is larger than the allocation size due to that being UB. To avoid
        // returning potentially invalid NoAlias results by AA, limit the use
        // of the precise location size to isOverwrite.
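        // For example, memset_chk(p, 0, 16, n) either writes exactly 16 bytes
        // or aborts, so the write can be treated as a precise 16-byte
        // location regardless of n.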
        if (const auto *Len = dyn_cast<ConstantInt>(CB->getArgOperand(2)))
          return LocationSize::precise(Len->getZExtValue());
      }
    }
    return Size;
  }

  /// Return 'OW_Complete' if a store to the 'KillingLoc' location (by \p
  /// KillingI instruction) completely overwrites a store to the 'DeadLoc'
  /// location (by \p DeadI instruction).
  /// Return OW_MaybePartial if \p KillingI does not completely overwrite
  /// \p DeadI, but they both write to the same underlying object. In that
  /// case, use isPartialOverwrite to check if \p KillingI partially overwrites
  /// \p DeadI. Returns 'OW_None' if \p KillingI is known to not overwrite
  /// \p DeadI. Returns 'OW_Unknown' if nothing can be determined.
  OverwriteResult isOverwrite(const Instruction *KillingI,
                              const Instruction *DeadI,
                              const MemoryLocation &KillingLoc,
                              const MemoryLocation &DeadLoc,
                              int64_t &KillingOff, int64_t &DeadOff) {
    // AliasAnalysis does not always account for loops. Limit overwrite checks
    // to dependencies for which we can guarantee they are independent of any
    // loops they are in.
    if (!isGuaranteedLoopIndependent(DeadI, KillingI, DeadLoc))
      return OW_Unknown;

    LocationSize KillingLocSize =
        strengthenLocationSize(KillingI, KillingLoc.Size);
    const Value *DeadPtr = DeadLoc.Ptr->stripPointerCasts();
    const Value *KillingPtr = KillingLoc.Ptr->stripPointerCasts();
    const Value *DeadUndObj = getUnderlyingObject(DeadPtr);
    const Value *KillingUndObj = getUnderlyingObject(KillingPtr);

    // Check whether the killing store overwrites the whole object, in which
    // case the size/offset of the dead store does not matter.
    if (DeadUndObj == KillingUndObj && KillingLocSize.isPrecise() &&
        isIdentifiedObject(KillingUndObj)) {
      std::optional<TypeSize> KillingUndObjSize =
          getPointerSize(KillingUndObj, DL, TLI, &F);
      if (KillingUndObjSize && *KillingUndObjSize == KillingLocSize.getValue())
        return OW_Complete;
    }

    // FIXME: Vet that this works for size upper-bounds. Seems unlikely that
    // we'll get imprecise values here, though (except for unknown sizes).
    if (!KillingLocSize.isPrecise() || !DeadLoc.Size.isPrecise()) {
      // In case no constant size is known, try to compare the IR values for
      // the number of bytes written and check if they match.
      const auto *KillingMemI = dyn_cast<MemIntrinsic>(KillingI);
      const auto *DeadMemI = dyn_cast<MemIntrinsic>(DeadI);
      if (KillingMemI && DeadMemI) {
        const Value *KillingV = KillingMemI->getLength();
        const Value *DeadV = DeadMemI->getLength();
        if (KillingV == DeadV && BatchAA.isMustAlias(DeadLoc, KillingLoc))
          return OW_Complete;
      }

      // Masked stores have imprecise locations, but we can reason about them
      // to some extent.
      return isMaskedStoreOverwrite(KillingI, DeadI, BatchAA);
    }

    const TypeSize KillingSize = KillingLocSize.getValue();
    const TypeSize DeadSize = DeadLoc.Size.getValue();
    // Bail on doing the size comparison, which depends on AA, for now.
    // TODO: Remove AnyScalable once Alias Analysis deals with scalable vectors.
    const bool AnyScalable =
        DeadSize.isScalable() || KillingLocSize.isScalable();

    if (AnyScalable)
      return OW_Unknown;
    // Query the alias information
    AliasResult AAR = BatchAA.alias(KillingLoc, DeadLoc);

    // If the start pointers are the same, we just have to compare sizes to see
    // if the killing store was larger than the dead store.
    if (AAR == AliasResult::MustAlias) {
      // Make sure that the KillingSize size is >= the DeadSize size.
      if (KillingSize >= DeadSize)
        return OW_Complete;
    }

    // If we hit a partial alias we may have a full overwrite
    if (AAR == AliasResult::PartialAlias && AAR.hasOffset()) {
      int32_t Off = AAR.getOffset();
      if (Off >= 0 && (uint64_t)Off + DeadSize <= KillingSize)
        return OW_Complete;
    }

    // If we can't resolve the same pointers to the same object, then we can't
    // analyze them at all.
    if (DeadUndObj != KillingUndObj) {
      // Non aliasing stores to different objects don't overlap. Note that
      // if the killing store is known to overwrite whole object (out of
      // bounds access overwrites whole object as well) then it is assumed to
      // completely overwrite any store to the same object even if they don't
      // actually alias (see next check).
      if (AAR == AliasResult::NoAlias)
        return OW_None;
      return OW_Unknown;
    }

    // Okay, we have stores to two completely different pointers.  Try to
    // decompose the pointer into a "base + constant_offset" form.  If the base
    // pointers are equal, then we can reason about the two stores.
    DeadOff = 0;
    KillingOff = 0;
    const Value *DeadBasePtr =
        GetPointerBaseWithConstantOffset(DeadPtr, DeadOff, DL);
    const Value *KillingBasePtr =
        GetPointerBaseWithConstantOffset(KillingPtr, KillingOff, DL);

    // If the base pointers still differ, we have two completely different
    // stores.
    if (DeadBasePtr != KillingBasePtr)
      return OW_Unknown;

    // The killing access completely overlaps the dead store if and only if
    // both start and end of the dead one is "inside" the killing one:
    //    |<->|--dead--|<->|
    //    |-----killing------|
    // Accesses may overlap if and only if start of one of them is "inside"
    // another one:
    //    |<->|--dead--|<-------->|
    //    |-------killing--------|
    //           OR
    //    |-------dead-------|
    //    |<->|---killing---|<----->|
    //
    // We have to be careful here as *Off is signed while *.Size is unsigned.
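    // For example, DeadOff = 8, DeadSize = 4, KillingOff = 4, KillingSize = 16
    // gives (8 - 4) + 4 <= 16 and thus OW_Complete; with KillingSize = 6 the
    // accesses merely overlap and we return OW_MaybePartial.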

    // Check if the dead access starts "not before" the killing one.
    if (DeadOff >= KillingOff) {
      // If the dead access ends "not after" the killing access then the
      // dead one is completely overwritten by the killing one.
      if (uint64_t(DeadOff - KillingOff) + DeadSize <= KillingSize)
        return OW_Complete;
      // If start of the dead access is "before" end of the killing access
      // then accesses overlap.
      else if ((uint64_t)(DeadOff - KillingOff) < KillingSize)
        return OW_MaybePartial;
    }
    // If start of the killing access is "before" end of the dead access then
    // accesses overlap.
    else if ((uint64_t)(KillingOff - DeadOff) < DeadSize) {
      return OW_MaybePartial;
    }

    // Can reach here only if accesses are known not to overlap.
    return OW_None;
  }
1209 
1210   bool isInvisibleToCallerAfterRet(const Value *V) {
1211     if (isa<AllocaInst>(V))
1212       return true;
1213 
1214     auto I = InvisibleToCallerAfterRet.insert({V, false});
1215     if (I.second && isInvisibleToCallerOnUnwind(V) && isNoAliasCall(V))
1216       I.first->second = capturesNothing(PointerMayBeCaptured(
1217           V, /*ReturnCaptures=*/true, CaptureComponents::Provenance));
1218     return I.first->second;
1219   }
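       // Illustrative IR sketch (hypothetical, for isInvisibleToCallerAfterRet):
       // a non-escaping, non-returned alloca is invisible to the caller after
       // the return, so a store to it that is never read again is dead:
       //   define void @f() {
       //     %a = alloca i32
       //     store i32 1, ptr %a   ; removable: %a never escapes or is read
       //     ret void
       //   }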
1220 
1221   bool isInvisibleToCallerOnUnwind(const Value *V) {
1222     bool RequiresNoCaptureBeforeUnwind;
1223     if (!isNotVisibleOnUnwind(V, RequiresNoCaptureBeforeUnwind))
1224       return false;
1225     if (!RequiresNoCaptureBeforeUnwind)
1226       return true;
1227 
1228     auto I = CapturedBeforeReturn.insert({V, true});
1229     if (I.second)
1230       // NOTE: This could be made more precise by PointerMayBeCapturedBefore
1231       // with the killing MemoryDef. But we refrain from doing so for now to
1232       // limit compile-time and this does not cause any changes to the number
1233       // of stores removed on a large test set in practice.
1234       I.first->second = capturesAnything(PointerMayBeCaptured(
1235           V, /*ReturnCaptures=*/false, CaptureComponents::Provenance));
1236     return !I.first->second;
1237   }
1238 
1239   std::optional<MemoryLocation> getLocForWrite(Instruction *I) const {
1240     if (!I->mayWriteToMemory())
1241       return std::nullopt;
1242 
1243     if (auto *CB = dyn_cast<CallBase>(I))
1244       return MemoryLocation::getForDest(CB, TLI);
1245 
1246     return MemoryLocation::getOrNone(I);
1247   }
1248 
1249   // Returns a list of <MemoryLocation, bool> pairs written by I.
1250   // The bool means whether the write is from the initializes attribute.
1251   SmallVector<std::pair<MemoryLocation, bool>, 1>
1252   getLocForInst(Instruction *I, bool ConsiderInitializesAttr) {
1253     SmallVector<std::pair<MemoryLocation, bool>, 1> Locations;
1254     if (isMemTerminatorInst(I)) {
1255       if (auto Loc = getLocForTerminator(I))
1256         Locations.push_back(std::make_pair(Loc->first, false));
1257       return Locations;
1258     }
1259 
1260     if (auto Loc = getLocForWrite(I))
1261       Locations.push_back(std::make_pair(*Loc, false));
1262 
1263     if (ConsiderInitializesAttr) {
1264       for (auto &MemLoc : getInitializesArgMemLoc(I)) {
1265         Locations.push_back(std::make_pair(MemLoc, true));
1266       }
1267     }
1268     return Locations;
1269   }
1270 
1271   /// Assuming this instruction has a dead analyzable write, can we delete
1272   /// this instruction?
1273   bool isRemovable(Instruction *I) {
1274     assert(getLocForWrite(I) && "Must have analyzable write");
1275 
1276     // Don't remove volatile/atomic stores.
1277     if (StoreInst *SI = dyn_cast<StoreInst>(I))
1278       return SI->isUnordered();
1279 
1280     if (auto *CB = dyn_cast<CallBase>(I)) {
1281       // Don't remove volatile memory intrinsics.
1282       if (auto *MI = dyn_cast<MemIntrinsic>(CB))
1283         return !MI->isVolatile();
1284 
1285       // Never remove dead lifetime intrinsics, e.g. because they are followed
1286       // by a free.
1287       if (CB->isLifetimeStartOrEnd())
1288         return false;
1289 
1290       return CB->use_empty() && CB->willReturn() && CB->doesNotThrow() &&
1291              !CB->isTerminator();
1292     }
1293 
1294     return false;
1295   }
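       // For example (IR sketch for isRemovable): "store volatile i32 0, ptr %p"
       // and "store atomic i32 0, ptr %p seq_cst, align 4" are not unordered
       // and are kept; a plain "store i32 0, ptr %p" is a removal candidate.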
1296 
1297   /// Returns true if \p UseInst completely overwrites \p DefLoc
1298   /// (stored by \p DefInst).
1299   bool isCompleteOverwrite(const MemoryLocation &DefLoc, Instruction *DefInst,
1300                            Instruction *UseInst) {
1301     // UseInst has a MemoryDef associated in MemorySSA. It's possible for a
1302     // MemoryDef to not write to memory, e.g. a volatile load is modeled as a
1303     // MemoryDef.
1304     if (!UseInst->mayWriteToMemory())
1305       return false;
1306 
1307     if (auto *CB = dyn_cast<CallBase>(UseInst))
1308       if (CB->onlyAccessesInaccessibleMemory())
1309         return false;
1310 
1311     int64_t InstWriteOffset, DepWriteOffset;
1312     if (auto CC = getLocForWrite(UseInst))
1313       return isOverwrite(UseInst, DefInst, *CC, DefLoc, InstWriteOffset,
1314                          DepWriteOffset) == OW_Complete;
1315     return false;
1316   }
1317 
1318   /// Returns true if \p Def is not read before returning from the function.
1319   bool isWriteAtEndOfFunction(MemoryDef *Def, const MemoryLocation &DefLoc) {
1320     LLVM_DEBUG(dbgs() << "  Check if def " << *Def << " ("
1321                       << *Def->getMemoryInst()
1322                       << ") is at the end of the function\n");
1323     SmallVector<MemoryAccess *, 4> WorkList;
1324     SmallPtrSet<MemoryAccess *, 8> Visited;
1325 
1326     pushMemUses(Def, WorkList, Visited);
1327     for (unsigned I = 0; I < WorkList.size(); I++) {
1328       if (WorkList.size() >= MemorySSAScanLimit) {
1329         LLVM_DEBUG(dbgs() << "  ... hit exploration limit.\n");
1330         return false;
1331       }
1332 
1333       MemoryAccess *UseAccess = WorkList[I];
1334       if (isa<MemoryPhi>(UseAccess)) {
1335         // AliasAnalysis does not account for loops. Limit elimination to
1336         // candidates for which we can guarantee they always store to the same
1337         // memory location.
1338         if (!isGuaranteedLoopInvariant(DefLoc.Ptr))
1339           return false;
1340 
1341         pushMemUses(cast<MemoryPhi>(UseAccess), WorkList, Visited);
1342         continue;
1343       }
1344       // TODO: Checking for aliasing is expensive. Consider reducing the amount
1345       // of times this is called and/or caching it.
1346       Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
1347       if (isReadClobber(DefLoc, UseInst)) {
1348         LLVM_DEBUG(dbgs() << "  ... hit read clobber " << *UseInst << ".\n");
1349         return false;
1350       }
1351 
1352       if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess))
1353         pushMemUses(UseDef, WorkList, Visited);
1354     }
1355     return true;
1356   }
1357 
1358   /// If \p I is a memory terminator like llvm.lifetime.end or free, return a
1359   /// pair with the MemoryLocation terminated by \p I and a boolean flag
1360   /// indicating whether \p I is a free-like call.
1361   std::optional<std::pair<MemoryLocation, bool>>
1362   getLocForTerminator(Instruction *I) const {
1363     uint64_t Len;
1364     Value *Ptr;
1365     if (match(I, m_Intrinsic<Intrinsic::lifetime_end>(m_ConstantInt(Len),
1366                                                       m_Value(Ptr))))
1367       return {std::make_pair(MemoryLocation(Ptr, Len), false)};
1368 
1369     if (auto *CB = dyn_cast<CallBase>(I)) {
1370       if (Value *FreedOp = getFreedOperand(CB, &TLI))
1371         return {std::make_pair(MemoryLocation::getAfter(FreedOp), true)};
1372     }
1373 
1374     return std::nullopt;
1375   }
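       // E.g. for getLocForTerminator (sketch):
       //   "call void @llvm.lifetime.end.p0(i64 16, ptr %a)" yields
       //   {MemoryLocation(%a, 16), false}, while a free-like call such as
       //   "call void @free(ptr %p)" yields
       //   {MemoryLocation::getAfter(%p), true}.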
1376 
1377   /// Returns true if \p I is a memory terminator instruction like
1378   /// llvm.lifetime.end or free.
1379   bool isMemTerminatorInst(Instruction *I) const {
1380     auto *CB = dyn_cast<CallBase>(I);
1381     return CB && (CB->getIntrinsicID() == Intrinsic::lifetime_end ||
1382                   getFreedOperand(CB, &TLI) != nullptr);
1383   }
1384 
1385   /// Returns true if \p MaybeTerm is a memory terminator for \p Loc from
1386   /// instruction \p AccessI.
1387   bool isMemTerminator(const MemoryLocation &Loc, Instruction *AccessI,
1388                        Instruction *MaybeTerm) {
1389     std::optional<std::pair<MemoryLocation, bool>> MaybeTermLoc =
1390         getLocForTerminator(MaybeTerm);
1391 
1392     if (!MaybeTermLoc)
1393       return false;
1394 
1395     // If the terminator is a free-like call, all accesses to the underlying
1396     // object can be considered terminated.
1397     if (getUnderlyingObject(Loc.Ptr) !=
1398         getUnderlyingObject(MaybeTermLoc->first.Ptr))
1399       return false;
1400 
1401     auto TermLoc = MaybeTermLoc->first;
1402     if (MaybeTermLoc->second) {
1403       const Value *LocUO = getUnderlyingObject(Loc.Ptr);
1404       return BatchAA.isMustAlias(TermLoc.Ptr, LocUO);
1405     }
1406     int64_t InstWriteOffset = 0;
1407     int64_t DepWriteOffset = 0;
1408     return isOverwrite(MaybeTerm, AccessI, TermLoc, Loc, InstWriteOffset,
1409                        DepWriteOffset) == OW_Complete;
1410   }
1411 
1412   // Returns true if \p UseInst may read from \p DefLoc.
1413   bool isReadClobber(const MemoryLocation &DefLoc, Instruction *UseInst) {
1414     if (isNoopIntrinsic(UseInst))
1415       return false;
1416 
1417     // Monotonic or weaker atomic stores can be reordered and do not need to
1418     // be treated as a read clobber.
1419     if (auto SI = dyn_cast<StoreInst>(UseInst))
1420       return isStrongerThan(SI->getOrdering(), AtomicOrdering::Monotonic);
1421 
1422     if (!UseInst->mayReadFromMemory())
1423       return false;
1424 
1425     if (auto *CB = dyn_cast<CallBase>(UseInst))
1426       if (CB->onlyAccessesInaccessibleMemory())
1427         return false;
1428 
1429     return isRefSet(BatchAA.getModRefInfo(UseInst, DefLoc));
1430   }
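       // For instance, isReadClobber treats
       // "store atomic i32 0, ptr %p monotonic, align 4" as not being a read
       // clobber, while a release or seq_cst store is conservatively treated
       // as one because it may not be reordered.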
1431 
1432   /// Returns true if a dependency between \p Current and \p KillingDef is
1433   /// guaranteed to be loop invariant for the loops that they are in. Either
1434   /// because they are known to be in the same block, in the same loop level or
1435   /// by guaranteeing that \p CurrentLoc only references a single MemoryLocation
1436   /// during execution of the containing function.
1437   bool isGuaranteedLoopIndependent(const Instruction *Current,
1438                                    const Instruction *KillingDef,
1439                                    const MemoryLocation &CurrentLoc) {
1440     // If the dependency is within the same block or loop level (being careful
1441     // of irreducible loops), we know that AA will return a valid result for the
1442     // memory dependency. (Both at the function level, outside of any loop,
1443     // would also be valid but we currently disable that to limit compile time).
1444     if (Current->getParent() == KillingDef->getParent())
1445       return true;
1446     const Loop *CurrentLI = LI.getLoopFor(Current->getParent());
1447     if (!ContainsIrreducibleLoops && CurrentLI &&
1448         CurrentLI == LI.getLoopFor(KillingDef->getParent()))
1449       return true;
1450     // Otherwise check the memory location is invariant to any loops.
1451     return isGuaranteedLoopInvariant(CurrentLoc.Ptr);
1452   }
1453 
1454   /// Returns true if \p Ptr is guaranteed to be loop invariant for any possible
1455   /// loop. In particular, this guarantees that it only references a single
1456   /// MemoryLocation during execution of the containing function.
1457   bool isGuaranteedLoopInvariant(const Value *Ptr) {
1458     Ptr = Ptr->stripPointerCasts();
1459     if (auto *GEP = dyn_cast<GEPOperator>(Ptr))
1460       if (GEP->hasAllConstantIndices())
1461         Ptr = GEP->getPointerOperand()->stripPointerCasts();
1462 
1463     if (auto *I = dyn_cast<Instruction>(Ptr)) {
1464       return I->getParent()->isEntryBlock() ||
1465              (!ContainsIrreducibleLoops && !LI.getLoopFor(I->getParent()));
1466     }
1467     return true;
1468   }
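       // Sketch for isGuaranteedLoopInvariant: with %a an entry-block alloca,
       // a pointer like "getelementptr inbounds i32, ptr %a, i64 1"
       // (all-constant indices) names a single fixed location, whereas a GEP
       // whose index is a loop-carried phi may name a different location on
       // every iteration and is rejected.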
1469 
1470   // Find a MemoryDef writing to \p KillingLoc and dominating \p StartAccess,
1471   // with no read access between them or on any other path to a function exit
1472   // block if \p KillingLoc is not accessible after the function returns. If
1473   // there is no such MemoryDef, return std::nullopt. The returned MemoryDef
1474   // may not be (completely) overwritten by \p KillingLoc. Currently we bail
1475   // out when we encounter an aliasing MemoryUse (read).
1476   std::optional<MemoryAccess *>
1477   getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess,
1478                   const MemoryLocation &KillingLoc, const Value *KillingUndObj,
1479                   unsigned &ScanLimit, unsigned &WalkerStepLimit,
1480                   bool IsMemTerm, unsigned &PartialLimit,
1481                   bool IsInitializesAttrMemLoc) {
1482     if (ScanLimit == 0 || WalkerStepLimit == 0) {
1483       LLVM_DEBUG(dbgs() << "\n    ...  hit scan limit\n");
1484       return std::nullopt;
1485     }
1486 
1487     MemoryAccess *Current = StartAccess;
1488     Instruction *KillingI = KillingDef->getMemoryInst();
1489     LLVM_DEBUG(dbgs() << "  trying to get dominating access\n");
1490 
1491     // Only optimize defining access of KillingDef when directly starting at its
1492     // defining access. The defining access also must only access KillingLoc. At
1493     // the moment we only support instructions with a single write location, so
1494     // it should be sufficient to disable optimizations for instructions that
1495     // also read from memory.
1496     bool CanOptimize = OptimizeMemorySSA &&
1497                        KillingDef->getDefiningAccess() == StartAccess &&
1498                        !KillingI->mayReadFromMemory();
1499 
1500     // Find the next clobbering Mod access for DefLoc, starting at StartAccess.
1501     std::optional<MemoryLocation> CurrentLoc;
1502     for (;; Current = cast<MemoryDef>(Current)->getDefiningAccess()) {
1503       LLVM_DEBUG({
1504         dbgs() << "   visiting " << *Current;
1505         if (!MSSA.isLiveOnEntryDef(Current) && isa<MemoryUseOrDef>(Current))
1506           dbgs() << " (" << *cast<MemoryUseOrDef>(Current)->getMemoryInst()
1507                  << ")";
1508         dbgs() << "\n";
1509       });
1510 
1511       // Reached TOP.
1512       if (MSSA.isLiveOnEntryDef(Current)) {
1513         LLVM_DEBUG(dbgs() << "   ...  found LiveOnEntryDef\n");
1514         if (CanOptimize && Current != KillingDef->getDefiningAccess())
1515           // The first clobbering def is... none.
1516           KillingDef->setOptimized(Current);
1517         return std::nullopt;
1518       }
1519 
1520       // Cost of a step. Accesses in the same block are more likely to be valid
1521       // candidates for elimination, hence consider them cheaper.
1522       unsigned StepCost = KillingDef->getBlock() == Current->getBlock()
1523                               ? MemorySSASameBBStepCost
1524                               : MemorySSAOtherBBStepCost;
1525       if (WalkerStepLimit <= StepCost) {
1526         LLVM_DEBUG(dbgs() << "   ...  hit walker step limit\n");
1527         return std::nullopt;
1528       }
1529       WalkerStepLimit -= StepCost;
1530 
1531       // Return for MemoryPhis. They cannot be eliminated directly and the
1532       // caller is responsible for traversing them.
1533       if (isa<MemoryPhi>(Current)) {
1534         LLVM_DEBUG(dbgs() << "   ...  found MemoryPhi\n");
1535         return Current;
1536       }
1537 
1538       // Below, check if CurrentDef is a valid candidate to be eliminated by
1539       // KillingDef. If it is not, check the next candidate.
1540       MemoryDef *CurrentDef = cast<MemoryDef>(Current);
1541       Instruction *CurrentI = CurrentDef->getMemoryInst();
1542 
1543       if (canSkipDef(CurrentDef, !isInvisibleToCallerOnUnwind(KillingUndObj))) {
1544         CanOptimize = false;
1545         continue;
1546       }
1547 
1548       // Before we try to remove anything, check for any extra throwing
1549       // instructions that block us from DSEing
1550       if (mayThrowBetween(KillingI, CurrentI, KillingUndObj)) {
1551         LLVM_DEBUG(dbgs() << "  ... skip, may throw!\n");
1552         return std::nullopt;
1553       }
1554 
1555       // Check for anything that looks like it will be a barrier to further
1556       // removal
1557       if (isDSEBarrier(KillingUndObj, CurrentI)) {
1558         LLVM_DEBUG(dbgs() << "  ... skip, barrier\n");
1559         return std::nullopt;
1560       }
1561 
1562       // If Current is known to be on a path that reads DefLoc or is a read
1563       // clobber, bail out, as the path is not profitable. We skip this check
1564       // for intrinsic calls, because the code knows how to handle memcpy
1565       // intrinsics.
1566       if (!isa<IntrinsicInst>(CurrentI) && isReadClobber(KillingLoc, CurrentI))
1567         return std::nullopt;
1568 
1569       // Quick check if there are direct uses that are read-clobbers.
1570       if (any_of(Current->uses(), [this, &KillingLoc, StartAccess](Use &U) {
1571             if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(U.getUser()))
1572               return !MSSA.dominates(StartAccess, UseOrDef) &&
1573                      isReadClobber(KillingLoc, UseOrDef->getMemoryInst());
1574             return false;
1575           })) {
1576         LLVM_DEBUG(dbgs() << "   ...  found a read clobber\n");
1577         return std::nullopt;
1578       }
1579 
1580       // If Current does not have an analyzable write location or is not
1581       // removable, skip it.
1582       CurrentLoc = getLocForWrite(CurrentI);
1583       if (!CurrentLoc || !isRemovable(CurrentI)) {
1584         CanOptimize = false;
1585         continue;
1586       }
1587 
1588       // AliasAnalysis does not account for loops. Limit elimination to
1589       // candidates for which we can guarantee they always store to the same
1590       // memory location and are not located in different loops.
1591       if (!isGuaranteedLoopIndependent(CurrentI, KillingI, *CurrentLoc)) {
1592         LLVM_DEBUG(dbgs() << "  ... not guaranteed loop independent\n");
1593         CanOptimize = false;
1594         continue;
1595       }
1596 
1597       if (IsMemTerm) {
1598         // If the killing def is a memory terminator (e.g. lifetime.end), check
1599         // the next candidate if Current does not write the same
1600         // underlying object as the terminator.
1601         if (!isMemTerminator(*CurrentLoc, CurrentI, KillingI)) {
1602           CanOptimize = false;
1603           continue;
1604         }
1605       } else {
1606         int64_t KillingOffset = 0;
1607         int64_t DeadOffset = 0;
1608         auto OR = isOverwrite(KillingI, CurrentI, KillingLoc, *CurrentLoc,
1609                               KillingOffset, DeadOffset);
1610         if (CanOptimize) {
1611           // CurrentDef is the earliest write clobber of KillingDef. Use it as
1612           // optimized access. Do not optimize if CurrentDef is already the
1613           // defining access of KillingDef.
1614           if (CurrentDef != KillingDef->getDefiningAccess() &&
1615               (OR == OW_Complete || OR == OW_MaybePartial))
1616             KillingDef->setOptimized(CurrentDef);
1617 
1618           // Once a may-aliasing def is encountered do not set an optimized
1619           // access.
1620           if (OR != OW_None)
1621             CanOptimize = false;
1622         }
1623 
1624         // If Current does not write to the same object as KillingDef, check
1625         // the next candidate.
1626         if (OR == OW_Unknown || OR == OW_None)
1627           continue;
1628         else if (OR == OW_MaybePartial) {
1629           // If KillingDef only partially overwrites Current, check the next
1630           // candidate if the partial step limit is exceeded. This aggressively
1631           // limits the number of candidates for partial store elimination,
1632           // which are less likely to be removable in the end.
1633           if (PartialLimit <= 1) {
1634             WalkerStepLimit -= 1;
1635             LLVM_DEBUG(dbgs() << "   ... reached partial limit ... continue with next access\n");
1636             continue;
1637           }
1638           PartialLimit -= 1;
1639         }
1640       }
1641       break;
1642     };
1643 
1644     // Accesses to objects accessible after the function returns can only be
1645     // eliminated if the access is dead along all paths to the exit. Collect
1646     // the blocks with killing (=completely overwriting MemoryDefs) and check if
1647     // they cover all paths from MaybeDeadAccess to any function exit.
1648     SmallPtrSet<Instruction *, 16> KillingDefs;
1649     KillingDefs.insert(KillingDef->getMemoryInst());
1650     MemoryAccess *MaybeDeadAccess = Current;
1651     MemoryLocation MaybeDeadLoc = *CurrentLoc;
1652     Instruction *MaybeDeadI = cast<MemoryDef>(MaybeDeadAccess)->getMemoryInst();
1653     LLVM_DEBUG(dbgs() << "  Checking for reads of " << *MaybeDeadAccess << " ("
1654                       << *MaybeDeadI << ")\n");
1655 
1656     SmallVector<MemoryAccess *, 32> WorkList;
1657     SmallPtrSet<MemoryAccess *, 32> Visited;
1658     pushMemUses(MaybeDeadAccess, WorkList, Visited);
1659 
1660     // Check if DeadDef may be read.
1661     for (unsigned I = 0; I < WorkList.size(); I++) {
1662       MemoryAccess *UseAccess = WorkList[I];
1663 
1664       LLVM_DEBUG(dbgs() << "   " << *UseAccess);
1665       // Bail out if the number of accesses to check exceeds the scan limit.
1666       if (ScanLimit < (WorkList.size() - I)) {
1667         LLVM_DEBUG(dbgs() << "\n    ...  hit scan limit\n");
1668         return std::nullopt;
1669       }
1670       --ScanLimit;
1671       NumDomMemDefChecks++;
1672 
1673       if (isa<MemoryPhi>(UseAccess)) {
1674         if (any_of(KillingDefs, [this, UseAccess](Instruction *KI) {
1675               return DT.properlyDominates(KI->getParent(),
1676                                           UseAccess->getBlock());
1677             })) {
1678           LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing block\n");
1679           continue;
1680         }
1681         LLVM_DEBUG(dbgs() << "\n    ... adding PHI uses\n");
1682         pushMemUses(UseAccess, WorkList, Visited);
1683         continue;
1684       }
1685 
1686       Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
1687       LLVM_DEBUG(dbgs() << " (" << *UseInst << ")\n");
1688 
1689       if (any_of(KillingDefs, [this, UseInst](Instruction *KI) {
1690             return DT.dominates(KI, UseInst);
1691           })) {
1692         LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing def\n");
1693         continue;
1694       }
1695 
1696       // A memory terminator kills all preceding MemoryDefs and all succeeding
1697       // MemoryAccesses. We do not have to check its users.
1698       if (isMemTerminator(MaybeDeadLoc, MaybeDeadI, UseInst)) {
1699         LLVM_DEBUG(
1700             dbgs()
1701             << " ... skipping, memterminator invalidates following accesses\n");
1702         continue;
1703       }
1704 
1705       if (isNoopIntrinsic(cast<MemoryUseOrDef>(UseAccess)->getMemoryInst())) {
1706         LLVM_DEBUG(dbgs() << "    ... adding uses of intrinsic\n");
1707         pushMemUses(UseAccess, WorkList, Visited);
1708         continue;
1709       }
1710 
1711       if (UseInst->mayThrow() && !isInvisibleToCallerOnUnwind(KillingUndObj)) {
1712         LLVM_DEBUG(dbgs() << "  ... found throwing instruction\n");
1713         return std::nullopt;
1714       }
1715 
1716       // Uses which may read the original MemoryDef mean we cannot eliminate the
1717       // original MD. Stop walk.
1718       // If KillingDef is a CallInst with "initializes" attribute, the reads in
1719       // the callee would be dominated by initializations, so it should be safe.
1720       bool IsKillingDefFromInitAttr = false;
1721       if (IsInitializesAttrMemLoc) {
1722         if (KillingI == UseInst &&
1723             KillingUndObj == getUnderlyingObject(MaybeDeadLoc.Ptr))
1724           IsKillingDefFromInitAttr = true;
1725       }
1726 
1727       if (isReadClobber(MaybeDeadLoc, UseInst) && !IsKillingDefFromInitAttr) {
1728         LLVM_DEBUG(dbgs() << "    ... found read clobber\n");
1729         return std::nullopt;
1730       }
1731 
1732       // If this worklist walks back to the original memory access (and the
1733       // pointer is not guaranteed loop invariant) then we cannot assume that a
1734       // store kills itself.
1735       if (MaybeDeadAccess == UseAccess &&
1736           !isGuaranteedLoopInvariant(MaybeDeadLoc.Ptr)) {
1737         LLVM_DEBUG(dbgs() << "    ... found not loop invariant self access\n");
1738         return std::nullopt;
1739       }
1740       // Otherwise, for the KillingDef and MaybeDeadAccess we only have to check
1741       // if it reads the memory location.
1742       // TODO: It would probably be better to check for self-reads before
1743       // calling the function.
1744       if (KillingDef == UseAccess || MaybeDeadAccess == UseAccess) {
1745         LLVM_DEBUG(dbgs() << "    ... skipping killing def/dom access\n");
1746         continue;
1747       }
1748 
1749       // Check all uses for MemoryDefs, except for defs completely overwriting
1750       // the original location. Otherwise we have to check uses of *all*
1751       // MemoryDefs we discover, including non-aliasing ones. Otherwise we might
1752       // miss cases like the following
1753       //   1 = Def(LoE) ; <----- DeadDef stores [0,1]
1754       //   2 = Def(1)   ; (2, 1) = NoAlias,   stores [2,3]
1755       //   Use(2)       ; MayAlias 2 *and* 1, loads [0, 3].
1756       //                  (The Use points to the *first* Def it may alias)
1757       //   3 = Def(1)   ; <---- Current  (3, 2) = NoAlias, (3,1) = MayAlias,
1758       //                  stores [0,1]
1759       if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess)) {
1760         if (isCompleteOverwrite(MaybeDeadLoc, MaybeDeadI, UseInst)) {
1761           BasicBlock *MaybeKillingBlock = UseInst->getParent();
1762           if (PostOrderNumbers.find(MaybeKillingBlock)->second <
1763               PostOrderNumbers.find(MaybeDeadAccess->getBlock())->second) {
1764             if (!isInvisibleToCallerAfterRet(KillingUndObj)) {
1765               LLVM_DEBUG(dbgs()
1766                          << "    ... found killing def " << *UseInst << "\n");
1767               KillingDefs.insert(UseInst);
1768             }
1769           } else {
1770             LLVM_DEBUG(dbgs()
1771                        << "    ... found preceding def " << *UseInst << "\n");
1772             return std::nullopt;
1773           }
1774         } else
1775           pushMemUses(UseDef, WorkList, Visited);
1776       }
1777     }
1778 
1779     // For accesses to locations visible after the function returns, make sure
1780     // that the location is dead (=overwritten) along all paths from
1781     // MaybeDeadAccess to the exit.
1782     if (!isInvisibleToCallerAfterRet(KillingUndObj)) {
1783       SmallPtrSet<BasicBlock *, 16> KillingBlocks;
1784       for (Instruction *KD : KillingDefs)
1785         KillingBlocks.insert(KD->getParent());
1786       assert(!KillingBlocks.empty() &&
1787              "Expected at least a single killing block");
1788 
1789       // Find the common post-dominator of all killing blocks.
1790       BasicBlock *CommonPred = *KillingBlocks.begin();
1791       for (BasicBlock *BB : llvm::drop_begin(KillingBlocks)) {
1792         if (!CommonPred)
1793           break;
1794         CommonPred = PDT.findNearestCommonDominator(CommonPred, BB);
1795       }
1796 
1797       // If the common post-dominator does not post-dominate MaybeDeadAccess,
1798       // there is a path from MaybeDeadAccess to an exit not going through a
1799       // killing block.
1800       if (!PDT.dominates(CommonPred, MaybeDeadAccess->getBlock())) {
1801         if (!AnyUnreachableExit)
1802           return std::nullopt;
1803 
1804         // Fall back to CFG scan starting at all non-unreachable roots if not
1805         // all paths to the exit go through CommonPred.
1806         CommonPred = nullptr;
1807       }
1808 
1809       // If CommonPred itself is in the set of killing blocks, we're done.
1810       if (KillingBlocks.count(CommonPred))
1811         return {MaybeDeadAccess};
1812 
1813       SetVector<BasicBlock *> WorkList;
1814       // If CommonPred is null, there are multiple exits from the function.
1815       // They all have to be added to the worklist.
1816       if (CommonPred)
1817         WorkList.insert(CommonPred);
1818       else
1819         for (BasicBlock *R : PDT.roots()) {
1820           if (!isa<UnreachableInst>(R->getTerminator()))
1821             WorkList.insert(R);
1822         }
1823 
1824       NumCFGTries++;
1825       // Check if all paths starting from an exit node go through one of the
1826       // killing blocks before reaching MaybeDeadAccess.
1827       for (unsigned I = 0; I < WorkList.size(); I++) {
1828         NumCFGChecks++;
1829         BasicBlock *Current = WorkList[I];
1830         if (KillingBlocks.count(Current))
1831           continue;
1832         if (Current == MaybeDeadAccess->getBlock())
1833           return std::nullopt;
1834 
1835         // MaybeDeadAccess is reachable from the entry, so we don't have to
1836         // explore unreachable blocks further.
1837         if (!DT.isReachableFromEntry(Current))
1838           continue;
1839 
1840         WorkList.insert_range(predecessors(Current));
1841 
1842         if (WorkList.size() >= MemorySSAPathCheckLimit)
1843           return std::nullopt;
1844       }
1845       NumCFGSuccess++;
1846     }
1847 
1848     // No aliasing MemoryUses of MaybeDeadAccess found, MaybeDeadAccess is
1849     // potentially dead.
1850     return {MaybeDeadAccess};
1851   }
1852 
1853   /// Delete dead memory defs and recursively add their operands to ToRemove if
1854   /// they became dead.
1855   void
1856   deleteDeadInstruction(Instruction *SI,
1857                         SmallPtrSetImpl<MemoryAccess *> *Deleted = nullptr) {
1858     MemorySSAUpdater Updater(&MSSA);
1859     SmallVector<Instruction *, 32> NowDeadInsts;
1860     NowDeadInsts.push_back(SI);
1861     --NumFastOther;
1862 
1863     while (!NowDeadInsts.empty()) {
1864       Instruction *DeadInst = NowDeadInsts.pop_back_val();
1865       ++NumFastOther;
1866 
1867       // Try to preserve debug information attached to the dead instruction.
1868       salvageDebugInfo(*DeadInst);
1869       salvageKnowledge(DeadInst);
1870 
1871       // Remove the Instruction from MSSA.
1872       MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst);
1873       bool IsMemDef = MA && isa<MemoryDef>(MA);
1874       if (MA) {
1875         if (IsMemDef) {
1876           auto *MD = cast<MemoryDef>(MA);
1877           SkipStores.insert(MD);
1878           if (Deleted)
1879             Deleted->insert(MD);
1880           if (auto *SI = dyn_cast<StoreInst>(MD->getMemoryInst())) {
1881             if (SI->getValueOperand()->getType()->isPointerTy()) {
1882               const Value *UO = getUnderlyingObject(SI->getValueOperand());
1883               if (CapturedBeforeReturn.erase(UO))
1884                 ShouldIterateEndOfFunctionDSE = true;
1885               InvisibleToCallerAfterRet.erase(UO);
1886             }
1887           }
1888         }
1889 
1890         Updater.removeMemoryAccess(MA);
1891       }
1892 
1893       auto I = IOLs.find(DeadInst->getParent());
1894       if (I != IOLs.end())
1895         I->second.erase(DeadInst);
1896       // Remove its operands
1897       for (Use &O : DeadInst->operands())
1898         if (Instruction *OpI = dyn_cast<Instruction>(O)) {
1899           O.set(PoisonValue::get(O->getType()));
1900           if (isInstructionTriviallyDead(OpI, &TLI))
1901             NowDeadInsts.push_back(OpI);
1902         }
1903 
1904       EA.removeInstruction(DeadInst);
1905       // Remove memory defs directly if they don't produce results, but only
1906       // queue other dead instructions for later removal. They may have been
1907       // used as memory locations that have been cached by BatchAA. Removing
1908       // them here may lead to newly created instructions being allocated at the
1909       // same address, yielding stale cache entries.
1910       if (IsMemDef && DeadInst->getType()->isVoidTy())
1911         DeadInst->eraseFromParent();
1912       else
1913         ToRemove.push_back(DeadInst);
1914     }
1915   }
1916 
1917   // Check for any extra throws between \p KillingI and \p DeadI that block
1918   // DSE. This only checks extra maythrows (those that aren't MemoryDefs).
1919   // MemoryDefs that may throw are handled during the walk from one def to the
1920   // next.
1921   bool mayThrowBetween(Instruction *KillingI, Instruction *DeadI,
1922                        const Value *KillingUndObj) {
1923     // First see if we can ignore it by using the fact that KillingUndObj is
1924     // an alloca/alloca-like object that is not visible to the caller during
1925     // execution of the function.
1926     if (KillingUndObj && isInvisibleToCallerOnUnwind(KillingUndObj))
1927       return false;
1928 
1929     if (KillingI->getParent() == DeadI->getParent())
1930       return ThrowingBlocks.count(KillingI->getParent());
1931     return !ThrowingBlocks.empty();
1932   }
1933 
1934   // Check if \p DeadI acts as a DSE barrier for \p KillingI. The following
1935   // instructions act as barriers:
1936   //  * A memory instruction that may throw and \p KillingI accesses a non-stack
1937   //  object.
1938   //  * Atomic stores stronger than monotonic.
1939   bool isDSEBarrier(const Value *KillingUndObj, Instruction *DeadI) {
1940     // If DeadI may throw, it acts as a barrier, unless the killing access
1941     // is to an alloca/alloca-like object that does not escape.
1942     if (DeadI->mayThrow() && !isInvisibleToCallerOnUnwind(KillingUndObj))
1943       return true;
1944 
1945     // If DeadI is an atomic load/store stronger than monotonic, do not try to
1946     // eliminate/reorder it.
1947     if (DeadI->isAtomic()) {
1948       if (auto *LI = dyn_cast<LoadInst>(DeadI))
1949         return isStrongerThanMonotonic(LI->getOrdering());
1950       if (auto *SI = dyn_cast<StoreInst>(DeadI))
1951         return isStrongerThanMonotonic(SI->getOrdering());
1952       if (auto *ARMW = dyn_cast<AtomicRMWInst>(DeadI))
1953         return isStrongerThanMonotonic(ARMW->getOrdering());
1954       if (auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(DeadI))
1955         return isStrongerThanMonotonic(CmpXchg->getSuccessOrdering()) ||
1956                isStrongerThanMonotonic(CmpXchg->getFailureOrdering());
1957       llvm_unreachable("other instructions should be skipped in MemorySSA");
1958     }
1959     return false;
1960   }
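       // Example for isDSEBarrier (sketch):
       // "store atomic i32 0, ptr %p release, align 4" is a barrier (release
       // is stronger than monotonic), while a monotonic store of the same
       // shape is not.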
1961 
1962   /// Eliminate writes to objects that are not visible in the caller and are not
1963   /// accessed before returning from the function.
1964   bool eliminateDeadWritesAtEndOfFunction() {
1965     bool MadeChange = false;
1966     LLVM_DEBUG(
1967         dbgs()
1968         << "Trying to eliminate MemoryDefs at the end of the function\n");
1969     do {
1970       ShouldIterateEndOfFunctionDSE = false;
1971       for (MemoryDef *Def : llvm::reverse(MemDefs)) {
1972         if (SkipStores.contains(Def))
1973           continue;
1974 
1975         Instruction *DefI = Def->getMemoryInst();
1976         auto DefLoc = getLocForWrite(DefI);
1977         if (!DefLoc || !isRemovable(DefI)) {
1978           LLVM_DEBUG(dbgs() << "  ... could not get location for write or "
1979                                "instruction not removable.\n");
1980           continue;
1981         }
1982 
1983         // NOTE: Currently eliminating writes at the end of a function is
1984         // limited to MemoryDefs with a single underlying object, to save
1985         // compile-time. In practice it appears the case with multiple
1986         // underlying objects is very uncommon. If it turns out to be important,
1987         // we can use getUnderlyingObjects here instead.
1988         const Value *UO = getUnderlyingObject(DefLoc->Ptr);
1989         if (!isInvisibleToCallerAfterRet(UO))
1990           continue;
1991 
1992         if (isWriteAtEndOfFunction(Def, *DefLoc)) {
1993           // See through pointer-to-pointer bitcasts
1994           LLVM_DEBUG(dbgs() << "   ... MemoryDef is not accessed until the end "
1995                                "of the function\n");
1996           deleteDeadInstruction(DefI);
1997           ++NumFastStores;
1998           MadeChange = true;
1999         }
2000       }
2001     } while (ShouldIterateEndOfFunctionDSE);
2002     return MadeChange;
2003   }
2004 
2005   /// If we have a zero initializing memset following a call to malloc,
2006   /// try folding it into a call to calloc.
2007   bool tryFoldIntoCalloc(MemoryDef *Def, const Value *DefUO) {
2008     Instruction *DefI = Def->getMemoryInst();
2009     MemSetInst *MemSet = dyn_cast<MemSetInst>(DefI);
2010     if (!MemSet)
2011       // TODO: Could handle zero store to small allocation as well.
2012       return false;
2013     Constant *StoredConstant = dyn_cast<Constant>(MemSet->getValue());
2014     if (!StoredConstant || !StoredConstant->isNullValue())
2015       return false;
2016 
2017     if (!isRemovable(DefI))
2018       // The memset might be volatile.
2019       return false;
2020 
2021     if (F.hasFnAttribute(Attribute::SanitizeMemory) ||
2022         F.hasFnAttribute(Attribute::SanitizeAddress) ||
2023         F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
2024         F.getName() == "calloc")
2025       return false;
2026     auto *Malloc = const_cast<CallInst *>(dyn_cast<CallInst>(DefUO));
2027     if (!Malloc)
2028       return false;
2029     auto *InnerCallee = Malloc->getCalledFunction();
2030     if (!InnerCallee)
2031       return false;
2032     LibFunc Func = NotLibFunc;
2033     StringRef ZeroedVariantName;
2034     if (!TLI.getLibFunc(*InnerCallee, Func) || !TLI.has(Func) ||
2035         Func != LibFunc_malloc) {
2036       Attribute Attr = Malloc->getFnAttr("alloc-variant-zeroed");
2037       if (!Attr.isValid())
2038         return false;
2039       ZeroedVariantName = Attr.getValueAsString();
2040       if (ZeroedVariantName.empty())
2041         return false;
2042     }
2043 
2044     // Gracefully handle malloc with unexpected memory attributes.
2045     auto *MallocDef = dyn_cast_or_null<MemoryDef>(MSSA.getMemoryAccess(Malloc));
2046     if (!MallocDef)
2047       return false;
2048 
2049     auto shouldCreateCalloc = [](CallInst *Malloc, CallInst *Memset) {
2050       // Check for a br(icmp(ptr, null), truebb, falsebb) pattern at the end
2051       // of the malloc block.
2052       auto *MallocBB = Malloc->getParent(),
2053            *MemsetBB = Memset->getParent();
2054       if (MallocBB == MemsetBB)
2055         return true;
2056       auto *Ptr = Memset->getArgOperand(0);
2057       auto *TI = MallocBB->getTerminator();
2058       BasicBlock *TrueBB, *FalseBB;
2059       if (!match(TI, m_Br(m_SpecificICmp(ICmpInst::ICMP_EQ, m_Specific(Ptr),
2060                                          m_Zero()),
2061                           TrueBB, FalseBB)))
2062         return false;
2063       if (MemsetBB != FalseBB)
2064         return false;
2065       return true;
2066     };
2067 
2068     if (Malloc->getOperand(0) != MemSet->getLength())
2069       return false;
2070     if (!shouldCreateCalloc(Malloc, MemSet) || !DT.dominates(Malloc, MemSet) ||
2071         !memoryIsNotModifiedBetween(Malloc, MemSet, BatchAA, DL, &DT))
2072       return false;
2073     IRBuilder<> IRB(Malloc);
2074     assert(Func == LibFunc_malloc || !ZeroedVariantName.empty());
2075     Value *Calloc = nullptr;
2076     if (!ZeroedVariantName.empty()) {
2077       LLVMContext &Ctx = Malloc->getContext();
2078       AttributeList Attrs = InnerCallee->getAttributes();
2079       AllocFnKind AllocKind =
2080           Attrs.getFnAttr(Attribute::AllocKind).getAllocKind() |
2081           AllocFnKind::Zeroed;
2082       AllocKind &= ~AllocFnKind::Uninitialized;
2083       Attrs =
2084           Attrs.addFnAttribute(Ctx, Attribute::getWithAllocKind(Ctx, AllocKind))
2085               .removeFnAttribute(Ctx, "alloc-variant-zeroed");
2086       FunctionCallee ZeroedVariant = Malloc->getModule()->getOrInsertFunction(
2087           ZeroedVariantName, InnerCallee->getFunctionType(), Attrs);
2088       SmallVector<Value *, 3> Args;
2089       Args.append(Malloc->arg_begin(), Malloc->arg_end());
2090       Calloc = IRB.CreateCall(ZeroedVariant, Args, ZeroedVariantName);
2091     } else {
2092       Type *SizeTTy = Malloc->getArgOperand(0)->getType();
2093       Calloc =
2094           emitCalloc(ConstantInt::get(SizeTTy, 1), Malloc->getArgOperand(0),
2095                      IRB, TLI, Malloc->getType()->getPointerAddressSpace());
2096     }
2097     if (!Calloc)
2098       return false;
2099 
2100     MemorySSAUpdater Updater(&MSSA);
2101     auto *NewAccess =
2102       Updater.createMemoryAccessAfter(cast<Instruction>(Calloc), nullptr,
2103                                       MallocDef);
2104     auto *NewAccessMD = cast<MemoryDef>(NewAccess);
2105     Updater.insertDef(NewAccessMD, /*RenameUses=*/true);
2106     Malloc->replaceAllUsesWith(Calloc);
2107     deleteDeadInstruction(Malloc);
2108     return true;
2109   }
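       // Shape of the tryFoldIntoCalloc rewrite (a sketch, assuming the usual
       // C library names):
       //   %p = call ptr @malloc(i64 %n)
       //   call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 %n, i1 false)
       // becomes
       //   %p = call ptr @calloc(i64 1, i64 %n)
       // after which the zero memset writes a value calloc already guarantees
       // and becomes a candidate for removal as a no-op store.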
2110 
2111   // Check if there is a dominating condition that implies that the value
2112   // being stored to a pointer is already present at that location.
2113   bool dominatingConditionImpliesValue(MemoryDef *Def) {
2114     auto *StoreI = cast<StoreInst>(Def->getMemoryInst());
2115     BasicBlock *StoreBB = StoreI->getParent();
2116     Value *StorePtr = StoreI->getPointerOperand();
2117     Value *StoreVal = StoreI->getValueOperand();
2118 
2119     DomTreeNode *IDom = DT.getNode(StoreBB)->getIDom();
2120     if (!IDom)
2121       return false;
2122 
2123     auto *BI = dyn_cast<BranchInst>(IDom->getBlock()->getTerminator());
2124     if (!BI || !BI->isConditional())
2125       return false;
2126 
2127     // In case both blocks are the same, it is not possible to determine
2128     // if the optimization is possible. (We would not want to optimize a store
2129     // in the FalseBB if the condition is true and vice versa.)
2130     if (BI->getSuccessor(0) == BI->getSuccessor(1))
2131       return false;
2132 
2133     Instruction *ICmpL;
2134     CmpPredicate Pred;
2135     if (!match(BI->getCondition(),
2136                m_c_ICmp(Pred,
2137                         m_CombineAnd(m_Load(m_Specific(StorePtr)),
2138                                      m_Instruction(ICmpL)),
2139                         m_Specific(StoreVal))) ||
2140         !ICmpInst::isEquality(Pred))
2141       return false;
2142 
2143     // If the else block also branches to the if block, or the other way
2144     // around, it is not possible to determine if the optimization is possible.
2145     if (Pred == ICmpInst::ICMP_EQ &&
2146         !DT.dominates(BasicBlockEdge(BI->getParent(), BI->getSuccessor(0)),
2147                       StoreBB))
2148       return false;
2149 
2150     if (Pred == ICmpInst::ICMP_NE &&
2151         !DT.dominates(BasicBlockEdge(BI->getParent(), BI->getSuccessor(1)),
2152                       StoreBB))
2153       return false;
2154 
2155     MemoryAccess *LoadAcc = MSSA.getMemoryAccess(ICmpL);
2156     MemoryAccess *ClobAcc =
2157         MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, BatchAA);
2158 
2159     return MSSA.dominates(ClobAcc, LoadAcc);
2160   }
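       // Sketch of the pattern dominatingConditionImpliesValue recognizes
       // (C-like):
       //   if (*p == v) {
       //     *p = v;   // no-op: the dominating branch condition implies the
       //   }           // value is already present at p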
2161 
2162   /// \returns true if \p Def is a no-op store, either because it
2163   /// directly stores back a loaded value or stores zero to a calloced object.
2164   bool storeIsNoop(MemoryDef *Def, const Value *DefUO) {
2165     Instruction *DefI = Def->getMemoryInst();
2166     StoreInst *Store = dyn_cast<StoreInst>(DefI);
2167     MemSetInst *MemSet = dyn_cast<MemSetInst>(DefI);
2168     Constant *StoredConstant = nullptr;
2169     if (Store)
2170       StoredConstant = dyn_cast<Constant>(Store->getOperand(0));
2171     else if (MemSet)
2172       StoredConstant = dyn_cast<Constant>(MemSet->getValue());
2173     else
2174       return false;
2175 
2176     if (!isRemovable(DefI))
2177       return false;
2178 
2179     if (StoredConstant) {
2180       Constant *InitC =
2181           getInitialValueOfAllocation(DefUO, &TLI, StoredConstant->getType());
2182       // If the clobbering access is LiveOnEntry, no instructions between them
2183       // can modify the memory location.
2184       if (InitC && InitC == StoredConstant)
2185         return MSSA.isLiveOnEntryDef(
2186             MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, BatchAA));
2187     }
2188 
2189     if (!Store)
2190       return false;
2191 
2192     if (dominatingConditionImpliesValue(Def))
2193       return true;
2194 
2195     if (auto *LoadI = dyn_cast<LoadInst>(Store->getOperand(0))) {
2196       if (LoadI->getPointerOperand() == Store->getOperand(1)) {
2197         // Get the defining access for the load.
2198         auto *LoadAccess = MSSA.getMemoryAccess(LoadI)->getDefiningAccess();
2199         // Fast path: the defining accesses are the same.
2200         if (LoadAccess == Def->getDefiningAccess())
2201           return true;
2202 
2203         // Look through phi accesses. Recursively scan all phi accesses by
2204         // adding them to a worklist. Bail when we run into a memory def that
2205         // does not match LoadAccess.
2206         SetVector<MemoryAccess *> ToCheck;
2207         MemoryAccess *Current =
2208             MSSA.getWalker()->getClobberingMemoryAccess(Def, BatchAA);
2209         // We don't want to bail when we run into the store memory def. But
2210         // the phi access may point to it, so we pretend that we've already
2211         // checked it.
2212         ToCheck.insert(Def);
2213         ToCheck.insert(Current);
2214         // Start at current (1) to simulate already having checked Def.
2215         for (unsigned I = 1; I < ToCheck.size(); ++I) {
2216           Current = ToCheck[I];
2217           if (auto PhiAccess = dyn_cast<MemoryPhi>(Current)) {
2218             // Check all the operands.
2219             for (auto &Use : PhiAccess->incoming_values())
2220               ToCheck.insert(cast<MemoryAccess>(&Use));
2221             continue;
2222           }
2223 
2224           // If we found a memory def, bail. This happens when we have an
2225           // unrelated write in between an otherwise noop store.
2226           assert(isa<MemoryDef>(Current) &&
2227                  "Only MemoryDefs should reach here.");
2228           // TODO: Skip no alias MemoryDefs that have no aliasing reads.
2229           // We are searching for the definition of the store's destination.
2230           // So, if that is the same definition as the load, then this is a
2231           // noop. Otherwise, fail.
2232           if (LoadAccess != Current)
2233             return false;
2234         }
2235         return true;
2236       }
2237     }
2238 
2239     return false;
2240   }
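       // Canonical no-op store shape recognized by storeIsNoop (IR sketch):
       //   %v = load i32, ptr %p
       //   store i32 %v, ptr %p   ; removable if nothing clobbers %p between
       //                          ; the load and the store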
2241 
2242   bool removePartiallyOverlappedStores(InstOverlapIntervalsTy &IOL) {
2243     bool Changed = false;
2244     for (auto OI : IOL) {
2245       Instruction *DeadI = OI.first;
2246       MemoryLocation Loc = *getLocForWrite(DeadI);
2247       assert(isRemovable(DeadI) && "Expect only removable instruction");
2248 
2249       const Value *Ptr = Loc.Ptr->stripPointerCasts();
2250       int64_t DeadStart = 0;
2251       uint64_t DeadSize = Loc.Size.getValue();
2252       GetPointerBaseWithConstantOffset(Ptr, DeadStart, DL);
2253       OverlapIntervalsTy &IntervalMap = OI.second;
2254       Changed |= tryToShortenEnd(DeadI, IntervalMap, DeadStart, DeadSize);
2255       if (IntervalMap.empty())
2256         continue;
2257       Changed |= tryToShortenBegin(DeadI, IntervalMap, DeadStart, DeadSize);
2258     }
2259     return Changed;
2260   }
2261 
2262   /// Eliminates writes to locations where the value that is being written
2263   /// is already stored at the same location.
2264   bool eliminateRedundantStoresOfExistingValues() {
2265     bool MadeChange = false;
2266     LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs that write the "
2267                          "already existing value\n");
2268     for (auto *Def : MemDefs) {
2269       if (SkipStores.contains(Def) || MSSA.isLiveOnEntryDef(Def))
2270         continue;
2271 
2272       Instruction *DefInst = Def->getMemoryInst();
2273       auto MaybeDefLoc = getLocForWrite(DefInst);
2274       if (!MaybeDefLoc || !isRemovable(DefInst))
2275         continue;
2276 
2277       MemoryDef *UpperDef;
2278       // To conserve compile-time, we avoid walking to the next clobbering def.
2279       // Instead, we just try to get the optimized access, if it exists. DSE
2280       // will try to optimize defs during the earlier traversal.
2281       if (Def->isOptimized())
2282         UpperDef = dyn_cast<MemoryDef>(Def->getOptimized());
2283       else
2284         UpperDef = dyn_cast<MemoryDef>(Def->getDefiningAccess());
2285       if (!UpperDef || MSSA.isLiveOnEntryDef(UpperDef))
2286         continue;
2287 
2288       Instruction *UpperInst = UpperDef->getMemoryInst();
2289       auto IsRedundantStore = [&]() {
2290         // We don't care about differences in call attributes here.
2291         if (DefInst->isIdenticalToWhenDefined(UpperInst,
2292                                               /*IntersectAttrs=*/true))
2293           return true;
2294         if (auto *MemSetI = dyn_cast<MemSetInst>(UpperInst)) {
2295           if (auto *SI = dyn_cast<StoreInst>(DefInst)) {
2296             // MemSetInst must have a write location.
2297             auto UpperLoc = getLocForWrite(UpperInst);
2298             if (!UpperLoc)
2299               return false;
2300             int64_t InstWriteOffset = 0;
2301             int64_t DepWriteOffset = 0;
2302             auto OR = isOverwrite(UpperInst, DefInst, *UpperLoc, *MaybeDefLoc,
2303                                   InstWriteOffset, DepWriteOffset);
2304             Value *StoredByte = isBytewiseValue(SI->getValueOperand(), DL);
2305             return StoredByte && StoredByte == MemSetI->getOperand(1) &&
2306                    OR == OW_Complete;
2307           }
2308         }
2309         return false;
2310       };
2311 
2312       if (!IsRedundantStore() || isReadClobber(*MaybeDefLoc, DefInst))
2313         continue;
2314       LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n  DEAD: " << *DefInst
2315                         << '\n');
2316       deleteDeadInstruction(DefInst);
2317       NumRedundantStores++;
2318       MadeChange = true;
2319     }
2320     return MadeChange;
2321   }
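       // Example for eliminateRedundantStoresOfExistingValues (sketch): after
       //   call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 16, i1 false)
       // a later "store i32 0, ptr %p" is redundant: i32 0 splats to the
       // memset byte (i8 0) and the memset completely covers the store.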
2322 
2323   // Return the locations written by the initializes attribute.
2324   // Note that this function considers:
2325   // 1. Unwind edge: use "initializes" attribute only if the callee has
2326   //    "nounwind" attribute, or the argument has "dead_on_unwind" attribute,
2327   //    or the argument is invisible to caller on unwind. That is, we don't
2328   //    perform incorrect DSE on unwind edges in the current function.
2329   // 2. Argument alias: for aliasing arguments, the "initializes" attribute is
2330   //    the intersected range list of their "initializes" attributes.
2331   SmallVector<MemoryLocation, 1> getInitializesArgMemLoc(const Instruction *I);
2332 
2333   // Try to eliminate dead defs that access `KillingLocWrapper.MemLoc` and are
2334   // killed by `KillingLocWrapper.MemDef`. Return whether
2335   // any changes were made, and whether `KillingLocWrapper.DefInst` was deleted.
2336   std::pair<bool, bool>
2337   eliminateDeadDefs(const MemoryLocationWrapper &KillingLocWrapper);
2338 
2339   // Try to eliminate dead defs killed by `KillingDefWrapper` and return the
2340   // change state: whether any change was made.
2341   bool eliminateDeadDefs(const MemoryDefWrapper &KillingDefWrapper);
2342 };
2343 
2344 // Return true if "Arg" is function local and isn't captured before "CB".
2345 bool isFuncLocalAndNotCaptured(Value *Arg, const CallBase *CB,
2346                                EarliestEscapeAnalysis &EA) {
2347   const Value *UnderlyingObj = getUnderlyingObject(Arg);
2348   return isIdentifiedFunctionLocal(UnderlyingObj) &&
2349          capturesNothing(
2350              EA.getCapturesBefore(UnderlyingObj, CB, /*OrAt*/ true));
2351 }
2352 
2353 SmallVector<MemoryLocation, 1>
2354 DSEState::getInitializesArgMemLoc(const Instruction *I) {
2355   const CallBase *CB = dyn_cast<CallBase>(I);
2356   if (!CB)
2357     return {};
2358 
2359   // Collect aliasing arguments and their initializes ranges.
2360   SmallMapVector<Value *, SmallVector<ArgumentInitInfo, 2>, 2> Arguments;
2361   for (unsigned Idx = 0, Count = CB->arg_size(); Idx < Count; ++Idx) {
2362     Value *CurArg = CB->getArgOperand(Idx);
2363     if (!CurArg->getType()->isPointerTy())
2364       continue;
2365 
2366     ConstantRangeList Inits;
2367     Attribute InitializesAttr = CB->getParamAttr(Idx, Attribute::Initializes);
2368     // initializes on byval arguments refers to the callee copy, not the
2369     // original memory the caller passed in.
2370     if (InitializesAttr.isValid() && !CB->isByValArgument(Idx))
2371       Inits = InitializesAttr.getValueAsConstantRangeList();
2372 
2373     // Check whether "CurArg" could alias with global variables. We require
2374     // that either it is function-local and isn't captured before the call,
2375     // or that "CB" only accesses argument or inaccessible memory.
2376     if (!Inits.empty() && !CB->onlyAccessesInaccessibleMemOrArgMem() &&
2377         !isFuncLocalAndNotCaptured(CurArg, CB, EA))
2378       Inits = ConstantRangeList();
2379 
2380     // We don't perform incorrect DSE on unwind edges in the current function,
2381     // and use the "initializes" attribute to kill dead stores if:
2382     // - The call does not throw exceptions, "CB->doesNotThrow()".
2383     // - Or the callee parameter has "dead_on_unwind" attribute.
2384     // - Or the argument is invisible to caller on unwind, and there are no
2385     //   unwind edges from this call in the current function (e.g. `CallInst`).
2386     bool IsDeadOrInvisibleOnUnwind =
2387         CB->paramHasAttr(Idx, Attribute::DeadOnUnwind) ||
2388         (isa<CallInst>(CB) && isInvisibleToCallerOnUnwind(CurArg));
2389     ArgumentInitInfo InitInfo{Idx, IsDeadOrInvisibleOnUnwind, Inits};
2390     bool FoundAliasing = false;
2391     for (auto &[Arg, AliasList] : Arguments) {
2392       auto AAR = BatchAA.alias(MemoryLocation::getBeforeOrAfter(Arg),
2393                                MemoryLocation::getBeforeOrAfter(CurArg));
2394       if (AAR == AliasResult::NoAlias) {
2395         continue;
2396       } else if (AAR == AliasResult::MustAlias) {
2397         FoundAliasing = true;
2398         AliasList.push_back(InitInfo);
2399       } else {
2400         // For PartialAlias and MayAlias, there is a known or unknown offset
2401         // between the arguments, so we insert an empty init range to discard
2402         // the entire initializes info while intersecting.
2403         FoundAliasing = true;
2404         AliasList.push_back(ArgumentInitInfo{Idx, IsDeadOrInvisibleOnUnwind,
2405                                              ConstantRangeList()});
2406       }
2407     }
2408     if (!FoundAliasing)
2409       Arguments[CurArg] = {InitInfo};
2410   }
2411 
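  // For each group of aliasing arguments, only the byte ranges initialized by
  // every member survive the intersection; the unwind rules described above
  // are applied inside getIntersectedInitRangeList.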
  SmallVector<MemoryLocation, 1> Locations;
  for (const auto &[_, Args] : Arguments) {
    auto IntersectedRanges =
        getIntersectedInitRangeList(Args, CB->doesNotThrow());
    if (IntersectedRanges.empty())
      continue;

    for (const auto &Arg : Args) {
      for (const auto &Range : IntersectedRanges) {
        int64_t Start = Range.getLower().getSExtValue();
        int64_t End = Range.getUpper().getSExtValue();
        // For now, we only handle locations starting at offset 0.
        if (Start == 0)
          Locations.push_back(MemoryLocation(CB->getArgOperand(Arg.Idx),
                                             LocationSize::precise(End - Start),
                                             CB->getAAMetadata()));
      }
    }
  }
  return Locations;
}

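// Try to eliminate defs killed by "KillingLocWrapper". Returns a pair of
// flags: whether any change was made, and whether the killing def itself was
// deleted (which happens when it is merged into an earlier partial store).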
std::pair<bool, bool>
DSEState::eliminateDeadDefs(const MemoryLocationWrapper &KillingLocWrapper) {
  bool Changed = false;
  bool DeletedKillingLoc = false;
  unsigned ScanLimit = MemorySSAScanLimit;
  unsigned WalkerStepLimit = MemorySSAUpwardsStepLimit;
  unsigned PartialLimit = MemorySSAPartialStoreLimit;
  // Worklist of MemoryAccesses that may be killed by
  // "KillingLocWrapper.MemDef".
  SmallSetVector<MemoryAccess *, 8> ToCheck;
  // Track MemoryAccesses that have been deleted in the loop below, so we can
  // skip them. Don't use SkipStores for this, which may contain reused
  // MemoryAccess addresses.
  SmallPtrSet<MemoryAccess *, 8> Deleted;
  [[maybe_unused]] unsigned OrigNumSkipStores = SkipStores.size();
  ToCheck.insert(KillingLocWrapper.MemDef->getDefiningAccess());

  // Check if MemoryAccesses in the worklist are killed by
  // "KillingLocWrapper.MemDef".
  for (unsigned I = 0; I < ToCheck.size(); I++) {
    MemoryAccess *Current = ToCheck[I];
    if (Deleted.contains(Current))
      continue;
    std::optional<MemoryAccess *> MaybeDeadAccess = getDomMemoryDef(
        KillingLocWrapper.MemDef, Current, KillingLocWrapper.MemLoc,
        KillingLocWrapper.UnderlyingObject, ScanLimit, WalkerStepLimit,
        isMemTerminatorInst(KillingLocWrapper.DefInst), PartialLimit,
        KillingLocWrapper.DefByInitializesAttr);

    if (!MaybeDeadAccess) {
      LLVM_DEBUG(dbgs() << "  finished walk\n");
      continue;
    }
    MemoryAccess *DeadAccess = *MaybeDeadAccess;
    LLVM_DEBUG(dbgs() << " Checking if we can kill " << *DeadAccess);
    if (isa<MemoryPhi>(DeadAccess)) {
      LLVM_DEBUG(dbgs() << "\n  ... adding incoming values to worklist\n");
      for (Value *V : cast<MemoryPhi>(DeadAccess)->incoming_values()) {
        MemoryAccess *IncomingAccess = cast<MemoryAccess>(V);
        BasicBlock *IncomingBlock = IncomingAccess->getBlock();
        BasicBlock *PhiBlock = DeadAccess->getBlock();

        // We only consider incoming MemoryAccesses that come before the
        // MemoryPhi. Otherwise we could discover candidates that do not
        // strictly dominate our starting def.
        if (PostOrderNumbers[IncomingBlock] > PostOrderNumbers[PhiBlock])
          ToCheck.insert(IncomingAccess);
      }
      continue;
    }
    // We cannot apply the "initializes" attribute to DeadAccess/DeadDef.
    // Doing so would incorrectly treat a call instruction as a redundant
    // store and remove the call.
    // TODO: this conflates the existence of a MemoryLocation with being able
    // to delete the instruction. Fix isRemovable() to consider calls with
    // side effects that cannot be removed, e.g. calls with the initializes
    // attribute, and remove getLocForInst(ConsiderInitializesAttr = false).
    MemoryDefWrapper DeadDefWrapper(
        cast<MemoryDef>(DeadAccess),
        getLocForInst(cast<MemoryDef>(DeadAccess)->getMemoryInst(),
                      /*ConsiderInitializesAttr=*/false));
    assert(DeadDefWrapper.DefinedLocations.size() == 1);
    MemoryLocationWrapper &DeadLocWrapper =
        DeadDefWrapper.DefinedLocations.front();
    LLVM_DEBUG(dbgs() << " (" << *DeadLocWrapper.DefInst << ")\n");
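    // Continue the upwards walk from the dead access to look for further
    // candidates killed by "KillingLocWrapper.MemDef".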
    ToCheck.insert(DeadLocWrapper.MemDef->getDefiningAccess());
    NumGetDomMemoryDefPassed++;

    if (!DebugCounter::shouldExecute(MemorySSACounter))
      continue;
    if (isMemTerminatorInst(KillingLocWrapper.DefInst)) {
      if (KillingLocWrapper.UnderlyingObject != DeadLocWrapper.UnderlyingObject)
        continue;
      LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n  DEAD: "
                        << *DeadLocWrapper.DefInst << "\n  KILLER: "
                        << *KillingLocWrapper.DefInst << '\n');
      deleteDeadInstruction(DeadLocWrapper.DefInst, &Deleted);
      ++NumFastStores;
      Changed = true;
    } else {
      // Check if KillingI overwrites DeadI.
      int64_t KillingOffset = 0;
      int64_t DeadOffset = 0;
      OverwriteResult OR =
          isOverwrite(KillingLocWrapper.DefInst, DeadLocWrapper.DefInst,
                      KillingLocWrapper.MemLoc, DeadLocWrapper.MemLoc,
                      KillingOffset, DeadOffset);
      if (OR == OW_MaybePartial) {
        auto &IOL = IOLs[DeadLocWrapper.DefInst->getParent()];
        OR = isPartialOverwrite(KillingLocWrapper.MemLoc, DeadLocWrapper.MemLoc,
                                KillingOffset, DeadOffset,
                                DeadLocWrapper.DefInst, IOL);
      }
      if (EnablePartialStoreMerging && OR == OW_PartialEarlierWithFullLater) {
        auto *DeadSI = dyn_cast<StoreInst>(DeadLocWrapper.DefInst);
        auto *KillingSI = dyn_cast<StoreInst>(KillingLocWrapper.DefInst);
        // We are re-using tryToMergePartialOverlappingStores, which requires
        // DeadSI to dominate KillingSI.
        // TODO: implement tryToMergePartialOverlappingStores using MemorySSA.
        if (DeadSI && KillingSI && DT.dominates(DeadSI, KillingSI)) {
          if (Constant *Merged = tryToMergePartialOverlappingStores(
                  KillingSI, DeadSI, KillingOffset, DeadOffset, DL, BatchAA,
                  &DT)) {

            // Update stored value of earlier store to merged constant.
            DeadSI->setOperand(0, Merged);
            ++NumModifiedStores;
            Changed = true;
            DeletedKillingLoc = true;

            // Remove killing store and remove any outstanding overlap
            // intervals for the updated store.
            deleteDeadInstruction(KillingSI, &Deleted);
            auto I = IOLs.find(DeadSI->getParent());
            if (I != IOLs.end())
              I->second.erase(DeadSI);
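            // The killing store has been merged away; stop scanning for
            // further accesses killed by it.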
            break;
          }
        }
      }
      if (OR == OW_Complete) {
        LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n  DEAD: "
                          << *DeadLocWrapper.DefInst << "\n  KILLER: "
                          << *KillingLocWrapper.DefInst << '\n');
        deleteDeadInstruction(DeadLocWrapper.DefInst, &Deleted);
        ++NumFastStores;
        Changed = true;
      }
    }
  }

  assert(SkipStores.size() - OrigNumSkipStores == Deleted.size() &&
         "SkipStores and Deleted out of sync?");

  return {Changed, DeletedKillingLoc};
}

bool DSEState::eliminateDeadDefs(const MemoryDefWrapper &KillingDefWrapper) {
  if (KillingDefWrapper.DefinedLocations.empty()) {
    LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for "
                      << *KillingDefWrapper.DefInst << "\n");
    return false;
  }

  bool MadeChange = false;
  for (auto &KillingLocWrapper : KillingDefWrapper.DefinedLocations) {
    LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by "
                      << *KillingLocWrapper.MemDef << " ("
                      << *KillingLocWrapper.DefInst << ")\n");
    auto [Changed, DeletedKillingLoc] = eliminateDeadDefs(KillingLocWrapper);
    MadeChange |= Changed;

    // Check if the store is a no-op.
    if (!DeletedKillingLoc && storeIsNoop(KillingLocWrapper.MemDef,
                                          KillingLocWrapper.UnderlyingObject)) {
      LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n  DEAD: "
                        << *KillingLocWrapper.DefInst << '\n');
      deleteDeadInstruction(KillingLocWrapper.DefInst);
      NumRedundantStores++;
      MadeChange = true;
      continue;
    }
    // Can we form a calloc from a memset/malloc pair?
    if (!DeletedKillingLoc &&
        tryFoldIntoCalloc(KillingLocWrapper.MemDef,
                          KillingLocWrapper.UnderlyingObject)) {
      LLVM_DEBUG(dbgs() << "DSE: Remove memset after forming calloc:\n"
                        << "  DEAD: " << *KillingLocWrapper.DefInst << '\n');
      deleteDeadInstruction(KillingLocWrapper.DefInst);
      MadeChange = true;
      continue;
    }
  }
  return MadeChange;
}

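// Top-level driver: try to eliminate stores killed by each MemoryDef in the
// function, then run the clean-up transformations and erase all instructions
// queued for removal.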
static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
                                DominatorTree &DT, PostDominatorTree &PDT,
                                const TargetLibraryInfo &TLI,
                                const LoopInfo &LI) {
  bool MadeChange = false;
  DSEState State(F, AA, MSSA, DT, PDT, TLI, LI);
  // For each store:
  for (unsigned I = 0; I < State.MemDefs.size(); I++) {
    MemoryDef *KillingDef = State.MemDefs[I];
    if (State.SkipStores.count(KillingDef))
      continue;

    MemoryDefWrapper KillingDefWrapper(
        KillingDef, State.getLocForInst(KillingDef->getMemoryInst(),
                                        EnableInitializesImprovement));
    MadeChange |= State.eliminateDeadDefs(KillingDefWrapper);
  }

  if (EnablePartialOverwriteTracking)
    for (auto &KV : State.IOLs)
      MadeChange |= State.removePartiallyOverlappedStores(KV.second);

  MadeChange |= State.eliminateRedundantStoresOfExistingValues();
  MadeChange |= State.eliminateDeadWritesAtEndOfFunction();

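  // Instructions queued in "ToRemove" were deferred for deletion while
  // walking MemorySSA; erase them from the IR now.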
  while (!State.ToRemove.empty()) {
    Instruction *DeadInst = State.ToRemove.pop_back_val();
    DeadInst->eraseFromParent();
  }

  return MadeChange;
}
} // end anonymous namespace

//===----------------------------------------------------------------------===//
// DSE Pass
//===----------------------------------------------------------------------===//
PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
  AliasAnalysis &AA = AM.getResult<AAManager>(F);
  const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(F);
  DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
  MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
  PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
  LoopInfo &LI = AM.getResult<LoopAnalysis>(F);

  bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);

#ifdef LLVM_ENABLE_STATS
  if (AreStatisticsEnabled())
    for (auto &I : instructions(F))
      NumRemainingStores += isa<StoreInst>(&I);
#endif

  if (!Changed)
    return PreservedAnalyses::all();

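  // DSE does not modify the CFG and keeps MemorySSA up to date via the
  // updater, so CFG-dependent analyses, MemorySSA and LoopInfo remain valid.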
  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  PA.preserve<MemorySSAAnalysis>();
  PA.preserve<LoopAnalysis>();
  return PA;
}