//===- llvm/Analysis/LoopAccessAnalysis.h -----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interface for the loop memory dependence framework that
// was originally developed for the Loop Vectorizer.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_LOOPACCESSANALYSIS_H
#define LLVM_ANALYSIS_LOOPACCESSANALYSIS_H

#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/DiagnosticInfo.h"
#include <optional>
#include <variant>

namespace llvm {

class AAResults;
class DataLayout;
class Loop;
class LoopAccessInfo;
class raw_ostream;
class SCEV;
class SCEVUnionPredicate;
class Value;
/// Collection of parameters shared between the Loop Vectorizer and the
/// Loop Access Analysis.
struct VectorizerParams {
  /// Maximum SIMD width.
  static const unsigned MaxVectorWidth;

  /// VF as overridden by the user.
  static unsigned VectorizationFactor;
  /// Interleave factor as overridden by the user.
  static unsigned VectorizationInterleave;
  /// True if force-vector-interleave was specified by the user.
  static bool isInterleaveForced();

  /// When performing memory disambiguation checks at runtime do not
  /// make more than this number of comparisons.
  static unsigned RuntimeMemoryCheckThreshold;

  // When creating runtime checks for nested loops, where possible try to
  // write the checks in a form that allows them to be easily hoisted out of
  // the outermost loop. For example, we can do this by expanding the range of
  // addresses considered to include the entire nested loop so that they are
  // loop invariant.
  static bool HoistRuntimeChecks;
};

/// Checks memory dependences among accesses to the same underlying
/// object to determine whether vectorization is legal or not (and at
/// which vectorization factor).
///
/// Note: This class will compute a conservative dependence for accesses to
/// different underlying pointers. Clients, such as the loop vectorizer, will
/// sometimes deal with these potential dependencies by emitting runtime checks.
///
/// We use the ScalarEvolution framework to symbolically evaluate pairs of
/// access functions. Since we currently don't restructure the loop we can rely
/// on the program order of memory accesses to determine their safety.
/// At the moment we will only deem accesses as safe for:
///  * A negative constant distance assuming program order.
///
///      Safe: tmp = a[i + 1];     OR     a[i + 1] = x;
///            a[i] = tmp;                y = a[i];
///
///   The latter case is safe because later checks guarantee that there can't
///   be a cycle through a phi node (that is, we check that "x" and "y" are not
///   the same variable: a header phi can only be an induction or a reduction, a
///   reduction can't have a memory sink, an induction can't have a memory
///   source). This is important and must not be violated (or we have to
///   resort to checking for cycles through memory).
///
///  * A positive constant distance assuming program order that is bigger
///    than the biggest memory access.
///
///     tmp = a[i]        OR              b[i] = x
///     a[i+2] = tmp                      y = b[i+2];
///
///     Safe distance: 2 x sizeof(a[0]), and 2 x sizeof(b[0]), respectively.
///
///  * Zero distances and all accesses have the same size.
///
class MemoryDepChecker {
public:
  typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
  typedef SmallVector<MemAccessInfo, 8> MemAccessInfoList;
  /// Set of potentially dependent memory accesses.
  typedef EquivalenceClasses<MemAccessInfo> DepCandidates;

  /// Type to keep track of the status of the dependence check. The order of
  /// the elements is important and has to be from most permissive to least
  /// permissive.
  enum class VectorizationSafetyStatus {
    // Can vectorize safely without RT checks. All dependences are known to be
    // safe.
    Safe,
    // Can possibly vectorize with RT checks to overcome unknown dependencies.
    PossiblySafeWithRtChecks,
    // Cannot vectorize due to known unsafe dependencies.
    Unsafe,
  };

  /// Dependence between memory access instructions.
  struct Dependence {
    /// The type of the dependence.
    enum DepType {
      // No dependence.
      NoDep,
      // We couldn't determine the direction or the distance.
      Unknown,
      // At least one of the memory access instructions may access a loop
      // varying object, e.g. the address of underlying object is loaded inside
      // the loop, like A[B[i]]. We cannot determine direction or distance in
      // those cases, and also are unable to generate any runtime checks.
      IndirectUnsafe,

      // Lexically forward.
      //
      // FIXME: If we only have loop-independent forward dependences (e.g. a
      // read and write of A[i]), LAA will locally deem the dependence "safe"
      // without querying the MemoryDepChecker.  Therefore we can miss
      // enumerating loop-independent forward dependences in
      // getDependences.  Note that as soon as there are different
      // indices used to access the same array, the MemoryDepChecker *is*
      // queried and the dependence list is complete.
      Forward,
      // Forward, but if vectorized, is likely to prevent store-to-load
      // forwarding.
      ForwardButPreventsForwarding,
      // Lexically backward.
      Backward,
      // Backward, but the distance allows a vectorization factor dependent
      // on MinDepDistBytes.
      BackwardVectorizable,
      // Same, but may prevent store-to-load forwarding.
      BackwardVectorizableButPreventsForwarding
    };

    /// String version of the types.
    static const char *DepName[];

    /// Index of the source of the dependence in the InstMap vector.
    unsigned Source;
    /// Index of the destination of the dependence in the InstMap vector.
    unsigned Destination;
    /// The type of the dependence.
    DepType Type;

    Dependence(unsigned Source, unsigned Destination, DepType Type)
        : Source(Source), Destination(Destination), Type(Type) {}

    /// Return the source instruction of the dependence.
    Instruction *getSource(const MemoryDepChecker &DepChecker) const;
    /// Return the destination instruction of the dependence.
    Instruction *getDestination(const MemoryDepChecker &DepChecker) const;

    /// Dependence types that don't prevent vectorization.
    static VectorizationSafetyStatus isSafeForVectorization(DepType Type);

    /// Lexically forward dependence.
    bool isForward() const;
    /// Lexically backward dependence.
    bool isBackward() const;

    /// May be a lexically backward dependence type (includes Unknown).
    bool isPossiblyBackward() const;

    /// Print the dependence.  \p Instr is used to map the instruction
    /// indices to instructions.
    void print(raw_ostream &OS, unsigned Depth,
               const SmallVectorImpl<Instruction *> &Instrs) const;
  };

  MemoryDepChecker(PredicatedScalarEvolution &PSE, const Loop *L,
                   const DenseMap<Value *, const SCEV *> &SymbolicStrides,
                   unsigned MaxTargetVectorWidthInBits)
      : PSE(PSE), InnermostLoop(L), SymbolicStrides(SymbolicStrides),
        MaxTargetVectorWidthInBits(MaxTargetVectorWidthInBits) {}

  /// Register the location (instructions are given increasing numbers)
  /// of a write access.
  void addAccess(StoreInst *SI);

  /// Register the location (instructions are given increasing numbers)
  /// of a read access.
  void addAccess(LoadInst *LI);

  /// Check whether the dependencies between the accesses are safe.
  ///
  /// Only checks sets with elements in \p CheckDeps.
  bool areDepsSafe(const DepCandidates &AccessSets,
                   const MemAccessInfoList &CheckDeps);

  /// No memory dependence was encountered that would inhibit
  /// vectorization.
  bool isSafeForVectorization() const {
    return Status == VectorizationSafetyStatus::Safe;
  }

  /// Return true if the number of elements that are safe to operate on
  /// simultaneously is not bounded.
  bool isSafeForAnyVectorWidth() const {
    return MaxSafeVectorWidthInBits == UINT_MAX;
  }

  /// Return the number of elements that are safe to operate on
  /// simultaneously, multiplied by the size of the element in bits.
  uint64_t getMaxSafeVectorWidthInBits() const {
    return MaxSafeVectorWidthInBits;
  }

  /// In some cases, when the dependency check fails, we can still
  /// vectorize the loop with a dynamic array access check.
  bool shouldRetryWithRuntimeCheck() const {
    return FoundNonConstantDistanceDependence &&
           Status == VectorizationSafetyStatus::PossiblySafeWithRtChecks;
  }

  /// Returns the memory dependences.  If null is returned we exceeded
  /// the MaxDependences threshold and this information is not
  /// available.
  const SmallVectorImpl<Dependence> *getDependences() const {
    return RecordDependences ? &Dependences : nullptr;
  }

  void clearDependences() { Dependences.clear(); }

  /// The vector of memory access instructions.  The indices are used as
  /// instruction identifiers in the Dependence class.
  const SmallVectorImpl<Instruction *> &getMemoryInstructions() const {
    return InstMap;
  }

  /// Generate a mapping between the memory instructions and their
  /// indices according to program order.
  DenseMap<Instruction *, unsigned> generateInstructionOrderMap() const {
    DenseMap<Instruction *, unsigned> OrderMap;

    for (unsigned I = 0; I < InstMap.size(); ++I)
      OrderMap[InstMap[I]] = I;

    return OrderMap;
  }

  /// Find the set of instructions that read or write via \p Ptr.
  SmallVector<Instruction *, 4> getInstructionsForAccess(Value *Ptr,
                                                         bool isWrite) const;

  /// Return the program order indices for the access location (Ptr, IsWrite).
  /// Returns an empty ArrayRef if there are no accesses for the location.
  ArrayRef<unsigned> getOrderForAccess(Value *Ptr, bool IsWrite) const {
    auto I = Accesses.find({Ptr, IsWrite});
    if (I != Accesses.end())
      return I->second;
    return {};
  }

  const Loop *getInnermostLoop() const { return InnermostLoop; }

  DenseMap<std::pair<const SCEV *, Type *>,
           std::pair<const SCEV *, const SCEV *>> &
  getPointerBounds() {
    return PointerBounds;
  }

private:
  /// A wrapper around ScalarEvolution, used to add runtime SCEV checks, and
  /// applies dynamic knowledge to simplify SCEV expressions and convert them
  /// to a more usable form. We need this in case assumptions about SCEV
  /// expressions need to be made in order to avoid unknown dependences. For
  /// example we might assume a unit stride for a pointer in order to prove
  /// that a memory access is strided and doesn't wrap.
  PredicatedScalarEvolution &PSE;
  const Loop *InnermostLoop;

  /// Reference to map of pointer values to
  /// their stride symbols, if they have a symbolic stride.
  const DenseMap<Value *, const SCEV *> &SymbolicStrides;

  /// Maps access locations (ptr, read/write) to program order.
  DenseMap<MemAccessInfo, std::vector<unsigned> > Accesses;

  /// Memory access instructions in program order.
  SmallVector<Instruction *, 16> InstMap;

  /// The program order index to be used for the next instruction.
  unsigned AccessIdx = 0;

  /// The smallest dependence distance in bytes in the loop. This may not be
  /// the same as the maximum number of bytes that are safe to operate on
  /// simultaneously.
  uint64_t MinDepDistBytes = 0;

  /// Number of elements (from consecutive iterations) that are safe to
  /// operate on simultaneously, multiplied by the size of the element in bits.
  /// The size of the element is taken from the memory access that is most
  /// restrictive.
  uint64_t MaxSafeVectorWidthInBits = -1U;

  /// If we see a non-constant dependence distance we can still try to
  /// vectorize this loop with runtime checks.
  bool FoundNonConstantDistanceDependence = false;

  /// Result of the dependence checks, indicating whether the checked
  /// dependences are safe for vectorization, require RT checks or are known to
  /// be unsafe.
  VectorizationSafetyStatus Status = VectorizationSafetyStatus::Safe;

  /// True if Dependences reflects the dependences in the
  /// loop.  If false we exceeded MaxDependences and
  /// Dependences is invalid.
  bool RecordDependences = true;

  /// Memory dependences collected during the analysis.  Only valid if
  /// RecordDependences is true.
  SmallVector<Dependence, 8> Dependences;

  /// The maximum width of a target's vector registers multiplied by 2 to also
  /// roughly account for additional interleaving. Is used to decide if a
  /// backwards dependence with non-constant stride should be classified as
  /// backwards-vectorizable or unknown (triggering a runtime check).
  unsigned MaxTargetVectorWidthInBits = 0;

  /// Mapping of SCEV expressions to their expanded pointer bounds (pair of
  /// start and end pointer expressions).
  DenseMap<std::pair<const SCEV *, Type *>,
           std::pair<const SCEV *, const SCEV *>>
      PointerBounds;

  /// Check whether there is a plausible dependence between the two
  /// accesses.
  ///
  /// Access \p A must happen before \p B in program order. The two indices
  /// identify the index into the program order map.
  ///
  /// This function checks whether there is a plausible dependence (or the
  /// absence of such can't be proved) between the two accesses. If there is a
  /// plausible dependence but the dependence distance is bigger than one
  /// element access it records this distance in \p MinDepDistBytes (if this
  /// distance is smaller than any other distance encountered so far).
  /// Otherwise, it returns a dependence type signaling a possible dependence.
  Dependence::DepType isDependent(const MemAccessInfo &A, unsigned AIdx,
                                  const MemAccessInfo &B, unsigned BIdx);

  /// Check whether the data dependence could prevent store-load
  /// forwarding.
  ///
  /// \return false if we shouldn't vectorize at all or avoid larger
  /// vectorization factors by limiting MinDepDistBytes.
  bool couldPreventStoreLoadForward(uint64_t Distance, uint64_t TypeByteSize);

  /// Updates the current safety status with \p S. We can go from Safe to
  /// either PossiblySafeWithRtChecks or Unsafe and from
  /// PossiblySafeWithRtChecks to Unsafe.
  void mergeInStatus(VectorizationSafetyStatus S);

  struct DepDistanceStrideAndSizeInfo {
    const SCEV *Dist;
    uint64_t StrideA;
    uint64_t StrideB;
    uint64_t TypeByteSize;
    bool AIsWrite;
    bool BIsWrite;

    DepDistanceStrideAndSizeInfo(const SCEV *Dist, uint64_t StrideA,
                                 uint64_t StrideB, uint64_t TypeByteSize,
                                 bool AIsWrite, bool BIsWrite)
        : Dist(Dist), StrideA(StrideA), StrideB(StrideB),
          TypeByteSize(TypeByteSize), AIsWrite(AIsWrite), BIsWrite(BIsWrite) {}
  };

  /// Get the dependence distance, strides, type size and whether it is a write
  /// for the dependence between A and B. Returns a DepType, if we can prove
  /// there's no dependence or the analysis fails. Outlined to a lambda to limit
  /// the scope of various temporary variables, like A/BPtr, StrideA/BPtr and
  /// others. Returns either the dependence result, if it could already be
  /// determined, or a struct containing (Distance, Stride, TypeSize, AIsWrite,
  /// BIsWrite).
  std::variant<Dependence::DepType, DepDistanceStrideAndSizeInfo>
  getDependenceDistanceStrideAndSize(const MemAccessInfo &A, Instruction *AInst,
                                     const MemAccessInfo &B,
                                     Instruction *BInst);
};
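
// Editor's usage sketch (assumed client code, not part of the original
// header): inspecting the result of the dependence check. `DepChecker` is
// assumed to be a MemoryDepChecker already populated via addAccess() and
// areDepsSafe(), and `OS` a raw_ostream.
//
// \code
//   if (DepChecker.isSafeForVectorization()) {
//     // No dependence inhibits vectorization; the safe width in bits is
//     // DepChecker.getMaxSafeVectorWidthInBits().
//   } else if (const auto *Deps = DepChecker.getDependences()) {
//     for (const MemoryDepChecker::Dependence &Dep : *Deps)
//       Dep.print(OS, /*Depth=*/2, DepChecker.getMemoryInstructions());
//   }
// \endcode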

class RuntimePointerChecking;
/// A grouping of pointers. A single memcheck is required between
/// two groups.
struct RuntimeCheckingPtrGroup {
  /// Create a new pointer checking group containing a single
  /// pointer, with index \p Index in RtCheck.
  RuntimeCheckingPtrGroup(unsigned Index, RuntimePointerChecking &RtCheck);

  /// Tries to add the pointer recorded in RtCheck at index
  /// \p Index to this pointer checking group. We can only add a pointer
  /// to a checking group if we will still be able to get
  /// the upper and lower bounds of the check. Returns true in case
  /// of success, false otherwise.
  bool addPointer(unsigned Index, RuntimePointerChecking &RtCheck);
  bool addPointer(unsigned Index, const SCEV *Start, const SCEV *End,
                  unsigned AS, bool NeedsFreeze, ScalarEvolution &SE);

  /// The SCEV expression which represents the upper bound of all the
  /// pointers in this group.
  const SCEV *High;
  /// The SCEV expression which represents the lower bound of all the
  /// pointers in this group.
  const SCEV *Low;
  /// Indices of all the pointers that constitute this grouping.
  SmallVector<unsigned, 2> Members;
  /// Address space of the involved pointers.
  unsigned AddressSpace;
  /// Whether the pointer needs to be frozen after expansion, e.g. because it
  /// may be poison outside the loop.
  bool NeedsFreeze = false;
};

/// A memcheck which is made up of a pair of grouped pointers.
typedef std::pair<const RuntimeCheckingPtrGroup *,
                  const RuntimeCheckingPtrGroup *>
    RuntimePointerCheck;

struct PointerDiffInfo {
  const SCEV *SrcStart;
  const SCEV *SinkStart;
  unsigned AccessSize;
  bool NeedsFreeze;

  PointerDiffInfo(const SCEV *SrcStart, const SCEV *SinkStart,
                  unsigned AccessSize, bool NeedsFreeze)
      : SrcStart(SrcStart), SinkStart(SinkStart), AccessSize(AccessSize),
        NeedsFreeze(NeedsFreeze) {}
};

/// Holds information about the memory runtime legality checks to verify
/// that a group of pointers do not overlap.
class RuntimePointerChecking {
  friend struct RuntimeCheckingPtrGroup;

public:
  struct PointerInfo {
    /// Holds the pointer value that we need to check.
    TrackingVH<Value> PointerValue;
    /// Holds the smallest byte address accessed by the pointer throughout all
    /// iterations of the loop.
    const SCEV *Start;
    /// Holds the largest byte address accessed by the pointer throughout all
    /// iterations of the loop, plus 1.
    const SCEV *End;
    /// Holds the information if this pointer is used for writing to memory.
    bool IsWritePtr;
    /// Holds the id of the set of pointers that could be dependent because of
    /// a shared underlying object.
    unsigned DependencySetId;
    /// Holds the id of the disjoint alias set to which this pointer belongs.
    unsigned AliasSetId;
    /// SCEV for the access.
    const SCEV *Expr;
    /// True if the pointer expression needs to be frozen after expansion.
    bool NeedsFreeze;

    PointerInfo(Value *PointerValue, const SCEV *Start, const SCEV *End,
                bool IsWritePtr, unsigned DependencySetId, unsigned AliasSetId,
                const SCEV *Expr, bool NeedsFreeze)
        : PointerValue(PointerValue), Start(Start), End(End),
          IsWritePtr(IsWritePtr), DependencySetId(DependencySetId),
          AliasSetId(AliasSetId), Expr(Expr), NeedsFreeze(NeedsFreeze) {}
  };

  RuntimePointerChecking(MemoryDepChecker &DC, ScalarEvolution *SE)
      : DC(DC), SE(SE) {}

  /// Reset the state of the pointer runtime information.
  void reset() {
    Need = false;
    Pointers.clear();
    Checks.clear();
  }

  /// Insert a pointer and calculate the start and end SCEVs.
  /// We need \p PSE in order to compute the SCEV expression of the pointer
  /// according to the assumptions that we've made during the analysis.
  /// The method might also version the pointer stride according to \p Strides,
  /// and add new predicates to \p PSE.
  void insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr, Type *AccessTy,
              bool WritePtr, unsigned DepSetId, unsigned ASId,
              PredicatedScalarEvolution &PSE, bool NeedsFreeze);

  /// No run-time memory checking is necessary.
  bool empty() const { return Pointers.empty(); }

  /// Generate the checks and store them.  This also performs the grouping
  /// of pointers to reduce the number of memchecks necessary.
  void generateChecks(MemoryDepChecker::DepCandidates &DepCands,
                      bool UseDependencies);

  /// Returns the checks that generateChecks created. They can be used to
  /// ensure no read/write accesses overlap across all loop iterations.
  const SmallVectorImpl<RuntimePointerCheck> &getChecks() const {
    return Checks;
  }

  // Returns an optional list of (pointer-difference expressions, access size)
  // pairs that can be used to prove that there are no vectorization-preventing
  // dependencies at runtime. There is a vectorization-preventing dependency
  // if any pointer-difference is <u VF * InterleaveCount * access size. Returns
  // std::nullopt if pointer-difference checks cannot be used.
  std::optional<ArrayRef<PointerDiffInfo>> getDiffChecks() const {
    if (!CanUseDiffCheck)
      return std::nullopt;
    return {DiffChecks};
  }
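
  // Editor's sketch (assumed client code): how the diff checks above would
  // be consumed. For a chosen VF and interleave count IC, the client emits
  // one unsigned comparison per entry; the loop is unsafe if any fires.
  //
  // \code
  //   if (auto Checks = RtChecking.getDiffChecks())
  //     for (const PointerDiffInfo &C : *Checks) {
  //       // Unsafe iff (SinkStart - SrcStart) <u VF * IC * C.AccessSize,
  //       // so expand C.SinkStart - C.SrcStart and compare at runtime
  //       // (freezing the operands first if C.NeedsFreeze is set).
  //     }
  // \endcode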

  /// Decide if we need to add a check between two groups of pointers,
  /// according to needsChecking.
  bool needsChecking(const RuntimeCheckingPtrGroup &M,
                     const RuntimeCheckingPtrGroup &N) const;

  /// Returns the number of run-time checks required according to
  /// needsChecking.
  unsigned getNumberOfChecks() const { return Checks.size(); }

  /// Print the list of run-time memory checks necessary.
  void print(raw_ostream &OS, unsigned Depth = 0) const;

  /// Print \p Checks.
  void printChecks(raw_ostream &OS,
                   const SmallVectorImpl<RuntimePointerCheck> &Checks,
                   unsigned Depth = 0) const;

  /// This flag indicates if we need to add the runtime check.
  bool Need = false;

  /// Information about the pointers that may require checking.
  SmallVector<PointerInfo, 2> Pointers;

  /// Holds a partitioning of pointers into "check groups".
  SmallVector<RuntimeCheckingPtrGroup, 2> CheckingGroups;

  /// Check if pointers are in the same partition.
  ///
  /// \p PtrToPartition contains the partition number for pointers (-1 if the
  /// pointer belongs to multiple partitions).
  static bool
  arePointersInSamePartition(const SmallVectorImpl<int> &PtrToPartition,
                             unsigned PtrIdx1, unsigned PtrIdx2);

  /// Decide whether we need to issue a run-time check for the pointers at
  /// indices \p I and \p J to prove their independence.
  bool needsChecking(unsigned I, unsigned J) const;

  /// Return PointerInfo for pointer at index \p PtrIdx.
  const PointerInfo &getPointerInfo(unsigned PtrIdx) const {
    return Pointers[PtrIdx];
  }

  ScalarEvolution *getSE() const { return SE; }

private:
  /// Groups pointers such that a single memcheck is required
  /// between two different groups. This will clear the CheckingGroups vector
  /// and re-compute it. We will only group dependencies if \p UseDependencies
  /// is true, otherwise we will create a separate group for each pointer.
  void groupChecks(MemoryDepChecker::DepCandidates &DepCands,
                   bool UseDependencies);

  /// Generate the checks and return them.
  SmallVector<RuntimePointerCheck, 4> generateChecks();

  /// Try to add a new (pointer-difference, access size) pair to
  /// DiffCheck for checking groups \p CGI and \p CGJ. If pointer-difference
  /// checks cannot be used for the groups, set CanUseDiffCheck to false.
  bool tryToCreateDiffCheck(const RuntimeCheckingPtrGroup &CGI,
                            const RuntimeCheckingPtrGroup &CGJ);

  MemoryDepChecker &DC;

  /// Holds a pointer to the ScalarEvolution analysis.
  ScalarEvolution *SE;

  /// Set of run-time checks required to establish independence of
  /// otherwise may-aliasing pointers in the loop.
  SmallVector<RuntimePointerCheck, 4> Checks;

  /// Flag indicating if pointer-difference checks can be used.
  bool CanUseDiffCheck = true;

  /// A list of (pointer-difference, access size) pairs that can be used to
  /// prove that there are no vectorization-preventing dependencies.
  SmallVector<PointerDiffInfo> DiffChecks;
};
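
// Editor's usage sketch (assumed client code, not part of the original
// header): walking the generated checks. `RtChecking` is assumed to be a
// RuntimePointerChecking instance after generateChecks() has run.
//
// \code
//   if (RtChecking.Need)
//     for (const RuntimePointerCheck &Check : RtChecking.getChecks()) {
//       const RuntimeCheckingPtrGroup *G1 = Check.first;
//       const RuntimeCheckingPtrGroup *G2 = Check.second;
//       // Emit a runtime test that [G1->Low, G1->High) and
//       // [G2->Low, G2->High) do not overlap.
//     }
// \endcode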

/// Drive the analysis of memory accesses in the loop
///
/// This class is responsible for analyzing the memory accesses of a loop.  It
/// collects the accesses and then its main helper, the AccessAnalysis class,
/// finds and categorizes the dependences in buildDependenceSets.
///
/// For memory dependences that can be analyzed at compile time, it determines
/// whether the dependence is part of a cycle inhibiting vectorization.  This
/// work is delegated to the MemoryDepChecker class.
///
/// For memory dependences that cannot be determined at compile time, it
/// generates run-time checks to prove independence.  This is done by
/// AccessAnalysis::canCheckPtrAtRT and the checks are maintained by the
/// RuntimePointerChecking class.
///
/// If pointers can wrap or can't be expressed as affine AddRec expressions by
/// ScalarEvolution, we will generate run-time checks by emitting a
/// SCEVUnionPredicate.
///
/// Checks for both memory dependences and the SCEV predicates contained in the
/// PSE must be emitted in order for the results of this analysis to be valid.
class LoopAccessInfo {
public:
  LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetTransformInfo *TTI,
                 const TargetLibraryInfo *TLI, AAResults *AA, DominatorTree *DT,
                 LoopInfo *LI);

  /// Return true if we can analyze the memory accesses in the loop and there
  /// are no memory dependence cycles. Note that for dependences between loads
  /// & stores with uniform addresses,
  /// hasStoreStoreDependenceInvolvingLoopInvariantAddress and
  /// hasLoadStoreDependenceInvolvingLoopInvariantAddress also need to be
  /// checked.
  bool canVectorizeMemory() const { return CanVecMem; }

  /// Return true if there is a convergent operation in the loop. There may
  /// still be reported runtime pointer checks that would be required, but it is
  /// not legal to insert them.
  bool hasConvergentOp() const { return HasConvergentOp; }

  const RuntimePointerChecking *getRuntimePointerChecking() const {
    return PtrRtChecking.get();
  }

  /// Number of memchecks required to prove independence of otherwise
  /// may-alias pointers.
  unsigned getNumRuntimePointerChecks() const {
    return PtrRtChecking->getNumberOfChecks();
  }

  /// Return true if the block BB needs to be predicated in order for the loop
  /// to be vectorized.
  static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop,
                                    DominatorTree *DT);

  /// Returns true if value \p V is loop invariant.
  bool isInvariant(Value *V) const;

  unsigned getNumStores() const { return NumStores; }
  unsigned getNumLoads() const { return NumLoads; }

  /// The diagnostics report generated for the analysis.  E.g. why we
  /// couldn't analyze the loop.
  const OptimizationRemarkAnalysis *getReport() const { return Report.get(); }

  /// The Memory Dependence Checker which can determine the
  /// loop-independent and loop-carried dependences between memory accesses.
  const MemoryDepChecker &getDepChecker() const { return *DepChecker; }

  /// Return the list of instructions that use \p Ptr to read or write
  /// memory.
  SmallVector<Instruction *, 4> getInstructionsForAccess(Value *Ptr,
                                                         bool isWrite) const {
    return DepChecker->getInstructionsForAccess(Ptr, isWrite);
  }

  /// If an access has a symbolic stride, this maps the pointer value to
  /// the stride symbol.
  const DenseMap<Value *, const SCEV *> &getSymbolicStrides() const {
    return SymbolicStrides;
  }

  /// Print the information about the memory accesses in the loop.
  void print(raw_ostream &OS, unsigned Depth = 0) const;

  /// Return true if the loop has a memory dependence involving two stores to
  /// an invariant address, else return false.
  bool hasStoreStoreDependenceInvolvingLoopInvariantAddress() const {
    return HasStoreStoreDependenceInvolvingLoopInvariantAddress;
  }

  /// Return true if the loop has a memory dependence involving a load and a
  /// store to an invariant address, else return false.
  bool hasLoadStoreDependenceInvolvingLoopInvariantAddress() const {
    return HasLoadStoreDependenceInvolvingLoopInvariantAddress;
  }

  /// Return the list of stores to invariant addresses.
  ArrayRef<StoreInst *> getStoresToInvariantAddresses() const {
    return StoresToInvariantAddresses;
  }

  /// Used to add runtime SCEV checks. Simplifies SCEV expressions and converts
  /// them to a more usable form.  All SCEV expressions during the analysis
  /// should be re-written (and therefore simplified) according to PSE.
  /// A user of LoopAccessAnalysis will need to emit the runtime checks
  /// associated with this predicate.
  const PredicatedScalarEvolution &getPSE() const { return *PSE; }

private:
  /// Analyze the loop. Returns true if all memory accesses in the loop can be
  /// vectorized.
  bool analyzeLoop(AAResults *AA, LoopInfo *LI, const TargetLibraryInfo *TLI,
                   DominatorTree *DT);

  /// Check if the structure of the loop allows it to be analyzed by this
  /// pass.
  bool canAnalyzeLoop();

  /// Save the analysis remark.
  ///
  /// LAA does not directly emit the remarks.  Instead it stores them, so that
  /// the client can retrieve and present them as its own analysis
  /// (e.g. -Rpass-analysis=loop-vectorize).
  OptimizationRemarkAnalysis &recordAnalysis(StringRef RemarkName,
                                             Instruction *Instr = nullptr);

  /// Collect memory accesses with loop invariant strides.
  ///
  /// Looks for accesses like "a[i * StrideA]" where "StrideA" is loop
  /// invariant.
  void collectStridedAccess(Value *LoadOrStoreInst);

  // Emits the first unsafe memory dependence in a loop.
  // Emits nothing if there are no unsafe dependences
  // or if the dependences were not recorded.
  void emitUnsafeDependenceRemark();

  std::unique_ptr<PredicatedScalarEvolution> PSE;

  /// We need to check that all of the pointers in this list are disjoint
  /// at runtime. Using std::unique_ptr to make using move ctor simpler.
  std::unique_ptr<RuntimePointerChecking> PtrRtChecking;

  /// The Memory Dependence Checker which can determine the
  /// loop-independent and loop-carried dependences between memory accesses.
  std::unique_ptr<MemoryDepChecker> DepChecker;

  Loop *TheLoop;

  unsigned NumLoads = 0;
  unsigned NumStores = 0;

  /// Cache the result of analyzeLoop.
  bool CanVecMem = false;
  bool HasConvergentOp = false;

  /// Indicator that there are two non-vectorizable stores to the same uniform
  /// address.
  bool HasStoreStoreDependenceInvolvingLoopInvariantAddress = false;
  /// Indicator that there is a non-vectorizable load and store to the same
  /// uniform address.
  bool HasLoadStoreDependenceInvolvingLoopInvariantAddress = false;

  /// List of stores to invariant addresses.
  SmallVector<StoreInst *> StoresToInvariantAddresses;

  /// The diagnostics report generated for the analysis.  E.g. why we
  /// couldn't analyze the loop.
  std::unique_ptr<OptimizationRemarkAnalysis> Report;

  /// If an access has a symbolic stride, this maps the pointer value to
  /// the stride symbol.
  DenseMap<Value *, const SCEV *> SymbolicStrides;
};
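
// Editor's usage sketch (assumed client code, not part of the original
// header): the typical client flow described in the class comment above.
// Both the runtime pointer checks and the SCEV predicates in getPSE() must
// be emitted for the analysis result to be valid. `LAIs` is an assumed
// LoopAccessInfoManager (declared later in this header) and `L` a Loop*.
//
// \code
//   const LoopAccessInfo &LAI = LAIs.getInfo(*L);
//   if (LAI.canVectorizeMemory()) {
//     const RuntimePointerChecking *RtChecking =
//         LAI.getRuntimePointerChecking();
//     // Emit RtChecking->getChecks() if RtChecking->Need, plus the
//     // predicate accumulated in LAI.getPSE().
//   }
// \endcode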

/// Return the SCEV corresponding to a pointer with the symbolic stride
/// replaced with constant one, assuming the SCEV predicate associated with
/// \p PSE is true.
///
/// If necessary this method will version the stride of the pointer according
/// to \p PtrToStride and therefore add further predicates to \p PSE.
///
/// \p PtrToStride provides the mapping between the pointer value and its
/// stride as collected by LoopVectorizationLegality::collectStridedAccess.
const SCEV *
replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
                          const DenseMap<Value *, const SCEV *> &PtrToStride,
                          Value *Ptr);

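// Editor's sketch (assumed client code): versioning a symbolic stride to 1
// before further SCEV reasoning. `LAI`, `PSE` and `Ptr` are assumed to be
// in scope.
//
// \code
//   const SCEV *PtrScev =
//       replaceSymbolicStrideSCEV(PSE, LAI.getSymbolicStrides(), Ptr);
//   // PSE now carries any stride predicate added by the versioning.
// \endcode
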
/// If the pointer has a constant stride return it in units of the access type
/// size. If the pointer is loop-invariant, return 0. Otherwise return
/// std::nullopt.
///
/// Ensure that it does not wrap in the address space, assuming the predicate
/// associated with \p PSE is true.
///
/// If necessary this method will version the stride of the pointer according
/// to \p PtrToStride and therefore add further predicates to \p PSE.
/// The \p Assume parameter indicates if we are allowed to make additional
/// run-time assumptions.
///
/// Note that the analysis results are defined if-and-only-if the original
/// memory access was defined.  If that access was dead, or UB, then the
/// result of this function is undefined.
std::optional<int64_t>
getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
             const Loop *Lp,
             const DenseMap<Value *, const SCEV *> &StridesMap =
                 DenseMap<Value *, const SCEV *>(),
             bool Assume = false, bool ShouldCheckWrap = true);

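// Editor's usage sketch (assumed client code): classifying an access by its
// stride, expressed in units of the access type size as documented above.
//
// \code
//   if (std::optional<int64_t> Stride =
//           getPtrStride(PSE, AccessTy, Ptr, Lp)) {
//     if (*Stride == 1) {
//       // Consecutive forward access (-1 would be consecutive reverse).
//     } else if (*Stride == 0) {
//       // Loop-invariant (uniform) address.
//     }
//   }
// \endcode
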
/// Returns the distance between the pointers \p PtrA and \p PtrB iff they are
/// compatible and it is possible to calculate the distance between them. This
/// is a simple API that does not depend on the analysis pass.
/// \param StrictCheck Ensure that the calculated distance matches the
/// type-based one after removal of all bitcasts from the provided pointers.
std::optional<int> getPointersDiff(Type *ElemTyA, Value *PtrA, Type *ElemTyB,
                                   Value *PtrB, const DataLayout &DL,
                                   ScalarEvolution &SE,
                                   bool StrictCheck = false,
                                   bool CheckType = true);

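// Editor's sketch (assumed client code; the element-sized units are the
// editor's reading of the API, e.g. 2 for &A[0] and &A[2] of the same type).
//
// \code
//   if (std::optional<int> Diff =
//           getPointersDiff(ElemTy, PtrA, ElemTy, PtrB, DL, SE)) {
//     // *Diff > 0 means PtrB lies *Diff elements past PtrA.
//   }
// \endcode
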
/// Attempt to sort the pointers in \p VL and return the sorted indices
/// in \p SortedIndices, if reordering is required.
///
/// Returns 'true' if sorting is legal, otherwise returns 'false'.
///
/// For example, for a given \p VL of memory accesses in program order, a[i+4],
/// a[i+0], a[i+1] and a[i+7], this function sorts them as a[i+0], a[i+1],
/// a[i+4], a[i+7], and saves the indices of the accesses in that sorted order
/// into \p SortedIndices as <1,2,0,3>.
bool sortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy, const DataLayout &DL,
                     ScalarEvolution &SE,
                     SmallVectorImpl<unsigned> &SortedIndices);

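// Editor's sketch (assumed client code), reusing the example from the
// comment above with VL = {a+4, a+0, a+1, a+7} in program order.
//
// \code
//   SmallVector<unsigned, 4> SortedIndices;
//   if (sortPtrAccesses(VL, ElemTy, DL, SE, SortedIndices)) {
//     // SortedIndices == {1, 2, 0, 3}: VL[1], VL[2], VL[0], VL[3] are in
//     // increasing address order.
//   }
// \endcode
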
/// Returns true if the memory operations \p A and \p B are consecutive.
/// This is a simple API that does not depend on the analysis pass.
bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
                         ScalarEvolution &SE, bool CheckType = true);

class LoopAccessInfoManager {
  /// The cache.
  DenseMap<Loop *, std::unique_ptr<LoopAccessInfo>> LoopAccessInfoMap;

  // The used analysis passes.
  ScalarEvolution &SE;
  AAResults &AA;
  DominatorTree &DT;
  LoopInfo &LI;
  TargetTransformInfo *TTI;
  const TargetLibraryInfo *TLI = nullptr;

public:
  LoopAccessInfoManager(ScalarEvolution &SE, AAResults &AA, DominatorTree &DT,
                        LoopInfo &LI, TargetTransformInfo *TTI,
                        const TargetLibraryInfo *TLI)
      : SE(SE), AA(AA), DT(DT), LI(LI), TTI(TTI), TLI(TLI) {}

  const LoopAccessInfo &getInfo(Loop &L);

  void clear();

  bool invalidate(Function &F, const PreservedAnalyses &PA,
                  FunctionAnalysisManager::Invalidator &Inv);
};

/// This analysis provides dependence information for the memory
/// accesses of a loop.
///
/// It runs the analysis for a loop on demand.  This can be initiated by
/// querying the loop access info via AM.getResult<LoopAccessAnalysis>.
/// getResult returns a LoopAccessInfoManager, from which the LoopAccessInfo
/// for an individual loop can be obtained via getInfo; see LoopAccessInfo for
/// the specifics of what information is provided.
class LoopAccessAnalysis
    : public AnalysisInfoMixin<LoopAccessAnalysis> {
  friend AnalysisInfoMixin<LoopAccessAnalysis>;
  static AnalysisKey Key;

public:
  typedef LoopAccessInfoManager Result;

  Result run(Function &F, FunctionAnalysisManager &AM);
};
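
// Editor's usage sketch (assumed client code, not part of the original
// header): querying the analysis from a function pass and fetching the
// per-loop result. `F`, `AM` and `L` are assumed to be in scope.
//
// \code
//   LoopAccessInfoManager &LAIs = AM.getResult<LoopAccessAnalysis>(F);
//   const LoopAccessInfo &LAI = LAIs.getInfo(*L);
//   if (LAI.canVectorizeMemory()) {
//     // Memory dependences allow vectorization, possibly subject to the
//     // runtime checks recorded in LAI.getRuntimePointerChecking().
//   }
// \endcode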

inline Instruction *MemoryDepChecker::Dependence::getSource(
    const MemoryDepChecker &DepChecker) const {
  return DepChecker.getMemoryInstructions()[Source];
}

inline Instruction *MemoryDepChecker::Dependence::getDestination(
    const MemoryDepChecker &DepChecker) const {
  return DepChecker.getMemoryInstructions()[Destination];
}

} // End llvm namespace

#endif