//===- llvm/Analysis/LoopAccessAnalysis.h -----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interface for the loop memory dependence framework that
// was originally developed for the Loop Vectorizer.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_LOOPACCESSANALYSIS_H
#define LLVM_ANALYSIS_LOOPACCESSANALYSIS_H

#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/DiagnosticInfo.h"
#include <optional>
#include <variant>

namespace llvm {

class AAResults;
class DataLayout;
class Loop;
class LoopAccessInfo;
class raw_ostream;
class SCEV;
class SCEVUnionPredicate;
class Value;
34
/// Collection of parameters shared between the Loop Vectorizer and the
/// Loop Access Analysis.
struct VectorizerParams {
  /// Maximum SIMD width.
  static const unsigned MaxVectorWidth;

  /// VF as overridden by the user.
  static unsigned VectorizationFactor;
  /// Interleave factor as overridden by the user.
  static unsigned VectorizationInterleave;
  /// True if force-vector-interleave was specified by the user.
  static bool isInterleaveForced();

  /// When performing memory disambiguation checks at runtime do not
  /// make more than this number of comparisons.
  static unsigned RuntimeMemoryCheckThreshold;

  // When creating runtime checks for nested loops, where possible try to
  // write the checks in a form that allows them to be easily hoisted out of
  // the outermost loop. For example, we can do this by expanding the range of
  // addresses considered to include the entire nested loop so that they are
  // loop invariant.
  static bool HoistRuntimeChecks;
};
59
60 /// Checks memory dependences among accesses to the same underlying
61 /// object to determine whether there vectorization is legal or not (and at
62 /// which vectorization factor).
63 ///
64 /// Note: This class will compute a conservative dependence for access to
65 /// different underlying pointers. Clients, such as the loop vectorizer, will
66 /// sometimes deal these potential dependencies by emitting runtime checks.
67 ///
68 /// We use the ScalarEvolution framework to symbolically evalutate access
69 /// functions pairs. Since we currently don't restructure the loop we can rely
70 /// on the program order of memory accesses to determine their safety.
71 /// At the moment we will only deem accesses as safe for:
72 /// * A negative constant distance assuming program order.
73 ///
74 /// Safe: tmp = a[i + 1]; OR a[i + 1] = x;
75 /// a[i] = tmp; y = a[i];
76 ///
77 /// The latter case is safe because later checks guarantuee that there can't
78 /// be a cycle through a phi node (that is, we check that "x" and "y" is not
79 /// the same variable: a header phi can only be an induction or a reduction, a
80 /// reduction can't have a memory sink, an induction can't have a memory
81 /// source). This is important and must not be violated (or we have to
82 /// resort to checking for cycles through memory).
83 ///
84 /// * A positive constant distance assuming program order that is bigger
85 /// than the biggest memory access.
86 ///
87 /// tmp = a[i] OR b[i] = x
88 /// a[i+2] = tmp y = b[i+2];
89 ///
90 /// Safe distance: 2 x sizeof(a[0]), and 2 x sizeof(b[0]), respectively.
91 ///
92 /// * Zero distances and all accesses have the same size.
93 ///
94 class MemoryDepChecker {
95 public:
96 typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
97 typedef SmallVector<MemAccessInfo, 8> MemAccessInfoList;
98 /// Set of potential dependent memory accesses.
99 typedef EquivalenceClasses<MemAccessInfo> DepCandidates;
100
101 /// Type to keep track of the status of the dependence check. The order of
102 /// the elements is important and has to be from most permissive to least
103 /// permissive.
104 enum class VectorizationSafetyStatus {
105 // Can vectorize safely without RT checks. All dependences are known to be
106 // safe.
107 Safe,
108 // Can possibly vectorize with RT checks to overcome unknown dependencies.
109 PossiblySafeWithRtChecks,
110 // Cannot vectorize due to known unsafe dependencies.
111 Unsafe,
112 };
113
114 /// Dependece between memory access instructions.
115 struct Dependence {
116 /// The type of the dependence.
117 enum DepType {
118 // No dependence.
119 NoDep,
120 // We couldn't determine the direction or the distance.
121 Unknown,
122 // At least one of the memory access instructions may access a loop
123 // varying object, e.g. the address of underlying object is loaded inside
124 // the loop, like A[B[i]]. We cannot determine direction or distance in
125 // those cases, and also are unable to generate any runtime checks.
126 IndirectUnsafe,
127
128 // Lexically forward.
129 //
130 // FIXME: If we only have loop-independent forward dependences (e.g. a
131 // read and write of A[i]), LAA will locally deem the dependence "safe"
132 // without querying the MemoryDepChecker. Therefore we can miss
133 // enumerating loop-independent forward dependences in
134 // getDependences. Note that as soon as there are different
135 // indices used to access the same array, the MemoryDepChecker *is*
136 // queried and the dependence list is complete.
137 Forward,
138 // Forward, but if vectorized, is likely to prevent store-to-load
139 // forwarding.
140 ForwardButPreventsForwarding,
141 // Lexically backward.
142 Backward,
143 // Backward, but the distance allows a vectorization factor of dependent
144 // on MinDepDistBytes.
145 BackwardVectorizable,
146 // Same, but may prevent store-to-load forwarding.
147 BackwardVectorizableButPreventsForwarding
148 };
149
150 /// String version of the types.
151 static const char *DepName[];
152
153 /// Index of the source of the dependence in the InstMap vector.
154 unsigned Source;
155 /// Index of the destination of the dependence in the InstMap vector.
156 unsigned Destination;
157 /// The type of the dependence.
158 DepType Type;
159
DependenceDependence160 Dependence(unsigned Source, unsigned Destination, DepType Type)
161 : Source(Source), Destination(Destination), Type(Type) {}
162
163 /// Return the source instruction of the dependence.
164 Instruction *getSource(const MemoryDepChecker &DepChecker) const;
165 /// Return the destination instruction of the dependence.
166 Instruction *getDestination(const MemoryDepChecker &DepChecker) const;
167
168 /// Dependence types that don't prevent vectorization.
169 static VectorizationSafetyStatus isSafeForVectorization(DepType Type);
170
171 /// Lexically forward dependence.
172 bool isForward() const;
173 /// Lexically backward dependence.
174 bool isBackward() const;
175
176 /// May be a lexically backward dependence type (includes Unknown).
177 bool isPossiblyBackward() const;
178
179 /// Print the dependence. \p Instr is used to map the instruction
180 /// indices to instructions.
181 void print(raw_ostream &OS, unsigned Depth,
182 const SmallVectorImpl<Instruction *> &Instrs) const;
183 };
184
MemoryDepChecker(PredicatedScalarEvolution & PSE,const Loop * L,const DenseMap<Value *,const SCEV * > & SymbolicStrides,unsigned MaxTargetVectorWidthInBits)185 MemoryDepChecker(PredicatedScalarEvolution &PSE, const Loop *L,
186 const DenseMap<Value *, const SCEV *> &SymbolicStrides,
187 unsigned MaxTargetVectorWidthInBits)
188 : PSE(PSE), InnermostLoop(L), SymbolicStrides(SymbolicStrides),
189 MaxTargetVectorWidthInBits(MaxTargetVectorWidthInBits) {}
190
191 /// Register the location (instructions are given increasing numbers)
192 /// of a write access.
193 void addAccess(StoreInst *SI);
194
195 /// Register the location (instructions are given increasing numbers)
196 /// of a write access.
197 void addAccess(LoadInst *LI);
198
199 /// Check whether the dependencies between the accesses are safe.
200 ///
201 /// Only checks sets with elements in \p CheckDeps.
202 bool areDepsSafe(const DepCandidates &AccessSets,
203 const MemAccessInfoList &CheckDeps);
204
205 /// No memory dependence was encountered that would inhibit
206 /// vectorization.
isSafeForVectorization()207 bool isSafeForVectorization() const {
208 return Status == VectorizationSafetyStatus::Safe;
209 }
210
211 /// Return true if the number of elements that are safe to operate on
212 /// simultaneously is not bounded.
isSafeForAnyVectorWidth()213 bool isSafeForAnyVectorWidth() const {
214 return MaxSafeVectorWidthInBits == UINT_MAX;
215 }
216
217 /// Return the number of elements that are safe to operate on
218 /// simultaneously, multiplied by the size of the element in bits.
getMaxSafeVectorWidthInBits()219 uint64_t getMaxSafeVectorWidthInBits() const {
220 return MaxSafeVectorWidthInBits;
221 }
222
223 /// In same cases when the dependency check fails we can still
224 /// vectorize the loop with a dynamic array access check.
shouldRetryWithRuntimeCheck()225 bool shouldRetryWithRuntimeCheck() const {
226 return FoundNonConstantDistanceDependence &&
227 Status == VectorizationSafetyStatus::PossiblySafeWithRtChecks;
228 }
229
230 /// Returns the memory dependences. If null is returned we exceeded
231 /// the MaxDependences threshold and this information is not
232 /// available.
getDependences()233 const SmallVectorImpl<Dependence> *getDependences() const {
234 return RecordDependences ? &Dependences : nullptr;
235 }
236
clearDependences()237 void clearDependences() { Dependences.clear(); }
238
239 /// The vector of memory access instructions. The indices are used as
240 /// instruction identifiers in the Dependence class.
getMemoryInstructions()241 const SmallVectorImpl<Instruction *> &getMemoryInstructions() const {
242 return InstMap;
243 }
244
245 /// Generate a mapping between the memory instructions and their
246 /// indices according to program order.
generateInstructionOrderMap()247 DenseMap<Instruction *, unsigned> generateInstructionOrderMap() const {
248 DenseMap<Instruction *, unsigned> OrderMap;
249
250 for (unsigned I = 0; I < InstMap.size(); ++I)
251 OrderMap[InstMap[I]] = I;
252
253 return OrderMap;
254 }
255
256 /// Find the set of instructions that read or write via \p Ptr.
257 SmallVector<Instruction *, 4> getInstructionsForAccess(Value *Ptr,
258 bool isWrite) const;
259
260 /// Return the program order indices for the access location (Ptr, IsWrite).
261 /// Returns an empty ArrayRef if there are no accesses for the location.
getOrderForAccess(Value * Ptr,bool IsWrite)262 ArrayRef<unsigned> getOrderForAccess(Value *Ptr, bool IsWrite) const {
263 auto I = Accesses.find({Ptr, IsWrite});
264 if (I != Accesses.end())
265 return I->second;
266 return {};
267 }
268
getInnermostLoop()269 const Loop *getInnermostLoop() const { return InnermostLoop; }
270
271 DenseMap<std::pair<const SCEV *, Type *>,
272 std::pair<const SCEV *, const SCEV *>> &
getPointerBounds()273 getPointerBounds() {
274 return PointerBounds;
275 }
276
277 private:
278 /// A wrapper around ScalarEvolution, used to add runtime SCEV checks, and
279 /// applies dynamic knowledge to simplify SCEV expressions and convert them
280 /// to a more usable form. We need this in case assumptions about SCEV
281 /// expressions need to be made in order to avoid unknown dependences. For
282 /// example we might assume a unit stride for a pointer in order to prove
283 /// that a memory access is strided and doesn't wrap.
284 PredicatedScalarEvolution &PSE;
285 const Loop *InnermostLoop;
286
287 /// Reference to map of pointer values to
288 /// their stride symbols, if they have a symbolic stride.
289 const DenseMap<Value *, const SCEV *> &SymbolicStrides;
290
291 /// Maps access locations (ptr, read/write) to program order.
292 DenseMap<MemAccessInfo, std::vector<unsigned> > Accesses;
293
294 /// Memory access instructions in program order.
295 SmallVector<Instruction *, 16> InstMap;
296
297 /// The program order index to be used for the next instruction.
298 unsigned AccessIdx = 0;
299
300 /// The smallest dependence distance in bytes in the loop. This may not be
301 /// the same as the maximum number of bytes that are safe to operate on
302 /// simultaneously.
303 uint64_t MinDepDistBytes = 0;
304
305 /// Number of elements (from consecutive iterations) that are safe to
306 /// operate on simultaneously, multiplied by the size of the element in bits.
307 /// The size of the element is taken from the memory access that is most
308 /// restrictive.
309 uint64_t MaxSafeVectorWidthInBits = -1U;
310
311 /// If we see a non-constant dependence distance we can still try to
312 /// vectorize this loop with runtime checks.
313 bool FoundNonConstantDistanceDependence = false;
314
315 /// Result of the dependence checks, indicating whether the checked
316 /// dependences are safe for vectorization, require RT checks or are known to
317 /// be unsafe.
318 VectorizationSafetyStatus Status = VectorizationSafetyStatus::Safe;
319
320 //// True if Dependences reflects the dependences in the
321 //// loop. If false we exceeded MaxDependences and
322 //// Dependences is invalid.
323 bool RecordDependences = true;
324
325 /// Memory dependences collected during the analysis. Only valid if
326 /// RecordDependences is true.
327 SmallVector<Dependence, 8> Dependences;
328
329 /// The maximum width of a target's vector registers multiplied by 2 to also
330 /// roughly account for additional interleaving. Is used to decide if a
331 /// backwards dependence with non-constant stride should be classified as
332 /// backwards-vectorizable or unknown (triggering a runtime check).
333 unsigned MaxTargetVectorWidthInBits = 0;
334
335 /// Mapping of SCEV expressions to their expanded pointer bounds (pair of
336 /// start and end pointer expressions).
337 DenseMap<std::pair<const SCEV *, Type *>,
338 std::pair<const SCEV *, const SCEV *>>
339 PointerBounds;
340
341 /// Check whether there is a plausible dependence between the two
342 /// accesses.
343 ///
344 /// Access \p A must happen before \p B in program order. The two indices
345 /// identify the index into the program order map.
346 ///
347 /// This function checks whether there is a plausible dependence (or the
348 /// absence of such can't be proved) between the two accesses. If there is a
349 /// plausible dependence but the dependence distance is bigger than one
350 /// element access it records this distance in \p MinDepDistBytes (if this
351 /// distance is smaller than any other distance encountered so far).
352 /// Otherwise, this function returns true signaling a possible dependence.
353 Dependence::DepType isDependent(const MemAccessInfo &A, unsigned AIdx,
354 const MemAccessInfo &B, unsigned BIdx);
355
356 /// Check whether the data dependence could prevent store-load
357 /// forwarding.
358 ///
359 /// \return false if we shouldn't vectorize at all or avoid larger
360 /// vectorization factors by limiting MinDepDistBytes.
361 bool couldPreventStoreLoadForward(uint64_t Distance, uint64_t TypeByteSize);
362
363 /// Updates the current safety status with \p S. We can go from Safe to
364 /// either PossiblySafeWithRtChecks or Unsafe and from
365 /// PossiblySafeWithRtChecks to Unsafe.
366 void mergeInStatus(VectorizationSafetyStatus S);
367
368 struct DepDistanceStrideAndSizeInfo {
369 const SCEV *Dist;
370 uint64_t StrideA;
371 uint64_t StrideB;
372 uint64_t TypeByteSize;
373 bool AIsWrite;
374 bool BIsWrite;
375
DepDistanceStrideAndSizeInfoDepDistanceStrideAndSizeInfo376 DepDistanceStrideAndSizeInfo(const SCEV *Dist, uint64_t StrideA,
377 uint64_t StrideB, uint64_t TypeByteSize,
378 bool AIsWrite, bool BIsWrite)
379 : Dist(Dist), StrideA(StrideA), StrideB(StrideB),
380 TypeByteSize(TypeByteSize), AIsWrite(AIsWrite), BIsWrite(BIsWrite) {}
381 };
382
383 /// Get the dependence distance, strides, type size and whether it is a write
384 /// for the dependence between A and B. Returns a DepType, if we can prove
385 /// there's no dependence or the analysis fails. Outlined to lambda to limit
386 /// he scope of various temporary variables, like A/BPtr, StrideA/BPtr and
387 /// others. Returns either the dependence result, if it could already be
388 /// determined, or a struct containing (Distance, Stride, TypeSize, AIsWrite,
389 /// BIsWrite).
390 std::variant<Dependence::DepType, DepDistanceStrideAndSizeInfo>
391 getDependenceDistanceStrideAndSize(const MemAccessInfo &A, Instruction *AInst,
392 const MemAccessInfo &B,
393 Instruction *BInst);
394 };
395
396 class RuntimePointerChecking;
397 /// A grouping of pointers. A single memcheck is required between
398 /// two groups.
399 struct RuntimeCheckingPtrGroup {
400 /// Create a new pointer checking group containing a single
401 /// pointer, with index \p Index in RtCheck.
402 RuntimeCheckingPtrGroup(unsigned Index, RuntimePointerChecking &RtCheck);
403
404 /// Tries to add the pointer recorded in RtCheck at index
405 /// \p Index to this pointer checking group. We can only add a pointer
406 /// to a checking group if we will still be able to get
407 /// the upper and lower bounds of the check. Returns true in case
408 /// of success, false otherwise.
409 bool addPointer(unsigned Index, RuntimePointerChecking &RtCheck);
410 bool addPointer(unsigned Index, const SCEV *Start, const SCEV *End,
411 unsigned AS, bool NeedsFreeze, ScalarEvolution &SE);
412
413 /// The SCEV expression which represents the upper bound of all the
414 /// pointers in this group.
415 const SCEV *High;
416 /// The SCEV expression which represents the lower bound of all the
417 /// pointers in this group.
418 const SCEV *Low;
419 /// Indices of all the pointers that constitute this grouping.
420 SmallVector<unsigned, 2> Members;
421 /// Address space of the involved pointers.
422 unsigned AddressSpace;
423 /// Whether the pointer needs to be frozen after expansion, e.g. because it
424 /// may be poison outside the loop.
425 bool NeedsFreeze = false;
426 };
427
428 /// A memcheck which made up of a pair of grouped pointers.
429 typedef std::pair<const RuntimeCheckingPtrGroup *,
430 const RuntimeCheckingPtrGroup *>
431 RuntimePointerCheck;
432
433 struct PointerDiffInfo {
434 const SCEV *SrcStart;
435 const SCEV *SinkStart;
436 unsigned AccessSize;
437 bool NeedsFreeze;
438
PointerDiffInfoPointerDiffInfo439 PointerDiffInfo(const SCEV *SrcStart, const SCEV *SinkStart,
440 unsigned AccessSize, bool NeedsFreeze)
441 : SrcStart(SrcStart), SinkStart(SinkStart), AccessSize(AccessSize),
442 NeedsFreeze(NeedsFreeze) {}
443 };
444
445 /// Holds information about the memory runtime legality checks to verify
446 /// that a group of pointers do not overlap.
447 class RuntimePointerChecking {
448 friend struct RuntimeCheckingPtrGroup;
449
450 public:
451 struct PointerInfo {
452 /// Holds the pointer value that we need to check.
453 TrackingVH<Value> PointerValue;
454 /// Holds the smallest byte address accessed by the pointer throughout all
455 /// iterations of the loop.
456 const SCEV *Start;
457 /// Holds the largest byte address accessed by the pointer throughout all
458 /// iterations of the loop, plus 1.
459 const SCEV *End;
460 /// Holds the information if this pointer is used for writing to memory.
461 bool IsWritePtr;
462 /// Holds the id of the set of pointers that could be dependent because of a
463 /// shared underlying object.
464 unsigned DependencySetId;
465 /// Holds the id of the disjoint alias set to which this pointer belongs.
466 unsigned AliasSetId;
467 /// SCEV for the access.
468 const SCEV *Expr;
469 /// True if the pointer expressions needs to be frozen after expansion.
470 bool NeedsFreeze;
471
PointerInfoPointerInfo472 PointerInfo(Value *PointerValue, const SCEV *Start, const SCEV *End,
473 bool IsWritePtr, unsigned DependencySetId, unsigned AliasSetId,
474 const SCEV *Expr, bool NeedsFreeze)
475 : PointerValue(PointerValue), Start(Start), End(End),
476 IsWritePtr(IsWritePtr), DependencySetId(DependencySetId),
477 AliasSetId(AliasSetId), Expr(Expr), NeedsFreeze(NeedsFreeze) {}
478 };
479
RuntimePointerChecking(MemoryDepChecker & DC,ScalarEvolution * SE)480 RuntimePointerChecking(MemoryDepChecker &DC, ScalarEvolution *SE)
481 : DC(DC), SE(SE) {}
482
483 /// Reset the state of the pointer runtime information.
reset()484 void reset() {
485 Need = false;
486 Pointers.clear();
487 Checks.clear();
488 }
489
490 /// Insert a pointer and calculate the start and end SCEVs.
491 /// We need \p PSE in order to compute the SCEV expression of the pointer
492 /// according to the assumptions that we've made during the analysis.
493 /// The method might also version the pointer stride according to \p Strides,
494 /// and add new predicates to \p PSE.
495 void insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr, Type *AccessTy,
496 bool WritePtr, unsigned DepSetId, unsigned ASId,
497 PredicatedScalarEvolution &PSE, bool NeedsFreeze);
498
499 /// No run-time memory checking is necessary.
empty()500 bool empty() const { return Pointers.empty(); }
501
502 /// Generate the checks and store it. This also performs the grouping
503 /// of pointers to reduce the number of memchecks necessary.
504 void generateChecks(MemoryDepChecker::DepCandidates &DepCands,
505 bool UseDependencies);
506
507 /// Returns the checks that generateChecks created. They can be used to ensure
508 /// no read/write accesses overlap across all loop iterations.
getChecks()509 const SmallVectorImpl<RuntimePointerCheck> &getChecks() const {
510 return Checks;
511 }
512
513 // Returns an optional list of (pointer-difference expressions, access size)
514 // pairs that can be used to prove that there are no vectorization-preventing
515 // dependencies at runtime. There are is a vectorization-preventing dependency
516 // if any pointer-difference is <u VF * InterleaveCount * access size. Returns
517 // std::nullopt if pointer-difference checks cannot be used.
getDiffChecks()518 std::optional<ArrayRef<PointerDiffInfo>> getDiffChecks() const {
519 if (!CanUseDiffCheck)
520 return std::nullopt;
521 return {DiffChecks};
522 }
523
524 /// Decide if we need to add a check between two groups of pointers,
525 /// according to needsChecking.
526 bool needsChecking(const RuntimeCheckingPtrGroup &M,
527 const RuntimeCheckingPtrGroup &N) const;
528
529 /// Returns the number of run-time checks required according to
530 /// needsChecking.
getNumberOfChecks()531 unsigned getNumberOfChecks() const { return Checks.size(); }
532
533 /// Print the list run-time memory checks necessary.
534 void print(raw_ostream &OS, unsigned Depth = 0) const;
535
536 /// Print \p Checks.
537 void printChecks(raw_ostream &OS,
538 const SmallVectorImpl<RuntimePointerCheck> &Checks,
539 unsigned Depth = 0) const;
540
541 /// This flag indicates if we need to add the runtime check.
542 bool Need = false;
543
544 /// Information about the pointers that may require checking.
545 SmallVector<PointerInfo, 2> Pointers;
546
547 /// Holds a partitioning of pointers into "check groups".
548 SmallVector<RuntimeCheckingPtrGroup, 2> CheckingGroups;
549
550 /// Check if pointers are in the same partition
551 ///
552 /// \p PtrToPartition contains the partition number for pointers (-1 if the
553 /// pointer belongs to multiple partitions).
554 static bool
555 arePointersInSamePartition(const SmallVectorImpl<int> &PtrToPartition,
556 unsigned PtrIdx1, unsigned PtrIdx2);
557
558 /// Decide whether we need to issue a run-time check for pointer at
559 /// index \p I and \p J to prove their independence.
560 bool needsChecking(unsigned I, unsigned J) const;
561
562 /// Return PointerInfo for pointer at index \p PtrIdx.
getPointerInfo(unsigned PtrIdx)563 const PointerInfo &getPointerInfo(unsigned PtrIdx) const {
564 return Pointers[PtrIdx];
565 }
566
getSE()567 ScalarEvolution *getSE() const { return SE; }
568
569 private:
570 /// Groups pointers such that a single memcheck is required
571 /// between two different groups. This will clear the CheckingGroups vector
572 /// and re-compute it. We will only group dependecies if \p UseDependencies
573 /// is true, otherwise we will create a separate group for each pointer.
574 void groupChecks(MemoryDepChecker::DepCandidates &DepCands,
575 bool UseDependencies);
576
577 /// Generate the checks and return them.
578 SmallVector<RuntimePointerCheck, 4> generateChecks();
579
580 /// Try to create add a new (pointer-difference, access size) pair to
581 /// DiffCheck for checking groups \p CGI and \p CGJ. If pointer-difference
582 /// checks cannot be used for the groups, set CanUseDiffCheck to false.
583 bool tryToCreateDiffCheck(const RuntimeCheckingPtrGroup &CGI,
584 const RuntimeCheckingPtrGroup &CGJ);
585
586 MemoryDepChecker &DC;
587
588 /// Holds a pointer to the ScalarEvolution analysis.
589 ScalarEvolution *SE;
590
591 /// Set of run-time checks required to establish independence of
592 /// otherwise may-aliasing pointers in the loop.
593 SmallVector<RuntimePointerCheck, 4> Checks;
594
595 /// Flag indicating if pointer-difference checks can be used
596 bool CanUseDiffCheck = true;
597
598 /// A list of (pointer-difference, access size) pairs that can be used to
599 /// prove that there are no vectorization-preventing dependencies.
600 SmallVector<PointerDiffInfo> DiffChecks;
601 };
602
603 /// Drive the analysis of memory accesses in the loop
604 ///
605 /// This class is responsible for analyzing the memory accesses of a loop. It
606 /// collects the accesses and then its main helper the AccessAnalysis class
607 /// finds and categorizes the dependences in buildDependenceSets.
608 ///
609 /// For memory dependences that can be analyzed at compile time, it determines
610 /// whether the dependence is part of cycle inhibiting vectorization. This work
611 /// is delegated to the MemoryDepChecker class.
612 ///
613 /// For memory dependences that cannot be determined at compile time, it
614 /// generates run-time checks to prove independence. This is done by
615 /// AccessAnalysis::canCheckPtrAtRT and the checks are maintained by the
616 /// RuntimePointerCheck class.
617 ///
618 /// If pointers can wrap or can't be expressed as affine AddRec expressions by
619 /// ScalarEvolution, we will generate run-time checks by emitting a
620 /// SCEVUnionPredicate.
621 ///
622 /// Checks for both memory dependences and the SCEV predicates contained in the
623 /// PSE must be emitted in order for the results of this analysis to be valid.
624 class LoopAccessInfo {
625 public:
626 LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetTransformInfo *TTI,
627 const TargetLibraryInfo *TLI, AAResults *AA, DominatorTree *DT,
628 LoopInfo *LI);
629
630 /// Return true we can analyze the memory accesses in the loop and there are
631 /// no memory dependence cycles. Note that for dependences between loads &
632 /// stores with uniform addresses,
633 /// hasStoreStoreDependenceInvolvingLoopInvariantAddress and
634 /// hasLoadStoreDependenceInvolvingLoopInvariantAddress also need to be
635 /// checked.
canVectorizeMemory()636 bool canVectorizeMemory() const { return CanVecMem; }
637
638 /// Return true if there is a convergent operation in the loop. There may
639 /// still be reported runtime pointer checks that would be required, but it is
640 /// not legal to insert them.
hasConvergentOp()641 bool hasConvergentOp() const { return HasConvergentOp; }
642
getRuntimePointerChecking()643 const RuntimePointerChecking *getRuntimePointerChecking() const {
644 return PtrRtChecking.get();
645 }
646
647 /// Number of memchecks required to prove independence of otherwise
648 /// may-alias pointers.
getNumRuntimePointerChecks()649 unsigned getNumRuntimePointerChecks() const {
650 return PtrRtChecking->getNumberOfChecks();
651 }
652
653 /// Return true if the block BB needs to be predicated in order for the loop
654 /// to be vectorized.
655 static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop,
656 DominatorTree *DT);
657
658 /// Returns true if value \p V is loop invariant.
659 bool isInvariant(Value *V) const;
660
getNumStores()661 unsigned getNumStores() const { return NumStores; }
getNumLoads()662 unsigned getNumLoads() const { return NumLoads;}
663
664 /// The diagnostics report generated for the analysis. E.g. why we
665 /// couldn't analyze the loop.
getReport()666 const OptimizationRemarkAnalysis *getReport() const { return Report.get(); }
667
668 /// the Memory Dependence Checker which can determine the
669 /// loop-independent and loop-carried dependences between memory accesses.
getDepChecker()670 const MemoryDepChecker &getDepChecker() const { return *DepChecker; }
671
672 /// Return the list of instructions that use \p Ptr to read or write
673 /// memory.
getInstructionsForAccess(Value * Ptr,bool isWrite)674 SmallVector<Instruction *, 4> getInstructionsForAccess(Value *Ptr,
675 bool isWrite) const {
676 return DepChecker->getInstructionsForAccess(Ptr, isWrite);
677 }
678
679 /// If an access has a symbolic strides, this maps the pointer value to
680 /// the stride symbol.
getSymbolicStrides()681 const DenseMap<Value *, const SCEV *> &getSymbolicStrides() const {
682 return SymbolicStrides;
683 }
684
685 /// Print the information about the memory accesses in the loop.
686 void print(raw_ostream &OS, unsigned Depth = 0) const;
687
688 /// Return true if the loop has memory dependence involving two stores to an
689 /// invariant address, else return false.
hasStoreStoreDependenceInvolvingLoopInvariantAddress()690 bool hasStoreStoreDependenceInvolvingLoopInvariantAddress() const {
691 return HasStoreStoreDependenceInvolvingLoopInvariantAddress;
692 }
693
694 /// Return true if the loop has memory dependence involving a load and a store
695 /// to an invariant address, else return false.
hasLoadStoreDependenceInvolvingLoopInvariantAddress()696 bool hasLoadStoreDependenceInvolvingLoopInvariantAddress() const {
697 return HasLoadStoreDependenceInvolvingLoopInvariantAddress;
698 }
699
700 /// Return the list of stores to invariant addresses.
getStoresToInvariantAddresses()701 ArrayRef<StoreInst *> getStoresToInvariantAddresses() const {
702 return StoresToInvariantAddresses;
703 }
704
705 /// Used to add runtime SCEV checks. Simplifies SCEV expressions and converts
706 /// them to a more usable form. All SCEV expressions during the analysis
707 /// should be re-written (and therefore simplified) according to PSE.
708 /// A user of LoopAccessAnalysis will need to emit the runtime checks
709 /// associated with this predicate.
getPSE()710 const PredicatedScalarEvolution &getPSE() const { return *PSE; }
711
private:
  /// Analyze the loop. Returns true if all memory accesses in the loop can be
  /// vectorized.
  bool analyzeLoop(AAResults *AA, LoopInfo *LI, const TargetLibraryInfo *TLI,
                   DominatorTree *DT);

  /// Check if the structure of the loop allows it to be analyzed by this
  /// pass.
  bool canAnalyzeLoop();

  /// Save an analysis remark.
  ///
  /// LAA does not directly emit remarks. Instead it stores them so that a
  /// client can retrieve and present them as its own analysis
  /// (e.g. -Rpass-analysis=loop-vectorize).
  OptimizationRemarkAnalysis &recordAnalysis(StringRef RemarkName,
                                             Instruction *Instr = nullptr);

  /// Collect memory accesses with loop-invariant strides.
  ///
  /// Looks for accesses like "a[i * StrideA]" where "StrideA" is loop
  /// invariant.
  void collectStridedAccess(Value *LoadOrStoreInst);

  // Emits (records) the first unsafe memory dependence in the loop.
  // Emits nothing if there are no unsafe dependences
  // or if the dependences were not recorded.
  void emitUnsafeDependenceRemark();

  /// Predicated SCEV wrapper; holds the run-time predicates accumulated
  /// while analyzing this loop.
  std::unique_ptr<PredicatedScalarEvolution> PSE;

  /// We need to check that all of the pointers in this list are disjoint
  /// at runtime. Using std::unique_ptr to make using the move ctor simpler.
  std::unique_ptr<RuntimePointerChecking> PtrRtChecking;

  /// The memory dependence checker, which can determine the
  /// loop-independent and loop-carried dependences between memory accesses.
  std::unique_ptr<MemoryDepChecker> DepChecker;

  /// The loop this analysis applies to.
  Loop *TheLoop;

  // Counts of load and store instructions seen in the loop.
  unsigned NumLoads = 0;
  unsigned NumStores = 0;

  /// Cache the result of analyzeLoop.
  bool CanVecMem = false;
  // NOTE(review): presumably set when the loop contains a convergent
  // operation — confirm where analyzeLoop assigns it.
  bool HasConvergentOp = false;

  /// Indicator that there are two non-vectorizable stores to the same uniform
  /// (loop-invariant) address.
  bool HasStoreStoreDependenceInvolvingLoopInvariantAddress = false;
  /// Indicator that there is a non-vectorizable load and store to the same
  /// uniform (loop-invariant) address.
  bool HasLoadStoreDependenceInvolvingLoopInvariantAddress = false;

  /// List of stores to invariant addresses.
  SmallVector<StoreInst *> StoresToInvariantAddresses;

  /// The diagnostics report generated for the analysis, e.g. why we
  /// couldn't analyze the loop.
  std::unique_ptr<OptimizationRemarkAnalysis> Report;

  /// If an access has a symbolic stride, this maps the pointer value to
  /// the stride symbol.
  DenseMap<Value *, const SCEV *> SymbolicStrides;
};
778
/// Return the SCEV corresponding to pointer \p Ptr with its symbolic stride
/// replaced by the constant one, assuming the SCEV predicate associated with
/// \p PSE is true.
///
/// If necessary this method will version the stride of the pointer according
/// to \p PtrToStride and therefore add further predicates to \p PSE.
///
/// \p PtrToStride provides the mapping between the pointer value and its
/// stride as collected by LoopVectorizationLegality::collectStridedAccess.
const SCEV *
replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
                          const DenseMap<Value *, const SCEV *> &PtrToStride,
                          Value *Ptr);
792
/// If the pointer \p Ptr has a constant stride, return it in units of the
/// access type size. If the pointer is loop-invariant, return 0. Otherwise
/// return std::nullopt.
///
/// Ensure that the access does not wrap in the address space, assuming the
/// predicate associated with \p PSE is true.
///
/// If necessary this method will version the stride of the pointer according
/// to \p StridesMap and therefore add further predicates to \p PSE.
/// The \p Assume parameter indicates if we are allowed to make additional
/// run-time assumptions; \p ShouldCheckWrap controls the no-wrap check.
///
/// Note that the analysis results are defined if-and-only-if the original
/// memory access was defined. If that access was dead, or UB, then the
/// result of this function is undefined.
std::optional<int64_t>
getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
             const Loop *Lp,
             const DenseMap<Value *, const SCEV *> &StridesMap = DenseMap<Value *, const SCEV *>(),
             bool Assume = false, bool ShouldCheckWrap = true);
813
/// Returns the distance between the pointers \p PtrA and \p PtrB iff they are
/// compatible and it is possible to calculate the distance between them; it
/// returns std::nullopt otherwise. This is a simple API that does not depend
/// on the analysis pass.
/// \param StrictCheck Ensure that the calculated distance matches the
/// type-based one after all the bitcasts removal in the provided pointers.
/// \param CheckType Require the element types of both pointers to match.
std::optional<int> getPointersDiff(Type *ElemTyA, Value *PtrA, Type *ElemTyB,
                                   Value *PtrB, const DataLayout &DL,
                                   ScalarEvolution &SE,
                                   bool StrictCheck = false,
                                   bool CheckType = true);
824
/// Attempt to sort the pointers in \p VL and return the sorted indices
/// in \p SortedIndices, if reordering is required.
///
/// Returns 'true' if sorting is legal, otherwise returns 'false'.
///
/// For example, for a given \p VL of memory accesses in program order, a[i+4],
/// a[i+0], a[i+1] and a[i+7], this function will sort the \p VL and save the
/// sorted indices in \p SortedIndices as a[i+0], a[i+1], a[i+4], a[i+7]; i.e.
/// the index of each access within the sorted order, expressed in program
/// order, is stored in \p SortedIndices as <1,2,0,3>.
bool sortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy, const DataLayout &DL,
                     ScalarEvolution &SE,
                     SmallVectorImpl<unsigned> &SortedIndices);
838
/// Returns true if the memory operations \p A and \p B are consecutive.
/// This is a simple API that does not depend on the analysis pass.
/// \param CheckType Require the accessed element types to match.
bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
                         ScalarEvolution &SE, bool CheckType = true);
843
/// On-demand manager of LoopAccessInfo results, one per analyzed loop,
/// built from the analyses supplied at construction time.
class LoopAccessInfoManager {
  /// The cache: maps a loop to its (lazily built) LoopAccessInfo.
  DenseMap<Loop *, std::unique_ptr<LoopAccessInfo>> LoopAccessInfoMap;

  // The used analysis passes.
  ScalarEvolution &SE;
  AAResults &AA;
  DominatorTree &DT;
  LoopInfo &LI;
  // NOTE(review): pointer analyses — presumably optional/nullable; confirm
  // at the call sites of getInfo.
  TargetTransformInfo *TTI;
  const TargetLibraryInfo *TLI = nullptr;

public:
  LoopAccessInfoManager(ScalarEvolution &SE, AAResults &AA, DominatorTree &DT,
                        LoopInfo &LI, TargetTransformInfo *TTI,
                        const TargetLibraryInfo *TLI)
      : SE(SE), AA(AA), DT(DT), LI(LI), TTI(TTI), TLI(TLI) {}

  /// Return the LoopAccessInfo for loop \p L (cached in LoopAccessInfoMap).
  const LoopAccessInfo &getInfo(Loop &L);

  /// Drop all cached results.
  void clear();

  /// Invalidation hook for the pass manager; returns true if the cached
  /// results are invalidated by \p PA.
  bool invalidate(Function &F, const PreservedAnalyses &PA,
                  FunctionAnalysisManager::Invalidator &Inv);
};
869
870 /// This analysis provides dependence information for the memory
871 /// accesses of a loop.
872 ///
873 /// It runs the analysis for a loop on demand. This can be initiated by
874 /// querying the loop access info via AM.getResult<LoopAccessAnalysis>.
875 /// getResult return a LoopAccessInfo object. See this class for the
876 /// specifics of what information is provided.
877 class LoopAccessAnalysis
878 : public AnalysisInfoMixin<LoopAccessAnalysis> {
879 friend AnalysisInfoMixin<LoopAccessAnalysis>;
880 static AnalysisKey Key;
881
882 public:
883 typedef LoopAccessInfoManager Result;
884
885 Result run(Function &F, FunctionAnalysisManager &AM);
886 };
887
getSource(const MemoryDepChecker & DepChecker)888 inline Instruction *MemoryDepChecker::Dependence::getSource(
889 const MemoryDepChecker &DepChecker) const {
890 return DepChecker.getMemoryInstructions()[Source];
891 }
892
getDestination(const MemoryDepChecker & DepChecker)893 inline Instruction *MemoryDepChecker::Dependence::getDestination(
894 const MemoryDepChecker &DepChecker) const {
895 return DepChecker.getMemoryInstructions()[Destination];
896 }
897
898 } // End llvm namespace
899
900 #endif
901