Lines Matching +full:atomic +full:- +full:threshold +full:- +full:us

1 //===- LoopAccessAnalysis.cpp - Loop Access Analysis Implementation --------==//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
12 //===----------------------------------------------------------------------===//
71 #define DEBUG_TYPE "loop-accesses"
74 VectorizationFactor("force-vector-width", cl::Hidden,
80 VectorizationInterleave("force-vector-interleave", cl::Hidden,
88 "runtime-memory-check-threshold", cl::Hidden,
96 "memory-check-merge-threshold", cl::Hidden,
104 /// We collect dependences up to this threshold.
106 MaxDependences("max-dependences", cl::Hidden,
108 "loop-access analysis (default = 100)"),
123 "enable-mem-access-versioning", cl::init(true), cl::Hidden,
126 /// Enable store-to-load forwarding conflict detection. This option can
129 "store-to-load-forwarding-conflict-detection", cl::Hidden,
130 cl::desc("Enable conflict detection in loop-access analysis"),
134 "max-forked-scev-depth", cl::Hidden,
139 "laa-speculate-unit-stride", cl::Hidden,
140 cl::desc("Speculate that non-constant strides are unit in LAA"),
144 "hoist-runtime-checks", cl::Hidden,
163 // For a non-symbolic stride, just return the original expression. in replaceSymbolicStrideSCEV()
166 const SCEV *StrideSCEV = SI->second; in replaceSymbolicStrideSCEV()
174 const auto *CT = SE->getOne(StrideSCEV->getType()); in replaceSymbolicStrideSCEV()
175 PSE.addPredicate(*SE->getEqualPredicate(StrideSCEV, CT)); in replaceSymbolicStrideSCEV()
187 .PointerValue->getType() in RuntimeCheckingPtrGroup()
188 ->getPointerAddressSpace()), in RuntimeCheckingPtrGroup()
194 /// Let's assume A is the first access and B is a memory access on N-th loop
198 /// N is a calculated back-edge taken count:
199 /// N = (TripCount > 0) ? RoundDown(TripCount -1 , VF) : 0
215 {SE->getCouldNotCompute(), SE->getCouldNotCompute()}}); in getStartAndEndForAccess()
217 return Iter->second; in getStartAndEndForAccess()
222 if (SE->isLoopInvariant(PtrExpr, Lp)) { in getStartAndEndForAccess()
227 ScStart = AR->getStart(); in getStartAndEndForAccess()
228 ScEnd = AR->evaluateAtIteration(Ex, *SE); in getStartAndEndForAccess()
229 const SCEV *Step = AR->getStepRecurrence(*SE); in getStartAndEndForAccess()
234 if (CStep->getValue()->isNegative()) in getStartAndEndForAccess()
240 ScStart = SE->getUMinExpr(ScStart, ScEnd); in getStartAndEndForAccess()
241 ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd); in getStartAndEndForAccess()
244 return {SE->getCouldNotCompute(), SE->getCouldNotCompute()}; in getStartAndEndForAccess()
246 assert(SE->isLoopInvariant(ScStart, Lp) && "ScStart needs to be invariant"); in getStartAndEndForAccess()
247 assert(SE->isLoopInvariant(ScEnd, Lp)&& "ScEnd needs to be invariant"); in getStartAndEndForAccess()
250 auto &DL = Lp->getHeader()->getDataLayout(); in getStartAndEndForAccess()
251 Type *IdxTy = DL.getIndexType(PtrExpr->getType()); in getStartAndEndForAccess()
252 const SCEV *EltSizeSCEV = SE->getStoreSizeOfExpr(IdxTy, AccessTy); in getStartAndEndForAccess()
253 ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV); in getStartAndEndForAccess()
255 Iter->second = {ScStart, ScEnd}; in getStartAndEndForAccess()
256 return Iter->second; in getStartAndEndForAccess()
288 if (!DC.getOrderForAccess(Src->PointerValue, !Src->IsWritePtr).empty() || in tryToCreateDiffCheck()
289 !DC.getOrderForAccess(Sink->PointerValue, !Sink->IsWritePtr).empty()) in tryToCreateDiffCheck()
293 DC.getOrderForAccess(Src->PointerValue, Src->IsWritePtr); in tryToCreateDiffCheck()
295 DC.getOrderForAccess(Sink->PointerValue, Sink->IsWritePtr); in tryToCreateDiffCheck()
305 auto *SrcAR = dyn_cast<SCEVAddRecExpr>(Src->Expr); in tryToCreateDiffCheck()
306 auto *SinkAR = dyn_cast<SCEVAddRecExpr>(Sink->Expr); in tryToCreateDiffCheck()
307 if (!SrcAR || !SinkAR || SrcAR->getLoop() != DC.getInnermostLoop() || in tryToCreateDiffCheck()
308 SinkAR->getLoop() != DC.getInnermostLoop()) in tryToCreateDiffCheck()
312 DC.getInstructionsForAccess(Src->PointerValue, Src->IsWritePtr); in tryToCreateDiffCheck()
314 DC.getInstructionsForAccess(Sink->PointerValue, Sink->IsWritePtr); in tryToCreateDiffCheck()
321 SinkAR->getLoop()->getHeader()->getDataLayout(); in tryToCreateDiffCheck()
328 auto *Step = dyn_cast<SCEVConstant>(SinkAR->getStepRecurrence(*SE)); in tryToCreateDiffCheck()
329 if (!Step || Step != SrcAR->getStepRecurrence(*SE) || in tryToCreateDiffCheck()
330 Step->getAPInt().abs() != AllocSize) in tryToCreateDiffCheck()
334 IntegerType::get(Src->PointerValue->getContext(), in tryToCreateDiffCheck()
338 if (Step->getValue()->isNegative()) in tryToCreateDiffCheck()
341 const SCEV *SinkStartInt = SE->getPtrToIntExpr(SinkAR->getStart(), IntTy); in tryToCreateDiffCheck()
342 const SCEV *SrcStartInt = SE->getPtrToIntExpr(SrcAR->getStart(), IntTy); in tryToCreateDiffCheck()
347 const Loop *InnerLoop = SrcAR->getLoop(); in tryToCreateDiffCheck()
352 if (HoistRuntimeChecks && InnerLoop->getParentLoop() && in tryToCreateDiffCheck()
356 const Loop *StartARLoop = SrcStartAR->getLoop(); in tryToCreateDiffCheck()
357 if (StartARLoop == SinkStartAR->getLoop() && in tryToCreateDiffCheck()
358 StartARLoop == InnerLoop->getParentLoop() && in tryToCreateDiffCheck()
362 SrcStartAR->getStepRecurrence(*SE) != in tryToCreateDiffCheck()
363 SinkStartAR->getStepRecurrence(*SE)) { in tryToCreateDiffCheck()
374 Src->NeedsFreeze || Sink->NeedsFreeze); in tryToCreateDiffCheck()
415 const SCEV *Diff = SE->getMinusSCEV(J, I); in getMinFromExprs()
420 return C->getValue()->isNegative() ? J : I; in getMinFromExprs()
427 RtCheck.Pointers[Index].PointerValue->getType()->getPointerAddressSpace(), in addPointer()
458 this->NeedsFreeze |= NeedsFreeze; in addPointer()
466 // - We know that pointers in the same equivalence class share in groupChecks()
469 // - We wouldn't be able to merge two pointers for which we need in groupChecks()
477 // - if the difference between this pointer and the min/max bounds in groupChecks()
484 // with a non-constant difference, we shouldn't perform any pointer in groupChecks()
496 // us to perform an accurate check in this case. in groupChecks()
519 It->second.push_back(Index); in groupChecks()
548 auto PointerI = PositionMap.find(MI->getPointer()); in groupChecks()
551 for (unsigned Pointer : PointerI->second) { in groupChecks()
561 // reasonable. If we do end up hitting this threshold, the algorithm in groupChecks()
575 // We couldn't add this pointer to any existing set or the threshold in groupChecks()
591 return (PtrToPartition[PtrIdx1] != -1 && in arePointersInSamePartition()
619 const auto &First = Check1->Members, &Second = Check2->Members; in printChecks()
635 OS.indent(Depth) << "Run-time memory checks:\n"; in print()
687 /// Check if we can emit a run-time no-alias check for \p Access.
689 /// Returns true if we can emit a run-time no alias check for \p Access.
691 /// adds a run-time to check for it to \p RtCheck. If \p Assume is true,
692 /// we will attempt to use additional run-time checks in order to get
702 /// non-intersection.
751 // iteration-local. in adjustAliasScopeList()
752 if (any_of(ScopeList->operands(), [&](Metadata *Scope) { in adjustAliasScopeList()
786 /// Sets of potentially dependent accesses - members of one set share an
800 /// The SCEV predicate containing all the SCEV-related assumptions.
814 /// by adding run-time checks (overflow checks) if necessary.
817 // The bounds for loop-invariant pointer is trivial. in hasComputableBounds()
818 if (PSE.getSE()->isLoopInvariant(PtrScev, L)) in hasComputableBounds()
829 return AR->isAffine(); in hasComputableBounds()
837 if (PSE.getSE()->isLoopInvariant(PtrScev, L)) in isNoWrap()
858 // SCEV does not look through non-header PHIs inside the loop. Such phis in visitPointers()
861 if (PN && InnermostLoop.contains(PN->getParent()) && in visitPointers()
862 PN->getParent() != InnermostLoop.getHeader()) { in visitPointers()
863 for (const Use &Inc : PN->incoming_values()) in visitPointers()
895 const SCEV *Scev = SE->getSCEV(Ptr); in findForkedSCEVs()
896 if (isa<SCEVAddRecExpr>(Scev) || L->isLoopInvariant(Ptr) || in findForkedSCEVs()
902 Depth--; in findForkedSCEVs()
911 return SE->getAddExpr(L, R); in findForkedSCEVs()
913 return SE->getMinusSCEV(L, R); in findForkedSCEVs()
920 unsigned Opcode = I->getOpcode(); in findForkedSCEVs()
924 Type *SourceTy = GEP->getSourceElementType(); in findForkedSCEVs()
927 if (I->getNumOperands() != 2 || SourceTy->isVectorTy()) { in findForkedSCEVs()
933 findForkedSCEVs(SE, L, I->getOperand(0), BaseScevs, Depth); in findForkedSCEVs()
934 findForkedSCEVs(SE, L, I->getOperand(1), OffsetScevs, Depth); in findForkedSCEVs()
953 Type *IntPtrTy = SE->getEffectiveSCEVType( in findForkedSCEVs()
954 SE->getSCEV(GEP->getPointerOperand())->getType()); in findForkedSCEVs()
959 const SCEV *Size = SE->getSizeOfExpr(IntPtrTy, SourceTy); in findForkedSCEVs()
962 const SCEV *Scaled1 = SE->getMulExpr( in findForkedSCEVs()
963 Size, SE->getTruncateOrSignExtend(get<0>(OffsetScevs[0]), IntPtrTy)); in findForkedSCEVs()
964 const SCEV *Scaled2 = SE->getMulExpr( in findForkedSCEVs()
965 Size, SE->getTruncateOrSignExtend(get<0>(OffsetScevs[1]), IntPtrTy)); in findForkedSCEVs()
966 ScevList.emplace_back(SE->getAddExpr(get<0>(BaseScevs[0]), Scaled1), in findForkedSCEVs()
968 ScevList.emplace_back(SE->getAddExpr(get<0>(BaseScevs[1]), Scaled2), in findForkedSCEVs()
977 findForkedSCEVs(SE, L, I->getOperand(1), ChildScevs, Depth); in findForkedSCEVs()
978 findForkedSCEVs(SE, L, I->getOperand(2), ChildScevs, Depth); in findForkedSCEVs()
991 if (I->getNumOperands() == 2) { in findForkedSCEVs()
992 findForkedSCEVs(SE, L, I->getOperand(0), ChildScevs, Depth); in findForkedSCEVs()
993 findForkedSCEVs(SE, L, I->getOperand(1), ChildScevs, Depth); in findForkedSCEVs()
1006 findForkedSCEVs(SE, L, I->getOperand(0), LScevs, Depth); in findForkedSCEVs()
1007 findForkedSCEVs(SE, L, I->getOperand(1), RScevs, Depth); in findForkedSCEVs()
1046 assert(SE->isSCEVable(Ptr->getType()) && "Value is not SCEVable!"); in findForkedPointer()
1054 SE->isLoopInvariant(get<0>(Scevs[0]), L)) && in findForkedPointer()
1056 SE->isLoopInvariant(get<0>(Scevs[1]), L))) { in findForkedPointer()
1205 // dependence sets (in which case RunningDepId > 2) or if we need to re-try in canCheckPtrAtRT()
1210 // We need to perform run-time alias checks, but some pointers had bounds in canCheckPtrAtRT()
1252 unsigned ASi = PtrI->getType()->getPointerAddressSpace(); in canCheckPtrAtRT()
1253 unsigned ASj = PtrJ->getType()->getPointerAddressSpace(); in canCheckPtrAtRT()
1269 // If we can do run-time checks, but there are no checks, no runtime checks in canCheckPtrAtRT()
1281 // We process the set twice: first we process read-write pointers, last we in processMemAccesses()
1282 // process read-only pointers. This allows us to skip dependence tests for in processMemAccesses()
1283 // read-only pointers. in processMemAccesses()
1292 : (ReadOnlyPtr.count(A.getPointer()) ? "read-only" in processMemAccesses()
1298 // compatibility and potential for underlying-object overlap. As a result, we in processMemAccesses()
1302 // Note that both the alias-set tracker and the alias sets themselves used in processMemAccesses()
1317 // and then to process read-only pointers. in processMemAccesses()
1342 "Alias-set pointer not in the access set?"); in processMemAccesses()
1347 // Memorize read-only pointers for later processing and skip them in in processMemAccesses()
1350 // consecutive as "read-only" pointers (so that we check in processMemAccesses()
1359 // If this is a write - check other reads and writes for conflicts. If in processMemAccesses()
1361 // there is no other write to the ptr - this is an optimization to in processMemAccesses()
1386 TheLoop->getHeader()->getParent(), in processMemAccesses()
1387 UnderlyingObj->getType()->getPointerAddressSpace())) in processMemAccesses()
1393 DepCands.unionSets(Access, Prev->second); in processMemAccesses()
1404 /// Return true if an AddRec pointer \p Ptr is unsigned non-wrapping,
1410 if (AR->getNoWrapFlags(SCEV::NoWrapMask)) in isNoWrapAddRec()
1416 // Scalar evolution does not propagate the non-wrapping flags to values that in isNoWrapAddRec()
1417 // are derived from a non-wrapping induction variable because non-wrapping in isNoWrapAddRec()
1418 // could be flow-sensitive. in isNoWrapAddRec()
1421 // non-wrapping for the *specific* value of Ptr. in isNoWrapAddRec()
1425 if (!GEP || !GEP->isInBounds()) in isNoWrapAddRec()
1428 // Make sure there is only one non-const index and analyze that. in isNoWrapAddRec()
1430 for (Value *Index : GEP->indices()) in isNoWrapAddRec()
1440 // The index in GEP is signed. It is non-wrapping if it's derived from a NSW in isNoWrapAddRec()
1443 if (OBO->hasNoSignedWrap() && in isNoWrapAddRec()
1446 isa<ConstantInt>(OBO->getOperand(1))) { in isNoWrapAddRec()
1447 auto *OpScev = PSE.getSCEV(OBO->getOperand(0)); in isNoWrapAddRec()
1450 return OpAR->getLoop() == L && OpAR->getNoWrapFlags(SCEV::FlagNSW); in isNoWrapAddRec()
1463 if (PSE.getSE()->isLoopInvariant(PtrScev, Lp)) in getPtrStride()
1466 Type *Ty = Ptr->getType(); in getPtrStride()
1467 assert(Ty->isPointerTy() && "Unexpected non-ptr"); in getPtrStride()
1469 LLVM_DEBUG(dbgs() << "LAA: Bad stride - Scalable object: " << *AccessTy in getPtrStride()
1479 LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer " << *Ptr in getPtrStride()
1485 if (Lp != AR->getLoop()) { in getPtrStride()
1486 LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop " in getPtrStride()
1492 const SCEV *Step = AR->getStepRecurrence(*PSE.getSE()); in getPtrStride()
1497 LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr in getPtrStride()
1502 auto &DL = Lp->getHeader()->getDataLayout(); in getPtrStride()
1505 const APInt &APStepVal = C->getAPInt(); in getPtrStride()
1507 // Huge step value - give up. in getPtrStride()
1532 GEP && GEP->isInBounds() && (Stride == 1 || Stride == -1)) in getPtrStride()
1538 unsigned AddrSpace = Ty->getPointerAddressSpace(); in getPtrStride()
1539 if (!NullPointerIsDefined(Lp->getHeader()->getParent(), AddrSpace) && in getPtrStride()
1540 (Stride == 1 || Stride == -1)) in getPtrStride()
1552 dbgs() << "LAA: Bad stride - Pointer may wrap in the address space " in getPtrStride()
1562 assert(PtrA && PtrB && "Expected non-nullptr pointers."); in getPointersDiff()
1572 unsigned ASA = PtrA->getType()->getPointerAddressSpace(); in getPointersDiff()
1573 unsigned ASB = PtrB->getType()->getPointerAddressSpace(); in getPointersDiff()
1581 Value *PtrA1 = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA); in getPointersDiff()
1582 Value *PtrB1 = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB); in getPointersDiff()
1588 ASA = cast<PointerType>(PtrA1->getType())->getAddressSpace(); in getPointersDiff()
1589 ASB = cast<PointerType>(PtrB1->getType())->getAddressSpace(); in getPointersDiff()
1598 OffsetB -= OffsetA; in getPointersDiff()
1608 Val = Diff->getAPInt().getSExtValue(); in getPointersDiff()
1613 // Ensure that the calculated distance matches the type-based one after all in getPointersDiff()
1624 VL, [](const Value *V) { return V->getType()->isPointerTy(); }) && in sortPtrAccesses()
1651 // Fill SortedIndices array only if it is non-consecutive. in sortPtrAccesses()
1675 visitPointers(SI->getPointerOperand(), *InnermostLoop, in addAccess()
1684 visitPointers(LI->getPointerOperand(), *InnermostLoop, in addAccess()
1752 // factor store-load forwarding does not take place. in couldPreventStoreLoadForward()
1754 // prevent store-load forwarding making vectorized code run a lot slower. in couldPreventStoreLoadForward()
1755 // a[i] = a[i-3] ^ a[i-8]; in couldPreventStoreLoadForward()
1756 // The stores to a[i:i+1] don't align with the stores to a[i-3:i-2] and in couldPreventStoreLoadForward()
1757 // hence on your typical architecture store-load forwarding does not take in couldPreventStoreLoadForward()
1759 // Store-load forwarding distance. in couldPreventStoreLoadForward()
1761 // After this many iterations store-to-load forwarding conflicts should not in couldPreventStoreLoadForward()
1782 << " that could cause a store-load forwarding conflict\n"); in couldPreventStoreLoadForward()
1798 /// Given a dependence-distance \p Dist between two
1839 uint64_t ProductTypeSizeBits = DL.getTypeSizeInBits(Product->getType()); in isSafeDependenceDistance()
1843 // backedgeTakenCount is non-negative, so we zero extend Product. in isSafeDependenceDistance()
1847 CastedDist = SE.getNoopOrSignExtend(&Dist, Product->getType()); in isSafeDependenceDistance()
1849 // Is Dist - (MaxBTC * Step) > 0 ? in isSafeDependenceDistance()
1855 // Second try: Is -Dist - (MaxBTC * Step) > 0 ? in isSafeDependenceDistance()
1856 // (If so, then we have proven (**) because |Dist| >= -1*Dist) in isSafeDependenceDistance()
1870 assert(TypeByteSize > 0 && "The type size in byte must be non-zero"); in areStridedAccessesIndependent()
1871 assert(Distance > 0 && "The distance must be non-zero"); in areStridedAccessesIndependent()
1903 const auto &DL = InnermostLoop->getHeader()->getDataLayout(); in getDependenceDistanceStrideAndSize()
1916 if (APtr->getType()->getPointerAddressSpace() != in getDependenceDistanceStrideAndSize()
1917 BPtr->getType()->getPointerAddressSpace()) in getDependenceDistanceStrideAndSize()
1946 // sink are loop invariant to avoid compile-time increases. This is not in getDependenceDistanceStrideAndSize()
1969 // If either Src or Sink are not strided (i.e. not a non-wrapping AddRec) and in getDependenceDistanceStrideAndSize()
1970 // not loop-invariant (stride will be 0 in that case), we cannot analyze the in getDependenceDistanceStrideAndSize()
1973 LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n"); in getDependenceDistanceStrideAndSize()
2025 // non-constant distance dependencies. in isDependent()
2032 auto &DL = InnermostLoop->getHeader()->getDataLayout(); in isDependent()
2049 const APInt &Val = C->getAPInt(); in isDependent()
2095 couldPreventStoreLoadForward(C->getAPInt().abs().getZExtValue(), in isDependent()
2098 dbgs() << "LAA: Forward but may prevent st->ld forwarding\n"); in isDependent()
2116 // FoundNonConstantDistanceDependence to force re-trying with runtime in isDependent()
2118 // original behavior w.r.t. re-trying with runtime checks. in isDependent()
2127 LLVM_DEBUG(dbgs() << "LAA: ReadWrite-Write positive dependency with " in isDependent()
2135 // Bail out early if passed-in parameters make vectorization not feasible. in isDependent()
2160 // 4 * 2 * (MinNumIter - 1). MinDistance needs for the last iteration: 4. in isDependent()
2161 // So the minimum distance needed is: 4 * 2 * (MinNumIter - 1) + 4. in isDependent()
2174 TypeByteSize * *CommonStride * (MinNumIter - 1) + TypeByteSize; in isDependent()
2177 // For non-constant distances, we checked the lower bound of the in isDependent()
2179 // for vectorization). Classify it as Unknown, so we re-try with runtime in isDependent()
2237 // For non-constant distances, we checked the lower bound of the dependence in isDependent()
2239 // vectorization). Classify it as Unknown, so we re-try with runtime checks. in isDependent()
2250 MinDepDistBytes = -1; in areDepsSafe()
2269 bool AIIsWrite = AI->getInt(); in areDepsSafe()
2271 // other stores in the same equivalence class - to the same address. in areDepsSafe()
2326 auto &IndexVector = Accesses.find(Access)->second; in getInstructionsForAccess()
2331 [&](unsigned Idx) { return this->InstMap[Idx]; }); in getInstructionsForAccess()
2349 OS.indent(Depth + 2) << *Instrs[Source] << " -> \n"; in print()
2356 << TheLoop->getHeader()->getParent()->getName() << "' from " in canAnalyzeLoop()
2357 << TheLoop->getLocStr() << "\n"); in canAnalyzeLoop()
2360 if (!TheLoop->isInnermost()) { in canAnalyzeLoop()
2367 if (TheLoop->getNumBackEdges() != 1) { in canAnalyzeLoop()
2378 const SCEV *ExitCount = PSE->getSymbolicMaxBackedgeTakenCount(); in canAnalyzeLoop()
2387 << TheLoop->getHeader()->getName() << "\n"); in canAnalyzeLoop()
2408 PtrRtChecking->Pointers.clear(); in analyzeLoop()
2409 PtrRtChecking->Need = false; in analyzeLoop()
2411 const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel(); in analyzeLoop()
2415 !TheLoop->getHeader()->getParent()->hasOptSize(); in analyzeLoop()
2426 if (Call->isConvergent()) in analyzeLoop()
2430 // With both a non-vectorizable memory instruction and a convergent in analyzeLoop()
2441 for (Metadata *Op : Decl->getScopeList()->operands()) in analyzeLoop()
2457 if (Call && !Call->isNoBuiltin() && Call->getCalledFunction() && in analyzeLoop()
2468 if (!Ld->isSimple() && !IsAnnotatedParallel) { in analyzeLoop()
2470 << "read with atomic ordering or volatile read"; in analyzeLoop()
2471 LLVM_DEBUG(dbgs() << "LAA: Found a non-simple load.\n"); in analyzeLoop()
2477 DepChecker->addAccess(Ld); in analyzeLoop()
2492 if (!St->isSimple() && !IsAnnotatedParallel) { in analyzeLoop()
2494 << "write with atomic ordering or volatile write"; in analyzeLoop()
2495 LLVM_DEBUG(dbgs() << "LAA: Found a non-simple store.\n"); in analyzeLoop()
2501 DepChecker->addAccess(St); in analyzeLoop()
2517 LLVM_DEBUG(dbgs() << "LAA: Found a read-only loop!\n"); in analyzeLoop()
2537 Value *Ptr = ST->getPointerOperand(); in analyzeLoop()
2546 // If we did *not* see this pointer before, insert it to the read-write in analyzeLoop()
2556 if (blockNeedsPredication(ST->getParent(), TheLoop, DT)) in analyzeLoop()
2575 Value *Ptr = LD->getPointerOperand(); in analyzeLoop()
2578 // the read-write list. This allows us to vectorize expressions in analyzeLoop()
2579 // such as A[i] += x; Because the address of A[i] is a read-write in analyzeLoop()
2587 !getPtrStride(*PSE, LD->getType(), Ptr, TheLoop, SymbolicStrides).value_or(0)) { in analyzeLoop()
2604 if (blockNeedsPredication(LD->getParent(), TheLoop, DT)) in analyzeLoop()
2614 // If we write (or read-write) to a single destination and there are no in analyzeLoop()
2617 LLVM_DEBUG(dbgs() << "LAA: Found a write-only loop!\n"); in analyzeLoop()
2629 Accesses.canCheckPtrAtRT(*PtrRtChecking, PSE->getSE(), TheLoop, in analyzeLoop()
2646 DepsAreSafe = DepChecker->areDepsSafe(DependentAccesses, in analyzeLoop()
2649 if (!DepsAreSafe && DepChecker->shouldRetryWithRuntimeCheck()) { in analyzeLoop()
2655 PtrRtChecking->reset(); in analyzeLoop()
2656 PtrRtChecking->Need = true; in analyzeLoop()
2658 auto *SE = PSE->getSE(); in analyzeLoop()
2686 << (PtrRtChecking->Need ? "" : " don't") in analyzeLoop()
2704 if (Found == Deps->end()) in emitUnsafeDependenceRemark()
2717 HasForcedDistribution = mdconst::extract<ConstantInt>(*Op)->getZExtValue(); in emitUnsafeDependenceRemark()
2740 "store-to-load forwarding."; in emitUnsafeDependenceRemark()
2744 "store-to-load forwarding."; in emitUnsafeDependenceRemark()
2755 DebugLoc SourceLoc = I->getDebugLoc(); in emitUnsafeDependenceRemark()
2757 SourceLoc = DD->getDebugLoc(); in emitUnsafeDependenceRemark()
2766 assert(TheLoop->contains(BB) && "Unknown block used"); in blockNeedsPredication()
2769 BasicBlock* Latch = TheLoop->getLoopLatch(); in blockNeedsPredication()
2770 return !DT->dominates(BB, Latch); in blockNeedsPredication()
2777 Value *CodeRegion = TheLoop->getHeader(); in recordAnalysis()
2778 DebugLoc DL = TheLoop->getStartLoc(); in recordAnalysis()
2781 CodeRegion = I->getParent(); in recordAnalysis()
2784 if (I->getDebugLoc()) in recordAnalysis()
2785 DL = I->getDebugLoc(); in recordAnalysis()
2794 auto *SE = PSE->getSE(); in isInvariant()
2796 // trivially loop-invariant FP values to be considered invariant. in isInvariant()
2797 if (!SE->isSCEVable(V->getType())) in isInvariant()
2799 const SCEV *S = SE->getSCEV(V); in isInvariant()
2800 return SE->isLoopInvariant(S, TheLoop); in isInvariant()
2807 const DataLayout &DL = Gep->getDataLayout(); in getGEPInductionOperand()
2808 unsigned LastOperand = Gep->getNumOperands() - 1; in getGEPInductionOperand()
2809 TypeSize GEPAllocSize = DL.getTypeAllocSize(Gep->getResultElementType()); in getGEPInductionOperand()
2812 while (LastOperand > 1 && match(Gep->getOperand(LastOperand), m_Zero())) { in getGEPInductionOperand()
2815 std::advance(GEPTI, LastOperand - 2); in getGEPInductionOperand()
2824 --LastOperand; in getGEPInductionOperand()
2831 /// getGEPInductionOperand. However, if there is some other non-loop-invariant
2842 for (unsigned I = 0, E = GEP->getNumOperands(); I != E; ++I) in stripGetElementPtr()
2844 !SE->isLoopInvariant(SE->getSCEV(GEP->getOperand(I)), Lp)) in stripGetElementPtr()
2846 return GEP->getOperand(InductionOperand); in stripGetElementPtr()
2852 auto *PtrTy = dyn_cast<PointerType>(Ptr->getType()); in getStrideFromPointer()
2853 if (!PtrTy || PtrTy->isAggregateType()) in getStrideFromPointer()
2865 const SCEV *V = SE->getSCEV(Ptr); in getStrideFromPointer()
2870 V = C->getOperand(); in getStrideFromPointer()
2878 if (Lp != S->getLoop()) in getStrideFromPointer()
2881 V = S->getStepRecurrence(*SE); in getStrideFromPointer()
2889 if (M->getOperand(0)->getSCEVType() != scConstant) in getStrideFromPointer()
2892 const APInt &APStepVal = cast<SCEVConstant>(M->getOperand(0))->getAPInt(); in getStrideFromPointer()
2894 // Huge step value - give up. in getStrideFromPointer()
2901 V = M->getOperand(1); in getStrideFromPointer()
2907 if (!SE->isLoopInvariant(V, Lp)) in getStrideFromPointer()
2915 if (isa<SCEVUnknown>(C->getOperand())) in getStrideFromPointer()
2927 // could broaden the scope of values returned here - to anything in collectStridedAccess()
2929 // computation of an interesting IV - but we chose not to as we in collectStridedAccess()
2932 const SCEV *StrideExpr = getStrideFromPointer(Ptr, PSE->getSE(), TheLoop); in collectStridedAccess()
2941 LLVM_DEBUG(dbgs() << " Chose not to due to -laa-speculate-unit-stride\n"); in collectStridedAccess()
2946 // Stride >= Trip-Count. Such a predicate will effectively optimize a single in collectStridedAccess()
2947 // or zero iteration loop, as Trip-Count <= Stride == 1. in collectStridedAccess()
2958 const SCEV *MaxBTC = PSE->getSymbolicMaxBackedgeTakenCount(); in collectStridedAccess()
2962 // The backedgeTakenCount is non-negative, so we zero extend MaxBTC. in collectStridedAccess()
2963 const DataLayout &DL = TheLoop->getHeader()->getDataLayout(); in collectStridedAccess()
2964 uint64_t StrideTypeSizeBits = DL.getTypeSizeInBits(StrideExpr->getType()); in collectStridedAccess()
2965 uint64_t BETypeSizeBits = DL.getTypeSizeInBits(MaxBTC->getType()); in collectStridedAccess()
2968 ScalarEvolution *SE = PSE->getSE(); in collectStridedAccess()
2970 CastedStride = SE->getNoopOrSignExtend(StrideExpr, MaxBTC->getType()); in collectStridedAccess()
2972 CastedBECount = SE->getZeroExtendExpr(MaxBTC, StrideExpr->getType()); in collectStridedAccess()
2973 const SCEV *StrideMinusBETaken = SE->getMinusSCEV(CastedStride, CastedBECount); in collectStridedAccess()
2976 // Stride - MaxBTC> 0 in collectStridedAccess()
2977 if (SE->isKnownPositive(StrideMinusBETaken)) { in collectStridedAccess()
2990 StrideBase = C->getOperand(); in collectStridedAccess()
3003 TTI->getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector); in LoopAccessInfo()
3011 TTI->getRegisterBitWidth(TargetTransformInfo::RGK_ScalableVector); in LoopAccessInfo()
3029 if (PtrRtChecking->Need) in print()
3030 OS << " with run-time checks"; in print()
3038 OS.indent(Depth) << "Report: " << Report->getMsg() << "\n"; in print()
3040 if (auto *Dependences = DepChecker->getDependences()) { in print()
3043 Dep.print(OS, Depth + 2, DepChecker->getMemoryInstructions()); in print()
3049 // List the pair of accesses need run-time checks to prove independence. in print()
3050 PtrRtChecking->print(OS, Depth); in print()
3062 PSE->getPredicate().print(OS, Depth); in print()
3066 OS.indent(Depth) << "Expressions re-written:\n"; in print()
3067 PSE->print(OS, Depth); in print()
3074 It->second = in getInfo()
3077 return *It->second; in getInfo()
3086 if (LAI->getRuntimePointerChecking()->getChecks().empty() && in clear()
3087 LAI->getPSE().getPredicate().isAlwaysTrue()) in clear()