1 //===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // DependenceAnalysis is an LLVM pass that analyses dependences between memory 10 // accesses. Currently, it is an (incomplete) implementation of the approach 11 // described in 12 // 13 // Practical Dependence Testing 14 // Goff, Kennedy, Tseng 15 // PLDI 1991 16 // 17 // There's a single entry point that analyzes the dependence between a pair 18 // of memory references in a function, returning either NULL, for no dependence, 19 // or a more-or-less detailed description of the dependence between them. 20 // 21 // Currently, the implementation cannot propagate constraints between 22 // coupled RDIV subscripts and lacks a multi-subscript MIV test. 23 // Both of these are conservative weaknesses; 24 // that is, not a source of correctness problems. 25 // 26 // Since Clang linearizes some array subscripts, the dependence 27 // analysis is using SCEV->delinearize to recover the representation of multiple 28 // subscripts, and thus avoid the more expensive and less precise MIV tests. The 29 // delinearization is controlled by the flag -da-delinearize. 30 // 31 // We should pay some careful attention to the possibility of integer overflow 32 // in the implementation of the various tests. This could happen with Add, 33 // Subtract, or Multiply, with both APInt's and SCEV's. 34 // 35 // Some non-linear subscript pairs can be handled by the GCD test 36 // (and perhaps other tests). 37 // Should explore how often these things occur. 38 // 39 // Finally, it seems like certain test cases expose weaknesses in the SCEV 40 // simplification, especially in the handling of sign and zero extensions. 41 // It could be useful to spend time exploring these. 42 // 43 // Please note that this is work in progress and the interface is subject to 44 // change. 45 // 46 //===----------------------------------------------------------------------===// 47 // // 48 // In memory of Ken Kennedy, 1945 - 2007 // 49 // // 50 //===----------------------------------------------------------------------===// 51 52 #include "llvm/Analysis/DependenceAnalysis.h" 53 #include "llvm/ADT/Statistic.h" 54 #include "llvm/Analysis/AliasAnalysis.h" 55 #include "llvm/Analysis/Delinearization.h" 56 #include "llvm/Analysis/LoopInfo.h" 57 #include "llvm/Analysis/ScalarEvolution.h" 58 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 59 #include "llvm/Analysis/ValueTracking.h" 60 #include "llvm/IR/InstIterator.h" 61 #include "llvm/IR/Module.h" 62 #include "llvm/InitializePasses.h" 63 #include "llvm/Support/CommandLine.h" 64 #include "llvm/Support/Debug.h" 65 #include "llvm/Support/ErrorHandling.h" 66 #include "llvm/Support/raw_ostream.h" 67 68 using namespace llvm; 69 70 #define DEBUG_TYPE "da" 71 72 //===----------------------------------------------------------------------===// 73 // statistics 74 75 STATISTIC(TotalArrayPairs, "Array pairs tested"); 76 STATISTIC(SeparableSubscriptPairs, "Separable subscript pairs"); 77 STATISTIC(CoupledSubscriptPairs, "Coupled subscript pairs"); 78 STATISTIC(NonlinearSubscriptPairs, "Nonlinear subscript pairs"); 79 STATISTIC(ZIVapplications, "ZIV applications"); 80 STATISTIC(ZIVindependence, "ZIV independence"); 81 STATISTIC(StrongSIVapplications, "Strong SIV applications"); 82 STATISTIC(StrongSIVsuccesses, "Strong SIV successes"); 83 STATISTIC(StrongSIVindependence, "Strong SIV independence"); 84 STATISTIC(WeakCrossingSIVapplications, "Weak-Crossing SIV applications"); 85 STATISTIC(WeakCrossingSIVsuccesses, "Weak-Crossing SIV successes"); 86 STATISTIC(WeakCrossingSIVindependence, "Weak-Crossing SIV independence"); 87 STATISTIC(ExactSIVapplications, "Exact SIV applications"); 88 STATISTIC(ExactSIVsuccesses, "Exact SIV successes"); 89 STATISTIC(ExactSIVindependence, "Exact SIV independence"); 90 STATISTIC(WeakZeroSIVapplications, "Weak-Zero SIV applications"); 91 STATISTIC(WeakZeroSIVsuccesses, "Weak-Zero SIV successes"); 92 STATISTIC(WeakZeroSIVindependence, "Weak-Zero SIV independence"); 93 STATISTIC(ExactRDIVapplications, "Exact RDIV applications"); 94 STATISTIC(ExactRDIVindependence, "Exact RDIV independence"); 95 STATISTIC(SymbolicRDIVapplications, "Symbolic RDIV applications"); 96 STATISTIC(SymbolicRDIVindependence, "Symbolic RDIV independence"); 97 STATISTIC(DeltaApplications, "Delta applications"); 98 STATISTIC(DeltaSuccesses, "Delta successes"); 99 STATISTIC(DeltaIndependence, "Delta independence"); 100 STATISTIC(DeltaPropagations, "Delta propagations"); 101 STATISTIC(GCDapplications, "GCD applications"); 102 STATISTIC(GCDsuccesses, "GCD successes"); 103 STATISTIC(GCDindependence, "GCD independence"); 104 STATISTIC(BanerjeeApplications, "Banerjee applications"); 105 STATISTIC(BanerjeeIndependence, "Banerjee independence"); 106 STATISTIC(BanerjeeSuccesses, "Banerjee successes"); 107 108 static cl::opt<bool> 109 Delinearize("da-delinearize", cl::init(true), cl::Hidden, 110 cl::desc("Try to delinearize array references.")); 111 static cl::opt<bool> DisableDelinearizationChecks( 112 "da-disable-delinearization-checks", cl::Hidden, 113 cl::desc( 114 "Disable checks that try to statically verify validity of " 115 "delinearized subscripts. Enabling this option may result in incorrect " 116 "dependence vectors for languages that allow the subscript of one " 117 "dimension to underflow or overflow into another dimension.")); 118 119 static cl::opt<unsigned> MIVMaxLevelThreshold( 120 "da-miv-max-level-threshold", cl::init(7), cl::Hidden, 121 cl::desc("Maximum depth allowed for the recursive algorithm used to " 122 "explore MIV direction vectors.")); 123 124 //===----------------------------------------------------------------------===// 125 // basics 126 127 DependenceAnalysis::Result 128 DependenceAnalysis::run(Function &F, FunctionAnalysisManager &FAM) { 129 auto &AA = FAM.getResult<AAManager>(F); 130 auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F); 131 auto &LI = FAM.getResult<LoopAnalysis>(F); 132 return DependenceInfo(&F, &AA, &SE, &LI); 133 } 134 135 AnalysisKey DependenceAnalysis::Key; 136 137 INITIALIZE_PASS_BEGIN(DependenceAnalysisWrapperPass, "da", 138 "Dependence Analysis", true, true) 139 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 140 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) 141 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 142 INITIALIZE_PASS_END(DependenceAnalysisWrapperPass, "da", "Dependence Analysis", 143 true, true) 144 145 char DependenceAnalysisWrapperPass::ID = 0; 146 147 DependenceAnalysisWrapperPass::DependenceAnalysisWrapperPass() 148 : FunctionPass(ID) {} 149 150 FunctionPass *llvm::createDependenceAnalysisWrapperPass() { 151 return new DependenceAnalysisWrapperPass(); 152 } 153 154 bool DependenceAnalysisWrapperPass::runOnFunction(Function &F) { 155 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); 156 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 157 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 158 info.reset(new DependenceInfo(&F, &AA, &SE, &LI)); 159 return false; 160 } 161 162 DependenceInfo &DependenceAnalysisWrapperPass::getDI() const { return *info; } 163 164 void DependenceAnalysisWrapperPass::releaseMemory() { info.reset(); } 165 166 void DependenceAnalysisWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { 167 AU.setPreservesAll(); 168 AU.addRequiredTransitive<AAResultsWrapperPass>(); 169 AU.addRequiredTransitive<ScalarEvolutionWrapperPass>(); 170 AU.addRequiredTransitive<LoopInfoWrapperPass>(); 171 } 172 173 // Used to test the dependence analyzer. 174 // Looks through the function, noting instructions that may access memory. 175 // Calls depends() on every possible pair and prints out the result. 176 // Ignores all other instructions. 177 static void dumpExampleDependence(raw_ostream &OS, DependenceInfo *DA, 178 ScalarEvolution &SE, bool NormalizeResults) { 179 auto *F = DA->getFunction(); 180 for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F); SrcI != SrcE; 181 ++SrcI) { 182 if (SrcI->mayReadOrWriteMemory()) { 183 for (inst_iterator DstI = SrcI, DstE = inst_end(F); 184 DstI != DstE; ++DstI) { 185 if (DstI->mayReadOrWriteMemory()) { 186 OS << "Src:" << *SrcI << " --> Dst:" << *DstI << "\n"; 187 OS << " da analyze - "; 188 if (auto D = DA->depends(&*SrcI, &*DstI, 189 /*UnderRuntimeAssumptions=*/true)) { 190 191 #ifndef NDEBUG 192 // Verify that the distance being zero is equivalent to the 193 // direction being EQ. 194 for (unsigned Level = 1; Level <= D->getLevels(); Level++) { 195 const SCEV *Distance = D->getDistance(Level); 196 bool IsDistanceZero = Distance && Distance->isZero(); 197 bool IsDirectionEQ = 198 D->getDirection(Level) == Dependence::DVEntry::EQ; 199 assert(IsDistanceZero == IsDirectionEQ && 200 "Inconsistent distance and direction."); 201 } 202 #endif 203 204 // Normalize negative direction vectors if required by clients. 205 if (NormalizeResults && D->normalize(&SE)) 206 OS << "normalized - "; 207 D->dump(OS); 208 for (unsigned Level = 1; Level <= D->getLevels(); Level++) { 209 if (D->isSplitable(Level)) { 210 OS << " da analyze - split level = " << Level; 211 OS << ", iteration = " << *DA->getSplitIteration(*D, Level); 212 OS << "!\n"; 213 } 214 } 215 } else 216 OS << "none!\n"; 217 } 218 } 219 } 220 } 221 SCEVUnionPredicate Assumptions = DA->getRuntimeAssumptions(); 222 if (!Assumptions.isAlwaysTrue()) { 223 OS << "Runtime Assumptions:\n"; 224 Assumptions.print(OS, 0); 225 } 226 } 227 228 void DependenceAnalysisWrapperPass::print(raw_ostream &OS, 229 const Module *) const { 230 dumpExampleDependence(OS, info.get(), 231 getAnalysis<ScalarEvolutionWrapperPass>().getSE(), false); 232 } 233 234 PreservedAnalyses 235 DependenceAnalysisPrinterPass::run(Function &F, FunctionAnalysisManager &FAM) { 236 OS << "Printing analysis 'Dependence Analysis' for function '" << F.getName() 237 << "':\n"; 238 dumpExampleDependence(OS, &FAM.getResult<DependenceAnalysis>(F), 239 FAM.getResult<ScalarEvolutionAnalysis>(F), 240 NormalizeResults); 241 return PreservedAnalyses::all(); 242 } 243 244 //===----------------------------------------------------------------------===// 245 // Dependence methods 246 247 // Returns true if this is an input dependence. 248 bool Dependence::isInput() const { 249 return Src->mayReadFromMemory() && Dst->mayReadFromMemory(); 250 } 251 252 253 // Returns true if this is an output dependence. 254 bool Dependence::isOutput() const { 255 return Src->mayWriteToMemory() && Dst->mayWriteToMemory(); 256 } 257 258 259 // Returns true if this is an flow (aka true) dependence. 260 bool Dependence::isFlow() const { 261 return Src->mayWriteToMemory() && Dst->mayReadFromMemory(); 262 } 263 264 265 // Returns true if this is an anti dependence. 266 bool Dependence::isAnti() const { 267 return Src->mayReadFromMemory() && Dst->mayWriteToMemory(); 268 } 269 270 271 // Returns true if a particular level is scalar; that is, 272 // if no subscript in the source or destination mention the induction 273 // variable associated with the loop at this level. 274 // Leave this out of line, so it will serve as a virtual method anchor 275 bool Dependence::isScalar(unsigned level) const { 276 return false; 277 } 278 279 280 //===----------------------------------------------------------------------===// 281 // FullDependence methods 282 283 FullDependence::FullDependence(Instruction *Source, Instruction *Destination, 284 const SCEVUnionPredicate &Assumes, 285 bool PossiblyLoopIndependent, 286 unsigned CommonLevels) 287 : Dependence(Source, Destination, Assumes), Levels(CommonLevels), 288 LoopIndependent(PossiblyLoopIndependent) { 289 Consistent = true; 290 if (CommonLevels) 291 DV = std::make_unique<DVEntry[]>(CommonLevels); 292 } 293 294 // FIXME: in some cases the meaning of a negative direction vector 295 // may not be straightforward, e.g., 296 // for (int i = 0; i < 32; ++i) { 297 // Src: A[i] = ...; 298 // Dst: use(A[31 - i]); 299 // } 300 // The dependency is 301 // flow { Src[i] -> Dst[31 - i] : when i >= 16 } and 302 // anti { Dst[i] -> Src[31 - i] : when i < 16 }, 303 // -- hence a [<>]. 304 // As long as a dependence result contains '>' ('<>', '<=>', "*"), it 305 // means that a reversed/normalized dependence needs to be considered 306 // as well. Nevertheless, current isDirectionNegative() only returns 307 // true with a '>' or '>=' dependency for ease of canonicalizing the 308 // dependency vector, since the reverse of '<>', '<=>' and "*" is itself. 309 bool FullDependence::isDirectionNegative() const { 310 for (unsigned Level = 1; Level <= Levels; ++Level) { 311 unsigned char Direction = DV[Level - 1].Direction; 312 if (Direction == Dependence::DVEntry::EQ) 313 continue; 314 if (Direction == Dependence::DVEntry::GT || 315 Direction == Dependence::DVEntry::GE) 316 return true; 317 return false; 318 } 319 return false; 320 } 321 322 bool FullDependence::normalize(ScalarEvolution *SE) { 323 if (!isDirectionNegative()) 324 return false; 325 326 LLVM_DEBUG(dbgs() << "Before normalizing negative direction vectors:\n"; 327 dump(dbgs());); 328 std::swap(Src, Dst); 329 for (unsigned Level = 1; Level <= Levels; ++Level) { 330 unsigned char Direction = DV[Level - 1].Direction; 331 // Reverse the direction vector, this means LT becomes GT 332 // and GT becomes LT. 333 unsigned char RevDirection = Direction & Dependence::DVEntry::EQ; 334 if (Direction & Dependence::DVEntry::LT) 335 RevDirection |= Dependence::DVEntry::GT; 336 if (Direction & Dependence::DVEntry::GT) 337 RevDirection |= Dependence::DVEntry::LT; 338 DV[Level - 1].Direction = RevDirection; 339 // Reverse the dependence distance as well. 340 if (DV[Level - 1].Distance != nullptr) 341 DV[Level - 1].Distance = 342 SE->getNegativeSCEV(DV[Level - 1].Distance); 343 } 344 345 LLVM_DEBUG(dbgs() << "After normalizing negative direction vectors:\n"; 346 dump(dbgs());); 347 return true; 348 } 349 350 // The rest are simple getters that hide the implementation. 351 352 // getDirection - Returns the direction associated with a particular level. 353 unsigned FullDependence::getDirection(unsigned Level) const { 354 assert(0 < Level && Level <= Levels && "Level out of range"); 355 return DV[Level - 1].Direction; 356 } 357 358 359 // Returns the distance (or NULL) associated with a particular level. 360 const SCEV *FullDependence::getDistance(unsigned Level) const { 361 assert(0 < Level && Level <= Levels && "Level out of range"); 362 return DV[Level - 1].Distance; 363 } 364 365 366 // Returns true if a particular level is scalar; that is, 367 // if no subscript in the source or destination mention the induction 368 // variable associated with the loop at this level. 369 bool FullDependence::isScalar(unsigned Level) const { 370 assert(0 < Level && Level <= Levels && "Level out of range"); 371 return DV[Level - 1].Scalar; 372 } 373 374 375 // Returns true if peeling the first iteration from this loop 376 // will break this dependence. 377 bool FullDependence::isPeelFirst(unsigned Level) const { 378 assert(0 < Level && Level <= Levels && "Level out of range"); 379 return DV[Level - 1].PeelFirst; 380 } 381 382 383 // Returns true if peeling the last iteration from this loop 384 // will break this dependence. 385 bool FullDependence::isPeelLast(unsigned Level) const { 386 assert(0 < Level && Level <= Levels && "Level out of range"); 387 return DV[Level - 1].PeelLast; 388 } 389 390 391 // Returns true if splitting this loop will break the dependence. 392 bool FullDependence::isSplitable(unsigned Level) const { 393 assert(0 < Level && Level <= Levels && "Level out of range"); 394 return DV[Level - 1].Splitable; 395 } 396 397 398 //===----------------------------------------------------------------------===// 399 // DependenceInfo::Constraint methods 400 401 // If constraint is a point <X, Y>, returns X. 402 // Otherwise assert. 403 const SCEV *DependenceInfo::Constraint::getX() const { 404 assert(Kind == Point && "Kind should be Point"); 405 return A; 406 } 407 408 409 // If constraint is a point <X, Y>, returns Y. 410 // Otherwise assert. 411 const SCEV *DependenceInfo::Constraint::getY() const { 412 assert(Kind == Point && "Kind should be Point"); 413 return B; 414 } 415 416 417 // If constraint is a line AX + BY = C, returns A. 418 // Otherwise assert. 419 const SCEV *DependenceInfo::Constraint::getA() const { 420 assert((Kind == Line || Kind == Distance) && 421 "Kind should be Line (or Distance)"); 422 return A; 423 } 424 425 426 // If constraint is a line AX + BY = C, returns B. 427 // Otherwise assert. 428 const SCEV *DependenceInfo::Constraint::getB() const { 429 assert((Kind == Line || Kind == Distance) && 430 "Kind should be Line (or Distance)"); 431 return B; 432 } 433 434 435 // If constraint is a line AX + BY = C, returns C. 436 // Otherwise assert. 437 const SCEV *DependenceInfo::Constraint::getC() const { 438 assert((Kind == Line || Kind == Distance) && 439 "Kind should be Line (or Distance)"); 440 return C; 441 } 442 443 444 // If constraint is a distance, returns D. 445 // Otherwise assert. 446 const SCEV *DependenceInfo::Constraint::getD() const { 447 assert(Kind == Distance && "Kind should be Distance"); 448 return SE->getNegativeSCEV(C); 449 } 450 451 452 // Returns the loop associated with this constraint. 453 const Loop *DependenceInfo::Constraint::getAssociatedLoop() const { 454 assert((Kind == Distance || Kind == Line || Kind == Point) && 455 "Kind should be Distance, Line, or Point"); 456 return AssociatedLoop; 457 } 458 459 void DependenceInfo::Constraint::setPoint(const SCEV *X, const SCEV *Y, 460 const Loop *CurLoop) { 461 Kind = Point; 462 A = X; 463 B = Y; 464 AssociatedLoop = CurLoop; 465 } 466 467 void DependenceInfo::Constraint::setLine(const SCEV *AA, const SCEV *BB, 468 const SCEV *CC, const Loop *CurLoop) { 469 Kind = Line; 470 A = AA; 471 B = BB; 472 C = CC; 473 AssociatedLoop = CurLoop; 474 } 475 476 void DependenceInfo::Constraint::setDistance(const SCEV *D, 477 const Loop *CurLoop) { 478 Kind = Distance; 479 A = SE->getOne(D->getType()); 480 B = SE->getNegativeSCEV(A); 481 C = SE->getNegativeSCEV(D); 482 AssociatedLoop = CurLoop; 483 } 484 485 void DependenceInfo::Constraint::setEmpty() { Kind = Empty; } 486 487 void DependenceInfo::Constraint::setAny(ScalarEvolution *NewSE) { 488 SE = NewSE; 489 Kind = Any; 490 } 491 492 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 493 // For debugging purposes. Dumps the constraint out to OS. 494 LLVM_DUMP_METHOD void DependenceInfo::Constraint::dump(raw_ostream &OS) const { 495 if (isEmpty()) 496 OS << " Empty\n"; 497 else if (isAny()) 498 OS << " Any\n"; 499 else if (isPoint()) 500 OS << " Point is <" << *getX() << ", " << *getY() << ">\n"; 501 else if (isDistance()) 502 OS << " Distance is " << *getD() << 503 " (" << *getA() << "*X + " << *getB() << "*Y = " << *getC() << ")\n"; 504 else if (isLine()) 505 OS << " Line is " << *getA() << "*X + " << 506 *getB() << "*Y = " << *getC() << "\n"; 507 else 508 llvm_unreachable("unknown constraint type in Constraint::dump"); 509 } 510 #endif 511 512 513 // Updates X with the intersection 514 // of the Constraints X and Y. Returns true if X has changed. 515 // Corresponds to Figure 4 from the paper 516 // 517 // Practical Dependence Testing 518 // Goff, Kennedy, Tseng 519 // PLDI 1991 520 bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) { 521 ++DeltaApplications; 522 LLVM_DEBUG(dbgs() << "\tintersect constraints\n"); 523 LLVM_DEBUG(dbgs() << "\t X ="; X->dump(dbgs())); 524 LLVM_DEBUG(dbgs() << "\t Y ="; Y->dump(dbgs())); 525 assert(!Y->isPoint() && "Y must not be a Point"); 526 if (X->isAny()) { 527 if (Y->isAny()) 528 return false; 529 *X = *Y; 530 return true; 531 } 532 if (X->isEmpty()) 533 return false; 534 if (Y->isEmpty()) { 535 X->setEmpty(); 536 return true; 537 } 538 539 if (X->isDistance() && Y->isDistance()) { 540 LLVM_DEBUG(dbgs() << "\t intersect 2 distances\n"); 541 if (isKnownPredicate(CmpInst::ICMP_EQ, X->getD(), Y->getD())) 542 return false; 543 if (isKnownPredicate(CmpInst::ICMP_NE, X->getD(), Y->getD())) { 544 X->setEmpty(); 545 ++DeltaSuccesses; 546 return true; 547 } 548 // Hmmm, interesting situation. 549 // I guess if either is constant, keep it and ignore the other. 550 if (isa<SCEVConstant>(Y->getD())) { 551 *X = *Y; 552 return true; 553 } 554 return false; 555 } 556 557 // At this point, the pseudo-code in Figure 4 of the paper 558 // checks if (X->isPoint() && Y->isPoint()). 559 // This case can't occur in our implementation, 560 // since a Point can only arise as the result of intersecting 561 // two Line constraints, and the right-hand value, Y, is never 562 // the result of an intersection. 563 assert(!(X->isPoint() && Y->isPoint()) && 564 "We shouldn't ever see X->isPoint() && Y->isPoint()"); 565 566 if (X->isLine() && Y->isLine()) { 567 LLVM_DEBUG(dbgs() << "\t intersect 2 lines\n"); 568 const SCEV *Prod1 = SE->getMulExpr(X->getA(), Y->getB()); 569 const SCEV *Prod2 = SE->getMulExpr(X->getB(), Y->getA()); 570 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) { 571 // slopes are equal, so lines are parallel 572 LLVM_DEBUG(dbgs() << "\t\tsame slope\n"); 573 Prod1 = SE->getMulExpr(X->getC(), Y->getB()); 574 Prod2 = SE->getMulExpr(X->getB(), Y->getC()); 575 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) 576 return false; 577 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 578 X->setEmpty(); 579 ++DeltaSuccesses; 580 return true; 581 } 582 return false; 583 } 584 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 585 // slopes differ, so lines intersect 586 LLVM_DEBUG(dbgs() << "\t\tdifferent slopes\n"); 587 const SCEV *C1B2 = SE->getMulExpr(X->getC(), Y->getB()); 588 const SCEV *C1A2 = SE->getMulExpr(X->getC(), Y->getA()); 589 const SCEV *C2B1 = SE->getMulExpr(Y->getC(), X->getB()); 590 const SCEV *C2A1 = SE->getMulExpr(Y->getC(), X->getA()); 591 const SCEV *A1B2 = SE->getMulExpr(X->getA(), Y->getB()); 592 const SCEV *A2B1 = SE->getMulExpr(Y->getA(), X->getB()); 593 const SCEVConstant *C1A2_C2A1 = 594 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1A2, C2A1)); 595 const SCEVConstant *C1B2_C2B1 = 596 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1B2, C2B1)); 597 const SCEVConstant *A1B2_A2B1 = 598 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A1B2, A2B1)); 599 const SCEVConstant *A2B1_A1B2 = 600 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A2B1, A1B2)); 601 if (!C1B2_C2B1 || !C1A2_C2A1 || 602 !A1B2_A2B1 || !A2B1_A1B2) 603 return false; 604 APInt Xtop = C1B2_C2B1->getAPInt(); 605 APInt Xbot = A1B2_A2B1->getAPInt(); 606 APInt Ytop = C1A2_C2A1->getAPInt(); 607 APInt Ybot = A2B1_A1B2->getAPInt(); 608 LLVM_DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n"); 609 LLVM_DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n"); 610 LLVM_DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n"); 611 LLVM_DEBUG(dbgs() << "\t\tYbot = " << Ybot << "\n"); 612 APInt Xq = Xtop; // these need to be initialized, even 613 APInt Xr = Xtop; // though they're just going to be overwritten 614 APInt::sdivrem(Xtop, Xbot, Xq, Xr); 615 APInt Yq = Ytop; 616 APInt Yr = Ytop; 617 APInt::sdivrem(Ytop, Ybot, Yq, Yr); 618 if (Xr != 0 || Yr != 0) { 619 X->setEmpty(); 620 ++DeltaSuccesses; 621 return true; 622 } 623 LLVM_DEBUG(dbgs() << "\t\tX = " << Xq << ", Y = " << Yq << "\n"); 624 if (Xq.slt(0) || Yq.slt(0)) { 625 X->setEmpty(); 626 ++DeltaSuccesses; 627 return true; 628 } 629 if (const SCEVConstant *CUB = 630 collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) { 631 const APInt &UpperBound = CUB->getAPInt(); 632 LLVM_DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n"); 633 if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) { 634 X->setEmpty(); 635 ++DeltaSuccesses; 636 return true; 637 } 638 } 639 X->setPoint(SE->getConstant(Xq), 640 SE->getConstant(Yq), 641 X->getAssociatedLoop()); 642 ++DeltaSuccesses; 643 return true; 644 } 645 return false; 646 } 647 648 // if (X->isLine() && Y->isPoint()) This case can't occur. 649 assert(!(X->isLine() && Y->isPoint()) && "This case should never occur"); 650 651 if (X->isPoint() && Y->isLine()) { 652 LLVM_DEBUG(dbgs() << "\t intersect Point and Line\n"); 653 const SCEV *A1X1 = SE->getMulExpr(Y->getA(), X->getX()); 654 const SCEV *B1Y1 = SE->getMulExpr(Y->getB(), X->getY()); 655 const SCEV *Sum = SE->getAddExpr(A1X1, B1Y1); 656 if (isKnownPredicate(CmpInst::ICMP_EQ, Sum, Y->getC())) 657 return false; 658 if (isKnownPredicate(CmpInst::ICMP_NE, Sum, Y->getC())) { 659 X->setEmpty(); 660 ++DeltaSuccesses; 661 return true; 662 } 663 return false; 664 } 665 666 llvm_unreachable("shouldn't reach the end of Constraint intersection"); 667 return false; 668 } 669 670 671 //===----------------------------------------------------------------------===// 672 // DependenceInfo methods 673 674 // For debugging purposes. Dumps a dependence to OS. 675 void Dependence::dump(raw_ostream &OS) const { 676 bool Splitable = false; 677 if (isConfused()) 678 OS << "confused"; 679 else { 680 if (isConsistent()) 681 OS << "consistent "; 682 if (isFlow()) 683 OS << "flow"; 684 else if (isOutput()) 685 OS << "output"; 686 else if (isAnti()) 687 OS << "anti"; 688 else if (isInput()) 689 OS << "input"; 690 unsigned Levels = getLevels(); 691 OS << " ["; 692 for (unsigned II = 1; II <= Levels; ++II) { 693 if (isSplitable(II)) 694 Splitable = true; 695 if (isPeelFirst(II)) 696 OS << 'p'; 697 const SCEV *Distance = getDistance(II); 698 if (Distance) 699 OS << *Distance; 700 else if (isScalar(II)) 701 OS << "S"; 702 else { 703 unsigned Direction = getDirection(II); 704 if (Direction == DVEntry::ALL) 705 OS << "*"; 706 else { 707 if (Direction & DVEntry::LT) 708 OS << "<"; 709 if (Direction & DVEntry::EQ) 710 OS << "="; 711 if (Direction & DVEntry::GT) 712 OS << ">"; 713 } 714 } 715 if (isPeelLast(II)) 716 OS << 'p'; 717 if (II < Levels) 718 OS << " "; 719 } 720 if (isLoopIndependent()) 721 OS << "|<"; 722 OS << "]"; 723 if (Splitable) 724 OS << " splitable"; 725 } 726 OS << "!\n"; 727 728 SCEVUnionPredicate Assumptions = getRuntimeAssumptions(); 729 if (!Assumptions.isAlwaysTrue()) { 730 OS << " Runtime Assumptions:\n"; 731 Assumptions.print(OS, 2); 732 } 733 } 734 735 // Returns NoAlias/MayAliass/MustAlias for two memory locations based upon their 736 // underlaying objects. If LocA and LocB are known to not alias (for any reason: 737 // tbaa, non-overlapping regions etc), then it is known there is no dependecy. 738 // Otherwise the underlying objects are checked to see if they point to 739 // different identifiable objects. 740 static AliasResult underlyingObjectsAlias(AAResults *AA, 741 const DataLayout &DL, 742 const MemoryLocation &LocA, 743 const MemoryLocation &LocB) { 744 // Check the original locations (minus size) for noalias, which can happen for 745 // tbaa, incompatible underlying object locations, etc. 746 MemoryLocation LocAS = 747 MemoryLocation::getBeforeOrAfter(LocA.Ptr, LocA.AATags); 748 MemoryLocation LocBS = 749 MemoryLocation::getBeforeOrAfter(LocB.Ptr, LocB.AATags); 750 BatchAAResults BAA(*AA); 751 BAA.enableCrossIterationMode(); 752 753 if (BAA.isNoAlias(LocAS, LocBS)) 754 return AliasResult::NoAlias; 755 756 // Check the underlying objects are the same 757 const Value *AObj = getUnderlyingObject(LocA.Ptr); 758 const Value *BObj = getUnderlyingObject(LocB.Ptr); 759 760 // If the underlying objects are the same, they must alias 761 if (AObj == BObj) 762 return AliasResult::MustAlias; 763 764 // We may have hit the recursion limit for underlying objects, or have 765 // underlying objects where we don't know they will alias. 766 if (!isIdentifiedObject(AObj) || !isIdentifiedObject(BObj)) 767 return AliasResult::MayAlias; 768 769 // Otherwise we know the objects are different and both identified objects so 770 // must not alias. 771 return AliasResult::NoAlias; 772 } 773 774 // Returns true if the load or store can be analyzed. Atomic and volatile 775 // operations have properties which this analysis does not understand. 776 static 777 bool isLoadOrStore(const Instruction *I) { 778 if (const LoadInst *LI = dyn_cast<LoadInst>(I)) 779 return LI->isUnordered(); 780 else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) 781 return SI->isUnordered(); 782 return false; 783 } 784 785 786 // Examines the loop nesting of the Src and Dst 787 // instructions and establishes their shared loops. Sets the variables 788 // CommonLevels, SrcLevels, and MaxLevels. 789 // The source and destination instructions needn't be contained in the same 790 // loop. The routine establishNestingLevels finds the level of most deeply 791 // nested loop that contains them both, CommonLevels. An instruction that's 792 // not contained in a loop is at level = 0. MaxLevels is equal to the level 793 // of the source plus the level of the destination, minus CommonLevels. 794 // This lets us allocate vectors MaxLevels in length, with room for every 795 // distinct loop referenced in both the source and destination subscripts. 796 // The variable SrcLevels is the nesting depth of the source instruction. 797 // It's used to help calculate distinct loops referenced by the destination. 798 // Here's the map from loops to levels: 799 // 0 - unused 800 // 1 - outermost common loop 801 // ... - other common loops 802 // CommonLevels - innermost common loop 803 // ... - loops containing Src but not Dst 804 // SrcLevels - innermost loop containing Src but not Dst 805 // ... - loops containing Dst but not Src 806 // MaxLevels - innermost loops containing Dst but not Src 807 // Consider the follow code fragment: 808 // for (a = ...) { 809 // for (b = ...) { 810 // for (c = ...) { 811 // for (d = ...) { 812 // A[] = ...; 813 // } 814 // } 815 // for (e = ...) { 816 // for (f = ...) { 817 // for (g = ...) { 818 // ... = A[]; 819 // } 820 // } 821 // } 822 // } 823 // } 824 // If we're looking at the possibility of a dependence between the store 825 // to A (the Src) and the load from A (the Dst), we'll note that they 826 // have 2 loops in common, so CommonLevels will equal 2 and the direction 827 // vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7. 828 // A map from loop names to loop numbers would look like 829 // a - 1 830 // b - 2 = CommonLevels 831 // c - 3 832 // d - 4 = SrcLevels 833 // e - 5 834 // f - 6 835 // g - 7 = MaxLevels 836 void DependenceInfo::establishNestingLevels(const Instruction *Src, 837 const Instruction *Dst) { 838 const BasicBlock *SrcBlock = Src->getParent(); 839 const BasicBlock *DstBlock = Dst->getParent(); 840 unsigned SrcLevel = LI->getLoopDepth(SrcBlock); 841 unsigned DstLevel = LI->getLoopDepth(DstBlock); 842 const Loop *SrcLoop = LI->getLoopFor(SrcBlock); 843 const Loop *DstLoop = LI->getLoopFor(DstBlock); 844 SrcLevels = SrcLevel; 845 MaxLevels = SrcLevel + DstLevel; 846 while (SrcLevel > DstLevel) { 847 SrcLoop = SrcLoop->getParentLoop(); 848 SrcLevel--; 849 } 850 while (DstLevel > SrcLevel) { 851 DstLoop = DstLoop->getParentLoop(); 852 DstLevel--; 853 } 854 while (SrcLoop != DstLoop) { 855 SrcLoop = SrcLoop->getParentLoop(); 856 DstLoop = DstLoop->getParentLoop(); 857 SrcLevel--; 858 } 859 CommonLevels = SrcLevel; 860 MaxLevels -= CommonLevels; 861 } 862 863 864 // Given one of the loops containing the source, return 865 // its level index in our numbering scheme. 866 unsigned DependenceInfo::mapSrcLoop(const Loop *SrcLoop) const { 867 return SrcLoop->getLoopDepth(); 868 } 869 870 871 // Given one of the loops containing the destination, 872 // return its level index in our numbering scheme. 873 unsigned DependenceInfo::mapDstLoop(const Loop *DstLoop) const { 874 unsigned D = DstLoop->getLoopDepth(); 875 if (D > CommonLevels) 876 // This tries to make sure that we assign unique numbers to src and dst when 877 // the memory accesses reside in different loops that have the same depth. 878 return D - CommonLevels + SrcLevels; 879 else 880 return D; 881 } 882 883 884 // Returns true if Expression is loop invariant in LoopNest. 885 bool DependenceInfo::isLoopInvariant(const SCEV *Expression, 886 const Loop *LoopNest) const { 887 // Unlike ScalarEvolution::isLoopInvariant() we consider an access outside of 888 // any loop as invariant, because we only consier expression evaluation at a 889 // specific position (where the array access takes place), and not across the 890 // entire function. 891 if (!LoopNest) 892 return true; 893 894 // If the expression is invariant in the outermost loop of the loop nest, it 895 // is invariant anywhere in the loop nest. 896 return SE->isLoopInvariant(Expression, LoopNest->getOutermostLoop()); 897 } 898 899 900 901 // Finds the set of loops from the LoopNest that 902 // have a level <= CommonLevels and are referred to by the SCEV Expression. 903 void DependenceInfo::collectCommonLoops(const SCEV *Expression, 904 const Loop *LoopNest, 905 SmallBitVector &Loops) const { 906 while (LoopNest) { 907 unsigned Level = LoopNest->getLoopDepth(); 908 if (Level <= CommonLevels && !SE->isLoopInvariant(Expression, LoopNest)) 909 Loops.set(Level); 910 LoopNest = LoopNest->getParentLoop(); 911 } 912 } 913 914 void DependenceInfo::unifySubscriptType(ArrayRef<Subscript *> Pairs) { 915 916 unsigned widestWidthSeen = 0; 917 Type *widestType; 918 919 // Go through each pair and find the widest bit to which we need 920 // to extend all of them. 921 for (Subscript *Pair : Pairs) { 922 const SCEV *Src = Pair->Src; 923 const SCEV *Dst = Pair->Dst; 924 IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType()); 925 IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType()); 926 if (SrcTy == nullptr || DstTy == nullptr) { 927 assert(SrcTy == DstTy && "This function only unify integer types and " 928 "expect Src and Dst share the same type " 929 "otherwise."); 930 continue; 931 } 932 if (SrcTy->getBitWidth() > widestWidthSeen) { 933 widestWidthSeen = SrcTy->getBitWidth(); 934 widestType = SrcTy; 935 } 936 if (DstTy->getBitWidth() > widestWidthSeen) { 937 widestWidthSeen = DstTy->getBitWidth(); 938 widestType = DstTy; 939 } 940 } 941 942 943 assert(widestWidthSeen > 0); 944 945 // Now extend each pair to the widest seen. 946 for (Subscript *Pair : Pairs) { 947 const SCEV *Src = Pair->Src; 948 const SCEV *Dst = Pair->Dst; 949 IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType()); 950 IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType()); 951 if (SrcTy == nullptr || DstTy == nullptr) { 952 assert(SrcTy == DstTy && "This function only unify integer types and " 953 "expect Src and Dst share the same type " 954 "otherwise."); 955 continue; 956 } 957 if (SrcTy->getBitWidth() < widestWidthSeen) 958 // Sign-extend Src to widestType 959 Pair->Src = SE->getSignExtendExpr(Src, widestType); 960 if (DstTy->getBitWidth() < widestWidthSeen) { 961 // Sign-extend Dst to widestType 962 Pair->Dst = SE->getSignExtendExpr(Dst, widestType); 963 } 964 } 965 } 966 967 // removeMatchingExtensions - Examines a subscript pair. 968 // If the source and destination are identically sign (or zero) 969 // extended, it strips off the extension in an effect to simplify 970 // the actual analysis. 971 void DependenceInfo::removeMatchingExtensions(Subscript *Pair) { 972 const SCEV *Src = Pair->Src; 973 const SCEV *Dst = Pair->Dst; 974 if ((isa<SCEVZeroExtendExpr>(Src) && isa<SCEVZeroExtendExpr>(Dst)) || 975 (isa<SCEVSignExtendExpr>(Src) && isa<SCEVSignExtendExpr>(Dst))) { 976 const SCEVIntegralCastExpr *SrcCast = cast<SCEVIntegralCastExpr>(Src); 977 const SCEVIntegralCastExpr *DstCast = cast<SCEVIntegralCastExpr>(Dst); 978 const SCEV *SrcCastOp = SrcCast->getOperand(); 979 const SCEV *DstCastOp = DstCast->getOperand(); 980 if (SrcCastOp->getType() == DstCastOp->getType()) { 981 Pair->Src = SrcCastOp; 982 Pair->Dst = DstCastOp; 983 } 984 } 985 } 986 987 // Examine the scev and return true iff it's affine. 988 // Collect any loops mentioned in the set of "Loops". 989 bool DependenceInfo::checkSubscript(const SCEV *Expr, const Loop *LoopNest, 990 SmallBitVector &Loops, bool IsSrc) { 991 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 992 if (!AddRec) 993 return isLoopInvariant(Expr, LoopNest); 994 995 // The AddRec must depend on one of the containing loops. Otherwise, 996 // mapSrcLoop and mapDstLoop return indices outside the intended range. This 997 // can happen when a subscript in one loop references an IV from a sibling 998 // loop that could not be replaced with a concrete exit value by 999 // getSCEVAtScope. 1000 const Loop *L = LoopNest; 1001 while (L && AddRec->getLoop() != L) 1002 L = L->getParentLoop(); 1003 if (!L) 1004 return false; 1005 1006 const SCEV *Start = AddRec->getStart(); 1007 const SCEV *Step = AddRec->getStepRecurrence(*SE); 1008 if (!isLoopInvariant(Step, LoopNest)) 1009 return false; 1010 if (IsSrc) 1011 Loops.set(mapSrcLoop(AddRec->getLoop())); 1012 else 1013 Loops.set(mapDstLoop(AddRec->getLoop())); 1014 return checkSubscript(Start, LoopNest, Loops, IsSrc); 1015 } 1016 1017 // Examine the scev and return true iff it's linear. 1018 // Collect any loops mentioned in the set of "Loops". 1019 bool DependenceInfo::checkSrcSubscript(const SCEV *Src, const Loop *LoopNest, 1020 SmallBitVector &Loops) { 1021 return checkSubscript(Src, LoopNest, Loops, true); 1022 } 1023 1024 // Examine the scev and return true iff it's linear. 1025 // Collect any loops mentioned in the set of "Loops". 1026 bool DependenceInfo::checkDstSubscript(const SCEV *Dst, const Loop *LoopNest, 1027 SmallBitVector &Loops) { 1028 return checkSubscript(Dst, LoopNest, Loops, false); 1029 } 1030 1031 1032 // Examines the subscript pair (the Src and Dst SCEVs) 1033 // and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear. 1034 // Collects the associated loops in a set. 1035 DependenceInfo::Subscript::ClassificationKind 1036 DependenceInfo::classifyPair(const SCEV *Src, const Loop *SrcLoopNest, 1037 const SCEV *Dst, const Loop *DstLoopNest, 1038 SmallBitVector &Loops) { 1039 SmallBitVector SrcLoops(MaxLevels + 1); 1040 SmallBitVector DstLoops(MaxLevels + 1); 1041 if (!checkSrcSubscript(Src, SrcLoopNest, SrcLoops)) 1042 return Subscript::NonLinear; 1043 if (!checkDstSubscript(Dst, DstLoopNest, DstLoops)) 1044 return Subscript::NonLinear; 1045 Loops = SrcLoops; 1046 Loops |= DstLoops; 1047 unsigned N = Loops.count(); 1048 if (N == 0) 1049 return Subscript::ZIV; 1050 if (N == 1) 1051 return Subscript::SIV; 1052 if (N == 2 && (SrcLoops.count() == 0 || 1053 DstLoops.count() == 0 || 1054 (SrcLoops.count() == 1 && DstLoops.count() == 1))) 1055 return Subscript::RDIV; 1056 return Subscript::MIV; 1057 } 1058 1059 1060 // A wrapper around SCEV::isKnownPredicate. 1061 // Looks for cases where we're interested in comparing for equality. 1062 // If both X and Y have been identically sign or zero extended, 1063 // it strips off the (confusing) extensions before invoking 1064 // SCEV::isKnownPredicate. Perhaps, someday, the ScalarEvolution package 1065 // will be similarly updated. 1066 // 1067 // If SCEV::isKnownPredicate can't prove the predicate, 1068 // we try simple subtraction, which seems to help in some cases 1069 // involving symbolics. 1070 bool DependenceInfo::isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *X, 1071 const SCEV *Y) const { 1072 if (Pred == CmpInst::ICMP_EQ || 1073 Pred == CmpInst::ICMP_NE) { 1074 if ((isa<SCEVSignExtendExpr>(X) && 1075 isa<SCEVSignExtendExpr>(Y)) || 1076 (isa<SCEVZeroExtendExpr>(X) && 1077 isa<SCEVZeroExtendExpr>(Y))) { 1078 const SCEVIntegralCastExpr *CX = cast<SCEVIntegralCastExpr>(X); 1079 const SCEVIntegralCastExpr *CY = cast<SCEVIntegralCastExpr>(Y); 1080 const SCEV *Xop = CX->getOperand(); 1081 const SCEV *Yop = CY->getOperand(); 1082 if (Xop->getType() == Yop->getType()) { 1083 X = Xop; 1084 Y = Yop; 1085 } 1086 } 1087 } 1088 if (SE->isKnownPredicate(Pred, X, Y)) 1089 return true; 1090 // If SE->isKnownPredicate can't prove the condition, 1091 // we try the brute-force approach of subtracting 1092 // and testing the difference. 1093 // By testing with SE->isKnownPredicate first, we avoid 1094 // the possibility of overflow when the arguments are constants. 1095 const SCEV *Delta = SE->getMinusSCEV(X, Y); 1096 switch (Pred) { 1097 case CmpInst::ICMP_EQ: 1098 return Delta->isZero(); 1099 case CmpInst::ICMP_NE: 1100 return SE->isKnownNonZero(Delta); 1101 case CmpInst::ICMP_SGE: 1102 return SE->isKnownNonNegative(Delta); 1103 case CmpInst::ICMP_SLE: 1104 return SE->isKnownNonPositive(Delta); 1105 case CmpInst::ICMP_SGT: 1106 return SE->isKnownPositive(Delta); 1107 case CmpInst::ICMP_SLT: 1108 return SE->isKnownNegative(Delta); 1109 default: 1110 llvm_unreachable("unexpected predicate in isKnownPredicate"); 1111 } 1112 } 1113 1114 /// Compare to see if S is less than Size, using isKnownNegative(S - max(Size, 1)) 1115 /// with some extra checking if S is an AddRec and we can prove less-than using 1116 /// the loop bounds. 1117 bool DependenceInfo::isKnownLessThan(const SCEV *S, const SCEV *Size) const { 1118 // First unify to the same type 1119 auto *SType = dyn_cast<IntegerType>(S->getType()); 1120 auto *SizeType = dyn_cast<IntegerType>(Size->getType()); 1121 if (!SType || !SizeType) 1122 return false; 1123 Type *MaxType = 1124 (SType->getBitWidth() >= SizeType->getBitWidth()) ? SType : SizeType; 1125 S = SE->getTruncateOrZeroExtend(S, MaxType); 1126 Size = SE->getTruncateOrZeroExtend(Size, MaxType); 1127 1128 // Special check for addrecs using BE taken count 1129 const SCEV *Bound = SE->getMinusSCEV(S, Size); 1130 if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Bound)) { 1131 if (AddRec->isAffine()) { 1132 const SCEV *BECount = SE->getBackedgeTakenCount(AddRec->getLoop()); 1133 if (!isa<SCEVCouldNotCompute>(BECount)) { 1134 const SCEV *Limit = AddRec->evaluateAtIteration(BECount, *SE); 1135 if (SE->isKnownNegative(Limit)) 1136 return true; 1137 } 1138 } 1139 } 1140 1141 // Check using normal isKnownNegative 1142 const SCEV *LimitedBound = 1143 SE->getMinusSCEV(S, SE->getSMaxExpr(Size, SE->getOne(Size->getType()))); 1144 return SE->isKnownNegative(LimitedBound); 1145 } 1146 1147 bool DependenceInfo::isKnownNonNegative(const SCEV *S, const Value *Ptr) const { 1148 bool Inbounds = false; 1149 if (auto *SrcGEP = dyn_cast<GetElementPtrInst>(Ptr)) 1150 Inbounds = SrcGEP->isInBounds(); 1151 if (Inbounds) { 1152 if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) { 1153 if (AddRec->isAffine()) { 1154 // We know S is for Ptr, the operand on a load/store, so doesn't wrap. 1155 // If both parts are NonNegative, the end result will be NonNegative 1156 if (SE->isKnownNonNegative(AddRec->getStart()) && 1157 SE->isKnownNonNegative(AddRec->getOperand(1))) 1158 return true; 1159 } 1160 } 1161 } 1162 1163 return SE->isKnownNonNegative(S); 1164 } 1165 1166 // All subscripts are all the same type. 1167 // Loop bound may be smaller (e.g., a char). 1168 // Should zero extend loop bound, since it's always >= 0. 1169 // This routine collects upper bound and extends or truncates if needed. 1170 // Truncating is safe when subscripts are known not to wrap. Cases without 1171 // nowrap flags should have been rejected earlier. 1172 // Return null if no bound available. 1173 const SCEV *DependenceInfo::collectUpperBound(const Loop *L, Type *T) const { 1174 if (SE->hasLoopInvariantBackedgeTakenCount(L)) { 1175 const SCEV *UB = SE->getBackedgeTakenCount(L); 1176 return SE->getTruncateOrZeroExtend(UB, T); 1177 } 1178 return nullptr; 1179 } 1180 1181 1182 // Calls collectUpperBound(), then attempts to cast it to SCEVConstant. 1183 // If the cast fails, returns NULL. 1184 const SCEVConstant *DependenceInfo::collectConstantUpperBound(const Loop *L, 1185 Type *T) const { 1186 if (const SCEV *UB = collectUpperBound(L, T)) 1187 return dyn_cast<SCEVConstant>(UB); 1188 return nullptr; 1189 } 1190 1191 1192 // testZIV - 1193 // When we have a pair of subscripts of the form [c1] and [c2], 1194 // where c1 and c2 are both loop invariant, we attack it using 1195 // the ZIV test. Basically, we test by comparing the two values, 1196 // but there are actually three possible results: 1197 // 1) the values are equal, so there's a dependence 1198 // 2) the values are different, so there's no dependence 1199 // 3) the values might be equal, so we have to assume a dependence. 1200 // 1201 // Return true if dependence disproved. 1202 bool DependenceInfo::testZIV(const SCEV *Src, const SCEV *Dst, 1203 FullDependence &Result) const { 1204 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n"); 1205 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n"); 1206 ++ZIVapplications; 1207 if (isKnownPredicate(CmpInst::ICMP_EQ, Src, Dst)) { 1208 LLVM_DEBUG(dbgs() << " provably dependent\n"); 1209 return false; // provably dependent 1210 } 1211 if (isKnownPredicate(CmpInst::ICMP_NE, Src, Dst)) { 1212 LLVM_DEBUG(dbgs() << " provably independent\n"); 1213 ++ZIVindependence; 1214 return true; // provably independent 1215 } 1216 LLVM_DEBUG(dbgs() << " possibly dependent\n"); 1217 Result.Consistent = false; 1218 return false; // possibly dependent 1219 } 1220 1221 1222 // strongSIVtest - 1223 // From the paper, Practical Dependence Testing, Section 4.2.1 1224 // 1225 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 + a*i], 1226 // where i is an induction variable, c1 and c2 are loop invariant, 1227 // and a is a constant, we can solve it exactly using the Strong SIV test. 1228 // 1229 // Can prove independence. Failing that, can compute distance (and direction). 1230 // In the presence of symbolic terms, we can sometimes make progress. 1231 // 1232 // If there's a dependence, 1233 // 1234 // c1 + a*i = c2 + a*i' 1235 // 1236 // The dependence distance is 1237 // 1238 // d = i' - i = (c1 - c2)/a 1239 // 1240 // A dependence only exists if d is an integer and abs(d) <= U, where U is the 1241 // loop's upper bound. If a dependence exists, the dependence direction is 1242 // defined as 1243 // 1244 // { < if d > 0 1245 // direction = { = if d = 0 1246 // { > if d < 0 1247 // 1248 // Return true if dependence disproved. 1249 bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst, 1250 const SCEV *DstConst, const Loop *CurLoop, 1251 unsigned Level, FullDependence &Result, 1252 Constraint &NewConstraint) const { 1253 LLVM_DEBUG(dbgs() << "\tStrong SIV test\n"); 1254 LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff); 1255 LLVM_DEBUG(dbgs() << ", " << *Coeff->getType() << "\n"); 1256 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst); 1257 LLVM_DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n"); 1258 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst); 1259 LLVM_DEBUG(dbgs() << ", " << *DstConst->getType() << "\n"); 1260 ++StrongSIVapplications; 1261 assert(0 < Level && Level <= CommonLevels && "level out of range"); 1262 Level--; 1263 1264 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1265 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta); 1266 LLVM_DEBUG(dbgs() << ", " << *Delta->getType() << "\n"); 1267 1268 // check that |Delta| < iteration count 1269 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1270 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound); 1271 LLVM_DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n"); 1272 const SCEV *AbsDelta = 1273 SE->isKnownNonNegative(Delta) ? Delta : SE->getNegativeSCEV(Delta); 1274 const SCEV *AbsCoeff = 1275 SE->isKnownNonNegative(Coeff) ? Coeff : SE->getNegativeSCEV(Coeff); 1276 const SCEV *Product = SE->getMulExpr(UpperBound, AbsCoeff); 1277 if (isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product)) { 1278 // Distance greater than trip count - no dependence 1279 ++StrongSIVindependence; 1280 ++StrongSIVsuccesses; 1281 return true; 1282 } 1283 } 1284 1285 // Can we compute distance? 1286 if (isa<SCEVConstant>(Delta) && isa<SCEVConstant>(Coeff)) { 1287 APInt ConstDelta = cast<SCEVConstant>(Delta)->getAPInt(); 1288 APInt ConstCoeff = cast<SCEVConstant>(Coeff)->getAPInt(); 1289 APInt Distance = ConstDelta; // these need to be initialized 1290 APInt Remainder = ConstDelta; 1291 APInt::sdivrem(ConstDelta, ConstCoeff, Distance, Remainder); 1292 LLVM_DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1293 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1294 // Make sure Coeff divides Delta exactly 1295 if (Remainder != 0) { 1296 // Coeff doesn't divide Distance, no dependence 1297 ++StrongSIVindependence; 1298 ++StrongSIVsuccesses; 1299 return true; 1300 } 1301 Result.DV[Level].Distance = SE->getConstant(Distance); 1302 NewConstraint.setDistance(SE->getConstant(Distance), CurLoop); 1303 if (Distance.sgt(0)) 1304 Result.DV[Level].Direction &= Dependence::DVEntry::LT; 1305 else if (Distance.slt(0)) 1306 Result.DV[Level].Direction &= Dependence::DVEntry::GT; 1307 else 1308 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1309 ++StrongSIVsuccesses; 1310 } 1311 else if (Delta->isZero()) { 1312 // since 0/X == 0 1313 Result.DV[Level].Distance = Delta; 1314 NewConstraint.setDistance(Delta, CurLoop); 1315 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1316 ++StrongSIVsuccesses; 1317 } 1318 else { 1319 if (Coeff->isOne()) { 1320 LLVM_DEBUG(dbgs() << "\t Distance = " << *Delta << "\n"); 1321 Result.DV[Level].Distance = Delta; // since X/1 == X 1322 NewConstraint.setDistance(Delta, CurLoop); 1323 } 1324 else { 1325 Result.Consistent = false; 1326 NewConstraint.setLine(Coeff, 1327 SE->getNegativeSCEV(Coeff), 1328 SE->getNegativeSCEV(Delta), CurLoop); 1329 } 1330 1331 // maybe we can get a useful direction 1332 bool DeltaMaybeZero = !SE->isKnownNonZero(Delta); 1333 bool DeltaMaybePositive = !SE->isKnownNonPositive(Delta); 1334 bool DeltaMaybeNegative = !SE->isKnownNonNegative(Delta); 1335 bool CoeffMaybePositive = !SE->isKnownNonPositive(Coeff); 1336 bool CoeffMaybeNegative = !SE->isKnownNonNegative(Coeff); 1337 // The double negatives above are confusing. 1338 // It helps to read !SE->isKnownNonZero(Delta) 1339 // as "Delta might be Zero" 1340 unsigned NewDirection = Dependence::DVEntry::NONE; 1341 if ((DeltaMaybePositive && CoeffMaybePositive) || 1342 (DeltaMaybeNegative && CoeffMaybeNegative)) 1343 NewDirection = Dependence::DVEntry::LT; 1344 if (DeltaMaybeZero) 1345 NewDirection |= Dependence::DVEntry::EQ; 1346 if ((DeltaMaybeNegative && CoeffMaybePositive) || 1347 (DeltaMaybePositive && CoeffMaybeNegative)) 1348 NewDirection |= Dependence::DVEntry::GT; 1349 if (NewDirection < Result.DV[Level].Direction) 1350 ++StrongSIVsuccesses; 1351 Result.DV[Level].Direction &= NewDirection; 1352 } 1353 return false; 1354 } 1355 1356 1357 // weakCrossingSIVtest - 1358 // From the paper, Practical Dependence Testing, Section 4.2.2 1359 // 1360 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 - a*i], 1361 // where i is an induction variable, c1 and c2 are loop invariant, 1362 // and a is a constant, we can solve it exactly using the 1363 // Weak-Crossing SIV test. 1364 // 1365 // Given c1 + a*i = c2 - a*i', we can look for the intersection of 1366 // the two lines, where i = i', yielding 1367 // 1368 // c1 + a*i = c2 - a*i 1369 // 2a*i = c2 - c1 1370 // i = (c2 - c1)/2a 1371 // 1372 // If i < 0, there is no dependence. 1373 // If i > upperbound, there is no dependence. 1374 // If i = 0 (i.e., if c1 = c2), there's a dependence with distance = 0. 1375 // If i = upperbound, there's a dependence with distance = 0. 1376 // If i is integral, there's a dependence (all directions). 1377 // If the non-integer part = 1/2, there's a dependence (<> directions). 1378 // Otherwise, there's no dependence. 1379 // 1380 // Can prove independence. Failing that, 1381 // can sometimes refine the directions. 1382 // Can determine iteration for splitting. 1383 // 1384 // Return true if dependence disproved. 1385 bool DependenceInfo::weakCrossingSIVtest( 1386 const SCEV *Coeff, const SCEV *SrcConst, const SCEV *DstConst, 1387 const Loop *CurLoop, unsigned Level, FullDependence &Result, 1388 Constraint &NewConstraint, const SCEV *&SplitIter) const { 1389 LLVM_DEBUG(dbgs() << "\tWeak-Crossing SIV test\n"); 1390 LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff << "\n"); 1391 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1392 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1393 ++WeakCrossingSIVapplications; 1394 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1395 Level--; 1396 Result.Consistent = false; 1397 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1398 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1399 NewConstraint.setLine(Coeff, Coeff, Delta, CurLoop); 1400 if (Delta->isZero()) { 1401 Result.DV[Level].Direction &= ~Dependence::DVEntry::LT; 1402 Result.DV[Level].Direction &= ~Dependence::DVEntry::GT; 1403 ++WeakCrossingSIVsuccesses; 1404 if (!Result.DV[Level].Direction) { 1405 ++WeakCrossingSIVindependence; 1406 return true; 1407 } 1408 Result.DV[Level].Distance = Delta; // = 0 1409 return false; 1410 } 1411 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Coeff); 1412 if (!ConstCoeff) 1413 return false; 1414 1415 Result.DV[Level].Splitable = true; 1416 if (SE->isKnownNegative(ConstCoeff)) { 1417 ConstCoeff = dyn_cast<SCEVConstant>(SE->getNegativeSCEV(ConstCoeff)); 1418 assert(ConstCoeff && 1419 "dynamic cast of negative of ConstCoeff should yield constant"); 1420 Delta = SE->getNegativeSCEV(Delta); 1421 } 1422 assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive"); 1423 1424 // compute SplitIter for use by DependenceInfo::getSplitIteration() 1425 SplitIter = SE->getUDivExpr( 1426 SE->getSMaxExpr(SE->getZero(Delta->getType()), Delta), 1427 SE->getMulExpr(SE->getConstant(Delta->getType(), 2), ConstCoeff)); 1428 LLVM_DEBUG(dbgs() << "\t Split iter = " << *SplitIter << "\n"); 1429 1430 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1431 if (!ConstDelta) 1432 return false; 1433 1434 // We're certain that ConstCoeff > 0; therefore, 1435 // if Delta < 0, then no dependence. 1436 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1437 LLVM_DEBUG(dbgs() << "\t ConstCoeff = " << *ConstCoeff << "\n"); 1438 if (SE->isKnownNegative(Delta)) { 1439 // No dependence, Delta < 0 1440 ++WeakCrossingSIVindependence; 1441 ++WeakCrossingSIVsuccesses; 1442 return true; 1443 } 1444 1445 // We're certain that Delta > 0 and ConstCoeff > 0. 1446 // Check Delta/(2*ConstCoeff) against upper loop bound 1447 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1448 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1449 const SCEV *ConstantTwo = SE->getConstant(UpperBound->getType(), 2); 1450 const SCEV *ML = SE->getMulExpr(SE->getMulExpr(ConstCoeff, UpperBound), 1451 ConstantTwo); 1452 LLVM_DEBUG(dbgs() << "\t ML = " << *ML << "\n"); 1453 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, ML)) { 1454 // Delta too big, no dependence 1455 ++WeakCrossingSIVindependence; 1456 ++WeakCrossingSIVsuccesses; 1457 return true; 1458 } 1459 if (isKnownPredicate(CmpInst::ICMP_EQ, Delta, ML)) { 1460 // i = i' = UB 1461 Result.DV[Level].Direction &= ~Dependence::DVEntry::LT; 1462 Result.DV[Level].Direction &= ~Dependence::DVEntry::GT; 1463 ++WeakCrossingSIVsuccesses; 1464 if (!Result.DV[Level].Direction) { 1465 ++WeakCrossingSIVindependence; 1466 return true; 1467 } 1468 Result.DV[Level].Splitable = false; 1469 Result.DV[Level].Distance = SE->getZero(Delta->getType()); 1470 return false; 1471 } 1472 } 1473 1474 // check that Coeff divides Delta 1475 APInt APDelta = ConstDelta->getAPInt(); 1476 APInt APCoeff = ConstCoeff->getAPInt(); 1477 APInt Distance = APDelta; // these need to be initialzed 1478 APInt Remainder = APDelta; 1479 APInt::sdivrem(APDelta, APCoeff, Distance, Remainder); 1480 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1481 if (Remainder != 0) { 1482 // Coeff doesn't divide Delta, no dependence 1483 ++WeakCrossingSIVindependence; 1484 ++WeakCrossingSIVsuccesses; 1485 return true; 1486 } 1487 LLVM_DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1488 1489 // if 2*Coeff doesn't divide Delta, then the equal direction isn't possible 1490 APInt Two = APInt(Distance.getBitWidth(), 2, true); 1491 Remainder = Distance.srem(Two); 1492 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1493 if (Remainder != 0) { 1494 // Equal direction isn't possible 1495 Result.DV[Level].Direction &= ~Dependence::DVEntry::EQ; 1496 ++WeakCrossingSIVsuccesses; 1497 } 1498 return false; 1499 } 1500 1501 1502 // Kirch's algorithm, from 1503 // 1504 // Optimizing Supercompilers for Supercomputers 1505 // Michael Wolfe 1506 // MIT Press, 1989 1507 // 1508 // Program 2.1, page 29. 1509 // Computes the GCD of AM and BM. 1510 // Also finds a solution to the equation ax - by = gcd(a, b). 1511 // Returns true if dependence disproved; i.e., gcd does not divide Delta. 1512 static bool findGCD(unsigned Bits, const APInt &AM, const APInt &BM, 1513 const APInt &Delta, APInt &G, APInt &X, APInt &Y) { 1514 APInt A0(Bits, 1, true), A1(Bits, 0, true); 1515 APInt B0(Bits, 0, true), B1(Bits, 1, true); 1516 APInt G0 = AM.abs(); 1517 APInt G1 = BM.abs(); 1518 APInt Q = G0; // these need to be initialized 1519 APInt R = G0; 1520 APInt::sdivrem(G0, G1, Q, R); 1521 while (R != 0) { 1522 APInt A2 = A0 - Q*A1; A0 = A1; A1 = A2; 1523 APInt B2 = B0 - Q*B1; B0 = B1; B1 = B2; 1524 G0 = G1; G1 = R; 1525 APInt::sdivrem(G0, G1, Q, R); 1526 } 1527 G = G1; 1528 LLVM_DEBUG(dbgs() << "\t GCD = " << G << "\n"); 1529 X = AM.slt(0) ? -A1 : A1; 1530 Y = BM.slt(0) ? B1 : -B1; 1531 1532 // make sure gcd divides Delta 1533 R = Delta.srem(G); 1534 if (R != 0) 1535 return true; // gcd doesn't divide Delta, no dependence 1536 Q = Delta.sdiv(G); 1537 return false; 1538 } 1539 1540 static APInt floorOfQuotient(const APInt &A, const APInt &B) { 1541 APInt Q = A; // these need to be initialized 1542 APInt R = A; 1543 APInt::sdivrem(A, B, Q, R); 1544 if (R == 0) 1545 return Q; 1546 if ((A.sgt(0) && B.sgt(0)) || 1547 (A.slt(0) && B.slt(0))) 1548 return Q; 1549 else 1550 return Q - 1; 1551 } 1552 1553 static APInt ceilingOfQuotient(const APInt &A, const APInt &B) { 1554 APInt Q = A; // these need to be initialized 1555 APInt R = A; 1556 APInt::sdivrem(A, B, Q, R); 1557 if (R == 0) 1558 return Q; 1559 if ((A.sgt(0) && B.sgt(0)) || 1560 (A.slt(0) && B.slt(0))) 1561 return Q + 1; 1562 else 1563 return Q; 1564 } 1565 1566 // exactSIVtest - 1567 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i], 1568 // where i is an induction variable, c1 and c2 are loop invariant, and a1 1569 // and a2 are constant, we can solve it exactly using an algorithm developed 1570 // by Banerjee and Wolfe. See Algorithm 6.2.1 (case 2.5) in: 1571 // 1572 // Dependence Analysis for Supercomputing 1573 // Utpal Banerjee 1574 // Kluwer Academic Publishers, 1988 1575 // 1576 // It's slower than the specialized tests (strong SIV, weak-zero SIV, etc), 1577 // so use them if possible. They're also a bit better with symbolics and, 1578 // in the case of the strong SIV test, can compute Distances. 1579 // 1580 // Return true if dependence disproved. 1581 // 1582 // This is a modified version of the original Banerjee algorithm. The original 1583 // only tested whether Dst depends on Src. This algorithm extends that and 1584 // returns all the dependencies that exist between Dst and Src. 1585 bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, 1586 const SCEV *SrcConst, const SCEV *DstConst, 1587 const Loop *CurLoop, unsigned Level, 1588 FullDependence &Result, 1589 Constraint &NewConstraint) const { 1590 LLVM_DEBUG(dbgs() << "\tExact SIV test\n"); 1591 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1592 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1593 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1594 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1595 ++ExactSIVapplications; 1596 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1597 Level--; 1598 Result.Consistent = false; 1599 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1600 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1601 NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff), Delta, 1602 CurLoop); 1603 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1604 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1605 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1606 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1607 return false; 1608 1609 // find gcd 1610 APInt G, X, Y; 1611 APInt AM = ConstSrcCoeff->getAPInt(); 1612 APInt BM = ConstDstCoeff->getAPInt(); 1613 APInt CM = ConstDelta->getAPInt(); 1614 unsigned Bits = AM.getBitWidth(); 1615 if (findGCD(Bits, AM, BM, CM, G, X, Y)) { 1616 // gcd doesn't divide Delta, no dependence 1617 ++ExactSIVindependence; 1618 ++ExactSIVsuccesses; 1619 return true; 1620 } 1621 1622 LLVM_DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 1623 1624 // since SCEV construction normalizes, LM = 0 1625 APInt UM(Bits, 1, true); 1626 bool UMValid = false; 1627 // UM is perhaps unavailable, let's check 1628 if (const SCEVConstant *CUB = 1629 collectConstantUpperBound(CurLoop, Delta->getType())) { 1630 UM = CUB->getAPInt(); 1631 LLVM_DEBUG(dbgs() << "\t UM = " << UM << "\n"); 1632 UMValid = true; 1633 } 1634 1635 APInt TU(APInt::getSignedMaxValue(Bits)); 1636 APInt TL(APInt::getSignedMinValue(Bits)); 1637 APInt TC = CM.sdiv(G); 1638 APInt TX = X * TC; 1639 APInt TY = Y * TC; 1640 LLVM_DEBUG(dbgs() << "\t TC = " << TC << "\n"); 1641 LLVM_DEBUG(dbgs() << "\t TX = " << TX << "\n"); 1642 LLVM_DEBUG(dbgs() << "\t TY = " << TY << "\n"); 1643 1644 SmallVector<APInt, 2> TLVec, TUVec; 1645 APInt TB = BM.sdiv(G); 1646 if (TB.sgt(0)) { 1647 TLVec.push_back(ceilingOfQuotient(-TX, TB)); 1648 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 1649 // New bound check - modification to Banerjee's e3 check 1650 if (UMValid) { 1651 TUVec.push_back(floorOfQuotient(UM - TX, TB)); 1652 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 1653 } 1654 } else { 1655 TUVec.push_back(floorOfQuotient(-TX, TB)); 1656 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 1657 // New bound check - modification to Banerjee's e3 check 1658 if (UMValid) { 1659 TLVec.push_back(ceilingOfQuotient(UM - TX, TB)); 1660 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 1661 } 1662 } 1663 1664 APInt TA = AM.sdiv(G); 1665 if (TA.sgt(0)) { 1666 if (UMValid) { 1667 TUVec.push_back(floorOfQuotient(UM - TY, TA)); 1668 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 1669 } 1670 // New bound check - modification to Banerjee's e3 check 1671 TLVec.push_back(ceilingOfQuotient(-TY, TA)); 1672 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 1673 } else { 1674 if (UMValid) { 1675 TLVec.push_back(ceilingOfQuotient(UM - TY, TA)); 1676 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 1677 } 1678 // New bound check - modification to Banerjee's e3 check 1679 TUVec.push_back(floorOfQuotient(-TY, TA)); 1680 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 1681 } 1682 1683 LLVM_DEBUG(dbgs() << "\t TA = " << TA << "\n"); 1684 LLVM_DEBUG(dbgs() << "\t TB = " << TB << "\n"); 1685 1686 if (TLVec.empty() || TUVec.empty()) 1687 return false; 1688 TL = APIntOps::smax(TLVec.front(), TLVec.back()); 1689 TU = APIntOps::smin(TUVec.front(), TUVec.back()); 1690 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1691 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1692 1693 if (TL.sgt(TU)) { 1694 ++ExactSIVindependence; 1695 ++ExactSIVsuccesses; 1696 return true; 1697 } 1698 1699 // explore directions 1700 unsigned NewDirection = Dependence::DVEntry::NONE; 1701 APInt LowerDistance, UpperDistance; 1702 if (TA.sgt(TB)) { 1703 LowerDistance = (TY - TX) + (TA - TB) * TL; 1704 UpperDistance = (TY - TX) + (TA - TB) * TU; 1705 } else { 1706 LowerDistance = (TY - TX) + (TA - TB) * TU; 1707 UpperDistance = (TY - TX) + (TA - TB) * TL; 1708 } 1709 1710 LLVM_DEBUG(dbgs() << "\t LowerDistance = " << LowerDistance << "\n"); 1711 LLVM_DEBUG(dbgs() << "\t UpperDistance = " << UpperDistance << "\n"); 1712 1713 APInt Zero(Bits, 0, true); 1714 if (LowerDistance.sle(Zero) && UpperDistance.sge(Zero)) { 1715 NewDirection |= Dependence::DVEntry::EQ; 1716 ++ExactSIVsuccesses; 1717 } 1718 if (LowerDistance.slt(0)) { 1719 NewDirection |= Dependence::DVEntry::GT; 1720 ++ExactSIVsuccesses; 1721 } 1722 if (UpperDistance.sgt(0)) { 1723 NewDirection |= Dependence::DVEntry::LT; 1724 ++ExactSIVsuccesses; 1725 } 1726 1727 // finished 1728 Result.DV[Level].Direction &= NewDirection; 1729 if (Result.DV[Level].Direction == Dependence::DVEntry::NONE) 1730 ++ExactSIVindependence; 1731 LLVM_DEBUG(dbgs() << "\t Result = "); 1732 LLVM_DEBUG(Result.dump(dbgs())); 1733 return Result.DV[Level].Direction == Dependence::DVEntry::NONE; 1734 } 1735 1736 1737 // Return true if the divisor evenly divides the dividend. 1738 static 1739 bool isRemainderZero(const SCEVConstant *Dividend, 1740 const SCEVConstant *Divisor) { 1741 const APInt &ConstDividend = Dividend->getAPInt(); 1742 const APInt &ConstDivisor = Divisor->getAPInt(); 1743 return ConstDividend.srem(ConstDivisor) == 0; 1744 } 1745 1746 1747 // weakZeroSrcSIVtest - 1748 // From the paper, Practical Dependence Testing, Section 4.2.2 1749 // 1750 // When we have a pair of subscripts of the form [c1] and [c2 + a*i], 1751 // where i is an induction variable, c1 and c2 are loop invariant, 1752 // and a is a constant, we can solve it exactly using the 1753 // Weak-Zero SIV test. 1754 // 1755 // Given 1756 // 1757 // c1 = c2 + a*i 1758 // 1759 // we get 1760 // 1761 // (c1 - c2)/a = i 1762 // 1763 // If i is not an integer, there's no dependence. 1764 // If i < 0 or > UB, there's no dependence. 1765 // If i = 0, the direction is >= and peeling the 1766 // 1st iteration will break the dependence. 1767 // If i = UB, the direction is <= and peeling the 1768 // last iteration will break the dependence. 1769 // Otherwise, the direction is *. 1770 // 1771 // Can prove independence. Failing that, we can sometimes refine 1772 // the directions. Can sometimes show that first or last 1773 // iteration carries all the dependences (so worth peeling). 1774 // 1775 // (see also weakZeroDstSIVtest) 1776 // 1777 // Return true if dependence disproved. 1778 bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff, 1779 const SCEV *SrcConst, 1780 const SCEV *DstConst, 1781 const Loop *CurLoop, unsigned Level, 1782 FullDependence &Result, 1783 Constraint &NewConstraint) const { 1784 // For the WeakSIV test, it's possible the loop isn't common to 1785 // the Src and Dst loops. If it isn't, then there's no need to 1786 // record a direction. 1787 LLVM_DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n"); 1788 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << "\n"); 1789 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1790 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1791 ++WeakZeroSIVapplications; 1792 assert(0 < Level && Level <= MaxLevels && "Level out of range"); 1793 Level--; 1794 Result.Consistent = false; 1795 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1796 NewConstraint.setLine(SE->getZero(Delta->getType()), DstCoeff, Delta, 1797 CurLoop); 1798 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1799 if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) { 1800 if (Level < CommonLevels) { 1801 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1802 Result.DV[Level].PeelFirst = true; 1803 ++WeakZeroSIVsuccesses; 1804 } 1805 return false; // dependences caused by first iteration 1806 } 1807 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1808 if (!ConstCoeff) 1809 return false; 1810 const SCEV *AbsCoeff = 1811 SE->isKnownNegative(ConstCoeff) ? 1812 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1813 const SCEV *NewDelta = 1814 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1815 1816 // check that Delta/SrcCoeff < iteration count 1817 // really check NewDelta < count*AbsCoeff 1818 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1819 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1820 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1821 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1822 ++WeakZeroSIVindependence; 1823 ++WeakZeroSIVsuccesses; 1824 return true; 1825 } 1826 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1827 // dependences caused by last iteration 1828 if (Level < CommonLevels) { 1829 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1830 Result.DV[Level].PeelLast = true; 1831 ++WeakZeroSIVsuccesses; 1832 } 1833 return false; 1834 } 1835 } 1836 1837 // check that Delta/SrcCoeff >= 0 1838 // really check that NewDelta >= 0 1839 if (SE->isKnownNegative(NewDelta)) { 1840 // No dependence, newDelta < 0 1841 ++WeakZeroSIVindependence; 1842 ++WeakZeroSIVsuccesses; 1843 return true; 1844 } 1845 1846 // if SrcCoeff doesn't divide Delta, then no dependence 1847 if (isa<SCEVConstant>(Delta) && 1848 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1849 ++WeakZeroSIVindependence; 1850 ++WeakZeroSIVsuccesses; 1851 return true; 1852 } 1853 return false; 1854 } 1855 1856 1857 // weakZeroDstSIVtest - 1858 // From the paper, Practical Dependence Testing, Section 4.2.2 1859 // 1860 // When we have a pair of subscripts of the form [c1 + a*i] and [c2], 1861 // where i is an induction variable, c1 and c2 are loop invariant, 1862 // and a is a constant, we can solve it exactly using the 1863 // Weak-Zero SIV test. 1864 // 1865 // Given 1866 // 1867 // c1 + a*i = c2 1868 // 1869 // we get 1870 // 1871 // i = (c2 - c1)/a 1872 // 1873 // If i is not an integer, there's no dependence. 1874 // If i < 0 or > UB, there's no dependence. 1875 // If i = 0, the direction is <= and peeling the 1876 // 1st iteration will break the dependence. 1877 // If i = UB, the direction is >= and peeling the 1878 // last iteration will break the dependence. 1879 // Otherwise, the direction is *. 1880 // 1881 // Can prove independence. Failing that, we can sometimes refine 1882 // the directions. Can sometimes show that first or last 1883 // iteration carries all the dependences (so worth peeling). 1884 // 1885 // (see also weakZeroSrcSIVtest) 1886 // 1887 // Return true if dependence disproved. 1888 bool DependenceInfo::weakZeroDstSIVtest(const SCEV *SrcCoeff, 1889 const SCEV *SrcConst, 1890 const SCEV *DstConst, 1891 const Loop *CurLoop, unsigned Level, 1892 FullDependence &Result, 1893 Constraint &NewConstraint) const { 1894 // For the WeakSIV test, it's possible the loop isn't common to the 1895 // Src and Dst loops. If it isn't, then there's no need to record a direction. 1896 LLVM_DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n"); 1897 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << "\n"); 1898 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1899 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1900 ++WeakZeroSIVapplications; 1901 assert(0 < Level && Level <= SrcLevels && "Level out of range"); 1902 Level--; 1903 Result.Consistent = false; 1904 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1905 NewConstraint.setLine(SrcCoeff, SE->getZero(Delta->getType()), Delta, 1906 CurLoop); 1907 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1908 if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) { 1909 if (Level < CommonLevels) { 1910 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1911 Result.DV[Level].PeelFirst = true; 1912 ++WeakZeroSIVsuccesses; 1913 } 1914 return false; // dependences caused by first iteration 1915 } 1916 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1917 if (!ConstCoeff) 1918 return false; 1919 const SCEV *AbsCoeff = 1920 SE->isKnownNegative(ConstCoeff) ? 1921 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1922 const SCEV *NewDelta = 1923 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1924 1925 // check that Delta/SrcCoeff < iteration count 1926 // really check NewDelta < count*AbsCoeff 1927 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1928 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1929 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1930 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1931 ++WeakZeroSIVindependence; 1932 ++WeakZeroSIVsuccesses; 1933 return true; 1934 } 1935 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1936 // dependences caused by last iteration 1937 if (Level < CommonLevels) { 1938 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1939 Result.DV[Level].PeelLast = true; 1940 ++WeakZeroSIVsuccesses; 1941 } 1942 return false; 1943 } 1944 } 1945 1946 // check that Delta/SrcCoeff >= 0 1947 // really check that NewDelta >= 0 1948 if (SE->isKnownNegative(NewDelta)) { 1949 // No dependence, newDelta < 0 1950 ++WeakZeroSIVindependence; 1951 ++WeakZeroSIVsuccesses; 1952 return true; 1953 } 1954 1955 // if SrcCoeff doesn't divide Delta, then no dependence 1956 if (isa<SCEVConstant>(Delta) && 1957 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1958 ++WeakZeroSIVindependence; 1959 ++WeakZeroSIVsuccesses; 1960 return true; 1961 } 1962 return false; 1963 } 1964 1965 1966 // exactRDIVtest - Tests the RDIV subscript pair for dependence. 1967 // Things of the form [c1 + a*i] and [c2 + b*j], 1968 // where i and j are induction variable, c1 and c2 are loop invariant, 1969 // and a and b are constants. 1970 // Returns true if any possible dependence is disproved. 1971 // Marks the result as inconsistent. 1972 // Works in some cases that symbolicRDIVtest doesn't, and vice versa. 1973 bool DependenceInfo::exactRDIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, 1974 const SCEV *SrcConst, const SCEV *DstConst, 1975 const Loop *SrcLoop, const Loop *DstLoop, 1976 FullDependence &Result) const { 1977 LLVM_DEBUG(dbgs() << "\tExact RDIV test\n"); 1978 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1979 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1980 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1981 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1982 ++ExactRDIVapplications; 1983 Result.Consistent = false; 1984 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1985 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1986 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1987 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1988 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1989 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1990 return false; 1991 1992 // find gcd 1993 APInt G, X, Y; 1994 APInt AM = ConstSrcCoeff->getAPInt(); 1995 APInt BM = ConstDstCoeff->getAPInt(); 1996 APInt CM = ConstDelta->getAPInt(); 1997 unsigned Bits = AM.getBitWidth(); 1998 if (findGCD(Bits, AM, BM, CM, G, X, Y)) { 1999 // gcd doesn't divide Delta, no dependence 2000 ++ExactRDIVindependence; 2001 return true; 2002 } 2003 2004 LLVM_DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 2005 2006 // since SCEV construction seems to normalize, LM = 0 2007 APInt SrcUM(Bits, 1, true); 2008 bool SrcUMvalid = false; 2009 // SrcUM is perhaps unavailable, let's check 2010 if (const SCEVConstant *UpperBound = 2011 collectConstantUpperBound(SrcLoop, Delta->getType())) { 2012 SrcUM = UpperBound->getAPInt(); 2013 LLVM_DEBUG(dbgs() << "\t SrcUM = " << SrcUM << "\n"); 2014 SrcUMvalid = true; 2015 } 2016 2017 APInt DstUM(Bits, 1, true); 2018 bool DstUMvalid = false; 2019 // UM is perhaps unavailable, let's check 2020 if (const SCEVConstant *UpperBound = 2021 collectConstantUpperBound(DstLoop, Delta->getType())) { 2022 DstUM = UpperBound->getAPInt(); 2023 LLVM_DEBUG(dbgs() << "\t DstUM = " << DstUM << "\n"); 2024 DstUMvalid = true; 2025 } 2026 2027 APInt TU(APInt::getSignedMaxValue(Bits)); 2028 APInt TL(APInt::getSignedMinValue(Bits)); 2029 APInt TC = CM.sdiv(G); 2030 APInt TX = X * TC; 2031 APInt TY = Y * TC; 2032 LLVM_DEBUG(dbgs() << "\t TC = " << TC << "\n"); 2033 LLVM_DEBUG(dbgs() << "\t TX = " << TX << "\n"); 2034 LLVM_DEBUG(dbgs() << "\t TY = " << TY << "\n"); 2035 2036 SmallVector<APInt, 2> TLVec, TUVec; 2037 APInt TB = BM.sdiv(G); 2038 if (TB.sgt(0)) { 2039 TLVec.push_back(ceilingOfQuotient(-TX, TB)); 2040 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 2041 if (SrcUMvalid) { 2042 TUVec.push_back(floorOfQuotient(SrcUM - TX, TB)); 2043 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 2044 } 2045 } else { 2046 TUVec.push_back(floorOfQuotient(-TX, TB)); 2047 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 2048 if (SrcUMvalid) { 2049 TLVec.push_back(ceilingOfQuotient(SrcUM - TX, TB)); 2050 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 2051 } 2052 } 2053 2054 APInt TA = AM.sdiv(G); 2055 if (TA.sgt(0)) { 2056 TLVec.push_back(ceilingOfQuotient(-TY, TA)); 2057 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 2058 if (DstUMvalid) { 2059 TUVec.push_back(floorOfQuotient(DstUM - TY, TA)); 2060 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 2061 } 2062 } else { 2063 TUVec.push_back(floorOfQuotient(-TY, TA)); 2064 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 2065 if (DstUMvalid) { 2066 TLVec.push_back(ceilingOfQuotient(DstUM - TY, TA)); 2067 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 2068 } 2069 } 2070 2071 if (TLVec.empty() || TUVec.empty()) 2072 return false; 2073 2074 LLVM_DEBUG(dbgs() << "\t TA = " << TA << "\n"); 2075 LLVM_DEBUG(dbgs() << "\t TB = " << TB << "\n"); 2076 2077 TL = APIntOps::smax(TLVec.front(), TLVec.back()); 2078 TU = APIntOps::smin(TUVec.front(), TUVec.back()); 2079 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); 2080 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); 2081 2082 if (TL.sgt(TU)) 2083 ++ExactRDIVindependence; 2084 return TL.sgt(TU); 2085 } 2086 2087 2088 // symbolicRDIVtest - 2089 // In Section 4.5 of the Practical Dependence Testing paper,the authors 2090 // introduce a special case of Banerjee's Inequalities (also called the 2091 // Extreme-Value Test) that can handle some of the SIV and RDIV cases, 2092 // particularly cases with symbolics. Since it's only able to disprove 2093 // dependence (not compute distances or directions), we'll use it as a 2094 // fall back for the other tests. 2095 // 2096 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 2097 // where i and j are induction variables and c1 and c2 are loop invariants, 2098 // we can use the symbolic tests to disprove some dependences, serving as a 2099 // backup for the RDIV test. Note that i and j can be the same variable, 2100 // letting this test serve as a backup for the various SIV tests. 2101 // 2102 // For a dependence to exist, c1 + a1*i must equal c2 + a2*j for some 2103 // 0 <= i <= N1 and some 0 <= j <= N2, where N1 and N2 are the (normalized) 2104 // loop bounds for the i and j loops, respectively. So, ... 2105 // 2106 // c1 + a1*i = c2 + a2*j 2107 // a1*i - a2*j = c2 - c1 2108 // 2109 // To test for a dependence, we compute c2 - c1 and make sure it's in the 2110 // range of the maximum and minimum possible values of a1*i - a2*j. 2111 // Considering the signs of a1 and a2, we have 4 possible cases: 2112 // 2113 // 1) If a1 >= 0 and a2 >= 0, then 2114 // a1*0 - a2*N2 <= c2 - c1 <= a1*N1 - a2*0 2115 // -a2*N2 <= c2 - c1 <= a1*N1 2116 // 2117 // 2) If a1 >= 0 and a2 <= 0, then 2118 // a1*0 - a2*0 <= c2 - c1 <= a1*N1 - a2*N2 2119 // 0 <= c2 - c1 <= a1*N1 - a2*N2 2120 // 2121 // 3) If a1 <= 0 and a2 >= 0, then 2122 // a1*N1 - a2*N2 <= c2 - c1 <= a1*0 - a2*0 2123 // a1*N1 - a2*N2 <= c2 - c1 <= 0 2124 // 2125 // 4) If a1 <= 0 and a2 <= 0, then 2126 // a1*N1 - a2*0 <= c2 - c1 <= a1*0 - a2*N2 2127 // a1*N1 <= c2 - c1 <= -a2*N2 2128 // 2129 // return true if dependence disproved 2130 bool DependenceInfo::symbolicRDIVtest(const SCEV *A1, const SCEV *A2, 2131 const SCEV *C1, const SCEV *C2, 2132 const Loop *Loop1, 2133 const Loop *Loop2) const { 2134 ++SymbolicRDIVapplications; 2135 LLVM_DEBUG(dbgs() << "\ttry symbolic RDIV test\n"); 2136 LLVM_DEBUG(dbgs() << "\t A1 = " << *A1); 2137 LLVM_DEBUG(dbgs() << ", type = " << *A1->getType() << "\n"); 2138 LLVM_DEBUG(dbgs() << "\t A2 = " << *A2 << "\n"); 2139 LLVM_DEBUG(dbgs() << "\t C1 = " << *C1 << "\n"); 2140 LLVM_DEBUG(dbgs() << "\t C2 = " << *C2 << "\n"); 2141 const SCEV *N1 = collectUpperBound(Loop1, A1->getType()); 2142 const SCEV *N2 = collectUpperBound(Loop2, A1->getType()); 2143 LLVM_DEBUG(if (N1) dbgs() << "\t N1 = " << *N1 << "\n"); 2144 LLVM_DEBUG(if (N2) dbgs() << "\t N2 = " << *N2 << "\n"); 2145 const SCEV *C2_C1 = SE->getMinusSCEV(C2, C1); 2146 const SCEV *C1_C2 = SE->getMinusSCEV(C1, C2); 2147 LLVM_DEBUG(dbgs() << "\t C2 - C1 = " << *C2_C1 << "\n"); 2148 LLVM_DEBUG(dbgs() << "\t C1 - C2 = " << *C1_C2 << "\n"); 2149 if (SE->isKnownNonNegative(A1)) { 2150 if (SE->isKnownNonNegative(A2)) { 2151 // A1 >= 0 && A2 >= 0 2152 if (N1) { 2153 // make sure that c2 - c1 <= a1*N1 2154 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2155 LLVM_DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 2156 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1)) { 2157 ++SymbolicRDIVindependence; 2158 return true; 2159 } 2160 } 2161 if (N2) { 2162 // make sure that -a2*N2 <= c2 - c1, or a2*N2 >= c1 - c2 2163 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2164 LLVM_DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 2165 if (isKnownPredicate(CmpInst::ICMP_SLT, A2N2, C1_C2)) { 2166 ++SymbolicRDIVindependence; 2167 return true; 2168 } 2169 } 2170 } 2171 else if (SE->isKnownNonPositive(A2)) { 2172 // a1 >= 0 && a2 <= 0 2173 if (N1 && N2) { 2174 // make sure that c2 - c1 <= a1*N1 - a2*N2 2175 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2176 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2177 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 2178 LLVM_DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 2179 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1_A2N2)) { 2180 ++SymbolicRDIVindependence; 2181 return true; 2182 } 2183 } 2184 // make sure that 0 <= c2 - c1 2185 if (SE->isKnownNegative(C2_C1)) { 2186 ++SymbolicRDIVindependence; 2187 return true; 2188 } 2189 } 2190 } 2191 else if (SE->isKnownNonPositive(A1)) { 2192 if (SE->isKnownNonNegative(A2)) { 2193 // a1 <= 0 && a2 >= 0 2194 if (N1 && N2) { 2195 // make sure that a1*N1 - a2*N2 <= c2 - c1 2196 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2197 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2198 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 2199 LLVM_DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 2200 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1_A2N2, C2_C1)) { 2201 ++SymbolicRDIVindependence; 2202 return true; 2203 } 2204 } 2205 // make sure that c2 - c1 <= 0 2206 if (SE->isKnownPositive(C2_C1)) { 2207 ++SymbolicRDIVindependence; 2208 return true; 2209 } 2210 } 2211 else if (SE->isKnownNonPositive(A2)) { 2212 // a1 <= 0 && a2 <= 0 2213 if (N1) { 2214 // make sure that a1*N1 <= c2 - c1 2215 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2216 LLVM_DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 2217 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1, C2_C1)) { 2218 ++SymbolicRDIVindependence; 2219 return true; 2220 } 2221 } 2222 if (N2) { 2223 // make sure that c2 - c1 <= -a2*N2, or c1 - c2 >= a2*N2 2224 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2225 LLVM_DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 2226 if (isKnownPredicate(CmpInst::ICMP_SLT, C1_C2, A2N2)) { 2227 ++SymbolicRDIVindependence; 2228 return true; 2229 } 2230 } 2231 } 2232 } 2233 return false; 2234 } 2235 2236 2237 // testSIV - 2238 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 - a2*i] 2239 // where i is an induction variable, c1 and c2 are loop invariant, and a1 and 2240 // a2 are constant, we attack it with an SIV test. While they can all be 2241 // solved with the Exact SIV test, it's worthwhile to use simpler tests when 2242 // they apply; they're cheaper and sometimes more precise. 2243 // 2244 // Return true if dependence disproved. 2245 bool DependenceInfo::testSIV(const SCEV *Src, const SCEV *Dst, unsigned &Level, 2246 FullDependence &Result, Constraint &NewConstraint, 2247 const SCEV *&SplitIter) const { 2248 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n"); 2249 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2250 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2251 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2252 if (SrcAddRec && DstAddRec) { 2253 const SCEV *SrcConst = SrcAddRec->getStart(); 2254 const SCEV *DstConst = DstAddRec->getStart(); 2255 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2256 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2257 const Loop *CurLoop = SrcAddRec->getLoop(); 2258 assert(CurLoop == DstAddRec->getLoop() && 2259 "both loops in SIV should be same"); 2260 Level = mapSrcLoop(CurLoop); 2261 bool disproven; 2262 if (SrcCoeff == DstCoeff) 2263 disproven = strongSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2264 Level, Result, NewConstraint); 2265 else if (SrcCoeff == SE->getNegativeSCEV(DstCoeff)) 2266 disproven = weakCrossingSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2267 Level, Result, NewConstraint, SplitIter); 2268 else 2269 disproven = exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, 2270 Level, Result, NewConstraint); 2271 return disproven || 2272 gcdMIVtest(Src, Dst, Result) || 2273 symbolicRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, CurLoop); 2274 } 2275 if (SrcAddRec) { 2276 const SCEV *SrcConst = SrcAddRec->getStart(); 2277 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2278 const SCEV *DstConst = Dst; 2279 const Loop *CurLoop = SrcAddRec->getLoop(); 2280 Level = mapSrcLoop(CurLoop); 2281 return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2282 Level, Result, NewConstraint) || 2283 gcdMIVtest(Src, Dst, Result); 2284 } 2285 if (DstAddRec) { 2286 const SCEV *DstConst = DstAddRec->getStart(); 2287 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2288 const SCEV *SrcConst = Src; 2289 const Loop *CurLoop = DstAddRec->getLoop(); 2290 Level = mapDstLoop(CurLoop); 2291 return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst, 2292 CurLoop, Level, Result, NewConstraint) || 2293 gcdMIVtest(Src, Dst, Result); 2294 } 2295 llvm_unreachable("SIV test expected at least one AddRec"); 2296 return false; 2297 } 2298 2299 2300 // testRDIV - 2301 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 2302 // where i and j are induction variables, c1 and c2 are loop invariant, 2303 // and a1 and a2 are constant, we can solve it exactly with an easy adaptation 2304 // of the Exact SIV test, the Restricted Double Index Variable (RDIV) test. 2305 // It doesn't make sense to talk about distance or direction in this case, 2306 // so there's no point in making special versions of the Strong SIV test or 2307 // the Weak-crossing SIV test. 2308 // 2309 // With minor algebra, this test can also be used for things like 2310 // [c1 + a1*i + a2*j][c2]. 2311 // 2312 // Return true if dependence disproved. 2313 bool DependenceInfo::testRDIV(const SCEV *Src, const SCEV *Dst, 2314 FullDependence &Result) const { 2315 // we have 3 possible situations here: 2316 // 1) [a*i + b] and [c*j + d] 2317 // 2) [a*i + c*j + b] and [d] 2318 // 3) [b] and [a*i + c*j + d] 2319 // We need to find what we've got and get organized 2320 2321 const SCEV *SrcConst, *DstConst; 2322 const SCEV *SrcCoeff, *DstCoeff; 2323 const Loop *SrcLoop, *DstLoop; 2324 2325 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n"); 2326 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2327 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2328 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2329 if (SrcAddRec && DstAddRec) { 2330 SrcConst = SrcAddRec->getStart(); 2331 SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2332 SrcLoop = SrcAddRec->getLoop(); 2333 DstConst = DstAddRec->getStart(); 2334 DstCoeff = DstAddRec->getStepRecurrence(*SE); 2335 DstLoop = DstAddRec->getLoop(); 2336 } 2337 else if (SrcAddRec) { 2338 if (const SCEVAddRecExpr *tmpAddRec = 2339 dyn_cast<SCEVAddRecExpr>(SrcAddRec->getStart())) { 2340 SrcConst = tmpAddRec->getStart(); 2341 SrcCoeff = tmpAddRec->getStepRecurrence(*SE); 2342 SrcLoop = tmpAddRec->getLoop(); 2343 DstConst = Dst; 2344 DstCoeff = SE->getNegativeSCEV(SrcAddRec->getStepRecurrence(*SE)); 2345 DstLoop = SrcAddRec->getLoop(); 2346 } 2347 else 2348 llvm_unreachable("RDIV reached by surprising SCEVs"); 2349 } 2350 else if (DstAddRec) { 2351 if (const SCEVAddRecExpr *tmpAddRec = 2352 dyn_cast<SCEVAddRecExpr>(DstAddRec->getStart())) { 2353 DstConst = tmpAddRec->getStart(); 2354 DstCoeff = tmpAddRec->getStepRecurrence(*SE); 2355 DstLoop = tmpAddRec->getLoop(); 2356 SrcConst = Src; 2357 SrcCoeff = SE->getNegativeSCEV(DstAddRec->getStepRecurrence(*SE)); 2358 SrcLoop = DstAddRec->getLoop(); 2359 } 2360 else 2361 llvm_unreachable("RDIV reached by surprising SCEVs"); 2362 } 2363 else 2364 llvm_unreachable("RDIV expected at least one AddRec"); 2365 return exactRDIVtest(SrcCoeff, DstCoeff, 2366 SrcConst, DstConst, 2367 SrcLoop, DstLoop, 2368 Result) || 2369 gcdMIVtest(Src, Dst, Result) || 2370 symbolicRDIVtest(SrcCoeff, DstCoeff, 2371 SrcConst, DstConst, 2372 SrcLoop, DstLoop); 2373 } 2374 2375 2376 // Tests the single-subscript MIV pair (Src and Dst) for dependence. 2377 // Return true if dependence disproved. 2378 // Can sometimes refine direction vectors. 2379 bool DependenceInfo::testMIV(const SCEV *Src, const SCEV *Dst, 2380 const SmallBitVector &Loops, 2381 FullDependence &Result) const { 2382 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n"); 2383 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2384 Result.Consistent = false; 2385 return gcdMIVtest(Src, Dst, Result) || 2386 banerjeeMIVtest(Src, Dst, Loops, Result); 2387 } 2388 2389 // Given a product, e.g., 10*X*Y, returns the first constant operand, 2390 // in this case 10. If there is no constant part, returns std::nullopt. 2391 static std::optional<APInt> getConstantPart(const SCEV *Expr) { 2392 if (const auto *Constant = dyn_cast<SCEVConstant>(Expr)) 2393 return Constant->getAPInt(); 2394 if (const auto *Product = dyn_cast<SCEVMulExpr>(Expr)) 2395 if (const auto *Constant = dyn_cast<SCEVConstant>(Product->getOperand(0))) 2396 return Constant->getAPInt(); 2397 return std::nullopt; 2398 } 2399 2400 //===----------------------------------------------------------------------===// 2401 // gcdMIVtest - 2402 // Tests an MIV subscript pair for dependence. 2403 // Returns true if any possible dependence is disproved. 2404 // Marks the result as inconsistent. 2405 // Can sometimes disprove the equal direction for 1 or more loops, 2406 // as discussed in Michael Wolfe's book, 2407 // High Performance Compilers for Parallel Computing, page 235. 2408 // 2409 // We spend some effort (code!) to handle cases like 2410 // [10*i + 5*N*j + 15*M + 6], where i and j are induction variables, 2411 // but M and N are just loop-invariant variables. 2412 // This should help us handle linearized subscripts; 2413 // also makes this test a useful backup to the various SIV tests. 2414 // 2415 // It occurs to me that the presence of loop-invariant variables 2416 // changes the nature of the test from "greatest common divisor" 2417 // to "a common divisor". 2418 bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst, 2419 FullDependence &Result) const { 2420 LLVM_DEBUG(dbgs() << "starting gcd\n"); 2421 ++GCDapplications; 2422 unsigned BitWidth = SE->getTypeSizeInBits(Src->getType()); 2423 APInt RunningGCD = APInt::getZero(BitWidth); 2424 2425 // Examine Src coefficients. 2426 // Compute running GCD and record source constant. 2427 // Because we're looking for the constant at the end of the chain, 2428 // we can't quit the loop just because the GCD == 1. 2429 const SCEV *Coefficients = Src; 2430 while (const SCEVAddRecExpr *AddRec = 2431 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2432 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2433 // If the coefficient is the product of a constant and other stuff, 2434 // we can use the constant in the GCD computation. 2435 std::optional<APInt> ConstCoeff = getConstantPart(Coeff); 2436 if (!ConstCoeff) 2437 return false; 2438 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff->abs()); 2439 Coefficients = AddRec->getStart(); 2440 } 2441 const SCEV *SrcConst = Coefficients; 2442 2443 // Examine Dst coefficients. 2444 // Compute running GCD and record destination constant. 2445 // Because we're looking for the constant at the end of the chain, 2446 // we can't quit the loop just because the GCD == 1. 2447 Coefficients = Dst; 2448 while (const SCEVAddRecExpr *AddRec = 2449 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2450 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2451 // If the coefficient is the product of a constant and other stuff, 2452 // we can use the constant in the GCD computation. 2453 std::optional<APInt> ConstCoeff = getConstantPart(Coeff); 2454 if (!ConstCoeff) 2455 return false; 2456 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff->abs()); 2457 Coefficients = AddRec->getStart(); 2458 } 2459 const SCEV *DstConst = Coefficients; 2460 2461 APInt ExtraGCD = APInt::getZero(BitWidth); 2462 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 2463 LLVM_DEBUG(dbgs() << " Delta = " << *Delta << "\n"); 2464 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Delta); 2465 if (const SCEVAddExpr *Sum = dyn_cast<SCEVAddExpr>(Delta)) { 2466 // If Delta is a sum of products, we may be able to make further progress. 2467 for (const SCEV *Operand : Sum->operands()) { 2468 if (isa<SCEVConstant>(Operand)) { 2469 assert(!Constant && "Surprised to find multiple constants"); 2470 Constant = cast<SCEVConstant>(Operand); 2471 } 2472 else if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Operand)) { 2473 // Search for constant operand to participate in GCD; 2474 // If none found; return false. 2475 std::optional<APInt> ConstOp = getConstantPart(Product); 2476 if (!ConstOp) 2477 return false; 2478 ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD, ConstOp->abs()); 2479 } 2480 else 2481 return false; 2482 } 2483 } 2484 if (!Constant) 2485 return false; 2486 APInt ConstDelta = cast<SCEVConstant>(Constant)->getAPInt(); 2487 LLVM_DEBUG(dbgs() << " ConstDelta = " << ConstDelta << "\n"); 2488 if (ConstDelta == 0) 2489 return false; 2490 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ExtraGCD); 2491 LLVM_DEBUG(dbgs() << " RunningGCD = " << RunningGCD << "\n"); 2492 APInt Remainder = ConstDelta.srem(RunningGCD); 2493 if (Remainder != 0) { 2494 ++GCDindependence; 2495 return true; 2496 } 2497 2498 // Try to disprove equal directions. 2499 // For example, given a subscript pair [3*i + 2*j] and [i' + 2*j' - 1], 2500 // the code above can't disprove the dependence because the GCD = 1. 2501 // So we consider what happen if i = i' and what happens if j = j'. 2502 // If i = i', we can simplify the subscript to [2*i + 2*j] and [2*j' - 1], 2503 // which is infeasible, so we can disallow the = direction for the i level. 2504 // Setting j = j' doesn't help matters, so we end up with a direction vector 2505 // of [<>, *] 2506 // 2507 // Given A[5*i + 10*j*M + 9*M*N] and A[15*i + 20*j*M - 21*N*M + 5], 2508 // we need to remember that the constant part is 5 and the RunningGCD should 2509 // be initialized to ExtraGCD = 30. 2510 LLVM_DEBUG(dbgs() << " ExtraGCD = " << ExtraGCD << '\n'); 2511 2512 bool Improved = false; 2513 Coefficients = Src; 2514 while (const SCEVAddRecExpr *AddRec = 2515 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2516 Coefficients = AddRec->getStart(); 2517 const Loop *CurLoop = AddRec->getLoop(); 2518 RunningGCD = ExtraGCD; 2519 const SCEV *SrcCoeff = AddRec->getStepRecurrence(*SE); 2520 const SCEV *DstCoeff = SE->getMinusSCEV(SrcCoeff, SrcCoeff); 2521 const SCEV *Inner = Src; 2522 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2523 AddRec = cast<SCEVAddRecExpr>(Inner); 2524 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2525 if (CurLoop == AddRec->getLoop()) 2526 ; // SrcCoeff == Coeff 2527 else { 2528 // If the coefficient is the product of a constant and other stuff, 2529 // we can use the constant in the GCD computation. 2530 std::optional<APInt> ConstCoeff = getConstantPart(Coeff); 2531 if (!ConstCoeff) 2532 return false; 2533 RunningGCD = 2534 APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff->abs()); 2535 } 2536 Inner = AddRec->getStart(); 2537 } 2538 Inner = Dst; 2539 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2540 AddRec = cast<SCEVAddRecExpr>(Inner); 2541 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2542 if (CurLoop == AddRec->getLoop()) 2543 DstCoeff = Coeff; 2544 else { 2545 // If the coefficient is the product of a constant and other stuff, 2546 // we can use the constant in the GCD computation. 2547 std::optional<APInt> ConstCoeff = getConstantPart(Coeff); 2548 if (!ConstCoeff) 2549 return false; 2550 RunningGCD = 2551 APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff->abs()); 2552 } 2553 Inner = AddRec->getStart(); 2554 } 2555 Delta = SE->getMinusSCEV(SrcCoeff, DstCoeff); 2556 // If the coefficient is the product of a constant and other stuff, 2557 // we can use the constant in the GCD computation. 2558 std::optional<APInt> ConstCoeff = getConstantPart(Delta); 2559 if (!ConstCoeff) 2560 // The difference of the two coefficients might not be a product 2561 // or constant, in which case we give up on this direction. 2562 continue; 2563 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff->abs()); 2564 LLVM_DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n"); 2565 if (RunningGCD != 0) { 2566 Remainder = ConstDelta.srem(RunningGCD); 2567 LLVM_DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n"); 2568 if (Remainder != 0) { 2569 unsigned Level = mapSrcLoop(CurLoop); 2570 Result.DV[Level - 1].Direction &= ~Dependence::DVEntry::EQ; 2571 Improved = true; 2572 } 2573 } 2574 } 2575 if (Improved) 2576 ++GCDsuccesses; 2577 LLVM_DEBUG(dbgs() << "all done\n"); 2578 return false; 2579 } 2580 2581 2582 //===----------------------------------------------------------------------===// 2583 // banerjeeMIVtest - 2584 // Use Banerjee's Inequalities to test an MIV subscript pair. 2585 // (Wolfe, in the race-car book, calls this the Extreme Value Test.) 2586 // Generally follows the discussion in Section 2.5.2 of 2587 // 2588 // Optimizing Supercompilers for Supercomputers 2589 // Michael Wolfe 2590 // 2591 // The inequalities given on page 25 are simplified in that loops are 2592 // normalized so that the lower bound is always 0 and the stride is always 1. 2593 // For example, Wolfe gives 2594 // 2595 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2596 // 2597 // where A_k is the coefficient of the kth index in the source subscript, 2598 // B_k is the coefficient of the kth index in the destination subscript, 2599 // U_k is the upper bound of the kth index, L_k is the lower bound of the Kth 2600 // index, and N_k is the stride of the kth index. Since all loops are normalized 2601 // by the SCEV package, N_k = 1 and L_k = 0, allowing us to simplify the 2602 // equation to 2603 // 2604 // LB^<_k = (A^-_k - B_k)^- (U_k - 0 - 1) + (A_k - B_k)0 - B_k 1 2605 // = (A^-_k - B_k)^- (U_k - 1) - B_k 2606 // 2607 // Similar simplifications are possible for the other equations. 2608 // 2609 // When we can't determine the number of iterations for a loop, 2610 // we use NULL as an indicator for the worst case, infinity. 2611 // When computing the upper bound, NULL denotes +inf; 2612 // for the lower bound, NULL denotes -inf. 2613 // 2614 // Return true if dependence disproved. 2615 bool DependenceInfo::banerjeeMIVtest(const SCEV *Src, const SCEV *Dst, 2616 const SmallBitVector &Loops, 2617 FullDependence &Result) const { 2618 LLVM_DEBUG(dbgs() << "starting Banerjee\n"); 2619 ++BanerjeeApplications; 2620 LLVM_DEBUG(dbgs() << " Src = " << *Src << '\n'); 2621 const SCEV *A0; 2622 CoefficientInfo *A = collectCoeffInfo(Src, true, A0); 2623 LLVM_DEBUG(dbgs() << " Dst = " << *Dst << '\n'); 2624 const SCEV *B0; 2625 CoefficientInfo *B = collectCoeffInfo(Dst, false, B0); 2626 BoundInfo *Bound = new BoundInfo[MaxLevels + 1]; 2627 const SCEV *Delta = SE->getMinusSCEV(B0, A0); 2628 LLVM_DEBUG(dbgs() << "\tDelta = " << *Delta << '\n'); 2629 2630 // Compute bounds for all the * directions. 2631 LLVM_DEBUG(dbgs() << "\tBounds[*]\n"); 2632 for (unsigned K = 1; K <= MaxLevels; ++K) { 2633 Bound[K].Iterations = A[K].Iterations ? A[K].Iterations : B[K].Iterations; 2634 Bound[K].Direction = Dependence::DVEntry::ALL; 2635 Bound[K].DirSet = Dependence::DVEntry::NONE; 2636 findBoundsALL(A, B, Bound, K); 2637 #ifndef NDEBUG 2638 LLVM_DEBUG(dbgs() << "\t " << K << '\t'); 2639 if (Bound[K].Lower[Dependence::DVEntry::ALL]) 2640 LLVM_DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << '\t'); 2641 else 2642 LLVM_DEBUG(dbgs() << "-inf\t"); 2643 if (Bound[K].Upper[Dependence::DVEntry::ALL]) 2644 LLVM_DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << '\n'); 2645 else 2646 LLVM_DEBUG(dbgs() << "+inf\n"); 2647 #endif 2648 } 2649 2650 // Test the *, *, *, ... case. 2651 bool Disproved = false; 2652 if (testBounds(Dependence::DVEntry::ALL, 0, Bound, Delta)) { 2653 // Explore the direction vector hierarchy. 2654 unsigned DepthExpanded = 0; 2655 unsigned NewDeps = exploreDirections(1, A, B, Bound, 2656 Loops, DepthExpanded, Delta); 2657 if (NewDeps > 0) { 2658 bool Improved = false; 2659 for (unsigned K = 1; K <= CommonLevels; ++K) { 2660 if (Loops[K]) { 2661 unsigned Old = Result.DV[K - 1].Direction; 2662 Result.DV[K - 1].Direction = Old & Bound[K].DirSet; 2663 Improved |= Old != Result.DV[K - 1].Direction; 2664 if (!Result.DV[K - 1].Direction) { 2665 Improved = false; 2666 Disproved = true; 2667 break; 2668 } 2669 } 2670 } 2671 if (Improved) 2672 ++BanerjeeSuccesses; 2673 } 2674 else { 2675 ++BanerjeeIndependence; 2676 Disproved = true; 2677 } 2678 } 2679 else { 2680 ++BanerjeeIndependence; 2681 Disproved = true; 2682 } 2683 delete [] Bound; 2684 delete [] A; 2685 delete [] B; 2686 return Disproved; 2687 } 2688 2689 2690 // Hierarchically expands the direction vector 2691 // search space, combining the directions of discovered dependences 2692 // in the DirSet field of Bound. Returns the number of distinct 2693 // dependences discovered. If the dependence is disproved, 2694 // it will return 0. 2695 unsigned DependenceInfo::exploreDirections(unsigned Level, CoefficientInfo *A, 2696 CoefficientInfo *B, BoundInfo *Bound, 2697 const SmallBitVector &Loops, 2698 unsigned &DepthExpanded, 2699 const SCEV *Delta) const { 2700 // This algorithm has worst case complexity of O(3^n), where 'n' is the number 2701 // of common loop levels. To avoid excessive compile-time, pessimize all the 2702 // results and immediately return when the number of common levels is beyond 2703 // the given threshold. 2704 if (CommonLevels > MIVMaxLevelThreshold) { 2705 LLVM_DEBUG(dbgs() << "Number of common levels exceeded the threshold. MIV " 2706 "direction exploration is terminated.\n"); 2707 for (unsigned K = 1; K <= CommonLevels; ++K) 2708 if (Loops[K]) 2709 Bound[K].DirSet = Dependence::DVEntry::ALL; 2710 return 1; 2711 } 2712 2713 if (Level > CommonLevels) { 2714 // record result 2715 LLVM_DEBUG(dbgs() << "\t["); 2716 for (unsigned K = 1; K <= CommonLevels; ++K) { 2717 if (Loops[K]) { 2718 Bound[K].DirSet |= Bound[K].Direction; 2719 #ifndef NDEBUG 2720 switch (Bound[K].Direction) { 2721 case Dependence::DVEntry::LT: 2722 LLVM_DEBUG(dbgs() << " <"); 2723 break; 2724 case Dependence::DVEntry::EQ: 2725 LLVM_DEBUG(dbgs() << " ="); 2726 break; 2727 case Dependence::DVEntry::GT: 2728 LLVM_DEBUG(dbgs() << " >"); 2729 break; 2730 case Dependence::DVEntry::ALL: 2731 LLVM_DEBUG(dbgs() << " *"); 2732 break; 2733 default: 2734 llvm_unreachable("unexpected Bound[K].Direction"); 2735 } 2736 #endif 2737 } 2738 } 2739 LLVM_DEBUG(dbgs() << " ]\n"); 2740 return 1; 2741 } 2742 if (Loops[Level]) { 2743 if (Level > DepthExpanded) { 2744 DepthExpanded = Level; 2745 // compute bounds for <, =, > at current level 2746 findBoundsLT(A, B, Bound, Level); 2747 findBoundsGT(A, B, Bound, Level); 2748 findBoundsEQ(A, B, Bound, Level); 2749 #ifndef NDEBUG 2750 LLVM_DEBUG(dbgs() << "\tBound for level = " << Level << '\n'); 2751 LLVM_DEBUG(dbgs() << "\t <\t"); 2752 if (Bound[Level].Lower[Dependence::DVEntry::LT]) 2753 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT] 2754 << '\t'); 2755 else 2756 LLVM_DEBUG(dbgs() << "-inf\t"); 2757 if (Bound[Level].Upper[Dependence::DVEntry::LT]) 2758 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT] 2759 << '\n'); 2760 else 2761 LLVM_DEBUG(dbgs() << "+inf\n"); 2762 LLVM_DEBUG(dbgs() << "\t =\t"); 2763 if (Bound[Level].Lower[Dependence::DVEntry::EQ]) 2764 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ] 2765 << '\t'); 2766 else 2767 LLVM_DEBUG(dbgs() << "-inf\t"); 2768 if (Bound[Level].Upper[Dependence::DVEntry::EQ]) 2769 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ] 2770 << '\n'); 2771 else 2772 LLVM_DEBUG(dbgs() << "+inf\n"); 2773 LLVM_DEBUG(dbgs() << "\t >\t"); 2774 if (Bound[Level].Lower[Dependence::DVEntry::GT]) 2775 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::GT] 2776 << '\t'); 2777 else 2778 LLVM_DEBUG(dbgs() << "-inf\t"); 2779 if (Bound[Level].Upper[Dependence::DVEntry::GT]) 2780 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT] 2781 << '\n'); 2782 else 2783 LLVM_DEBUG(dbgs() << "+inf\n"); 2784 #endif 2785 } 2786 2787 unsigned NewDeps = 0; 2788 2789 // test bounds for <, *, *, ... 2790 if (testBounds(Dependence::DVEntry::LT, Level, Bound, Delta)) 2791 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2792 Loops, DepthExpanded, Delta); 2793 2794 // Test bounds for =, *, *, ... 2795 if (testBounds(Dependence::DVEntry::EQ, Level, Bound, Delta)) 2796 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2797 Loops, DepthExpanded, Delta); 2798 2799 // test bounds for >, *, *, ... 2800 if (testBounds(Dependence::DVEntry::GT, Level, Bound, Delta)) 2801 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2802 Loops, DepthExpanded, Delta); 2803 2804 Bound[Level].Direction = Dependence::DVEntry::ALL; 2805 return NewDeps; 2806 } 2807 else 2808 return exploreDirections(Level + 1, A, B, Bound, Loops, DepthExpanded, Delta); 2809 } 2810 2811 2812 // Returns true iff the current bounds are plausible. 2813 bool DependenceInfo::testBounds(unsigned char DirKind, unsigned Level, 2814 BoundInfo *Bound, const SCEV *Delta) const { 2815 Bound[Level].Direction = DirKind; 2816 if (const SCEV *LowerBound = getLowerBound(Bound)) 2817 if (isKnownPredicate(CmpInst::ICMP_SGT, LowerBound, Delta)) 2818 return false; 2819 if (const SCEV *UpperBound = getUpperBound(Bound)) 2820 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, UpperBound)) 2821 return false; 2822 return true; 2823 } 2824 2825 2826 // Computes the upper and lower bounds for level K 2827 // using the * direction. Records them in Bound. 2828 // Wolfe gives the equations 2829 // 2830 // LB^*_k = (A^-_k - B^+_k)(U_k - L_k) + (A_k - B_k)L_k 2831 // UB^*_k = (A^+_k - B^-_k)(U_k - L_k) + (A_k - B_k)L_k 2832 // 2833 // Since we normalize loops, we can simplify these equations to 2834 // 2835 // LB^*_k = (A^-_k - B^+_k)U_k 2836 // UB^*_k = (A^+_k - B^-_k)U_k 2837 // 2838 // We must be careful to handle the case where the upper bound is unknown. 2839 // Note that the lower bound is always <= 0 2840 // and the upper bound is always >= 0. 2841 void DependenceInfo::findBoundsALL(CoefficientInfo *A, CoefficientInfo *B, 2842 BoundInfo *Bound, unsigned K) const { 2843 Bound[K].Lower[Dependence::DVEntry::ALL] = nullptr; // Default value = -infinity. 2844 Bound[K].Upper[Dependence::DVEntry::ALL] = nullptr; // Default value = +infinity. 2845 if (Bound[K].Iterations) { 2846 Bound[K].Lower[Dependence::DVEntry::ALL] = 2847 SE->getMulExpr(SE->getMinusSCEV(A[K].NegPart, B[K].PosPart), 2848 Bound[K].Iterations); 2849 Bound[K].Upper[Dependence::DVEntry::ALL] = 2850 SE->getMulExpr(SE->getMinusSCEV(A[K].PosPart, B[K].NegPart), 2851 Bound[K].Iterations); 2852 } 2853 else { 2854 // If the difference is 0, we won't need to know the number of iterations. 2855 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].NegPart, B[K].PosPart)) 2856 Bound[K].Lower[Dependence::DVEntry::ALL] = 2857 SE->getZero(A[K].Coeff->getType()); 2858 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].PosPart, B[K].NegPart)) 2859 Bound[K].Upper[Dependence::DVEntry::ALL] = 2860 SE->getZero(A[K].Coeff->getType()); 2861 } 2862 } 2863 2864 2865 // Computes the upper and lower bounds for level K 2866 // using the = direction. Records them in Bound. 2867 // Wolfe gives the equations 2868 // 2869 // LB^=_k = (A_k - B_k)^- (U_k - L_k) + (A_k - B_k)L_k 2870 // UB^=_k = (A_k - B_k)^+ (U_k - L_k) + (A_k - B_k)L_k 2871 // 2872 // Since we normalize loops, we can simplify these equations to 2873 // 2874 // LB^=_k = (A_k - B_k)^- U_k 2875 // UB^=_k = (A_k - B_k)^+ U_k 2876 // 2877 // We must be careful to handle the case where the upper bound is unknown. 2878 // Note that the lower bound is always <= 0 2879 // and the upper bound is always >= 0. 2880 void DependenceInfo::findBoundsEQ(CoefficientInfo *A, CoefficientInfo *B, 2881 BoundInfo *Bound, unsigned K) const { 2882 Bound[K].Lower[Dependence::DVEntry::EQ] = nullptr; // Default value = -infinity. 2883 Bound[K].Upper[Dependence::DVEntry::EQ] = nullptr; // Default value = +infinity. 2884 if (Bound[K].Iterations) { 2885 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2886 const SCEV *NegativePart = getNegativePart(Delta); 2887 Bound[K].Lower[Dependence::DVEntry::EQ] = 2888 SE->getMulExpr(NegativePart, Bound[K].Iterations); 2889 const SCEV *PositivePart = getPositivePart(Delta); 2890 Bound[K].Upper[Dependence::DVEntry::EQ] = 2891 SE->getMulExpr(PositivePart, Bound[K].Iterations); 2892 } 2893 else { 2894 // If the positive/negative part of the difference is 0, 2895 // we won't need to know the number of iterations. 2896 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2897 const SCEV *NegativePart = getNegativePart(Delta); 2898 if (NegativePart->isZero()) 2899 Bound[K].Lower[Dependence::DVEntry::EQ] = NegativePart; // Zero 2900 const SCEV *PositivePart = getPositivePart(Delta); 2901 if (PositivePart->isZero()) 2902 Bound[K].Upper[Dependence::DVEntry::EQ] = PositivePart; // Zero 2903 } 2904 } 2905 2906 2907 // Computes the upper and lower bounds for level K 2908 // using the < direction. Records them in Bound. 2909 // Wolfe gives the equations 2910 // 2911 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2912 // UB^<_k = (A^+_k - B_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2913 // 2914 // Since we normalize loops, we can simplify these equations to 2915 // 2916 // LB^<_k = (A^-_k - B_k)^- (U_k - 1) - B_k 2917 // UB^<_k = (A^+_k - B_k)^+ (U_k - 1) - B_k 2918 // 2919 // We must be careful to handle the case where the upper bound is unknown. 2920 void DependenceInfo::findBoundsLT(CoefficientInfo *A, CoefficientInfo *B, 2921 BoundInfo *Bound, unsigned K) const { 2922 Bound[K].Lower[Dependence::DVEntry::LT] = nullptr; // Default value = -infinity. 2923 Bound[K].Upper[Dependence::DVEntry::LT] = nullptr; // Default value = +infinity. 2924 if (Bound[K].Iterations) { 2925 const SCEV *Iter_1 = SE->getMinusSCEV( 2926 Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType())); 2927 const SCEV *NegPart = 2928 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2929 Bound[K].Lower[Dependence::DVEntry::LT] = 2930 SE->getMinusSCEV(SE->getMulExpr(NegPart, Iter_1), B[K].Coeff); 2931 const SCEV *PosPart = 2932 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2933 Bound[K].Upper[Dependence::DVEntry::LT] = 2934 SE->getMinusSCEV(SE->getMulExpr(PosPart, Iter_1), B[K].Coeff); 2935 } 2936 else { 2937 // If the positive/negative part of the difference is 0, 2938 // we won't need to know the number of iterations. 2939 const SCEV *NegPart = 2940 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2941 if (NegPart->isZero()) 2942 Bound[K].Lower[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2943 const SCEV *PosPart = 2944 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2945 if (PosPart->isZero()) 2946 Bound[K].Upper[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2947 } 2948 } 2949 2950 2951 // Computes the upper and lower bounds for level K 2952 // using the > direction. Records them in Bound. 2953 // Wolfe gives the equations 2954 // 2955 // LB^>_k = (A_k - B^+_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2956 // UB^>_k = (A_k - B^-_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2957 // 2958 // Since we normalize loops, we can simplify these equations to 2959 // 2960 // LB^>_k = (A_k - B^+_k)^- (U_k - 1) + A_k 2961 // UB^>_k = (A_k - B^-_k)^+ (U_k - 1) + A_k 2962 // 2963 // We must be careful to handle the case where the upper bound is unknown. 2964 void DependenceInfo::findBoundsGT(CoefficientInfo *A, CoefficientInfo *B, 2965 BoundInfo *Bound, unsigned K) const { 2966 Bound[K].Lower[Dependence::DVEntry::GT] = nullptr; // Default value = -infinity. 2967 Bound[K].Upper[Dependence::DVEntry::GT] = nullptr; // Default value = +infinity. 2968 if (Bound[K].Iterations) { 2969 const SCEV *Iter_1 = SE->getMinusSCEV( 2970 Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType())); 2971 const SCEV *NegPart = 2972 getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2973 Bound[K].Lower[Dependence::DVEntry::GT] = 2974 SE->getAddExpr(SE->getMulExpr(NegPart, Iter_1), A[K].Coeff); 2975 const SCEV *PosPart = 2976 getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2977 Bound[K].Upper[Dependence::DVEntry::GT] = 2978 SE->getAddExpr(SE->getMulExpr(PosPart, Iter_1), A[K].Coeff); 2979 } 2980 else { 2981 // If the positive/negative part of the difference is 0, 2982 // we won't need to know the number of iterations. 2983 const SCEV *NegPart = getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2984 if (NegPart->isZero()) 2985 Bound[K].Lower[Dependence::DVEntry::GT] = A[K].Coeff; 2986 const SCEV *PosPart = getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2987 if (PosPart->isZero()) 2988 Bound[K].Upper[Dependence::DVEntry::GT] = A[K].Coeff; 2989 } 2990 } 2991 2992 2993 // X^+ = max(X, 0) 2994 const SCEV *DependenceInfo::getPositivePart(const SCEV *X) const { 2995 return SE->getSMaxExpr(X, SE->getZero(X->getType())); 2996 } 2997 2998 2999 // X^- = min(X, 0) 3000 const SCEV *DependenceInfo::getNegativePart(const SCEV *X) const { 3001 return SE->getSMinExpr(X, SE->getZero(X->getType())); 3002 } 3003 3004 3005 // Walks through the subscript, 3006 // collecting each coefficient, the associated loop bounds, 3007 // and recording its positive and negative parts for later use. 3008 DependenceInfo::CoefficientInfo * 3009 DependenceInfo::collectCoeffInfo(const SCEV *Subscript, bool SrcFlag, 3010 const SCEV *&Constant) const { 3011 const SCEV *Zero = SE->getZero(Subscript->getType()); 3012 CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1]; 3013 for (unsigned K = 1; K <= MaxLevels; ++K) { 3014 CI[K].Coeff = Zero; 3015 CI[K].PosPart = Zero; 3016 CI[K].NegPart = Zero; 3017 CI[K].Iterations = nullptr; 3018 } 3019 while (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Subscript)) { 3020 const Loop *L = AddRec->getLoop(); 3021 unsigned K = SrcFlag ? mapSrcLoop(L) : mapDstLoop(L); 3022 CI[K].Coeff = AddRec->getStepRecurrence(*SE); 3023 CI[K].PosPart = getPositivePart(CI[K].Coeff); 3024 CI[K].NegPart = getNegativePart(CI[K].Coeff); 3025 CI[K].Iterations = collectUpperBound(L, Subscript->getType()); 3026 Subscript = AddRec->getStart(); 3027 } 3028 Constant = Subscript; 3029 #ifndef NDEBUG 3030 LLVM_DEBUG(dbgs() << "\tCoefficient Info\n"); 3031 for (unsigned K = 1; K <= MaxLevels; ++K) { 3032 LLVM_DEBUG(dbgs() << "\t " << K << "\t" << *CI[K].Coeff); 3033 LLVM_DEBUG(dbgs() << "\tPos Part = "); 3034 LLVM_DEBUG(dbgs() << *CI[K].PosPart); 3035 LLVM_DEBUG(dbgs() << "\tNeg Part = "); 3036 LLVM_DEBUG(dbgs() << *CI[K].NegPart); 3037 LLVM_DEBUG(dbgs() << "\tUpper Bound = "); 3038 if (CI[K].Iterations) 3039 LLVM_DEBUG(dbgs() << *CI[K].Iterations); 3040 else 3041 LLVM_DEBUG(dbgs() << "+inf"); 3042 LLVM_DEBUG(dbgs() << '\n'); 3043 } 3044 LLVM_DEBUG(dbgs() << "\t Constant = " << *Subscript << '\n'); 3045 #endif 3046 return CI; 3047 } 3048 3049 3050 // Looks through all the bounds info and 3051 // computes the lower bound given the current direction settings 3052 // at each level. If the lower bound for any level is -inf, 3053 // the result is -inf. 3054 const SCEV *DependenceInfo::getLowerBound(BoundInfo *Bound) const { 3055 const SCEV *Sum = Bound[1].Lower[Bound[1].Direction]; 3056 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 3057 if (Bound[K].Lower[Bound[K].Direction]) 3058 Sum = SE->getAddExpr(Sum, Bound[K].Lower[Bound[K].Direction]); 3059 else 3060 Sum = nullptr; 3061 } 3062 return Sum; 3063 } 3064 3065 3066 // Looks through all the bounds info and 3067 // computes the upper bound given the current direction settings 3068 // at each level. If the upper bound at any level is +inf, 3069 // the result is +inf. 3070 const SCEV *DependenceInfo::getUpperBound(BoundInfo *Bound) const { 3071 const SCEV *Sum = Bound[1].Upper[Bound[1].Direction]; 3072 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 3073 if (Bound[K].Upper[Bound[K].Direction]) 3074 Sum = SE->getAddExpr(Sum, Bound[K].Upper[Bound[K].Direction]); 3075 else 3076 Sum = nullptr; 3077 } 3078 return Sum; 3079 } 3080 3081 3082 //===----------------------------------------------------------------------===// 3083 // Constraint manipulation for Delta test. 3084 3085 // Given a linear SCEV, 3086 // return the coefficient (the step) 3087 // corresponding to the specified loop. 3088 // If there isn't one, return 0. 3089 // For example, given a*i + b*j + c*k, finding the coefficient 3090 // corresponding to the j loop would yield b. 3091 const SCEV *DependenceInfo::findCoefficient(const SCEV *Expr, 3092 const Loop *TargetLoop) const { 3093 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 3094 if (!AddRec) 3095 return SE->getZero(Expr->getType()); 3096 if (AddRec->getLoop() == TargetLoop) 3097 return AddRec->getStepRecurrence(*SE); 3098 return findCoefficient(AddRec->getStart(), TargetLoop); 3099 } 3100 3101 3102 // Given a linear SCEV, 3103 // return the SCEV given by zeroing out the coefficient 3104 // corresponding to the specified loop. 3105 // For example, given a*i + b*j + c*k, zeroing the coefficient 3106 // corresponding to the j loop would yield a*i + c*k. 3107 const SCEV *DependenceInfo::zeroCoefficient(const SCEV *Expr, 3108 const Loop *TargetLoop) const { 3109 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 3110 if (!AddRec) 3111 return Expr; // ignore 3112 if (AddRec->getLoop() == TargetLoop) 3113 return AddRec->getStart(); 3114 return SE->getAddRecExpr(zeroCoefficient(AddRec->getStart(), TargetLoop), 3115 AddRec->getStepRecurrence(*SE), 3116 AddRec->getLoop(), 3117 AddRec->getNoWrapFlags()); 3118 } 3119 3120 3121 // Given a linear SCEV Expr, 3122 // return the SCEV given by adding some Value to the 3123 // coefficient corresponding to the specified TargetLoop. 3124 // For example, given a*i + b*j + c*k, adding 1 to the coefficient 3125 // corresponding to the j loop would yield a*i + (b+1)*j + c*k. 3126 const SCEV *DependenceInfo::addToCoefficient(const SCEV *Expr, 3127 const Loop *TargetLoop, 3128 const SCEV *Value) const { 3129 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 3130 if (!AddRec) // create a new addRec 3131 return SE->getAddRecExpr(Expr, 3132 Value, 3133 TargetLoop, 3134 SCEV::FlagAnyWrap); // Worst case, with no info. 3135 if (AddRec->getLoop() == TargetLoop) { 3136 const SCEV *Sum = SE->getAddExpr(AddRec->getStepRecurrence(*SE), Value); 3137 if (Sum->isZero()) 3138 return AddRec->getStart(); 3139 return SE->getAddRecExpr(AddRec->getStart(), 3140 Sum, 3141 AddRec->getLoop(), 3142 AddRec->getNoWrapFlags()); 3143 } 3144 if (SE->isLoopInvariant(AddRec, TargetLoop)) 3145 return SE->getAddRecExpr(AddRec, Value, TargetLoop, SCEV::FlagAnyWrap); 3146 return SE->getAddRecExpr( 3147 addToCoefficient(AddRec->getStart(), TargetLoop, Value), 3148 AddRec->getStepRecurrence(*SE), AddRec->getLoop(), 3149 AddRec->getNoWrapFlags()); 3150 } 3151 3152 3153 // Review the constraints, looking for opportunities 3154 // to simplify a subscript pair (Src and Dst). 3155 // Return true if some simplification occurs. 3156 // If the simplification isn't exact (that is, if it is conservative 3157 // in terms of dependence), set consistent to false. 3158 // Corresponds to Figure 5 from the paper 3159 // 3160 // Practical Dependence Testing 3161 // Goff, Kennedy, Tseng 3162 // PLDI 1991 3163 bool DependenceInfo::propagate(const SCEV *&Src, const SCEV *&Dst, 3164 SmallBitVector &Loops, 3165 SmallVectorImpl<Constraint> &Constraints, 3166 bool &Consistent) { 3167 bool Result = false; 3168 for (unsigned LI : Loops.set_bits()) { 3169 LLVM_DEBUG(dbgs() << "\t Constraint[" << LI << "] is"); 3170 LLVM_DEBUG(Constraints[LI].dump(dbgs())); 3171 if (Constraints[LI].isDistance()) 3172 Result |= propagateDistance(Src, Dst, Constraints[LI], Consistent); 3173 else if (Constraints[LI].isLine()) 3174 Result |= propagateLine(Src, Dst, Constraints[LI], Consistent); 3175 else if (Constraints[LI].isPoint()) 3176 Result |= propagatePoint(Src, Dst, Constraints[LI]); 3177 } 3178 return Result; 3179 } 3180 3181 3182 // Attempt to propagate a distance 3183 // constraint into a subscript pair (Src and Dst). 3184 // Return true if some simplification occurs. 3185 // If the simplification isn't exact (that is, if it is conservative 3186 // in terms of dependence), set consistent to false. 3187 bool DependenceInfo::propagateDistance(const SCEV *&Src, const SCEV *&Dst, 3188 Constraint &CurConstraint, 3189 bool &Consistent) { 3190 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3191 LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 3192 const SCEV *A_K = findCoefficient(Src, CurLoop); 3193 if (A_K->isZero()) 3194 return false; 3195 const SCEV *DA_K = SE->getMulExpr(A_K, CurConstraint.getD()); 3196 Src = SE->getMinusSCEV(Src, DA_K); 3197 Src = zeroCoefficient(Src, CurLoop); 3198 LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3199 LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3200 Dst = addToCoefficient(Dst, CurLoop, SE->getNegativeSCEV(A_K)); 3201 LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3202 if (!findCoefficient(Dst, CurLoop)->isZero()) 3203 Consistent = false; 3204 return true; 3205 } 3206 3207 3208 // Attempt to propagate a line 3209 // constraint into a subscript pair (Src and Dst). 3210 // Return true if some simplification occurs. 3211 // If the simplification isn't exact (that is, if it is conservative 3212 // in terms of dependence), set consistent to false. 3213 bool DependenceInfo::propagateLine(const SCEV *&Src, const SCEV *&Dst, 3214 Constraint &CurConstraint, 3215 bool &Consistent) { 3216 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3217 const SCEV *A = CurConstraint.getA(); 3218 const SCEV *B = CurConstraint.getB(); 3219 const SCEV *C = CurConstraint.getC(); 3220 LLVM_DEBUG(dbgs() << "\t\tA = " << *A << ", B = " << *B << ", C = " << *C 3221 << "\n"); 3222 LLVM_DEBUG(dbgs() << "\t\tSrc = " << *Src << "\n"); 3223 LLVM_DEBUG(dbgs() << "\t\tDst = " << *Dst << "\n"); 3224 if (A->isZero()) { 3225 const SCEVConstant *Bconst = dyn_cast<SCEVConstant>(B); 3226 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3227 if (!Bconst || !Cconst) return false; 3228 APInt Beta = Bconst->getAPInt(); 3229 APInt Charlie = Cconst->getAPInt(); 3230 APInt CdivB = Charlie.sdiv(Beta); 3231 assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B"); 3232 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3233 // Src = SE->getAddExpr(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3234 Src = SE->getMinusSCEV(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3235 Dst = zeroCoefficient(Dst, CurLoop); 3236 if (!findCoefficient(Src, CurLoop)->isZero()) 3237 Consistent = false; 3238 } 3239 else if (B->isZero()) { 3240 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3241 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3242 if (!Aconst || !Cconst) return false; 3243 APInt Alpha = Aconst->getAPInt(); 3244 APInt Charlie = Cconst->getAPInt(); 3245 APInt CdivA = Charlie.sdiv(Alpha); 3246 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3247 const SCEV *A_K = findCoefficient(Src, CurLoop); 3248 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3249 Src = zeroCoefficient(Src, CurLoop); 3250 if (!findCoefficient(Dst, CurLoop)->isZero()) 3251 Consistent = false; 3252 } 3253 else if (isKnownPredicate(CmpInst::ICMP_EQ, A, B)) { 3254 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3255 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3256 if (!Aconst || !Cconst) return false; 3257 APInt Alpha = Aconst->getAPInt(); 3258 APInt Charlie = Cconst->getAPInt(); 3259 APInt CdivA = Charlie.sdiv(Alpha); 3260 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3261 const SCEV *A_K = findCoefficient(Src, CurLoop); 3262 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3263 Src = zeroCoefficient(Src, CurLoop); 3264 Dst = addToCoefficient(Dst, CurLoop, A_K); 3265 if (!findCoefficient(Dst, CurLoop)->isZero()) 3266 Consistent = false; 3267 } 3268 else { 3269 // paper is incorrect here, or perhaps just misleading 3270 const SCEV *A_K = findCoefficient(Src, CurLoop); 3271 Src = SE->getMulExpr(Src, A); 3272 Dst = SE->getMulExpr(Dst, A); 3273 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, C)); 3274 Src = zeroCoefficient(Src, CurLoop); 3275 Dst = addToCoefficient(Dst, CurLoop, SE->getMulExpr(A_K, B)); 3276 if (!findCoefficient(Dst, CurLoop)->isZero()) 3277 Consistent = false; 3278 } 3279 LLVM_DEBUG(dbgs() << "\t\tnew Src = " << *Src << "\n"); 3280 LLVM_DEBUG(dbgs() << "\t\tnew Dst = " << *Dst << "\n"); 3281 return true; 3282 } 3283 3284 3285 // Attempt to propagate a point 3286 // constraint into a subscript pair (Src and Dst). 3287 // Return true if some simplification occurs. 3288 bool DependenceInfo::propagatePoint(const SCEV *&Src, const SCEV *&Dst, 3289 Constraint &CurConstraint) { 3290 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3291 const SCEV *A_K = findCoefficient(Src, CurLoop); 3292 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3293 const SCEV *XA_K = SE->getMulExpr(A_K, CurConstraint.getX()); 3294 const SCEV *YAP_K = SE->getMulExpr(AP_K, CurConstraint.getY()); 3295 LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 3296 Src = SE->getAddExpr(Src, SE->getMinusSCEV(XA_K, YAP_K)); 3297 Src = zeroCoefficient(Src, CurLoop); 3298 LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3299 LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3300 Dst = zeroCoefficient(Dst, CurLoop); 3301 LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3302 return true; 3303 } 3304 3305 3306 // Update direction vector entry based on the current constraint. 3307 void DependenceInfo::updateDirection(Dependence::DVEntry &Level, 3308 const Constraint &CurConstraint) const { 3309 LLVM_DEBUG(dbgs() << "\tUpdate direction, constraint ="); 3310 LLVM_DEBUG(CurConstraint.dump(dbgs())); 3311 if (CurConstraint.isAny()) 3312 ; // use defaults 3313 else if (CurConstraint.isDistance()) { 3314 // this one is consistent, the others aren't 3315 Level.Scalar = false; 3316 Level.Distance = CurConstraint.getD(); 3317 unsigned NewDirection = Dependence::DVEntry::NONE; 3318 if (!SE->isKnownNonZero(Level.Distance)) // if may be zero 3319 NewDirection = Dependence::DVEntry::EQ; 3320 if (!SE->isKnownNonPositive(Level.Distance)) // if may be positive 3321 NewDirection |= Dependence::DVEntry::LT; 3322 if (!SE->isKnownNonNegative(Level.Distance)) // if may be negative 3323 NewDirection |= Dependence::DVEntry::GT; 3324 Level.Direction &= NewDirection; 3325 } 3326 else if (CurConstraint.isLine()) { 3327 Level.Scalar = false; 3328 Level.Distance = nullptr; 3329 // direction should be accurate 3330 } 3331 else if (CurConstraint.isPoint()) { 3332 Level.Scalar = false; 3333 Level.Distance = nullptr; 3334 unsigned NewDirection = Dependence::DVEntry::NONE; 3335 if (!isKnownPredicate(CmpInst::ICMP_NE, 3336 CurConstraint.getY(), 3337 CurConstraint.getX())) 3338 // if X may be = Y 3339 NewDirection |= Dependence::DVEntry::EQ; 3340 if (!isKnownPredicate(CmpInst::ICMP_SLE, 3341 CurConstraint.getY(), 3342 CurConstraint.getX())) 3343 // if Y may be > X 3344 NewDirection |= Dependence::DVEntry::LT; 3345 if (!isKnownPredicate(CmpInst::ICMP_SGE, 3346 CurConstraint.getY(), 3347 CurConstraint.getX())) 3348 // if Y may be < X 3349 NewDirection |= Dependence::DVEntry::GT; 3350 Level.Direction &= NewDirection; 3351 } 3352 else 3353 llvm_unreachable("constraint has unexpected kind"); 3354 } 3355 3356 /// Check if we can delinearize the subscripts. If the SCEVs representing the 3357 /// source and destination array references are recurrences on a nested loop, 3358 /// this function flattens the nested recurrences into separate recurrences 3359 /// for each loop level. 3360 bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst, 3361 SmallVectorImpl<Subscript> &Pair) { 3362 assert(isLoadOrStore(Src) && "instruction is not load or store"); 3363 assert(isLoadOrStore(Dst) && "instruction is not load or store"); 3364 Value *SrcPtr = getLoadStorePointerOperand(Src); 3365 Value *DstPtr = getLoadStorePointerOperand(Dst); 3366 Loop *SrcLoop = LI->getLoopFor(Src->getParent()); 3367 Loop *DstLoop = LI->getLoopFor(Dst->getParent()); 3368 const SCEV *SrcAccessFn = SE->getSCEVAtScope(SrcPtr, SrcLoop); 3369 const SCEV *DstAccessFn = SE->getSCEVAtScope(DstPtr, DstLoop); 3370 const SCEVUnknown *SrcBase = 3371 dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn)); 3372 const SCEVUnknown *DstBase = 3373 dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn)); 3374 3375 if (!SrcBase || !DstBase || SrcBase != DstBase) 3376 return false; 3377 3378 SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts; 3379 3380 if (!tryDelinearizeFixedSize(Src, Dst, SrcAccessFn, DstAccessFn, 3381 SrcSubscripts, DstSubscripts) && 3382 !tryDelinearizeParametricSize(Src, Dst, SrcAccessFn, DstAccessFn, 3383 SrcSubscripts, DstSubscripts)) 3384 return false; 3385 3386 int Size = SrcSubscripts.size(); 3387 LLVM_DEBUG({ 3388 dbgs() << "\nSrcSubscripts: "; 3389 for (int I = 0; I < Size; I++) 3390 dbgs() << *SrcSubscripts[I]; 3391 dbgs() << "\nDstSubscripts: "; 3392 for (int I = 0; I < Size; I++) 3393 dbgs() << *DstSubscripts[I]; 3394 }); 3395 3396 // The delinearization transforms a single-subscript MIV dependence test into 3397 // a multi-subscript SIV dependence test that is easier to compute. So we 3398 // resize Pair to contain as many pairs of subscripts as the delinearization 3399 // has found, and then initialize the pairs following the delinearization. 3400 Pair.resize(Size); 3401 for (int I = 0; I < Size; ++I) { 3402 Pair[I].Src = SrcSubscripts[I]; 3403 Pair[I].Dst = DstSubscripts[I]; 3404 unifySubscriptType(&Pair[I]); 3405 } 3406 3407 return true; 3408 } 3409 3410 /// Try to delinearize \p SrcAccessFn and \p DstAccessFn if the underlying 3411 /// arrays accessed are fixed-size arrays. Return true if delinearization was 3412 /// successful. 3413 bool DependenceInfo::tryDelinearizeFixedSize( 3414 Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn, 3415 const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts, 3416 SmallVectorImpl<const SCEV *> &DstSubscripts) { 3417 LLVM_DEBUG({ 3418 const SCEVUnknown *SrcBase = 3419 dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn)); 3420 const SCEVUnknown *DstBase = 3421 dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn)); 3422 assert(SrcBase && DstBase && SrcBase == DstBase && 3423 "expected src and dst scev unknowns to be equal"); 3424 }); 3425 3426 SmallVector<int, 4> SrcSizes; 3427 SmallVector<int, 4> DstSizes; 3428 if (!tryDelinearizeFixedSizeImpl(SE, Src, SrcAccessFn, SrcSubscripts, 3429 SrcSizes) || 3430 !tryDelinearizeFixedSizeImpl(SE, Dst, DstAccessFn, DstSubscripts, 3431 DstSizes)) 3432 return false; 3433 3434 // Check that the two size arrays are non-empty and equal in length and 3435 // value. 3436 if (SrcSizes.size() != DstSizes.size() || 3437 !std::equal(SrcSizes.begin(), SrcSizes.end(), DstSizes.begin())) { 3438 SrcSubscripts.clear(); 3439 DstSubscripts.clear(); 3440 return false; 3441 } 3442 3443 assert(SrcSubscripts.size() == DstSubscripts.size() && 3444 "Expected equal number of entries in the list of SrcSubscripts and " 3445 "DstSubscripts."); 3446 3447 Value *SrcPtr = getLoadStorePointerOperand(Src); 3448 Value *DstPtr = getLoadStorePointerOperand(Dst); 3449 3450 // In general we cannot safely assume that the subscripts recovered from GEPs 3451 // are in the range of values defined for their corresponding array 3452 // dimensions. For example some C language usage/interpretation make it 3453 // impossible to verify this at compile-time. As such we can only delinearize 3454 // iff the subscripts are positive and are less than the range of the 3455 // dimension. 3456 if (!DisableDelinearizationChecks) { 3457 auto AllIndicesInRange = [&](SmallVector<int, 4> &DimensionSizes, 3458 SmallVectorImpl<const SCEV *> &Subscripts, 3459 Value *Ptr) { 3460 size_t SSize = Subscripts.size(); 3461 for (size_t I = 1; I < SSize; ++I) { 3462 const SCEV *S = Subscripts[I]; 3463 if (!isKnownNonNegative(S, Ptr)) 3464 return false; 3465 if (auto *SType = dyn_cast<IntegerType>(S->getType())) { 3466 const SCEV *Range = SE->getConstant( 3467 ConstantInt::get(SType, DimensionSizes[I - 1], false)); 3468 if (!isKnownLessThan(S, Range)) 3469 return false; 3470 } 3471 } 3472 return true; 3473 }; 3474 3475 if (!AllIndicesInRange(SrcSizes, SrcSubscripts, SrcPtr) || 3476 !AllIndicesInRange(DstSizes, DstSubscripts, DstPtr)) { 3477 SrcSubscripts.clear(); 3478 DstSubscripts.clear(); 3479 return false; 3480 } 3481 } 3482 LLVM_DEBUG({ 3483 dbgs() << "Delinearized subscripts of fixed-size array\n" 3484 << "SrcGEP:" << *SrcPtr << "\n" 3485 << "DstGEP:" << *DstPtr << "\n"; 3486 }); 3487 return true; 3488 } 3489 3490 bool DependenceInfo::tryDelinearizeParametricSize( 3491 Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn, 3492 const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts, 3493 SmallVectorImpl<const SCEV *> &DstSubscripts) { 3494 3495 Value *SrcPtr = getLoadStorePointerOperand(Src); 3496 Value *DstPtr = getLoadStorePointerOperand(Dst); 3497 const SCEVUnknown *SrcBase = 3498 dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn)); 3499 const SCEVUnknown *DstBase = 3500 dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn)); 3501 assert(SrcBase && DstBase && SrcBase == DstBase && 3502 "expected src and dst scev unknowns to be equal"); 3503 3504 const SCEV *ElementSize = SE->getElementSize(Src); 3505 if (ElementSize != SE->getElementSize(Dst)) 3506 return false; 3507 3508 const SCEV *SrcSCEV = SE->getMinusSCEV(SrcAccessFn, SrcBase); 3509 const SCEV *DstSCEV = SE->getMinusSCEV(DstAccessFn, DstBase); 3510 3511 const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(SrcSCEV); 3512 const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(DstSCEV); 3513 if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine()) 3514 return false; 3515 3516 // First step: collect parametric terms in both array references. 3517 SmallVector<const SCEV *, 4> Terms; 3518 collectParametricTerms(*SE, SrcAR, Terms); 3519 collectParametricTerms(*SE, DstAR, Terms); 3520 3521 // Second step: find subscript sizes. 3522 SmallVector<const SCEV *, 4> Sizes; 3523 findArrayDimensions(*SE, Terms, Sizes, ElementSize); 3524 3525 // Third step: compute the access functions for each subscript. 3526 computeAccessFunctions(*SE, SrcAR, SrcSubscripts, Sizes); 3527 computeAccessFunctions(*SE, DstAR, DstSubscripts, Sizes); 3528 3529 // Fail when there is only a subscript: that's a linearized access function. 3530 if (SrcSubscripts.size() < 2 || DstSubscripts.size() < 2 || 3531 SrcSubscripts.size() != DstSubscripts.size()) 3532 return false; 3533 3534 size_t Size = SrcSubscripts.size(); 3535 3536 // Statically check that the array bounds are in-range. The first subscript we 3537 // don't have a size for and it cannot overflow into another subscript, so is 3538 // always safe. The others need to be 0 <= subscript[i] < bound, for both src 3539 // and dst. 3540 // FIXME: It may be better to record these sizes and add them as constraints 3541 // to the dependency checks. 3542 if (!DisableDelinearizationChecks) 3543 for (size_t I = 1; I < Size; ++I) { 3544 if (!isKnownNonNegative(SrcSubscripts[I], SrcPtr)) 3545 return false; 3546 3547 if (!isKnownLessThan(SrcSubscripts[I], Sizes[I - 1])) 3548 return false; 3549 3550 if (!isKnownNonNegative(DstSubscripts[I], DstPtr)) 3551 return false; 3552 3553 if (!isKnownLessThan(DstSubscripts[I], Sizes[I - 1])) 3554 return false; 3555 } 3556 3557 return true; 3558 } 3559 3560 //===----------------------------------------------------------------------===// 3561 3562 #ifndef NDEBUG 3563 // For debugging purposes, dump a small bit vector to dbgs(). 3564 static void dumpSmallBitVector(SmallBitVector &BV) { 3565 dbgs() << "{"; 3566 for (unsigned VI : BV.set_bits()) { 3567 dbgs() << VI; 3568 if (BV.find_next(VI) >= 0) 3569 dbgs() << ' '; 3570 } 3571 dbgs() << "}\n"; 3572 } 3573 #endif 3574 3575 bool DependenceInfo::invalidate(Function &F, const PreservedAnalyses &PA, 3576 FunctionAnalysisManager::Invalidator &Inv) { 3577 // Check if the analysis itself has been invalidated. 3578 auto PAC = PA.getChecker<DependenceAnalysis>(); 3579 if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>()) 3580 return true; 3581 3582 // Check transitive dependencies. 3583 return Inv.invalidate<AAManager>(F, PA) || 3584 Inv.invalidate<ScalarEvolutionAnalysis>(F, PA) || 3585 Inv.invalidate<LoopAnalysis>(F, PA); 3586 } 3587 3588 SCEVUnionPredicate DependenceInfo::getRuntimeAssumptions() const { 3589 return SCEVUnionPredicate(Assumptions, *SE); 3590 } 3591 3592 // depends - 3593 // Returns NULL if there is no dependence. 3594 // Otherwise, return a Dependence with as many details as possible. 3595 // Corresponds to Section 3.1 in the paper 3596 // 3597 // Practical Dependence Testing 3598 // Goff, Kennedy, Tseng 3599 // PLDI 1991 3600 // 3601 // Care is required to keep the routine below, getSplitIteration(), 3602 // up to date with respect to this routine. 3603 std::unique_ptr<Dependence> 3604 DependenceInfo::depends(Instruction *Src, Instruction *Dst, 3605 bool UnderRuntimeAssumptions) { 3606 SmallVector<const SCEVPredicate *, 4> Assume; 3607 bool PossiblyLoopIndependent = true; 3608 if (Src == Dst) 3609 PossiblyLoopIndependent = false; 3610 3611 if (!(Src->mayReadOrWriteMemory() && Dst->mayReadOrWriteMemory())) 3612 // if both instructions don't reference memory, there's no dependence 3613 return nullptr; 3614 3615 if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) { 3616 // can only analyze simple loads and stores, i.e., no calls, invokes, etc. 3617 LLVM_DEBUG(dbgs() << "can only handle simple loads and stores\n"); 3618 return std::make_unique<Dependence>(Src, Dst, 3619 SCEVUnionPredicate(Assume, *SE)); 3620 } 3621 3622 const MemoryLocation &DstLoc = MemoryLocation::get(Dst); 3623 const MemoryLocation &SrcLoc = MemoryLocation::get(Src); 3624 3625 switch (underlyingObjectsAlias(AA, F->getDataLayout(), DstLoc, SrcLoc)) { 3626 case AliasResult::MayAlias: 3627 case AliasResult::PartialAlias: 3628 // cannot analyse objects if we don't understand their aliasing. 3629 LLVM_DEBUG(dbgs() << "can't analyze may or partial alias\n"); 3630 return std::make_unique<Dependence>(Src, Dst, 3631 SCEVUnionPredicate(Assume, *SE)); 3632 case AliasResult::NoAlias: 3633 // If the objects noalias, they are distinct, accesses are independent. 3634 LLVM_DEBUG(dbgs() << "no alias\n"); 3635 return nullptr; 3636 case AliasResult::MustAlias: 3637 break; // The underlying objects alias; test accesses for dependence. 3638 } 3639 3640 if (DstLoc.Size != SrcLoc.Size || !DstLoc.Size.isPrecise() || 3641 !SrcLoc.Size.isPrecise()) { 3642 // The dependence test gets confused if the size of the memory accesses 3643 // differ. 3644 LLVM_DEBUG(dbgs() << "can't analyze must alias with different sizes\n"); 3645 return std::make_unique<Dependence>(Src, Dst, 3646 SCEVUnionPredicate(Assume, *SE)); 3647 } 3648 3649 Value *SrcPtr = getLoadStorePointerOperand(Src); 3650 Value *DstPtr = getLoadStorePointerOperand(Dst); 3651 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); 3652 const SCEV *DstSCEV = SE->getSCEV(DstPtr); 3653 LLVM_DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n"); 3654 LLVM_DEBUG(dbgs() << " DstSCEV = " << *DstSCEV << "\n"); 3655 const SCEV *SrcBase = SE->getPointerBase(SrcSCEV); 3656 const SCEV *DstBase = SE->getPointerBase(DstSCEV); 3657 if (SrcBase != DstBase) { 3658 // If two pointers have different bases, trying to analyze indexes won't 3659 // work; we can't compare them to each other. This can happen, for example, 3660 // if one is produced by an LCSSA PHI node. 3661 // 3662 // We check this upfront so we don't crash in cases where getMinusSCEV() 3663 // returns a SCEVCouldNotCompute. 3664 LLVM_DEBUG(dbgs() << "can't analyze SCEV with different pointer base\n"); 3665 return std::make_unique<Dependence>(Src, Dst, 3666 SCEVUnionPredicate(Assume, *SE)); 3667 } 3668 3669 uint64_t EltSize = SrcLoc.Size.toRaw(); 3670 const SCEV *SrcEv = SE->getMinusSCEV(SrcSCEV, SrcBase); 3671 const SCEV *DstEv = SE->getMinusSCEV(DstSCEV, DstBase); 3672 3673 if (Src != Dst) { 3674 // Check that memory access offsets are multiples of element sizes. 3675 if (!SE->isKnownMultipleOf(SrcEv, EltSize, Assume) || 3676 !SE->isKnownMultipleOf(DstEv, EltSize, Assume)) { 3677 LLVM_DEBUG(dbgs() << "can't analyze SCEV with different offsets\n"); 3678 return std::make_unique<Dependence>(Src, Dst, 3679 SCEVUnionPredicate(Assume, *SE)); 3680 } 3681 } 3682 3683 if (!Assume.empty()) { 3684 if (!UnderRuntimeAssumptions) 3685 return std::make_unique<Dependence>(Src, Dst, 3686 SCEVUnionPredicate(Assume, *SE)); 3687 // Add non-redundant assumptions. 3688 unsigned N = Assumptions.size(); 3689 for (const SCEVPredicate *P : Assume) { 3690 bool Implied = false; 3691 for (unsigned I = 0; I != N && !Implied; I++) 3692 if (Assumptions[I]->implies(P, *SE)) 3693 Implied = true; 3694 if (!Implied) 3695 Assumptions.push_back(P); 3696 } 3697 } 3698 3699 establishNestingLevels(Src, Dst); 3700 LLVM_DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n"); 3701 LLVM_DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n"); 3702 3703 FullDependence Result(Src, Dst, SCEVUnionPredicate(Assume, *SE), 3704 PossiblyLoopIndependent, CommonLevels); 3705 ++TotalArrayPairs; 3706 3707 unsigned Pairs = 1; 3708 SmallVector<Subscript, 2> Pair(Pairs); 3709 Pair[0].Src = SrcSCEV; 3710 Pair[0].Dst = DstSCEV; 3711 3712 if (Delinearize) { 3713 if (tryDelinearize(Src, Dst, Pair)) { 3714 LLVM_DEBUG(dbgs() << " delinearized\n"); 3715 Pairs = Pair.size(); 3716 } 3717 } 3718 3719 for (unsigned P = 0; P < Pairs; ++P) { 3720 Pair[P].Loops.resize(MaxLevels + 1); 3721 Pair[P].GroupLoops.resize(MaxLevels + 1); 3722 Pair[P].Group.resize(Pairs); 3723 removeMatchingExtensions(&Pair[P]); 3724 Pair[P].Classification = 3725 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), 3726 Pair[P].Dst, LI->getLoopFor(Dst->getParent()), 3727 Pair[P].Loops); 3728 Pair[P].GroupLoops = Pair[P].Loops; 3729 Pair[P].Group.set(P); 3730 LLVM_DEBUG(dbgs() << " subscript " << P << "\n"); 3731 LLVM_DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n"); 3732 LLVM_DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n"); 3733 LLVM_DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n"); 3734 LLVM_DEBUG(dbgs() << "\tloops = "); 3735 LLVM_DEBUG(dumpSmallBitVector(Pair[P].Loops)); 3736 } 3737 3738 SmallBitVector Separable(Pairs); 3739 SmallBitVector Coupled(Pairs); 3740 3741 // Partition subscripts into separable and minimally-coupled groups 3742 // Algorithm in paper is algorithmically better; 3743 // this may be faster in practice. Check someday. 3744 // 3745 // Here's an example of how it works. Consider this code: 3746 // 3747 // for (i = ...) { 3748 // for (j = ...) { 3749 // for (k = ...) { 3750 // for (l = ...) { 3751 // for (m = ...) { 3752 // A[i][j][k][m] = ...; 3753 // ... = A[0][j][l][i + j]; 3754 // } 3755 // } 3756 // } 3757 // } 3758 // } 3759 // 3760 // There are 4 subscripts here: 3761 // 0 [i] and [0] 3762 // 1 [j] and [j] 3763 // 2 [k] and [l] 3764 // 3 [m] and [i + j] 3765 // 3766 // We've already classified each subscript pair as ZIV, SIV, etc., 3767 // and collected all the loops mentioned by pair P in Pair[P].Loops. 3768 // In addition, we've initialized Pair[P].GroupLoops to Pair[P].Loops 3769 // and set Pair[P].Group = {P}. 3770 // 3771 // Src Dst Classification Loops GroupLoops Group 3772 // 0 [i] [0] SIV {1} {1} {0} 3773 // 1 [j] [j] SIV {2} {2} {1} 3774 // 2 [k] [l] RDIV {3,4} {3,4} {2} 3775 // 3 [m] [i + j] MIV {1,2,5} {1,2,5} {3} 3776 // 3777 // For each subscript SI 0 .. 3, we consider each remaining subscript, SJ. 3778 // So, 0 is compared against 1, 2, and 3; 1 is compared against 2 and 3, etc. 3779 // 3780 // We begin by comparing 0 and 1. The intersection of the GroupLoops is empty. 3781 // Next, 0 and 2. Again, the intersection of their GroupLoops is empty. 3782 // Next 0 and 3. The intersection of their GroupLoop = {1}, not empty, 3783 // so Pair[3].Group = {0,3} and Done = false (that is, 0 will not be added 3784 // to either Separable or Coupled). 3785 // 3786 // Next, we consider 1 and 2. The intersection of the GroupLoops is empty. 3787 // Next, 1 and 3. The intersection of their GroupLoops = {2}, not empty, 3788 // so Pair[3].Group = {0, 1, 3} and Done = false. 3789 // 3790 // Next, we compare 2 against 3. The intersection of the GroupLoops is empty. 3791 // Since Done remains true, we add 2 to the set of Separable pairs. 3792 // 3793 // Finally, we consider 3. There's nothing to compare it with, 3794 // so Done remains true and we add it to the Coupled set. 3795 // Pair[3].Group = {0, 1, 3} and GroupLoops = {1, 2, 5}. 3796 // 3797 // In the end, we've got 1 separable subscript and 1 coupled group. 3798 for (unsigned SI = 0; SI < Pairs; ++SI) { 3799 if (Pair[SI].Classification == Subscript::NonLinear) { 3800 // ignore these, but collect loops for later 3801 ++NonlinearSubscriptPairs; 3802 collectCommonLoops(Pair[SI].Src, 3803 LI->getLoopFor(Src->getParent()), 3804 Pair[SI].Loops); 3805 collectCommonLoops(Pair[SI].Dst, 3806 LI->getLoopFor(Dst->getParent()), 3807 Pair[SI].Loops); 3808 Result.Consistent = false; 3809 } else if (Pair[SI].Classification == Subscript::ZIV) { 3810 // always separable 3811 Separable.set(SI); 3812 } 3813 else { 3814 // SIV, RDIV, or MIV, so check for coupled group 3815 bool Done = true; 3816 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 3817 SmallBitVector Intersection = Pair[SI].GroupLoops; 3818 Intersection &= Pair[SJ].GroupLoops; 3819 if (Intersection.any()) { 3820 // accumulate set of all the loops in group 3821 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 3822 // accumulate set of all subscripts in group 3823 Pair[SJ].Group |= Pair[SI].Group; 3824 Done = false; 3825 } 3826 } 3827 if (Done) { 3828 if (Pair[SI].Group.count() == 1) { 3829 Separable.set(SI); 3830 ++SeparableSubscriptPairs; 3831 } 3832 else { 3833 Coupled.set(SI); 3834 ++CoupledSubscriptPairs; 3835 } 3836 } 3837 } 3838 } 3839 3840 LLVM_DEBUG(dbgs() << " Separable = "); 3841 LLVM_DEBUG(dumpSmallBitVector(Separable)); 3842 LLVM_DEBUG(dbgs() << " Coupled = "); 3843 LLVM_DEBUG(dumpSmallBitVector(Coupled)); 3844 3845 Constraint NewConstraint; 3846 NewConstraint.setAny(SE); 3847 3848 // test separable subscripts 3849 for (unsigned SI : Separable.set_bits()) { 3850 LLVM_DEBUG(dbgs() << "testing subscript " << SI); 3851 switch (Pair[SI].Classification) { 3852 case Subscript::ZIV: 3853 LLVM_DEBUG(dbgs() << ", ZIV\n"); 3854 if (testZIV(Pair[SI].Src, Pair[SI].Dst, Result)) 3855 return nullptr; 3856 break; 3857 case Subscript::SIV: { 3858 LLVM_DEBUG(dbgs() << ", SIV\n"); 3859 unsigned Level; 3860 const SCEV *SplitIter = nullptr; 3861 if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level, Result, NewConstraint, 3862 SplitIter)) 3863 return nullptr; 3864 break; 3865 } 3866 case Subscript::RDIV: 3867 LLVM_DEBUG(dbgs() << ", RDIV\n"); 3868 if (testRDIV(Pair[SI].Src, Pair[SI].Dst, Result)) 3869 return nullptr; 3870 break; 3871 case Subscript::MIV: 3872 LLVM_DEBUG(dbgs() << ", MIV\n"); 3873 if (testMIV(Pair[SI].Src, Pair[SI].Dst, Pair[SI].Loops, Result)) 3874 return nullptr; 3875 break; 3876 default: 3877 llvm_unreachable("subscript has unexpected classification"); 3878 } 3879 } 3880 3881 if (Coupled.count()) { 3882 // test coupled subscript groups 3883 LLVM_DEBUG(dbgs() << "starting on coupled subscripts\n"); 3884 LLVM_DEBUG(dbgs() << "MaxLevels + 1 = " << MaxLevels + 1 << "\n"); 3885 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 3886 for (unsigned II = 0; II <= MaxLevels; ++II) 3887 Constraints[II].setAny(SE); 3888 for (unsigned SI : Coupled.set_bits()) { 3889 LLVM_DEBUG(dbgs() << "testing subscript group " << SI << " { "); 3890 SmallBitVector Group(Pair[SI].Group); 3891 SmallBitVector Sivs(Pairs); 3892 SmallBitVector Mivs(Pairs); 3893 SmallBitVector ConstrainedLevels(MaxLevels + 1); 3894 SmallVector<Subscript *, 4> PairsInGroup; 3895 for (unsigned SJ : Group.set_bits()) { 3896 LLVM_DEBUG(dbgs() << SJ << " "); 3897 if (Pair[SJ].Classification == Subscript::SIV) 3898 Sivs.set(SJ); 3899 else 3900 Mivs.set(SJ); 3901 PairsInGroup.push_back(&Pair[SJ]); 3902 } 3903 unifySubscriptType(PairsInGroup); 3904 LLVM_DEBUG(dbgs() << "}\n"); 3905 while (Sivs.any()) { 3906 bool Changed = false; 3907 for (unsigned SJ : Sivs.set_bits()) { 3908 LLVM_DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n"); 3909 // SJ is an SIV subscript that's part of the current coupled group 3910 unsigned Level; 3911 const SCEV *SplitIter = nullptr; 3912 LLVM_DEBUG(dbgs() << "SIV\n"); 3913 if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, Result, NewConstraint, 3914 SplitIter)) 3915 return nullptr; 3916 ConstrainedLevels.set(Level); 3917 if (intersectConstraints(&Constraints[Level], &NewConstraint)) { 3918 if (Constraints[Level].isEmpty()) { 3919 ++DeltaIndependence; 3920 return nullptr; 3921 } 3922 Changed = true; 3923 } 3924 Sivs.reset(SJ); 3925 } 3926 if (Changed) { 3927 // propagate, possibly creating new SIVs and ZIVs 3928 LLVM_DEBUG(dbgs() << " propagating\n"); 3929 LLVM_DEBUG(dbgs() << "\tMivs = "); 3930 LLVM_DEBUG(dumpSmallBitVector(Mivs)); 3931 for (unsigned SJ : Mivs.set_bits()) { 3932 // SJ is an MIV subscript that's part of the current coupled group 3933 LLVM_DEBUG(dbgs() << "\tSJ = " << SJ << "\n"); 3934 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, 3935 Constraints, Result.Consistent)) { 3936 LLVM_DEBUG(dbgs() << "\t Changed\n"); 3937 ++DeltaPropagations; 3938 Pair[SJ].Classification = 3939 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), 3940 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), 3941 Pair[SJ].Loops); 3942 switch (Pair[SJ].Classification) { 3943 case Subscript::ZIV: 3944 LLVM_DEBUG(dbgs() << "ZIV\n"); 3945 if (testZIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) 3946 return nullptr; 3947 Mivs.reset(SJ); 3948 break; 3949 case Subscript::SIV: 3950 Sivs.set(SJ); 3951 Mivs.reset(SJ); 3952 break; 3953 case Subscript::RDIV: 3954 case Subscript::MIV: 3955 break; 3956 default: 3957 llvm_unreachable("bad subscript classification"); 3958 } 3959 } 3960 } 3961 } 3962 } 3963 3964 // test & propagate remaining RDIVs 3965 for (unsigned SJ : Mivs.set_bits()) { 3966 if (Pair[SJ].Classification == Subscript::RDIV) { 3967 LLVM_DEBUG(dbgs() << "RDIV test\n"); 3968 if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) 3969 return nullptr; 3970 // I don't yet understand how to propagate RDIV results 3971 Mivs.reset(SJ); 3972 } 3973 } 3974 3975 // test remaining MIVs 3976 // This code is temporary. 3977 // Better to somehow test all remaining subscripts simultaneously. 3978 for (unsigned SJ : Mivs.set_bits()) { 3979 if (Pair[SJ].Classification == Subscript::MIV) { 3980 LLVM_DEBUG(dbgs() << "MIV test\n"); 3981 if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result)) 3982 return nullptr; 3983 } 3984 else 3985 llvm_unreachable("expected only MIV subscripts at this point"); 3986 } 3987 3988 // update Result.DV from constraint vector 3989 LLVM_DEBUG(dbgs() << " updating\n"); 3990 for (unsigned SJ : ConstrainedLevels.set_bits()) { 3991 if (SJ > CommonLevels) 3992 break; 3993 updateDirection(Result.DV[SJ - 1], Constraints[SJ]); 3994 if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE) 3995 return nullptr; 3996 } 3997 } 3998 } 3999 4000 // Make sure the Scalar flags are set correctly. 4001 SmallBitVector CompleteLoops(MaxLevels + 1); 4002 for (unsigned SI = 0; SI < Pairs; ++SI) 4003 CompleteLoops |= Pair[SI].Loops; 4004 for (unsigned II = 1; II <= CommonLevels; ++II) 4005 if (CompleteLoops[II]) 4006 Result.DV[II - 1].Scalar = false; 4007 4008 // Set the distance to zero if the direction is EQ. 4009 // TODO: Ideally, the distance should be set to 0 immediately simultaneously 4010 // with the corresponding direction being set to EQ. 4011 for (unsigned II = 1; II <= Result.getLevels(); ++II) { 4012 if (Result.getDirection(II) == Dependence::DVEntry::EQ) { 4013 if (Result.DV[II - 1].Distance == nullptr) 4014 Result.DV[II - 1].Distance = SE->getZero(SrcSCEV->getType()); 4015 else 4016 assert(Result.DV[II - 1].Distance->isZero() && 4017 "Inconsistency between distance and direction"); 4018 } 4019 4020 #ifndef NDEBUG 4021 // Check that the converse (i.e., if the distance is zero, then the 4022 // direction is EQ) holds. 4023 const SCEV *Distance = Result.getDistance(II); 4024 if (Distance && Distance->isZero()) 4025 assert(Result.getDirection(II) == Dependence::DVEntry::EQ && 4026 "Distance is zero, but direction is not EQ"); 4027 #endif 4028 } 4029 4030 if (PossiblyLoopIndependent) { 4031 // Make sure the LoopIndependent flag is set correctly. 4032 // All directions must include equal, otherwise no 4033 // loop-independent dependence is possible. 4034 for (unsigned II = 1; II <= CommonLevels; ++II) { 4035 if (!(Result.getDirection(II) & Dependence::DVEntry::EQ)) { 4036 Result.LoopIndependent = false; 4037 break; 4038 } 4039 } 4040 } 4041 else { 4042 // On the other hand, if all directions are equal and there's no 4043 // loop-independent dependence possible, then no dependence exists. 4044 bool AllEqual = true; 4045 for (unsigned II = 1; II <= CommonLevels; ++II) { 4046 if (Result.getDirection(II) != Dependence::DVEntry::EQ) { 4047 AllEqual = false; 4048 break; 4049 } 4050 } 4051 if (AllEqual) 4052 return nullptr; 4053 } 4054 4055 return std::make_unique<FullDependence>(std::move(Result)); 4056 } 4057 4058 //===----------------------------------------------------------------------===// 4059 // getSplitIteration - 4060 // Rather than spend rarely-used space recording the splitting iteration 4061 // during the Weak-Crossing SIV test, we re-compute it on demand. 4062 // The re-computation is basically a repeat of the entire dependence test, 4063 // though simplified since we know that the dependence exists. 4064 // It's tedious, since we must go through all propagations, etc. 4065 // 4066 // Care is required to keep this code up to date with respect to the routine 4067 // above, depends(). 4068 // 4069 // Generally, the dependence analyzer will be used to build 4070 // a dependence graph for a function (basically a map from instructions 4071 // to dependences). Looking for cycles in the graph shows us loops 4072 // that cannot be trivially vectorized/parallelized. 4073 // 4074 // We can try to improve the situation by examining all the dependences 4075 // that make up the cycle, looking for ones we can break. 4076 // Sometimes, peeling the first or last iteration of a loop will break 4077 // dependences, and we've got flags for those possibilities. 4078 // Sometimes, splitting a loop at some other iteration will do the trick, 4079 // and we've got a flag for that case. Rather than waste the space to 4080 // record the exact iteration (since we rarely know), we provide 4081 // a method that calculates the iteration. It's a drag that it must work 4082 // from scratch, but wonderful in that it's possible. 4083 // 4084 // Here's an example: 4085 // 4086 // for (i = 0; i < 10; i++) 4087 // A[i] = ... 4088 // ... = A[11 - i] 4089 // 4090 // There's a loop-carried flow dependence from the store to the load, 4091 // found by the weak-crossing SIV test. The dependence will have a flag, 4092 // indicating that the dependence can be broken by splitting the loop. 4093 // Calling getSplitIteration will return 5. 4094 // Splitting the loop breaks the dependence, like so: 4095 // 4096 // for (i = 0; i <= 5; i++) 4097 // A[i] = ... 4098 // ... = A[11 - i] 4099 // for (i = 6; i < 10; i++) 4100 // A[i] = ... 4101 // ... = A[11 - i] 4102 // 4103 // breaks the dependence and allows us to vectorize/parallelize 4104 // both loops. 4105 const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep, 4106 unsigned SplitLevel) { 4107 assert(Dep.isSplitable(SplitLevel) && 4108 "Dep should be splitable at SplitLevel"); 4109 Instruction *Src = Dep.getSrc(); 4110 Instruction *Dst = Dep.getDst(); 4111 assert(Src->mayReadFromMemory() || Src->mayWriteToMemory()); 4112 assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory()); 4113 assert(isLoadOrStore(Src)); 4114 assert(isLoadOrStore(Dst)); 4115 Value *SrcPtr = getLoadStorePointerOperand(Src); 4116 Value *DstPtr = getLoadStorePointerOperand(Dst); 4117 assert(underlyingObjectsAlias( 4118 AA, F->getDataLayout(), MemoryLocation::get(Dst), 4119 MemoryLocation::get(Src)) == AliasResult::MustAlias); 4120 4121 // establish loop nesting levels 4122 establishNestingLevels(Src, Dst); 4123 4124 FullDependence Result(Src, Dst, Dep.Assumptions, false, CommonLevels); 4125 4126 unsigned Pairs = 1; 4127 SmallVector<Subscript, 2> Pair(Pairs); 4128 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); 4129 const SCEV *DstSCEV = SE->getSCEV(DstPtr); 4130 Pair[0].Src = SrcSCEV; 4131 Pair[0].Dst = DstSCEV; 4132 4133 if (Delinearize) { 4134 if (tryDelinearize(Src, Dst, Pair)) { 4135 LLVM_DEBUG(dbgs() << " delinearized\n"); 4136 Pairs = Pair.size(); 4137 } 4138 } 4139 4140 for (unsigned P = 0; P < Pairs; ++P) { 4141 Pair[P].Loops.resize(MaxLevels + 1); 4142 Pair[P].GroupLoops.resize(MaxLevels + 1); 4143 Pair[P].Group.resize(Pairs); 4144 removeMatchingExtensions(&Pair[P]); 4145 Pair[P].Classification = 4146 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), 4147 Pair[P].Dst, LI->getLoopFor(Dst->getParent()), 4148 Pair[P].Loops); 4149 Pair[P].GroupLoops = Pair[P].Loops; 4150 Pair[P].Group.set(P); 4151 } 4152 4153 SmallBitVector Separable(Pairs); 4154 SmallBitVector Coupled(Pairs); 4155 4156 // partition subscripts into separable and minimally-coupled groups 4157 for (unsigned SI = 0; SI < Pairs; ++SI) { 4158 if (Pair[SI].Classification == Subscript::NonLinear) { 4159 // ignore these, but collect loops for later 4160 collectCommonLoops(Pair[SI].Src, 4161 LI->getLoopFor(Src->getParent()), 4162 Pair[SI].Loops); 4163 collectCommonLoops(Pair[SI].Dst, 4164 LI->getLoopFor(Dst->getParent()), 4165 Pair[SI].Loops); 4166 Result.Consistent = false; 4167 } 4168 else if (Pair[SI].Classification == Subscript::ZIV) 4169 Separable.set(SI); 4170 else { 4171 // SIV, RDIV, or MIV, so check for coupled group 4172 bool Done = true; 4173 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 4174 SmallBitVector Intersection = Pair[SI].GroupLoops; 4175 Intersection &= Pair[SJ].GroupLoops; 4176 if (Intersection.any()) { 4177 // accumulate set of all the loops in group 4178 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 4179 // accumulate set of all subscripts in group 4180 Pair[SJ].Group |= Pair[SI].Group; 4181 Done = false; 4182 } 4183 } 4184 if (Done) { 4185 if (Pair[SI].Group.count() == 1) 4186 Separable.set(SI); 4187 else 4188 Coupled.set(SI); 4189 } 4190 } 4191 } 4192 4193 Constraint NewConstraint; 4194 NewConstraint.setAny(SE); 4195 4196 // test separable subscripts 4197 for (unsigned SI : Separable.set_bits()) { 4198 switch (Pair[SI].Classification) { 4199 case Subscript::SIV: { 4200 unsigned Level; 4201 const SCEV *SplitIter = nullptr; 4202 (void) testSIV(Pair[SI].Src, Pair[SI].Dst, Level, 4203 Result, NewConstraint, SplitIter); 4204 if (Level == SplitLevel) { 4205 assert(SplitIter != nullptr); 4206 return SplitIter; 4207 } 4208 break; 4209 } 4210 case Subscript::ZIV: 4211 case Subscript::RDIV: 4212 case Subscript::MIV: 4213 break; 4214 default: 4215 llvm_unreachable("subscript has unexpected classification"); 4216 } 4217 } 4218 4219 assert(!Coupled.empty() && "coupled expected non-empty"); 4220 4221 // test coupled subscript groups 4222 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 4223 for (unsigned II = 0; II <= MaxLevels; ++II) 4224 Constraints[II].setAny(SE); 4225 for (unsigned SI : Coupled.set_bits()) { 4226 SmallBitVector Group(Pair[SI].Group); 4227 SmallBitVector Sivs(Pairs); 4228 SmallBitVector Mivs(Pairs); 4229 SmallBitVector ConstrainedLevels(MaxLevels + 1); 4230 for (unsigned SJ : Group.set_bits()) { 4231 if (Pair[SJ].Classification == Subscript::SIV) 4232 Sivs.set(SJ); 4233 else 4234 Mivs.set(SJ); 4235 } 4236 while (Sivs.any()) { 4237 bool Changed = false; 4238 for (unsigned SJ : Sivs.set_bits()) { 4239 // SJ is an SIV subscript that's part of the current coupled group 4240 unsigned Level; 4241 const SCEV *SplitIter = nullptr; 4242 (void)testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, Result, NewConstraint, 4243 SplitIter); 4244 if (Level == SplitLevel && SplitIter) 4245 return SplitIter; 4246 ConstrainedLevels.set(Level); 4247 if (intersectConstraints(&Constraints[Level], &NewConstraint)) 4248 Changed = true; 4249 Sivs.reset(SJ); 4250 } 4251 if (!Changed) 4252 continue; 4253 // propagate, possibly creating new SIVs and ZIVs 4254 for (unsigned SJ : Mivs.set_bits()) { 4255 // SJ is an MIV subscript that's part of the current coupled group 4256 if (!propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Constraints, 4257 Result.Consistent)) 4258 continue; 4259 Pair[SJ].Classification = classifyPair( 4260 Pair[SJ].Src, LI->getLoopFor(Src->getParent()), Pair[SJ].Dst, 4261 LI->getLoopFor(Dst->getParent()), Pair[SJ].Loops); 4262 switch (Pair[SJ].Classification) { 4263 case Subscript::ZIV: 4264 Mivs.reset(SJ); 4265 break; 4266 case Subscript::SIV: 4267 Sivs.set(SJ); 4268 Mivs.reset(SJ); 4269 break; 4270 case Subscript::RDIV: 4271 case Subscript::MIV: 4272 break; 4273 default: 4274 llvm_unreachable("bad subscript classification"); 4275 } 4276 } 4277 } 4278 } 4279 llvm_unreachable("somehow reached end of routine"); 4280 } 4281