1 //===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // DependenceAnalysis is an LLVM pass that analyses dependences between memory 10 // accesses. Currently, it is an (incomplete) implementation of the approach 11 // described in 12 // 13 // Practical Dependence Testing 14 // Goff, Kennedy, Tseng 15 // PLDI 1991 16 // 17 // There's a single entry point that analyzes the dependence between a pair 18 // of memory references in a function, returning either NULL, for no dependence, 19 // or a more-or-less detailed description of the dependence between them. 20 // 21 // Currently, the implementation cannot propagate constraints between 22 // coupled RDIV subscripts and lacks a multi-subscript MIV test. 23 // Both of these are conservative weaknesses; 24 // that is, not a source of correctness problems. 25 // 26 // Since Clang linearizes some array subscripts, the dependence 27 // analysis is using SCEV->delinearize to recover the representation of multiple 28 // subscripts, and thus avoid the more expensive and less precise MIV tests. The 29 // delinearization is controlled by the flag -da-delinearize. 30 // 31 // We should pay some careful attention to the possibility of integer overflow 32 // in the implementation of the various tests. This could happen with Add, 33 // Subtract, or Multiply, with both APInt's and SCEV's. 34 // 35 // Some non-linear subscript pairs can be handled by the GCD test 36 // (and perhaps other tests). 37 // Should explore how often these things occur. 38 // 39 // Finally, it seems like certain test cases expose weaknesses in the SCEV 40 // simplification, especially in the handling of sign and zero extensions. 41 // It could be useful to spend time exploring these. 42 // 43 // Please note that this is work in progress and the interface is subject to 44 // change. 45 // 46 //===----------------------------------------------------------------------===// 47 // // 48 // In memory of Ken Kennedy, 1945 - 2007 // 49 // // 50 //===----------------------------------------------------------------------===// 51 52 #include "llvm/Analysis/DependenceAnalysis.h" 53 #include "llvm/ADT/Statistic.h" 54 #include "llvm/Analysis/AliasAnalysis.h" 55 #include "llvm/Analysis/Delinearization.h" 56 #include "llvm/Analysis/LoopInfo.h" 57 #include "llvm/Analysis/ScalarEvolution.h" 58 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 59 #include "llvm/Analysis/ValueTracking.h" 60 #include "llvm/IR/InstIterator.h" 61 #include "llvm/IR/Module.h" 62 #include "llvm/InitializePasses.h" 63 #include "llvm/Support/CommandLine.h" 64 #include "llvm/Support/Debug.h" 65 #include "llvm/Support/ErrorHandling.h" 66 #include "llvm/Support/raw_ostream.h" 67 68 using namespace llvm; 69 70 #define DEBUG_TYPE "da" 71 72 //===----------------------------------------------------------------------===// 73 // statistics 74 75 STATISTIC(TotalArrayPairs, "Array pairs tested"); 76 STATISTIC(SeparableSubscriptPairs, "Separable subscript pairs"); 77 STATISTIC(CoupledSubscriptPairs, "Coupled subscript pairs"); 78 STATISTIC(NonlinearSubscriptPairs, "Nonlinear subscript pairs"); 79 STATISTIC(ZIVapplications, "ZIV applications"); 80 STATISTIC(ZIVindependence, "ZIV independence"); 81 STATISTIC(StrongSIVapplications, "Strong SIV applications"); 82 STATISTIC(StrongSIVsuccesses, "Strong SIV successes"); 83 STATISTIC(StrongSIVindependence, "Strong SIV independence"); 84 STATISTIC(WeakCrossingSIVapplications, "Weak-Crossing SIV applications"); 85 STATISTIC(WeakCrossingSIVsuccesses, "Weak-Crossing SIV successes"); 86 STATISTIC(WeakCrossingSIVindependence, "Weak-Crossing SIV independence"); 87 STATISTIC(ExactSIVapplications, "Exact SIV applications"); 88 STATISTIC(ExactSIVsuccesses, "Exact SIV successes"); 89 STATISTIC(ExactSIVindependence, "Exact SIV independence"); 90 STATISTIC(WeakZeroSIVapplications, "Weak-Zero SIV applications"); 91 STATISTIC(WeakZeroSIVsuccesses, "Weak-Zero SIV successes"); 92 STATISTIC(WeakZeroSIVindependence, "Weak-Zero SIV independence"); 93 STATISTIC(ExactRDIVapplications, "Exact RDIV applications"); 94 STATISTIC(ExactRDIVindependence, "Exact RDIV independence"); 95 STATISTIC(SymbolicRDIVapplications, "Symbolic RDIV applications"); 96 STATISTIC(SymbolicRDIVindependence, "Symbolic RDIV independence"); 97 STATISTIC(DeltaApplications, "Delta applications"); 98 STATISTIC(DeltaSuccesses, "Delta successes"); 99 STATISTIC(DeltaIndependence, "Delta independence"); 100 STATISTIC(DeltaPropagations, "Delta propagations"); 101 STATISTIC(GCDapplications, "GCD applications"); 102 STATISTIC(GCDsuccesses, "GCD successes"); 103 STATISTIC(GCDindependence, "GCD independence"); 104 STATISTIC(BanerjeeApplications, "Banerjee applications"); 105 STATISTIC(BanerjeeIndependence, "Banerjee independence"); 106 STATISTIC(BanerjeeSuccesses, "Banerjee successes"); 107 108 static cl::opt<bool> 109 Delinearize("da-delinearize", cl::init(true), cl::Hidden, 110 cl::desc("Try to delinearize array references.")); 111 static cl::opt<bool> DisableDelinearizationChecks( 112 "da-disable-delinearization-checks", cl::Hidden, 113 cl::desc( 114 "Disable checks that try to statically verify validity of " 115 "delinearized subscripts. Enabling this option may result in incorrect " 116 "dependence vectors for languages that allow the subscript of one " 117 "dimension to underflow or overflow into another dimension.")); 118 119 static cl::opt<unsigned> MIVMaxLevelThreshold( 120 "da-miv-max-level-threshold", cl::init(7), cl::Hidden, 121 cl::desc("Maximum depth allowed for the recursive algorithm used to " 122 "explore MIV direction vectors.")); 123 124 //===----------------------------------------------------------------------===// 125 // basics 126 127 DependenceAnalysis::Result 128 DependenceAnalysis::run(Function &F, FunctionAnalysisManager &FAM) { 129 auto &AA = FAM.getResult<AAManager>(F); 130 auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F); 131 auto &LI = FAM.getResult<LoopAnalysis>(F); 132 return DependenceInfo(&F, &AA, &SE, &LI); 133 } 134 135 AnalysisKey DependenceAnalysis::Key; 136 137 INITIALIZE_PASS_BEGIN(DependenceAnalysisWrapperPass, "da", 138 "Dependence Analysis", true, true) 139 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 140 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) 141 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 142 INITIALIZE_PASS_END(DependenceAnalysisWrapperPass, "da", "Dependence Analysis", 143 true, true) 144 145 char DependenceAnalysisWrapperPass::ID = 0; 146 147 DependenceAnalysisWrapperPass::DependenceAnalysisWrapperPass() 148 : FunctionPass(ID) { 149 initializeDependenceAnalysisWrapperPassPass(*PassRegistry::getPassRegistry()); 150 } 151 152 FunctionPass *llvm::createDependenceAnalysisWrapperPass() { 153 return new DependenceAnalysisWrapperPass(); 154 } 155 156 bool DependenceAnalysisWrapperPass::runOnFunction(Function &F) { 157 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); 158 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 159 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 160 info.reset(new DependenceInfo(&F, &AA, &SE, &LI)); 161 return false; 162 } 163 164 DependenceInfo &DependenceAnalysisWrapperPass::getDI() const { return *info; } 165 166 void DependenceAnalysisWrapperPass::releaseMemory() { info.reset(); } 167 168 void DependenceAnalysisWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { 169 AU.setPreservesAll(); 170 AU.addRequiredTransitive<AAResultsWrapperPass>(); 171 AU.addRequiredTransitive<ScalarEvolutionWrapperPass>(); 172 AU.addRequiredTransitive<LoopInfoWrapperPass>(); 173 } 174 175 // Used to test the dependence analyzer. 176 // Looks through the function, noting instructions that may access memory. 177 // Calls depends() on every possible pair and prints out the result. 178 // Ignores all other instructions. 179 static void dumpExampleDependence(raw_ostream &OS, DependenceInfo *DA, 180 ScalarEvolution &SE, bool NormalizeResults) { 181 auto *F = DA->getFunction(); 182 for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F); SrcI != SrcE; 183 ++SrcI) { 184 if (SrcI->mayReadOrWriteMemory()) { 185 for (inst_iterator DstI = SrcI, DstE = inst_end(F); 186 DstI != DstE; ++DstI) { 187 if (DstI->mayReadOrWriteMemory()) { 188 OS << "Src:" << *SrcI << " --> Dst:" << *DstI << "\n"; 189 OS << " da analyze - "; 190 if (auto D = DA->depends(&*SrcI, &*DstI, true)) { 191 // Normalize negative direction vectors if required by clients. 192 if (NormalizeResults && D->normalize(&SE)) 193 OS << "normalized - "; 194 D->dump(OS); 195 for (unsigned Level = 1; Level <= D->getLevels(); Level++) { 196 if (D->isSplitable(Level)) { 197 OS << " da analyze - split level = " << Level; 198 OS << ", iteration = " << *DA->getSplitIteration(*D, Level); 199 OS << "!\n"; 200 } 201 } 202 } 203 else 204 OS << "none!\n"; 205 } 206 } 207 } 208 } 209 } 210 211 void DependenceAnalysisWrapperPass::print(raw_ostream &OS, 212 const Module *) const { 213 dumpExampleDependence(OS, info.get(), 214 getAnalysis<ScalarEvolutionWrapperPass>().getSE(), false); 215 } 216 217 PreservedAnalyses 218 DependenceAnalysisPrinterPass::run(Function &F, FunctionAnalysisManager &FAM) { 219 OS << "'Dependence Analysis' for function '" << F.getName() << "':\n"; 220 dumpExampleDependence(OS, &FAM.getResult<DependenceAnalysis>(F), 221 FAM.getResult<ScalarEvolutionAnalysis>(F), 222 NormalizeResults); 223 return PreservedAnalyses::all(); 224 } 225 226 //===----------------------------------------------------------------------===// 227 // Dependence methods 228 229 // Returns true if this is an input dependence. 230 bool Dependence::isInput() const { 231 return Src->mayReadFromMemory() && Dst->mayReadFromMemory(); 232 } 233 234 235 // Returns true if this is an output dependence. 236 bool Dependence::isOutput() const { 237 return Src->mayWriteToMemory() && Dst->mayWriteToMemory(); 238 } 239 240 241 // Returns true if this is an flow (aka true) dependence. 242 bool Dependence::isFlow() const { 243 return Src->mayWriteToMemory() && Dst->mayReadFromMemory(); 244 } 245 246 247 // Returns true if this is an anti dependence. 248 bool Dependence::isAnti() const { 249 return Src->mayReadFromMemory() && Dst->mayWriteToMemory(); 250 } 251 252 253 // Returns true if a particular level is scalar; that is, 254 // if no subscript in the source or destination mention the induction 255 // variable associated with the loop at this level. 256 // Leave this out of line, so it will serve as a virtual method anchor 257 bool Dependence::isScalar(unsigned level) const { 258 return false; 259 } 260 261 262 //===----------------------------------------------------------------------===// 263 // FullDependence methods 264 265 FullDependence::FullDependence(Instruction *Source, Instruction *Destination, 266 bool PossiblyLoopIndependent, 267 unsigned CommonLevels) 268 : Dependence(Source, Destination), Levels(CommonLevels), 269 LoopIndependent(PossiblyLoopIndependent) { 270 Consistent = true; 271 if (CommonLevels) 272 DV = std::make_unique<DVEntry[]>(CommonLevels); 273 } 274 275 // FIXME: in some cases the meaning of a negative direction vector 276 // may not be straightforward, e.g., 277 // for (int i = 0; i < 32; ++i) { 278 // Src: A[i] = ...; 279 // Dst: use(A[31 - i]); 280 // } 281 // The dependency is 282 // flow { Src[i] -> Dst[31 - i] : when i >= 16 } and 283 // anti { Dst[i] -> Src[31 - i] : when i < 16 }, 284 // -- hence a [<>]. 285 // As long as a dependence result contains '>' ('<>', '<=>', "*"), it 286 // means that a reversed/normalized dependence needs to be considered 287 // as well. Nevertheless, current isDirectionNegative() only returns 288 // true with a '>' or '>=' dependency for ease of canonicalizing the 289 // dependency vector, since the reverse of '<>', '<=>' and "*" is itself. 290 bool FullDependence::isDirectionNegative() const { 291 for (unsigned Level = 1; Level <= Levels; ++Level) { 292 unsigned char Direction = DV[Level - 1].Direction; 293 if (Direction == Dependence::DVEntry::EQ) 294 continue; 295 if (Direction == Dependence::DVEntry::GT || 296 Direction == Dependence::DVEntry::GE) 297 return true; 298 return false; 299 } 300 return false; 301 } 302 303 bool FullDependence::normalize(ScalarEvolution *SE) { 304 if (!isDirectionNegative()) 305 return false; 306 307 LLVM_DEBUG(dbgs() << "Before normalizing negative direction vectors:\n"; 308 dump(dbgs());); 309 std::swap(Src, Dst); 310 for (unsigned Level = 1; Level <= Levels; ++Level) { 311 unsigned char Direction = DV[Level - 1].Direction; 312 // Reverse the direction vector, this means LT becomes GT 313 // and GT becomes LT. 314 unsigned char RevDirection = Direction & Dependence::DVEntry::EQ; 315 if (Direction & Dependence::DVEntry::LT) 316 RevDirection |= Dependence::DVEntry::GT; 317 if (Direction & Dependence::DVEntry::GT) 318 RevDirection |= Dependence::DVEntry::LT; 319 DV[Level - 1].Direction = RevDirection; 320 // Reverse the dependence distance as well. 321 if (DV[Level - 1].Distance != nullptr) 322 DV[Level - 1].Distance = 323 SE->getNegativeSCEV(DV[Level - 1].Distance); 324 } 325 326 LLVM_DEBUG(dbgs() << "After normalizing negative direction vectors:\n"; 327 dump(dbgs());); 328 return true; 329 } 330 331 // The rest are simple getters that hide the implementation. 332 333 // getDirection - Returns the direction associated with a particular level. 334 unsigned FullDependence::getDirection(unsigned Level) const { 335 assert(0 < Level && Level <= Levels && "Level out of range"); 336 return DV[Level - 1].Direction; 337 } 338 339 340 // Returns the distance (or NULL) associated with a particular level. 341 const SCEV *FullDependence::getDistance(unsigned Level) const { 342 assert(0 < Level && Level <= Levels && "Level out of range"); 343 return DV[Level - 1].Distance; 344 } 345 346 347 // Returns true if a particular level is scalar; that is, 348 // if no subscript in the source or destination mention the induction 349 // variable associated with the loop at this level. 350 bool FullDependence::isScalar(unsigned Level) const { 351 assert(0 < Level && Level <= Levels && "Level out of range"); 352 return DV[Level - 1].Scalar; 353 } 354 355 356 // Returns true if peeling the first iteration from this loop 357 // will break this dependence. 358 bool FullDependence::isPeelFirst(unsigned Level) const { 359 assert(0 < Level && Level <= Levels && "Level out of range"); 360 return DV[Level - 1].PeelFirst; 361 } 362 363 364 // Returns true if peeling the last iteration from this loop 365 // will break this dependence. 366 bool FullDependence::isPeelLast(unsigned Level) const { 367 assert(0 < Level && Level <= Levels && "Level out of range"); 368 return DV[Level - 1].PeelLast; 369 } 370 371 372 // Returns true if splitting this loop will break the dependence. 373 bool FullDependence::isSplitable(unsigned Level) const { 374 assert(0 < Level && Level <= Levels && "Level out of range"); 375 return DV[Level - 1].Splitable; 376 } 377 378 379 //===----------------------------------------------------------------------===// 380 // DependenceInfo::Constraint methods 381 382 // If constraint is a point <X, Y>, returns X. 383 // Otherwise assert. 384 const SCEV *DependenceInfo::Constraint::getX() const { 385 assert(Kind == Point && "Kind should be Point"); 386 return A; 387 } 388 389 390 // If constraint is a point <X, Y>, returns Y. 391 // Otherwise assert. 392 const SCEV *DependenceInfo::Constraint::getY() const { 393 assert(Kind == Point && "Kind should be Point"); 394 return B; 395 } 396 397 398 // If constraint is a line AX + BY = C, returns A. 399 // Otherwise assert. 400 const SCEV *DependenceInfo::Constraint::getA() const { 401 assert((Kind == Line || Kind == Distance) && 402 "Kind should be Line (or Distance)"); 403 return A; 404 } 405 406 407 // If constraint is a line AX + BY = C, returns B. 408 // Otherwise assert. 409 const SCEV *DependenceInfo::Constraint::getB() const { 410 assert((Kind == Line || Kind == Distance) && 411 "Kind should be Line (or Distance)"); 412 return B; 413 } 414 415 416 // If constraint is a line AX + BY = C, returns C. 417 // Otherwise assert. 418 const SCEV *DependenceInfo::Constraint::getC() const { 419 assert((Kind == Line || Kind == Distance) && 420 "Kind should be Line (or Distance)"); 421 return C; 422 } 423 424 425 // If constraint is a distance, returns D. 426 // Otherwise assert. 427 const SCEV *DependenceInfo::Constraint::getD() const { 428 assert(Kind == Distance && "Kind should be Distance"); 429 return SE->getNegativeSCEV(C); 430 } 431 432 433 // Returns the loop associated with this constraint. 434 const Loop *DependenceInfo::Constraint::getAssociatedLoop() const { 435 assert((Kind == Distance || Kind == Line || Kind == Point) && 436 "Kind should be Distance, Line, or Point"); 437 return AssociatedLoop; 438 } 439 440 void DependenceInfo::Constraint::setPoint(const SCEV *X, const SCEV *Y, 441 const Loop *CurLoop) { 442 Kind = Point; 443 A = X; 444 B = Y; 445 AssociatedLoop = CurLoop; 446 } 447 448 void DependenceInfo::Constraint::setLine(const SCEV *AA, const SCEV *BB, 449 const SCEV *CC, const Loop *CurLoop) { 450 Kind = Line; 451 A = AA; 452 B = BB; 453 C = CC; 454 AssociatedLoop = CurLoop; 455 } 456 457 void DependenceInfo::Constraint::setDistance(const SCEV *D, 458 const Loop *CurLoop) { 459 Kind = Distance; 460 A = SE->getOne(D->getType()); 461 B = SE->getNegativeSCEV(A); 462 C = SE->getNegativeSCEV(D); 463 AssociatedLoop = CurLoop; 464 } 465 466 void DependenceInfo::Constraint::setEmpty() { Kind = Empty; } 467 468 void DependenceInfo::Constraint::setAny(ScalarEvolution *NewSE) { 469 SE = NewSE; 470 Kind = Any; 471 } 472 473 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 474 // For debugging purposes. Dumps the constraint out to OS. 475 LLVM_DUMP_METHOD void DependenceInfo::Constraint::dump(raw_ostream &OS) const { 476 if (isEmpty()) 477 OS << " Empty\n"; 478 else if (isAny()) 479 OS << " Any\n"; 480 else if (isPoint()) 481 OS << " Point is <" << *getX() << ", " << *getY() << ">\n"; 482 else if (isDistance()) 483 OS << " Distance is " << *getD() << 484 " (" << *getA() << "*X + " << *getB() << "*Y = " << *getC() << ")\n"; 485 else if (isLine()) 486 OS << " Line is " << *getA() << "*X + " << 487 *getB() << "*Y = " << *getC() << "\n"; 488 else 489 llvm_unreachable("unknown constraint type in Constraint::dump"); 490 } 491 #endif 492 493 494 // Updates X with the intersection 495 // of the Constraints X and Y. Returns true if X has changed. 496 // Corresponds to Figure 4 from the paper 497 // 498 // Practical Dependence Testing 499 // Goff, Kennedy, Tseng 500 // PLDI 1991 501 bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) { 502 ++DeltaApplications; 503 LLVM_DEBUG(dbgs() << "\tintersect constraints\n"); 504 LLVM_DEBUG(dbgs() << "\t X ="; X->dump(dbgs())); 505 LLVM_DEBUG(dbgs() << "\t Y ="; Y->dump(dbgs())); 506 assert(!Y->isPoint() && "Y must not be a Point"); 507 if (X->isAny()) { 508 if (Y->isAny()) 509 return false; 510 *X = *Y; 511 return true; 512 } 513 if (X->isEmpty()) 514 return false; 515 if (Y->isEmpty()) { 516 X->setEmpty(); 517 return true; 518 } 519 520 if (X->isDistance() && Y->isDistance()) { 521 LLVM_DEBUG(dbgs() << "\t intersect 2 distances\n"); 522 if (isKnownPredicate(CmpInst::ICMP_EQ, X->getD(), Y->getD())) 523 return false; 524 if (isKnownPredicate(CmpInst::ICMP_NE, X->getD(), Y->getD())) { 525 X->setEmpty(); 526 ++DeltaSuccesses; 527 return true; 528 } 529 // Hmmm, interesting situation. 530 // I guess if either is constant, keep it and ignore the other. 531 if (isa<SCEVConstant>(Y->getD())) { 532 *X = *Y; 533 return true; 534 } 535 return false; 536 } 537 538 // At this point, the pseudo-code in Figure 4 of the paper 539 // checks if (X->isPoint() && Y->isPoint()). 540 // This case can't occur in our implementation, 541 // since a Point can only arise as the result of intersecting 542 // two Line constraints, and the right-hand value, Y, is never 543 // the result of an intersection. 544 assert(!(X->isPoint() && Y->isPoint()) && 545 "We shouldn't ever see X->isPoint() && Y->isPoint()"); 546 547 if (X->isLine() && Y->isLine()) { 548 LLVM_DEBUG(dbgs() << "\t intersect 2 lines\n"); 549 const SCEV *Prod1 = SE->getMulExpr(X->getA(), Y->getB()); 550 const SCEV *Prod2 = SE->getMulExpr(X->getB(), Y->getA()); 551 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) { 552 // slopes are equal, so lines are parallel 553 LLVM_DEBUG(dbgs() << "\t\tsame slope\n"); 554 Prod1 = SE->getMulExpr(X->getC(), Y->getB()); 555 Prod2 = SE->getMulExpr(X->getB(), Y->getC()); 556 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) 557 return false; 558 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 559 X->setEmpty(); 560 ++DeltaSuccesses; 561 return true; 562 } 563 return false; 564 } 565 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 566 // slopes differ, so lines intersect 567 LLVM_DEBUG(dbgs() << "\t\tdifferent slopes\n"); 568 const SCEV *C1B2 = SE->getMulExpr(X->getC(), Y->getB()); 569 const SCEV *C1A2 = SE->getMulExpr(X->getC(), Y->getA()); 570 const SCEV *C2B1 = SE->getMulExpr(Y->getC(), X->getB()); 571 const SCEV *C2A1 = SE->getMulExpr(Y->getC(), X->getA()); 572 const SCEV *A1B2 = SE->getMulExpr(X->getA(), Y->getB()); 573 const SCEV *A2B1 = SE->getMulExpr(Y->getA(), X->getB()); 574 const SCEVConstant *C1A2_C2A1 = 575 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1A2, C2A1)); 576 const SCEVConstant *C1B2_C2B1 = 577 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1B2, C2B1)); 578 const SCEVConstant *A1B2_A2B1 = 579 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A1B2, A2B1)); 580 const SCEVConstant *A2B1_A1B2 = 581 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A2B1, A1B2)); 582 if (!C1B2_C2B1 || !C1A2_C2A1 || 583 !A1B2_A2B1 || !A2B1_A1B2) 584 return false; 585 APInt Xtop = C1B2_C2B1->getAPInt(); 586 APInt Xbot = A1B2_A2B1->getAPInt(); 587 APInt Ytop = C1A2_C2A1->getAPInt(); 588 APInt Ybot = A2B1_A1B2->getAPInt(); 589 LLVM_DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n"); 590 LLVM_DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n"); 591 LLVM_DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n"); 592 LLVM_DEBUG(dbgs() << "\t\tYbot = " << Ybot << "\n"); 593 APInt Xq = Xtop; // these need to be initialized, even 594 APInt Xr = Xtop; // though they're just going to be overwritten 595 APInt::sdivrem(Xtop, Xbot, Xq, Xr); 596 APInt Yq = Ytop; 597 APInt Yr = Ytop; 598 APInt::sdivrem(Ytop, Ybot, Yq, Yr); 599 if (Xr != 0 || Yr != 0) { 600 X->setEmpty(); 601 ++DeltaSuccesses; 602 return true; 603 } 604 LLVM_DEBUG(dbgs() << "\t\tX = " << Xq << ", Y = " << Yq << "\n"); 605 if (Xq.slt(0) || Yq.slt(0)) { 606 X->setEmpty(); 607 ++DeltaSuccesses; 608 return true; 609 } 610 if (const SCEVConstant *CUB = 611 collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) { 612 const APInt &UpperBound = CUB->getAPInt(); 613 LLVM_DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n"); 614 if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) { 615 X->setEmpty(); 616 ++DeltaSuccesses; 617 return true; 618 } 619 } 620 X->setPoint(SE->getConstant(Xq), 621 SE->getConstant(Yq), 622 X->getAssociatedLoop()); 623 ++DeltaSuccesses; 624 return true; 625 } 626 return false; 627 } 628 629 // if (X->isLine() && Y->isPoint()) This case can't occur. 630 assert(!(X->isLine() && Y->isPoint()) && "This case should never occur"); 631 632 if (X->isPoint() && Y->isLine()) { 633 LLVM_DEBUG(dbgs() << "\t intersect Point and Line\n"); 634 const SCEV *A1X1 = SE->getMulExpr(Y->getA(), X->getX()); 635 const SCEV *B1Y1 = SE->getMulExpr(Y->getB(), X->getY()); 636 const SCEV *Sum = SE->getAddExpr(A1X1, B1Y1); 637 if (isKnownPredicate(CmpInst::ICMP_EQ, Sum, Y->getC())) 638 return false; 639 if (isKnownPredicate(CmpInst::ICMP_NE, Sum, Y->getC())) { 640 X->setEmpty(); 641 ++DeltaSuccesses; 642 return true; 643 } 644 return false; 645 } 646 647 llvm_unreachable("shouldn't reach the end of Constraint intersection"); 648 return false; 649 } 650 651 652 //===----------------------------------------------------------------------===// 653 // DependenceInfo methods 654 655 // For debugging purposes. Dumps a dependence to OS. 656 void Dependence::dump(raw_ostream &OS) const { 657 bool Splitable = false; 658 if (isConfused()) 659 OS << "confused"; 660 else { 661 if (isConsistent()) 662 OS << "consistent "; 663 if (isFlow()) 664 OS << "flow"; 665 else if (isOutput()) 666 OS << "output"; 667 else if (isAnti()) 668 OS << "anti"; 669 else if (isInput()) 670 OS << "input"; 671 unsigned Levels = getLevels(); 672 OS << " ["; 673 for (unsigned II = 1; II <= Levels; ++II) { 674 if (isSplitable(II)) 675 Splitable = true; 676 if (isPeelFirst(II)) 677 OS << 'p'; 678 const SCEV *Distance = getDistance(II); 679 if (Distance) 680 OS << *Distance; 681 else if (isScalar(II)) 682 OS << "S"; 683 else { 684 unsigned Direction = getDirection(II); 685 if (Direction == DVEntry::ALL) 686 OS << "*"; 687 else { 688 if (Direction & DVEntry::LT) 689 OS << "<"; 690 if (Direction & DVEntry::EQ) 691 OS << "="; 692 if (Direction & DVEntry::GT) 693 OS << ">"; 694 } 695 } 696 if (isPeelLast(II)) 697 OS << 'p'; 698 if (II < Levels) 699 OS << " "; 700 } 701 if (isLoopIndependent()) 702 OS << "|<"; 703 OS << "]"; 704 if (Splitable) 705 OS << " splitable"; 706 } 707 OS << "!\n"; 708 } 709 710 // Returns NoAlias/MayAliass/MustAlias for two memory locations based upon their 711 // underlaying objects. If LocA and LocB are known to not alias (for any reason: 712 // tbaa, non-overlapping regions etc), then it is known there is no dependecy. 713 // Otherwise the underlying objects are checked to see if they point to 714 // different identifiable objects. 715 static AliasResult underlyingObjectsAlias(AAResults *AA, 716 const DataLayout &DL, 717 const MemoryLocation &LocA, 718 const MemoryLocation &LocB) { 719 // Check the original locations (minus size) for noalias, which can happen for 720 // tbaa, incompatible underlying object locations, etc. 721 MemoryLocation LocAS = 722 MemoryLocation::getBeforeOrAfter(LocA.Ptr, LocA.AATags); 723 MemoryLocation LocBS = 724 MemoryLocation::getBeforeOrAfter(LocB.Ptr, LocB.AATags); 725 if (AA->isNoAlias(LocAS, LocBS)) 726 return AliasResult::NoAlias; 727 728 // Check the underlying objects are the same 729 const Value *AObj = getUnderlyingObject(LocA.Ptr); 730 const Value *BObj = getUnderlyingObject(LocB.Ptr); 731 732 // If the underlying objects are the same, they must alias 733 if (AObj == BObj) 734 return AliasResult::MustAlias; 735 736 // We may have hit the recursion limit for underlying objects, or have 737 // underlying objects where we don't know they will alias. 738 if (!isIdentifiedObject(AObj) || !isIdentifiedObject(BObj)) 739 return AliasResult::MayAlias; 740 741 // Otherwise we know the objects are different and both identified objects so 742 // must not alias. 743 return AliasResult::NoAlias; 744 } 745 746 747 // Returns true if the load or store can be analyzed. Atomic and volatile 748 // operations have properties which this analysis does not understand. 749 static 750 bool isLoadOrStore(const Instruction *I) { 751 if (const LoadInst *LI = dyn_cast<LoadInst>(I)) 752 return LI->isUnordered(); 753 else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) 754 return SI->isUnordered(); 755 return false; 756 } 757 758 759 // Examines the loop nesting of the Src and Dst 760 // instructions and establishes their shared loops. Sets the variables 761 // CommonLevels, SrcLevels, and MaxLevels. 762 // The source and destination instructions needn't be contained in the same 763 // loop. The routine establishNestingLevels finds the level of most deeply 764 // nested loop that contains them both, CommonLevels. An instruction that's 765 // not contained in a loop is at level = 0. MaxLevels is equal to the level 766 // of the source plus the level of the destination, minus CommonLevels. 767 // This lets us allocate vectors MaxLevels in length, with room for every 768 // distinct loop referenced in both the source and destination subscripts. 769 // The variable SrcLevels is the nesting depth of the source instruction. 770 // It's used to help calculate distinct loops referenced by the destination. 771 // Here's the map from loops to levels: 772 // 0 - unused 773 // 1 - outermost common loop 774 // ... - other common loops 775 // CommonLevels - innermost common loop 776 // ... - loops containing Src but not Dst 777 // SrcLevels - innermost loop containing Src but not Dst 778 // ... - loops containing Dst but not Src 779 // MaxLevels - innermost loops containing Dst but not Src 780 // Consider the follow code fragment: 781 // for (a = ...) { 782 // for (b = ...) { 783 // for (c = ...) { 784 // for (d = ...) { 785 // A[] = ...; 786 // } 787 // } 788 // for (e = ...) { 789 // for (f = ...) { 790 // for (g = ...) { 791 // ... = A[]; 792 // } 793 // } 794 // } 795 // } 796 // } 797 // If we're looking at the possibility of a dependence between the store 798 // to A (the Src) and the load from A (the Dst), we'll note that they 799 // have 2 loops in common, so CommonLevels will equal 2 and the direction 800 // vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7. 801 // A map from loop names to loop numbers would look like 802 // a - 1 803 // b - 2 = CommonLevels 804 // c - 3 805 // d - 4 = SrcLevels 806 // e - 5 807 // f - 6 808 // g - 7 = MaxLevels 809 void DependenceInfo::establishNestingLevels(const Instruction *Src, 810 const Instruction *Dst) { 811 const BasicBlock *SrcBlock = Src->getParent(); 812 const BasicBlock *DstBlock = Dst->getParent(); 813 unsigned SrcLevel = LI->getLoopDepth(SrcBlock); 814 unsigned DstLevel = LI->getLoopDepth(DstBlock); 815 const Loop *SrcLoop = LI->getLoopFor(SrcBlock); 816 const Loop *DstLoop = LI->getLoopFor(DstBlock); 817 SrcLevels = SrcLevel; 818 MaxLevels = SrcLevel + DstLevel; 819 while (SrcLevel > DstLevel) { 820 SrcLoop = SrcLoop->getParentLoop(); 821 SrcLevel--; 822 } 823 while (DstLevel > SrcLevel) { 824 DstLoop = DstLoop->getParentLoop(); 825 DstLevel--; 826 } 827 while (SrcLoop != DstLoop) { 828 SrcLoop = SrcLoop->getParentLoop(); 829 DstLoop = DstLoop->getParentLoop(); 830 SrcLevel--; 831 } 832 CommonLevels = SrcLevel; 833 MaxLevels -= CommonLevels; 834 } 835 836 837 // Given one of the loops containing the source, return 838 // its level index in our numbering scheme. 839 unsigned DependenceInfo::mapSrcLoop(const Loop *SrcLoop) const { 840 return SrcLoop->getLoopDepth(); 841 } 842 843 844 // Given one of the loops containing the destination, 845 // return its level index in our numbering scheme. 846 unsigned DependenceInfo::mapDstLoop(const Loop *DstLoop) const { 847 unsigned D = DstLoop->getLoopDepth(); 848 if (D > CommonLevels) 849 // This tries to make sure that we assign unique numbers to src and dst when 850 // the memory accesses reside in different loops that have the same depth. 851 return D - CommonLevels + SrcLevels; 852 else 853 return D; 854 } 855 856 857 // Returns true if Expression is loop invariant in LoopNest. 858 bool DependenceInfo::isLoopInvariant(const SCEV *Expression, 859 const Loop *LoopNest) const { 860 // Unlike ScalarEvolution::isLoopInvariant() we consider an access outside of 861 // any loop as invariant, because we only consier expression evaluation at a 862 // specific position (where the array access takes place), and not across the 863 // entire function. 864 if (!LoopNest) 865 return true; 866 867 // If the expression is invariant in the outermost loop of the loop nest, it 868 // is invariant anywhere in the loop nest. 869 return SE->isLoopInvariant(Expression, LoopNest->getOutermostLoop()); 870 } 871 872 873 874 // Finds the set of loops from the LoopNest that 875 // have a level <= CommonLevels and are referred to by the SCEV Expression. 876 void DependenceInfo::collectCommonLoops(const SCEV *Expression, 877 const Loop *LoopNest, 878 SmallBitVector &Loops) const { 879 while (LoopNest) { 880 unsigned Level = LoopNest->getLoopDepth(); 881 if (Level <= CommonLevels && !SE->isLoopInvariant(Expression, LoopNest)) 882 Loops.set(Level); 883 LoopNest = LoopNest->getParentLoop(); 884 } 885 } 886 887 void DependenceInfo::unifySubscriptType(ArrayRef<Subscript *> Pairs) { 888 889 unsigned widestWidthSeen = 0; 890 Type *widestType; 891 892 // Go through each pair and find the widest bit to which we need 893 // to extend all of them. 894 for (Subscript *Pair : Pairs) { 895 const SCEV *Src = Pair->Src; 896 const SCEV *Dst = Pair->Dst; 897 IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType()); 898 IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType()); 899 if (SrcTy == nullptr || DstTy == nullptr) { 900 assert(SrcTy == DstTy && "This function only unify integer types and " 901 "expect Src and Dst share the same type " 902 "otherwise."); 903 continue; 904 } 905 if (SrcTy->getBitWidth() > widestWidthSeen) { 906 widestWidthSeen = SrcTy->getBitWidth(); 907 widestType = SrcTy; 908 } 909 if (DstTy->getBitWidth() > widestWidthSeen) { 910 widestWidthSeen = DstTy->getBitWidth(); 911 widestType = DstTy; 912 } 913 } 914 915 916 assert(widestWidthSeen > 0); 917 918 // Now extend each pair to the widest seen. 919 for (Subscript *Pair : Pairs) { 920 const SCEV *Src = Pair->Src; 921 const SCEV *Dst = Pair->Dst; 922 IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType()); 923 IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType()); 924 if (SrcTy == nullptr || DstTy == nullptr) { 925 assert(SrcTy == DstTy && "This function only unify integer types and " 926 "expect Src and Dst share the same type " 927 "otherwise."); 928 continue; 929 } 930 if (SrcTy->getBitWidth() < widestWidthSeen) 931 // Sign-extend Src to widestType 932 Pair->Src = SE->getSignExtendExpr(Src, widestType); 933 if (DstTy->getBitWidth() < widestWidthSeen) { 934 // Sign-extend Dst to widestType 935 Pair->Dst = SE->getSignExtendExpr(Dst, widestType); 936 } 937 } 938 } 939 940 // removeMatchingExtensions - Examines a subscript pair. 941 // If the source and destination are identically sign (or zero) 942 // extended, it strips off the extension in an effect to simplify 943 // the actual analysis. 944 void DependenceInfo::removeMatchingExtensions(Subscript *Pair) { 945 const SCEV *Src = Pair->Src; 946 const SCEV *Dst = Pair->Dst; 947 if ((isa<SCEVZeroExtendExpr>(Src) && isa<SCEVZeroExtendExpr>(Dst)) || 948 (isa<SCEVSignExtendExpr>(Src) && isa<SCEVSignExtendExpr>(Dst))) { 949 const SCEVIntegralCastExpr *SrcCast = cast<SCEVIntegralCastExpr>(Src); 950 const SCEVIntegralCastExpr *DstCast = cast<SCEVIntegralCastExpr>(Dst); 951 const SCEV *SrcCastOp = SrcCast->getOperand(); 952 const SCEV *DstCastOp = DstCast->getOperand(); 953 if (SrcCastOp->getType() == DstCastOp->getType()) { 954 Pair->Src = SrcCastOp; 955 Pair->Dst = DstCastOp; 956 } 957 } 958 } 959 960 // Examine the scev and return true iff it's affine. 961 // Collect any loops mentioned in the set of "Loops". 962 bool DependenceInfo::checkSubscript(const SCEV *Expr, const Loop *LoopNest, 963 SmallBitVector &Loops, bool IsSrc) { 964 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 965 if (!AddRec) 966 return isLoopInvariant(Expr, LoopNest); 967 968 // The AddRec must depend on one of the containing loops. Otherwise, 969 // mapSrcLoop and mapDstLoop return indices outside the intended range. This 970 // can happen when a subscript in one loop references an IV from a sibling 971 // loop that could not be replaced with a concrete exit value by 972 // getSCEVAtScope. 973 const Loop *L = LoopNest; 974 while (L && AddRec->getLoop() != L) 975 L = L->getParentLoop(); 976 if (!L) 977 return false; 978 979 const SCEV *Start = AddRec->getStart(); 980 const SCEV *Step = AddRec->getStepRecurrence(*SE); 981 const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop()); 982 if (!isa<SCEVCouldNotCompute>(UB)) { 983 if (SE->getTypeSizeInBits(Start->getType()) < 984 SE->getTypeSizeInBits(UB->getType())) { 985 if (!AddRec->getNoWrapFlags()) 986 return false; 987 } 988 } 989 if (!isLoopInvariant(Step, LoopNest)) 990 return false; 991 if (IsSrc) 992 Loops.set(mapSrcLoop(AddRec->getLoop())); 993 else 994 Loops.set(mapDstLoop(AddRec->getLoop())); 995 return checkSubscript(Start, LoopNest, Loops, IsSrc); 996 } 997 998 // Examine the scev and return true iff it's linear. 999 // Collect any loops mentioned in the set of "Loops". 1000 bool DependenceInfo::checkSrcSubscript(const SCEV *Src, const Loop *LoopNest, 1001 SmallBitVector &Loops) { 1002 return checkSubscript(Src, LoopNest, Loops, true); 1003 } 1004 1005 // Examine the scev and return true iff it's linear. 1006 // Collect any loops mentioned in the set of "Loops". 1007 bool DependenceInfo::checkDstSubscript(const SCEV *Dst, const Loop *LoopNest, 1008 SmallBitVector &Loops) { 1009 return checkSubscript(Dst, LoopNest, Loops, false); 1010 } 1011 1012 1013 // Examines the subscript pair (the Src and Dst SCEVs) 1014 // and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear. 1015 // Collects the associated loops in a set. 1016 DependenceInfo::Subscript::ClassificationKind 1017 DependenceInfo::classifyPair(const SCEV *Src, const Loop *SrcLoopNest, 1018 const SCEV *Dst, const Loop *DstLoopNest, 1019 SmallBitVector &Loops) { 1020 SmallBitVector SrcLoops(MaxLevels + 1); 1021 SmallBitVector DstLoops(MaxLevels + 1); 1022 if (!checkSrcSubscript(Src, SrcLoopNest, SrcLoops)) 1023 return Subscript::NonLinear; 1024 if (!checkDstSubscript(Dst, DstLoopNest, DstLoops)) 1025 return Subscript::NonLinear; 1026 Loops = SrcLoops; 1027 Loops |= DstLoops; 1028 unsigned N = Loops.count(); 1029 if (N == 0) 1030 return Subscript::ZIV; 1031 if (N == 1) 1032 return Subscript::SIV; 1033 if (N == 2 && (SrcLoops.count() == 0 || 1034 DstLoops.count() == 0 || 1035 (SrcLoops.count() == 1 && DstLoops.count() == 1))) 1036 return Subscript::RDIV; 1037 return Subscript::MIV; 1038 } 1039 1040 1041 // A wrapper around SCEV::isKnownPredicate. 1042 // Looks for cases where we're interested in comparing for equality. 1043 // If both X and Y have been identically sign or zero extended, 1044 // it strips off the (confusing) extensions before invoking 1045 // SCEV::isKnownPredicate. Perhaps, someday, the ScalarEvolution package 1046 // will be similarly updated. 1047 // 1048 // If SCEV::isKnownPredicate can't prove the predicate, 1049 // we try simple subtraction, which seems to help in some cases 1050 // involving symbolics. 1051 bool DependenceInfo::isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *X, 1052 const SCEV *Y) const { 1053 if (Pred == CmpInst::ICMP_EQ || 1054 Pred == CmpInst::ICMP_NE) { 1055 if ((isa<SCEVSignExtendExpr>(X) && 1056 isa<SCEVSignExtendExpr>(Y)) || 1057 (isa<SCEVZeroExtendExpr>(X) && 1058 isa<SCEVZeroExtendExpr>(Y))) { 1059 const SCEVIntegralCastExpr *CX = cast<SCEVIntegralCastExpr>(X); 1060 const SCEVIntegralCastExpr *CY = cast<SCEVIntegralCastExpr>(Y); 1061 const SCEV *Xop = CX->getOperand(); 1062 const SCEV *Yop = CY->getOperand(); 1063 if (Xop->getType() == Yop->getType()) { 1064 X = Xop; 1065 Y = Yop; 1066 } 1067 } 1068 } 1069 if (SE->isKnownPredicate(Pred, X, Y)) 1070 return true; 1071 // If SE->isKnownPredicate can't prove the condition, 1072 // we try the brute-force approach of subtracting 1073 // and testing the difference. 1074 // By testing with SE->isKnownPredicate first, we avoid 1075 // the possibility of overflow when the arguments are constants. 1076 const SCEV *Delta = SE->getMinusSCEV(X, Y); 1077 switch (Pred) { 1078 case CmpInst::ICMP_EQ: 1079 return Delta->isZero(); 1080 case CmpInst::ICMP_NE: 1081 return SE->isKnownNonZero(Delta); 1082 case CmpInst::ICMP_SGE: 1083 return SE->isKnownNonNegative(Delta); 1084 case CmpInst::ICMP_SLE: 1085 return SE->isKnownNonPositive(Delta); 1086 case CmpInst::ICMP_SGT: 1087 return SE->isKnownPositive(Delta); 1088 case CmpInst::ICMP_SLT: 1089 return SE->isKnownNegative(Delta); 1090 default: 1091 llvm_unreachable("unexpected predicate in isKnownPredicate"); 1092 } 1093 } 1094 1095 /// Compare to see if S is less than Size, using isKnownNegative(S - max(Size, 1)) 1096 /// with some extra checking if S is an AddRec and we can prove less-than using 1097 /// the loop bounds. 1098 bool DependenceInfo::isKnownLessThan(const SCEV *S, const SCEV *Size) const { 1099 // First unify to the same type 1100 auto *SType = dyn_cast<IntegerType>(S->getType()); 1101 auto *SizeType = dyn_cast<IntegerType>(Size->getType()); 1102 if (!SType || !SizeType) 1103 return false; 1104 Type *MaxType = 1105 (SType->getBitWidth() >= SizeType->getBitWidth()) ? SType : SizeType; 1106 S = SE->getTruncateOrZeroExtend(S, MaxType); 1107 Size = SE->getTruncateOrZeroExtend(Size, MaxType); 1108 1109 // Special check for addrecs using BE taken count 1110 const SCEV *Bound = SE->getMinusSCEV(S, Size); 1111 if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Bound)) { 1112 if (AddRec->isAffine()) { 1113 const SCEV *BECount = SE->getBackedgeTakenCount(AddRec->getLoop()); 1114 if (!isa<SCEVCouldNotCompute>(BECount)) { 1115 const SCEV *Limit = AddRec->evaluateAtIteration(BECount, *SE); 1116 if (SE->isKnownNegative(Limit)) 1117 return true; 1118 } 1119 } 1120 } 1121 1122 // Check using normal isKnownNegative 1123 const SCEV *LimitedBound = 1124 SE->getMinusSCEV(S, SE->getSMaxExpr(Size, SE->getOne(Size->getType()))); 1125 return SE->isKnownNegative(LimitedBound); 1126 } 1127 1128 bool DependenceInfo::isKnownNonNegative(const SCEV *S, const Value *Ptr) const { 1129 bool Inbounds = false; 1130 if (auto *SrcGEP = dyn_cast<GetElementPtrInst>(Ptr)) 1131 Inbounds = SrcGEP->isInBounds(); 1132 if (Inbounds) { 1133 if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) { 1134 if (AddRec->isAffine()) { 1135 // We know S is for Ptr, the operand on a load/store, so doesn't wrap. 1136 // If both parts are NonNegative, the end result will be NonNegative 1137 if (SE->isKnownNonNegative(AddRec->getStart()) && 1138 SE->isKnownNonNegative(AddRec->getOperand(1))) 1139 return true; 1140 } 1141 } 1142 } 1143 1144 return SE->isKnownNonNegative(S); 1145 } 1146 1147 // All subscripts are all the same type. 1148 // Loop bound may be smaller (e.g., a char). 1149 // Should zero extend loop bound, since it's always >= 0. 1150 // This routine collects upper bound and extends or truncates if needed. 1151 // Truncating is safe when subscripts are known not to wrap. Cases without 1152 // nowrap flags should have been rejected earlier. 1153 // Return null if no bound available. 1154 const SCEV *DependenceInfo::collectUpperBound(const Loop *L, Type *T) const { 1155 if (SE->hasLoopInvariantBackedgeTakenCount(L)) { 1156 const SCEV *UB = SE->getBackedgeTakenCount(L); 1157 return SE->getTruncateOrZeroExtend(UB, T); 1158 } 1159 return nullptr; 1160 } 1161 1162 1163 // Calls collectUpperBound(), then attempts to cast it to SCEVConstant. 1164 // If the cast fails, returns NULL. 1165 const SCEVConstant *DependenceInfo::collectConstantUpperBound(const Loop *L, 1166 Type *T) const { 1167 if (const SCEV *UB = collectUpperBound(L, T)) 1168 return dyn_cast<SCEVConstant>(UB); 1169 return nullptr; 1170 } 1171 1172 1173 // testZIV - 1174 // When we have a pair of subscripts of the form [c1] and [c2], 1175 // where c1 and c2 are both loop invariant, we attack it using 1176 // the ZIV test. Basically, we test by comparing the two values, 1177 // but there are actually three possible results: 1178 // 1) the values are equal, so there's a dependence 1179 // 2) the values are different, so there's no dependence 1180 // 3) the values might be equal, so we have to assume a dependence. 1181 // 1182 // Return true if dependence disproved. 1183 bool DependenceInfo::testZIV(const SCEV *Src, const SCEV *Dst, 1184 FullDependence &Result) const { 1185 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n"); 1186 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n"); 1187 ++ZIVapplications; 1188 if (isKnownPredicate(CmpInst::ICMP_EQ, Src, Dst)) { 1189 LLVM_DEBUG(dbgs() << " provably dependent\n"); 1190 return false; // provably dependent 1191 } 1192 if (isKnownPredicate(CmpInst::ICMP_NE, Src, Dst)) { 1193 LLVM_DEBUG(dbgs() << " provably independent\n"); 1194 ++ZIVindependence; 1195 return true; // provably independent 1196 } 1197 LLVM_DEBUG(dbgs() << " possibly dependent\n"); 1198 Result.Consistent = false; 1199 return false; // possibly dependent 1200 } 1201 1202 1203 // strongSIVtest - 1204 // From the paper, Practical Dependence Testing, Section 4.2.1 1205 // 1206 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 + a*i], 1207 // where i is an induction variable, c1 and c2 are loop invariant, 1208 // and a is a constant, we can solve it exactly using the Strong SIV test. 1209 // 1210 // Can prove independence. Failing that, can compute distance (and direction). 1211 // In the presence of symbolic terms, we can sometimes make progress. 1212 // 1213 // If there's a dependence, 1214 // 1215 // c1 + a*i = c2 + a*i' 1216 // 1217 // The dependence distance is 1218 // 1219 // d = i' - i = (c1 - c2)/a 1220 // 1221 // A dependence only exists if d is an integer and abs(d) <= U, where U is the 1222 // loop's upper bound. If a dependence exists, the dependence direction is 1223 // defined as 1224 // 1225 // { < if d > 0 1226 // direction = { = if d = 0 1227 // { > if d < 0 1228 // 1229 // Return true if dependence disproved. 1230 bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst, 1231 const SCEV *DstConst, const Loop *CurLoop, 1232 unsigned Level, FullDependence &Result, 1233 Constraint &NewConstraint) const { 1234 LLVM_DEBUG(dbgs() << "\tStrong SIV test\n"); 1235 LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff); 1236 LLVM_DEBUG(dbgs() << ", " << *Coeff->getType() << "\n"); 1237 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst); 1238 LLVM_DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n"); 1239 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst); 1240 LLVM_DEBUG(dbgs() << ", " << *DstConst->getType() << "\n"); 1241 ++StrongSIVapplications; 1242 assert(0 < Level && Level <= CommonLevels && "level out of range"); 1243 Level--; 1244 1245 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1246 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta); 1247 LLVM_DEBUG(dbgs() << ", " << *Delta->getType() << "\n"); 1248 1249 // check that |Delta| < iteration count 1250 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1251 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound); 1252 LLVM_DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n"); 1253 const SCEV *AbsDelta = 1254 SE->isKnownNonNegative(Delta) ? Delta : SE->getNegativeSCEV(Delta); 1255 const SCEV *AbsCoeff = 1256 SE->isKnownNonNegative(Coeff) ? Coeff : SE->getNegativeSCEV(Coeff); 1257 const SCEV *Product = SE->getMulExpr(UpperBound, AbsCoeff); 1258 if (isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product)) { 1259 // Distance greater than trip count - no dependence 1260 ++StrongSIVindependence; 1261 ++StrongSIVsuccesses; 1262 return true; 1263 } 1264 } 1265 1266 // Can we compute distance? 1267 if (isa<SCEVConstant>(Delta) && isa<SCEVConstant>(Coeff)) { 1268 APInt ConstDelta = cast<SCEVConstant>(Delta)->getAPInt(); 1269 APInt ConstCoeff = cast<SCEVConstant>(Coeff)->getAPInt(); 1270 APInt Distance = ConstDelta; // these need to be initialized 1271 APInt Remainder = ConstDelta; 1272 APInt::sdivrem(ConstDelta, ConstCoeff, Distance, Remainder); 1273 LLVM_DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1274 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1275 // Make sure Coeff divides Delta exactly 1276 if (Remainder != 0) { 1277 // Coeff doesn't divide Distance, no dependence 1278 ++StrongSIVindependence; 1279 ++StrongSIVsuccesses; 1280 return true; 1281 } 1282 Result.DV[Level].Distance = SE->getConstant(Distance); 1283 NewConstraint.setDistance(SE->getConstant(Distance), CurLoop); 1284 if (Distance.sgt(0)) 1285 Result.DV[Level].Direction &= Dependence::DVEntry::LT; 1286 else if (Distance.slt(0)) 1287 Result.DV[Level].Direction &= Dependence::DVEntry::GT; 1288 else 1289 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1290 ++StrongSIVsuccesses; 1291 } 1292 else if (Delta->isZero()) { 1293 // since 0/X == 0 1294 Result.DV[Level].Distance = Delta; 1295 NewConstraint.setDistance(Delta, CurLoop); 1296 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1297 ++StrongSIVsuccesses; 1298 } 1299 else { 1300 if (Coeff->isOne()) { 1301 LLVM_DEBUG(dbgs() << "\t Distance = " << *Delta << "\n"); 1302 Result.DV[Level].Distance = Delta; // since X/1 == X 1303 NewConstraint.setDistance(Delta, CurLoop); 1304 } 1305 else { 1306 Result.Consistent = false; 1307 NewConstraint.setLine(Coeff, 1308 SE->getNegativeSCEV(Coeff), 1309 SE->getNegativeSCEV(Delta), CurLoop); 1310 } 1311 1312 // maybe we can get a useful direction 1313 bool DeltaMaybeZero = !SE->isKnownNonZero(Delta); 1314 bool DeltaMaybePositive = !SE->isKnownNonPositive(Delta); 1315 bool DeltaMaybeNegative = !SE->isKnownNonNegative(Delta); 1316 bool CoeffMaybePositive = !SE->isKnownNonPositive(Coeff); 1317 bool CoeffMaybeNegative = !SE->isKnownNonNegative(Coeff); 1318 // The double negatives above are confusing. 1319 // It helps to read !SE->isKnownNonZero(Delta) 1320 // as "Delta might be Zero" 1321 unsigned NewDirection = Dependence::DVEntry::NONE; 1322 if ((DeltaMaybePositive && CoeffMaybePositive) || 1323 (DeltaMaybeNegative && CoeffMaybeNegative)) 1324 NewDirection = Dependence::DVEntry::LT; 1325 if (DeltaMaybeZero) 1326 NewDirection |= Dependence::DVEntry::EQ; 1327 if ((DeltaMaybeNegative && CoeffMaybePositive) || 1328 (DeltaMaybePositive && CoeffMaybeNegative)) 1329 NewDirection |= Dependence::DVEntry::GT; 1330 if (NewDirection < Result.DV[Level].Direction) 1331 ++StrongSIVsuccesses; 1332 Result.DV[Level].Direction &= NewDirection; 1333 } 1334 return false; 1335 } 1336 1337 1338 // weakCrossingSIVtest - 1339 // From the paper, Practical Dependence Testing, Section 4.2.2 1340 // 1341 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 - a*i], 1342 // where i is an induction variable, c1 and c2 are loop invariant, 1343 // and a is a constant, we can solve it exactly using the 1344 // Weak-Crossing SIV test. 1345 // 1346 // Given c1 + a*i = c2 - a*i', we can look for the intersection of 1347 // the two lines, where i = i', yielding 1348 // 1349 // c1 + a*i = c2 - a*i 1350 // 2a*i = c2 - c1 1351 // i = (c2 - c1)/2a 1352 // 1353 // If i < 0, there is no dependence. 1354 // If i > upperbound, there is no dependence. 1355 // If i = 0 (i.e., if c1 = c2), there's a dependence with distance = 0. 1356 // If i = upperbound, there's a dependence with distance = 0. 1357 // If i is integral, there's a dependence (all directions). 1358 // If the non-integer part = 1/2, there's a dependence (<> directions). 1359 // Otherwise, there's no dependence. 1360 // 1361 // Can prove independence. Failing that, 1362 // can sometimes refine the directions. 1363 // Can determine iteration for splitting. 1364 // 1365 // Return true if dependence disproved. 1366 bool DependenceInfo::weakCrossingSIVtest( 1367 const SCEV *Coeff, const SCEV *SrcConst, const SCEV *DstConst, 1368 const Loop *CurLoop, unsigned Level, FullDependence &Result, 1369 Constraint &NewConstraint, const SCEV *&SplitIter) const { 1370 LLVM_DEBUG(dbgs() << "\tWeak-Crossing SIV test\n"); 1371 LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff << "\n"); 1372 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1373 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1374 ++WeakCrossingSIVapplications; 1375 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1376 Level--; 1377 Result.Consistent = false; 1378 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1379 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1380 NewConstraint.setLine(Coeff, Coeff, Delta, CurLoop); 1381 if (Delta->isZero()) { 1382 Result.DV[Level].Direction &= ~Dependence::DVEntry::LT; 1383 Result.DV[Level].Direction &= ~Dependence::DVEntry::GT; 1384 ++WeakCrossingSIVsuccesses; 1385 if (!Result.DV[Level].Direction) { 1386 ++WeakCrossingSIVindependence; 1387 return true; 1388 } 1389 Result.DV[Level].Distance = Delta; // = 0 1390 return false; 1391 } 1392 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Coeff); 1393 if (!ConstCoeff) 1394 return false; 1395 1396 Result.DV[Level].Splitable = true; 1397 if (SE->isKnownNegative(ConstCoeff)) { 1398 ConstCoeff = dyn_cast<SCEVConstant>(SE->getNegativeSCEV(ConstCoeff)); 1399 assert(ConstCoeff && 1400 "dynamic cast of negative of ConstCoeff should yield constant"); 1401 Delta = SE->getNegativeSCEV(Delta); 1402 } 1403 assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive"); 1404 1405 // compute SplitIter for use by DependenceInfo::getSplitIteration() 1406 SplitIter = SE->getUDivExpr( 1407 SE->getSMaxExpr(SE->getZero(Delta->getType()), Delta), 1408 SE->getMulExpr(SE->getConstant(Delta->getType(), 2), ConstCoeff)); 1409 LLVM_DEBUG(dbgs() << "\t Split iter = " << *SplitIter << "\n"); 1410 1411 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1412 if (!ConstDelta) 1413 return false; 1414 1415 // We're certain that ConstCoeff > 0; therefore, 1416 // if Delta < 0, then no dependence. 1417 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1418 LLVM_DEBUG(dbgs() << "\t ConstCoeff = " << *ConstCoeff << "\n"); 1419 if (SE->isKnownNegative(Delta)) { 1420 // No dependence, Delta < 0 1421 ++WeakCrossingSIVindependence; 1422 ++WeakCrossingSIVsuccesses; 1423 return true; 1424 } 1425 1426 // We're certain that Delta > 0 and ConstCoeff > 0. 1427 // Check Delta/(2*ConstCoeff) against upper loop bound 1428 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1429 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1430 const SCEV *ConstantTwo = SE->getConstant(UpperBound->getType(), 2); 1431 const SCEV *ML = SE->getMulExpr(SE->getMulExpr(ConstCoeff, UpperBound), 1432 ConstantTwo); 1433 LLVM_DEBUG(dbgs() << "\t ML = " << *ML << "\n"); 1434 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, ML)) { 1435 // Delta too big, no dependence 1436 ++WeakCrossingSIVindependence; 1437 ++WeakCrossingSIVsuccesses; 1438 return true; 1439 } 1440 if (isKnownPredicate(CmpInst::ICMP_EQ, Delta, ML)) { 1441 // i = i' = UB 1442 Result.DV[Level].Direction &= ~Dependence::DVEntry::LT; 1443 Result.DV[Level].Direction &= ~Dependence::DVEntry::GT; 1444 ++WeakCrossingSIVsuccesses; 1445 if (!Result.DV[Level].Direction) { 1446 ++WeakCrossingSIVindependence; 1447 return true; 1448 } 1449 Result.DV[Level].Splitable = false; 1450 Result.DV[Level].Distance = SE->getZero(Delta->getType()); 1451 return false; 1452 } 1453 } 1454 1455 // check that Coeff divides Delta 1456 APInt APDelta = ConstDelta->getAPInt(); 1457 APInt APCoeff = ConstCoeff->getAPInt(); 1458 APInt Distance = APDelta; // these need to be initialzed 1459 APInt Remainder = APDelta; 1460 APInt::sdivrem(APDelta, APCoeff, Distance, Remainder); 1461 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1462 if (Remainder != 0) { 1463 // Coeff doesn't divide Delta, no dependence 1464 ++WeakCrossingSIVindependence; 1465 ++WeakCrossingSIVsuccesses; 1466 return true; 1467 } 1468 LLVM_DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1469 1470 // if 2*Coeff doesn't divide Delta, then the equal direction isn't possible 1471 APInt Two = APInt(Distance.getBitWidth(), 2, true); 1472 Remainder = Distance.srem(Two); 1473 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1474 if (Remainder != 0) { 1475 // Equal direction isn't possible 1476 Result.DV[Level].Direction &= ~Dependence::DVEntry::EQ; 1477 ++WeakCrossingSIVsuccesses; 1478 } 1479 return false; 1480 } 1481 1482 1483 // Kirch's algorithm, from 1484 // 1485 // Optimizing Supercompilers for Supercomputers 1486 // Michael Wolfe 1487 // MIT Press, 1989 1488 // 1489 // Program 2.1, page 29. 1490 // Computes the GCD of AM and BM. 1491 // Also finds a solution to the equation ax - by = gcd(a, b). 1492 // Returns true if dependence disproved; i.e., gcd does not divide Delta. 1493 static bool findGCD(unsigned Bits, const APInt &AM, const APInt &BM, 1494 const APInt &Delta, APInt &G, APInt &X, APInt &Y) { 1495 APInt A0(Bits, 1, true), A1(Bits, 0, true); 1496 APInt B0(Bits, 0, true), B1(Bits, 1, true); 1497 APInt G0 = AM.abs(); 1498 APInt G1 = BM.abs(); 1499 APInt Q = G0; // these need to be initialized 1500 APInt R = G0; 1501 APInt::sdivrem(G0, G1, Q, R); 1502 while (R != 0) { 1503 APInt A2 = A0 - Q*A1; A0 = A1; A1 = A2; 1504 APInt B2 = B0 - Q*B1; B0 = B1; B1 = B2; 1505 G0 = G1; G1 = R; 1506 APInt::sdivrem(G0, G1, Q, R); 1507 } 1508 G = G1; 1509 LLVM_DEBUG(dbgs() << "\t GCD = " << G << "\n"); 1510 X = AM.slt(0) ? -A1 : A1; 1511 Y = BM.slt(0) ? B1 : -B1; 1512 1513 // make sure gcd divides Delta 1514 R = Delta.srem(G); 1515 if (R != 0) 1516 return true; // gcd doesn't divide Delta, no dependence 1517 Q = Delta.sdiv(G); 1518 return false; 1519 } 1520 1521 static APInt floorOfQuotient(const APInt &A, const APInt &B) { 1522 APInt Q = A; // these need to be initialized 1523 APInt R = A; 1524 APInt::sdivrem(A, B, Q, R); 1525 if (R == 0) 1526 return Q; 1527 if ((A.sgt(0) && B.sgt(0)) || 1528 (A.slt(0) && B.slt(0))) 1529 return Q; 1530 else 1531 return Q - 1; 1532 } 1533 1534 static APInt ceilingOfQuotient(const APInt &A, const APInt &B) { 1535 APInt Q = A; // these need to be initialized 1536 APInt R = A; 1537 APInt::sdivrem(A, B, Q, R); 1538 if (R == 0) 1539 return Q; 1540 if ((A.sgt(0) && B.sgt(0)) || 1541 (A.slt(0) && B.slt(0))) 1542 return Q + 1; 1543 else 1544 return Q; 1545 } 1546 1547 // exactSIVtest - 1548 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i], 1549 // where i is an induction variable, c1 and c2 are loop invariant, and a1 1550 // and a2 are constant, we can solve it exactly using an algorithm developed 1551 // by Banerjee and Wolfe. See Algorithm 6.2.1 (case 2.5) in: 1552 // 1553 // Dependence Analysis for Supercomputing 1554 // Utpal Banerjee 1555 // Kluwer Academic Publishers, 1988 1556 // 1557 // It's slower than the specialized tests (strong SIV, weak-zero SIV, etc), 1558 // so use them if possible. They're also a bit better with symbolics and, 1559 // in the case of the strong SIV test, can compute Distances. 1560 // 1561 // Return true if dependence disproved. 1562 // 1563 // This is a modified version of the original Banerjee algorithm. The original 1564 // only tested whether Dst depends on Src. This algorithm extends that and 1565 // returns all the dependencies that exist between Dst and Src. 1566 bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, 1567 const SCEV *SrcConst, const SCEV *DstConst, 1568 const Loop *CurLoop, unsigned Level, 1569 FullDependence &Result, 1570 Constraint &NewConstraint) const { 1571 LLVM_DEBUG(dbgs() << "\tExact SIV test\n"); 1572 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1573 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1574 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1575 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1576 ++ExactSIVapplications; 1577 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1578 Level--; 1579 Result.Consistent = false; 1580 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1581 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1582 NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff), Delta, 1583 CurLoop); 1584 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1585 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1586 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1587 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1588 return false; 1589 1590 // find gcd 1591 APInt G, X, Y; 1592 APInt AM = ConstSrcCoeff->getAPInt(); 1593 APInt BM = ConstDstCoeff->getAPInt(); 1594 APInt CM = ConstDelta->getAPInt(); 1595 unsigned Bits = AM.getBitWidth(); 1596 if (findGCD(Bits, AM, BM, CM, G, X, Y)) { 1597 // gcd doesn't divide Delta, no dependence 1598 ++ExactSIVindependence; 1599 ++ExactSIVsuccesses; 1600 return true; 1601 } 1602 1603 LLVM_DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 1604 1605 // since SCEV construction normalizes, LM = 0 1606 APInt UM(Bits, 1, true); 1607 bool UMValid = false; 1608 // UM is perhaps unavailable, let's check 1609 if (const SCEVConstant *CUB = 1610 collectConstantUpperBound(CurLoop, Delta->getType())) { 1611 UM = CUB->getAPInt(); 1612 LLVM_DEBUG(dbgs() << "\t UM = " << UM << "\n"); 1613 UMValid = true; 1614 } 1615 1616 APInt TU(APInt::getSignedMaxValue(Bits)); 1617 APInt TL(APInt::getSignedMinValue(Bits)); 1618 APInt TC = CM.sdiv(G); 1619 APInt TX = X * TC; 1620 APInt TY = Y * TC; 1621 LLVM_DEBUG(dbgs() << "\t TC = " << TC << "\n"); 1622 LLVM_DEBUG(dbgs() << "\t TX = " << TX << "\n"); 1623 LLVM_DEBUG(dbgs() << "\t TY = " << TY << "\n"); 1624 1625 SmallVector<APInt, 2> TLVec, TUVec; 1626 APInt TB = BM.sdiv(G); 1627 if (TB.sgt(0)) { 1628 TLVec.push_back(ceilingOfQuotient(-TX, TB)); 1629 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 1630 // New bound check - modification to Banerjee's e3 check 1631 if (UMValid) { 1632 TUVec.push_back(floorOfQuotient(UM - TX, TB)); 1633 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 1634 } 1635 } else { 1636 TUVec.push_back(floorOfQuotient(-TX, TB)); 1637 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 1638 // New bound check - modification to Banerjee's e3 check 1639 if (UMValid) { 1640 TLVec.push_back(ceilingOfQuotient(UM - TX, TB)); 1641 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 1642 } 1643 } 1644 1645 APInt TA = AM.sdiv(G); 1646 if (TA.sgt(0)) { 1647 if (UMValid) { 1648 TUVec.push_back(floorOfQuotient(UM - TY, TA)); 1649 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 1650 } 1651 // New bound check - modification to Banerjee's e3 check 1652 TLVec.push_back(ceilingOfQuotient(-TY, TA)); 1653 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 1654 } else { 1655 if (UMValid) { 1656 TLVec.push_back(ceilingOfQuotient(UM - TY, TA)); 1657 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 1658 } 1659 // New bound check - modification to Banerjee's e3 check 1660 TUVec.push_back(floorOfQuotient(-TY, TA)); 1661 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 1662 } 1663 1664 LLVM_DEBUG(dbgs() << "\t TA = " << TA << "\n"); 1665 LLVM_DEBUG(dbgs() << "\t TB = " << TB << "\n"); 1666 1667 if (TLVec.empty() || TUVec.empty()) 1668 return false; 1669 TL = APIntOps::smax(TLVec.front(), TLVec.back()); 1670 TU = APIntOps::smin(TUVec.front(), TUVec.back()); 1671 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1672 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1673 1674 if (TL.sgt(TU)) { 1675 ++ExactSIVindependence; 1676 ++ExactSIVsuccesses; 1677 return true; 1678 } 1679 1680 // explore directions 1681 unsigned NewDirection = Dependence::DVEntry::NONE; 1682 APInt LowerDistance, UpperDistance; 1683 if (TA.sgt(TB)) { 1684 LowerDistance = (TY - TX) + (TA - TB) * TL; 1685 UpperDistance = (TY - TX) + (TA - TB) * TU; 1686 } else { 1687 LowerDistance = (TY - TX) + (TA - TB) * TU; 1688 UpperDistance = (TY - TX) + (TA - TB) * TL; 1689 } 1690 1691 LLVM_DEBUG(dbgs() << "\t LowerDistance = " << LowerDistance << "\n"); 1692 LLVM_DEBUG(dbgs() << "\t UpperDistance = " << UpperDistance << "\n"); 1693 1694 APInt Zero(Bits, 0, true); 1695 if (LowerDistance.sle(Zero) && UpperDistance.sge(Zero)) { 1696 NewDirection |= Dependence::DVEntry::EQ; 1697 ++ExactSIVsuccesses; 1698 } 1699 if (LowerDistance.slt(0)) { 1700 NewDirection |= Dependence::DVEntry::GT; 1701 ++ExactSIVsuccesses; 1702 } 1703 if (UpperDistance.sgt(0)) { 1704 NewDirection |= Dependence::DVEntry::LT; 1705 ++ExactSIVsuccesses; 1706 } 1707 1708 // finished 1709 Result.DV[Level].Direction &= NewDirection; 1710 if (Result.DV[Level].Direction == Dependence::DVEntry::NONE) 1711 ++ExactSIVindependence; 1712 LLVM_DEBUG(dbgs() << "\t Result = "); 1713 LLVM_DEBUG(Result.dump(dbgs())); 1714 return Result.DV[Level].Direction == Dependence::DVEntry::NONE; 1715 } 1716 1717 1718 // Return true if the divisor evenly divides the dividend. 1719 static 1720 bool isRemainderZero(const SCEVConstant *Dividend, 1721 const SCEVConstant *Divisor) { 1722 const APInt &ConstDividend = Dividend->getAPInt(); 1723 const APInt &ConstDivisor = Divisor->getAPInt(); 1724 return ConstDividend.srem(ConstDivisor) == 0; 1725 } 1726 1727 1728 // weakZeroSrcSIVtest - 1729 // From the paper, Practical Dependence Testing, Section 4.2.2 1730 // 1731 // When we have a pair of subscripts of the form [c1] and [c2 + a*i], 1732 // where i is an induction variable, c1 and c2 are loop invariant, 1733 // and a is a constant, we can solve it exactly using the 1734 // Weak-Zero SIV test. 1735 // 1736 // Given 1737 // 1738 // c1 = c2 + a*i 1739 // 1740 // we get 1741 // 1742 // (c1 - c2)/a = i 1743 // 1744 // If i is not an integer, there's no dependence. 1745 // If i < 0 or > UB, there's no dependence. 1746 // If i = 0, the direction is >= and peeling the 1747 // 1st iteration will break the dependence. 1748 // If i = UB, the direction is <= and peeling the 1749 // last iteration will break the dependence. 1750 // Otherwise, the direction is *. 1751 // 1752 // Can prove independence. Failing that, we can sometimes refine 1753 // the directions. Can sometimes show that first or last 1754 // iteration carries all the dependences (so worth peeling). 1755 // 1756 // (see also weakZeroDstSIVtest) 1757 // 1758 // Return true if dependence disproved. 1759 bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff, 1760 const SCEV *SrcConst, 1761 const SCEV *DstConst, 1762 const Loop *CurLoop, unsigned Level, 1763 FullDependence &Result, 1764 Constraint &NewConstraint) const { 1765 // For the WeakSIV test, it's possible the loop isn't common to 1766 // the Src and Dst loops. If it isn't, then there's no need to 1767 // record a direction. 1768 LLVM_DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n"); 1769 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << "\n"); 1770 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1771 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1772 ++WeakZeroSIVapplications; 1773 assert(0 < Level && Level <= MaxLevels && "Level out of range"); 1774 Level--; 1775 Result.Consistent = false; 1776 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1777 NewConstraint.setLine(SE->getZero(Delta->getType()), DstCoeff, Delta, 1778 CurLoop); 1779 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1780 if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) { 1781 if (Level < CommonLevels) { 1782 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1783 Result.DV[Level].PeelFirst = true; 1784 ++WeakZeroSIVsuccesses; 1785 } 1786 return false; // dependences caused by first iteration 1787 } 1788 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1789 if (!ConstCoeff) 1790 return false; 1791 const SCEV *AbsCoeff = 1792 SE->isKnownNegative(ConstCoeff) ? 1793 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1794 const SCEV *NewDelta = 1795 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1796 1797 // check that Delta/SrcCoeff < iteration count 1798 // really check NewDelta < count*AbsCoeff 1799 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1800 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1801 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1802 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1803 ++WeakZeroSIVindependence; 1804 ++WeakZeroSIVsuccesses; 1805 return true; 1806 } 1807 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1808 // dependences caused by last iteration 1809 if (Level < CommonLevels) { 1810 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1811 Result.DV[Level].PeelLast = true; 1812 ++WeakZeroSIVsuccesses; 1813 } 1814 return false; 1815 } 1816 } 1817 1818 // check that Delta/SrcCoeff >= 0 1819 // really check that NewDelta >= 0 1820 if (SE->isKnownNegative(NewDelta)) { 1821 // No dependence, newDelta < 0 1822 ++WeakZeroSIVindependence; 1823 ++WeakZeroSIVsuccesses; 1824 return true; 1825 } 1826 1827 // if SrcCoeff doesn't divide Delta, then no dependence 1828 if (isa<SCEVConstant>(Delta) && 1829 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1830 ++WeakZeroSIVindependence; 1831 ++WeakZeroSIVsuccesses; 1832 return true; 1833 } 1834 return false; 1835 } 1836 1837 1838 // weakZeroDstSIVtest - 1839 // From the paper, Practical Dependence Testing, Section 4.2.2 1840 // 1841 // When we have a pair of subscripts of the form [c1 + a*i] and [c2], 1842 // where i is an induction variable, c1 and c2 are loop invariant, 1843 // and a is a constant, we can solve it exactly using the 1844 // Weak-Zero SIV test. 1845 // 1846 // Given 1847 // 1848 // c1 + a*i = c2 1849 // 1850 // we get 1851 // 1852 // i = (c2 - c1)/a 1853 // 1854 // If i is not an integer, there's no dependence. 1855 // If i < 0 or > UB, there's no dependence. 1856 // If i = 0, the direction is <= and peeling the 1857 // 1st iteration will break the dependence. 1858 // If i = UB, the direction is >= and peeling the 1859 // last iteration will break the dependence. 1860 // Otherwise, the direction is *. 1861 // 1862 // Can prove independence. Failing that, we can sometimes refine 1863 // the directions. Can sometimes show that first or last 1864 // iteration carries all the dependences (so worth peeling). 1865 // 1866 // (see also weakZeroSrcSIVtest) 1867 // 1868 // Return true if dependence disproved. 1869 bool DependenceInfo::weakZeroDstSIVtest(const SCEV *SrcCoeff, 1870 const SCEV *SrcConst, 1871 const SCEV *DstConst, 1872 const Loop *CurLoop, unsigned Level, 1873 FullDependence &Result, 1874 Constraint &NewConstraint) const { 1875 // For the WeakSIV test, it's possible the loop isn't common to the 1876 // Src and Dst loops. If it isn't, then there's no need to record a direction. 1877 LLVM_DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n"); 1878 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << "\n"); 1879 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1880 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1881 ++WeakZeroSIVapplications; 1882 assert(0 < Level && Level <= SrcLevels && "Level out of range"); 1883 Level--; 1884 Result.Consistent = false; 1885 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1886 NewConstraint.setLine(SrcCoeff, SE->getZero(Delta->getType()), Delta, 1887 CurLoop); 1888 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1889 if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) { 1890 if (Level < CommonLevels) { 1891 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1892 Result.DV[Level].PeelFirst = true; 1893 ++WeakZeroSIVsuccesses; 1894 } 1895 return false; // dependences caused by first iteration 1896 } 1897 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1898 if (!ConstCoeff) 1899 return false; 1900 const SCEV *AbsCoeff = 1901 SE->isKnownNegative(ConstCoeff) ? 1902 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1903 const SCEV *NewDelta = 1904 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1905 1906 // check that Delta/SrcCoeff < iteration count 1907 // really check NewDelta < count*AbsCoeff 1908 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1909 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1910 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1911 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1912 ++WeakZeroSIVindependence; 1913 ++WeakZeroSIVsuccesses; 1914 return true; 1915 } 1916 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1917 // dependences caused by last iteration 1918 if (Level < CommonLevels) { 1919 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1920 Result.DV[Level].PeelLast = true; 1921 ++WeakZeroSIVsuccesses; 1922 } 1923 return false; 1924 } 1925 } 1926 1927 // check that Delta/SrcCoeff >= 0 1928 // really check that NewDelta >= 0 1929 if (SE->isKnownNegative(NewDelta)) { 1930 // No dependence, newDelta < 0 1931 ++WeakZeroSIVindependence; 1932 ++WeakZeroSIVsuccesses; 1933 return true; 1934 } 1935 1936 // if SrcCoeff doesn't divide Delta, then no dependence 1937 if (isa<SCEVConstant>(Delta) && 1938 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1939 ++WeakZeroSIVindependence; 1940 ++WeakZeroSIVsuccesses; 1941 return true; 1942 } 1943 return false; 1944 } 1945 1946 1947 // exactRDIVtest - Tests the RDIV subscript pair for dependence. 1948 // Things of the form [c1 + a*i] and [c2 + b*j], 1949 // where i and j are induction variable, c1 and c2 are loop invariant, 1950 // and a and b are constants. 1951 // Returns true if any possible dependence is disproved. 1952 // Marks the result as inconsistent. 1953 // Works in some cases that symbolicRDIVtest doesn't, and vice versa. 1954 bool DependenceInfo::exactRDIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, 1955 const SCEV *SrcConst, const SCEV *DstConst, 1956 const Loop *SrcLoop, const Loop *DstLoop, 1957 FullDependence &Result) const { 1958 LLVM_DEBUG(dbgs() << "\tExact RDIV test\n"); 1959 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1960 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1961 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1962 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1963 ++ExactRDIVapplications; 1964 Result.Consistent = false; 1965 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1966 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1967 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1968 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1969 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1970 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1971 return false; 1972 1973 // find gcd 1974 APInt G, X, Y; 1975 APInt AM = ConstSrcCoeff->getAPInt(); 1976 APInt BM = ConstDstCoeff->getAPInt(); 1977 APInt CM = ConstDelta->getAPInt(); 1978 unsigned Bits = AM.getBitWidth(); 1979 if (findGCD(Bits, AM, BM, CM, G, X, Y)) { 1980 // gcd doesn't divide Delta, no dependence 1981 ++ExactRDIVindependence; 1982 return true; 1983 } 1984 1985 LLVM_DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 1986 1987 // since SCEV construction seems to normalize, LM = 0 1988 APInt SrcUM(Bits, 1, true); 1989 bool SrcUMvalid = false; 1990 // SrcUM is perhaps unavailable, let's check 1991 if (const SCEVConstant *UpperBound = 1992 collectConstantUpperBound(SrcLoop, Delta->getType())) { 1993 SrcUM = UpperBound->getAPInt(); 1994 LLVM_DEBUG(dbgs() << "\t SrcUM = " << SrcUM << "\n"); 1995 SrcUMvalid = true; 1996 } 1997 1998 APInt DstUM(Bits, 1, true); 1999 bool DstUMvalid = false; 2000 // UM is perhaps unavailable, let's check 2001 if (const SCEVConstant *UpperBound = 2002 collectConstantUpperBound(DstLoop, Delta->getType())) { 2003 DstUM = UpperBound->getAPInt(); 2004 LLVM_DEBUG(dbgs() << "\t DstUM = " << DstUM << "\n"); 2005 DstUMvalid = true; 2006 } 2007 2008 APInt TU(APInt::getSignedMaxValue(Bits)); 2009 APInt TL(APInt::getSignedMinValue(Bits)); 2010 APInt TC = CM.sdiv(G); 2011 APInt TX = X * TC; 2012 APInt TY = Y * TC; 2013 LLVM_DEBUG(dbgs() << "\t TC = " << TC << "\n"); 2014 LLVM_DEBUG(dbgs() << "\t TX = " << TX << "\n"); 2015 LLVM_DEBUG(dbgs() << "\t TY = " << TY << "\n"); 2016 2017 SmallVector<APInt, 2> TLVec, TUVec; 2018 APInt TB = BM.sdiv(G); 2019 if (TB.sgt(0)) { 2020 TLVec.push_back(ceilingOfQuotient(-TX, TB)); 2021 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 2022 if (SrcUMvalid) { 2023 TUVec.push_back(floorOfQuotient(SrcUM - TX, TB)); 2024 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 2025 } 2026 } else { 2027 TUVec.push_back(floorOfQuotient(-TX, TB)); 2028 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 2029 if (SrcUMvalid) { 2030 TLVec.push_back(ceilingOfQuotient(SrcUM - TX, TB)); 2031 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 2032 } 2033 } 2034 2035 APInt TA = AM.sdiv(G); 2036 if (TA.sgt(0)) { 2037 TLVec.push_back(ceilingOfQuotient(-TY, TA)); 2038 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 2039 if (DstUMvalid) { 2040 TUVec.push_back(floorOfQuotient(DstUM - TY, TA)); 2041 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 2042 } 2043 } else { 2044 TUVec.push_back(floorOfQuotient(-TY, TA)); 2045 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 2046 if (DstUMvalid) { 2047 TLVec.push_back(ceilingOfQuotient(DstUM - TY, TA)); 2048 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 2049 } 2050 } 2051 2052 if (TLVec.empty() || TUVec.empty()) 2053 return false; 2054 2055 LLVM_DEBUG(dbgs() << "\t TA = " << TA << "\n"); 2056 LLVM_DEBUG(dbgs() << "\t TB = " << TB << "\n"); 2057 2058 TL = APIntOps::smax(TLVec.front(), TLVec.back()); 2059 TU = APIntOps::smin(TUVec.front(), TUVec.back()); 2060 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); 2061 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); 2062 2063 if (TL.sgt(TU)) 2064 ++ExactRDIVindependence; 2065 return TL.sgt(TU); 2066 } 2067 2068 2069 // symbolicRDIVtest - 2070 // In Section 4.5 of the Practical Dependence Testing paper,the authors 2071 // introduce a special case of Banerjee's Inequalities (also called the 2072 // Extreme-Value Test) that can handle some of the SIV and RDIV cases, 2073 // particularly cases with symbolics. Since it's only able to disprove 2074 // dependence (not compute distances or directions), we'll use it as a 2075 // fall back for the other tests. 2076 // 2077 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 2078 // where i and j are induction variables and c1 and c2 are loop invariants, 2079 // we can use the symbolic tests to disprove some dependences, serving as a 2080 // backup for the RDIV test. Note that i and j can be the same variable, 2081 // letting this test serve as a backup for the various SIV tests. 2082 // 2083 // For a dependence to exist, c1 + a1*i must equal c2 + a2*j for some 2084 // 0 <= i <= N1 and some 0 <= j <= N2, where N1 and N2 are the (normalized) 2085 // loop bounds for the i and j loops, respectively. So, ... 2086 // 2087 // c1 + a1*i = c2 + a2*j 2088 // a1*i - a2*j = c2 - c1 2089 // 2090 // To test for a dependence, we compute c2 - c1 and make sure it's in the 2091 // range of the maximum and minimum possible values of a1*i - a2*j. 2092 // Considering the signs of a1 and a2, we have 4 possible cases: 2093 // 2094 // 1) If a1 >= 0 and a2 >= 0, then 2095 // a1*0 - a2*N2 <= c2 - c1 <= a1*N1 - a2*0 2096 // -a2*N2 <= c2 - c1 <= a1*N1 2097 // 2098 // 2) If a1 >= 0 and a2 <= 0, then 2099 // a1*0 - a2*0 <= c2 - c1 <= a1*N1 - a2*N2 2100 // 0 <= c2 - c1 <= a1*N1 - a2*N2 2101 // 2102 // 3) If a1 <= 0 and a2 >= 0, then 2103 // a1*N1 - a2*N2 <= c2 - c1 <= a1*0 - a2*0 2104 // a1*N1 - a2*N2 <= c2 - c1 <= 0 2105 // 2106 // 4) If a1 <= 0 and a2 <= 0, then 2107 // a1*N1 - a2*0 <= c2 - c1 <= a1*0 - a2*N2 2108 // a1*N1 <= c2 - c1 <= -a2*N2 2109 // 2110 // return true if dependence disproved 2111 bool DependenceInfo::symbolicRDIVtest(const SCEV *A1, const SCEV *A2, 2112 const SCEV *C1, const SCEV *C2, 2113 const Loop *Loop1, 2114 const Loop *Loop2) const { 2115 ++SymbolicRDIVapplications; 2116 LLVM_DEBUG(dbgs() << "\ttry symbolic RDIV test\n"); 2117 LLVM_DEBUG(dbgs() << "\t A1 = " << *A1); 2118 LLVM_DEBUG(dbgs() << ", type = " << *A1->getType() << "\n"); 2119 LLVM_DEBUG(dbgs() << "\t A2 = " << *A2 << "\n"); 2120 LLVM_DEBUG(dbgs() << "\t C1 = " << *C1 << "\n"); 2121 LLVM_DEBUG(dbgs() << "\t C2 = " << *C2 << "\n"); 2122 const SCEV *N1 = collectUpperBound(Loop1, A1->getType()); 2123 const SCEV *N2 = collectUpperBound(Loop2, A1->getType()); 2124 LLVM_DEBUG(if (N1) dbgs() << "\t N1 = " << *N1 << "\n"); 2125 LLVM_DEBUG(if (N2) dbgs() << "\t N2 = " << *N2 << "\n"); 2126 const SCEV *C2_C1 = SE->getMinusSCEV(C2, C1); 2127 const SCEV *C1_C2 = SE->getMinusSCEV(C1, C2); 2128 LLVM_DEBUG(dbgs() << "\t C2 - C1 = " << *C2_C1 << "\n"); 2129 LLVM_DEBUG(dbgs() << "\t C1 - C2 = " << *C1_C2 << "\n"); 2130 if (SE->isKnownNonNegative(A1)) { 2131 if (SE->isKnownNonNegative(A2)) { 2132 // A1 >= 0 && A2 >= 0 2133 if (N1) { 2134 // make sure that c2 - c1 <= a1*N1 2135 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2136 LLVM_DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 2137 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1)) { 2138 ++SymbolicRDIVindependence; 2139 return true; 2140 } 2141 } 2142 if (N2) { 2143 // make sure that -a2*N2 <= c2 - c1, or a2*N2 >= c1 - c2 2144 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2145 LLVM_DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 2146 if (isKnownPredicate(CmpInst::ICMP_SLT, A2N2, C1_C2)) { 2147 ++SymbolicRDIVindependence; 2148 return true; 2149 } 2150 } 2151 } 2152 else if (SE->isKnownNonPositive(A2)) { 2153 // a1 >= 0 && a2 <= 0 2154 if (N1 && N2) { 2155 // make sure that c2 - c1 <= a1*N1 - a2*N2 2156 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2157 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2158 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 2159 LLVM_DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 2160 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1_A2N2)) { 2161 ++SymbolicRDIVindependence; 2162 return true; 2163 } 2164 } 2165 // make sure that 0 <= c2 - c1 2166 if (SE->isKnownNegative(C2_C1)) { 2167 ++SymbolicRDIVindependence; 2168 return true; 2169 } 2170 } 2171 } 2172 else if (SE->isKnownNonPositive(A1)) { 2173 if (SE->isKnownNonNegative(A2)) { 2174 // a1 <= 0 && a2 >= 0 2175 if (N1 && N2) { 2176 // make sure that a1*N1 - a2*N2 <= c2 - c1 2177 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2178 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2179 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 2180 LLVM_DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 2181 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1_A2N2, C2_C1)) { 2182 ++SymbolicRDIVindependence; 2183 return true; 2184 } 2185 } 2186 // make sure that c2 - c1 <= 0 2187 if (SE->isKnownPositive(C2_C1)) { 2188 ++SymbolicRDIVindependence; 2189 return true; 2190 } 2191 } 2192 else if (SE->isKnownNonPositive(A2)) { 2193 // a1 <= 0 && a2 <= 0 2194 if (N1) { 2195 // make sure that a1*N1 <= c2 - c1 2196 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2197 LLVM_DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 2198 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1, C2_C1)) { 2199 ++SymbolicRDIVindependence; 2200 return true; 2201 } 2202 } 2203 if (N2) { 2204 // make sure that c2 - c1 <= -a2*N2, or c1 - c2 >= a2*N2 2205 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2206 LLVM_DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 2207 if (isKnownPredicate(CmpInst::ICMP_SLT, C1_C2, A2N2)) { 2208 ++SymbolicRDIVindependence; 2209 return true; 2210 } 2211 } 2212 } 2213 } 2214 return false; 2215 } 2216 2217 2218 // testSIV - 2219 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 - a2*i] 2220 // where i is an induction variable, c1 and c2 are loop invariant, and a1 and 2221 // a2 are constant, we attack it with an SIV test. While they can all be 2222 // solved with the Exact SIV test, it's worthwhile to use simpler tests when 2223 // they apply; they're cheaper and sometimes more precise. 2224 // 2225 // Return true if dependence disproved. 2226 bool DependenceInfo::testSIV(const SCEV *Src, const SCEV *Dst, unsigned &Level, 2227 FullDependence &Result, Constraint &NewConstraint, 2228 const SCEV *&SplitIter) const { 2229 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n"); 2230 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2231 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2232 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2233 if (SrcAddRec && DstAddRec) { 2234 const SCEV *SrcConst = SrcAddRec->getStart(); 2235 const SCEV *DstConst = DstAddRec->getStart(); 2236 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2237 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2238 const Loop *CurLoop = SrcAddRec->getLoop(); 2239 assert(CurLoop == DstAddRec->getLoop() && 2240 "both loops in SIV should be same"); 2241 Level = mapSrcLoop(CurLoop); 2242 bool disproven; 2243 if (SrcCoeff == DstCoeff) 2244 disproven = strongSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2245 Level, Result, NewConstraint); 2246 else if (SrcCoeff == SE->getNegativeSCEV(DstCoeff)) 2247 disproven = weakCrossingSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2248 Level, Result, NewConstraint, SplitIter); 2249 else 2250 disproven = exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, 2251 Level, Result, NewConstraint); 2252 return disproven || 2253 gcdMIVtest(Src, Dst, Result) || 2254 symbolicRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, CurLoop); 2255 } 2256 if (SrcAddRec) { 2257 const SCEV *SrcConst = SrcAddRec->getStart(); 2258 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2259 const SCEV *DstConst = Dst; 2260 const Loop *CurLoop = SrcAddRec->getLoop(); 2261 Level = mapSrcLoop(CurLoop); 2262 return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2263 Level, Result, NewConstraint) || 2264 gcdMIVtest(Src, Dst, Result); 2265 } 2266 if (DstAddRec) { 2267 const SCEV *DstConst = DstAddRec->getStart(); 2268 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2269 const SCEV *SrcConst = Src; 2270 const Loop *CurLoop = DstAddRec->getLoop(); 2271 Level = mapDstLoop(CurLoop); 2272 return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst, 2273 CurLoop, Level, Result, NewConstraint) || 2274 gcdMIVtest(Src, Dst, Result); 2275 } 2276 llvm_unreachable("SIV test expected at least one AddRec"); 2277 return false; 2278 } 2279 2280 2281 // testRDIV - 2282 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 2283 // where i and j are induction variables, c1 and c2 are loop invariant, 2284 // and a1 and a2 are constant, we can solve it exactly with an easy adaptation 2285 // of the Exact SIV test, the Restricted Double Index Variable (RDIV) test. 2286 // It doesn't make sense to talk about distance or direction in this case, 2287 // so there's no point in making special versions of the Strong SIV test or 2288 // the Weak-crossing SIV test. 2289 // 2290 // With minor algebra, this test can also be used for things like 2291 // [c1 + a1*i + a2*j][c2]. 2292 // 2293 // Return true if dependence disproved. 2294 bool DependenceInfo::testRDIV(const SCEV *Src, const SCEV *Dst, 2295 FullDependence &Result) const { 2296 // we have 3 possible situations here: 2297 // 1) [a*i + b] and [c*j + d] 2298 // 2) [a*i + c*j + b] and [d] 2299 // 3) [b] and [a*i + c*j + d] 2300 // We need to find what we've got and get organized 2301 2302 const SCEV *SrcConst, *DstConst; 2303 const SCEV *SrcCoeff, *DstCoeff; 2304 const Loop *SrcLoop, *DstLoop; 2305 2306 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n"); 2307 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2308 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2309 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2310 if (SrcAddRec && DstAddRec) { 2311 SrcConst = SrcAddRec->getStart(); 2312 SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2313 SrcLoop = SrcAddRec->getLoop(); 2314 DstConst = DstAddRec->getStart(); 2315 DstCoeff = DstAddRec->getStepRecurrence(*SE); 2316 DstLoop = DstAddRec->getLoop(); 2317 } 2318 else if (SrcAddRec) { 2319 if (const SCEVAddRecExpr *tmpAddRec = 2320 dyn_cast<SCEVAddRecExpr>(SrcAddRec->getStart())) { 2321 SrcConst = tmpAddRec->getStart(); 2322 SrcCoeff = tmpAddRec->getStepRecurrence(*SE); 2323 SrcLoop = tmpAddRec->getLoop(); 2324 DstConst = Dst; 2325 DstCoeff = SE->getNegativeSCEV(SrcAddRec->getStepRecurrence(*SE)); 2326 DstLoop = SrcAddRec->getLoop(); 2327 } 2328 else 2329 llvm_unreachable("RDIV reached by surprising SCEVs"); 2330 } 2331 else if (DstAddRec) { 2332 if (const SCEVAddRecExpr *tmpAddRec = 2333 dyn_cast<SCEVAddRecExpr>(DstAddRec->getStart())) { 2334 DstConst = tmpAddRec->getStart(); 2335 DstCoeff = tmpAddRec->getStepRecurrence(*SE); 2336 DstLoop = tmpAddRec->getLoop(); 2337 SrcConst = Src; 2338 SrcCoeff = SE->getNegativeSCEV(DstAddRec->getStepRecurrence(*SE)); 2339 SrcLoop = DstAddRec->getLoop(); 2340 } 2341 else 2342 llvm_unreachable("RDIV reached by surprising SCEVs"); 2343 } 2344 else 2345 llvm_unreachable("RDIV expected at least one AddRec"); 2346 return exactRDIVtest(SrcCoeff, DstCoeff, 2347 SrcConst, DstConst, 2348 SrcLoop, DstLoop, 2349 Result) || 2350 gcdMIVtest(Src, Dst, Result) || 2351 symbolicRDIVtest(SrcCoeff, DstCoeff, 2352 SrcConst, DstConst, 2353 SrcLoop, DstLoop); 2354 } 2355 2356 2357 // Tests the single-subscript MIV pair (Src and Dst) for dependence. 2358 // Return true if dependence disproved. 2359 // Can sometimes refine direction vectors. 2360 bool DependenceInfo::testMIV(const SCEV *Src, const SCEV *Dst, 2361 const SmallBitVector &Loops, 2362 FullDependence &Result) const { 2363 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n"); 2364 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2365 Result.Consistent = false; 2366 return gcdMIVtest(Src, Dst, Result) || 2367 banerjeeMIVtest(Src, Dst, Loops, Result); 2368 } 2369 2370 2371 // Given a product, e.g., 10*X*Y, returns the first constant operand, 2372 // in this case 10. If there is no constant part, returns NULL. 2373 static 2374 const SCEVConstant *getConstantPart(const SCEV *Expr) { 2375 if (const auto *Constant = dyn_cast<SCEVConstant>(Expr)) 2376 return Constant; 2377 else if (const auto *Product = dyn_cast<SCEVMulExpr>(Expr)) 2378 if (const auto *Constant = dyn_cast<SCEVConstant>(Product->getOperand(0))) 2379 return Constant; 2380 return nullptr; 2381 } 2382 2383 2384 //===----------------------------------------------------------------------===// 2385 // gcdMIVtest - 2386 // Tests an MIV subscript pair for dependence. 2387 // Returns true if any possible dependence is disproved. 2388 // Marks the result as inconsistent. 2389 // Can sometimes disprove the equal direction for 1 or more loops, 2390 // as discussed in Michael Wolfe's book, 2391 // High Performance Compilers for Parallel Computing, page 235. 2392 // 2393 // We spend some effort (code!) to handle cases like 2394 // [10*i + 5*N*j + 15*M + 6], where i and j are induction variables, 2395 // but M and N are just loop-invariant variables. 2396 // This should help us handle linearized subscripts; 2397 // also makes this test a useful backup to the various SIV tests. 2398 // 2399 // It occurs to me that the presence of loop-invariant variables 2400 // changes the nature of the test from "greatest common divisor" 2401 // to "a common divisor". 2402 bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst, 2403 FullDependence &Result) const { 2404 LLVM_DEBUG(dbgs() << "starting gcd\n"); 2405 ++GCDapplications; 2406 unsigned BitWidth = SE->getTypeSizeInBits(Src->getType()); 2407 APInt RunningGCD = APInt::getZero(BitWidth); 2408 2409 // Examine Src coefficients. 2410 // Compute running GCD and record source constant. 2411 // Because we're looking for the constant at the end of the chain, 2412 // we can't quit the loop just because the GCD == 1. 2413 const SCEV *Coefficients = Src; 2414 while (const SCEVAddRecExpr *AddRec = 2415 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2416 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2417 // If the coefficient is the product of a constant and other stuff, 2418 // we can use the constant in the GCD computation. 2419 const auto *Constant = getConstantPart(Coeff); 2420 if (!Constant) 2421 return false; 2422 APInt ConstCoeff = Constant->getAPInt(); 2423 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2424 Coefficients = AddRec->getStart(); 2425 } 2426 const SCEV *SrcConst = Coefficients; 2427 2428 // Examine Dst coefficients. 2429 // Compute running GCD and record destination constant. 2430 // Because we're looking for the constant at the end of the chain, 2431 // we can't quit the loop just because the GCD == 1. 2432 Coefficients = Dst; 2433 while (const SCEVAddRecExpr *AddRec = 2434 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2435 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2436 // If the coefficient is the product of a constant and other stuff, 2437 // we can use the constant in the GCD computation. 2438 const auto *Constant = getConstantPart(Coeff); 2439 if (!Constant) 2440 return false; 2441 APInt ConstCoeff = Constant->getAPInt(); 2442 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2443 Coefficients = AddRec->getStart(); 2444 } 2445 const SCEV *DstConst = Coefficients; 2446 2447 APInt ExtraGCD = APInt::getZero(BitWidth); 2448 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 2449 LLVM_DEBUG(dbgs() << " Delta = " << *Delta << "\n"); 2450 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Delta); 2451 if (const SCEVAddExpr *Sum = dyn_cast<SCEVAddExpr>(Delta)) { 2452 // If Delta is a sum of products, we may be able to make further progress. 2453 for (unsigned Op = 0, Ops = Sum->getNumOperands(); Op < Ops; Op++) { 2454 const SCEV *Operand = Sum->getOperand(Op); 2455 if (isa<SCEVConstant>(Operand)) { 2456 assert(!Constant && "Surprised to find multiple constants"); 2457 Constant = cast<SCEVConstant>(Operand); 2458 } 2459 else if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Operand)) { 2460 // Search for constant operand to participate in GCD; 2461 // If none found; return false. 2462 const SCEVConstant *ConstOp = getConstantPart(Product); 2463 if (!ConstOp) 2464 return false; 2465 APInt ConstOpValue = ConstOp->getAPInt(); 2466 ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD, 2467 ConstOpValue.abs()); 2468 } 2469 else 2470 return false; 2471 } 2472 } 2473 if (!Constant) 2474 return false; 2475 APInt ConstDelta = cast<SCEVConstant>(Constant)->getAPInt(); 2476 LLVM_DEBUG(dbgs() << " ConstDelta = " << ConstDelta << "\n"); 2477 if (ConstDelta == 0) 2478 return false; 2479 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ExtraGCD); 2480 LLVM_DEBUG(dbgs() << " RunningGCD = " << RunningGCD << "\n"); 2481 APInt Remainder = ConstDelta.srem(RunningGCD); 2482 if (Remainder != 0) { 2483 ++GCDindependence; 2484 return true; 2485 } 2486 2487 // Try to disprove equal directions. 2488 // For example, given a subscript pair [3*i + 2*j] and [i' + 2*j' - 1], 2489 // the code above can't disprove the dependence because the GCD = 1. 2490 // So we consider what happen if i = i' and what happens if j = j'. 2491 // If i = i', we can simplify the subscript to [2*i + 2*j] and [2*j' - 1], 2492 // which is infeasible, so we can disallow the = direction for the i level. 2493 // Setting j = j' doesn't help matters, so we end up with a direction vector 2494 // of [<>, *] 2495 // 2496 // Given A[5*i + 10*j*M + 9*M*N] and A[15*i + 20*j*M - 21*N*M + 5], 2497 // we need to remember that the constant part is 5 and the RunningGCD should 2498 // be initialized to ExtraGCD = 30. 2499 LLVM_DEBUG(dbgs() << " ExtraGCD = " << ExtraGCD << '\n'); 2500 2501 bool Improved = false; 2502 Coefficients = Src; 2503 while (const SCEVAddRecExpr *AddRec = 2504 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2505 Coefficients = AddRec->getStart(); 2506 const Loop *CurLoop = AddRec->getLoop(); 2507 RunningGCD = ExtraGCD; 2508 const SCEV *SrcCoeff = AddRec->getStepRecurrence(*SE); 2509 const SCEV *DstCoeff = SE->getMinusSCEV(SrcCoeff, SrcCoeff); 2510 const SCEV *Inner = Src; 2511 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2512 AddRec = cast<SCEVAddRecExpr>(Inner); 2513 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2514 if (CurLoop == AddRec->getLoop()) 2515 ; // SrcCoeff == Coeff 2516 else { 2517 // If the coefficient is the product of a constant and other stuff, 2518 // we can use the constant in the GCD computation. 2519 Constant = getConstantPart(Coeff); 2520 if (!Constant) 2521 return false; 2522 APInt ConstCoeff = Constant->getAPInt(); 2523 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2524 } 2525 Inner = AddRec->getStart(); 2526 } 2527 Inner = Dst; 2528 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2529 AddRec = cast<SCEVAddRecExpr>(Inner); 2530 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2531 if (CurLoop == AddRec->getLoop()) 2532 DstCoeff = Coeff; 2533 else { 2534 // If the coefficient is the product of a constant and other stuff, 2535 // we can use the constant in the GCD computation. 2536 Constant = getConstantPart(Coeff); 2537 if (!Constant) 2538 return false; 2539 APInt ConstCoeff = Constant->getAPInt(); 2540 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2541 } 2542 Inner = AddRec->getStart(); 2543 } 2544 Delta = SE->getMinusSCEV(SrcCoeff, DstCoeff); 2545 // If the coefficient is the product of a constant and other stuff, 2546 // we can use the constant in the GCD computation. 2547 Constant = getConstantPart(Delta); 2548 if (!Constant) 2549 // The difference of the two coefficients might not be a product 2550 // or constant, in which case we give up on this direction. 2551 continue; 2552 APInt ConstCoeff = Constant->getAPInt(); 2553 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2554 LLVM_DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n"); 2555 if (RunningGCD != 0) { 2556 Remainder = ConstDelta.srem(RunningGCD); 2557 LLVM_DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n"); 2558 if (Remainder != 0) { 2559 unsigned Level = mapSrcLoop(CurLoop); 2560 Result.DV[Level - 1].Direction &= ~Dependence::DVEntry::EQ; 2561 Improved = true; 2562 } 2563 } 2564 } 2565 if (Improved) 2566 ++GCDsuccesses; 2567 LLVM_DEBUG(dbgs() << "all done\n"); 2568 return false; 2569 } 2570 2571 2572 //===----------------------------------------------------------------------===// 2573 // banerjeeMIVtest - 2574 // Use Banerjee's Inequalities to test an MIV subscript pair. 2575 // (Wolfe, in the race-car book, calls this the Extreme Value Test.) 2576 // Generally follows the discussion in Section 2.5.2 of 2577 // 2578 // Optimizing Supercompilers for Supercomputers 2579 // Michael Wolfe 2580 // 2581 // The inequalities given on page 25 are simplified in that loops are 2582 // normalized so that the lower bound is always 0 and the stride is always 1. 2583 // For example, Wolfe gives 2584 // 2585 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2586 // 2587 // where A_k is the coefficient of the kth index in the source subscript, 2588 // B_k is the coefficient of the kth index in the destination subscript, 2589 // U_k is the upper bound of the kth index, L_k is the lower bound of the Kth 2590 // index, and N_k is the stride of the kth index. Since all loops are normalized 2591 // by the SCEV package, N_k = 1 and L_k = 0, allowing us to simplify the 2592 // equation to 2593 // 2594 // LB^<_k = (A^-_k - B_k)^- (U_k - 0 - 1) + (A_k - B_k)0 - B_k 1 2595 // = (A^-_k - B_k)^- (U_k - 1) - B_k 2596 // 2597 // Similar simplifications are possible for the other equations. 2598 // 2599 // When we can't determine the number of iterations for a loop, 2600 // we use NULL as an indicator for the worst case, infinity. 2601 // When computing the upper bound, NULL denotes +inf; 2602 // for the lower bound, NULL denotes -inf. 2603 // 2604 // Return true if dependence disproved. 2605 bool DependenceInfo::banerjeeMIVtest(const SCEV *Src, const SCEV *Dst, 2606 const SmallBitVector &Loops, 2607 FullDependence &Result) const { 2608 LLVM_DEBUG(dbgs() << "starting Banerjee\n"); 2609 ++BanerjeeApplications; 2610 LLVM_DEBUG(dbgs() << " Src = " << *Src << '\n'); 2611 const SCEV *A0; 2612 CoefficientInfo *A = collectCoeffInfo(Src, true, A0); 2613 LLVM_DEBUG(dbgs() << " Dst = " << *Dst << '\n'); 2614 const SCEV *B0; 2615 CoefficientInfo *B = collectCoeffInfo(Dst, false, B0); 2616 BoundInfo *Bound = new BoundInfo[MaxLevels + 1]; 2617 const SCEV *Delta = SE->getMinusSCEV(B0, A0); 2618 LLVM_DEBUG(dbgs() << "\tDelta = " << *Delta << '\n'); 2619 2620 // Compute bounds for all the * directions. 2621 LLVM_DEBUG(dbgs() << "\tBounds[*]\n"); 2622 for (unsigned K = 1; K <= MaxLevels; ++K) { 2623 Bound[K].Iterations = A[K].Iterations ? A[K].Iterations : B[K].Iterations; 2624 Bound[K].Direction = Dependence::DVEntry::ALL; 2625 Bound[K].DirSet = Dependence::DVEntry::NONE; 2626 findBoundsALL(A, B, Bound, K); 2627 #ifndef NDEBUG 2628 LLVM_DEBUG(dbgs() << "\t " << K << '\t'); 2629 if (Bound[K].Lower[Dependence::DVEntry::ALL]) 2630 LLVM_DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << '\t'); 2631 else 2632 LLVM_DEBUG(dbgs() << "-inf\t"); 2633 if (Bound[K].Upper[Dependence::DVEntry::ALL]) 2634 LLVM_DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << '\n'); 2635 else 2636 LLVM_DEBUG(dbgs() << "+inf\n"); 2637 #endif 2638 } 2639 2640 // Test the *, *, *, ... case. 2641 bool Disproved = false; 2642 if (testBounds(Dependence::DVEntry::ALL, 0, Bound, Delta)) { 2643 // Explore the direction vector hierarchy. 2644 unsigned DepthExpanded = 0; 2645 unsigned NewDeps = exploreDirections(1, A, B, Bound, 2646 Loops, DepthExpanded, Delta); 2647 if (NewDeps > 0) { 2648 bool Improved = false; 2649 for (unsigned K = 1; K <= CommonLevels; ++K) { 2650 if (Loops[K]) { 2651 unsigned Old = Result.DV[K - 1].Direction; 2652 Result.DV[K - 1].Direction = Old & Bound[K].DirSet; 2653 Improved |= Old != Result.DV[K - 1].Direction; 2654 if (!Result.DV[K - 1].Direction) { 2655 Improved = false; 2656 Disproved = true; 2657 break; 2658 } 2659 } 2660 } 2661 if (Improved) 2662 ++BanerjeeSuccesses; 2663 } 2664 else { 2665 ++BanerjeeIndependence; 2666 Disproved = true; 2667 } 2668 } 2669 else { 2670 ++BanerjeeIndependence; 2671 Disproved = true; 2672 } 2673 delete [] Bound; 2674 delete [] A; 2675 delete [] B; 2676 return Disproved; 2677 } 2678 2679 2680 // Hierarchically expands the direction vector 2681 // search space, combining the directions of discovered dependences 2682 // in the DirSet field of Bound. Returns the number of distinct 2683 // dependences discovered. If the dependence is disproved, 2684 // it will return 0. 2685 unsigned DependenceInfo::exploreDirections(unsigned Level, CoefficientInfo *A, 2686 CoefficientInfo *B, BoundInfo *Bound, 2687 const SmallBitVector &Loops, 2688 unsigned &DepthExpanded, 2689 const SCEV *Delta) const { 2690 // This algorithm has worst case complexity of O(3^n), where 'n' is the number 2691 // of common loop levels. To avoid excessive compile-time, pessimize all the 2692 // results and immediately return when the number of common levels is beyond 2693 // the given threshold. 2694 if (CommonLevels > MIVMaxLevelThreshold) { 2695 LLVM_DEBUG(dbgs() << "Number of common levels exceeded the threshold. MIV " 2696 "direction exploration is terminated.\n"); 2697 for (unsigned K = 1; K <= CommonLevels; ++K) 2698 if (Loops[K]) 2699 Bound[K].DirSet = Dependence::DVEntry::ALL; 2700 return 1; 2701 } 2702 2703 if (Level > CommonLevels) { 2704 // record result 2705 LLVM_DEBUG(dbgs() << "\t["); 2706 for (unsigned K = 1; K <= CommonLevels; ++K) { 2707 if (Loops[K]) { 2708 Bound[K].DirSet |= Bound[K].Direction; 2709 #ifndef NDEBUG 2710 switch (Bound[K].Direction) { 2711 case Dependence::DVEntry::LT: 2712 LLVM_DEBUG(dbgs() << " <"); 2713 break; 2714 case Dependence::DVEntry::EQ: 2715 LLVM_DEBUG(dbgs() << " ="); 2716 break; 2717 case Dependence::DVEntry::GT: 2718 LLVM_DEBUG(dbgs() << " >"); 2719 break; 2720 case Dependence::DVEntry::ALL: 2721 LLVM_DEBUG(dbgs() << " *"); 2722 break; 2723 default: 2724 llvm_unreachable("unexpected Bound[K].Direction"); 2725 } 2726 #endif 2727 } 2728 } 2729 LLVM_DEBUG(dbgs() << " ]\n"); 2730 return 1; 2731 } 2732 if (Loops[Level]) { 2733 if (Level > DepthExpanded) { 2734 DepthExpanded = Level; 2735 // compute bounds for <, =, > at current level 2736 findBoundsLT(A, B, Bound, Level); 2737 findBoundsGT(A, B, Bound, Level); 2738 findBoundsEQ(A, B, Bound, Level); 2739 #ifndef NDEBUG 2740 LLVM_DEBUG(dbgs() << "\tBound for level = " << Level << '\n'); 2741 LLVM_DEBUG(dbgs() << "\t <\t"); 2742 if (Bound[Level].Lower[Dependence::DVEntry::LT]) 2743 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT] 2744 << '\t'); 2745 else 2746 LLVM_DEBUG(dbgs() << "-inf\t"); 2747 if (Bound[Level].Upper[Dependence::DVEntry::LT]) 2748 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT] 2749 << '\n'); 2750 else 2751 LLVM_DEBUG(dbgs() << "+inf\n"); 2752 LLVM_DEBUG(dbgs() << "\t =\t"); 2753 if (Bound[Level].Lower[Dependence::DVEntry::EQ]) 2754 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ] 2755 << '\t'); 2756 else 2757 LLVM_DEBUG(dbgs() << "-inf\t"); 2758 if (Bound[Level].Upper[Dependence::DVEntry::EQ]) 2759 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ] 2760 << '\n'); 2761 else 2762 LLVM_DEBUG(dbgs() << "+inf\n"); 2763 LLVM_DEBUG(dbgs() << "\t >\t"); 2764 if (Bound[Level].Lower[Dependence::DVEntry::GT]) 2765 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::GT] 2766 << '\t'); 2767 else 2768 LLVM_DEBUG(dbgs() << "-inf\t"); 2769 if (Bound[Level].Upper[Dependence::DVEntry::GT]) 2770 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT] 2771 << '\n'); 2772 else 2773 LLVM_DEBUG(dbgs() << "+inf\n"); 2774 #endif 2775 } 2776 2777 unsigned NewDeps = 0; 2778 2779 // test bounds for <, *, *, ... 2780 if (testBounds(Dependence::DVEntry::LT, Level, Bound, Delta)) 2781 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2782 Loops, DepthExpanded, Delta); 2783 2784 // Test bounds for =, *, *, ... 2785 if (testBounds(Dependence::DVEntry::EQ, Level, Bound, Delta)) 2786 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2787 Loops, DepthExpanded, Delta); 2788 2789 // test bounds for >, *, *, ... 2790 if (testBounds(Dependence::DVEntry::GT, Level, Bound, Delta)) 2791 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2792 Loops, DepthExpanded, Delta); 2793 2794 Bound[Level].Direction = Dependence::DVEntry::ALL; 2795 return NewDeps; 2796 } 2797 else 2798 return exploreDirections(Level + 1, A, B, Bound, Loops, DepthExpanded, Delta); 2799 } 2800 2801 2802 // Returns true iff the current bounds are plausible. 2803 bool DependenceInfo::testBounds(unsigned char DirKind, unsigned Level, 2804 BoundInfo *Bound, const SCEV *Delta) const { 2805 Bound[Level].Direction = DirKind; 2806 if (const SCEV *LowerBound = getLowerBound(Bound)) 2807 if (isKnownPredicate(CmpInst::ICMP_SGT, LowerBound, Delta)) 2808 return false; 2809 if (const SCEV *UpperBound = getUpperBound(Bound)) 2810 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, UpperBound)) 2811 return false; 2812 return true; 2813 } 2814 2815 2816 // Computes the upper and lower bounds for level K 2817 // using the * direction. Records them in Bound. 2818 // Wolfe gives the equations 2819 // 2820 // LB^*_k = (A^-_k - B^+_k)(U_k - L_k) + (A_k - B_k)L_k 2821 // UB^*_k = (A^+_k - B^-_k)(U_k - L_k) + (A_k - B_k)L_k 2822 // 2823 // Since we normalize loops, we can simplify these equations to 2824 // 2825 // LB^*_k = (A^-_k - B^+_k)U_k 2826 // UB^*_k = (A^+_k - B^-_k)U_k 2827 // 2828 // We must be careful to handle the case where the upper bound is unknown. 2829 // Note that the lower bound is always <= 0 2830 // and the upper bound is always >= 0. 2831 void DependenceInfo::findBoundsALL(CoefficientInfo *A, CoefficientInfo *B, 2832 BoundInfo *Bound, unsigned K) const { 2833 Bound[K].Lower[Dependence::DVEntry::ALL] = nullptr; // Default value = -infinity. 2834 Bound[K].Upper[Dependence::DVEntry::ALL] = nullptr; // Default value = +infinity. 2835 if (Bound[K].Iterations) { 2836 Bound[K].Lower[Dependence::DVEntry::ALL] = 2837 SE->getMulExpr(SE->getMinusSCEV(A[K].NegPart, B[K].PosPart), 2838 Bound[K].Iterations); 2839 Bound[K].Upper[Dependence::DVEntry::ALL] = 2840 SE->getMulExpr(SE->getMinusSCEV(A[K].PosPart, B[K].NegPart), 2841 Bound[K].Iterations); 2842 } 2843 else { 2844 // If the difference is 0, we won't need to know the number of iterations. 2845 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].NegPart, B[K].PosPart)) 2846 Bound[K].Lower[Dependence::DVEntry::ALL] = 2847 SE->getZero(A[K].Coeff->getType()); 2848 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].PosPart, B[K].NegPart)) 2849 Bound[K].Upper[Dependence::DVEntry::ALL] = 2850 SE->getZero(A[K].Coeff->getType()); 2851 } 2852 } 2853 2854 2855 // Computes the upper and lower bounds for level K 2856 // using the = direction. Records them in Bound. 2857 // Wolfe gives the equations 2858 // 2859 // LB^=_k = (A_k - B_k)^- (U_k - L_k) + (A_k - B_k)L_k 2860 // UB^=_k = (A_k - B_k)^+ (U_k - L_k) + (A_k - B_k)L_k 2861 // 2862 // Since we normalize loops, we can simplify these equations to 2863 // 2864 // LB^=_k = (A_k - B_k)^- U_k 2865 // UB^=_k = (A_k - B_k)^+ U_k 2866 // 2867 // We must be careful to handle the case where the upper bound is unknown. 2868 // Note that the lower bound is always <= 0 2869 // and the upper bound is always >= 0. 2870 void DependenceInfo::findBoundsEQ(CoefficientInfo *A, CoefficientInfo *B, 2871 BoundInfo *Bound, unsigned K) const { 2872 Bound[K].Lower[Dependence::DVEntry::EQ] = nullptr; // Default value = -infinity. 2873 Bound[K].Upper[Dependence::DVEntry::EQ] = nullptr; // Default value = +infinity. 2874 if (Bound[K].Iterations) { 2875 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2876 const SCEV *NegativePart = getNegativePart(Delta); 2877 Bound[K].Lower[Dependence::DVEntry::EQ] = 2878 SE->getMulExpr(NegativePart, Bound[K].Iterations); 2879 const SCEV *PositivePart = getPositivePart(Delta); 2880 Bound[K].Upper[Dependence::DVEntry::EQ] = 2881 SE->getMulExpr(PositivePart, Bound[K].Iterations); 2882 } 2883 else { 2884 // If the positive/negative part of the difference is 0, 2885 // we won't need to know the number of iterations. 2886 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2887 const SCEV *NegativePart = getNegativePart(Delta); 2888 if (NegativePart->isZero()) 2889 Bound[K].Lower[Dependence::DVEntry::EQ] = NegativePart; // Zero 2890 const SCEV *PositivePart = getPositivePart(Delta); 2891 if (PositivePart->isZero()) 2892 Bound[K].Upper[Dependence::DVEntry::EQ] = PositivePart; // Zero 2893 } 2894 } 2895 2896 2897 // Computes the upper and lower bounds for level K 2898 // using the < direction. Records them in Bound. 2899 // Wolfe gives the equations 2900 // 2901 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2902 // UB^<_k = (A^+_k - B_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2903 // 2904 // Since we normalize loops, we can simplify these equations to 2905 // 2906 // LB^<_k = (A^-_k - B_k)^- (U_k - 1) - B_k 2907 // UB^<_k = (A^+_k - B_k)^+ (U_k - 1) - B_k 2908 // 2909 // We must be careful to handle the case where the upper bound is unknown. 2910 void DependenceInfo::findBoundsLT(CoefficientInfo *A, CoefficientInfo *B, 2911 BoundInfo *Bound, unsigned K) const { 2912 Bound[K].Lower[Dependence::DVEntry::LT] = nullptr; // Default value = -infinity. 2913 Bound[K].Upper[Dependence::DVEntry::LT] = nullptr; // Default value = +infinity. 2914 if (Bound[K].Iterations) { 2915 const SCEV *Iter_1 = SE->getMinusSCEV( 2916 Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType())); 2917 const SCEV *NegPart = 2918 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2919 Bound[K].Lower[Dependence::DVEntry::LT] = 2920 SE->getMinusSCEV(SE->getMulExpr(NegPart, Iter_1), B[K].Coeff); 2921 const SCEV *PosPart = 2922 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2923 Bound[K].Upper[Dependence::DVEntry::LT] = 2924 SE->getMinusSCEV(SE->getMulExpr(PosPart, Iter_1), B[K].Coeff); 2925 } 2926 else { 2927 // If the positive/negative part of the difference is 0, 2928 // we won't need to know the number of iterations. 2929 const SCEV *NegPart = 2930 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2931 if (NegPart->isZero()) 2932 Bound[K].Lower[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2933 const SCEV *PosPart = 2934 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2935 if (PosPart->isZero()) 2936 Bound[K].Upper[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2937 } 2938 } 2939 2940 2941 // Computes the upper and lower bounds for level K 2942 // using the > direction. Records them in Bound. 2943 // Wolfe gives the equations 2944 // 2945 // LB^>_k = (A_k - B^+_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2946 // UB^>_k = (A_k - B^-_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2947 // 2948 // Since we normalize loops, we can simplify these equations to 2949 // 2950 // LB^>_k = (A_k - B^+_k)^- (U_k - 1) + A_k 2951 // UB^>_k = (A_k - B^-_k)^+ (U_k - 1) + A_k 2952 // 2953 // We must be careful to handle the case where the upper bound is unknown. 2954 void DependenceInfo::findBoundsGT(CoefficientInfo *A, CoefficientInfo *B, 2955 BoundInfo *Bound, unsigned K) const { 2956 Bound[K].Lower[Dependence::DVEntry::GT] = nullptr; // Default value = -infinity. 2957 Bound[K].Upper[Dependence::DVEntry::GT] = nullptr; // Default value = +infinity. 2958 if (Bound[K].Iterations) { 2959 const SCEV *Iter_1 = SE->getMinusSCEV( 2960 Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType())); 2961 const SCEV *NegPart = 2962 getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2963 Bound[K].Lower[Dependence::DVEntry::GT] = 2964 SE->getAddExpr(SE->getMulExpr(NegPart, Iter_1), A[K].Coeff); 2965 const SCEV *PosPart = 2966 getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2967 Bound[K].Upper[Dependence::DVEntry::GT] = 2968 SE->getAddExpr(SE->getMulExpr(PosPart, Iter_1), A[K].Coeff); 2969 } 2970 else { 2971 // If the positive/negative part of the difference is 0, 2972 // we won't need to know the number of iterations. 2973 const SCEV *NegPart = getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2974 if (NegPart->isZero()) 2975 Bound[K].Lower[Dependence::DVEntry::GT] = A[K].Coeff; 2976 const SCEV *PosPart = getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2977 if (PosPart->isZero()) 2978 Bound[K].Upper[Dependence::DVEntry::GT] = A[K].Coeff; 2979 } 2980 } 2981 2982 2983 // X^+ = max(X, 0) 2984 const SCEV *DependenceInfo::getPositivePart(const SCEV *X) const { 2985 return SE->getSMaxExpr(X, SE->getZero(X->getType())); 2986 } 2987 2988 2989 // X^- = min(X, 0) 2990 const SCEV *DependenceInfo::getNegativePart(const SCEV *X) const { 2991 return SE->getSMinExpr(X, SE->getZero(X->getType())); 2992 } 2993 2994 2995 // Walks through the subscript, 2996 // collecting each coefficient, the associated loop bounds, 2997 // and recording its positive and negative parts for later use. 2998 DependenceInfo::CoefficientInfo * 2999 DependenceInfo::collectCoeffInfo(const SCEV *Subscript, bool SrcFlag, 3000 const SCEV *&Constant) const { 3001 const SCEV *Zero = SE->getZero(Subscript->getType()); 3002 CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1]; 3003 for (unsigned K = 1; K <= MaxLevels; ++K) { 3004 CI[K].Coeff = Zero; 3005 CI[K].PosPart = Zero; 3006 CI[K].NegPart = Zero; 3007 CI[K].Iterations = nullptr; 3008 } 3009 while (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Subscript)) { 3010 const Loop *L = AddRec->getLoop(); 3011 unsigned K = SrcFlag ? mapSrcLoop(L) : mapDstLoop(L); 3012 CI[K].Coeff = AddRec->getStepRecurrence(*SE); 3013 CI[K].PosPart = getPositivePart(CI[K].Coeff); 3014 CI[K].NegPart = getNegativePart(CI[K].Coeff); 3015 CI[K].Iterations = collectUpperBound(L, Subscript->getType()); 3016 Subscript = AddRec->getStart(); 3017 } 3018 Constant = Subscript; 3019 #ifndef NDEBUG 3020 LLVM_DEBUG(dbgs() << "\tCoefficient Info\n"); 3021 for (unsigned K = 1; K <= MaxLevels; ++K) { 3022 LLVM_DEBUG(dbgs() << "\t " << K << "\t" << *CI[K].Coeff); 3023 LLVM_DEBUG(dbgs() << "\tPos Part = "); 3024 LLVM_DEBUG(dbgs() << *CI[K].PosPart); 3025 LLVM_DEBUG(dbgs() << "\tNeg Part = "); 3026 LLVM_DEBUG(dbgs() << *CI[K].NegPart); 3027 LLVM_DEBUG(dbgs() << "\tUpper Bound = "); 3028 if (CI[K].Iterations) 3029 LLVM_DEBUG(dbgs() << *CI[K].Iterations); 3030 else 3031 LLVM_DEBUG(dbgs() << "+inf"); 3032 LLVM_DEBUG(dbgs() << '\n'); 3033 } 3034 LLVM_DEBUG(dbgs() << "\t Constant = " << *Subscript << '\n'); 3035 #endif 3036 return CI; 3037 } 3038 3039 3040 // Looks through all the bounds info and 3041 // computes the lower bound given the current direction settings 3042 // at each level. If the lower bound for any level is -inf, 3043 // the result is -inf. 3044 const SCEV *DependenceInfo::getLowerBound(BoundInfo *Bound) const { 3045 const SCEV *Sum = Bound[1].Lower[Bound[1].Direction]; 3046 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 3047 if (Bound[K].Lower[Bound[K].Direction]) 3048 Sum = SE->getAddExpr(Sum, Bound[K].Lower[Bound[K].Direction]); 3049 else 3050 Sum = nullptr; 3051 } 3052 return Sum; 3053 } 3054 3055 3056 // Looks through all the bounds info and 3057 // computes the upper bound given the current direction settings 3058 // at each level. If the upper bound at any level is +inf, 3059 // the result is +inf. 3060 const SCEV *DependenceInfo::getUpperBound(BoundInfo *Bound) const { 3061 const SCEV *Sum = Bound[1].Upper[Bound[1].Direction]; 3062 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 3063 if (Bound[K].Upper[Bound[K].Direction]) 3064 Sum = SE->getAddExpr(Sum, Bound[K].Upper[Bound[K].Direction]); 3065 else 3066 Sum = nullptr; 3067 } 3068 return Sum; 3069 } 3070 3071 3072 //===----------------------------------------------------------------------===// 3073 // Constraint manipulation for Delta test. 3074 3075 // Given a linear SCEV, 3076 // return the coefficient (the step) 3077 // corresponding to the specified loop. 3078 // If there isn't one, return 0. 3079 // For example, given a*i + b*j + c*k, finding the coefficient 3080 // corresponding to the j loop would yield b. 3081 const SCEV *DependenceInfo::findCoefficient(const SCEV *Expr, 3082 const Loop *TargetLoop) const { 3083 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 3084 if (!AddRec) 3085 return SE->getZero(Expr->getType()); 3086 if (AddRec->getLoop() == TargetLoop) 3087 return AddRec->getStepRecurrence(*SE); 3088 return findCoefficient(AddRec->getStart(), TargetLoop); 3089 } 3090 3091 3092 // Given a linear SCEV, 3093 // return the SCEV given by zeroing out the coefficient 3094 // corresponding to the specified loop. 3095 // For example, given a*i + b*j + c*k, zeroing the coefficient 3096 // corresponding to the j loop would yield a*i + c*k. 3097 const SCEV *DependenceInfo::zeroCoefficient(const SCEV *Expr, 3098 const Loop *TargetLoop) const { 3099 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 3100 if (!AddRec) 3101 return Expr; // ignore 3102 if (AddRec->getLoop() == TargetLoop) 3103 return AddRec->getStart(); 3104 return SE->getAddRecExpr(zeroCoefficient(AddRec->getStart(), TargetLoop), 3105 AddRec->getStepRecurrence(*SE), 3106 AddRec->getLoop(), 3107 AddRec->getNoWrapFlags()); 3108 } 3109 3110 3111 // Given a linear SCEV Expr, 3112 // return the SCEV given by adding some Value to the 3113 // coefficient corresponding to the specified TargetLoop. 3114 // For example, given a*i + b*j + c*k, adding 1 to the coefficient 3115 // corresponding to the j loop would yield a*i + (b+1)*j + c*k. 3116 const SCEV *DependenceInfo::addToCoefficient(const SCEV *Expr, 3117 const Loop *TargetLoop, 3118 const SCEV *Value) const { 3119 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 3120 if (!AddRec) // create a new addRec 3121 return SE->getAddRecExpr(Expr, 3122 Value, 3123 TargetLoop, 3124 SCEV::FlagAnyWrap); // Worst case, with no info. 3125 if (AddRec->getLoop() == TargetLoop) { 3126 const SCEV *Sum = SE->getAddExpr(AddRec->getStepRecurrence(*SE), Value); 3127 if (Sum->isZero()) 3128 return AddRec->getStart(); 3129 return SE->getAddRecExpr(AddRec->getStart(), 3130 Sum, 3131 AddRec->getLoop(), 3132 AddRec->getNoWrapFlags()); 3133 } 3134 if (SE->isLoopInvariant(AddRec, TargetLoop)) 3135 return SE->getAddRecExpr(AddRec, Value, TargetLoop, SCEV::FlagAnyWrap); 3136 return SE->getAddRecExpr( 3137 addToCoefficient(AddRec->getStart(), TargetLoop, Value), 3138 AddRec->getStepRecurrence(*SE), AddRec->getLoop(), 3139 AddRec->getNoWrapFlags()); 3140 } 3141 3142 3143 // Review the constraints, looking for opportunities 3144 // to simplify a subscript pair (Src and Dst). 3145 // Return true if some simplification occurs. 3146 // If the simplification isn't exact (that is, if it is conservative 3147 // in terms of dependence), set consistent to false. 3148 // Corresponds to Figure 5 from the paper 3149 // 3150 // Practical Dependence Testing 3151 // Goff, Kennedy, Tseng 3152 // PLDI 1991 3153 bool DependenceInfo::propagate(const SCEV *&Src, const SCEV *&Dst, 3154 SmallBitVector &Loops, 3155 SmallVectorImpl<Constraint> &Constraints, 3156 bool &Consistent) { 3157 bool Result = false; 3158 for (unsigned LI : Loops.set_bits()) { 3159 LLVM_DEBUG(dbgs() << "\t Constraint[" << LI << "] is"); 3160 LLVM_DEBUG(Constraints[LI].dump(dbgs())); 3161 if (Constraints[LI].isDistance()) 3162 Result |= propagateDistance(Src, Dst, Constraints[LI], Consistent); 3163 else if (Constraints[LI].isLine()) 3164 Result |= propagateLine(Src, Dst, Constraints[LI], Consistent); 3165 else if (Constraints[LI].isPoint()) 3166 Result |= propagatePoint(Src, Dst, Constraints[LI]); 3167 } 3168 return Result; 3169 } 3170 3171 3172 // Attempt to propagate a distance 3173 // constraint into a subscript pair (Src and Dst). 3174 // Return true if some simplification occurs. 3175 // If the simplification isn't exact (that is, if it is conservative 3176 // in terms of dependence), set consistent to false. 3177 bool DependenceInfo::propagateDistance(const SCEV *&Src, const SCEV *&Dst, 3178 Constraint &CurConstraint, 3179 bool &Consistent) { 3180 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3181 LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 3182 const SCEV *A_K = findCoefficient(Src, CurLoop); 3183 if (A_K->isZero()) 3184 return false; 3185 const SCEV *DA_K = SE->getMulExpr(A_K, CurConstraint.getD()); 3186 Src = SE->getMinusSCEV(Src, DA_K); 3187 Src = zeroCoefficient(Src, CurLoop); 3188 LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3189 LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3190 Dst = addToCoefficient(Dst, CurLoop, SE->getNegativeSCEV(A_K)); 3191 LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3192 if (!findCoefficient(Dst, CurLoop)->isZero()) 3193 Consistent = false; 3194 return true; 3195 } 3196 3197 3198 // Attempt to propagate a line 3199 // constraint into a subscript pair (Src and Dst). 3200 // Return true if some simplification occurs. 3201 // If the simplification isn't exact (that is, if it is conservative 3202 // in terms of dependence), set consistent to false. 3203 bool DependenceInfo::propagateLine(const SCEV *&Src, const SCEV *&Dst, 3204 Constraint &CurConstraint, 3205 bool &Consistent) { 3206 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3207 const SCEV *A = CurConstraint.getA(); 3208 const SCEV *B = CurConstraint.getB(); 3209 const SCEV *C = CurConstraint.getC(); 3210 LLVM_DEBUG(dbgs() << "\t\tA = " << *A << ", B = " << *B << ", C = " << *C 3211 << "\n"); 3212 LLVM_DEBUG(dbgs() << "\t\tSrc = " << *Src << "\n"); 3213 LLVM_DEBUG(dbgs() << "\t\tDst = " << *Dst << "\n"); 3214 if (A->isZero()) { 3215 const SCEVConstant *Bconst = dyn_cast<SCEVConstant>(B); 3216 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3217 if (!Bconst || !Cconst) return false; 3218 APInt Beta = Bconst->getAPInt(); 3219 APInt Charlie = Cconst->getAPInt(); 3220 APInt CdivB = Charlie.sdiv(Beta); 3221 assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B"); 3222 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3223 // Src = SE->getAddExpr(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3224 Src = SE->getMinusSCEV(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3225 Dst = zeroCoefficient(Dst, CurLoop); 3226 if (!findCoefficient(Src, CurLoop)->isZero()) 3227 Consistent = false; 3228 } 3229 else if (B->isZero()) { 3230 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3231 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3232 if (!Aconst || !Cconst) return false; 3233 APInt Alpha = Aconst->getAPInt(); 3234 APInt Charlie = Cconst->getAPInt(); 3235 APInt CdivA = Charlie.sdiv(Alpha); 3236 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3237 const SCEV *A_K = findCoefficient(Src, CurLoop); 3238 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3239 Src = zeroCoefficient(Src, CurLoop); 3240 if (!findCoefficient(Dst, CurLoop)->isZero()) 3241 Consistent = false; 3242 } 3243 else if (isKnownPredicate(CmpInst::ICMP_EQ, A, B)) { 3244 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3245 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3246 if (!Aconst || !Cconst) return false; 3247 APInt Alpha = Aconst->getAPInt(); 3248 APInt Charlie = Cconst->getAPInt(); 3249 APInt CdivA = Charlie.sdiv(Alpha); 3250 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3251 const SCEV *A_K = findCoefficient(Src, CurLoop); 3252 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3253 Src = zeroCoefficient(Src, CurLoop); 3254 Dst = addToCoefficient(Dst, CurLoop, A_K); 3255 if (!findCoefficient(Dst, CurLoop)->isZero()) 3256 Consistent = false; 3257 } 3258 else { 3259 // paper is incorrect here, or perhaps just misleading 3260 const SCEV *A_K = findCoefficient(Src, CurLoop); 3261 Src = SE->getMulExpr(Src, A); 3262 Dst = SE->getMulExpr(Dst, A); 3263 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, C)); 3264 Src = zeroCoefficient(Src, CurLoop); 3265 Dst = addToCoefficient(Dst, CurLoop, SE->getMulExpr(A_K, B)); 3266 if (!findCoefficient(Dst, CurLoop)->isZero()) 3267 Consistent = false; 3268 } 3269 LLVM_DEBUG(dbgs() << "\t\tnew Src = " << *Src << "\n"); 3270 LLVM_DEBUG(dbgs() << "\t\tnew Dst = " << *Dst << "\n"); 3271 return true; 3272 } 3273 3274 3275 // Attempt to propagate a point 3276 // constraint into a subscript pair (Src and Dst). 3277 // Return true if some simplification occurs. 3278 bool DependenceInfo::propagatePoint(const SCEV *&Src, const SCEV *&Dst, 3279 Constraint &CurConstraint) { 3280 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3281 const SCEV *A_K = findCoefficient(Src, CurLoop); 3282 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3283 const SCEV *XA_K = SE->getMulExpr(A_K, CurConstraint.getX()); 3284 const SCEV *YAP_K = SE->getMulExpr(AP_K, CurConstraint.getY()); 3285 LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 3286 Src = SE->getAddExpr(Src, SE->getMinusSCEV(XA_K, YAP_K)); 3287 Src = zeroCoefficient(Src, CurLoop); 3288 LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3289 LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3290 Dst = zeroCoefficient(Dst, CurLoop); 3291 LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3292 return true; 3293 } 3294 3295 3296 // Update direction vector entry based on the current constraint. 3297 void DependenceInfo::updateDirection(Dependence::DVEntry &Level, 3298 const Constraint &CurConstraint) const { 3299 LLVM_DEBUG(dbgs() << "\tUpdate direction, constraint ="); 3300 LLVM_DEBUG(CurConstraint.dump(dbgs())); 3301 if (CurConstraint.isAny()) 3302 ; // use defaults 3303 else if (CurConstraint.isDistance()) { 3304 // this one is consistent, the others aren't 3305 Level.Scalar = false; 3306 Level.Distance = CurConstraint.getD(); 3307 unsigned NewDirection = Dependence::DVEntry::NONE; 3308 if (!SE->isKnownNonZero(Level.Distance)) // if may be zero 3309 NewDirection = Dependence::DVEntry::EQ; 3310 if (!SE->isKnownNonPositive(Level.Distance)) // if may be positive 3311 NewDirection |= Dependence::DVEntry::LT; 3312 if (!SE->isKnownNonNegative(Level.Distance)) // if may be negative 3313 NewDirection |= Dependence::DVEntry::GT; 3314 Level.Direction &= NewDirection; 3315 } 3316 else if (CurConstraint.isLine()) { 3317 Level.Scalar = false; 3318 Level.Distance = nullptr; 3319 // direction should be accurate 3320 } 3321 else if (CurConstraint.isPoint()) { 3322 Level.Scalar = false; 3323 Level.Distance = nullptr; 3324 unsigned NewDirection = Dependence::DVEntry::NONE; 3325 if (!isKnownPredicate(CmpInst::ICMP_NE, 3326 CurConstraint.getY(), 3327 CurConstraint.getX())) 3328 // if X may be = Y 3329 NewDirection |= Dependence::DVEntry::EQ; 3330 if (!isKnownPredicate(CmpInst::ICMP_SLE, 3331 CurConstraint.getY(), 3332 CurConstraint.getX())) 3333 // if Y may be > X 3334 NewDirection |= Dependence::DVEntry::LT; 3335 if (!isKnownPredicate(CmpInst::ICMP_SGE, 3336 CurConstraint.getY(), 3337 CurConstraint.getX())) 3338 // if Y may be < X 3339 NewDirection |= Dependence::DVEntry::GT; 3340 Level.Direction &= NewDirection; 3341 } 3342 else 3343 llvm_unreachable("constraint has unexpected kind"); 3344 } 3345 3346 /// Check if we can delinearize the subscripts. If the SCEVs representing the 3347 /// source and destination array references are recurrences on a nested loop, 3348 /// this function flattens the nested recurrences into separate recurrences 3349 /// for each loop level. 3350 bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst, 3351 SmallVectorImpl<Subscript> &Pair) { 3352 assert(isLoadOrStore(Src) && "instruction is not load or store"); 3353 assert(isLoadOrStore(Dst) && "instruction is not load or store"); 3354 Value *SrcPtr = getLoadStorePointerOperand(Src); 3355 Value *DstPtr = getLoadStorePointerOperand(Dst); 3356 Loop *SrcLoop = LI->getLoopFor(Src->getParent()); 3357 Loop *DstLoop = LI->getLoopFor(Dst->getParent()); 3358 const SCEV *SrcAccessFn = SE->getSCEVAtScope(SrcPtr, SrcLoop); 3359 const SCEV *DstAccessFn = SE->getSCEVAtScope(DstPtr, DstLoop); 3360 const SCEVUnknown *SrcBase = 3361 dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn)); 3362 const SCEVUnknown *DstBase = 3363 dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn)); 3364 3365 if (!SrcBase || !DstBase || SrcBase != DstBase) 3366 return false; 3367 3368 SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts; 3369 3370 if (!tryDelinearizeFixedSize(Src, Dst, SrcAccessFn, DstAccessFn, 3371 SrcSubscripts, DstSubscripts) && 3372 !tryDelinearizeParametricSize(Src, Dst, SrcAccessFn, DstAccessFn, 3373 SrcSubscripts, DstSubscripts)) 3374 return false; 3375 3376 int Size = SrcSubscripts.size(); 3377 LLVM_DEBUG({ 3378 dbgs() << "\nSrcSubscripts: "; 3379 for (int I = 0; I < Size; I++) 3380 dbgs() << *SrcSubscripts[I]; 3381 dbgs() << "\nDstSubscripts: "; 3382 for (int I = 0; I < Size; I++) 3383 dbgs() << *DstSubscripts[I]; 3384 }); 3385 3386 // The delinearization transforms a single-subscript MIV dependence test into 3387 // a multi-subscript SIV dependence test that is easier to compute. So we 3388 // resize Pair to contain as many pairs of subscripts as the delinearization 3389 // has found, and then initialize the pairs following the delinearization. 3390 Pair.resize(Size); 3391 for (int I = 0; I < Size; ++I) { 3392 Pair[I].Src = SrcSubscripts[I]; 3393 Pair[I].Dst = DstSubscripts[I]; 3394 unifySubscriptType(&Pair[I]); 3395 } 3396 3397 return true; 3398 } 3399 3400 /// Try to delinearize \p SrcAccessFn and \p DstAccessFn if the underlying 3401 /// arrays accessed are fixed-size arrays. Return true if delinearization was 3402 /// successful. 3403 bool DependenceInfo::tryDelinearizeFixedSize( 3404 Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn, 3405 const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts, 3406 SmallVectorImpl<const SCEV *> &DstSubscripts) { 3407 LLVM_DEBUG({ 3408 const SCEVUnknown *SrcBase = 3409 dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn)); 3410 const SCEVUnknown *DstBase = 3411 dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn)); 3412 assert(SrcBase && DstBase && SrcBase == DstBase && 3413 "expected src and dst scev unknowns to be equal"); 3414 }); 3415 3416 SmallVector<int, 4> SrcSizes; 3417 SmallVector<int, 4> DstSizes; 3418 if (!tryDelinearizeFixedSizeImpl(SE, Src, SrcAccessFn, SrcSubscripts, 3419 SrcSizes) || 3420 !tryDelinearizeFixedSizeImpl(SE, Dst, DstAccessFn, DstSubscripts, 3421 DstSizes)) 3422 return false; 3423 3424 // Check that the two size arrays are non-empty and equal in length and 3425 // value. 3426 if (SrcSizes.size() != DstSizes.size() || 3427 !std::equal(SrcSizes.begin(), SrcSizes.end(), DstSizes.begin())) { 3428 SrcSubscripts.clear(); 3429 DstSubscripts.clear(); 3430 return false; 3431 } 3432 3433 assert(SrcSubscripts.size() == DstSubscripts.size() && 3434 "Expected equal number of entries in the list of SrcSubscripts and " 3435 "DstSubscripts."); 3436 3437 Value *SrcPtr = getLoadStorePointerOperand(Src); 3438 Value *DstPtr = getLoadStorePointerOperand(Dst); 3439 3440 // In general we cannot safely assume that the subscripts recovered from GEPs 3441 // are in the range of values defined for their corresponding array 3442 // dimensions. For example some C language usage/interpretation make it 3443 // impossible to verify this at compile-time. As such we can only delinearize 3444 // iff the subscripts are positive and are less than the range of the 3445 // dimension. 3446 if (!DisableDelinearizationChecks) { 3447 auto AllIndiciesInRange = [&](SmallVector<int, 4> &DimensionSizes, 3448 SmallVectorImpl<const SCEV *> &Subscripts, 3449 Value *Ptr) { 3450 size_t SSize = Subscripts.size(); 3451 for (size_t I = 1; I < SSize; ++I) { 3452 const SCEV *S = Subscripts[I]; 3453 if (!isKnownNonNegative(S, Ptr)) 3454 return false; 3455 if (auto *SType = dyn_cast<IntegerType>(S->getType())) { 3456 const SCEV *Range = SE->getConstant( 3457 ConstantInt::get(SType, DimensionSizes[I - 1], false)); 3458 if (!isKnownLessThan(S, Range)) 3459 return false; 3460 } 3461 } 3462 return true; 3463 }; 3464 3465 if (!AllIndiciesInRange(SrcSizes, SrcSubscripts, SrcPtr) || 3466 !AllIndiciesInRange(DstSizes, DstSubscripts, DstPtr)) { 3467 SrcSubscripts.clear(); 3468 DstSubscripts.clear(); 3469 return false; 3470 } 3471 } 3472 LLVM_DEBUG({ 3473 dbgs() << "Delinearized subscripts of fixed-size array\n" 3474 << "SrcGEP:" << *SrcPtr << "\n" 3475 << "DstGEP:" << *DstPtr << "\n"; 3476 }); 3477 return true; 3478 } 3479 3480 bool DependenceInfo::tryDelinearizeParametricSize( 3481 Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn, 3482 const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts, 3483 SmallVectorImpl<const SCEV *> &DstSubscripts) { 3484 3485 Value *SrcPtr = getLoadStorePointerOperand(Src); 3486 Value *DstPtr = getLoadStorePointerOperand(Dst); 3487 const SCEVUnknown *SrcBase = 3488 dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn)); 3489 const SCEVUnknown *DstBase = 3490 dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn)); 3491 assert(SrcBase && DstBase && SrcBase == DstBase && 3492 "expected src and dst scev unknowns to be equal"); 3493 3494 const SCEV *ElementSize = SE->getElementSize(Src); 3495 if (ElementSize != SE->getElementSize(Dst)) 3496 return false; 3497 3498 const SCEV *SrcSCEV = SE->getMinusSCEV(SrcAccessFn, SrcBase); 3499 const SCEV *DstSCEV = SE->getMinusSCEV(DstAccessFn, DstBase); 3500 3501 const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(SrcSCEV); 3502 const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(DstSCEV); 3503 if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine()) 3504 return false; 3505 3506 // First step: collect parametric terms in both array references. 3507 SmallVector<const SCEV *, 4> Terms; 3508 collectParametricTerms(*SE, SrcAR, Terms); 3509 collectParametricTerms(*SE, DstAR, Terms); 3510 3511 // Second step: find subscript sizes. 3512 SmallVector<const SCEV *, 4> Sizes; 3513 findArrayDimensions(*SE, Terms, Sizes, ElementSize); 3514 3515 // Third step: compute the access functions for each subscript. 3516 computeAccessFunctions(*SE, SrcAR, SrcSubscripts, Sizes); 3517 computeAccessFunctions(*SE, DstAR, DstSubscripts, Sizes); 3518 3519 // Fail when there is only a subscript: that's a linearized access function. 3520 if (SrcSubscripts.size() < 2 || DstSubscripts.size() < 2 || 3521 SrcSubscripts.size() != DstSubscripts.size()) 3522 return false; 3523 3524 size_t Size = SrcSubscripts.size(); 3525 3526 // Statically check that the array bounds are in-range. The first subscript we 3527 // don't have a size for and it cannot overflow into another subscript, so is 3528 // always safe. The others need to be 0 <= subscript[i] < bound, for both src 3529 // and dst. 3530 // FIXME: It may be better to record these sizes and add them as constraints 3531 // to the dependency checks. 3532 if (!DisableDelinearizationChecks) 3533 for (size_t I = 1; I < Size; ++I) { 3534 if (!isKnownNonNegative(SrcSubscripts[I], SrcPtr)) 3535 return false; 3536 3537 if (!isKnownLessThan(SrcSubscripts[I], Sizes[I - 1])) 3538 return false; 3539 3540 if (!isKnownNonNegative(DstSubscripts[I], DstPtr)) 3541 return false; 3542 3543 if (!isKnownLessThan(DstSubscripts[I], Sizes[I - 1])) 3544 return false; 3545 } 3546 3547 return true; 3548 } 3549 3550 //===----------------------------------------------------------------------===// 3551 3552 #ifndef NDEBUG 3553 // For debugging purposes, dump a small bit vector to dbgs(). 3554 static void dumpSmallBitVector(SmallBitVector &BV) { 3555 dbgs() << "{"; 3556 for (unsigned VI : BV.set_bits()) { 3557 dbgs() << VI; 3558 if (BV.find_next(VI) >= 0) 3559 dbgs() << ' '; 3560 } 3561 dbgs() << "}\n"; 3562 } 3563 #endif 3564 3565 bool DependenceInfo::invalidate(Function &F, const PreservedAnalyses &PA, 3566 FunctionAnalysisManager::Invalidator &Inv) { 3567 // Check if the analysis itself has been invalidated. 3568 auto PAC = PA.getChecker<DependenceAnalysis>(); 3569 if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>()) 3570 return true; 3571 3572 // Check transitive dependencies. 3573 return Inv.invalidate<AAManager>(F, PA) || 3574 Inv.invalidate<ScalarEvolutionAnalysis>(F, PA) || 3575 Inv.invalidate<LoopAnalysis>(F, PA); 3576 } 3577 3578 // depends - 3579 // Returns NULL if there is no dependence. 3580 // Otherwise, return a Dependence with as many details as possible. 3581 // Corresponds to Section 3.1 in the paper 3582 // 3583 // Practical Dependence Testing 3584 // Goff, Kennedy, Tseng 3585 // PLDI 1991 3586 // 3587 // Care is required to keep the routine below, getSplitIteration(), 3588 // up to date with respect to this routine. 3589 std::unique_ptr<Dependence> 3590 DependenceInfo::depends(Instruction *Src, Instruction *Dst, 3591 bool PossiblyLoopIndependent) { 3592 if (Src == Dst) 3593 PossiblyLoopIndependent = false; 3594 3595 if (!(Src->mayReadOrWriteMemory() && Dst->mayReadOrWriteMemory())) 3596 // if both instructions don't reference memory, there's no dependence 3597 return nullptr; 3598 3599 if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) { 3600 // can only analyze simple loads and stores, i.e., no calls, invokes, etc. 3601 LLVM_DEBUG(dbgs() << "can only handle simple loads and stores\n"); 3602 return std::make_unique<Dependence>(Src, Dst); 3603 } 3604 3605 assert(isLoadOrStore(Src) && "instruction is not load or store"); 3606 assert(isLoadOrStore(Dst) && "instruction is not load or store"); 3607 Value *SrcPtr = getLoadStorePointerOperand(Src); 3608 Value *DstPtr = getLoadStorePointerOperand(Dst); 3609 3610 switch (underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), 3611 MemoryLocation::get(Dst), 3612 MemoryLocation::get(Src))) { 3613 case AliasResult::MayAlias: 3614 case AliasResult::PartialAlias: 3615 // cannot analyse objects if we don't understand their aliasing. 3616 LLVM_DEBUG(dbgs() << "can't analyze may or partial alias\n"); 3617 return std::make_unique<Dependence>(Src, Dst); 3618 case AliasResult::NoAlias: 3619 // If the objects noalias, they are distinct, accesses are independent. 3620 LLVM_DEBUG(dbgs() << "no alias\n"); 3621 return nullptr; 3622 case AliasResult::MustAlias: 3623 break; // The underlying objects alias; test accesses for dependence. 3624 } 3625 3626 // establish loop nesting levels 3627 establishNestingLevels(Src, Dst); 3628 LLVM_DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n"); 3629 LLVM_DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n"); 3630 3631 FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels); 3632 ++TotalArrayPairs; 3633 3634 unsigned Pairs = 1; 3635 SmallVector<Subscript, 2> Pair(Pairs); 3636 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); 3637 const SCEV *DstSCEV = SE->getSCEV(DstPtr); 3638 LLVM_DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n"); 3639 LLVM_DEBUG(dbgs() << " DstSCEV = " << *DstSCEV << "\n"); 3640 if (SE->getPointerBase(SrcSCEV) != SE->getPointerBase(DstSCEV)) { 3641 // If two pointers have different bases, trying to analyze indexes won't 3642 // work; we can't compare them to each other. This can happen, for example, 3643 // if one is produced by an LCSSA PHI node. 3644 // 3645 // We check this upfront so we don't crash in cases where getMinusSCEV() 3646 // returns a SCEVCouldNotCompute. 3647 LLVM_DEBUG(dbgs() << "can't analyze SCEV with different pointer base\n"); 3648 return std::make_unique<Dependence>(Src, Dst); 3649 } 3650 Pair[0].Src = SrcSCEV; 3651 Pair[0].Dst = DstSCEV; 3652 3653 if (Delinearize) { 3654 if (tryDelinearize(Src, Dst, Pair)) { 3655 LLVM_DEBUG(dbgs() << " delinearized\n"); 3656 Pairs = Pair.size(); 3657 } 3658 } 3659 3660 for (unsigned P = 0; P < Pairs; ++P) { 3661 Pair[P].Loops.resize(MaxLevels + 1); 3662 Pair[P].GroupLoops.resize(MaxLevels + 1); 3663 Pair[P].Group.resize(Pairs); 3664 removeMatchingExtensions(&Pair[P]); 3665 Pair[P].Classification = 3666 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), 3667 Pair[P].Dst, LI->getLoopFor(Dst->getParent()), 3668 Pair[P].Loops); 3669 Pair[P].GroupLoops = Pair[P].Loops; 3670 Pair[P].Group.set(P); 3671 LLVM_DEBUG(dbgs() << " subscript " << P << "\n"); 3672 LLVM_DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n"); 3673 LLVM_DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n"); 3674 LLVM_DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n"); 3675 LLVM_DEBUG(dbgs() << "\tloops = "); 3676 LLVM_DEBUG(dumpSmallBitVector(Pair[P].Loops)); 3677 } 3678 3679 SmallBitVector Separable(Pairs); 3680 SmallBitVector Coupled(Pairs); 3681 3682 // Partition subscripts into separable and minimally-coupled groups 3683 // Algorithm in paper is algorithmically better; 3684 // this may be faster in practice. Check someday. 3685 // 3686 // Here's an example of how it works. Consider this code: 3687 // 3688 // for (i = ...) { 3689 // for (j = ...) { 3690 // for (k = ...) { 3691 // for (l = ...) { 3692 // for (m = ...) { 3693 // A[i][j][k][m] = ...; 3694 // ... = A[0][j][l][i + j]; 3695 // } 3696 // } 3697 // } 3698 // } 3699 // } 3700 // 3701 // There are 4 subscripts here: 3702 // 0 [i] and [0] 3703 // 1 [j] and [j] 3704 // 2 [k] and [l] 3705 // 3 [m] and [i + j] 3706 // 3707 // We've already classified each subscript pair as ZIV, SIV, etc., 3708 // and collected all the loops mentioned by pair P in Pair[P].Loops. 3709 // In addition, we've initialized Pair[P].GroupLoops to Pair[P].Loops 3710 // and set Pair[P].Group = {P}. 3711 // 3712 // Src Dst Classification Loops GroupLoops Group 3713 // 0 [i] [0] SIV {1} {1} {0} 3714 // 1 [j] [j] SIV {2} {2} {1} 3715 // 2 [k] [l] RDIV {3,4} {3,4} {2} 3716 // 3 [m] [i + j] MIV {1,2,5} {1,2,5} {3} 3717 // 3718 // For each subscript SI 0 .. 3, we consider each remaining subscript, SJ. 3719 // So, 0 is compared against 1, 2, and 3; 1 is compared against 2 and 3, etc. 3720 // 3721 // We begin by comparing 0 and 1. The intersection of the GroupLoops is empty. 3722 // Next, 0 and 2. Again, the intersection of their GroupLoops is empty. 3723 // Next 0 and 3. The intersection of their GroupLoop = {1}, not empty, 3724 // so Pair[3].Group = {0,3} and Done = false (that is, 0 will not be added 3725 // to either Separable or Coupled). 3726 // 3727 // Next, we consider 1 and 2. The intersection of the GroupLoops is empty. 3728 // Next, 1 and 3. The intersection of their GroupLoops = {2}, not empty, 3729 // so Pair[3].Group = {0, 1, 3} and Done = false. 3730 // 3731 // Next, we compare 2 against 3. The intersection of the GroupLoops is empty. 3732 // Since Done remains true, we add 2 to the set of Separable pairs. 3733 // 3734 // Finally, we consider 3. There's nothing to compare it with, 3735 // so Done remains true and we add it to the Coupled set. 3736 // Pair[3].Group = {0, 1, 3} and GroupLoops = {1, 2, 5}. 3737 // 3738 // In the end, we've got 1 separable subscript and 1 coupled group. 3739 for (unsigned SI = 0; SI < Pairs; ++SI) { 3740 if (Pair[SI].Classification == Subscript::NonLinear) { 3741 // ignore these, but collect loops for later 3742 ++NonlinearSubscriptPairs; 3743 collectCommonLoops(Pair[SI].Src, 3744 LI->getLoopFor(Src->getParent()), 3745 Pair[SI].Loops); 3746 collectCommonLoops(Pair[SI].Dst, 3747 LI->getLoopFor(Dst->getParent()), 3748 Pair[SI].Loops); 3749 Result.Consistent = false; 3750 } else if (Pair[SI].Classification == Subscript::ZIV) { 3751 // always separable 3752 Separable.set(SI); 3753 } 3754 else { 3755 // SIV, RDIV, or MIV, so check for coupled group 3756 bool Done = true; 3757 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 3758 SmallBitVector Intersection = Pair[SI].GroupLoops; 3759 Intersection &= Pair[SJ].GroupLoops; 3760 if (Intersection.any()) { 3761 // accumulate set of all the loops in group 3762 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 3763 // accumulate set of all subscripts in group 3764 Pair[SJ].Group |= Pair[SI].Group; 3765 Done = false; 3766 } 3767 } 3768 if (Done) { 3769 if (Pair[SI].Group.count() == 1) { 3770 Separable.set(SI); 3771 ++SeparableSubscriptPairs; 3772 } 3773 else { 3774 Coupled.set(SI); 3775 ++CoupledSubscriptPairs; 3776 } 3777 } 3778 } 3779 } 3780 3781 LLVM_DEBUG(dbgs() << " Separable = "); 3782 LLVM_DEBUG(dumpSmallBitVector(Separable)); 3783 LLVM_DEBUG(dbgs() << " Coupled = "); 3784 LLVM_DEBUG(dumpSmallBitVector(Coupled)); 3785 3786 Constraint NewConstraint; 3787 NewConstraint.setAny(SE); 3788 3789 // test separable subscripts 3790 for (unsigned SI : Separable.set_bits()) { 3791 LLVM_DEBUG(dbgs() << "testing subscript " << SI); 3792 switch (Pair[SI].Classification) { 3793 case Subscript::ZIV: 3794 LLVM_DEBUG(dbgs() << ", ZIV\n"); 3795 if (testZIV(Pair[SI].Src, Pair[SI].Dst, Result)) 3796 return nullptr; 3797 break; 3798 case Subscript::SIV: { 3799 LLVM_DEBUG(dbgs() << ", SIV\n"); 3800 unsigned Level; 3801 const SCEV *SplitIter = nullptr; 3802 if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level, Result, NewConstraint, 3803 SplitIter)) 3804 return nullptr; 3805 break; 3806 } 3807 case Subscript::RDIV: 3808 LLVM_DEBUG(dbgs() << ", RDIV\n"); 3809 if (testRDIV(Pair[SI].Src, Pair[SI].Dst, Result)) 3810 return nullptr; 3811 break; 3812 case Subscript::MIV: 3813 LLVM_DEBUG(dbgs() << ", MIV\n"); 3814 if (testMIV(Pair[SI].Src, Pair[SI].Dst, Pair[SI].Loops, Result)) 3815 return nullptr; 3816 break; 3817 default: 3818 llvm_unreachable("subscript has unexpected classification"); 3819 } 3820 } 3821 3822 if (Coupled.count()) { 3823 // test coupled subscript groups 3824 LLVM_DEBUG(dbgs() << "starting on coupled subscripts\n"); 3825 LLVM_DEBUG(dbgs() << "MaxLevels + 1 = " << MaxLevels + 1 << "\n"); 3826 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 3827 for (unsigned II = 0; II <= MaxLevels; ++II) 3828 Constraints[II].setAny(SE); 3829 for (unsigned SI : Coupled.set_bits()) { 3830 LLVM_DEBUG(dbgs() << "testing subscript group " << SI << " { "); 3831 SmallBitVector Group(Pair[SI].Group); 3832 SmallBitVector Sivs(Pairs); 3833 SmallBitVector Mivs(Pairs); 3834 SmallBitVector ConstrainedLevels(MaxLevels + 1); 3835 SmallVector<Subscript *, 4> PairsInGroup; 3836 for (unsigned SJ : Group.set_bits()) { 3837 LLVM_DEBUG(dbgs() << SJ << " "); 3838 if (Pair[SJ].Classification == Subscript::SIV) 3839 Sivs.set(SJ); 3840 else 3841 Mivs.set(SJ); 3842 PairsInGroup.push_back(&Pair[SJ]); 3843 } 3844 unifySubscriptType(PairsInGroup); 3845 LLVM_DEBUG(dbgs() << "}\n"); 3846 while (Sivs.any()) { 3847 bool Changed = false; 3848 for (unsigned SJ : Sivs.set_bits()) { 3849 LLVM_DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n"); 3850 // SJ is an SIV subscript that's part of the current coupled group 3851 unsigned Level; 3852 const SCEV *SplitIter = nullptr; 3853 LLVM_DEBUG(dbgs() << "SIV\n"); 3854 if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, Result, NewConstraint, 3855 SplitIter)) 3856 return nullptr; 3857 ConstrainedLevels.set(Level); 3858 if (intersectConstraints(&Constraints[Level], &NewConstraint)) { 3859 if (Constraints[Level].isEmpty()) { 3860 ++DeltaIndependence; 3861 return nullptr; 3862 } 3863 Changed = true; 3864 } 3865 Sivs.reset(SJ); 3866 } 3867 if (Changed) { 3868 // propagate, possibly creating new SIVs and ZIVs 3869 LLVM_DEBUG(dbgs() << " propagating\n"); 3870 LLVM_DEBUG(dbgs() << "\tMivs = "); 3871 LLVM_DEBUG(dumpSmallBitVector(Mivs)); 3872 for (unsigned SJ : Mivs.set_bits()) { 3873 // SJ is an MIV subscript that's part of the current coupled group 3874 LLVM_DEBUG(dbgs() << "\tSJ = " << SJ << "\n"); 3875 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, 3876 Constraints, Result.Consistent)) { 3877 LLVM_DEBUG(dbgs() << "\t Changed\n"); 3878 ++DeltaPropagations; 3879 Pair[SJ].Classification = 3880 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), 3881 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), 3882 Pair[SJ].Loops); 3883 switch (Pair[SJ].Classification) { 3884 case Subscript::ZIV: 3885 LLVM_DEBUG(dbgs() << "ZIV\n"); 3886 if (testZIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) 3887 return nullptr; 3888 Mivs.reset(SJ); 3889 break; 3890 case Subscript::SIV: 3891 Sivs.set(SJ); 3892 Mivs.reset(SJ); 3893 break; 3894 case Subscript::RDIV: 3895 case Subscript::MIV: 3896 break; 3897 default: 3898 llvm_unreachable("bad subscript classification"); 3899 } 3900 } 3901 } 3902 } 3903 } 3904 3905 // test & propagate remaining RDIVs 3906 for (unsigned SJ : Mivs.set_bits()) { 3907 if (Pair[SJ].Classification == Subscript::RDIV) { 3908 LLVM_DEBUG(dbgs() << "RDIV test\n"); 3909 if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) 3910 return nullptr; 3911 // I don't yet understand how to propagate RDIV results 3912 Mivs.reset(SJ); 3913 } 3914 } 3915 3916 // test remaining MIVs 3917 // This code is temporary. 3918 // Better to somehow test all remaining subscripts simultaneously. 3919 for (unsigned SJ : Mivs.set_bits()) { 3920 if (Pair[SJ].Classification == Subscript::MIV) { 3921 LLVM_DEBUG(dbgs() << "MIV test\n"); 3922 if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result)) 3923 return nullptr; 3924 } 3925 else 3926 llvm_unreachable("expected only MIV subscripts at this point"); 3927 } 3928 3929 // update Result.DV from constraint vector 3930 LLVM_DEBUG(dbgs() << " updating\n"); 3931 for (unsigned SJ : ConstrainedLevels.set_bits()) { 3932 if (SJ > CommonLevels) 3933 break; 3934 updateDirection(Result.DV[SJ - 1], Constraints[SJ]); 3935 if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE) 3936 return nullptr; 3937 } 3938 } 3939 } 3940 3941 // Make sure the Scalar flags are set correctly. 3942 SmallBitVector CompleteLoops(MaxLevels + 1); 3943 for (unsigned SI = 0; SI < Pairs; ++SI) 3944 CompleteLoops |= Pair[SI].Loops; 3945 for (unsigned II = 1; II <= CommonLevels; ++II) 3946 if (CompleteLoops[II]) 3947 Result.DV[II - 1].Scalar = false; 3948 3949 if (PossiblyLoopIndependent) { 3950 // Make sure the LoopIndependent flag is set correctly. 3951 // All directions must include equal, otherwise no 3952 // loop-independent dependence is possible. 3953 for (unsigned II = 1; II <= CommonLevels; ++II) { 3954 if (!(Result.getDirection(II) & Dependence::DVEntry::EQ)) { 3955 Result.LoopIndependent = false; 3956 break; 3957 } 3958 } 3959 } 3960 else { 3961 // On the other hand, if all directions are equal and there's no 3962 // loop-independent dependence possible, then no dependence exists. 3963 bool AllEqual = true; 3964 for (unsigned II = 1; II <= CommonLevels; ++II) { 3965 if (Result.getDirection(II) != Dependence::DVEntry::EQ) { 3966 AllEqual = false; 3967 break; 3968 } 3969 } 3970 if (AllEqual) 3971 return nullptr; 3972 } 3973 3974 return std::make_unique<FullDependence>(std::move(Result)); 3975 } 3976 3977 //===----------------------------------------------------------------------===// 3978 // getSplitIteration - 3979 // Rather than spend rarely-used space recording the splitting iteration 3980 // during the Weak-Crossing SIV test, we re-compute it on demand. 3981 // The re-computation is basically a repeat of the entire dependence test, 3982 // though simplified since we know that the dependence exists. 3983 // It's tedious, since we must go through all propagations, etc. 3984 // 3985 // Care is required to keep this code up to date with respect to the routine 3986 // above, depends(). 3987 // 3988 // Generally, the dependence analyzer will be used to build 3989 // a dependence graph for a function (basically a map from instructions 3990 // to dependences). Looking for cycles in the graph shows us loops 3991 // that cannot be trivially vectorized/parallelized. 3992 // 3993 // We can try to improve the situation by examining all the dependences 3994 // that make up the cycle, looking for ones we can break. 3995 // Sometimes, peeling the first or last iteration of a loop will break 3996 // dependences, and we've got flags for those possibilities. 3997 // Sometimes, splitting a loop at some other iteration will do the trick, 3998 // and we've got a flag for that case. Rather than waste the space to 3999 // record the exact iteration (since we rarely know), we provide 4000 // a method that calculates the iteration. It's a drag that it must work 4001 // from scratch, but wonderful in that it's possible. 4002 // 4003 // Here's an example: 4004 // 4005 // for (i = 0; i < 10; i++) 4006 // A[i] = ... 4007 // ... = A[11 - i] 4008 // 4009 // There's a loop-carried flow dependence from the store to the load, 4010 // found by the weak-crossing SIV test. The dependence will have a flag, 4011 // indicating that the dependence can be broken by splitting the loop. 4012 // Calling getSplitIteration will return 5. 4013 // Splitting the loop breaks the dependence, like so: 4014 // 4015 // for (i = 0; i <= 5; i++) 4016 // A[i] = ... 4017 // ... = A[11 - i] 4018 // for (i = 6; i < 10; i++) 4019 // A[i] = ... 4020 // ... = A[11 - i] 4021 // 4022 // breaks the dependence and allows us to vectorize/parallelize 4023 // both loops. 4024 const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep, 4025 unsigned SplitLevel) { 4026 assert(Dep.isSplitable(SplitLevel) && 4027 "Dep should be splitable at SplitLevel"); 4028 Instruction *Src = Dep.getSrc(); 4029 Instruction *Dst = Dep.getDst(); 4030 assert(Src->mayReadFromMemory() || Src->mayWriteToMemory()); 4031 assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory()); 4032 assert(isLoadOrStore(Src)); 4033 assert(isLoadOrStore(Dst)); 4034 Value *SrcPtr = getLoadStorePointerOperand(Src); 4035 Value *DstPtr = getLoadStorePointerOperand(Dst); 4036 assert(underlyingObjectsAlias( 4037 AA, F->getParent()->getDataLayout(), MemoryLocation::get(Dst), 4038 MemoryLocation::get(Src)) == AliasResult::MustAlias); 4039 4040 // establish loop nesting levels 4041 establishNestingLevels(Src, Dst); 4042 4043 FullDependence Result(Src, Dst, false, CommonLevels); 4044 4045 unsigned Pairs = 1; 4046 SmallVector<Subscript, 2> Pair(Pairs); 4047 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); 4048 const SCEV *DstSCEV = SE->getSCEV(DstPtr); 4049 Pair[0].Src = SrcSCEV; 4050 Pair[0].Dst = DstSCEV; 4051 4052 if (Delinearize) { 4053 if (tryDelinearize(Src, Dst, Pair)) { 4054 LLVM_DEBUG(dbgs() << " delinearized\n"); 4055 Pairs = Pair.size(); 4056 } 4057 } 4058 4059 for (unsigned P = 0; P < Pairs; ++P) { 4060 Pair[P].Loops.resize(MaxLevels + 1); 4061 Pair[P].GroupLoops.resize(MaxLevels + 1); 4062 Pair[P].Group.resize(Pairs); 4063 removeMatchingExtensions(&Pair[P]); 4064 Pair[P].Classification = 4065 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), 4066 Pair[P].Dst, LI->getLoopFor(Dst->getParent()), 4067 Pair[P].Loops); 4068 Pair[P].GroupLoops = Pair[P].Loops; 4069 Pair[P].Group.set(P); 4070 } 4071 4072 SmallBitVector Separable(Pairs); 4073 SmallBitVector Coupled(Pairs); 4074 4075 // partition subscripts into separable and minimally-coupled groups 4076 for (unsigned SI = 0; SI < Pairs; ++SI) { 4077 if (Pair[SI].Classification == Subscript::NonLinear) { 4078 // ignore these, but collect loops for later 4079 collectCommonLoops(Pair[SI].Src, 4080 LI->getLoopFor(Src->getParent()), 4081 Pair[SI].Loops); 4082 collectCommonLoops(Pair[SI].Dst, 4083 LI->getLoopFor(Dst->getParent()), 4084 Pair[SI].Loops); 4085 Result.Consistent = false; 4086 } 4087 else if (Pair[SI].Classification == Subscript::ZIV) 4088 Separable.set(SI); 4089 else { 4090 // SIV, RDIV, or MIV, so check for coupled group 4091 bool Done = true; 4092 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 4093 SmallBitVector Intersection = Pair[SI].GroupLoops; 4094 Intersection &= Pair[SJ].GroupLoops; 4095 if (Intersection.any()) { 4096 // accumulate set of all the loops in group 4097 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 4098 // accumulate set of all subscripts in group 4099 Pair[SJ].Group |= Pair[SI].Group; 4100 Done = false; 4101 } 4102 } 4103 if (Done) { 4104 if (Pair[SI].Group.count() == 1) 4105 Separable.set(SI); 4106 else 4107 Coupled.set(SI); 4108 } 4109 } 4110 } 4111 4112 Constraint NewConstraint; 4113 NewConstraint.setAny(SE); 4114 4115 // test separable subscripts 4116 for (unsigned SI : Separable.set_bits()) { 4117 switch (Pair[SI].Classification) { 4118 case Subscript::SIV: { 4119 unsigned Level; 4120 const SCEV *SplitIter = nullptr; 4121 (void) testSIV(Pair[SI].Src, Pair[SI].Dst, Level, 4122 Result, NewConstraint, SplitIter); 4123 if (Level == SplitLevel) { 4124 assert(SplitIter != nullptr); 4125 return SplitIter; 4126 } 4127 break; 4128 } 4129 case Subscript::ZIV: 4130 case Subscript::RDIV: 4131 case Subscript::MIV: 4132 break; 4133 default: 4134 llvm_unreachable("subscript has unexpected classification"); 4135 } 4136 } 4137 4138 if (Coupled.count()) { 4139 // test coupled subscript groups 4140 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 4141 for (unsigned II = 0; II <= MaxLevels; ++II) 4142 Constraints[II].setAny(SE); 4143 for (unsigned SI : Coupled.set_bits()) { 4144 SmallBitVector Group(Pair[SI].Group); 4145 SmallBitVector Sivs(Pairs); 4146 SmallBitVector Mivs(Pairs); 4147 SmallBitVector ConstrainedLevels(MaxLevels + 1); 4148 for (unsigned SJ : Group.set_bits()) { 4149 if (Pair[SJ].Classification == Subscript::SIV) 4150 Sivs.set(SJ); 4151 else 4152 Mivs.set(SJ); 4153 } 4154 while (Sivs.any()) { 4155 bool Changed = false; 4156 for (unsigned SJ : Sivs.set_bits()) { 4157 // SJ is an SIV subscript that's part of the current coupled group 4158 unsigned Level; 4159 const SCEV *SplitIter = nullptr; 4160 (void) testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, 4161 Result, NewConstraint, SplitIter); 4162 if (Level == SplitLevel && SplitIter) 4163 return SplitIter; 4164 ConstrainedLevels.set(Level); 4165 if (intersectConstraints(&Constraints[Level], &NewConstraint)) 4166 Changed = true; 4167 Sivs.reset(SJ); 4168 } 4169 if (Changed) { 4170 // propagate, possibly creating new SIVs and ZIVs 4171 for (unsigned SJ : Mivs.set_bits()) { 4172 // SJ is an MIV subscript that's part of the current coupled group 4173 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, 4174 Pair[SJ].Loops, Constraints, Result.Consistent)) { 4175 Pair[SJ].Classification = 4176 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), 4177 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), 4178 Pair[SJ].Loops); 4179 switch (Pair[SJ].Classification) { 4180 case Subscript::ZIV: 4181 Mivs.reset(SJ); 4182 break; 4183 case Subscript::SIV: 4184 Sivs.set(SJ); 4185 Mivs.reset(SJ); 4186 break; 4187 case Subscript::RDIV: 4188 case Subscript::MIV: 4189 break; 4190 default: 4191 llvm_unreachable("bad subscript classification"); 4192 } 4193 } 4194 } 4195 } 4196 } 4197 } 4198 } 4199 llvm_unreachable("somehow reached end of routine"); 4200 return nullptr; 4201 } 4202