1 //===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // DependenceAnalysis is an LLVM pass that analyses dependences between memory 10 // accesses. Currently, it is an (incomplete) implementation of the approach 11 // described in 12 // 13 // Practical Dependence Testing 14 // Goff, Kennedy, Tseng 15 // PLDI 1991 16 // 17 // There's a single entry point that analyzes the dependence between a pair 18 // of memory references in a function, returning either NULL, for no dependence, 19 // or a more-or-less detailed description of the dependence between them. 20 // 21 // Currently, the implementation cannot propagate constraints between 22 // coupled RDIV subscripts and lacks a multi-subscript MIV test. 23 // Both of these are conservative weaknesses; 24 // that is, not a source of correctness problems. 25 // 26 // Since Clang linearizes some array subscripts, the dependence 27 // analysis is using SCEV->delinearize to recover the representation of multiple 28 // subscripts, and thus avoid the more expensive and less precise MIV tests. The 29 // delinearization is controlled by the flag -da-delinearize. 30 // 31 // We should pay some careful attention to the possibility of integer overflow 32 // in the implementation of the various tests. This could happen with Add, 33 // Subtract, or Multiply, with both APInt's and SCEV's. 34 // 35 // Some non-linear subscript pairs can be handled by the GCD test 36 // (and perhaps other tests). 37 // Should explore how often these things occur. 38 // 39 // Finally, it seems like certain test cases expose weaknesses in the SCEV 40 // simplification, especially in the handling of sign and zero extensions. 41 // It could be useful to spend time exploring these. 42 // 43 // Please note that this is work in progress and the interface is subject to 44 // change. 45 // 46 //===----------------------------------------------------------------------===// 47 // // 48 // In memory of Ken Kennedy, 1945 - 2007 // 49 // // 50 //===----------------------------------------------------------------------===// 51 52 #include "llvm/Analysis/DependenceAnalysis.h" 53 #include "llvm/ADT/STLExtras.h" 54 #include "llvm/ADT/Statistic.h" 55 #include "llvm/Analysis/AliasAnalysis.h" 56 #include "llvm/Analysis/Delinearization.h" 57 #include "llvm/Analysis/LoopInfo.h" 58 #include "llvm/Analysis/ScalarEvolution.h" 59 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 60 #include "llvm/Analysis/ValueTracking.h" 61 #include "llvm/Config/llvm-config.h" 62 #include "llvm/IR/InstIterator.h" 63 #include "llvm/IR/Module.h" 64 #include "llvm/IR/Operator.h" 65 #include "llvm/InitializePasses.h" 66 #include "llvm/Support/CommandLine.h" 67 #include "llvm/Support/Debug.h" 68 #include "llvm/Support/ErrorHandling.h" 69 #include "llvm/Support/raw_ostream.h" 70 71 using namespace llvm; 72 73 #define DEBUG_TYPE "da" 74 75 //===----------------------------------------------------------------------===// 76 // statistics 77 78 STATISTIC(TotalArrayPairs, "Array pairs tested"); 79 STATISTIC(SeparableSubscriptPairs, "Separable subscript pairs"); 80 STATISTIC(CoupledSubscriptPairs, "Coupled subscript pairs"); 81 STATISTIC(NonlinearSubscriptPairs, "Nonlinear subscript pairs"); 82 STATISTIC(ZIVapplications, "ZIV applications"); 83 STATISTIC(ZIVindependence, "ZIV independence"); 84 STATISTIC(StrongSIVapplications, "Strong SIV applications"); 85 STATISTIC(StrongSIVsuccesses, "Strong SIV successes"); 86 STATISTIC(StrongSIVindependence, "Strong SIV independence"); 87 STATISTIC(WeakCrossingSIVapplications, "Weak-Crossing SIV applications"); 88 STATISTIC(WeakCrossingSIVsuccesses, "Weak-Crossing SIV successes"); 89 STATISTIC(WeakCrossingSIVindependence, "Weak-Crossing SIV independence"); 90 STATISTIC(ExactSIVapplications, "Exact SIV applications"); 91 STATISTIC(ExactSIVsuccesses, "Exact SIV successes"); 92 STATISTIC(ExactSIVindependence, "Exact SIV independence"); 93 STATISTIC(WeakZeroSIVapplications, "Weak-Zero SIV applications"); 94 STATISTIC(WeakZeroSIVsuccesses, "Weak-Zero SIV successes"); 95 STATISTIC(WeakZeroSIVindependence, "Weak-Zero SIV independence"); 96 STATISTIC(ExactRDIVapplications, "Exact RDIV applications"); 97 STATISTIC(ExactRDIVindependence, "Exact RDIV independence"); 98 STATISTIC(SymbolicRDIVapplications, "Symbolic RDIV applications"); 99 STATISTIC(SymbolicRDIVindependence, "Symbolic RDIV independence"); 100 STATISTIC(DeltaApplications, "Delta applications"); 101 STATISTIC(DeltaSuccesses, "Delta successes"); 102 STATISTIC(DeltaIndependence, "Delta independence"); 103 STATISTIC(DeltaPropagations, "Delta propagations"); 104 STATISTIC(GCDapplications, "GCD applications"); 105 STATISTIC(GCDsuccesses, "GCD successes"); 106 STATISTIC(GCDindependence, "GCD independence"); 107 STATISTIC(BanerjeeApplications, "Banerjee applications"); 108 STATISTIC(BanerjeeIndependence, "Banerjee independence"); 109 STATISTIC(BanerjeeSuccesses, "Banerjee successes"); 110 111 static cl::opt<bool> 112 Delinearize("da-delinearize", cl::init(true), cl::Hidden, cl::ZeroOrMore, 113 cl::desc("Try to delinearize array references.")); 114 static cl::opt<bool> DisableDelinearizationChecks( 115 "da-disable-delinearization-checks", cl::init(false), cl::Hidden, 116 cl::ZeroOrMore, 117 cl::desc( 118 "Disable checks that try to statically verify validity of " 119 "delinearized subscripts. Enabling this option may result in incorrect " 120 "dependence vectors for languages that allow the subscript of one " 121 "dimension to underflow or overflow into another dimension.")); 122 123 static cl::opt<unsigned> MIVMaxLevelThreshold( 124 "da-miv-max-level-threshold", cl::init(7), cl::Hidden, cl::ZeroOrMore, 125 cl::desc("Maximum depth allowed for the recursive algorithm used to " 126 "explore MIV direction vectors.")); 127 128 //===----------------------------------------------------------------------===// 129 // basics 130 131 DependenceAnalysis::Result 132 DependenceAnalysis::run(Function &F, FunctionAnalysisManager &FAM) { 133 auto &AA = FAM.getResult<AAManager>(F); 134 auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F); 135 auto &LI = FAM.getResult<LoopAnalysis>(F); 136 return DependenceInfo(&F, &AA, &SE, &LI); 137 } 138 139 AnalysisKey DependenceAnalysis::Key; 140 141 INITIALIZE_PASS_BEGIN(DependenceAnalysisWrapperPass, "da", 142 "Dependence Analysis", true, true) 143 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 144 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) 145 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 146 INITIALIZE_PASS_END(DependenceAnalysisWrapperPass, "da", "Dependence Analysis", 147 true, true) 148 149 char DependenceAnalysisWrapperPass::ID = 0; 150 151 DependenceAnalysisWrapperPass::DependenceAnalysisWrapperPass() 152 : FunctionPass(ID) { 153 initializeDependenceAnalysisWrapperPassPass(*PassRegistry::getPassRegistry()); 154 } 155 156 FunctionPass *llvm::createDependenceAnalysisWrapperPass() { 157 return new DependenceAnalysisWrapperPass(); 158 } 159 160 bool DependenceAnalysisWrapperPass::runOnFunction(Function &F) { 161 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); 162 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 163 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 164 info.reset(new DependenceInfo(&F, &AA, &SE, &LI)); 165 return false; 166 } 167 168 DependenceInfo &DependenceAnalysisWrapperPass::getDI() const { return *info; } 169 170 void DependenceAnalysisWrapperPass::releaseMemory() { info.reset(); } 171 172 void DependenceAnalysisWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { 173 AU.setPreservesAll(); 174 AU.addRequiredTransitive<AAResultsWrapperPass>(); 175 AU.addRequiredTransitive<ScalarEvolutionWrapperPass>(); 176 AU.addRequiredTransitive<LoopInfoWrapperPass>(); 177 } 178 179 // Used to test the dependence analyzer. 180 // Looks through the function, noting instructions that may access memory. 181 // Calls depends() on every possible pair and prints out the result. 182 // Ignores all other instructions. 183 static void dumpExampleDependence(raw_ostream &OS, DependenceInfo *DA) { 184 auto *F = DA->getFunction(); 185 for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F); SrcI != SrcE; 186 ++SrcI) { 187 if (SrcI->mayReadOrWriteMemory()) { 188 for (inst_iterator DstI = SrcI, DstE = inst_end(F); 189 DstI != DstE; ++DstI) { 190 if (DstI->mayReadOrWriteMemory()) { 191 OS << "Src:" << *SrcI << " --> Dst:" << *DstI << "\n"; 192 OS << " da analyze - "; 193 if (auto D = DA->depends(&*SrcI, &*DstI, true)) { 194 D->dump(OS); 195 for (unsigned Level = 1; Level <= D->getLevels(); Level++) { 196 if (D->isSplitable(Level)) { 197 OS << " da analyze - split level = " << Level; 198 OS << ", iteration = " << *DA->getSplitIteration(*D, Level); 199 OS << "!\n"; 200 } 201 } 202 } 203 else 204 OS << "none!\n"; 205 } 206 } 207 } 208 } 209 } 210 211 void DependenceAnalysisWrapperPass::print(raw_ostream &OS, 212 const Module *) const { 213 dumpExampleDependence(OS, info.get()); 214 } 215 216 PreservedAnalyses 217 DependenceAnalysisPrinterPass::run(Function &F, FunctionAnalysisManager &FAM) { 218 OS << "'Dependence Analysis' for function '" << F.getName() << "':\n"; 219 dumpExampleDependence(OS, &FAM.getResult<DependenceAnalysis>(F)); 220 return PreservedAnalyses::all(); 221 } 222 223 //===----------------------------------------------------------------------===// 224 // Dependence methods 225 226 // Returns true if this is an input dependence. 227 bool Dependence::isInput() const { 228 return Src->mayReadFromMemory() && Dst->mayReadFromMemory(); 229 } 230 231 232 // Returns true if this is an output dependence. 233 bool Dependence::isOutput() const { 234 return Src->mayWriteToMemory() && Dst->mayWriteToMemory(); 235 } 236 237 238 // Returns true if this is an flow (aka true) dependence. 239 bool Dependence::isFlow() const { 240 return Src->mayWriteToMemory() && Dst->mayReadFromMemory(); 241 } 242 243 244 // Returns true if this is an anti dependence. 245 bool Dependence::isAnti() const { 246 return Src->mayReadFromMemory() && Dst->mayWriteToMemory(); 247 } 248 249 250 // Returns true if a particular level is scalar; that is, 251 // if no subscript in the source or destination mention the induction 252 // variable associated with the loop at this level. 253 // Leave this out of line, so it will serve as a virtual method anchor 254 bool Dependence::isScalar(unsigned level) const { 255 return false; 256 } 257 258 259 //===----------------------------------------------------------------------===// 260 // FullDependence methods 261 262 FullDependence::FullDependence(Instruction *Source, Instruction *Destination, 263 bool PossiblyLoopIndependent, 264 unsigned CommonLevels) 265 : Dependence(Source, Destination), Levels(CommonLevels), 266 LoopIndependent(PossiblyLoopIndependent) { 267 Consistent = true; 268 if (CommonLevels) 269 DV = std::make_unique<DVEntry[]>(CommonLevels); 270 } 271 272 // The rest are simple getters that hide the implementation. 273 274 // getDirection - Returns the direction associated with a particular level. 275 unsigned FullDependence::getDirection(unsigned Level) const { 276 assert(0 < Level && Level <= Levels && "Level out of range"); 277 return DV[Level - 1].Direction; 278 } 279 280 281 // Returns the distance (or NULL) associated with a particular level. 282 const SCEV *FullDependence::getDistance(unsigned Level) const { 283 assert(0 < Level && Level <= Levels && "Level out of range"); 284 return DV[Level - 1].Distance; 285 } 286 287 288 // Returns true if a particular level is scalar; that is, 289 // if no subscript in the source or destination mention the induction 290 // variable associated with the loop at this level. 291 bool FullDependence::isScalar(unsigned Level) const { 292 assert(0 < Level && Level <= Levels && "Level out of range"); 293 return DV[Level - 1].Scalar; 294 } 295 296 297 // Returns true if peeling the first iteration from this loop 298 // will break this dependence. 299 bool FullDependence::isPeelFirst(unsigned Level) const { 300 assert(0 < Level && Level <= Levels && "Level out of range"); 301 return DV[Level - 1].PeelFirst; 302 } 303 304 305 // Returns true if peeling the last iteration from this loop 306 // will break this dependence. 307 bool FullDependence::isPeelLast(unsigned Level) const { 308 assert(0 < Level && Level <= Levels && "Level out of range"); 309 return DV[Level - 1].PeelLast; 310 } 311 312 313 // Returns true if splitting this loop will break the dependence. 314 bool FullDependence::isSplitable(unsigned Level) const { 315 assert(0 < Level && Level <= Levels && "Level out of range"); 316 return DV[Level - 1].Splitable; 317 } 318 319 320 //===----------------------------------------------------------------------===// 321 // DependenceInfo::Constraint methods 322 323 // If constraint is a point <X, Y>, returns X. 324 // Otherwise assert. 325 const SCEV *DependenceInfo::Constraint::getX() const { 326 assert(Kind == Point && "Kind should be Point"); 327 return A; 328 } 329 330 331 // If constraint is a point <X, Y>, returns Y. 332 // Otherwise assert. 333 const SCEV *DependenceInfo::Constraint::getY() const { 334 assert(Kind == Point && "Kind should be Point"); 335 return B; 336 } 337 338 339 // If constraint is a line AX + BY = C, returns A. 340 // Otherwise assert. 341 const SCEV *DependenceInfo::Constraint::getA() const { 342 assert((Kind == Line || Kind == Distance) && 343 "Kind should be Line (or Distance)"); 344 return A; 345 } 346 347 348 // If constraint is a line AX + BY = C, returns B. 349 // Otherwise assert. 350 const SCEV *DependenceInfo::Constraint::getB() const { 351 assert((Kind == Line || Kind == Distance) && 352 "Kind should be Line (or Distance)"); 353 return B; 354 } 355 356 357 // If constraint is a line AX + BY = C, returns C. 358 // Otherwise assert. 359 const SCEV *DependenceInfo::Constraint::getC() const { 360 assert((Kind == Line || Kind == Distance) && 361 "Kind should be Line (or Distance)"); 362 return C; 363 } 364 365 366 // If constraint is a distance, returns D. 367 // Otherwise assert. 368 const SCEV *DependenceInfo::Constraint::getD() const { 369 assert(Kind == Distance && "Kind should be Distance"); 370 return SE->getNegativeSCEV(C); 371 } 372 373 374 // Returns the loop associated with this constraint. 375 const Loop *DependenceInfo::Constraint::getAssociatedLoop() const { 376 assert((Kind == Distance || Kind == Line || Kind == Point) && 377 "Kind should be Distance, Line, or Point"); 378 return AssociatedLoop; 379 } 380 381 void DependenceInfo::Constraint::setPoint(const SCEV *X, const SCEV *Y, 382 const Loop *CurLoop) { 383 Kind = Point; 384 A = X; 385 B = Y; 386 AssociatedLoop = CurLoop; 387 } 388 389 void DependenceInfo::Constraint::setLine(const SCEV *AA, const SCEV *BB, 390 const SCEV *CC, const Loop *CurLoop) { 391 Kind = Line; 392 A = AA; 393 B = BB; 394 C = CC; 395 AssociatedLoop = CurLoop; 396 } 397 398 void DependenceInfo::Constraint::setDistance(const SCEV *D, 399 const Loop *CurLoop) { 400 Kind = Distance; 401 A = SE->getOne(D->getType()); 402 B = SE->getNegativeSCEV(A); 403 C = SE->getNegativeSCEV(D); 404 AssociatedLoop = CurLoop; 405 } 406 407 void DependenceInfo::Constraint::setEmpty() { Kind = Empty; } 408 409 void DependenceInfo::Constraint::setAny(ScalarEvolution *NewSE) { 410 SE = NewSE; 411 Kind = Any; 412 } 413 414 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 415 // For debugging purposes. Dumps the constraint out to OS. 416 LLVM_DUMP_METHOD void DependenceInfo::Constraint::dump(raw_ostream &OS) const { 417 if (isEmpty()) 418 OS << " Empty\n"; 419 else if (isAny()) 420 OS << " Any\n"; 421 else if (isPoint()) 422 OS << " Point is <" << *getX() << ", " << *getY() << ">\n"; 423 else if (isDistance()) 424 OS << " Distance is " << *getD() << 425 " (" << *getA() << "*X + " << *getB() << "*Y = " << *getC() << ")\n"; 426 else if (isLine()) 427 OS << " Line is " << *getA() << "*X + " << 428 *getB() << "*Y = " << *getC() << "\n"; 429 else 430 llvm_unreachable("unknown constraint type in Constraint::dump"); 431 } 432 #endif 433 434 435 // Updates X with the intersection 436 // of the Constraints X and Y. Returns true if X has changed. 437 // Corresponds to Figure 4 from the paper 438 // 439 // Practical Dependence Testing 440 // Goff, Kennedy, Tseng 441 // PLDI 1991 442 bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) { 443 ++DeltaApplications; 444 LLVM_DEBUG(dbgs() << "\tintersect constraints\n"); 445 LLVM_DEBUG(dbgs() << "\t X ="; X->dump(dbgs())); 446 LLVM_DEBUG(dbgs() << "\t Y ="; Y->dump(dbgs())); 447 assert(!Y->isPoint() && "Y must not be a Point"); 448 if (X->isAny()) { 449 if (Y->isAny()) 450 return false; 451 *X = *Y; 452 return true; 453 } 454 if (X->isEmpty()) 455 return false; 456 if (Y->isEmpty()) { 457 X->setEmpty(); 458 return true; 459 } 460 461 if (X->isDistance() && Y->isDistance()) { 462 LLVM_DEBUG(dbgs() << "\t intersect 2 distances\n"); 463 if (isKnownPredicate(CmpInst::ICMP_EQ, X->getD(), Y->getD())) 464 return false; 465 if (isKnownPredicate(CmpInst::ICMP_NE, X->getD(), Y->getD())) { 466 X->setEmpty(); 467 ++DeltaSuccesses; 468 return true; 469 } 470 // Hmmm, interesting situation. 471 // I guess if either is constant, keep it and ignore the other. 472 if (isa<SCEVConstant>(Y->getD())) { 473 *X = *Y; 474 return true; 475 } 476 return false; 477 } 478 479 // At this point, the pseudo-code in Figure 4 of the paper 480 // checks if (X->isPoint() && Y->isPoint()). 481 // This case can't occur in our implementation, 482 // since a Point can only arise as the result of intersecting 483 // two Line constraints, and the right-hand value, Y, is never 484 // the result of an intersection. 485 assert(!(X->isPoint() && Y->isPoint()) && 486 "We shouldn't ever see X->isPoint() && Y->isPoint()"); 487 488 if (X->isLine() && Y->isLine()) { 489 LLVM_DEBUG(dbgs() << "\t intersect 2 lines\n"); 490 const SCEV *Prod1 = SE->getMulExpr(X->getA(), Y->getB()); 491 const SCEV *Prod2 = SE->getMulExpr(X->getB(), Y->getA()); 492 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) { 493 // slopes are equal, so lines are parallel 494 LLVM_DEBUG(dbgs() << "\t\tsame slope\n"); 495 Prod1 = SE->getMulExpr(X->getC(), Y->getB()); 496 Prod2 = SE->getMulExpr(X->getB(), Y->getC()); 497 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) 498 return false; 499 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 500 X->setEmpty(); 501 ++DeltaSuccesses; 502 return true; 503 } 504 return false; 505 } 506 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 507 // slopes differ, so lines intersect 508 LLVM_DEBUG(dbgs() << "\t\tdifferent slopes\n"); 509 const SCEV *C1B2 = SE->getMulExpr(X->getC(), Y->getB()); 510 const SCEV *C1A2 = SE->getMulExpr(X->getC(), Y->getA()); 511 const SCEV *C2B1 = SE->getMulExpr(Y->getC(), X->getB()); 512 const SCEV *C2A1 = SE->getMulExpr(Y->getC(), X->getA()); 513 const SCEV *A1B2 = SE->getMulExpr(X->getA(), Y->getB()); 514 const SCEV *A2B1 = SE->getMulExpr(Y->getA(), X->getB()); 515 const SCEVConstant *C1A2_C2A1 = 516 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1A2, C2A1)); 517 const SCEVConstant *C1B2_C2B1 = 518 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1B2, C2B1)); 519 const SCEVConstant *A1B2_A2B1 = 520 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A1B2, A2B1)); 521 const SCEVConstant *A2B1_A1B2 = 522 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A2B1, A1B2)); 523 if (!C1B2_C2B1 || !C1A2_C2A1 || 524 !A1B2_A2B1 || !A2B1_A1B2) 525 return false; 526 APInt Xtop = C1B2_C2B1->getAPInt(); 527 APInt Xbot = A1B2_A2B1->getAPInt(); 528 APInt Ytop = C1A2_C2A1->getAPInt(); 529 APInt Ybot = A2B1_A1B2->getAPInt(); 530 LLVM_DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n"); 531 LLVM_DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n"); 532 LLVM_DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n"); 533 LLVM_DEBUG(dbgs() << "\t\tYbot = " << Ybot << "\n"); 534 APInt Xq = Xtop; // these need to be initialized, even 535 APInt Xr = Xtop; // though they're just going to be overwritten 536 APInt::sdivrem(Xtop, Xbot, Xq, Xr); 537 APInt Yq = Ytop; 538 APInt Yr = Ytop; 539 APInt::sdivrem(Ytop, Ybot, Yq, Yr); 540 if (Xr != 0 || Yr != 0) { 541 X->setEmpty(); 542 ++DeltaSuccesses; 543 return true; 544 } 545 LLVM_DEBUG(dbgs() << "\t\tX = " << Xq << ", Y = " << Yq << "\n"); 546 if (Xq.slt(0) || Yq.slt(0)) { 547 X->setEmpty(); 548 ++DeltaSuccesses; 549 return true; 550 } 551 if (const SCEVConstant *CUB = 552 collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) { 553 const APInt &UpperBound = CUB->getAPInt(); 554 LLVM_DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n"); 555 if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) { 556 X->setEmpty(); 557 ++DeltaSuccesses; 558 return true; 559 } 560 } 561 X->setPoint(SE->getConstant(Xq), 562 SE->getConstant(Yq), 563 X->getAssociatedLoop()); 564 ++DeltaSuccesses; 565 return true; 566 } 567 return false; 568 } 569 570 // if (X->isLine() && Y->isPoint()) This case can't occur. 571 assert(!(X->isLine() && Y->isPoint()) && "This case should never occur"); 572 573 if (X->isPoint() && Y->isLine()) { 574 LLVM_DEBUG(dbgs() << "\t intersect Point and Line\n"); 575 const SCEV *A1X1 = SE->getMulExpr(Y->getA(), X->getX()); 576 const SCEV *B1Y1 = SE->getMulExpr(Y->getB(), X->getY()); 577 const SCEV *Sum = SE->getAddExpr(A1X1, B1Y1); 578 if (isKnownPredicate(CmpInst::ICMP_EQ, Sum, Y->getC())) 579 return false; 580 if (isKnownPredicate(CmpInst::ICMP_NE, Sum, Y->getC())) { 581 X->setEmpty(); 582 ++DeltaSuccesses; 583 return true; 584 } 585 return false; 586 } 587 588 llvm_unreachable("shouldn't reach the end of Constraint intersection"); 589 return false; 590 } 591 592 593 //===----------------------------------------------------------------------===// 594 // DependenceInfo methods 595 596 // For debugging purposes. Dumps a dependence to OS. 597 void Dependence::dump(raw_ostream &OS) const { 598 bool Splitable = false; 599 if (isConfused()) 600 OS << "confused"; 601 else { 602 if (isConsistent()) 603 OS << "consistent "; 604 if (isFlow()) 605 OS << "flow"; 606 else if (isOutput()) 607 OS << "output"; 608 else if (isAnti()) 609 OS << "anti"; 610 else if (isInput()) 611 OS << "input"; 612 unsigned Levels = getLevels(); 613 OS << " ["; 614 for (unsigned II = 1; II <= Levels; ++II) { 615 if (isSplitable(II)) 616 Splitable = true; 617 if (isPeelFirst(II)) 618 OS << 'p'; 619 const SCEV *Distance = getDistance(II); 620 if (Distance) 621 OS << *Distance; 622 else if (isScalar(II)) 623 OS << "S"; 624 else { 625 unsigned Direction = getDirection(II); 626 if (Direction == DVEntry::ALL) 627 OS << "*"; 628 else { 629 if (Direction & DVEntry::LT) 630 OS << "<"; 631 if (Direction & DVEntry::EQ) 632 OS << "="; 633 if (Direction & DVEntry::GT) 634 OS << ">"; 635 } 636 } 637 if (isPeelLast(II)) 638 OS << 'p'; 639 if (II < Levels) 640 OS << " "; 641 } 642 if (isLoopIndependent()) 643 OS << "|<"; 644 OS << "]"; 645 if (Splitable) 646 OS << " splitable"; 647 } 648 OS << "!\n"; 649 } 650 651 // Returns NoAlias/MayAliass/MustAlias for two memory locations based upon their 652 // underlaying objects. If LocA and LocB are known to not alias (for any reason: 653 // tbaa, non-overlapping regions etc), then it is known there is no dependecy. 654 // Otherwise the underlying objects are checked to see if they point to 655 // different identifiable objects. 656 static AliasResult underlyingObjectsAlias(AAResults *AA, 657 const DataLayout &DL, 658 const MemoryLocation &LocA, 659 const MemoryLocation &LocB) { 660 // Check the original locations (minus size) for noalias, which can happen for 661 // tbaa, incompatible underlying object locations, etc. 662 MemoryLocation LocAS = 663 MemoryLocation::getBeforeOrAfter(LocA.Ptr, LocA.AATags); 664 MemoryLocation LocBS = 665 MemoryLocation::getBeforeOrAfter(LocB.Ptr, LocB.AATags); 666 if (AA->isNoAlias(LocAS, LocBS)) 667 return AliasResult::NoAlias; 668 669 // Check the underlying objects are the same 670 const Value *AObj = getUnderlyingObject(LocA.Ptr); 671 const Value *BObj = getUnderlyingObject(LocB.Ptr); 672 673 // If the underlying objects are the same, they must alias 674 if (AObj == BObj) 675 return AliasResult::MustAlias; 676 677 // We may have hit the recursion limit for underlying objects, or have 678 // underlying objects where we don't know they will alias. 679 if (!isIdentifiedObject(AObj) || !isIdentifiedObject(BObj)) 680 return AliasResult::MayAlias; 681 682 // Otherwise we know the objects are different and both identified objects so 683 // must not alias. 684 return AliasResult::NoAlias; 685 } 686 687 688 // Returns true if the load or store can be analyzed. Atomic and volatile 689 // operations have properties which this analysis does not understand. 690 static 691 bool isLoadOrStore(const Instruction *I) { 692 if (const LoadInst *LI = dyn_cast<LoadInst>(I)) 693 return LI->isUnordered(); 694 else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) 695 return SI->isUnordered(); 696 return false; 697 } 698 699 700 // Examines the loop nesting of the Src and Dst 701 // instructions and establishes their shared loops. Sets the variables 702 // CommonLevels, SrcLevels, and MaxLevels. 703 // The source and destination instructions needn't be contained in the same 704 // loop. The routine establishNestingLevels finds the level of most deeply 705 // nested loop that contains them both, CommonLevels. An instruction that's 706 // not contained in a loop is at level = 0. MaxLevels is equal to the level 707 // of the source plus the level of the destination, minus CommonLevels. 708 // This lets us allocate vectors MaxLevels in length, with room for every 709 // distinct loop referenced in both the source and destination subscripts. 710 // The variable SrcLevels is the nesting depth of the source instruction. 711 // It's used to help calculate distinct loops referenced by the destination. 712 // Here's the map from loops to levels: 713 // 0 - unused 714 // 1 - outermost common loop 715 // ... - other common loops 716 // CommonLevels - innermost common loop 717 // ... - loops containing Src but not Dst 718 // SrcLevels - innermost loop containing Src but not Dst 719 // ... - loops containing Dst but not Src 720 // MaxLevels - innermost loops containing Dst but not Src 721 // Consider the follow code fragment: 722 // for (a = ...) { 723 // for (b = ...) { 724 // for (c = ...) { 725 // for (d = ...) { 726 // A[] = ...; 727 // } 728 // } 729 // for (e = ...) { 730 // for (f = ...) { 731 // for (g = ...) { 732 // ... = A[]; 733 // } 734 // } 735 // } 736 // } 737 // } 738 // If we're looking at the possibility of a dependence between the store 739 // to A (the Src) and the load from A (the Dst), we'll note that they 740 // have 2 loops in common, so CommonLevels will equal 2 and the direction 741 // vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7. 742 // A map from loop names to loop numbers would look like 743 // a - 1 744 // b - 2 = CommonLevels 745 // c - 3 746 // d - 4 = SrcLevels 747 // e - 5 748 // f - 6 749 // g - 7 = MaxLevels 750 void DependenceInfo::establishNestingLevels(const Instruction *Src, 751 const Instruction *Dst) { 752 const BasicBlock *SrcBlock = Src->getParent(); 753 const BasicBlock *DstBlock = Dst->getParent(); 754 unsigned SrcLevel = LI->getLoopDepth(SrcBlock); 755 unsigned DstLevel = LI->getLoopDepth(DstBlock); 756 const Loop *SrcLoop = LI->getLoopFor(SrcBlock); 757 const Loop *DstLoop = LI->getLoopFor(DstBlock); 758 SrcLevels = SrcLevel; 759 MaxLevels = SrcLevel + DstLevel; 760 while (SrcLevel > DstLevel) { 761 SrcLoop = SrcLoop->getParentLoop(); 762 SrcLevel--; 763 } 764 while (DstLevel > SrcLevel) { 765 DstLoop = DstLoop->getParentLoop(); 766 DstLevel--; 767 } 768 while (SrcLoop != DstLoop) { 769 SrcLoop = SrcLoop->getParentLoop(); 770 DstLoop = DstLoop->getParentLoop(); 771 SrcLevel--; 772 } 773 CommonLevels = SrcLevel; 774 MaxLevels -= CommonLevels; 775 } 776 777 778 // Given one of the loops containing the source, return 779 // its level index in our numbering scheme. 780 unsigned DependenceInfo::mapSrcLoop(const Loop *SrcLoop) const { 781 return SrcLoop->getLoopDepth(); 782 } 783 784 785 // Given one of the loops containing the destination, 786 // return its level index in our numbering scheme. 787 unsigned DependenceInfo::mapDstLoop(const Loop *DstLoop) const { 788 unsigned D = DstLoop->getLoopDepth(); 789 if (D > CommonLevels) 790 return D - CommonLevels + SrcLevels; 791 else 792 return D; 793 } 794 795 796 // Returns true if Expression is loop invariant in LoopNest. 797 bool DependenceInfo::isLoopInvariant(const SCEV *Expression, 798 const Loop *LoopNest) const { 799 if (!LoopNest) 800 return true; 801 return SE->isLoopInvariant(Expression, LoopNest) && 802 isLoopInvariant(Expression, LoopNest->getParentLoop()); 803 } 804 805 806 807 // Finds the set of loops from the LoopNest that 808 // have a level <= CommonLevels and are referred to by the SCEV Expression. 809 void DependenceInfo::collectCommonLoops(const SCEV *Expression, 810 const Loop *LoopNest, 811 SmallBitVector &Loops) const { 812 while (LoopNest) { 813 unsigned Level = LoopNest->getLoopDepth(); 814 if (Level <= CommonLevels && !SE->isLoopInvariant(Expression, LoopNest)) 815 Loops.set(Level); 816 LoopNest = LoopNest->getParentLoop(); 817 } 818 } 819 820 void DependenceInfo::unifySubscriptType(ArrayRef<Subscript *> Pairs) { 821 822 unsigned widestWidthSeen = 0; 823 Type *widestType; 824 825 // Go through each pair and find the widest bit to which we need 826 // to extend all of them. 827 for (Subscript *Pair : Pairs) { 828 const SCEV *Src = Pair->Src; 829 const SCEV *Dst = Pair->Dst; 830 IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType()); 831 IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType()); 832 if (SrcTy == nullptr || DstTy == nullptr) { 833 assert(SrcTy == DstTy && "This function only unify integer types and " 834 "expect Src and Dst share the same type " 835 "otherwise."); 836 continue; 837 } 838 if (SrcTy->getBitWidth() > widestWidthSeen) { 839 widestWidthSeen = SrcTy->getBitWidth(); 840 widestType = SrcTy; 841 } 842 if (DstTy->getBitWidth() > widestWidthSeen) { 843 widestWidthSeen = DstTy->getBitWidth(); 844 widestType = DstTy; 845 } 846 } 847 848 849 assert(widestWidthSeen > 0); 850 851 // Now extend each pair to the widest seen. 852 for (Subscript *Pair : Pairs) { 853 const SCEV *Src = Pair->Src; 854 const SCEV *Dst = Pair->Dst; 855 IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType()); 856 IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType()); 857 if (SrcTy == nullptr || DstTy == nullptr) { 858 assert(SrcTy == DstTy && "This function only unify integer types and " 859 "expect Src and Dst share the same type " 860 "otherwise."); 861 continue; 862 } 863 if (SrcTy->getBitWidth() < widestWidthSeen) 864 // Sign-extend Src to widestType 865 Pair->Src = SE->getSignExtendExpr(Src, widestType); 866 if (DstTy->getBitWidth() < widestWidthSeen) { 867 // Sign-extend Dst to widestType 868 Pair->Dst = SE->getSignExtendExpr(Dst, widestType); 869 } 870 } 871 } 872 873 // removeMatchingExtensions - Examines a subscript pair. 874 // If the source and destination are identically sign (or zero) 875 // extended, it strips off the extension in an effect to simplify 876 // the actual analysis. 877 void DependenceInfo::removeMatchingExtensions(Subscript *Pair) { 878 const SCEV *Src = Pair->Src; 879 const SCEV *Dst = Pair->Dst; 880 if ((isa<SCEVZeroExtendExpr>(Src) && isa<SCEVZeroExtendExpr>(Dst)) || 881 (isa<SCEVSignExtendExpr>(Src) && isa<SCEVSignExtendExpr>(Dst))) { 882 const SCEVIntegralCastExpr *SrcCast = cast<SCEVIntegralCastExpr>(Src); 883 const SCEVIntegralCastExpr *DstCast = cast<SCEVIntegralCastExpr>(Dst); 884 const SCEV *SrcCastOp = SrcCast->getOperand(); 885 const SCEV *DstCastOp = DstCast->getOperand(); 886 if (SrcCastOp->getType() == DstCastOp->getType()) { 887 Pair->Src = SrcCastOp; 888 Pair->Dst = DstCastOp; 889 } 890 } 891 } 892 893 // Examine the scev and return true iff it's linear. 894 // Collect any loops mentioned in the set of "Loops". 895 bool DependenceInfo::checkSubscript(const SCEV *Expr, const Loop *LoopNest, 896 SmallBitVector &Loops, bool IsSrc) { 897 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 898 if (!AddRec) 899 return isLoopInvariant(Expr, LoopNest); 900 const SCEV *Start = AddRec->getStart(); 901 const SCEV *Step = AddRec->getStepRecurrence(*SE); 902 const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop()); 903 if (!isa<SCEVCouldNotCompute>(UB)) { 904 if (SE->getTypeSizeInBits(Start->getType()) < 905 SE->getTypeSizeInBits(UB->getType())) { 906 if (!AddRec->getNoWrapFlags()) 907 return false; 908 } 909 } 910 if (!isLoopInvariant(Step, LoopNest)) 911 return false; 912 if (IsSrc) 913 Loops.set(mapSrcLoop(AddRec->getLoop())); 914 else 915 Loops.set(mapDstLoop(AddRec->getLoop())); 916 return checkSubscript(Start, LoopNest, Loops, IsSrc); 917 } 918 919 // Examine the scev and return true iff it's linear. 920 // Collect any loops mentioned in the set of "Loops". 921 bool DependenceInfo::checkSrcSubscript(const SCEV *Src, const Loop *LoopNest, 922 SmallBitVector &Loops) { 923 return checkSubscript(Src, LoopNest, Loops, true); 924 } 925 926 // Examine the scev and return true iff it's linear. 927 // Collect any loops mentioned in the set of "Loops". 928 bool DependenceInfo::checkDstSubscript(const SCEV *Dst, const Loop *LoopNest, 929 SmallBitVector &Loops) { 930 return checkSubscript(Dst, LoopNest, Loops, false); 931 } 932 933 934 // Examines the subscript pair (the Src and Dst SCEVs) 935 // and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear. 936 // Collects the associated loops in a set. 937 DependenceInfo::Subscript::ClassificationKind 938 DependenceInfo::classifyPair(const SCEV *Src, const Loop *SrcLoopNest, 939 const SCEV *Dst, const Loop *DstLoopNest, 940 SmallBitVector &Loops) { 941 SmallBitVector SrcLoops(MaxLevels + 1); 942 SmallBitVector DstLoops(MaxLevels + 1); 943 if (!checkSrcSubscript(Src, SrcLoopNest, SrcLoops)) 944 return Subscript::NonLinear; 945 if (!checkDstSubscript(Dst, DstLoopNest, DstLoops)) 946 return Subscript::NonLinear; 947 Loops = SrcLoops; 948 Loops |= DstLoops; 949 unsigned N = Loops.count(); 950 if (N == 0) 951 return Subscript::ZIV; 952 if (N == 1) 953 return Subscript::SIV; 954 if (N == 2 && (SrcLoops.count() == 0 || 955 DstLoops.count() == 0 || 956 (SrcLoops.count() == 1 && DstLoops.count() == 1))) 957 return Subscript::RDIV; 958 return Subscript::MIV; 959 } 960 961 962 // A wrapper around SCEV::isKnownPredicate. 963 // Looks for cases where we're interested in comparing for equality. 964 // If both X and Y have been identically sign or zero extended, 965 // it strips off the (confusing) extensions before invoking 966 // SCEV::isKnownPredicate. Perhaps, someday, the ScalarEvolution package 967 // will be similarly updated. 968 // 969 // If SCEV::isKnownPredicate can't prove the predicate, 970 // we try simple subtraction, which seems to help in some cases 971 // involving symbolics. 972 bool DependenceInfo::isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *X, 973 const SCEV *Y) const { 974 if (Pred == CmpInst::ICMP_EQ || 975 Pred == CmpInst::ICMP_NE) { 976 if ((isa<SCEVSignExtendExpr>(X) && 977 isa<SCEVSignExtendExpr>(Y)) || 978 (isa<SCEVZeroExtendExpr>(X) && 979 isa<SCEVZeroExtendExpr>(Y))) { 980 const SCEVIntegralCastExpr *CX = cast<SCEVIntegralCastExpr>(X); 981 const SCEVIntegralCastExpr *CY = cast<SCEVIntegralCastExpr>(Y); 982 const SCEV *Xop = CX->getOperand(); 983 const SCEV *Yop = CY->getOperand(); 984 if (Xop->getType() == Yop->getType()) { 985 X = Xop; 986 Y = Yop; 987 } 988 } 989 } 990 if (SE->isKnownPredicate(Pred, X, Y)) 991 return true; 992 // If SE->isKnownPredicate can't prove the condition, 993 // we try the brute-force approach of subtracting 994 // and testing the difference. 995 // By testing with SE->isKnownPredicate first, we avoid 996 // the possibility of overflow when the arguments are constants. 997 const SCEV *Delta = SE->getMinusSCEV(X, Y); 998 switch (Pred) { 999 case CmpInst::ICMP_EQ: 1000 return Delta->isZero(); 1001 case CmpInst::ICMP_NE: 1002 return SE->isKnownNonZero(Delta); 1003 case CmpInst::ICMP_SGE: 1004 return SE->isKnownNonNegative(Delta); 1005 case CmpInst::ICMP_SLE: 1006 return SE->isKnownNonPositive(Delta); 1007 case CmpInst::ICMP_SGT: 1008 return SE->isKnownPositive(Delta); 1009 case CmpInst::ICMP_SLT: 1010 return SE->isKnownNegative(Delta); 1011 default: 1012 llvm_unreachable("unexpected predicate in isKnownPredicate"); 1013 } 1014 } 1015 1016 /// Compare to see if S is less than Size, using isKnownNegative(S - max(Size, 1)) 1017 /// with some extra checking if S is an AddRec and we can prove less-than using 1018 /// the loop bounds. 1019 bool DependenceInfo::isKnownLessThan(const SCEV *S, const SCEV *Size) const { 1020 // First unify to the same type 1021 auto *SType = dyn_cast<IntegerType>(S->getType()); 1022 auto *SizeType = dyn_cast<IntegerType>(Size->getType()); 1023 if (!SType || !SizeType) 1024 return false; 1025 Type *MaxType = 1026 (SType->getBitWidth() >= SizeType->getBitWidth()) ? SType : SizeType; 1027 S = SE->getTruncateOrZeroExtend(S, MaxType); 1028 Size = SE->getTruncateOrZeroExtend(Size, MaxType); 1029 1030 // Special check for addrecs using BE taken count 1031 const SCEV *Bound = SE->getMinusSCEV(S, Size); 1032 if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Bound)) { 1033 if (AddRec->isAffine()) { 1034 const SCEV *BECount = SE->getBackedgeTakenCount(AddRec->getLoop()); 1035 if (!isa<SCEVCouldNotCompute>(BECount)) { 1036 const SCEV *Limit = AddRec->evaluateAtIteration(BECount, *SE); 1037 if (SE->isKnownNegative(Limit)) 1038 return true; 1039 } 1040 } 1041 } 1042 1043 // Check using normal isKnownNegative 1044 const SCEV *LimitedBound = 1045 SE->getMinusSCEV(S, SE->getSMaxExpr(Size, SE->getOne(Size->getType()))); 1046 return SE->isKnownNegative(LimitedBound); 1047 } 1048 1049 bool DependenceInfo::isKnownNonNegative(const SCEV *S, const Value *Ptr) const { 1050 bool Inbounds = false; 1051 if (auto *SrcGEP = dyn_cast<GetElementPtrInst>(Ptr)) 1052 Inbounds = SrcGEP->isInBounds(); 1053 if (Inbounds) { 1054 if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) { 1055 if (AddRec->isAffine()) { 1056 // We know S is for Ptr, the operand on a load/store, so doesn't wrap. 1057 // If both parts are NonNegative, the end result will be NonNegative 1058 if (SE->isKnownNonNegative(AddRec->getStart()) && 1059 SE->isKnownNonNegative(AddRec->getOperand(1))) 1060 return true; 1061 } 1062 } 1063 } 1064 1065 return SE->isKnownNonNegative(S); 1066 } 1067 1068 // All subscripts are all the same type. 1069 // Loop bound may be smaller (e.g., a char). 1070 // Should zero extend loop bound, since it's always >= 0. 1071 // This routine collects upper bound and extends or truncates if needed. 1072 // Truncating is safe when subscripts are known not to wrap. Cases without 1073 // nowrap flags should have been rejected earlier. 1074 // Return null if no bound available. 1075 const SCEV *DependenceInfo::collectUpperBound(const Loop *L, Type *T) const { 1076 if (SE->hasLoopInvariantBackedgeTakenCount(L)) { 1077 const SCEV *UB = SE->getBackedgeTakenCount(L); 1078 return SE->getTruncateOrZeroExtend(UB, T); 1079 } 1080 return nullptr; 1081 } 1082 1083 1084 // Calls collectUpperBound(), then attempts to cast it to SCEVConstant. 1085 // If the cast fails, returns NULL. 1086 const SCEVConstant *DependenceInfo::collectConstantUpperBound(const Loop *L, 1087 Type *T) const { 1088 if (const SCEV *UB = collectUpperBound(L, T)) 1089 return dyn_cast<SCEVConstant>(UB); 1090 return nullptr; 1091 } 1092 1093 1094 // testZIV - 1095 // When we have a pair of subscripts of the form [c1] and [c2], 1096 // where c1 and c2 are both loop invariant, we attack it using 1097 // the ZIV test. Basically, we test by comparing the two values, 1098 // but there are actually three possible results: 1099 // 1) the values are equal, so there's a dependence 1100 // 2) the values are different, so there's no dependence 1101 // 3) the values might be equal, so we have to assume a dependence. 1102 // 1103 // Return true if dependence disproved. 1104 bool DependenceInfo::testZIV(const SCEV *Src, const SCEV *Dst, 1105 FullDependence &Result) const { 1106 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n"); 1107 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n"); 1108 ++ZIVapplications; 1109 if (isKnownPredicate(CmpInst::ICMP_EQ, Src, Dst)) { 1110 LLVM_DEBUG(dbgs() << " provably dependent\n"); 1111 return false; // provably dependent 1112 } 1113 if (isKnownPredicate(CmpInst::ICMP_NE, Src, Dst)) { 1114 LLVM_DEBUG(dbgs() << " provably independent\n"); 1115 ++ZIVindependence; 1116 return true; // provably independent 1117 } 1118 LLVM_DEBUG(dbgs() << " possibly dependent\n"); 1119 Result.Consistent = false; 1120 return false; // possibly dependent 1121 } 1122 1123 1124 // strongSIVtest - 1125 // From the paper, Practical Dependence Testing, Section 4.2.1 1126 // 1127 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 + a*i], 1128 // where i is an induction variable, c1 and c2 are loop invariant, 1129 // and a is a constant, we can solve it exactly using the Strong SIV test. 1130 // 1131 // Can prove independence. Failing that, can compute distance (and direction). 1132 // In the presence of symbolic terms, we can sometimes make progress. 1133 // 1134 // If there's a dependence, 1135 // 1136 // c1 + a*i = c2 + a*i' 1137 // 1138 // The dependence distance is 1139 // 1140 // d = i' - i = (c1 - c2)/a 1141 // 1142 // A dependence only exists if d is an integer and abs(d) <= U, where U is the 1143 // loop's upper bound. If a dependence exists, the dependence direction is 1144 // defined as 1145 // 1146 // { < if d > 0 1147 // direction = { = if d = 0 1148 // { > if d < 0 1149 // 1150 // Return true if dependence disproved. 1151 bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst, 1152 const SCEV *DstConst, const Loop *CurLoop, 1153 unsigned Level, FullDependence &Result, 1154 Constraint &NewConstraint) const { 1155 LLVM_DEBUG(dbgs() << "\tStrong SIV test\n"); 1156 LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff); 1157 LLVM_DEBUG(dbgs() << ", " << *Coeff->getType() << "\n"); 1158 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst); 1159 LLVM_DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n"); 1160 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst); 1161 LLVM_DEBUG(dbgs() << ", " << *DstConst->getType() << "\n"); 1162 ++StrongSIVapplications; 1163 assert(0 < Level && Level <= CommonLevels && "level out of range"); 1164 Level--; 1165 1166 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1167 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta); 1168 LLVM_DEBUG(dbgs() << ", " << *Delta->getType() << "\n"); 1169 1170 // check that |Delta| < iteration count 1171 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1172 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound); 1173 LLVM_DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n"); 1174 const SCEV *AbsDelta = 1175 SE->isKnownNonNegative(Delta) ? Delta : SE->getNegativeSCEV(Delta); 1176 const SCEV *AbsCoeff = 1177 SE->isKnownNonNegative(Coeff) ? Coeff : SE->getNegativeSCEV(Coeff); 1178 const SCEV *Product = SE->getMulExpr(UpperBound, AbsCoeff); 1179 if (isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product)) { 1180 // Distance greater than trip count - no dependence 1181 ++StrongSIVindependence; 1182 ++StrongSIVsuccesses; 1183 return true; 1184 } 1185 } 1186 1187 // Can we compute distance? 1188 if (isa<SCEVConstant>(Delta) && isa<SCEVConstant>(Coeff)) { 1189 APInt ConstDelta = cast<SCEVConstant>(Delta)->getAPInt(); 1190 APInt ConstCoeff = cast<SCEVConstant>(Coeff)->getAPInt(); 1191 APInt Distance = ConstDelta; // these need to be initialized 1192 APInt Remainder = ConstDelta; 1193 APInt::sdivrem(ConstDelta, ConstCoeff, Distance, Remainder); 1194 LLVM_DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1195 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1196 // Make sure Coeff divides Delta exactly 1197 if (Remainder != 0) { 1198 // Coeff doesn't divide Distance, no dependence 1199 ++StrongSIVindependence; 1200 ++StrongSIVsuccesses; 1201 return true; 1202 } 1203 Result.DV[Level].Distance = SE->getConstant(Distance); 1204 NewConstraint.setDistance(SE->getConstant(Distance), CurLoop); 1205 if (Distance.sgt(0)) 1206 Result.DV[Level].Direction &= Dependence::DVEntry::LT; 1207 else if (Distance.slt(0)) 1208 Result.DV[Level].Direction &= Dependence::DVEntry::GT; 1209 else 1210 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1211 ++StrongSIVsuccesses; 1212 } 1213 else if (Delta->isZero()) { 1214 // since 0/X == 0 1215 Result.DV[Level].Distance = Delta; 1216 NewConstraint.setDistance(Delta, CurLoop); 1217 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1218 ++StrongSIVsuccesses; 1219 } 1220 else { 1221 if (Coeff->isOne()) { 1222 LLVM_DEBUG(dbgs() << "\t Distance = " << *Delta << "\n"); 1223 Result.DV[Level].Distance = Delta; // since X/1 == X 1224 NewConstraint.setDistance(Delta, CurLoop); 1225 } 1226 else { 1227 Result.Consistent = false; 1228 NewConstraint.setLine(Coeff, 1229 SE->getNegativeSCEV(Coeff), 1230 SE->getNegativeSCEV(Delta), CurLoop); 1231 } 1232 1233 // maybe we can get a useful direction 1234 bool DeltaMaybeZero = !SE->isKnownNonZero(Delta); 1235 bool DeltaMaybePositive = !SE->isKnownNonPositive(Delta); 1236 bool DeltaMaybeNegative = !SE->isKnownNonNegative(Delta); 1237 bool CoeffMaybePositive = !SE->isKnownNonPositive(Coeff); 1238 bool CoeffMaybeNegative = !SE->isKnownNonNegative(Coeff); 1239 // The double negatives above are confusing. 1240 // It helps to read !SE->isKnownNonZero(Delta) 1241 // as "Delta might be Zero" 1242 unsigned NewDirection = Dependence::DVEntry::NONE; 1243 if ((DeltaMaybePositive && CoeffMaybePositive) || 1244 (DeltaMaybeNegative && CoeffMaybeNegative)) 1245 NewDirection = Dependence::DVEntry::LT; 1246 if (DeltaMaybeZero) 1247 NewDirection |= Dependence::DVEntry::EQ; 1248 if ((DeltaMaybeNegative && CoeffMaybePositive) || 1249 (DeltaMaybePositive && CoeffMaybeNegative)) 1250 NewDirection |= Dependence::DVEntry::GT; 1251 if (NewDirection < Result.DV[Level].Direction) 1252 ++StrongSIVsuccesses; 1253 Result.DV[Level].Direction &= NewDirection; 1254 } 1255 return false; 1256 } 1257 1258 1259 // weakCrossingSIVtest - 1260 // From the paper, Practical Dependence Testing, Section 4.2.2 1261 // 1262 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 - a*i], 1263 // where i is an induction variable, c1 and c2 are loop invariant, 1264 // and a is a constant, we can solve it exactly using the 1265 // Weak-Crossing SIV test. 1266 // 1267 // Given c1 + a*i = c2 - a*i', we can look for the intersection of 1268 // the two lines, where i = i', yielding 1269 // 1270 // c1 + a*i = c2 - a*i 1271 // 2a*i = c2 - c1 1272 // i = (c2 - c1)/2a 1273 // 1274 // If i < 0, there is no dependence. 1275 // If i > upperbound, there is no dependence. 1276 // If i = 0 (i.e., if c1 = c2), there's a dependence with distance = 0. 1277 // If i = upperbound, there's a dependence with distance = 0. 1278 // If i is integral, there's a dependence (all directions). 1279 // If the non-integer part = 1/2, there's a dependence (<> directions). 1280 // Otherwise, there's no dependence. 1281 // 1282 // Can prove independence. Failing that, 1283 // can sometimes refine the directions. 1284 // Can determine iteration for splitting. 1285 // 1286 // Return true if dependence disproved. 1287 bool DependenceInfo::weakCrossingSIVtest( 1288 const SCEV *Coeff, const SCEV *SrcConst, const SCEV *DstConst, 1289 const Loop *CurLoop, unsigned Level, FullDependence &Result, 1290 Constraint &NewConstraint, const SCEV *&SplitIter) const { 1291 LLVM_DEBUG(dbgs() << "\tWeak-Crossing SIV test\n"); 1292 LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff << "\n"); 1293 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1294 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1295 ++WeakCrossingSIVapplications; 1296 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1297 Level--; 1298 Result.Consistent = false; 1299 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1300 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1301 NewConstraint.setLine(Coeff, Coeff, Delta, CurLoop); 1302 if (Delta->isZero()) { 1303 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); 1304 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); 1305 ++WeakCrossingSIVsuccesses; 1306 if (!Result.DV[Level].Direction) { 1307 ++WeakCrossingSIVindependence; 1308 return true; 1309 } 1310 Result.DV[Level].Distance = Delta; // = 0 1311 return false; 1312 } 1313 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Coeff); 1314 if (!ConstCoeff) 1315 return false; 1316 1317 Result.DV[Level].Splitable = true; 1318 if (SE->isKnownNegative(ConstCoeff)) { 1319 ConstCoeff = dyn_cast<SCEVConstant>(SE->getNegativeSCEV(ConstCoeff)); 1320 assert(ConstCoeff && 1321 "dynamic cast of negative of ConstCoeff should yield constant"); 1322 Delta = SE->getNegativeSCEV(Delta); 1323 } 1324 assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive"); 1325 1326 // compute SplitIter for use by DependenceInfo::getSplitIteration() 1327 SplitIter = SE->getUDivExpr( 1328 SE->getSMaxExpr(SE->getZero(Delta->getType()), Delta), 1329 SE->getMulExpr(SE->getConstant(Delta->getType(), 2), ConstCoeff)); 1330 LLVM_DEBUG(dbgs() << "\t Split iter = " << *SplitIter << "\n"); 1331 1332 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1333 if (!ConstDelta) 1334 return false; 1335 1336 // We're certain that ConstCoeff > 0; therefore, 1337 // if Delta < 0, then no dependence. 1338 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1339 LLVM_DEBUG(dbgs() << "\t ConstCoeff = " << *ConstCoeff << "\n"); 1340 if (SE->isKnownNegative(Delta)) { 1341 // No dependence, Delta < 0 1342 ++WeakCrossingSIVindependence; 1343 ++WeakCrossingSIVsuccesses; 1344 return true; 1345 } 1346 1347 // We're certain that Delta > 0 and ConstCoeff > 0. 1348 // Check Delta/(2*ConstCoeff) against upper loop bound 1349 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1350 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1351 const SCEV *ConstantTwo = SE->getConstant(UpperBound->getType(), 2); 1352 const SCEV *ML = SE->getMulExpr(SE->getMulExpr(ConstCoeff, UpperBound), 1353 ConstantTwo); 1354 LLVM_DEBUG(dbgs() << "\t ML = " << *ML << "\n"); 1355 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, ML)) { 1356 // Delta too big, no dependence 1357 ++WeakCrossingSIVindependence; 1358 ++WeakCrossingSIVsuccesses; 1359 return true; 1360 } 1361 if (isKnownPredicate(CmpInst::ICMP_EQ, Delta, ML)) { 1362 // i = i' = UB 1363 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); 1364 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); 1365 ++WeakCrossingSIVsuccesses; 1366 if (!Result.DV[Level].Direction) { 1367 ++WeakCrossingSIVindependence; 1368 return true; 1369 } 1370 Result.DV[Level].Splitable = false; 1371 Result.DV[Level].Distance = SE->getZero(Delta->getType()); 1372 return false; 1373 } 1374 } 1375 1376 // check that Coeff divides Delta 1377 APInt APDelta = ConstDelta->getAPInt(); 1378 APInt APCoeff = ConstCoeff->getAPInt(); 1379 APInt Distance = APDelta; // these need to be initialzed 1380 APInt Remainder = APDelta; 1381 APInt::sdivrem(APDelta, APCoeff, Distance, Remainder); 1382 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1383 if (Remainder != 0) { 1384 // Coeff doesn't divide Delta, no dependence 1385 ++WeakCrossingSIVindependence; 1386 ++WeakCrossingSIVsuccesses; 1387 return true; 1388 } 1389 LLVM_DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1390 1391 // if 2*Coeff doesn't divide Delta, then the equal direction isn't possible 1392 APInt Two = APInt(Distance.getBitWidth(), 2, true); 1393 Remainder = Distance.srem(Two); 1394 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1395 if (Remainder != 0) { 1396 // Equal direction isn't possible 1397 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::EQ); 1398 ++WeakCrossingSIVsuccesses; 1399 } 1400 return false; 1401 } 1402 1403 1404 // Kirch's algorithm, from 1405 // 1406 // Optimizing Supercompilers for Supercomputers 1407 // Michael Wolfe 1408 // MIT Press, 1989 1409 // 1410 // Program 2.1, page 29. 1411 // Computes the GCD of AM and BM. 1412 // Also finds a solution to the equation ax - by = gcd(a, b). 1413 // Returns true if dependence disproved; i.e., gcd does not divide Delta. 1414 static bool findGCD(unsigned Bits, const APInt &AM, const APInt &BM, 1415 const APInt &Delta, APInt &G, APInt &X, APInt &Y) { 1416 APInt A0(Bits, 1, true), A1(Bits, 0, true); 1417 APInt B0(Bits, 0, true), B1(Bits, 1, true); 1418 APInt G0 = AM.abs(); 1419 APInt G1 = BM.abs(); 1420 APInt Q = G0; // these need to be initialized 1421 APInt R = G0; 1422 APInt::sdivrem(G0, G1, Q, R); 1423 while (R != 0) { 1424 APInt A2 = A0 - Q*A1; A0 = A1; A1 = A2; 1425 APInt B2 = B0 - Q*B1; B0 = B1; B1 = B2; 1426 G0 = G1; G1 = R; 1427 APInt::sdivrem(G0, G1, Q, R); 1428 } 1429 G = G1; 1430 LLVM_DEBUG(dbgs() << "\t GCD = " << G << "\n"); 1431 X = AM.slt(0) ? -A1 : A1; 1432 Y = BM.slt(0) ? B1 : -B1; 1433 1434 // make sure gcd divides Delta 1435 R = Delta.srem(G); 1436 if (R != 0) 1437 return true; // gcd doesn't divide Delta, no dependence 1438 Q = Delta.sdiv(G); 1439 return false; 1440 } 1441 1442 static APInt floorOfQuotient(const APInt &A, const APInt &B) { 1443 APInt Q = A; // these need to be initialized 1444 APInt R = A; 1445 APInt::sdivrem(A, B, Q, R); 1446 if (R == 0) 1447 return Q; 1448 if ((A.sgt(0) && B.sgt(0)) || 1449 (A.slt(0) && B.slt(0))) 1450 return Q; 1451 else 1452 return Q - 1; 1453 } 1454 1455 static APInt ceilingOfQuotient(const APInt &A, const APInt &B) { 1456 APInt Q = A; // these need to be initialized 1457 APInt R = A; 1458 APInt::sdivrem(A, B, Q, R); 1459 if (R == 0) 1460 return Q; 1461 if ((A.sgt(0) && B.sgt(0)) || 1462 (A.slt(0) && B.slt(0))) 1463 return Q + 1; 1464 else 1465 return Q; 1466 } 1467 1468 // exactSIVtest - 1469 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i], 1470 // where i is an induction variable, c1 and c2 are loop invariant, and a1 1471 // and a2 are constant, we can solve it exactly using an algorithm developed 1472 // by Banerjee and Wolfe. See Algorithm 6.2.1 (case 2.5) in: 1473 // 1474 // Dependence Analysis for Supercomputing 1475 // Utpal Banerjee 1476 // Kluwer Academic Publishers, 1988 1477 // 1478 // It's slower than the specialized tests (strong SIV, weak-zero SIV, etc), 1479 // so use them if possible. They're also a bit better with symbolics and, 1480 // in the case of the strong SIV test, can compute Distances. 1481 // 1482 // Return true if dependence disproved. 1483 // 1484 // This is a modified version of the original Banerjee algorithm. The original 1485 // only tested whether Dst depends on Src. This algorithm extends that and 1486 // returns all the dependencies that exist between Dst and Src. 1487 bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, 1488 const SCEV *SrcConst, const SCEV *DstConst, 1489 const Loop *CurLoop, unsigned Level, 1490 FullDependence &Result, 1491 Constraint &NewConstraint) const { 1492 LLVM_DEBUG(dbgs() << "\tExact SIV test\n"); 1493 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1494 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1495 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1496 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1497 ++ExactSIVapplications; 1498 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1499 Level--; 1500 Result.Consistent = false; 1501 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1502 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1503 NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff), Delta, 1504 CurLoop); 1505 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1506 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1507 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1508 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1509 return false; 1510 1511 // find gcd 1512 APInt G, X, Y; 1513 APInt AM = ConstSrcCoeff->getAPInt(); 1514 APInt BM = ConstDstCoeff->getAPInt(); 1515 APInt CM = ConstDelta->getAPInt(); 1516 unsigned Bits = AM.getBitWidth(); 1517 if (findGCD(Bits, AM, BM, CM, G, X, Y)) { 1518 // gcd doesn't divide Delta, no dependence 1519 ++ExactSIVindependence; 1520 ++ExactSIVsuccesses; 1521 return true; 1522 } 1523 1524 LLVM_DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 1525 1526 // since SCEV construction normalizes, LM = 0 1527 APInt UM(Bits, 1, true); 1528 bool UMValid = false; 1529 // UM is perhaps unavailable, let's check 1530 if (const SCEVConstant *CUB = 1531 collectConstantUpperBound(CurLoop, Delta->getType())) { 1532 UM = CUB->getAPInt(); 1533 LLVM_DEBUG(dbgs() << "\t UM = " << UM << "\n"); 1534 UMValid = true; 1535 } 1536 1537 APInt TU(APInt::getSignedMaxValue(Bits)); 1538 APInt TL(APInt::getSignedMinValue(Bits)); 1539 APInt TC = CM.sdiv(G); 1540 APInt TX = X * TC; 1541 APInt TY = Y * TC; 1542 LLVM_DEBUG(dbgs() << "\t TC = " << TC << "\n"); 1543 LLVM_DEBUG(dbgs() << "\t TX = " << TX << "\n"); 1544 LLVM_DEBUG(dbgs() << "\t TY = " << TY << "\n"); 1545 1546 SmallVector<APInt, 2> TLVec, TUVec; 1547 APInt TB = BM.sdiv(G); 1548 if (TB.sgt(0)) { 1549 TLVec.push_back(ceilingOfQuotient(-TX, TB)); 1550 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 1551 // New bound check - modification to Banerjee's e3 check 1552 if (UMValid) { 1553 TUVec.push_back(floorOfQuotient(UM - TX, TB)); 1554 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 1555 } 1556 } else { 1557 TUVec.push_back(floorOfQuotient(-TX, TB)); 1558 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 1559 // New bound check - modification to Banerjee's e3 check 1560 if (UMValid) { 1561 TLVec.push_back(ceilingOfQuotient(UM - TX, TB)); 1562 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 1563 } 1564 } 1565 1566 APInt TA = AM.sdiv(G); 1567 if (TA.sgt(0)) { 1568 if (UMValid) { 1569 TUVec.push_back(floorOfQuotient(UM - TY, TA)); 1570 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 1571 } 1572 // New bound check - modification to Banerjee's e3 check 1573 TLVec.push_back(ceilingOfQuotient(-TY, TA)); 1574 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 1575 } else { 1576 if (UMValid) { 1577 TLVec.push_back(ceilingOfQuotient(UM - TY, TA)); 1578 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 1579 } 1580 // New bound check - modification to Banerjee's e3 check 1581 TUVec.push_back(floorOfQuotient(-TY, TA)); 1582 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 1583 } 1584 1585 LLVM_DEBUG(dbgs() << "\t TA = " << TA << "\n"); 1586 LLVM_DEBUG(dbgs() << "\t TB = " << TB << "\n"); 1587 1588 if (TLVec.empty() || TUVec.empty()) 1589 return false; 1590 TL = APIntOps::smax(TLVec.front(), TLVec.back()); 1591 TU = APIntOps::smin(TUVec.front(), TUVec.back()); 1592 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1593 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1594 1595 if (TL.sgt(TU)) { 1596 ++ExactSIVindependence; 1597 ++ExactSIVsuccesses; 1598 return true; 1599 } 1600 1601 // explore directions 1602 unsigned NewDirection = Dependence::DVEntry::NONE; 1603 APInt LowerDistance, UpperDistance; 1604 if (TA.sgt(TB)) { 1605 LowerDistance = (TY - TX) + (TA - TB) * TL; 1606 UpperDistance = (TY - TX) + (TA - TB) * TU; 1607 } else { 1608 LowerDistance = (TY - TX) + (TA - TB) * TU; 1609 UpperDistance = (TY - TX) + (TA - TB) * TL; 1610 } 1611 1612 LLVM_DEBUG(dbgs() << "\t LowerDistance = " << LowerDistance << "\n"); 1613 LLVM_DEBUG(dbgs() << "\t UpperDistance = " << UpperDistance << "\n"); 1614 1615 APInt Zero(Bits, 0, true); 1616 if (LowerDistance.sle(Zero) && UpperDistance.sge(Zero)) { 1617 NewDirection |= Dependence::DVEntry::EQ; 1618 ++ExactSIVsuccesses; 1619 } 1620 if (LowerDistance.slt(0)) { 1621 NewDirection |= Dependence::DVEntry::GT; 1622 ++ExactSIVsuccesses; 1623 } 1624 if (UpperDistance.sgt(0)) { 1625 NewDirection |= Dependence::DVEntry::LT; 1626 ++ExactSIVsuccesses; 1627 } 1628 1629 // finished 1630 Result.DV[Level].Direction &= NewDirection; 1631 if (Result.DV[Level].Direction == Dependence::DVEntry::NONE) 1632 ++ExactSIVindependence; 1633 LLVM_DEBUG(dbgs() << "\t Result = "); 1634 LLVM_DEBUG(Result.dump(dbgs())); 1635 return Result.DV[Level].Direction == Dependence::DVEntry::NONE; 1636 } 1637 1638 1639 // Return true if the divisor evenly divides the dividend. 1640 static 1641 bool isRemainderZero(const SCEVConstant *Dividend, 1642 const SCEVConstant *Divisor) { 1643 const APInt &ConstDividend = Dividend->getAPInt(); 1644 const APInt &ConstDivisor = Divisor->getAPInt(); 1645 return ConstDividend.srem(ConstDivisor) == 0; 1646 } 1647 1648 1649 // weakZeroSrcSIVtest - 1650 // From the paper, Practical Dependence Testing, Section 4.2.2 1651 // 1652 // When we have a pair of subscripts of the form [c1] and [c2 + a*i], 1653 // where i is an induction variable, c1 and c2 are loop invariant, 1654 // and a is a constant, we can solve it exactly using the 1655 // Weak-Zero SIV test. 1656 // 1657 // Given 1658 // 1659 // c1 = c2 + a*i 1660 // 1661 // we get 1662 // 1663 // (c1 - c2)/a = i 1664 // 1665 // If i is not an integer, there's no dependence. 1666 // If i < 0 or > UB, there's no dependence. 1667 // If i = 0, the direction is >= and peeling the 1668 // 1st iteration will break the dependence. 1669 // If i = UB, the direction is <= and peeling the 1670 // last iteration will break the dependence. 1671 // Otherwise, the direction is *. 1672 // 1673 // Can prove independence. Failing that, we can sometimes refine 1674 // the directions. Can sometimes show that first or last 1675 // iteration carries all the dependences (so worth peeling). 1676 // 1677 // (see also weakZeroDstSIVtest) 1678 // 1679 // Return true if dependence disproved. 1680 bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff, 1681 const SCEV *SrcConst, 1682 const SCEV *DstConst, 1683 const Loop *CurLoop, unsigned Level, 1684 FullDependence &Result, 1685 Constraint &NewConstraint) const { 1686 // For the WeakSIV test, it's possible the loop isn't common to 1687 // the Src and Dst loops. If it isn't, then there's no need to 1688 // record a direction. 1689 LLVM_DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n"); 1690 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << "\n"); 1691 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1692 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1693 ++WeakZeroSIVapplications; 1694 assert(0 < Level && Level <= MaxLevels && "Level out of range"); 1695 Level--; 1696 Result.Consistent = false; 1697 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1698 NewConstraint.setLine(SE->getZero(Delta->getType()), DstCoeff, Delta, 1699 CurLoop); 1700 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1701 if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) { 1702 if (Level < CommonLevels) { 1703 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1704 Result.DV[Level].PeelFirst = true; 1705 ++WeakZeroSIVsuccesses; 1706 } 1707 return false; // dependences caused by first iteration 1708 } 1709 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1710 if (!ConstCoeff) 1711 return false; 1712 const SCEV *AbsCoeff = 1713 SE->isKnownNegative(ConstCoeff) ? 1714 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1715 const SCEV *NewDelta = 1716 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1717 1718 // check that Delta/SrcCoeff < iteration count 1719 // really check NewDelta < count*AbsCoeff 1720 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1721 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1722 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1723 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1724 ++WeakZeroSIVindependence; 1725 ++WeakZeroSIVsuccesses; 1726 return true; 1727 } 1728 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1729 // dependences caused by last iteration 1730 if (Level < CommonLevels) { 1731 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1732 Result.DV[Level].PeelLast = true; 1733 ++WeakZeroSIVsuccesses; 1734 } 1735 return false; 1736 } 1737 } 1738 1739 // check that Delta/SrcCoeff >= 0 1740 // really check that NewDelta >= 0 1741 if (SE->isKnownNegative(NewDelta)) { 1742 // No dependence, newDelta < 0 1743 ++WeakZeroSIVindependence; 1744 ++WeakZeroSIVsuccesses; 1745 return true; 1746 } 1747 1748 // if SrcCoeff doesn't divide Delta, then no dependence 1749 if (isa<SCEVConstant>(Delta) && 1750 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1751 ++WeakZeroSIVindependence; 1752 ++WeakZeroSIVsuccesses; 1753 return true; 1754 } 1755 return false; 1756 } 1757 1758 1759 // weakZeroDstSIVtest - 1760 // From the paper, Practical Dependence Testing, Section 4.2.2 1761 // 1762 // When we have a pair of subscripts of the form [c1 + a*i] and [c2], 1763 // where i is an induction variable, c1 and c2 are loop invariant, 1764 // and a is a constant, we can solve it exactly using the 1765 // Weak-Zero SIV test. 1766 // 1767 // Given 1768 // 1769 // c1 + a*i = c2 1770 // 1771 // we get 1772 // 1773 // i = (c2 - c1)/a 1774 // 1775 // If i is not an integer, there's no dependence. 1776 // If i < 0 or > UB, there's no dependence. 1777 // If i = 0, the direction is <= and peeling the 1778 // 1st iteration will break the dependence. 1779 // If i = UB, the direction is >= and peeling the 1780 // last iteration will break the dependence. 1781 // Otherwise, the direction is *. 1782 // 1783 // Can prove independence. Failing that, we can sometimes refine 1784 // the directions. Can sometimes show that first or last 1785 // iteration carries all the dependences (so worth peeling). 1786 // 1787 // (see also weakZeroSrcSIVtest) 1788 // 1789 // Return true if dependence disproved. 1790 bool DependenceInfo::weakZeroDstSIVtest(const SCEV *SrcCoeff, 1791 const SCEV *SrcConst, 1792 const SCEV *DstConst, 1793 const Loop *CurLoop, unsigned Level, 1794 FullDependence &Result, 1795 Constraint &NewConstraint) const { 1796 // For the WeakSIV test, it's possible the loop isn't common to the 1797 // Src and Dst loops. If it isn't, then there's no need to record a direction. 1798 LLVM_DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n"); 1799 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << "\n"); 1800 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1801 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1802 ++WeakZeroSIVapplications; 1803 assert(0 < Level && Level <= SrcLevels && "Level out of range"); 1804 Level--; 1805 Result.Consistent = false; 1806 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1807 NewConstraint.setLine(SrcCoeff, SE->getZero(Delta->getType()), Delta, 1808 CurLoop); 1809 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1810 if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) { 1811 if (Level < CommonLevels) { 1812 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1813 Result.DV[Level].PeelFirst = true; 1814 ++WeakZeroSIVsuccesses; 1815 } 1816 return false; // dependences caused by first iteration 1817 } 1818 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1819 if (!ConstCoeff) 1820 return false; 1821 const SCEV *AbsCoeff = 1822 SE->isKnownNegative(ConstCoeff) ? 1823 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1824 const SCEV *NewDelta = 1825 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1826 1827 // check that Delta/SrcCoeff < iteration count 1828 // really check NewDelta < count*AbsCoeff 1829 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1830 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1831 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1832 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1833 ++WeakZeroSIVindependence; 1834 ++WeakZeroSIVsuccesses; 1835 return true; 1836 } 1837 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1838 // dependences caused by last iteration 1839 if (Level < CommonLevels) { 1840 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1841 Result.DV[Level].PeelLast = true; 1842 ++WeakZeroSIVsuccesses; 1843 } 1844 return false; 1845 } 1846 } 1847 1848 // check that Delta/SrcCoeff >= 0 1849 // really check that NewDelta >= 0 1850 if (SE->isKnownNegative(NewDelta)) { 1851 // No dependence, newDelta < 0 1852 ++WeakZeroSIVindependence; 1853 ++WeakZeroSIVsuccesses; 1854 return true; 1855 } 1856 1857 // if SrcCoeff doesn't divide Delta, then no dependence 1858 if (isa<SCEVConstant>(Delta) && 1859 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1860 ++WeakZeroSIVindependence; 1861 ++WeakZeroSIVsuccesses; 1862 return true; 1863 } 1864 return false; 1865 } 1866 1867 1868 // exactRDIVtest - Tests the RDIV subscript pair for dependence. 1869 // Things of the form [c1 + a*i] and [c2 + b*j], 1870 // where i and j are induction variable, c1 and c2 are loop invariant, 1871 // and a and b are constants. 1872 // Returns true if any possible dependence is disproved. 1873 // Marks the result as inconsistent. 1874 // Works in some cases that symbolicRDIVtest doesn't, and vice versa. 1875 bool DependenceInfo::exactRDIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, 1876 const SCEV *SrcConst, const SCEV *DstConst, 1877 const Loop *SrcLoop, const Loop *DstLoop, 1878 FullDependence &Result) const { 1879 LLVM_DEBUG(dbgs() << "\tExact RDIV test\n"); 1880 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1881 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1882 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1883 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1884 ++ExactRDIVapplications; 1885 Result.Consistent = false; 1886 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1887 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1888 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1889 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1890 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1891 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1892 return false; 1893 1894 // find gcd 1895 APInt G, X, Y; 1896 APInt AM = ConstSrcCoeff->getAPInt(); 1897 APInt BM = ConstDstCoeff->getAPInt(); 1898 APInt CM = ConstDelta->getAPInt(); 1899 unsigned Bits = AM.getBitWidth(); 1900 if (findGCD(Bits, AM, BM, CM, G, X, Y)) { 1901 // gcd doesn't divide Delta, no dependence 1902 ++ExactRDIVindependence; 1903 return true; 1904 } 1905 1906 LLVM_DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 1907 1908 // since SCEV construction seems to normalize, LM = 0 1909 APInt SrcUM(Bits, 1, true); 1910 bool SrcUMvalid = false; 1911 // SrcUM is perhaps unavailable, let's check 1912 if (const SCEVConstant *UpperBound = 1913 collectConstantUpperBound(SrcLoop, Delta->getType())) { 1914 SrcUM = UpperBound->getAPInt(); 1915 LLVM_DEBUG(dbgs() << "\t SrcUM = " << SrcUM << "\n"); 1916 SrcUMvalid = true; 1917 } 1918 1919 APInt DstUM(Bits, 1, true); 1920 bool DstUMvalid = false; 1921 // UM is perhaps unavailable, let's check 1922 if (const SCEVConstant *UpperBound = 1923 collectConstantUpperBound(DstLoop, Delta->getType())) { 1924 DstUM = UpperBound->getAPInt(); 1925 LLVM_DEBUG(dbgs() << "\t DstUM = " << DstUM << "\n"); 1926 DstUMvalid = true; 1927 } 1928 1929 APInt TU(APInt::getSignedMaxValue(Bits)); 1930 APInt TL(APInt::getSignedMinValue(Bits)); 1931 APInt TC = CM.sdiv(G); 1932 APInt TX = X * TC; 1933 APInt TY = Y * TC; 1934 LLVM_DEBUG(dbgs() << "\t TC = " << TC << "\n"); 1935 LLVM_DEBUG(dbgs() << "\t TX = " << TX << "\n"); 1936 LLVM_DEBUG(dbgs() << "\t TY = " << TY << "\n"); 1937 1938 SmallVector<APInt, 2> TLVec, TUVec; 1939 APInt TB = BM.sdiv(G); 1940 if (TB.sgt(0)) { 1941 TLVec.push_back(ceilingOfQuotient(-TX, TB)); 1942 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 1943 if (SrcUMvalid) { 1944 TUVec.push_back(floorOfQuotient(SrcUM - TX, TB)); 1945 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 1946 } 1947 } else { 1948 TUVec.push_back(floorOfQuotient(-TX, TB)); 1949 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 1950 if (SrcUMvalid) { 1951 TLVec.push_back(ceilingOfQuotient(SrcUM - TX, TB)); 1952 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 1953 } 1954 } 1955 1956 APInt TA = AM.sdiv(G); 1957 if (TA.sgt(0)) { 1958 TLVec.push_back(ceilingOfQuotient(-TY, TA)); 1959 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 1960 if (DstUMvalid) { 1961 TUVec.push_back(floorOfQuotient(DstUM - TY, TA)); 1962 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 1963 } 1964 } else { 1965 TUVec.push_back(floorOfQuotient(-TY, TA)); 1966 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 1967 if (DstUMvalid) { 1968 TLVec.push_back(ceilingOfQuotient(DstUM - TY, TA)); 1969 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 1970 } 1971 } 1972 1973 if (TLVec.empty() || TUVec.empty()) 1974 return false; 1975 1976 LLVM_DEBUG(dbgs() << "\t TA = " << TA << "\n"); 1977 LLVM_DEBUG(dbgs() << "\t TB = " << TB << "\n"); 1978 1979 TL = APIntOps::smax(TLVec.front(), TLVec.back()); 1980 TU = APIntOps::smin(TUVec.front(), TUVec.back()); 1981 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1982 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1983 1984 if (TL.sgt(TU)) 1985 ++ExactRDIVindependence; 1986 return TL.sgt(TU); 1987 } 1988 1989 1990 // symbolicRDIVtest - 1991 // In Section 4.5 of the Practical Dependence Testing paper,the authors 1992 // introduce a special case of Banerjee's Inequalities (also called the 1993 // Extreme-Value Test) that can handle some of the SIV and RDIV cases, 1994 // particularly cases with symbolics. Since it's only able to disprove 1995 // dependence (not compute distances or directions), we'll use it as a 1996 // fall back for the other tests. 1997 // 1998 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 1999 // where i and j are induction variables and c1 and c2 are loop invariants, 2000 // we can use the symbolic tests to disprove some dependences, serving as a 2001 // backup for the RDIV test. Note that i and j can be the same variable, 2002 // letting this test serve as a backup for the various SIV tests. 2003 // 2004 // For a dependence to exist, c1 + a1*i must equal c2 + a2*j for some 2005 // 0 <= i <= N1 and some 0 <= j <= N2, where N1 and N2 are the (normalized) 2006 // loop bounds for the i and j loops, respectively. So, ... 2007 // 2008 // c1 + a1*i = c2 + a2*j 2009 // a1*i - a2*j = c2 - c1 2010 // 2011 // To test for a dependence, we compute c2 - c1 and make sure it's in the 2012 // range of the maximum and minimum possible values of a1*i - a2*j. 2013 // Considering the signs of a1 and a2, we have 4 possible cases: 2014 // 2015 // 1) If a1 >= 0 and a2 >= 0, then 2016 // a1*0 - a2*N2 <= c2 - c1 <= a1*N1 - a2*0 2017 // -a2*N2 <= c2 - c1 <= a1*N1 2018 // 2019 // 2) If a1 >= 0 and a2 <= 0, then 2020 // a1*0 - a2*0 <= c2 - c1 <= a1*N1 - a2*N2 2021 // 0 <= c2 - c1 <= a1*N1 - a2*N2 2022 // 2023 // 3) If a1 <= 0 and a2 >= 0, then 2024 // a1*N1 - a2*N2 <= c2 - c1 <= a1*0 - a2*0 2025 // a1*N1 - a2*N2 <= c2 - c1 <= 0 2026 // 2027 // 4) If a1 <= 0 and a2 <= 0, then 2028 // a1*N1 - a2*0 <= c2 - c1 <= a1*0 - a2*N2 2029 // a1*N1 <= c2 - c1 <= -a2*N2 2030 // 2031 // return true if dependence disproved 2032 bool DependenceInfo::symbolicRDIVtest(const SCEV *A1, const SCEV *A2, 2033 const SCEV *C1, const SCEV *C2, 2034 const Loop *Loop1, 2035 const Loop *Loop2) const { 2036 ++SymbolicRDIVapplications; 2037 LLVM_DEBUG(dbgs() << "\ttry symbolic RDIV test\n"); 2038 LLVM_DEBUG(dbgs() << "\t A1 = " << *A1); 2039 LLVM_DEBUG(dbgs() << ", type = " << *A1->getType() << "\n"); 2040 LLVM_DEBUG(dbgs() << "\t A2 = " << *A2 << "\n"); 2041 LLVM_DEBUG(dbgs() << "\t C1 = " << *C1 << "\n"); 2042 LLVM_DEBUG(dbgs() << "\t C2 = " << *C2 << "\n"); 2043 const SCEV *N1 = collectUpperBound(Loop1, A1->getType()); 2044 const SCEV *N2 = collectUpperBound(Loop2, A1->getType()); 2045 LLVM_DEBUG(if (N1) dbgs() << "\t N1 = " << *N1 << "\n"); 2046 LLVM_DEBUG(if (N2) dbgs() << "\t N2 = " << *N2 << "\n"); 2047 const SCEV *C2_C1 = SE->getMinusSCEV(C2, C1); 2048 const SCEV *C1_C2 = SE->getMinusSCEV(C1, C2); 2049 LLVM_DEBUG(dbgs() << "\t C2 - C1 = " << *C2_C1 << "\n"); 2050 LLVM_DEBUG(dbgs() << "\t C1 - C2 = " << *C1_C2 << "\n"); 2051 if (SE->isKnownNonNegative(A1)) { 2052 if (SE->isKnownNonNegative(A2)) { 2053 // A1 >= 0 && A2 >= 0 2054 if (N1) { 2055 // make sure that c2 - c1 <= a1*N1 2056 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2057 LLVM_DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 2058 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1)) { 2059 ++SymbolicRDIVindependence; 2060 return true; 2061 } 2062 } 2063 if (N2) { 2064 // make sure that -a2*N2 <= c2 - c1, or a2*N2 >= c1 - c2 2065 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2066 LLVM_DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 2067 if (isKnownPredicate(CmpInst::ICMP_SLT, A2N2, C1_C2)) { 2068 ++SymbolicRDIVindependence; 2069 return true; 2070 } 2071 } 2072 } 2073 else if (SE->isKnownNonPositive(A2)) { 2074 // a1 >= 0 && a2 <= 0 2075 if (N1 && N2) { 2076 // make sure that c2 - c1 <= a1*N1 - a2*N2 2077 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2078 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2079 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 2080 LLVM_DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 2081 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1_A2N2)) { 2082 ++SymbolicRDIVindependence; 2083 return true; 2084 } 2085 } 2086 // make sure that 0 <= c2 - c1 2087 if (SE->isKnownNegative(C2_C1)) { 2088 ++SymbolicRDIVindependence; 2089 return true; 2090 } 2091 } 2092 } 2093 else if (SE->isKnownNonPositive(A1)) { 2094 if (SE->isKnownNonNegative(A2)) { 2095 // a1 <= 0 && a2 >= 0 2096 if (N1 && N2) { 2097 // make sure that a1*N1 - a2*N2 <= c2 - c1 2098 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2099 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2100 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 2101 LLVM_DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 2102 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1_A2N2, C2_C1)) { 2103 ++SymbolicRDIVindependence; 2104 return true; 2105 } 2106 } 2107 // make sure that c2 - c1 <= 0 2108 if (SE->isKnownPositive(C2_C1)) { 2109 ++SymbolicRDIVindependence; 2110 return true; 2111 } 2112 } 2113 else if (SE->isKnownNonPositive(A2)) { 2114 // a1 <= 0 && a2 <= 0 2115 if (N1) { 2116 // make sure that a1*N1 <= c2 - c1 2117 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2118 LLVM_DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 2119 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1, C2_C1)) { 2120 ++SymbolicRDIVindependence; 2121 return true; 2122 } 2123 } 2124 if (N2) { 2125 // make sure that c2 - c1 <= -a2*N2, or c1 - c2 >= a2*N2 2126 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2127 LLVM_DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 2128 if (isKnownPredicate(CmpInst::ICMP_SLT, C1_C2, A2N2)) { 2129 ++SymbolicRDIVindependence; 2130 return true; 2131 } 2132 } 2133 } 2134 } 2135 return false; 2136 } 2137 2138 2139 // testSIV - 2140 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 - a2*i] 2141 // where i is an induction variable, c1 and c2 are loop invariant, and a1 and 2142 // a2 are constant, we attack it with an SIV test. While they can all be 2143 // solved with the Exact SIV test, it's worthwhile to use simpler tests when 2144 // they apply; they're cheaper and sometimes more precise. 2145 // 2146 // Return true if dependence disproved. 2147 bool DependenceInfo::testSIV(const SCEV *Src, const SCEV *Dst, unsigned &Level, 2148 FullDependence &Result, Constraint &NewConstraint, 2149 const SCEV *&SplitIter) const { 2150 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n"); 2151 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2152 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2153 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2154 if (SrcAddRec && DstAddRec) { 2155 const SCEV *SrcConst = SrcAddRec->getStart(); 2156 const SCEV *DstConst = DstAddRec->getStart(); 2157 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2158 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2159 const Loop *CurLoop = SrcAddRec->getLoop(); 2160 assert(CurLoop == DstAddRec->getLoop() && 2161 "both loops in SIV should be same"); 2162 Level = mapSrcLoop(CurLoop); 2163 bool disproven; 2164 if (SrcCoeff == DstCoeff) 2165 disproven = strongSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2166 Level, Result, NewConstraint); 2167 else if (SrcCoeff == SE->getNegativeSCEV(DstCoeff)) 2168 disproven = weakCrossingSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2169 Level, Result, NewConstraint, SplitIter); 2170 else 2171 disproven = exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, 2172 Level, Result, NewConstraint); 2173 return disproven || 2174 gcdMIVtest(Src, Dst, Result) || 2175 symbolicRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, CurLoop); 2176 } 2177 if (SrcAddRec) { 2178 const SCEV *SrcConst = SrcAddRec->getStart(); 2179 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2180 const SCEV *DstConst = Dst; 2181 const Loop *CurLoop = SrcAddRec->getLoop(); 2182 Level = mapSrcLoop(CurLoop); 2183 return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2184 Level, Result, NewConstraint) || 2185 gcdMIVtest(Src, Dst, Result); 2186 } 2187 if (DstAddRec) { 2188 const SCEV *DstConst = DstAddRec->getStart(); 2189 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2190 const SCEV *SrcConst = Src; 2191 const Loop *CurLoop = DstAddRec->getLoop(); 2192 Level = mapDstLoop(CurLoop); 2193 return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst, 2194 CurLoop, Level, Result, NewConstraint) || 2195 gcdMIVtest(Src, Dst, Result); 2196 } 2197 llvm_unreachable("SIV test expected at least one AddRec"); 2198 return false; 2199 } 2200 2201 2202 // testRDIV - 2203 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 2204 // where i and j are induction variables, c1 and c2 are loop invariant, 2205 // and a1 and a2 are constant, we can solve it exactly with an easy adaptation 2206 // of the Exact SIV test, the Restricted Double Index Variable (RDIV) test. 2207 // It doesn't make sense to talk about distance or direction in this case, 2208 // so there's no point in making special versions of the Strong SIV test or 2209 // the Weak-crossing SIV test. 2210 // 2211 // With minor algebra, this test can also be used for things like 2212 // [c1 + a1*i + a2*j][c2]. 2213 // 2214 // Return true if dependence disproved. 2215 bool DependenceInfo::testRDIV(const SCEV *Src, const SCEV *Dst, 2216 FullDependence &Result) const { 2217 // we have 3 possible situations here: 2218 // 1) [a*i + b] and [c*j + d] 2219 // 2) [a*i + c*j + b] and [d] 2220 // 3) [b] and [a*i + c*j + d] 2221 // We need to find what we've got and get organized 2222 2223 const SCEV *SrcConst, *DstConst; 2224 const SCEV *SrcCoeff, *DstCoeff; 2225 const Loop *SrcLoop, *DstLoop; 2226 2227 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n"); 2228 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2229 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2230 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2231 if (SrcAddRec && DstAddRec) { 2232 SrcConst = SrcAddRec->getStart(); 2233 SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2234 SrcLoop = SrcAddRec->getLoop(); 2235 DstConst = DstAddRec->getStart(); 2236 DstCoeff = DstAddRec->getStepRecurrence(*SE); 2237 DstLoop = DstAddRec->getLoop(); 2238 } 2239 else if (SrcAddRec) { 2240 if (const SCEVAddRecExpr *tmpAddRec = 2241 dyn_cast<SCEVAddRecExpr>(SrcAddRec->getStart())) { 2242 SrcConst = tmpAddRec->getStart(); 2243 SrcCoeff = tmpAddRec->getStepRecurrence(*SE); 2244 SrcLoop = tmpAddRec->getLoop(); 2245 DstConst = Dst; 2246 DstCoeff = SE->getNegativeSCEV(SrcAddRec->getStepRecurrence(*SE)); 2247 DstLoop = SrcAddRec->getLoop(); 2248 } 2249 else 2250 llvm_unreachable("RDIV reached by surprising SCEVs"); 2251 } 2252 else if (DstAddRec) { 2253 if (const SCEVAddRecExpr *tmpAddRec = 2254 dyn_cast<SCEVAddRecExpr>(DstAddRec->getStart())) { 2255 DstConst = tmpAddRec->getStart(); 2256 DstCoeff = tmpAddRec->getStepRecurrence(*SE); 2257 DstLoop = tmpAddRec->getLoop(); 2258 SrcConst = Src; 2259 SrcCoeff = SE->getNegativeSCEV(DstAddRec->getStepRecurrence(*SE)); 2260 SrcLoop = DstAddRec->getLoop(); 2261 } 2262 else 2263 llvm_unreachable("RDIV reached by surprising SCEVs"); 2264 } 2265 else 2266 llvm_unreachable("RDIV expected at least one AddRec"); 2267 return exactRDIVtest(SrcCoeff, DstCoeff, 2268 SrcConst, DstConst, 2269 SrcLoop, DstLoop, 2270 Result) || 2271 gcdMIVtest(Src, Dst, Result) || 2272 symbolicRDIVtest(SrcCoeff, DstCoeff, 2273 SrcConst, DstConst, 2274 SrcLoop, DstLoop); 2275 } 2276 2277 2278 // Tests the single-subscript MIV pair (Src and Dst) for dependence. 2279 // Return true if dependence disproved. 2280 // Can sometimes refine direction vectors. 2281 bool DependenceInfo::testMIV(const SCEV *Src, const SCEV *Dst, 2282 const SmallBitVector &Loops, 2283 FullDependence &Result) const { 2284 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n"); 2285 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2286 Result.Consistent = false; 2287 return gcdMIVtest(Src, Dst, Result) || 2288 banerjeeMIVtest(Src, Dst, Loops, Result); 2289 } 2290 2291 2292 // Given a product, e.g., 10*X*Y, returns the first constant operand, 2293 // in this case 10. If there is no constant part, returns NULL. 2294 static 2295 const SCEVConstant *getConstantPart(const SCEV *Expr) { 2296 if (const auto *Constant = dyn_cast<SCEVConstant>(Expr)) 2297 return Constant; 2298 else if (const auto *Product = dyn_cast<SCEVMulExpr>(Expr)) 2299 if (const auto *Constant = dyn_cast<SCEVConstant>(Product->getOperand(0))) 2300 return Constant; 2301 return nullptr; 2302 } 2303 2304 2305 //===----------------------------------------------------------------------===// 2306 // gcdMIVtest - 2307 // Tests an MIV subscript pair for dependence. 2308 // Returns true if any possible dependence is disproved. 2309 // Marks the result as inconsistent. 2310 // Can sometimes disprove the equal direction for 1 or more loops, 2311 // as discussed in Michael Wolfe's book, 2312 // High Performance Compilers for Parallel Computing, page 235. 2313 // 2314 // We spend some effort (code!) to handle cases like 2315 // [10*i + 5*N*j + 15*M + 6], where i and j are induction variables, 2316 // but M and N are just loop-invariant variables. 2317 // This should help us handle linearized subscripts; 2318 // also makes this test a useful backup to the various SIV tests. 2319 // 2320 // It occurs to me that the presence of loop-invariant variables 2321 // changes the nature of the test from "greatest common divisor" 2322 // to "a common divisor". 2323 bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst, 2324 FullDependence &Result) const { 2325 LLVM_DEBUG(dbgs() << "starting gcd\n"); 2326 ++GCDapplications; 2327 unsigned BitWidth = SE->getTypeSizeInBits(Src->getType()); 2328 APInt RunningGCD = APInt::getZero(BitWidth); 2329 2330 // Examine Src coefficients. 2331 // Compute running GCD and record source constant. 2332 // Because we're looking for the constant at the end of the chain, 2333 // we can't quit the loop just because the GCD == 1. 2334 const SCEV *Coefficients = Src; 2335 while (const SCEVAddRecExpr *AddRec = 2336 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2337 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2338 // If the coefficient is the product of a constant and other stuff, 2339 // we can use the constant in the GCD computation. 2340 const auto *Constant = getConstantPart(Coeff); 2341 if (!Constant) 2342 return false; 2343 APInt ConstCoeff = Constant->getAPInt(); 2344 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2345 Coefficients = AddRec->getStart(); 2346 } 2347 const SCEV *SrcConst = Coefficients; 2348 2349 // Examine Dst coefficients. 2350 // Compute running GCD and record destination constant. 2351 // Because we're looking for the constant at the end of the chain, 2352 // we can't quit the loop just because the GCD == 1. 2353 Coefficients = Dst; 2354 while (const SCEVAddRecExpr *AddRec = 2355 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2356 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2357 // If the coefficient is the product of a constant and other stuff, 2358 // we can use the constant in the GCD computation. 2359 const auto *Constant = getConstantPart(Coeff); 2360 if (!Constant) 2361 return false; 2362 APInt ConstCoeff = Constant->getAPInt(); 2363 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2364 Coefficients = AddRec->getStart(); 2365 } 2366 const SCEV *DstConst = Coefficients; 2367 2368 APInt ExtraGCD = APInt::getZero(BitWidth); 2369 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 2370 LLVM_DEBUG(dbgs() << " Delta = " << *Delta << "\n"); 2371 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Delta); 2372 if (const SCEVAddExpr *Sum = dyn_cast<SCEVAddExpr>(Delta)) { 2373 // If Delta is a sum of products, we may be able to make further progress. 2374 for (unsigned Op = 0, Ops = Sum->getNumOperands(); Op < Ops; Op++) { 2375 const SCEV *Operand = Sum->getOperand(Op); 2376 if (isa<SCEVConstant>(Operand)) { 2377 assert(!Constant && "Surprised to find multiple constants"); 2378 Constant = cast<SCEVConstant>(Operand); 2379 } 2380 else if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Operand)) { 2381 // Search for constant operand to participate in GCD; 2382 // If none found; return false. 2383 const SCEVConstant *ConstOp = getConstantPart(Product); 2384 if (!ConstOp) 2385 return false; 2386 APInt ConstOpValue = ConstOp->getAPInt(); 2387 ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD, 2388 ConstOpValue.abs()); 2389 } 2390 else 2391 return false; 2392 } 2393 } 2394 if (!Constant) 2395 return false; 2396 APInt ConstDelta = cast<SCEVConstant>(Constant)->getAPInt(); 2397 LLVM_DEBUG(dbgs() << " ConstDelta = " << ConstDelta << "\n"); 2398 if (ConstDelta == 0) 2399 return false; 2400 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ExtraGCD); 2401 LLVM_DEBUG(dbgs() << " RunningGCD = " << RunningGCD << "\n"); 2402 APInt Remainder = ConstDelta.srem(RunningGCD); 2403 if (Remainder != 0) { 2404 ++GCDindependence; 2405 return true; 2406 } 2407 2408 // Try to disprove equal directions. 2409 // For example, given a subscript pair [3*i + 2*j] and [i' + 2*j' - 1], 2410 // the code above can't disprove the dependence because the GCD = 1. 2411 // So we consider what happen if i = i' and what happens if j = j'. 2412 // If i = i', we can simplify the subscript to [2*i + 2*j] and [2*j' - 1], 2413 // which is infeasible, so we can disallow the = direction for the i level. 2414 // Setting j = j' doesn't help matters, so we end up with a direction vector 2415 // of [<>, *] 2416 // 2417 // Given A[5*i + 10*j*M + 9*M*N] and A[15*i + 20*j*M - 21*N*M + 5], 2418 // we need to remember that the constant part is 5 and the RunningGCD should 2419 // be initialized to ExtraGCD = 30. 2420 LLVM_DEBUG(dbgs() << " ExtraGCD = " << ExtraGCD << '\n'); 2421 2422 bool Improved = false; 2423 Coefficients = Src; 2424 while (const SCEVAddRecExpr *AddRec = 2425 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2426 Coefficients = AddRec->getStart(); 2427 const Loop *CurLoop = AddRec->getLoop(); 2428 RunningGCD = ExtraGCD; 2429 const SCEV *SrcCoeff = AddRec->getStepRecurrence(*SE); 2430 const SCEV *DstCoeff = SE->getMinusSCEV(SrcCoeff, SrcCoeff); 2431 const SCEV *Inner = Src; 2432 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2433 AddRec = cast<SCEVAddRecExpr>(Inner); 2434 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2435 if (CurLoop == AddRec->getLoop()) 2436 ; // SrcCoeff == Coeff 2437 else { 2438 // If the coefficient is the product of a constant and other stuff, 2439 // we can use the constant in the GCD computation. 2440 Constant = getConstantPart(Coeff); 2441 if (!Constant) 2442 return false; 2443 APInt ConstCoeff = Constant->getAPInt(); 2444 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2445 } 2446 Inner = AddRec->getStart(); 2447 } 2448 Inner = Dst; 2449 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2450 AddRec = cast<SCEVAddRecExpr>(Inner); 2451 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2452 if (CurLoop == AddRec->getLoop()) 2453 DstCoeff = Coeff; 2454 else { 2455 // If the coefficient is the product of a constant and other stuff, 2456 // we can use the constant in the GCD computation. 2457 Constant = getConstantPart(Coeff); 2458 if (!Constant) 2459 return false; 2460 APInt ConstCoeff = Constant->getAPInt(); 2461 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2462 } 2463 Inner = AddRec->getStart(); 2464 } 2465 Delta = SE->getMinusSCEV(SrcCoeff, DstCoeff); 2466 // If the coefficient is the product of a constant and other stuff, 2467 // we can use the constant in the GCD computation. 2468 Constant = getConstantPart(Delta); 2469 if (!Constant) 2470 // The difference of the two coefficients might not be a product 2471 // or constant, in which case we give up on this direction. 2472 continue; 2473 APInt ConstCoeff = Constant->getAPInt(); 2474 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2475 LLVM_DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n"); 2476 if (RunningGCD != 0) { 2477 Remainder = ConstDelta.srem(RunningGCD); 2478 LLVM_DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n"); 2479 if (Remainder != 0) { 2480 unsigned Level = mapSrcLoop(CurLoop); 2481 Result.DV[Level - 1].Direction &= unsigned(~Dependence::DVEntry::EQ); 2482 Improved = true; 2483 } 2484 } 2485 } 2486 if (Improved) 2487 ++GCDsuccesses; 2488 LLVM_DEBUG(dbgs() << "all done\n"); 2489 return false; 2490 } 2491 2492 2493 //===----------------------------------------------------------------------===// 2494 // banerjeeMIVtest - 2495 // Use Banerjee's Inequalities to test an MIV subscript pair. 2496 // (Wolfe, in the race-car book, calls this the Extreme Value Test.) 2497 // Generally follows the discussion in Section 2.5.2 of 2498 // 2499 // Optimizing Supercompilers for Supercomputers 2500 // Michael Wolfe 2501 // 2502 // The inequalities given on page 25 are simplified in that loops are 2503 // normalized so that the lower bound is always 0 and the stride is always 1. 2504 // For example, Wolfe gives 2505 // 2506 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2507 // 2508 // where A_k is the coefficient of the kth index in the source subscript, 2509 // B_k is the coefficient of the kth index in the destination subscript, 2510 // U_k is the upper bound of the kth index, L_k is the lower bound of the Kth 2511 // index, and N_k is the stride of the kth index. Since all loops are normalized 2512 // by the SCEV package, N_k = 1 and L_k = 0, allowing us to simplify the 2513 // equation to 2514 // 2515 // LB^<_k = (A^-_k - B_k)^- (U_k - 0 - 1) + (A_k - B_k)0 - B_k 1 2516 // = (A^-_k - B_k)^- (U_k - 1) - B_k 2517 // 2518 // Similar simplifications are possible for the other equations. 2519 // 2520 // When we can't determine the number of iterations for a loop, 2521 // we use NULL as an indicator for the worst case, infinity. 2522 // When computing the upper bound, NULL denotes +inf; 2523 // for the lower bound, NULL denotes -inf. 2524 // 2525 // Return true if dependence disproved. 2526 bool DependenceInfo::banerjeeMIVtest(const SCEV *Src, const SCEV *Dst, 2527 const SmallBitVector &Loops, 2528 FullDependence &Result) const { 2529 LLVM_DEBUG(dbgs() << "starting Banerjee\n"); 2530 ++BanerjeeApplications; 2531 LLVM_DEBUG(dbgs() << " Src = " << *Src << '\n'); 2532 const SCEV *A0; 2533 CoefficientInfo *A = collectCoeffInfo(Src, true, A0); 2534 LLVM_DEBUG(dbgs() << " Dst = " << *Dst << '\n'); 2535 const SCEV *B0; 2536 CoefficientInfo *B = collectCoeffInfo(Dst, false, B0); 2537 BoundInfo *Bound = new BoundInfo[MaxLevels + 1]; 2538 const SCEV *Delta = SE->getMinusSCEV(B0, A0); 2539 LLVM_DEBUG(dbgs() << "\tDelta = " << *Delta << '\n'); 2540 2541 // Compute bounds for all the * directions. 2542 LLVM_DEBUG(dbgs() << "\tBounds[*]\n"); 2543 for (unsigned K = 1; K <= MaxLevels; ++K) { 2544 Bound[K].Iterations = A[K].Iterations ? A[K].Iterations : B[K].Iterations; 2545 Bound[K].Direction = Dependence::DVEntry::ALL; 2546 Bound[K].DirSet = Dependence::DVEntry::NONE; 2547 findBoundsALL(A, B, Bound, K); 2548 #ifndef NDEBUG 2549 LLVM_DEBUG(dbgs() << "\t " << K << '\t'); 2550 if (Bound[K].Lower[Dependence::DVEntry::ALL]) 2551 LLVM_DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << '\t'); 2552 else 2553 LLVM_DEBUG(dbgs() << "-inf\t"); 2554 if (Bound[K].Upper[Dependence::DVEntry::ALL]) 2555 LLVM_DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << '\n'); 2556 else 2557 LLVM_DEBUG(dbgs() << "+inf\n"); 2558 #endif 2559 } 2560 2561 // Test the *, *, *, ... case. 2562 bool Disproved = false; 2563 if (testBounds(Dependence::DVEntry::ALL, 0, Bound, Delta)) { 2564 // Explore the direction vector hierarchy. 2565 unsigned DepthExpanded = 0; 2566 unsigned NewDeps = exploreDirections(1, A, B, Bound, 2567 Loops, DepthExpanded, Delta); 2568 if (NewDeps > 0) { 2569 bool Improved = false; 2570 for (unsigned K = 1; K <= CommonLevels; ++K) { 2571 if (Loops[K]) { 2572 unsigned Old = Result.DV[K - 1].Direction; 2573 Result.DV[K - 1].Direction = Old & Bound[K].DirSet; 2574 Improved |= Old != Result.DV[K - 1].Direction; 2575 if (!Result.DV[K - 1].Direction) { 2576 Improved = false; 2577 Disproved = true; 2578 break; 2579 } 2580 } 2581 } 2582 if (Improved) 2583 ++BanerjeeSuccesses; 2584 } 2585 else { 2586 ++BanerjeeIndependence; 2587 Disproved = true; 2588 } 2589 } 2590 else { 2591 ++BanerjeeIndependence; 2592 Disproved = true; 2593 } 2594 delete [] Bound; 2595 delete [] A; 2596 delete [] B; 2597 return Disproved; 2598 } 2599 2600 2601 // Hierarchically expands the direction vector 2602 // search space, combining the directions of discovered dependences 2603 // in the DirSet field of Bound. Returns the number of distinct 2604 // dependences discovered. If the dependence is disproved, 2605 // it will return 0. 2606 unsigned DependenceInfo::exploreDirections(unsigned Level, CoefficientInfo *A, 2607 CoefficientInfo *B, BoundInfo *Bound, 2608 const SmallBitVector &Loops, 2609 unsigned &DepthExpanded, 2610 const SCEV *Delta) const { 2611 // This algorithm has worst case complexity of O(3^n), where 'n' is the number 2612 // of common loop levels. To avoid excessive compile-time, pessimize all the 2613 // results and immediately return when the number of common levels is beyond 2614 // the given threshold. 2615 if (CommonLevels > MIVMaxLevelThreshold) { 2616 LLVM_DEBUG(dbgs() << "Number of common levels exceeded the threshold. MIV " 2617 "direction exploration is terminated.\n"); 2618 for (unsigned K = 1; K <= CommonLevels; ++K) 2619 if (Loops[K]) 2620 Bound[K].DirSet = Dependence::DVEntry::ALL; 2621 return 1; 2622 } 2623 2624 if (Level > CommonLevels) { 2625 // record result 2626 LLVM_DEBUG(dbgs() << "\t["); 2627 for (unsigned K = 1; K <= CommonLevels; ++K) { 2628 if (Loops[K]) { 2629 Bound[K].DirSet |= Bound[K].Direction; 2630 #ifndef NDEBUG 2631 switch (Bound[K].Direction) { 2632 case Dependence::DVEntry::LT: 2633 LLVM_DEBUG(dbgs() << " <"); 2634 break; 2635 case Dependence::DVEntry::EQ: 2636 LLVM_DEBUG(dbgs() << " ="); 2637 break; 2638 case Dependence::DVEntry::GT: 2639 LLVM_DEBUG(dbgs() << " >"); 2640 break; 2641 case Dependence::DVEntry::ALL: 2642 LLVM_DEBUG(dbgs() << " *"); 2643 break; 2644 default: 2645 llvm_unreachable("unexpected Bound[K].Direction"); 2646 } 2647 #endif 2648 } 2649 } 2650 LLVM_DEBUG(dbgs() << " ]\n"); 2651 return 1; 2652 } 2653 if (Loops[Level]) { 2654 if (Level > DepthExpanded) { 2655 DepthExpanded = Level; 2656 // compute bounds for <, =, > at current level 2657 findBoundsLT(A, B, Bound, Level); 2658 findBoundsGT(A, B, Bound, Level); 2659 findBoundsEQ(A, B, Bound, Level); 2660 #ifndef NDEBUG 2661 LLVM_DEBUG(dbgs() << "\tBound for level = " << Level << '\n'); 2662 LLVM_DEBUG(dbgs() << "\t <\t"); 2663 if (Bound[Level].Lower[Dependence::DVEntry::LT]) 2664 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT] 2665 << '\t'); 2666 else 2667 LLVM_DEBUG(dbgs() << "-inf\t"); 2668 if (Bound[Level].Upper[Dependence::DVEntry::LT]) 2669 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT] 2670 << '\n'); 2671 else 2672 LLVM_DEBUG(dbgs() << "+inf\n"); 2673 LLVM_DEBUG(dbgs() << "\t =\t"); 2674 if (Bound[Level].Lower[Dependence::DVEntry::EQ]) 2675 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ] 2676 << '\t'); 2677 else 2678 LLVM_DEBUG(dbgs() << "-inf\t"); 2679 if (Bound[Level].Upper[Dependence::DVEntry::EQ]) 2680 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ] 2681 << '\n'); 2682 else 2683 LLVM_DEBUG(dbgs() << "+inf\n"); 2684 LLVM_DEBUG(dbgs() << "\t >\t"); 2685 if (Bound[Level].Lower[Dependence::DVEntry::GT]) 2686 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::GT] 2687 << '\t'); 2688 else 2689 LLVM_DEBUG(dbgs() << "-inf\t"); 2690 if (Bound[Level].Upper[Dependence::DVEntry::GT]) 2691 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT] 2692 << '\n'); 2693 else 2694 LLVM_DEBUG(dbgs() << "+inf\n"); 2695 #endif 2696 } 2697 2698 unsigned NewDeps = 0; 2699 2700 // test bounds for <, *, *, ... 2701 if (testBounds(Dependence::DVEntry::LT, Level, Bound, Delta)) 2702 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2703 Loops, DepthExpanded, Delta); 2704 2705 // Test bounds for =, *, *, ... 2706 if (testBounds(Dependence::DVEntry::EQ, Level, Bound, Delta)) 2707 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2708 Loops, DepthExpanded, Delta); 2709 2710 // test bounds for >, *, *, ... 2711 if (testBounds(Dependence::DVEntry::GT, Level, Bound, Delta)) 2712 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2713 Loops, DepthExpanded, Delta); 2714 2715 Bound[Level].Direction = Dependence::DVEntry::ALL; 2716 return NewDeps; 2717 } 2718 else 2719 return exploreDirections(Level + 1, A, B, Bound, Loops, DepthExpanded, Delta); 2720 } 2721 2722 2723 // Returns true iff the current bounds are plausible. 2724 bool DependenceInfo::testBounds(unsigned char DirKind, unsigned Level, 2725 BoundInfo *Bound, const SCEV *Delta) const { 2726 Bound[Level].Direction = DirKind; 2727 if (const SCEV *LowerBound = getLowerBound(Bound)) 2728 if (isKnownPredicate(CmpInst::ICMP_SGT, LowerBound, Delta)) 2729 return false; 2730 if (const SCEV *UpperBound = getUpperBound(Bound)) 2731 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, UpperBound)) 2732 return false; 2733 return true; 2734 } 2735 2736 2737 // Computes the upper and lower bounds for level K 2738 // using the * direction. Records them in Bound. 2739 // Wolfe gives the equations 2740 // 2741 // LB^*_k = (A^-_k - B^+_k)(U_k - L_k) + (A_k - B_k)L_k 2742 // UB^*_k = (A^+_k - B^-_k)(U_k - L_k) + (A_k - B_k)L_k 2743 // 2744 // Since we normalize loops, we can simplify these equations to 2745 // 2746 // LB^*_k = (A^-_k - B^+_k)U_k 2747 // UB^*_k = (A^+_k - B^-_k)U_k 2748 // 2749 // We must be careful to handle the case where the upper bound is unknown. 2750 // Note that the lower bound is always <= 0 2751 // and the upper bound is always >= 0. 2752 void DependenceInfo::findBoundsALL(CoefficientInfo *A, CoefficientInfo *B, 2753 BoundInfo *Bound, unsigned K) const { 2754 Bound[K].Lower[Dependence::DVEntry::ALL] = nullptr; // Default value = -infinity. 2755 Bound[K].Upper[Dependence::DVEntry::ALL] = nullptr; // Default value = +infinity. 2756 if (Bound[K].Iterations) { 2757 Bound[K].Lower[Dependence::DVEntry::ALL] = 2758 SE->getMulExpr(SE->getMinusSCEV(A[K].NegPart, B[K].PosPart), 2759 Bound[K].Iterations); 2760 Bound[K].Upper[Dependence::DVEntry::ALL] = 2761 SE->getMulExpr(SE->getMinusSCEV(A[K].PosPart, B[K].NegPart), 2762 Bound[K].Iterations); 2763 } 2764 else { 2765 // If the difference is 0, we won't need to know the number of iterations. 2766 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].NegPart, B[K].PosPart)) 2767 Bound[K].Lower[Dependence::DVEntry::ALL] = 2768 SE->getZero(A[K].Coeff->getType()); 2769 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].PosPart, B[K].NegPart)) 2770 Bound[K].Upper[Dependence::DVEntry::ALL] = 2771 SE->getZero(A[K].Coeff->getType()); 2772 } 2773 } 2774 2775 2776 // Computes the upper and lower bounds for level K 2777 // using the = direction. Records them in Bound. 2778 // Wolfe gives the equations 2779 // 2780 // LB^=_k = (A_k - B_k)^- (U_k - L_k) + (A_k - B_k)L_k 2781 // UB^=_k = (A_k - B_k)^+ (U_k - L_k) + (A_k - B_k)L_k 2782 // 2783 // Since we normalize loops, we can simplify these equations to 2784 // 2785 // LB^=_k = (A_k - B_k)^- U_k 2786 // UB^=_k = (A_k - B_k)^+ U_k 2787 // 2788 // We must be careful to handle the case where the upper bound is unknown. 2789 // Note that the lower bound is always <= 0 2790 // and the upper bound is always >= 0. 2791 void DependenceInfo::findBoundsEQ(CoefficientInfo *A, CoefficientInfo *B, 2792 BoundInfo *Bound, unsigned K) const { 2793 Bound[K].Lower[Dependence::DVEntry::EQ] = nullptr; // Default value = -infinity. 2794 Bound[K].Upper[Dependence::DVEntry::EQ] = nullptr; // Default value = +infinity. 2795 if (Bound[K].Iterations) { 2796 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2797 const SCEV *NegativePart = getNegativePart(Delta); 2798 Bound[K].Lower[Dependence::DVEntry::EQ] = 2799 SE->getMulExpr(NegativePart, Bound[K].Iterations); 2800 const SCEV *PositivePart = getPositivePart(Delta); 2801 Bound[K].Upper[Dependence::DVEntry::EQ] = 2802 SE->getMulExpr(PositivePart, Bound[K].Iterations); 2803 } 2804 else { 2805 // If the positive/negative part of the difference is 0, 2806 // we won't need to know the number of iterations. 2807 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2808 const SCEV *NegativePart = getNegativePart(Delta); 2809 if (NegativePart->isZero()) 2810 Bound[K].Lower[Dependence::DVEntry::EQ] = NegativePart; // Zero 2811 const SCEV *PositivePart = getPositivePart(Delta); 2812 if (PositivePart->isZero()) 2813 Bound[K].Upper[Dependence::DVEntry::EQ] = PositivePart; // Zero 2814 } 2815 } 2816 2817 2818 // Computes the upper and lower bounds for level K 2819 // using the < direction. Records them in Bound. 2820 // Wolfe gives the equations 2821 // 2822 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2823 // UB^<_k = (A^+_k - B_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2824 // 2825 // Since we normalize loops, we can simplify these equations to 2826 // 2827 // LB^<_k = (A^-_k - B_k)^- (U_k - 1) - B_k 2828 // UB^<_k = (A^+_k - B_k)^+ (U_k - 1) - B_k 2829 // 2830 // We must be careful to handle the case where the upper bound is unknown. 2831 void DependenceInfo::findBoundsLT(CoefficientInfo *A, CoefficientInfo *B, 2832 BoundInfo *Bound, unsigned K) const { 2833 Bound[K].Lower[Dependence::DVEntry::LT] = nullptr; // Default value = -infinity. 2834 Bound[K].Upper[Dependence::DVEntry::LT] = nullptr; // Default value = +infinity. 2835 if (Bound[K].Iterations) { 2836 const SCEV *Iter_1 = SE->getMinusSCEV( 2837 Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType())); 2838 const SCEV *NegPart = 2839 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2840 Bound[K].Lower[Dependence::DVEntry::LT] = 2841 SE->getMinusSCEV(SE->getMulExpr(NegPart, Iter_1), B[K].Coeff); 2842 const SCEV *PosPart = 2843 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2844 Bound[K].Upper[Dependence::DVEntry::LT] = 2845 SE->getMinusSCEV(SE->getMulExpr(PosPart, Iter_1), B[K].Coeff); 2846 } 2847 else { 2848 // If the positive/negative part of the difference is 0, 2849 // we won't need to know the number of iterations. 2850 const SCEV *NegPart = 2851 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2852 if (NegPart->isZero()) 2853 Bound[K].Lower[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2854 const SCEV *PosPart = 2855 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2856 if (PosPart->isZero()) 2857 Bound[K].Upper[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2858 } 2859 } 2860 2861 2862 // Computes the upper and lower bounds for level K 2863 // using the > direction. Records them in Bound. 2864 // Wolfe gives the equations 2865 // 2866 // LB^>_k = (A_k - B^+_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2867 // UB^>_k = (A_k - B^-_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2868 // 2869 // Since we normalize loops, we can simplify these equations to 2870 // 2871 // LB^>_k = (A_k - B^+_k)^- (U_k - 1) + A_k 2872 // UB^>_k = (A_k - B^-_k)^+ (U_k - 1) + A_k 2873 // 2874 // We must be careful to handle the case where the upper bound is unknown. 2875 void DependenceInfo::findBoundsGT(CoefficientInfo *A, CoefficientInfo *B, 2876 BoundInfo *Bound, unsigned K) const { 2877 Bound[K].Lower[Dependence::DVEntry::GT] = nullptr; // Default value = -infinity. 2878 Bound[K].Upper[Dependence::DVEntry::GT] = nullptr; // Default value = +infinity. 2879 if (Bound[K].Iterations) { 2880 const SCEV *Iter_1 = SE->getMinusSCEV( 2881 Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType())); 2882 const SCEV *NegPart = 2883 getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2884 Bound[K].Lower[Dependence::DVEntry::GT] = 2885 SE->getAddExpr(SE->getMulExpr(NegPart, Iter_1), A[K].Coeff); 2886 const SCEV *PosPart = 2887 getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2888 Bound[K].Upper[Dependence::DVEntry::GT] = 2889 SE->getAddExpr(SE->getMulExpr(PosPart, Iter_1), A[K].Coeff); 2890 } 2891 else { 2892 // If the positive/negative part of the difference is 0, 2893 // we won't need to know the number of iterations. 2894 const SCEV *NegPart = getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2895 if (NegPart->isZero()) 2896 Bound[K].Lower[Dependence::DVEntry::GT] = A[K].Coeff; 2897 const SCEV *PosPart = getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2898 if (PosPart->isZero()) 2899 Bound[K].Upper[Dependence::DVEntry::GT] = A[K].Coeff; 2900 } 2901 } 2902 2903 2904 // X^+ = max(X, 0) 2905 const SCEV *DependenceInfo::getPositivePart(const SCEV *X) const { 2906 return SE->getSMaxExpr(X, SE->getZero(X->getType())); 2907 } 2908 2909 2910 // X^- = min(X, 0) 2911 const SCEV *DependenceInfo::getNegativePart(const SCEV *X) const { 2912 return SE->getSMinExpr(X, SE->getZero(X->getType())); 2913 } 2914 2915 2916 // Walks through the subscript, 2917 // collecting each coefficient, the associated loop bounds, 2918 // and recording its positive and negative parts for later use. 2919 DependenceInfo::CoefficientInfo * 2920 DependenceInfo::collectCoeffInfo(const SCEV *Subscript, bool SrcFlag, 2921 const SCEV *&Constant) const { 2922 const SCEV *Zero = SE->getZero(Subscript->getType()); 2923 CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1]; 2924 for (unsigned K = 1; K <= MaxLevels; ++K) { 2925 CI[K].Coeff = Zero; 2926 CI[K].PosPart = Zero; 2927 CI[K].NegPart = Zero; 2928 CI[K].Iterations = nullptr; 2929 } 2930 while (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Subscript)) { 2931 const Loop *L = AddRec->getLoop(); 2932 unsigned K = SrcFlag ? mapSrcLoop(L) : mapDstLoop(L); 2933 CI[K].Coeff = AddRec->getStepRecurrence(*SE); 2934 CI[K].PosPart = getPositivePart(CI[K].Coeff); 2935 CI[K].NegPart = getNegativePart(CI[K].Coeff); 2936 CI[K].Iterations = collectUpperBound(L, Subscript->getType()); 2937 Subscript = AddRec->getStart(); 2938 } 2939 Constant = Subscript; 2940 #ifndef NDEBUG 2941 LLVM_DEBUG(dbgs() << "\tCoefficient Info\n"); 2942 for (unsigned K = 1; K <= MaxLevels; ++K) { 2943 LLVM_DEBUG(dbgs() << "\t " << K << "\t" << *CI[K].Coeff); 2944 LLVM_DEBUG(dbgs() << "\tPos Part = "); 2945 LLVM_DEBUG(dbgs() << *CI[K].PosPart); 2946 LLVM_DEBUG(dbgs() << "\tNeg Part = "); 2947 LLVM_DEBUG(dbgs() << *CI[K].NegPart); 2948 LLVM_DEBUG(dbgs() << "\tUpper Bound = "); 2949 if (CI[K].Iterations) 2950 LLVM_DEBUG(dbgs() << *CI[K].Iterations); 2951 else 2952 LLVM_DEBUG(dbgs() << "+inf"); 2953 LLVM_DEBUG(dbgs() << '\n'); 2954 } 2955 LLVM_DEBUG(dbgs() << "\t Constant = " << *Subscript << '\n'); 2956 #endif 2957 return CI; 2958 } 2959 2960 2961 // Looks through all the bounds info and 2962 // computes the lower bound given the current direction settings 2963 // at each level. If the lower bound for any level is -inf, 2964 // the result is -inf. 2965 const SCEV *DependenceInfo::getLowerBound(BoundInfo *Bound) const { 2966 const SCEV *Sum = Bound[1].Lower[Bound[1].Direction]; 2967 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 2968 if (Bound[K].Lower[Bound[K].Direction]) 2969 Sum = SE->getAddExpr(Sum, Bound[K].Lower[Bound[K].Direction]); 2970 else 2971 Sum = nullptr; 2972 } 2973 return Sum; 2974 } 2975 2976 2977 // Looks through all the bounds info and 2978 // computes the upper bound given the current direction settings 2979 // at each level. If the upper bound at any level is +inf, 2980 // the result is +inf. 2981 const SCEV *DependenceInfo::getUpperBound(BoundInfo *Bound) const { 2982 const SCEV *Sum = Bound[1].Upper[Bound[1].Direction]; 2983 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 2984 if (Bound[K].Upper[Bound[K].Direction]) 2985 Sum = SE->getAddExpr(Sum, Bound[K].Upper[Bound[K].Direction]); 2986 else 2987 Sum = nullptr; 2988 } 2989 return Sum; 2990 } 2991 2992 2993 //===----------------------------------------------------------------------===// 2994 // Constraint manipulation for Delta test. 2995 2996 // Given a linear SCEV, 2997 // return the coefficient (the step) 2998 // corresponding to the specified loop. 2999 // If there isn't one, return 0. 3000 // For example, given a*i + b*j + c*k, finding the coefficient 3001 // corresponding to the j loop would yield b. 3002 const SCEV *DependenceInfo::findCoefficient(const SCEV *Expr, 3003 const Loop *TargetLoop) const { 3004 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 3005 if (!AddRec) 3006 return SE->getZero(Expr->getType()); 3007 if (AddRec->getLoop() == TargetLoop) 3008 return AddRec->getStepRecurrence(*SE); 3009 return findCoefficient(AddRec->getStart(), TargetLoop); 3010 } 3011 3012 3013 // Given a linear SCEV, 3014 // return the SCEV given by zeroing out the coefficient 3015 // corresponding to the specified loop. 3016 // For example, given a*i + b*j + c*k, zeroing the coefficient 3017 // corresponding to the j loop would yield a*i + c*k. 3018 const SCEV *DependenceInfo::zeroCoefficient(const SCEV *Expr, 3019 const Loop *TargetLoop) const { 3020 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 3021 if (!AddRec) 3022 return Expr; // ignore 3023 if (AddRec->getLoop() == TargetLoop) 3024 return AddRec->getStart(); 3025 return SE->getAddRecExpr(zeroCoefficient(AddRec->getStart(), TargetLoop), 3026 AddRec->getStepRecurrence(*SE), 3027 AddRec->getLoop(), 3028 AddRec->getNoWrapFlags()); 3029 } 3030 3031 3032 // Given a linear SCEV Expr, 3033 // return the SCEV given by adding some Value to the 3034 // coefficient corresponding to the specified TargetLoop. 3035 // For example, given a*i + b*j + c*k, adding 1 to the coefficient 3036 // corresponding to the j loop would yield a*i + (b+1)*j + c*k. 3037 const SCEV *DependenceInfo::addToCoefficient(const SCEV *Expr, 3038 const Loop *TargetLoop, 3039 const SCEV *Value) const { 3040 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 3041 if (!AddRec) // create a new addRec 3042 return SE->getAddRecExpr(Expr, 3043 Value, 3044 TargetLoop, 3045 SCEV::FlagAnyWrap); // Worst case, with no info. 3046 if (AddRec->getLoop() == TargetLoop) { 3047 const SCEV *Sum = SE->getAddExpr(AddRec->getStepRecurrence(*SE), Value); 3048 if (Sum->isZero()) 3049 return AddRec->getStart(); 3050 return SE->getAddRecExpr(AddRec->getStart(), 3051 Sum, 3052 AddRec->getLoop(), 3053 AddRec->getNoWrapFlags()); 3054 } 3055 if (SE->isLoopInvariant(AddRec, TargetLoop)) 3056 return SE->getAddRecExpr(AddRec, Value, TargetLoop, SCEV::FlagAnyWrap); 3057 return SE->getAddRecExpr( 3058 addToCoefficient(AddRec->getStart(), TargetLoop, Value), 3059 AddRec->getStepRecurrence(*SE), AddRec->getLoop(), 3060 AddRec->getNoWrapFlags()); 3061 } 3062 3063 3064 // Review the constraints, looking for opportunities 3065 // to simplify a subscript pair (Src and Dst). 3066 // Return true if some simplification occurs. 3067 // If the simplification isn't exact (that is, if it is conservative 3068 // in terms of dependence), set consistent to false. 3069 // Corresponds to Figure 5 from the paper 3070 // 3071 // Practical Dependence Testing 3072 // Goff, Kennedy, Tseng 3073 // PLDI 1991 3074 bool DependenceInfo::propagate(const SCEV *&Src, const SCEV *&Dst, 3075 SmallBitVector &Loops, 3076 SmallVectorImpl<Constraint> &Constraints, 3077 bool &Consistent) { 3078 bool Result = false; 3079 for (unsigned LI : Loops.set_bits()) { 3080 LLVM_DEBUG(dbgs() << "\t Constraint[" << LI << "] is"); 3081 LLVM_DEBUG(Constraints[LI].dump(dbgs())); 3082 if (Constraints[LI].isDistance()) 3083 Result |= propagateDistance(Src, Dst, Constraints[LI], Consistent); 3084 else if (Constraints[LI].isLine()) 3085 Result |= propagateLine(Src, Dst, Constraints[LI], Consistent); 3086 else if (Constraints[LI].isPoint()) 3087 Result |= propagatePoint(Src, Dst, Constraints[LI]); 3088 } 3089 return Result; 3090 } 3091 3092 3093 // Attempt to propagate a distance 3094 // constraint into a subscript pair (Src and Dst). 3095 // Return true if some simplification occurs. 3096 // If the simplification isn't exact (that is, if it is conservative 3097 // in terms of dependence), set consistent to false. 3098 bool DependenceInfo::propagateDistance(const SCEV *&Src, const SCEV *&Dst, 3099 Constraint &CurConstraint, 3100 bool &Consistent) { 3101 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3102 LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 3103 const SCEV *A_K = findCoefficient(Src, CurLoop); 3104 if (A_K->isZero()) 3105 return false; 3106 const SCEV *DA_K = SE->getMulExpr(A_K, CurConstraint.getD()); 3107 Src = SE->getMinusSCEV(Src, DA_K); 3108 Src = zeroCoefficient(Src, CurLoop); 3109 LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3110 LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3111 Dst = addToCoefficient(Dst, CurLoop, SE->getNegativeSCEV(A_K)); 3112 LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3113 if (!findCoefficient(Dst, CurLoop)->isZero()) 3114 Consistent = false; 3115 return true; 3116 } 3117 3118 3119 // Attempt to propagate a line 3120 // constraint into a subscript pair (Src and Dst). 3121 // Return true if some simplification occurs. 3122 // If the simplification isn't exact (that is, if it is conservative 3123 // in terms of dependence), set consistent to false. 3124 bool DependenceInfo::propagateLine(const SCEV *&Src, const SCEV *&Dst, 3125 Constraint &CurConstraint, 3126 bool &Consistent) { 3127 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3128 const SCEV *A = CurConstraint.getA(); 3129 const SCEV *B = CurConstraint.getB(); 3130 const SCEV *C = CurConstraint.getC(); 3131 LLVM_DEBUG(dbgs() << "\t\tA = " << *A << ", B = " << *B << ", C = " << *C 3132 << "\n"); 3133 LLVM_DEBUG(dbgs() << "\t\tSrc = " << *Src << "\n"); 3134 LLVM_DEBUG(dbgs() << "\t\tDst = " << *Dst << "\n"); 3135 if (A->isZero()) { 3136 const SCEVConstant *Bconst = dyn_cast<SCEVConstant>(B); 3137 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3138 if (!Bconst || !Cconst) return false; 3139 APInt Beta = Bconst->getAPInt(); 3140 APInt Charlie = Cconst->getAPInt(); 3141 APInt CdivB = Charlie.sdiv(Beta); 3142 assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B"); 3143 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3144 // Src = SE->getAddExpr(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3145 Src = SE->getMinusSCEV(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3146 Dst = zeroCoefficient(Dst, CurLoop); 3147 if (!findCoefficient(Src, CurLoop)->isZero()) 3148 Consistent = false; 3149 } 3150 else if (B->isZero()) { 3151 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3152 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3153 if (!Aconst || !Cconst) return false; 3154 APInt Alpha = Aconst->getAPInt(); 3155 APInt Charlie = Cconst->getAPInt(); 3156 APInt CdivA = Charlie.sdiv(Alpha); 3157 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3158 const SCEV *A_K = findCoefficient(Src, CurLoop); 3159 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3160 Src = zeroCoefficient(Src, CurLoop); 3161 if (!findCoefficient(Dst, CurLoop)->isZero()) 3162 Consistent = false; 3163 } 3164 else if (isKnownPredicate(CmpInst::ICMP_EQ, A, B)) { 3165 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3166 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3167 if (!Aconst || !Cconst) return false; 3168 APInt Alpha = Aconst->getAPInt(); 3169 APInt Charlie = Cconst->getAPInt(); 3170 APInt CdivA = Charlie.sdiv(Alpha); 3171 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3172 const SCEV *A_K = findCoefficient(Src, CurLoop); 3173 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3174 Src = zeroCoefficient(Src, CurLoop); 3175 Dst = addToCoefficient(Dst, CurLoop, A_K); 3176 if (!findCoefficient(Dst, CurLoop)->isZero()) 3177 Consistent = false; 3178 } 3179 else { 3180 // paper is incorrect here, or perhaps just misleading 3181 const SCEV *A_K = findCoefficient(Src, CurLoop); 3182 Src = SE->getMulExpr(Src, A); 3183 Dst = SE->getMulExpr(Dst, A); 3184 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, C)); 3185 Src = zeroCoefficient(Src, CurLoop); 3186 Dst = addToCoefficient(Dst, CurLoop, SE->getMulExpr(A_K, B)); 3187 if (!findCoefficient(Dst, CurLoop)->isZero()) 3188 Consistent = false; 3189 } 3190 LLVM_DEBUG(dbgs() << "\t\tnew Src = " << *Src << "\n"); 3191 LLVM_DEBUG(dbgs() << "\t\tnew Dst = " << *Dst << "\n"); 3192 return true; 3193 } 3194 3195 3196 // Attempt to propagate a point 3197 // constraint into a subscript pair (Src and Dst). 3198 // Return true if some simplification occurs. 3199 bool DependenceInfo::propagatePoint(const SCEV *&Src, const SCEV *&Dst, 3200 Constraint &CurConstraint) { 3201 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3202 const SCEV *A_K = findCoefficient(Src, CurLoop); 3203 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3204 const SCEV *XA_K = SE->getMulExpr(A_K, CurConstraint.getX()); 3205 const SCEV *YAP_K = SE->getMulExpr(AP_K, CurConstraint.getY()); 3206 LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 3207 Src = SE->getAddExpr(Src, SE->getMinusSCEV(XA_K, YAP_K)); 3208 Src = zeroCoefficient(Src, CurLoop); 3209 LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3210 LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3211 Dst = zeroCoefficient(Dst, CurLoop); 3212 LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3213 return true; 3214 } 3215 3216 3217 // Update direction vector entry based on the current constraint. 3218 void DependenceInfo::updateDirection(Dependence::DVEntry &Level, 3219 const Constraint &CurConstraint) const { 3220 LLVM_DEBUG(dbgs() << "\tUpdate direction, constraint ="); 3221 LLVM_DEBUG(CurConstraint.dump(dbgs())); 3222 if (CurConstraint.isAny()) 3223 ; // use defaults 3224 else if (CurConstraint.isDistance()) { 3225 // this one is consistent, the others aren't 3226 Level.Scalar = false; 3227 Level.Distance = CurConstraint.getD(); 3228 unsigned NewDirection = Dependence::DVEntry::NONE; 3229 if (!SE->isKnownNonZero(Level.Distance)) // if may be zero 3230 NewDirection = Dependence::DVEntry::EQ; 3231 if (!SE->isKnownNonPositive(Level.Distance)) // if may be positive 3232 NewDirection |= Dependence::DVEntry::LT; 3233 if (!SE->isKnownNonNegative(Level.Distance)) // if may be negative 3234 NewDirection |= Dependence::DVEntry::GT; 3235 Level.Direction &= NewDirection; 3236 } 3237 else if (CurConstraint.isLine()) { 3238 Level.Scalar = false; 3239 Level.Distance = nullptr; 3240 // direction should be accurate 3241 } 3242 else if (CurConstraint.isPoint()) { 3243 Level.Scalar = false; 3244 Level.Distance = nullptr; 3245 unsigned NewDirection = Dependence::DVEntry::NONE; 3246 if (!isKnownPredicate(CmpInst::ICMP_NE, 3247 CurConstraint.getY(), 3248 CurConstraint.getX())) 3249 // if X may be = Y 3250 NewDirection |= Dependence::DVEntry::EQ; 3251 if (!isKnownPredicate(CmpInst::ICMP_SLE, 3252 CurConstraint.getY(), 3253 CurConstraint.getX())) 3254 // if Y may be > X 3255 NewDirection |= Dependence::DVEntry::LT; 3256 if (!isKnownPredicate(CmpInst::ICMP_SGE, 3257 CurConstraint.getY(), 3258 CurConstraint.getX())) 3259 // if Y may be < X 3260 NewDirection |= Dependence::DVEntry::GT; 3261 Level.Direction &= NewDirection; 3262 } 3263 else 3264 llvm_unreachable("constraint has unexpected kind"); 3265 } 3266 3267 /// Check if we can delinearize the subscripts. If the SCEVs representing the 3268 /// source and destination array references are recurrences on a nested loop, 3269 /// this function flattens the nested recurrences into separate recurrences 3270 /// for each loop level. 3271 bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst, 3272 SmallVectorImpl<Subscript> &Pair) { 3273 assert(isLoadOrStore(Src) && "instruction is not load or store"); 3274 assert(isLoadOrStore(Dst) && "instruction is not load or store"); 3275 Value *SrcPtr = getLoadStorePointerOperand(Src); 3276 Value *DstPtr = getLoadStorePointerOperand(Dst); 3277 Loop *SrcLoop = LI->getLoopFor(Src->getParent()); 3278 Loop *DstLoop = LI->getLoopFor(Dst->getParent()); 3279 const SCEV *SrcAccessFn = SE->getSCEVAtScope(SrcPtr, SrcLoop); 3280 const SCEV *DstAccessFn = SE->getSCEVAtScope(DstPtr, DstLoop); 3281 const SCEVUnknown *SrcBase = 3282 dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn)); 3283 const SCEVUnknown *DstBase = 3284 dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn)); 3285 3286 if (!SrcBase || !DstBase || SrcBase != DstBase) 3287 return false; 3288 3289 SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts; 3290 3291 if (!tryDelinearizeFixedSize(Src, Dst, SrcAccessFn, DstAccessFn, 3292 SrcSubscripts, DstSubscripts) && 3293 !tryDelinearizeParametricSize(Src, Dst, SrcAccessFn, DstAccessFn, 3294 SrcSubscripts, DstSubscripts)) 3295 return false; 3296 3297 int Size = SrcSubscripts.size(); 3298 LLVM_DEBUG({ 3299 dbgs() << "\nSrcSubscripts: "; 3300 for (int I = 0; I < Size; I++) 3301 dbgs() << *SrcSubscripts[I]; 3302 dbgs() << "\nDstSubscripts: "; 3303 for (int I = 0; I < Size; I++) 3304 dbgs() << *DstSubscripts[I]; 3305 }); 3306 3307 // The delinearization transforms a single-subscript MIV dependence test into 3308 // a multi-subscript SIV dependence test that is easier to compute. So we 3309 // resize Pair to contain as many pairs of subscripts as the delinearization 3310 // has found, and then initialize the pairs following the delinearization. 3311 Pair.resize(Size); 3312 for (int I = 0; I < Size; ++I) { 3313 Pair[I].Src = SrcSubscripts[I]; 3314 Pair[I].Dst = DstSubscripts[I]; 3315 unifySubscriptType(&Pair[I]); 3316 } 3317 3318 return true; 3319 } 3320 3321 bool DependenceInfo::tryDelinearizeFixedSize( 3322 Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn, 3323 const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts, 3324 SmallVectorImpl<const SCEV *> &DstSubscripts) { 3325 3326 Value *SrcPtr = getLoadStorePointerOperand(Src); 3327 Value *DstPtr = getLoadStorePointerOperand(Dst); 3328 const SCEVUnknown *SrcBase = 3329 dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn)); 3330 const SCEVUnknown *DstBase = 3331 dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn)); 3332 assert(SrcBase && DstBase && SrcBase == DstBase && 3333 "expected src and dst scev unknowns to be equal"); 3334 3335 // Check the simple case where the array dimensions are fixed size. 3336 auto *SrcGEP = dyn_cast<GetElementPtrInst>(SrcPtr); 3337 auto *DstGEP = dyn_cast<GetElementPtrInst>(DstPtr); 3338 if (!SrcGEP || !DstGEP) 3339 return false; 3340 3341 SmallVector<int, 4> SrcSizes, DstSizes; 3342 getIndexExpressionsFromGEP(*SE, SrcGEP, SrcSubscripts, SrcSizes); 3343 getIndexExpressionsFromGEP(*SE, DstGEP, DstSubscripts, DstSizes); 3344 3345 // Check that the two size arrays are non-empty and equal in length and 3346 // value. 3347 if (SrcSizes.empty() || SrcSubscripts.size() <= 1 || 3348 SrcSizes.size() != DstSizes.size() || 3349 !std::equal(SrcSizes.begin(), SrcSizes.end(), DstSizes.begin())) { 3350 SrcSubscripts.clear(); 3351 DstSubscripts.clear(); 3352 return false; 3353 } 3354 3355 Value *SrcBasePtr = SrcGEP->getOperand(0); 3356 Value *DstBasePtr = DstGEP->getOperand(0); 3357 while (auto *PCast = dyn_cast<BitCastInst>(SrcBasePtr)) 3358 SrcBasePtr = PCast->getOperand(0); 3359 while (auto *PCast = dyn_cast<BitCastInst>(DstBasePtr)) 3360 DstBasePtr = PCast->getOperand(0); 3361 3362 // Check that for identical base pointers we do not miss index offsets 3363 // that have been added before this GEP is applied. 3364 if (SrcBasePtr != SrcBase->getValue() || DstBasePtr != DstBase->getValue()) { 3365 SrcSubscripts.clear(); 3366 DstSubscripts.clear(); 3367 return false; 3368 } 3369 3370 assert(SrcSubscripts.size() == DstSubscripts.size() && 3371 SrcSubscripts.size() == SrcSizes.size() + 1 && 3372 "Expected equal number of entries in the list of sizes and " 3373 "subscripts."); 3374 3375 // In general we cannot safely assume that the subscripts recovered from GEPs 3376 // are in the range of values defined for their corresponding array 3377 // dimensions. For example some C language usage/interpretation make it 3378 // impossible to verify this at compile-time. As such we can only delinearize 3379 // iff the subscripts are positive and are less than the range of the 3380 // dimension. 3381 if (!DisableDelinearizationChecks) { 3382 auto AllIndiciesInRange = [&](SmallVector<int, 4> &DimensionSizes, 3383 SmallVectorImpl<const SCEV *> &Subscripts, 3384 Value *Ptr) { 3385 size_t SSize = Subscripts.size(); 3386 for (size_t I = 1; I < SSize; ++I) { 3387 const SCEV *S = Subscripts[I]; 3388 if (!isKnownNonNegative(S, Ptr)) 3389 return false; 3390 if (auto *SType = dyn_cast<IntegerType>(S->getType())) { 3391 const SCEV *Range = SE->getConstant( 3392 ConstantInt::get(SType, DimensionSizes[I - 1], false)); 3393 if (!isKnownLessThan(S, Range)) 3394 return false; 3395 } 3396 } 3397 return true; 3398 }; 3399 3400 if (!AllIndiciesInRange(SrcSizes, SrcSubscripts, SrcPtr) || 3401 !AllIndiciesInRange(DstSizes, DstSubscripts, DstPtr)) { 3402 SrcSubscripts.clear(); 3403 DstSubscripts.clear(); 3404 return false; 3405 } 3406 } 3407 LLVM_DEBUG({ 3408 dbgs() << "Delinearized subscripts of fixed-size array\n" 3409 << "SrcGEP:" << *SrcGEP << "\n" 3410 << "DstGEP:" << *DstGEP << "\n"; 3411 }); 3412 return true; 3413 } 3414 3415 bool DependenceInfo::tryDelinearizeParametricSize( 3416 Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn, 3417 const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts, 3418 SmallVectorImpl<const SCEV *> &DstSubscripts) { 3419 3420 Value *SrcPtr = getLoadStorePointerOperand(Src); 3421 Value *DstPtr = getLoadStorePointerOperand(Dst); 3422 const SCEVUnknown *SrcBase = 3423 dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn)); 3424 const SCEVUnknown *DstBase = 3425 dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn)); 3426 assert(SrcBase && DstBase && SrcBase == DstBase && 3427 "expected src and dst scev unknowns to be equal"); 3428 3429 const SCEV *ElementSize = SE->getElementSize(Src); 3430 if (ElementSize != SE->getElementSize(Dst)) 3431 return false; 3432 3433 const SCEV *SrcSCEV = SE->getMinusSCEV(SrcAccessFn, SrcBase); 3434 const SCEV *DstSCEV = SE->getMinusSCEV(DstAccessFn, DstBase); 3435 3436 const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(SrcSCEV); 3437 const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(DstSCEV); 3438 if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine()) 3439 return false; 3440 3441 // First step: collect parametric terms in both array references. 3442 SmallVector<const SCEV *, 4> Terms; 3443 collectParametricTerms(*SE, SrcAR, Terms); 3444 collectParametricTerms(*SE, DstAR, Terms); 3445 3446 // Second step: find subscript sizes. 3447 SmallVector<const SCEV *, 4> Sizes; 3448 findArrayDimensions(*SE, Terms, Sizes, ElementSize); 3449 3450 // Third step: compute the access functions for each subscript. 3451 computeAccessFunctions(*SE, SrcAR, SrcSubscripts, Sizes); 3452 computeAccessFunctions(*SE, DstAR, DstSubscripts, Sizes); 3453 3454 // Fail when there is only a subscript: that's a linearized access function. 3455 if (SrcSubscripts.size() < 2 || DstSubscripts.size() < 2 || 3456 SrcSubscripts.size() != DstSubscripts.size()) 3457 return false; 3458 3459 size_t Size = SrcSubscripts.size(); 3460 3461 // Statically check that the array bounds are in-range. The first subscript we 3462 // don't have a size for and it cannot overflow into another subscript, so is 3463 // always safe. The others need to be 0 <= subscript[i] < bound, for both src 3464 // and dst. 3465 // FIXME: It may be better to record these sizes and add them as constraints 3466 // to the dependency checks. 3467 if (!DisableDelinearizationChecks) 3468 for (size_t I = 1; I < Size; ++I) { 3469 if (!isKnownNonNegative(SrcSubscripts[I], SrcPtr)) 3470 return false; 3471 3472 if (!isKnownLessThan(SrcSubscripts[I], Sizes[I - 1])) 3473 return false; 3474 3475 if (!isKnownNonNegative(DstSubscripts[I], DstPtr)) 3476 return false; 3477 3478 if (!isKnownLessThan(DstSubscripts[I], Sizes[I - 1])) 3479 return false; 3480 } 3481 3482 return true; 3483 } 3484 3485 //===----------------------------------------------------------------------===// 3486 3487 #ifndef NDEBUG 3488 // For debugging purposes, dump a small bit vector to dbgs(). 3489 static void dumpSmallBitVector(SmallBitVector &BV) { 3490 dbgs() << "{"; 3491 for (unsigned VI : BV.set_bits()) { 3492 dbgs() << VI; 3493 if (BV.find_next(VI) >= 0) 3494 dbgs() << ' '; 3495 } 3496 dbgs() << "}\n"; 3497 } 3498 #endif 3499 3500 bool DependenceInfo::invalidate(Function &F, const PreservedAnalyses &PA, 3501 FunctionAnalysisManager::Invalidator &Inv) { 3502 // Check if the analysis itself has been invalidated. 3503 auto PAC = PA.getChecker<DependenceAnalysis>(); 3504 if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>()) 3505 return true; 3506 3507 // Check transitive dependencies. 3508 return Inv.invalidate<AAManager>(F, PA) || 3509 Inv.invalidate<ScalarEvolutionAnalysis>(F, PA) || 3510 Inv.invalidate<LoopAnalysis>(F, PA); 3511 } 3512 3513 // depends - 3514 // Returns NULL if there is no dependence. 3515 // Otherwise, return a Dependence with as many details as possible. 3516 // Corresponds to Section 3.1 in the paper 3517 // 3518 // Practical Dependence Testing 3519 // Goff, Kennedy, Tseng 3520 // PLDI 1991 3521 // 3522 // Care is required to keep the routine below, getSplitIteration(), 3523 // up to date with respect to this routine. 3524 std::unique_ptr<Dependence> 3525 DependenceInfo::depends(Instruction *Src, Instruction *Dst, 3526 bool PossiblyLoopIndependent) { 3527 if (Src == Dst) 3528 PossiblyLoopIndependent = false; 3529 3530 if (!(Src->mayReadOrWriteMemory() && Dst->mayReadOrWriteMemory())) 3531 // if both instructions don't reference memory, there's no dependence 3532 return nullptr; 3533 3534 if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) { 3535 // can only analyze simple loads and stores, i.e., no calls, invokes, etc. 3536 LLVM_DEBUG(dbgs() << "can only handle simple loads and stores\n"); 3537 return std::make_unique<Dependence>(Src, Dst); 3538 } 3539 3540 assert(isLoadOrStore(Src) && "instruction is not load or store"); 3541 assert(isLoadOrStore(Dst) && "instruction is not load or store"); 3542 Value *SrcPtr = getLoadStorePointerOperand(Src); 3543 Value *DstPtr = getLoadStorePointerOperand(Dst); 3544 3545 switch (underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), 3546 MemoryLocation::get(Dst), 3547 MemoryLocation::get(Src))) { 3548 case AliasResult::MayAlias: 3549 case AliasResult::PartialAlias: 3550 // cannot analyse objects if we don't understand their aliasing. 3551 LLVM_DEBUG(dbgs() << "can't analyze may or partial alias\n"); 3552 return std::make_unique<Dependence>(Src, Dst); 3553 case AliasResult::NoAlias: 3554 // If the objects noalias, they are distinct, accesses are independent. 3555 LLVM_DEBUG(dbgs() << "no alias\n"); 3556 return nullptr; 3557 case AliasResult::MustAlias: 3558 break; // The underlying objects alias; test accesses for dependence. 3559 } 3560 3561 // establish loop nesting levels 3562 establishNestingLevels(Src, Dst); 3563 LLVM_DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n"); 3564 LLVM_DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n"); 3565 3566 FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels); 3567 ++TotalArrayPairs; 3568 3569 unsigned Pairs = 1; 3570 SmallVector<Subscript, 2> Pair(Pairs); 3571 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); 3572 const SCEV *DstSCEV = SE->getSCEV(DstPtr); 3573 LLVM_DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n"); 3574 LLVM_DEBUG(dbgs() << " DstSCEV = " << *DstSCEV << "\n"); 3575 if (SE->getPointerBase(SrcSCEV) != SE->getPointerBase(DstSCEV)) { 3576 // If two pointers have different bases, trying to analyze indexes won't 3577 // work; we can't compare them to each other. This can happen, for example, 3578 // if one is produced by an LCSSA PHI node. 3579 // 3580 // We check this upfront so we don't crash in cases where getMinusSCEV() 3581 // returns a SCEVCouldNotCompute. 3582 LLVM_DEBUG(dbgs() << "can't analyze SCEV with different pointer base\n"); 3583 return std::make_unique<Dependence>(Src, Dst); 3584 } 3585 Pair[0].Src = SrcSCEV; 3586 Pair[0].Dst = DstSCEV; 3587 3588 if (Delinearize) { 3589 if (tryDelinearize(Src, Dst, Pair)) { 3590 LLVM_DEBUG(dbgs() << " delinearized\n"); 3591 Pairs = Pair.size(); 3592 } 3593 } 3594 3595 for (unsigned P = 0; P < Pairs; ++P) { 3596 Pair[P].Loops.resize(MaxLevels + 1); 3597 Pair[P].GroupLoops.resize(MaxLevels + 1); 3598 Pair[P].Group.resize(Pairs); 3599 removeMatchingExtensions(&Pair[P]); 3600 Pair[P].Classification = 3601 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), 3602 Pair[P].Dst, LI->getLoopFor(Dst->getParent()), 3603 Pair[P].Loops); 3604 Pair[P].GroupLoops = Pair[P].Loops; 3605 Pair[P].Group.set(P); 3606 LLVM_DEBUG(dbgs() << " subscript " << P << "\n"); 3607 LLVM_DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n"); 3608 LLVM_DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n"); 3609 LLVM_DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n"); 3610 LLVM_DEBUG(dbgs() << "\tloops = "); 3611 LLVM_DEBUG(dumpSmallBitVector(Pair[P].Loops)); 3612 } 3613 3614 SmallBitVector Separable(Pairs); 3615 SmallBitVector Coupled(Pairs); 3616 3617 // Partition subscripts into separable and minimally-coupled groups 3618 // Algorithm in paper is algorithmically better; 3619 // this may be faster in practice. Check someday. 3620 // 3621 // Here's an example of how it works. Consider this code: 3622 // 3623 // for (i = ...) { 3624 // for (j = ...) { 3625 // for (k = ...) { 3626 // for (l = ...) { 3627 // for (m = ...) { 3628 // A[i][j][k][m] = ...; 3629 // ... = A[0][j][l][i + j]; 3630 // } 3631 // } 3632 // } 3633 // } 3634 // } 3635 // 3636 // There are 4 subscripts here: 3637 // 0 [i] and [0] 3638 // 1 [j] and [j] 3639 // 2 [k] and [l] 3640 // 3 [m] and [i + j] 3641 // 3642 // We've already classified each subscript pair as ZIV, SIV, etc., 3643 // and collected all the loops mentioned by pair P in Pair[P].Loops. 3644 // In addition, we've initialized Pair[P].GroupLoops to Pair[P].Loops 3645 // and set Pair[P].Group = {P}. 3646 // 3647 // Src Dst Classification Loops GroupLoops Group 3648 // 0 [i] [0] SIV {1} {1} {0} 3649 // 1 [j] [j] SIV {2} {2} {1} 3650 // 2 [k] [l] RDIV {3,4} {3,4} {2} 3651 // 3 [m] [i + j] MIV {1,2,5} {1,2,5} {3} 3652 // 3653 // For each subscript SI 0 .. 3, we consider each remaining subscript, SJ. 3654 // So, 0 is compared against 1, 2, and 3; 1 is compared against 2 and 3, etc. 3655 // 3656 // We begin by comparing 0 and 1. The intersection of the GroupLoops is empty. 3657 // Next, 0 and 2. Again, the intersection of their GroupLoops is empty. 3658 // Next 0 and 3. The intersection of their GroupLoop = {1}, not empty, 3659 // so Pair[3].Group = {0,3} and Done = false (that is, 0 will not be added 3660 // to either Separable or Coupled). 3661 // 3662 // Next, we consider 1 and 2. The intersection of the GroupLoops is empty. 3663 // Next, 1 and 3. The intersection of their GroupLoops = {2}, not empty, 3664 // so Pair[3].Group = {0, 1, 3} and Done = false. 3665 // 3666 // Next, we compare 2 against 3. The intersection of the GroupLoops is empty. 3667 // Since Done remains true, we add 2 to the set of Separable pairs. 3668 // 3669 // Finally, we consider 3. There's nothing to compare it with, 3670 // so Done remains true and we add it to the Coupled set. 3671 // Pair[3].Group = {0, 1, 3} and GroupLoops = {1, 2, 5}. 3672 // 3673 // In the end, we've got 1 separable subscript and 1 coupled group. 3674 for (unsigned SI = 0; SI < Pairs; ++SI) { 3675 if (Pair[SI].Classification == Subscript::NonLinear) { 3676 // ignore these, but collect loops for later 3677 ++NonlinearSubscriptPairs; 3678 collectCommonLoops(Pair[SI].Src, 3679 LI->getLoopFor(Src->getParent()), 3680 Pair[SI].Loops); 3681 collectCommonLoops(Pair[SI].Dst, 3682 LI->getLoopFor(Dst->getParent()), 3683 Pair[SI].Loops); 3684 Result.Consistent = false; 3685 } else if (Pair[SI].Classification == Subscript::ZIV) { 3686 // always separable 3687 Separable.set(SI); 3688 } 3689 else { 3690 // SIV, RDIV, or MIV, so check for coupled group 3691 bool Done = true; 3692 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 3693 SmallBitVector Intersection = Pair[SI].GroupLoops; 3694 Intersection &= Pair[SJ].GroupLoops; 3695 if (Intersection.any()) { 3696 // accumulate set of all the loops in group 3697 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 3698 // accumulate set of all subscripts in group 3699 Pair[SJ].Group |= Pair[SI].Group; 3700 Done = false; 3701 } 3702 } 3703 if (Done) { 3704 if (Pair[SI].Group.count() == 1) { 3705 Separable.set(SI); 3706 ++SeparableSubscriptPairs; 3707 } 3708 else { 3709 Coupled.set(SI); 3710 ++CoupledSubscriptPairs; 3711 } 3712 } 3713 } 3714 } 3715 3716 LLVM_DEBUG(dbgs() << " Separable = "); 3717 LLVM_DEBUG(dumpSmallBitVector(Separable)); 3718 LLVM_DEBUG(dbgs() << " Coupled = "); 3719 LLVM_DEBUG(dumpSmallBitVector(Coupled)); 3720 3721 Constraint NewConstraint; 3722 NewConstraint.setAny(SE); 3723 3724 // test separable subscripts 3725 for (unsigned SI : Separable.set_bits()) { 3726 LLVM_DEBUG(dbgs() << "testing subscript " << SI); 3727 switch (Pair[SI].Classification) { 3728 case Subscript::ZIV: 3729 LLVM_DEBUG(dbgs() << ", ZIV\n"); 3730 if (testZIV(Pair[SI].Src, Pair[SI].Dst, Result)) 3731 return nullptr; 3732 break; 3733 case Subscript::SIV: { 3734 LLVM_DEBUG(dbgs() << ", SIV\n"); 3735 unsigned Level; 3736 const SCEV *SplitIter = nullptr; 3737 if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level, Result, NewConstraint, 3738 SplitIter)) 3739 return nullptr; 3740 break; 3741 } 3742 case Subscript::RDIV: 3743 LLVM_DEBUG(dbgs() << ", RDIV\n"); 3744 if (testRDIV(Pair[SI].Src, Pair[SI].Dst, Result)) 3745 return nullptr; 3746 break; 3747 case Subscript::MIV: 3748 LLVM_DEBUG(dbgs() << ", MIV\n"); 3749 if (testMIV(Pair[SI].Src, Pair[SI].Dst, Pair[SI].Loops, Result)) 3750 return nullptr; 3751 break; 3752 default: 3753 llvm_unreachable("subscript has unexpected classification"); 3754 } 3755 } 3756 3757 if (Coupled.count()) { 3758 // test coupled subscript groups 3759 LLVM_DEBUG(dbgs() << "starting on coupled subscripts\n"); 3760 LLVM_DEBUG(dbgs() << "MaxLevels + 1 = " << MaxLevels + 1 << "\n"); 3761 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 3762 for (unsigned II = 0; II <= MaxLevels; ++II) 3763 Constraints[II].setAny(SE); 3764 for (unsigned SI : Coupled.set_bits()) { 3765 LLVM_DEBUG(dbgs() << "testing subscript group " << SI << " { "); 3766 SmallBitVector Group(Pair[SI].Group); 3767 SmallBitVector Sivs(Pairs); 3768 SmallBitVector Mivs(Pairs); 3769 SmallBitVector ConstrainedLevels(MaxLevels + 1); 3770 SmallVector<Subscript *, 4> PairsInGroup; 3771 for (unsigned SJ : Group.set_bits()) { 3772 LLVM_DEBUG(dbgs() << SJ << " "); 3773 if (Pair[SJ].Classification == Subscript::SIV) 3774 Sivs.set(SJ); 3775 else 3776 Mivs.set(SJ); 3777 PairsInGroup.push_back(&Pair[SJ]); 3778 } 3779 unifySubscriptType(PairsInGroup); 3780 LLVM_DEBUG(dbgs() << "}\n"); 3781 while (Sivs.any()) { 3782 bool Changed = false; 3783 for (unsigned SJ : Sivs.set_bits()) { 3784 LLVM_DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n"); 3785 // SJ is an SIV subscript that's part of the current coupled group 3786 unsigned Level; 3787 const SCEV *SplitIter = nullptr; 3788 LLVM_DEBUG(dbgs() << "SIV\n"); 3789 if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, Result, NewConstraint, 3790 SplitIter)) 3791 return nullptr; 3792 ConstrainedLevels.set(Level); 3793 if (intersectConstraints(&Constraints[Level], &NewConstraint)) { 3794 if (Constraints[Level].isEmpty()) { 3795 ++DeltaIndependence; 3796 return nullptr; 3797 } 3798 Changed = true; 3799 } 3800 Sivs.reset(SJ); 3801 } 3802 if (Changed) { 3803 // propagate, possibly creating new SIVs and ZIVs 3804 LLVM_DEBUG(dbgs() << " propagating\n"); 3805 LLVM_DEBUG(dbgs() << "\tMivs = "); 3806 LLVM_DEBUG(dumpSmallBitVector(Mivs)); 3807 for (unsigned SJ : Mivs.set_bits()) { 3808 // SJ is an MIV subscript that's part of the current coupled group 3809 LLVM_DEBUG(dbgs() << "\tSJ = " << SJ << "\n"); 3810 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, 3811 Constraints, Result.Consistent)) { 3812 LLVM_DEBUG(dbgs() << "\t Changed\n"); 3813 ++DeltaPropagations; 3814 Pair[SJ].Classification = 3815 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), 3816 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), 3817 Pair[SJ].Loops); 3818 switch (Pair[SJ].Classification) { 3819 case Subscript::ZIV: 3820 LLVM_DEBUG(dbgs() << "ZIV\n"); 3821 if (testZIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) 3822 return nullptr; 3823 Mivs.reset(SJ); 3824 break; 3825 case Subscript::SIV: 3826 Sivs.set(SJ); 3827 Mivs.reset(SJ); 3828 break; 3829 case Subscript::RDIV: 3830 case Subscript::MIV: 3831 break; 3832 default: 3833 llvm_unreachable("bad subscript classification"); 3834 } 3835 } 3836 } 3837 } 3838 } 3839 3840 // test & propagate remaining RDIVs 3841 for (unsigned SJ : Mivs.set_bits()) { 3842 if (Pair[SJ].Classification == Subscript::RDIV) { 3843 LLVM_DEBUG(dbgs() << "RDIV test\n"); 3844 if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) 3845 return nullptr; 3846 // I don't yet understand how to propagate RDIV results 3847 Mivs.reset(SJ); 3848 } 3849 } 3850 3851 // test remaining MIVs 3852 // This code is temporary. 3853 // Better to somehow test all remaining subscripts simultaneously. 3854 for (unsigned SJ : Mivs.set_bits()) { 3855 if (Pair[SJ].Classification == Subscript::MIV) { 3856 LLVM_DEBUG(dbgs() << "MIV test\n"); 3857 if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result)) 3858 return nullptr; 3859 } 3860 else 3861 llvm_unreachable("expected only MIV subscripts at this point"); 3862 } 3863 3864 // update Result.DV from constraint vector 3865 LLVM_DEBUG(dbgs() << " updating\n"); 3866 for (unsigned SJ : ConstrainedLevels.set_bits()) { 3867 if (SJ > CommonLevels) 3868 break; 3869 updateDirection(Result.DV[SJ - 1], Constraints[SJ]); 3870 if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE) 3871 return nullptr; 3872 } 3873 } 3874 } 3875 3876 // Make sure the Scalar flags are set correctly. 3877 SmallBitVector CompleteLoops(MaxLevels + 1); 3878 for (unsigned SI = 0; SI < Pairs; ++SI) 3879 CompleteLoops |= Pair[SI].Loops; 3880 for (unsigned II = 1; II <= CommonLevels; ++II) 3881 if (CompleteLoops[II]) 3882 Result.DV[II - 1].Scalar = false; 3883 3884 if (PossiblyLoopIndependent) { 3885 // Make sure the LoopIndependent flag is set correctly. 3886 // All directions must include equal, otherwise no 3887 // loop-independent dependence is possible. 3888 for (unsigned II = 1; II <= CommonLevels; ++II) { 3889 if (!(Result.getDirection(II) & Dependence::DVEntry::EQ)) { 3890 Result.LoopIndependent = false; 3891 break; 3892 } 3893 } 3894 } 3895 else { 3896 // On the other hand, if all directions are equal and there's no 3897 // loop-independent dependence possible, then no dependence exists. 3898 bool AllEqual = true; 3899 for (unsigned II = 1; II <= CommonLevels; ++II) { 3900 if (Result.getDirection(II) != Dependence::DVEntry::EQ) { 3901 AllEqual = false; 3902 break; 3903 } 3904 } 3905 if (AllEqual) 3906 return nullptr; 3907 } 3908 3909 return std::make_unique<FullDependence>(std::move(Result)); 3910 } 3911 3912 //===----------------------------------------------------------------------===// 3913 // getSplitIteration - 3914 // Rather than spend rarely-used space recording the splitting iteration 3915 // during the Weak-Crossing SIV test, we re-compute it on demand. 3916 // The re-computation is basically a repeat of the entire dependence test, 3917 // though simplified since we know that the dependence exists. 3918 // It's tedious, since we must go through all propagations, etc. 3919 // 3920 // Care is required to keep this code up to date with respect to the routine 3921 // above, depends(). 3922 // 3923 // Generally, the dependence analyzer will be used to build 3924 // a dependence graph for a function (basically a map from instructions 3925 // to dependences). Looking for cycles in the graph shows us loops 3926 // that cannot be trivially vectorized/parallelized. 3927 // 3928 // We can try to improve the situation by examining all the dependences 3929 // that make up the cycle, looking for ones we can break. 3930 // Sometimes, peeling the first or last iteration of a loop will break 3931 // dependences, and we've got flags for those possibilities. 3932 // Sometimes, splitting a loop at some other iteration will do the trick, 3933 // and we've got a flag for that case. Rather than waste the space to 3934 // record the exact iteration (since we rarely know), we provide 3935 // a method that calculates the iteration. It's a drag that it must work 3936 // from scratch, but wonderful in that it's possible. 3937 // 3938 // Here's an example: 3939 // 3940 // for (i = 0; i < 10; i++) 3941 // A[i] = ... 3942 // ... = A[11 - i] 3943 // 3944 // There's a loop-carried flow dependence from the store to the load, 3945 // found by the weak-crossing SIV test. The dependence will have a flag, 3946 // indicating that the dependence can be broken by splitting the loop. 3947 // Calling getSplitIteration will return 5. 3948 // Splitting the loop breaks the dependence, like so: 3949 // 3950 // for (i = 0; i <= 5; i++) 3951 // A[i] = ... 3952 // ... = A[11 - i] 3953 // for (i = 6; i < 10; i++) 3954 // A[i] = ... 3955 // ... = A[11 - i] 3956 // 3957 // breaks the dependence and allows us to vectorize/parallelize 3958 // both loops. 3959 const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep, 3960 unsigned SplitLevel) { 3961 assert(Dep.isSplitable(SplitLevel) && 3962 "Dep should be splitable at SplitLevel"); 3963 Instruction *Src = Dep.getSrc(); 3964 Instruction *Dst = Dep.getDst(); 3965 assert(Src->mayReadFromMemory() || Src->mayWriteToMemory()); 3966 assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory()); 3967 assert(isLoadOrStore(Src)); 3968 assert(isLoadOrStore(Dst)); 3969 Value *SrcPtr = getLoadStorePointerOperand(Src); 3970 Value *DstPtr = getLoadStorePointerOperand(Dst); 3971 assert(underlyingObjectsAlias( 3972 AA, F->getParent()->getDataLayout(), MemoryLocation::get(Dst), 3973 MemoryLocation::get(Src)) == AliasResult::MustAlias); 3974 3975 // establish loop nesting levels 3976 establishNestingLevels(Src, Dst); 3977 3978 FullDependence Result(Src, Dst, false, CommonLevels); 3979 3980 unsigned Pairs = 1; 3981 SmallVector<Subscript, 2> Pair(Pairs); 3982 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); 3983 const SCEV *DstSCEV = SE->getSCEV(DstPtr); 3984 Pair[0].Src = SrcSCEV; 3985 Pair[0].Dst = DstSCEV; 3986 3987 if (Delinearize) { 3988 if (tryDelinearize(Src, Dst, Pair)) { 3989 LLVM_DEBUG(dbgs() << " delinearized\n"); 3990 Pairs = Pair.size(); 3991 } 3992 } 3993 3994 for (unsigned P = 0; P < Pairs; ++P) { 3995 Pair[P].Loops.resize(MaxLevels + 1); 3996 Pair[P].GroupLoops.resize(MaxLevels + 1); 3997 Pair[P].Group.resize(Pairs); 3998 removeMatchingExtensions(&Pair[P]); 3999 Pair[P].Classification = 4000 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), 4001 Pair[P].Dst, LI->getLoopFor(Dst->getParent()), 4002 Pair[P].Loops); 4003 Pair[P].GroupLoops = Pair[P].Loops; 4004 Pair[P].Group.set(P); 4005 } 4006 4007 SmallBitVector Separable(Pairs); 4008 SmallBitVector Coupled(Pairs); 4009 4010 // partition subscripts into separable and minimally-coupled groups 4011 for (unsigned SI = 0; SI < Pairs; ++SI) { 4012 if (Pair[SI].Classification == Subscript::NonLinear) { 4013 // ignore these, but collect loops for later 4014 collectCommonLoops(Pair[SI].Src, 4015 LI->getLoopFor(Src->getParent()), 4016 Pair[SI].Loops); 4017 collectCommonLoops(Pair[SI].Dst, 4018 LI->getLoopFor(Dst->getParent()), 4019 Pair[SI].Loops); 4020 Result.Consistent = false; 4021 } 4022 else if (Pair[SI].Classification == Subscript::ZIV) 4023 Separable.set(SI); 4024 else { 4025 // SIV, RDIV, or MIV, so check for coupled group 4026 bool Done = true; 4027 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 4028 SmallBitVector Intersection = Pair[SI].GroupLoops; 4029 Intersection &= Pair[SJ].GroupLoops; 4030 if (Intersection.any()) { 4031 // accumulate set of all the loops in group 4032 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 4033 // accumulate set of all subscripts in group 4034 Pair[SJ].Group |= Pair[SI].Group; 4035 Done = false; 4036 } 4037 } 4038 if (Done) { 4039 if (Pair[SI].Group.count() == 1) 4040 Separable.set(SI); 4041 else 4042 Coupled.set(SI); 4043 } 4044 } 4045 } 4046 4047 Constraint NewConstraint; 4048 NewConstraint.setAny(SE); 4049 4050 // test separable subscripts 4051 for (unsigned SI : Separable.set_bits()) { 4052 switch (Pair[SI].Classification) { 4053 case Subscript::SIV: { 4054 unsigned Level; 4055 const SCEV *SplitIter = nullptr; 4056 (void) testSIV(Pair[SI].Src, Pair[SI].Dst, Level, 4057 Result, NewConstraint, SplitIter); 4058 if (Level == SplitLevel) { 4059 assert(SplitIter != nullptr); 4060 return SplitIter; 4061 } 4062 break; 4063 } 4064 case Subscript::ZIV: 4065 case Subscript::RDIV: 4066 case Subscript::MIV: 4067 break; 4068 default: 4069 llvm_unreachable("subscript has unexpected classification"); 4070 } 4071 } 4072 4073 if (Coupled.count()) { 4074 // test coupled subscript groups 4075 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 4076 for (unsigned II = 0; II <= MaxLevels; ++II) 4077 Constraints[II].setAny(SE); 4078 for (unsigned SI : Coupled.set_bits()) { 4079 SmallBitVector Group(Pair[SI].Group); 4080 SmallBitVector Sivs(Pairs); 4081 SmallBitVector Mivs(Pairs); 4082 SmallBitVector ConstrainedLevels(MaxLevels + 1); 4083 for (unsigned SJ : Group.set_bits()) { 4084 if (Pair[SJ].Classification == Subscript::SIV) 4085 Sivs.set(SJ); 4086 else 4087 Mivs.set(SJ); 4088 } 4089 while (Sivs.any()) { 4090 bool Changed = false; 4091 for (unsigned SJ : Sivs.set_bits()) { 4092 // SJ is an SIV subscript that's part of the current coupled group 4093 unsigned Level; 4094 const SCEV *SplitIter = nullptr; 4095 (void) testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, 4096 Result, NewConstraint, SplitIter); 4097 if (Level == SplitLevel && SplitIter) 4098 return SplitIter; 4099 ConstrainedLevels.set(Level); 4100 if (intersectConstraints(&Constraints[Level], &NewConstraint)) 4101 Changed = true; 4102 Sivs.reset(SJ); 4103 } 4104 if (Changed) { 4105 // propagate, possibly creating new SIVs and ZIVs 4106 for (unsigned SJ : Mivs.set_bits()) { 4107 // SJ is an MIV subscript that's part of the current coupled group 4108 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, 4109 Pair[SJ].Loops, Constraints, Result.Consistent)) { 4110 Pair[SJ].Classification = 4111 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), 4112 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), 4113 Pair[SJ].Loops); 4114 switch (Pair[SJ].Classification) { 4115 case Subscript::ZIV: 4116 Mivs.reset(SJ); 4117 break; 4118 case Subscript::SIV: 4119 Sivs.set(SJ); 4120 Mivs.reset(SJ); 4121 break; 4122 case Subscript::RDIV: 4123 case Subscript::MIV: 4124 break; 4125 default: 4126 llvm_unreachable("bad subscript classification"); 4127 } 4128 } 4129 } 4130 } 4131 } 4132 } 4133 } 4134 llvm_unreachable("somehow reached end of routine"); 4135 return nullptr; 4136 } 4137