1 //===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // DependenceAnalysis is an LLVM pass that analyses dependences between memory 10 // accesses. Currently, it is an (incomplete) implementation of the approach 11 // described in 12 // 13 // Practical Dependence Testing 14 // Goff, Kennedy, Tseng 15 // PLDI 1991 16 // 17 // There's a single entry point that analyzes the dependence between a pair 18 // of memory references in a function, returning either NULL, for no dependence, 19 // or a more-or-less detailed description of the dependence between them. 20 // 21 // Currently, the implementation cannot propagate constraints between 22 // coupled RDIV subscripts and lacks a multi-subscript MIV test. 23 // Both of these are conservative weaknesses; 24 // that is, not a source of correctness problems. 25 // 26 // Since Clang linearizes some array subscripts, the dependence 27 // analysis is using SCEV->delinearize to recover the representation of multiple 28 // subscripts, and thus avoid the more expensive and less precise MIV tests. The 29 // delinearization is controlled by the flag -da-delinearize. 30 // 31 // We should pay some careful attention to the possibility of integer overflow 32 // in the implementation of the various tests. This could happen with Add, 33 // Subtract, or Multiply, with both APInt's and SCEV's. 34 // 35 // Some non-linear subscript pairs can be handled by the GCD test 36 // (and perhaps other tests). 37 // Should explore how often these things occur. 38 // 39 // Finally, it seems like certain test cases expose weaknesses in the SCEV 40 // simplification, especially in the handling of sign and zero extensions. 41 // It could be useful to spend time exploring these. 42 // 43 // Please note that this is work in progress and the interface is subject to 44 // change. 45 // 46 //===----------------------------------------------------------------------===// 47 // // 48 // In memory of Ken Kennedy, 1945 - 2007 // 49 // // 50 //===----------------------------------------------------------------------===// 51 52 #include "llvm/Analysis/DependenceAnalysis.h" 53 #include "llvm/ADT/STLExtras.h" 54 #include "llvm/ADT/Statistic.h" 55 #include "llvm/Analysis/AliasAnalysis.h" 56 #include "llvm/Analysis/LoopInfo.h" 57 #include "llvm/Analysis/ScalarEvolution.h" 58 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 59 #include "llvm/Analysis/ValueTracking.h" 60 #include "llvm/Config/llvm-config.h" 61 #include "llvm/IR/InstIterator.h" 62 #include "llvm/IR/Module.h" 63 #include "llvm/IR/Operator.h" 64 #include "llvm/InitializePasses.h" 65 #include "llvm/Support/CommandLine.h" 66 #include "llvm/Support/Debug.h" 67 #include "llvm/Support/ErrorHandling.h" 68 #include "llvm/Support/raw_ostream.h" 69 70 using namespace llvm; 71 72 #define DEBUG_TYPE "da" 73 74 //===----------------------------------------------------------------------===// 75 // statistics 76 77 STATISTIC(TotalArrayPairs, "Array pairs tested"); 78 STATISTIC(SeparableSubscriptPairs, "Separable subscript pairs"); 79 STATISTIC(CoupledSubscriptPairs, "Coupled subscript pairs"); 80 STATISTIC(NonlinearSubscriptPairs, "Nonlinear subscript pairs"); 81 STATISTIC(ZIVapplications, "ZIV applications"); 82 STATISTIC(ZIVindependence, "ZIV independence"); 83 STATISTIC(StrongSIVapplications, "Strong SIV applications"); 84 STATISTIC(StrongSIVsuccesses, "Strong SIV successes"); 85 STATISTIC(StrongSIVindependence, "Strong SIV independence"); 86 STATISTIC(WeakCrossingSIVapplications, "Weak-Crossing SIV applications"); 87 STATISTIC(WeakCrossingSIVsuccesses, "Weak-Crossing SIV successes"); 88 STATISTIC(WeakCrossingSIVindependence, "Weak-Crossing SIV independence"); 89 STATISTIC(ExactSIVapplications, "Exact SIV applications"); 90 STATISTIC(ExactSIVsuccesses, "Exact SIV successes"); 91 STATISTIC(ExactSIVindependence, "Exact SIV independence"); 92 STATISTIC(WeakZeroSIVapplications, "Weak-Zero SIV applications"); 93 STATISTIC(WeakZeroSIVsuccesses, "Weak-Zero SIV successes"); 94 STATISTIC(WeakZeroSIVindependence, "Weak-Zero SIV independence"); 95 STATISTIC(ExactRDIVapplications, "Exact RDIV applications"); 96 STATISTIC(ExactRDIVindependence, "Exact RDIV independence"); 97 STATISTIC(SymbolicRDIVapplications, "Symbolic RDIV applications"); 98 STATISTIC(SymbolicRDIVindependence, "Symbolic RDIV independence"); 99 STATISTIC(DeltaApplications, "Delta applications"); 100 STATISTIC(DeltaSuccesses, "Delta successes"); 101 STATISTIC(DeltaIndependence, "Delta independence"); 102 STATISTIC(DeltaPropagations, "Delta propagations"); 103 STATISTIC(GCDapplications, "GCD applications"); 104 STATISTIC(GCDsuccesses, "GCD successes"); 105 STATISTIC(GCDindependence, "GCD independence"); 106 STATISTIC(BanerjeeApplications, "Banerjee applications"); 107 STATISTIC(BanerjeeIndependence, "Banerjee independence"); 108 STATISTIC(BanerjeeSuccesses, "Banerjee successes"); 109 110 static cl::opt<bool> 111 Delinearize("da-delinearize", cl::init(true), cl::Hidden, cl::ZeroOrMore, 112 cl::desc("Try to delinearize array references.")); 113 static cl::opt<bool> DisableDelinearizationChecks( 114 "da-disable-delinearization-checks", cl::init(false), cl::Hidden, 115 cl::ZeroOrMore, 116 cl::desc( 117 "Disable checks that try to statically verify validity of " 118 "delinearized subscripts. Enabling this option may result in incorrect " 119 "dependence vectors for languages that allow the subscript of one " 120 "dimension to underflow or overflow into another dimension.")); 121 122 //===----------------------------------------------------------------------===// 123 // basics 124 125 DependenceAnalysis::Result 126 DependenceAnalysis::run(Function &F, FunctionAnalysisManager &FAM) { 127 auto &AA = FAM.getResult<AAManager>(F); 128 auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F); 129 auto &LI = FAM.getResult<LoopAnalysis>(F); 130 return DependenceInfo(&F, &AA, &SE, &LI); 131 } 132 133 AnalysisKey DependenceAnalysis::Key; 134 135 INITIALIZE_PASS_BEGIN(DependenceAnalysisWrapperPass, "da", 136 "Dependence Analysis", true, true) 137 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 138 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) 139 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 140 INITIALIZE_PASS_END(DependenceAnalysisWrapperPass, "da", "Dependence Analysis", 141 true, true) 142 143 char DependenceAnalysisWrapperPass::ID = 0; 144 145 DependenceAnalysisWrapperPass::DependenceAnalysisWrapperPass() 146 : FunctionPass(ID) { 147 initializeDependenceAnalysisWrapperPassPass(*PassRegistry::getPassRegistry()); 148 } 149 150 FunctionPass *llvm::createDependenceAnalysisWrapperPass() { 151 return new DependenceAnalysisWrapperPass(); 152 } 153 154 bool DependenceAnalysisWrapperPass::runOnFunction(Function &F) { 155 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); 156 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 157 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 158 info.reset(new DependenceInfo(&F, &AA, &SE, &LI)); 159 return false; 160 } 161 162 DependenceInfo &DependenceAnalysisWrapperPass::getDI() const { return *info; } 163 164 void DependenceAnalysisWrapperPass::releaseMemory() { info.reset(); } 165 166 void DependenceAnalysisWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { 167 AU.setPreservesAll(); 168 AU.addRequiredTransitive<AAResultsWrapperPass>(); 169 AU.addRequiredTransitive<ScalarEvolutionWrapperPass>(); 170 AU.addRequiredTransitive<LoopInfoWrapperPass>(); 171 } 172 173 // Used to test the dependence analyzer. 174 // Looks through the function, noting instructions that may access memory. 175 // Calls depends() on every possible pair and prints out the result. 176 // Ignores all other instructions. 177 static void dumpExampleDependence(raw_ostream &OS, DependenceInfo *DA) { 178 auto *F = DA->getFunction(); 179 for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F); SrcI != SrcE; 180 ++SrcI) { 181 if (SrcI->mayReadOrWriteMemory()) { 182 for (inst_iterator DstI = SrcI, DstE = inst_end(F); 183 DstI != DstE; ++DstI) { 184 if (DstI->mayReadOrWriteMemory()) { 185 OS << "Src:" << *SrcI << " --> Dst:" << *DstI << "\n"; 186 OS << " da analyze - "; 187 if (auto D = DA->depends(&*SrcI, &*DstI, true)) { 188 D->dump(OS); 189 for (unsigned Level = 1; Level <= D->getLevels(); Level++) { 190 if (D->isSplitable(Level)) { 191 OS << " da analyze - split level = " << Level; 192 OS << ", iteration = " << *DA->getSplitIteration(*D, Level); 193 OS << "!\n"; 194 } 195 } 196 } 197 else 198 OS << "none!\n"; 199 } 200 } 201 } 202 } 203 } 204 205 void DependenceAnalysisWrapperPass::print(raw_ostream &OS, 206 const Module *) const { 207 dumpExampleDependence(OS, info.get()); 208 } 209 210 PreservedAnalyses 211 DependenceAnalysisPrinterPass::run(Function &F, FunctionAnalysisManager &FAM) { 212 OS << "'Dependence Analysis' for function '" << F.getName() << "':\n"; 213 dumpExampleDependence(OS, &FAM.getResult<DependenceAnalysis>(F)); 214 return PreservedAnalyses::all(); 215 } 216 217 //===----------------------------------------------------------------------===// 218 // Dependence methods 219 220 // Returns true if this is an input dependence. 221 bool Dependence::isInput() const { 222 return Src->mayReadFromMemory() && Dst->mayReadFromMemory(); 223 } 224 225 226 // Returns true if this is an output dependence. 227 bool Dependence::isOutput() const { 228 return Src->mayWriteToMemory() && Dst->mayWriteToMemory(); 229 } 230 231 232 // Returns true if this is an flow (aka true) dependence. 233 bool Dependence::isFlow() const { 234 return Src->mayWriteToMemory() && Dst->mayReadFromMemory(); 235 } 236 237 238 // Returns true if this is an anti dependence. 239 bool Dependence::isAnti() const { 240 return Src->mayReadFromMemory() && Dst->mayWriteToMemory(); 241 } 242 243 244 // Returns true if a particular level is scalar; that is, 245 // if no subscript in the source or destination mention the induction 246 // variable associated with the loop at this level. 247 // Leave this out of line, so it will serve as a virtual method anchor 248 bool Dependence::isScalar(unsigned level) const { 249 return false; 250 } 251 252 253 //===----------------------------------------------------------------------===// 254 // FullDependence methods 255 256 FullDependence::FullDependence(Instruction *Source, Instruction *Destination, 257 bool PossiblyLoopIndependent, 258 unsigned CommonLevels) 259 : Dependence(Source, Destination), Levels(CommonLevels), 260 LoopIndependent(PossiblyLoopIndependent) { 261 Consistent = true; 262 if (CommonLevels) 263 DV = std::make_unique<DVEntry[]>(CommonLevels); 264 } 265 266 // The rest are simple getters that hide the implementation. 267 268 // getDirection - Returns the direction associated with a particular level. 269 unsigned FullDependence::getDirection(unsigned Level) const { 270 assert(0 < Level && Level <= Levels && "Level out of range"); 271 return DV[Level - 1].Direction; 272 } 273 274 275 // Returns the distance (or NULL) associated with a particular level. 276 const SCEV *FullDependence::getDistance(unsigned Level) const { 277 assert(0 < Level && Level <= Levels && "Level out of range"); 278 return DV[Level - 1].Distance; 279 } 280 281 282 // Returns true if a particular level is scalar; that is, 283 // if no subscript in the source or destination mention the induction 284 // variable associated with the loop at this level. 285 bool FullDependence::isScalar(unsigned Level) const { 286 assert(0 < Level && Level <= Levels && "Level out of range"); 287 return DV[Level - 1].Scalar; 288 } 289 290 291 // Returns true if peeling the first iteration from this loop 292 // will break this dependence. 293 bool FullDependence::isPeelFirst(unsigned Level) const { 294 assert(0 < Level && Level <= Levels && "Level out of range"); 295 return DV[Level - 1].PeelFirst; 296 } 297 298 299 // Returns true if peeling the last iteration from this loop 300 // will break this dependence. 301 bool FullDependence::isPeelLast(unsigned Level) const { 302 assert(0 < Level && Level <= Levels && "Level out of range"); 303 return DV[Level - 1].PeelLast; 304 } 305 306 307 // Returns true if splitting this loop will break the dependence. 308 bool FullDependence::isSplitable(unsigned Level) const { 309 assert(0 < Level && Level <= Levels && "Level out of range"); 310 return DV[Level - 1].Splitable; 311 } 312 313 314 //===----------------------------------------------------------------------===// 315 // DependenceInfo::Constraint methods 316 317 // If constraint is a point <X, Y>, returns X. 318 // Otherwise assert. 319 const SCEV *DependenceInfo::Constraint::getX() const { 320 assert(Kind == Point && "Kind should be Point"); 321 return A; 322 } 323 324 325 // If constraint is a point <X, Y>, returns Y. 326 // Otherwise assert. 327 const SCEV *DependenceInfo::Constraint::getY() const { 328 assert(Kind == Point && "Kind should be Point"); 329 return B; 330 } 331 332 333 // If constraint is a line AX + BY = C, returns A. 334 // Otherwise assert. 335 const SCEV *DependenceInfo::Constraint::getA() const { 336 assert((Kind == Line || Kind == Distance) && 337 "Kind should be Line (or Distance)"); 338 return A; 339 } 340 341 342 // If constraint is a line AX + BY = C, returns B. 343 // Otherwise assert. 344 const SCEV *DependenceInfo::Constraint::getB() const { 345 assert((Kind == Line || Kind == Distance) && 346 "Kind should be Line (or Distance)"); 347 return B; 348 } 349 350 351 // If constraint is a line AX + BY = C, returns C. 352 // Otherwise assert. 353 const SCEV *DependenceInfo::Constraint::getC() const { 354 assert((Kind == Line || Kind == Distance) && 355 "Kind should be Line (or Distance)"); 356 return C; 357 } 358 359 360 // If constraint is a distance, returns D. 361 // Otherwise assert. 362 const SCEV *DependenceInfo::Constraint::getD() const { 363 assert(Kind == Distance && "Kind should be Distance"); 364 return SE->getNegativeSCEV(C); 365 } 366 367 368 // Returns the loop associated with this constraint. 369 const Loop *DependenceInfo::Constraint::getAssociatedLoop() const { 370 assert((Kind == Distance || Kind == Line || Kind == Point) && 371 "Kind should be Distance, Line, or Point"); 372 return AssociatedLoop; 373 } 374 375 void DependenceInfo::Constraint::setPoint(const SCEV *X, const SCEV *Y, 376 const Loop *CurLoop) { 377 Kind = Point; 378 A = X; 379 B = Y; 380 AssociatedLoop = CurLoop; 381 } 382 383 void DependenceInfo::Constraint::setLine(const SCEV *AA, const SCEV *BB, 384 const SCEV *CC, const Loop *CurLoop) { 385 Kind = Line; 386 A = AA; 387 B = BB; 388 C = CC; 389 AssociatedLoop = CurLoop; 390 } 391 392 void DependenceInfo::Constraint::setDistance(const SCEV *D, 393 const Loop *CurLoop) { 394 Kind = Distance; 395 A = SE->getOne(D->getType()); 396 B = SE->getNegativeSCEV(A); 397 C = SE->getNegativeSCEV(D); 398 AssociatedLoop = CurLoop; 399 } 400 401 void DependenceInfo::Constraint::setEmpty() { Kind = Empty; } 402 403 void DependenceInfo::Constraint::setAny(ScalarEvolution *NewSE) { 404 SE = NewSE; 405 Kind = Any; 406 } 407 408 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 409 // For debugging purposes. Dumps the constraint out to OS. 410 LLVM_DUMP_METHOD void DependenceInfo::Constraint::dump(raw_ostream &OS) const { 411 if (isEmpty()) 412 OS << " Empty\n"; 413 else if (isAny()) 414 OS << " Any\n"; 415 else if (isPoint()) 416 OS << " Point is <" << *getX() << ", " << *getY() << ">\n"; 417 else if (isDistance()) 418 OS << " Distance is " << *getD() << 419 " (" << *getA() << "*X + " << *getB() << "*Y = " << *getC() << ")\n"; 420 else if (isLine()) 421 OS << " Line is " << *getA() << "*X + " << 422 *getB() << "*Y = " << *getC() << "\n"; 423 else 424 llvm_unreachable("unknown constraint type in Constraint::dump"); 425 } 426 #endif 427 428 429 // Updates X with the intersection 430 // of the Constraints X and Y. Returns true if X has changed. 431 // Corresponds to Figure 4 from the paper 432 // 433 // Practical Dependence Testing 434 // Goff, Kennedy, Tseng 435 // PLDI 1991 436 bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) { 437 ++DeltaApplications; 438 LLVM_DEBUG(dbgs() << "\tintersect constraints\n"); 439 LLVM_DEBUG(dbgs() << "\t X ="; X->dump(dbgs())); 440 LLVM_DEBUG(dbgs() << "\t Y ="; Y->dump(dbgs())); 441 assert(!Y->isPoint() && "Y must not be a Point"); 442 if (X->isAny()) { 443 if (Y->isAny()) 444 return false; 445 *X = *Y; 446 return true; 447 } 448 if (X->isEmpty()) 449 return false; 450 if (Y->isEmpty()) { 451 X->setEmpty(); 452 return true; 453 } 454 455 if (X->isDistance() && Y->isDistance()) { 456 LLVM_DEBUG(dbgs() << "\t intersect 2 distances\n"); 457 if (isKnownPredicate(CmpInst::ICMP_EQ, X->getD(), Y->getD())) 458 return false; 459 if (isKnownPredicate(CmpInst::ICMP_NE, X->getD(), Y->getD())) { 460 X->setEmpty(); 461 ++DeltaSuccesses; 462 return true; 463 } 464 // Hmmm, interesting situation. 465 // I guess if either is constant, keep it and ignore the other. 466 if (isa<SCEVConstant>(Y->getD())) { 467 *X = *Y; 468 return true; 469 } 470 return false; 471 } 472 473 // At this point, the pseudo-code in Figure 4 of the paper 474 // checks if (X->isPoint() && Y->isPoint()). 475 // This case can't occur in our implementation, 476 // since a Point can only arise as the result of intersecting 477 // two Line constraints, and the right-hand value, Y, is never 478 // the result of an intersection. 479 assert(!(X->isPoint() && Y->isPoint()) && 480 "We shouldn't ever see X->isPoint() && Y->isPoint()"); 481 482 if (X->isLine() && Y->isLine()) { 483 LLVM_DEBUG(dbgs() << "\t intersect 2 lines\n"); 484 const SCEV *Prod1 = SE->getMulExpr(X->getA(), Y->getB()); 485 const SCEV *Prod2 = SE->getMulExpr(X->getB(), Y->getA()); 486 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) { 487 // slopes are equal, so lines are parallel 488 LLVM_DEBUG(dbgs() << "\t\tsame slope\n"); 489 Prod1 = SE->getMulExpr(X->getC(), Y->getB()); 490 Prod2 = SE->getMulExpr(X->getB(), Y->getC()); 491 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) 492 return false; 493 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 494 X->setEmpty(); 495 ++DeltaSuccesses; 496 return true; 497 } 498 return false; 499 } 500 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 501 // slopes differ, so lines intersect 502 LLVM_DEBUG(dbgs() << "\t\tdifferent slopes\n"); 503 const SCEV *C1B2 = SE->getMulExpr(X->getC(), Y->getB()); 504 const SCEV *C1A2 = SE->getMulExpr(X->getC(), Y->getA()); 505 const SCEV *C2B1 = SE->getMulExpr(Y->getC(), X->getB()); 506 const SCEV *C2A1 = SE->getMulExpr(Y->getC(), X->getA()); 507 const SCEV *A1B2 = SE->getMulExpr(X->getA(), Y->getB()); 508 const SCEV *A2B1 = SE->getMulExpr(Y->getA(), X->getB()); 509 const SCEVConstant *C1A2_C2A1 = 510 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1A2, C2A1)); 511 const SCEVConstant *C1B2_C2B1 = 512 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1B2, C2B1)); 513 const SCEVConstant *A1B2_A2B1 = 514 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A1B2, A2B1)); 515 const SCEVConstant *A2B1_A1B2 = 516 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A2B1, A1B2)); 517 if (!C1B2_C2B1 || !C1A2_C2A1 || 518 !A1B2_A2B1 || !A2B1_A1B2) 519 return false; 520 APInt Xtop = C1B2_C2B1->getAPInt(); 521 APInt Xbot = A1B2_A2B1->getAPInt(); 522 APInt Ytop = C1A2_C2A1->getAPInt(); 523 APInt Ybot = A2B1_A1B2->getAPInt(); 524 LLVM_DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n"); 525 LLVM_DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n"); 526 LLVM_DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n"); 527 LLVM_DEBUG(dbgs() << "\t\tYbot = " << Ybot << "\n"); 528 APInt Xq = Xtop; // these need to be initialized, even 529 APInt Xr = Xtop; // though they're just going to be overwritten 530 APInt::sdivrem(Xtop, Xbot, Xq, Xr); 531 APInt Yq = Ytop; 532 APInt Yr = Ytop; 533 APInt::sdivrem(Ytop, Ybot, Yq, Yr); 534 if (Xr != 0 || Yr != 0) { 535 X->setEmpty(); 536 ++DeltaSuccesses; 537 return true; 538 } 539 LLVM_DEBUG(dbgs() << "\t\tX = " << Xq << ", Y = " << Yq << "\n"); 540 if (Xq.slt(0) || Yq.slt(0)) { 541 X->setEmpty(); 542 ++DeltaSuccesses; 543 return true; 544 } 545 if (const SCEVConstant *CUB = 546 collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) { 547 const APInt &UpperBound = CUB->getAPInt(); 548 LLVM_DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n"); 549 if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) { 550 X->setEmpty(); 551 ++DeltaSuccesses; 552 return true; 553 } 554 } 555 X->setPoint(SE->getConstant(Xq), 556 SE->getConstant(Yq), 557 X->getAssociatedLoop()); 558 ++DeltaSuccesses; 559 return true; 560 } 561 return false; 562 } 563 564 // if (X->isLine() && Y->isPoint()) This case can't occur. 565 assert(!(X->isLine() && Y->isPoint()) && "This case should never occur"); 566 567 if (X->isPoint() && Y->isLine()) { 568 LLVM_DEBUG(dbgs() << "\t intersect Point and Line\n"); 569 const SCEV *A1X1 = SE->getMulExpr(Y->getA(), X->getX()); 570 const SCEV *B1Y1 = SE->getMulExpr(Y->getB(), X->getY()); 571 const SCEV *Sum = SE->getAddExpr(A1X1, B1Y1); 572 if (isKnownPredicate(CmpInst::ICMP_EQ, Sum, Y->getC())) 573 return false; 574 if (isKnownPredicate(CmpInst::ICMP_NE, Sum, Y->getC())) { 575 X->setEmpty(); 576 ++DeltaSuccesses; 577 return true; 578 } 579 return false; 580 } 581 582 llvm_unreachable("shouldn't reach the end of Constraint intersection"); 583 return false; 584 } 585 586 587 //===----------------------------------------------------------------------===// 588 // DependenceInfo methods 589 590 // For debugging purposes. Dumps a dependence to OS. 591 void Dependence::dump(raw_ostream &OS) const { 592 bool Splitable = false; 593 if (isConfused()) 594 OS << "confused"; 595 else { 596 if (isConsistent()) 597 OS << "consistent "; 598 if (isFlow()) 599 OS << "flow"; 600 else if (isOutput()) 601 OS << "output"; 602 else if (isAnti()) 603 OS << "anti"; 604 else if (isInput()) 605 OS << "input"; 606 unsigned Levels = getLevels(); 607 OS << " ["; 608 for (unsigned II = 1; II <= Levels; ++II) { 609 if (isSplitable(II)) 610 Splitable = true; 611 if (isPeelFirst(II)) 612 OS << 'p'; 613 const SCEV *Distance = getDistance(II); 614 if (Distance) 615 OS << *Distance; 616 else if (isScalar(II)) 617 OS << "S"; 618 else { 619 unsigned Direction = getDirection(II); 620 if (Direction == DVEntry::ALL) 621 OS << "*"; 622 else { 623 if (Direction & DVEntry::LT) 624 OS << "<"; 625 if (Direction & DVEntry::EQ) 626 OS << "="; 627 if (Direction & DVEntry::GT) 628 OS << ">"; 629 } 630 } 631 if (isPeelLast(II)) 632 OS << 'p'; 633 if (II < Levels) 634 OS << " "; 635 } 636 if (isLoopIndependent()) 637 OS << "|<"; 638 OS << "]"; 639 if (Splitable) 640 OS << " splitable"; 641 } 642 OS << "!\n"; 643 } 644 645 // Returns NoAlias/MayAliass/MustAlias for two memory locations based upon their 646 // underlaying objects. If LocA and LocB are known to not alias (for any reason: 647 // tbaa, non-overlapping regions etc), then it is known there is no dependecy. 648 // Otherwise the underlying objects are checked to see if they point to 649 // different identifiable objects. 650 static AliasResult underlyingObjectsAlias(AAResults *AA, 651 const DataLayout &DL, 652 const MemoryLocation &LocA, 653 const MemoryLocation &LocB) { 654 // Check the original locations (minus size) for noalias, which can happen for 655 // tbaa, incompatible underlying object locations, etc. 656 MemoryLocation LocAS = 657 MemoryLocation::getBeforeOrAfter(LocA.Ptr, LocA.AATags); 658 MemoryLocation LocBS = 659 MemoryLocation::getBeforeOrAfter(LocB.Ptr, LocB.AATags); 660 if (AA->isNoAlias(LocAS, LocBS)) 661 return AliasResult::NoAlias; 662 663 // Check the underlying objects are the same 664 const Value *AObj = getUnderlyingObject(LocA.Ptr); 665 const Value *BObj = getUnderlyingObject(LocB.Ptr); 666 667 // If the underlying objects are the same, they must alias 668 if (AObj == BObj) 669 return AliasResult::MustAlias; 670 671 // We may have hit the recursion limit for underlying objects, or have 672 // underlying objects where we don't know they will alias. 673 if (!isIdentifiedObject(AObj) || !isIdentifiedObject(BObj)) 674 return AliasResult::MayAlias; 675 676 // Otherwise we know the objects are different and both identified objects so 677 // must not alias. 678 return AliasResult::NoAlias; 679 } 680 681 682 // Returns true if the load or store can be analyzed. Atomic and volatile 683 // operations have properties which this analysis does not understand. 684 static 685 bool isLoadOrStore(const Instruction *I) { 686 if (const LoadInst *LI = dyn_cast<LoadInst>(I)) 687 return LI->isUnordered(); 688 else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) 689 return SI->isUnordered(); 690 return false; 691 } 692 693 694 // Examines the loop nesting of the Src and Dst 695 // instructions and establishes their shared loops. Sets the variables 696 // CommonLevels, SrcLevels, and MaxLevels. 697 // The source and destination instructions needn't be contained in the same 698 // loop. The routine establishNestingLevels finds the level of most deeply 699 // nested loop that contains them both, CommonLevels. An instruction that's 700 // not contained in a loop is at level = 0. MaxLevels is equal to the level 701 // of the source plus the level of the destination, minus CommonLevels. 702 // This lets us allocate vectors MaxLevels in length, with room for every 703 // distinct loop referenced in both the source and destination subscripts. 704 // The variable SrcLevels is the nesting depth of the source instruction. 705 // It's used to help calculate distinct loops referenced by the destination. 706 // Here's the map from loops to levels: 707 // 0 - unused 708 // 1 - outermost common loop 709 // ... - other common loops 710 // CommonLevels - innermost common loop 711 // ... - loops containing Src but not Dst 712 // SrcLevels - innermost loop containing Src but not Dst 713 // ... - loops containing Dst but not Src 714 // MaxLevels - innermost loops containing Dst but not Src 715 // Consider the follow code fragment: 716 // for (a = ...) { 717 // for (b = ...) { 718 // for (c = ...) { 719 // for (d = ...) { 720 // A[] = ...; 721 // } 722 // } 723 // for (e = ...) { 724 // for (f = ...) { 725 // for (g = ...) { 726 // ... = A[]; 727 // } 728 // } 729 // } 730 // } 731 // } 732 // If we're looking at the possibility of a dependence between the store 733 // to A (the Src) and the load from A (the Dst), we'll note that they 734 // have 2 loops in common, so CommonLevels will equal 2 and the direction 735 // vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7. 736 // A map from loop names to loop numbers would look like 737 // a - 1 738 // b - 2 = CommonLevels 739 // c - 3 740 // d - 4 = SrcLevels 741 // e - 5 742 // f - 6 743 // g - 7 = MaxLevels 744 void DependenceInfo::establishNestingLevels(const Instruction *Src, 745 const Instruction *Dst) { 746 const BasicBlock *SrcBlock = Src->getParent(); 747 const BasicBlock *DstBlock = Dst->getParent(); 748 unsigned SrcLevel = LI->getLoopDepth(SrcBlock); 749 unsigned DstLevel = LI->getLoopDepth(DstBlock); 750 const Loop *SrcLoop = LI->getLoopFor(SrcBlock); 751 const Loop *DstLoop = LI->getLoopFor(DstBlock); 752 SrcLevels = SrcLevel; 753 MaxLevels = SrcLevel + DstLevel; 754 while (SrcLevel > DstLevel) { 755 SrcLoop = SrcLoop->getParentLoop(); 756 SrcLevel--; 757 } 758 while (DstLevel > SrcLevel) { 759 DstLoop = DstLoop->getParentLoop(); 760 DstLevel--; 761 } 762 while (SrcLoop != DstLoop) { 763 SrcLoop = SrcLoop->getParentLoop(); 764 DstLoop = DstLoop->getParentLoop(); 765 SrcLevel--; 766 } 767 CommonLevels = SrcLevel; 768 MaxLevels -= CommonLevels; 769 } 770 771 772 // Given one of the loops containing the source, return 773 // its level index in our numbering scheme. 774 unsigned DependenceInfo::mapSrcLoop(const Loop *SrcLoop) const { 775 return SrcLoop->getLoopDepth(); 776 } 777 778 779 // Given one of the loops containing the destination, 780 // return its level index in our numbering scheme. 781 unsigned DependenceInfo::mapDstLoop(const Loop *DstLoop) const { 782 unsigned D = DstLoop->getLoopDepth(); 783 if (D > CommonLevels) 784 return D - CommonLevels + SrcLevels; 785 else 786 return D; 787 } 788 789 790 // Returns true if Expression is loop invariant in LoopNest. 791 bool DependenceInfo::isLoopInvariant(const SCEV *Expression, 792 const Loop *LoopNest) const { 793 if (!LoopNest) 794 return true; 795 return SE->isLoopInvariant(Expression, LoopNest) && 796 isLoopInvariant(Expression, LoopNest->getParentLoop()); 797 } 798 799 800 801 // Finds the set of loops from the LoopNest that 802 // have a level <= CommonLevels and are referred to by the SCEV Expression. 803 void DependenceInfo::collectCommonLoops(const SCEV *Expression, 804 const Loop *LoopNest, 805 SmallBitVector &Loops) const { 806 while (LoopNest) { 807 unsigned Level = LoopNest->getLoopDepth(); 808 if (Level <= CommonLevels && !SE->isLoopInvariant(Expression, LoopNest)) 809 Loops.set(Level); 810 LoopNest = LoopNest->getParentLoop(); 811 } 812 } 813 814 void DependenceInfo::unifySubscriptType(ArrayRef<Subscript *> Pairs) { 815 816 unsigned widestWidthSeen = 0; 817 Type *widestType; 818 819 // Go through each pair and find the widest bit to which we need 820 // to extend all of them. 821 for (Subscript *Pair : Pairs) { 822 const SCEV *Src = Pair->Src; 823 const SCEV *Dst = Pair->Dst; 824 IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType()); 825 IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType()); 826 if (SrcTy == nullptr || DstTy == nullptr) { 827 assert(SrcTy == DstTy && "This function only unify integer types and " 828 "expect Src and Dst share the same type " 829 "otherwise."); 830 continue; 831 } 832 if (SrcTy->getBitWidth() > widestWidthSeen) { 833 widestWidthSeen = SrcTy->getBitWidth(); 834 widestType = SrcTy; 835 } 836 if (DstTy->getBitWidth() > widestWidthSeen) { 837 widestWidthSeen = DstTy->getBitWidth(); 838 widestType = DstTy; 839 } 840 } 841 842 843 assert(widestWidthSeen > 0); 844 845 // Now extend each pair to the widest seen. 846 for (Subscript *Pair : Pairs) { 847 const SCEV *Src = Pair->Src; 848 const SCEV *Dst = Pair->Dst; 849 IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType()); 850 IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType()); 851 if (SrcTy == nullptr || DstTy == nullptr) { 852 assert(SrcTy == DstTy && "This function only unify integer types and " 853 "expect Src and Dst share the same type " 854 "otherwise."); 855 continue; 856 } 857 if (SrcTy->getBitWidth() < widestWidthSeen) 858 // Sign-extend Src to widestType 859 Pair->Src = SE->getSignExtendExpr(Src, widestType); 860 if (DstTy->getBitWidth() < widestWidthSeen) { 861 // Sign-extend Dst to widestType 862 Pair->Dst = SE->getSignExtendExpr(Dst, widestType); 863 } 864 } 865 } 866 867 // removeMatchingExtensions - Examines a subscript pair. 868 // If the source and destination are identically sign (or zero) 869 // extended, it strips off the extension in an effect to simplify 870 // the actual analysis. 871 void DependenceInfo::removeMatchingExtensions(Subscript *Pair) { 872 const SCEV *Src = Pair->Src; 873 const SCEV *Dst = Pair->Dst; 874 if ((isa<SCEVZeroExtendExpr>(Src) && isa<SCEVZeroExtendExpr>(Dst)) || 875 (isa<SCEVSignExtendExpr>(Src) && isa<SCEVSignExtendExpr>(Dst))) { 876 const SCEVIntegralCastExpr *SrcCast = cast<SCEVIntegralCastExpr>(Src); 877 const SCEVIntegralCastExpr *DstCast = cast<SCEVIntegralCastExpr>(Dst); 878 const SCEV *SrcCastOp = SrcCast->getOperand(); 879 const SCEV *DstCastOp = DstCast->getOperand(); 880 if (SrcCastOp->getType() == DstCastOp->getType()) { 881 Pair->Src = SrcCastOp; 882 Pair->Dst = DstCastOp; 883 } 884 } 885 } 886 887 // Examine the scev and return true iff it's linear. 888 // Collect any loops mentioned in the set of "Loops". 889 bool DependenceInfo::checkSubscript(const SCEV *Expr, const Loop *LoopNest, 890 SmallBitVector &Loops, bool IsSrc) { 891 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 892 if (!AddRec) 893 return isLoopInvariant(Expr, LoopNest); 894 const SCEV *Start = AddRec->getStart(); 895 const SCEV *Step = AddRec->getStepRecurrence(*SE); 896 const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop()); 897 if (!isa<SCEVCouldNotCompute>(UB)) { 898 if (SE->getTypeSizeInBits(Start->getType()) < 899 SE->getTypeSizeInBits(UB->getType())) { 900 if (!AddRec->getNoWrapFlags()) 901 return false; 902 } 903 } 904 if (!isLoopInvariant(Step, LoopNest)) 905 return false; 906 if (IsSrc) 907 Loops.set(mapSrcLoop(AddRec->getLoop())); 908 else 909 Loops.set(mapDstLoop(AddRec->getLoop())); 910 return checkSubscript(Start, LoopNest, Loops, IsSrc); 911 } 912 913 // Examine the scev and return true iff it's linear. 914 // Collect any loops mentioned in the set of "Loops". 915 bool DependenceInfo::checkSrcSubscript(const SCEV *Src, const Loop *LoopNest, 916 SmallBitVector &Loops) { 917 return checkSubscript(Src, LoopNest, Loops, true); 918 } 919 920 // Examine the scev and return true iff it's linear. 921 // Collect any loops mentioned in the set of "Loops". 922 bool DependenceInfo::checkDstSubscript(const SCEV *Dst, const Loop *LoopNest, 923 SmallBitVector &Loops) { 924 return checkSubscript(Dst, LoopNest, Loops, false); 925 } 926 927 928 // Examines the subscript pair (the Src and Dst SCEVs) 929 // and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear. 930 // Collects the associated loops in a set. 931 DependenceInfo::Subscript::ClassificationKind 932 DependenceInfo::classifyPair(const SCEV *Src, const Loop *SrcLoopNest, 933 const SCEV *Dst, const Loop *DstLoopNest, 934 SmallBitVector &Loops) { 935 SmallBitVector SrcLoops(MaxLevels + 1); 936 SmallBitVector DstLoops(MaxLevels + 1); 937 if (!checkSrcSubscript(Src, SrcLoopNest, SrcLoops)) 938 return Subscript::NonLinear; 939 if (!checkDstSubscript(Dst, DstLoopNest, DstLoops)) 940 return Subscript::NonLinear; 941 Loops = SrcLoops; 942 Loops |= DstLoops; 943 unsigned N = Loops.count(); 944 if (N == 0) 945 return Subscript::ZIV; 946 if (N == 1) 947 return Subscript::SIV; 948 if (N == 2 && (SrcLoops.count() == 0 || 949 DstLoops.count() == 0 || 950 (SrcLoops.count() == 1 && DstLoops.count() == 1))) 951 return Subscript::RDIV; 952 return Subscript::MIV; 953 } 954 955 956 // A wrapper around SCEV::isKnownPredicate. 957 // Looks for cases where we're interested in comparing for equality. 958 // If both X and Y have been identically sign or zero extended, 959 // it strips off the (confusing) extensions before invoking 960 // SCEV::isKnownPredicate. Perhaps, someday, the ScalarEvolution package 961 // will be similarly updated. 962 // 963 // If SCEV::isKnownPredicate can't prove the predicate, 964 // we try simple subtraction, which seems to help in some cases 965 // involving symbolics. 966 bool DependenceInfo::isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *X, 967 const SCEV *Y) const { 968 if (Pred == CmpInst::ICMP_EQ || 969 Pred == CmpInst::ICMP_NE) { 970 if ((isa<SCEVSignExtendExpr>(X) && 971 isa<SCEVSignExtendExpr>(Y)) || 972 (isa<SCEVZeroExtendExpr>(X) && 973 isa<SCEVZeroExtendExpr>(Y))) { 974 const SCEVIntegralCastExpr *CX = cast<SCEVIntegralCastExpr>(X); 975 const SCEVIntegralCastExpr *CY = cast<SCEVIntegralCastExpr>(Y); 976 const SCEV *Xop = CX->getOperand(); 977 const SCEV *Yop = CY->getOperand(); 978 if (Xop->getType() == Yop->getType()) { 979 X = Xop; 980 Y = Yop; 981 } 982 } 983 } 984 if (SE->isKnownPredicate(Pred, X, Y)) 985 return true; 986 // If SE->isKnownPredicate can't prove the condition, 987 // we try the brute-force approach of subtracting 988 // and testing the difference. 989 // By testing with SE->isKnownPredicate first, we avoid 990 // the possibility of overflow when the arguments are constants. 991 const SCEV *Delta = SE->getMinusSCEV(X, Y); 992 switch (Pred) { 993 case CmpInst::ICMP_EQ: 994 return Delta->isZero(); 995 case CmpInst::ICMP_NE: 996 return SE->isKnownNonZero(Delta); 997 case CmpInst::ICMP_SGE: 998 return SE->isKnownNonNegative(Delta); 999 case CmpInst::ICMP_SLE: 1000 return SE->isKnownNonPositive(Delta); 1001 case CmpInst::ICMP_SGT: 1002 return SE->isKnownPositive(Delta); 1003 case CmpInst::ICMP_SLT: 1004 return SE->isKnownNegative(Delta); 1005 default: 1006 llvm_unreachable("unexpected predicate in isKnownPredicate"); 1007 } 1008 } 1009 1010 /// Compare to see if S is less than Size, using isKnownNegative(S - max(Size, 1)) 1011 /// with some extra checking if S is an AddRec and we can prove less-than using 1012 /// the loop bounds. 1013 bool DependenceInfo::isKnownLessThan(const SCEV *S, const SCEV *Size) const { 1014 // First unify to the same type 1015 auto *SType = dyn_cast<IntegerType>(S->getType()); 1016 auto *SizeType = dyn_cast<IntegerType>(Size->getType()); 1017 if (!SType || !SizeType) 1018 return false; 1019 Type *MaxType = 1020 (SType->getBitWidth() >= SizeType->getBitWidth()) ? SType : SizeType; 1021 S = SE->getTruncateOrZeroExtend(S, MaxType); 1022 Size = SE->getTruncateOrZeroExtend(Size, MaxType); 1023 1024 // Special check for addrecs using BE taken count 1025 const SCEV *Bound = SE->getMinusSCEV(S, Size); 1026 if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Bound)) { 1027 if (AddRec->isAffine()) { 1028 const SCEV *BECount = SE->getBackedgeTakenCount(AddRec->getLoop()); 1029 if (!isa<SCEVCouldNotCompute>(BECount)) { 1030 const SCEV *Limit = AddRec->evaluateAtIteration(BECount, *SE); 1031 if (SE->isKnownNegative(Limit)) 1032 return true; 1033 } 1034 } 1035 } 1036 1037 // Check using normal isKnownNegative 1038 const SCEV *LimitedBound = 1039 SE->getMinusSCEV(S, SE->getSMaxExpr(Size, SE->getOne(Size->getType()))); 1040 return SE->isKnownNegative(LimitedBound); 1041 } 1042 1043 bool DependenceInfo::isKnownNonNegative(const SCEV *S, const Value *Ptr) const { 1044 bool Inbounds = false; 1045 if (auto *SrcGEP = dyn_cast<GetElementPtrInst>(Ptr)) 1046 Inbounds = SrcGEP->isInBounds(); 1047 if (Inbounds) { 1048 if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) { 1049 if (AddRec->isAffine()) { 1050 // We know S is for Ptr, the operand on a load/store, so doesn't wrap. 1051 // If both parts are NonNegative, the end result will be NonNegative 1052 if (SE->isKnownNonNegative(AddRec->getStart()) && 1053 SE->isKnownNonNegative(AddRec->getOperand(1))) 1054 return true; 1055 } 1056 } 1057 } 1058 1059 return SE->isKnownNonNegative(S); 1060 } 1061 1062 // All subscripts are all the same type. 1063 // Loop bound may be smaller (e.g., a char). 1064 // Should zero extend loop bound, since it's always >= 0. 1065 // This routine collects upper bound and extends or truncates if needed. 1066 // Truncating is safe when subscripts are known not to wrap. Cases without 1067 // nowrap flags should have been rejected earlier. 1068 // Return null if no bound available. 1069 const SCEV *DependenceInfo::collectUpperBound(const Loop *L, Type *T) const { 1070 if (SE->hasLoopInvariantBackedgeTakenCount(L)) { 1071 const SCEV *UB = SE->getBackedgeTakenCount(L); 1072 return SE->getTruncateOrZeroExtend(UB, T); 1073 } 1074 return nullptr; 1075 } 1076 1077 1078 // Calls collectUpperBound(), then attempts to cast it to SCEVConstant. 1079 // If the cast fails, returns NULL. 1080 const SCEVConstant *DependenceInfo::collectConstantUpperBound(const Loop *L, 1081 Type *T) const { 1082 if (const SCEV *UB = collectUpperBound(L, T)) 1083 return dyn_cast<SCEVConstant>(UB); 1084 return nullptr; 1085 } 1086 1087 1088 // testZIV - 1089 // When we have a pair of subscripts of the form [c1] and [c2], 1090 // where c1 and c2 are both loop invariant, we attack it using 1091 // the ZIV test. Basically, we test by comparing the two values, 1092 // but there are actually three possible results: 1093 // 1) the values are equal, so there's a dependence 1094 // 2) the values are different, so there's no dependence 1095 // 3) the values might be equal, so we have to assume a dependence. 1096 // 1097 // Return true if dependence disproved. 1098 bool DependenceInfo::testZIV(const SCEV *Src, const SCEV *Dst, 1099 FullDependence &Result) const { 1100 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n"); 1101 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n"); 1102 ++ZIVapplications; 1103 if (isKnownPredicate(CmpInst::ICMP_EQ, Src, Dst)) { 1104 LLVM_DEBUG(dbgs() << " provably dependent\n"); 1105 return false; // provably dependent 1106 } 1107 if (isKnownPredicate(CmpInst::ICMP_NE, Src, Dst)) { 1108 LLVM_DEBUG(dbgs() << " provably independent\n"); 1109 ++ZIVindependence; 1110 return true; // provably independent 1111 } 1112 LLVM_DEBUG(dbgs() << " possibly dependent\n"); 1113 Result.Consistent = false; 1114 return false; // possibly dependent 1115 } 1116 1117 1118 // strongSIVtest - 1119 // From the paper, Practical Dependence Testing, Section 4.2.1 1120 // 1121 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 + a*i], 1122 // where i is an induction variable, c1 and c2 are loop invariant, 1123 // and a is a constant, we can solve it exactly using the Strong SIV test. 1124 // 1125 // Can prove independence. Failing that, can compute distance (and direction). 1126 // In the presence of symbolic terms, we can sometimes make progress. 1127 // 1128 // If there's a dependence, 1129 // 1130 // c1 + a*i = c2 + a*i' 1131 // 1132 // The dependence distance is 1133 // 1134 // d = i' - i = (c1 - c2)/a 1135 // 1136 // A dependence only exists if d is an integer and abs(d) <= U, where U is the 1137 // loop's upper bound. If a dependence exists, the dependence direction is 1138 // defined as 1139 // 1140 // { < if d > 0 1141 // direction = { = if d = 0 1142 // { > if d < 0 1143 // 1144 // Return true if dependence disproved. 1145 bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst, 1146 const SCEV *DstConst, const Loop *CurLoop, 1147 unsigned Level, FullDependence &Result, 1148 Constraint &NewConstraint) const { 1149 LLVM_DEBUG(dbgs() << "\tStrong SIV test\n"); 1150 LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff); 1151 LLVM_DEBUG(dbgs() << ", " << *Coeff->getType() << "\n"); 1152 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst); 1153 LLVM_DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n"); 1154 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst); 1155 LLVM_DEBUG(dbgs() << ", " << *DstConst->getType() << "\n"); 1156 ++StrongSIVapplications; 1157 assert(0 < Level && Level <= CommonLevels && "level out of range"); 1158 Level--; 1159 1160 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1161 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta); 1162 LLVM_DEBUG(dbgs() << ", " << *Delta->getType() << "\n"); 1163 1164 // check that |Delta| < iteration count 1165 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1166 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound); 1167 LLVM_DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n"); 1168 const SCEV *AbsDelta = 1169 SE->isKnownNonNegative(Delta) ? Delta : SE->getNegativeSCEV(Delta); 1170 const SCEV *AbsCoeff = 1171 SE->isKnownNonNegative(Coeff) ? Coeff : SE->getNegativeSCEV(Coeff); 1172 const SCEV *Product = SE->getMulExpr(UpperBound, AbsCoeff); 1173 if (isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product)) { 1174 // Distance greater than trip count - no dependence 1175 ++StrongSIVindependence; 1176 ++StrongSIVsuccesses; 1177 return true; 1178 } 1179 } 1180 1181 // Can we compute distance? 1182 if (isa<SCEVConstant>(Delta) && isa<SCEVConstant>(Coeff)) { 1183 APInt ConstDelta = cast<SCEVConstant>(Delta)->getAPInt(); 1184 APInt ConstCoeff = cast<SCEVConstant>(Coeff)->getAPInt(); 1185 APInt Distance = ConstDelta; // these need to be initialized 1186 APInt Remainder = ConstDelta; 1187 APInt::sdivrem(ConstDelta, ConstCoeff, Distance, Remainder); 1188 LLVM_DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1189 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1190 // Make sure Coeff divides Delta exactly 1191 if (Remainder != 0) { 1192 // Coeff doesn't divide Distance, no dependence 1193 ++StrongSIVindependence; 1194 ++StrongSIVsuccesses; 1195 return true; 1196 } 1197 Result.DV[Level].Distance = SE->getConstant(Distance); 1198 NewConstraint.setDistance(SE->getConstant(Distance), CurLoop); 1199 if (Distance.sgt(0)) 1200 Result.DV[Level].Direction &= Dependence::DVEntry::LT; 1201 else if (Distance.slt(0)) 1202 Result.DV[Level].Direction &= Dependence::DVEntry::GT; 1203 else 1204 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1205 ++StrongSIVsuccesses; 1206 } 1207 else if (Delta->isZero()) { 1208 // since 0/X == 0 1209 Result.DV[Level].Distance = Delta; 1210 NewConstraint.setDistance(Delta, CurLoop); 1211 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1212 ++StrongSIVsuccesses; 1213 } 1214 else { 1215 if (Coeff->isOne()) { 1216 LLVM_DEBUG(dbgs() << "\t Distance = " << *Delta << "\n"); 1217 Result.DV[Level].Distance = Delta; // since X/1 == X 1218 NewConstraint.setDistance(Delta, CurLoop); 1219 } 1220 else { 1221 Result.Consistent = false; 1222 NewConstraint.setLine(Coeff, 1223 SE->getNegativeSCEV(Coeff), 1224 SE->getNegativeSCEV(Delta), CurLoop); 1225 } 1226 1227 // maybe we can get a useful direction 1228 bool DeltaMaybeZero = !SE->isKnownNonZero(Delta); 1229 bool DeltaMaybePositive = !SE->isKnownNonPositive(Delta); 1230 bool DeltaMaybeNegative = !SE->isKnownNonNegative(Delta); 1231 bool CoeffMaybePositive = !SE->isKnownNonPositive(Coeff); 1232 bool CoeffMaybeNegative = !SE->isKnownNonNegative(Coeff); 1233 // The double negatives above are confusing. 1234 // It helps to read !SE->isKnownNonZero(Delta) 1235 // as "Delta might be Zero" 1236 unsigned NewDirection = Dependence::DVEntry::NONE; 1237 if ((DeltaMaybePositive && CoeffMaybePositive) || 1238 (DeltaMaybeNegative && CoeffMaybeNegative)) 1239 NewDirection = Dependence::DVEntry::LT; 1240 if (DeltaMaybeZero) 1241 NewDirection |= Dependence::DVEntry::EQ; 1242 if ((DeltaMaybeNegative && CoeffMaybePositive) || 1243 (DeltaMaybePositive && CoeffMaybeNegative)) 1244 NewDirection |= Dependence::DVEntry::GT; 1245 if (NewDirection < Result.DV[Level].Direction) 1246 ++StrongSIVsuccesses; 1247 Result.DV[Level].Direction &= NewDirection; 1248 } 1249 return false; 1250 } 1251 1252 1253 // weakCrossingSIVtest - 1254 // From the paper, Practical Dependence Testing, Section 4.2.2 1255 // 1256 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 - a*i], 1257 // where i is an induction variable, c1 and c2 are loop invariant, 1258 // and a is a constant, we can solve it exactly using the 1259 // Weak-Crossing SIV test. 1260 // 1261 // Given c1 + a*i = c2 - a*i', we can look for the intersection of 1262 // the two lines, where i = i', yielding 1263 // 1264 // c1 + a*i = c2 - a*i 1265 // 2a*i = c2 - c1 1266 // i = (c2 - c1)/2a 1267 // 1268 // If i < 0, there is no dependence. 1269 // If i > upperbound, there is no dependence. 1270 // If i = 0 (i.e., if c1 = c2), there's a dependence with distance = 0. 1271 // If i = upperbound, there's a dependence with distance = 0. 1272 // If i is integral, there's a dependence (all directions). 1273 // If the non-integer part = 1/2, there's a dependence (<> directions). 1274 // Otherwise, there's no dependence. 1275 // 1276 // Can prove independence. Failing that, 1277 // can sometimes refine the directions. 1278 // Can determine iteration for splitting. 1279 // 1280 // Return true if dependence disproved. 1281 bool DependenceInfo::weakCrossingSIVtest( 1282 const SCEV *Coeff, const SCEV *SrcConst, const SCEV *DstConst, 1283 const Loop *CurLoop, unsigned Level, FullDependence &Result, 1284 Constraint &NewConstraint, const SCEV *&SplitIter) const { 1285 LLVM_DEBUG(dbgs() << "\tWeak-Crossing SIV test\n"); 1286 LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff << "\n"); 1287 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1288 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1289 ++WeakCrossingSIVapplications; 1290 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1291 Level--; 1292 Result.Consistent = false; 1293 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1294 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1295 NewConstraint.setLine(Coeff, Coeff, Delta, CurLoop); 1296 if (Delta->isZero()) { 1297 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); 1298 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); 1299 ++WeakCrossingSIVsuccesses; 1300 if (!Result.DV[Level].Direction) { 1301 ++WeakCrossingSIVindependence; 1302 return true; 1303 } 1304 Result.DV[Level].Distance = Delta; // = 0 1305 return false; 1306 } 1307 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Coeff); 1308 if (!ConstCoeff) 1309 return false; 1310 1311 Result.DV[Level].Splitable = true; 1312 if (SE->isKnownNegative(ConstCoeff)) { 1313 ConstCoeff = dyn_cast<SCEVConstant>(SE->getNegativeSCEV(ConstCoeff)); 1314 assert(ConstCoeff && 1315 "dynamic cast of negative of ConstCoeff should yield constant"); 1316 Delta = SE->getNegativeSCEV(Delta); 1317 } 1318 assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive"); 1319 1320 // compute SplitIter for use by DependenceInfo::getSplitIteration() 1321 SplitIter = SE->getUDivExpr( 1322 SE->getSMaxExpr(SE->getZero(Delta->getType()), Delta), 1323 SE->getMulExpr(SE->getConstant(Delta->getType(), 2), ConstCoeff)); 1324 LLVM_DEBUG(dbgs() << "\t Split iter = " << *SplitIter << "\n"); 1325 1326 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1327 if (!ConstDelta) 1328 return false; 1329 1330 // We're certain that ConstCoeff > 0; therefore, 1331 // if Delta < 0, then no dependence. 1332 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1333 LLVM_DEBUG(dbgs() << "\t ConstCoeff = " << *ConstCoeff << "\n"); 1334 if (SE->isKnownNegative(Delta)) { 1335 // No dependence, Delta < 0 1336 ++WeakCrossingSIVindependence; 1337 ++WeakCrossingSIVsuccesses; 1338 return true; 1339 } 1340 1341 // We're certain that Delta > 0 and ConstCoeff > 0. 1342 // Check Delta/(2*ConstCoeff) against upper loop bound 1343 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1344 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1345 const SCEV *ConstantTwo = SE->getConstant(UpperBound->getType(), 2); 1346 const SCEV *ML = SE->getMulExpr(SE->getMulExpr(ConstCoeff, UpperBound), 1347 ConstantTwo); 1348 LLVM_DEBUG(dbgs() << "\t ML = " << *ML << "\n"); 1349 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, ML)) { 1350 // Delta too big, no dependence 1351 ++WeakCrossingSIVindependence; 1352 ++WeakCrossingSIVsuccesses; 1353 return true; 1354 } 1355 if (isKnownPredicate(CmpInst::ICMP_EQ, Delta, ML)) { 1356 // i = i' = UB 1357 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); 1358 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); 1359 ++WeakCrossingSIVsuccesses; 1360 if (!Result.DV[Level].Direction) { 1361 ++WeakCrossingSIVindependence; 1362 return true; 1363 } 1364 Result.DV[Level].Splitable = false; 1365 Result.DV[Level].Distance = SE->getZero(Delta->getType()); 1366 return false; 1367 } 1368 } 1369 1370 // check that Coeff divides Delta 1371 APInt APDelta = ConstDelta->getAPInt(); 1372 APInt APCoeff = ConstCoeff->getAPInt(); 1373 APInt Distance = APDelta; // these need to be initialzed 1374 APInt Remainder = APDelta; 1375 APInt::sdivrem(APDelta, APCoeff, Distance, Remainder); 1376 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1377 if (Remainder != 0) { 1378 // Coeff doesn't divide Delta, no dependence 1379 ++WeakCrossingSIVindependence; 1380 ++WeakCrossingSIVsuccesses; 1381 return true; 1382 } 1383 LLVM_DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1384 1385 // if 2*Coeff doesn't divide Delta, then the equal direction isn't possible 1386 APInt Two = APInt(Distance.getBitWidth(), 2, true); 1387 Remainder = Distance.srem(Two); 1388 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1389 if (Remainder != 0) { 1390 // Equal direction isn't possible 1391 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::EQ); 1392 ++WeakCrossingSIVsuccesses; 1393 } 1394 return false; 1395 } 1396 1397 1398 // Kirch's algorithm, from 1399 // 1400 // Optimizing Supercompilers for Supercomputers 1401 // Michael Wolfe 1402 // MIT Press, 1989 1403 // 1404 // Program 2.1, page 29. 1405 // Computes the GCD of AM and BM. 1406 // Also finds a solution to the equation ax - by = gcd(a, b). 1407 // Returns true if dependence disproved; i.e., gcd does not divide Delta. 1408 static bool findGCD(unsigned Bits, const APInt &AM, const APInt &BM, 1409 const APInt &Delta, APInt &G, APInt &X, APInt &Y) { 1410 APInt A0(Bits, 1, true), A1(Bits, 0, true); 1411 APInt B0(Bits, 0, true), B1(Bits, 1, true); 1412 APInt G0 = AM.abs(); 1413 APInt G1 = BM.abs(); 1414 APInt Q = G0; // these need to be initialized 1415 APInt R = G0; 1416 APInt::sdivrem(G0, G1, Q, R); 1417 while (R != 0) { 1418 APInt A2 = A0 - Q*A1; A0 = A1; A1 = A2; 1419 APInt B2 = B0 - Q*B1; B0 = B1; B1 = B2; 1420 G0 = G1; G1 = R; 1421 APInt::sdivrem(G0, G1, Q, R); 1422 } 1423 G = G1; 1424 LLVM_DEBUG(dbgs() << "\t GCD = " << G << "\n"); 1425 X = AM.slt(0) ? -A1 : A1; 1426 Y = BM.slt(0) ? B1 : -B1; 1427 1428 // make sure gcd divides Delta 1429 R = Delta.srem(G); 1430 if (R != 0) 1431 return true; // gcd doesn't divide Delta, no dependence 1432 Q = Delta.sdiv(G); 1433 return false; 1434 } 1435 1436 static APInt floorOfQuotient(const APInt &A, const APInt &B) { 1437 APInt Q = A; // these need to be initialized 1438 APInt R = A; 1439 APInt::sdivrem(A, B, Q, R); 1440 if (R == 0) 1441 return Q; 1442 if ((A.sgt(0) && B.sgt(0)) || 1443 (A.slt(0) && B.slt(0))) 1444 return Q; 1445 else 1446 return Q - 1; 1447 } 1448 1449 static APInt ceilingOfQuotient(const APInt &A, const APInt &B) { 1450 APInt Q = A; // these need to be initialized 1451 APInt R = A; 1452 APInt::sdivrem(A, B, Q, R); 1453 if (R == 0) 1454 return Q; 1455 if ((A.sgt(0) && B.sgt(0)) || 1456 (A.slt(0) && B.slt(0))) 1457 return Q + 1; 1458 else 1459 return Q; 1460 } 1461 1462 // exactSIVtest - 1463 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i], 1464 // where i is an induction variable, c1 and c2 are loop invariant, and a1 1465 // and a2 are constant, we can solve it exactly using an algorithm developed 1466 // by Banerjee and Wolfe. See Algorithm 6.2.1 (case 2.5) in: 1467 // 1468 // Dependence Analysis for Supercomputing 1469 // Utpal Banerjee 1470 // Kluwer Academic Publishers, 1988 1471 // 1472 // It's slower than the specialized tests (strong SIV, weak-zero SIV, etc), 1473 // so use them if possible. They're also a bit better with symbolics and, 1474 // in the case of the strong SIV test, can compute Distances. 1475 // 1476 // Return true if dependence disproved. 1477 // 1478 // This is a modified version of the original Banerjee algorithm. The original 1479 // only tested whether Dst depends on Src. This algorithm extends that and 1480 // returns all the dependencies that exist between Dst and Src. 1481 bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, 1482 const SCEV *SrcConst, const SCEV *DstConst, 1483 const Loop *CurLoop, unsigned Level, 1484 FullDependence &Result, 1485 Constraint &NewConstraint) const { 1486 LLVM_DEBUG(dbgs() << "\tExact SIV test\n"); 1487 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1488 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1489 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1490 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1491 ++ExactSIVapplications; 1492 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1493 Level--; 1494 Result.Consistent = false; 1495 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1496 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1497 NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff), Delta, 1498 CurLoop); 1499 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1500 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1501 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1502 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1503 return false; 1504 1505 // find gcd 1506 APInt G, X, Y; 1507 APInt AM = ConstSrcCoeff->getAPInt(); 1508 APInt BM = ConstDstCoeff->getAPInt(); 1509 APInt CM = ConstDelta->getAPInt(); 1510 unsigned Bits = AM.getBitWidth(); 1511 if (findGCD(Bits, AM, BM, CM, G, X, Y)) { 1512 // gcd doesn't divide Delta, no dependence 1513 ++ExactSIVindependence; 1514 ++ExactSIVsuccesses; 1515 return true; 1516 } 1517 1518 LLVM_DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 1519 1520 // since SCEV construction normalizes, LM = 0 1521 APInt UM(Bits, 1, true); 1522 bool UMValid = false; 1523 // UM is perhaps unavailable, let's check 1524 if (const SCEVConstant *CUB = 1525 collectConstantUpperBound(CurLoop, Delta->getType())) { 1526 UM = CUB->getAPInt(); 1527 LLVM_DEBUG(dbgs() << "\t UM = " << UM << "\n"); 1528 UMValid = true; 1529 } 1530 1531 APInt TU(APInt::getSignedMaxValue(Bits)); 1532 APInt TL(APInt::getSignedMinValue(Bits)); 1533 APInt TC = CM.sdiv(G); 1534 APInt TX = X * TC; 1535 APInt TY = Y * TC; 1536 LLVM_DEBUG(dbgs() << "\t TC = " << TC << "\n"); 1537 LLVM_DEBUG(dbgs() << "\t TX = " << TX << "\n"); 1538 LLVM_DEBUG(dbgs() << "\t TY = " << TY << "\n"); 1539 1540 SmallVector<APInt, 2> TLVec, TUVec; 1541 APInt TB = BM.sdiv(G); 1542 if (TB.sgt(0)) { 1543 TLVec.push_back(ceilingOfQuotient(-TX, TB)); 1544 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 1545 // New bound check - modification to Banerjee's e3 check 1546 if (UMValid) { 1547 TUVec.push_back(floorOfQuotient(UM - TX, TB)); 1548 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 1549 } 1550 } else { 1551 TUVec.push_back(floorOfQuotient(-TX, TB)); 1552 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 1553 // New bound check - modification to Banerjee's e3 check 1554 if (UMValid) { 1555 TLVec.push_back(ceilingOfQuotient(UM - TX, TB)); 1556 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 1557 } 1558 } 1559 1560 APInt TA = AM.sdiv(G); 1561 if (TA.sgt(0)) { 1562 if (UMValid) { 1563 TUVec.push_back(floorOfQuotient(UM - TY, TA)); 1564 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 1565 } 1566 // New bound check - modification to Banerjee's e3 check 1567 TLVec.push_back(ceilingOfQuotient(-TY, TA)); 1568 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 1569 } else { 1570 if (UMValid) { 1571 TLVec.push_back(ceilingOfQuotient(UM - TY, TA)); 1572 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 1573 } 1574 // New bound check - modification to Banerjee's e3 check 1575 TUVec.push_back(floorOfQuotient(-TY, TA)); 1576 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 1577 } 1578 1579 LLVM_DEBUG(dbgs() << "\t TA = " << TA << "\n"); 1580 LLVM_DEBUG(dbgs() << "\t TB = " << TB << "\n"); 1581 1582 if (TLVec.empty() || TUVec.empty()) 1583 return false; 1584 TL = APIntOps::smax(TLVec.front(), TLVec.back()); 1585 TU = APIntOps::smin(TUVec.front(), TUVec.back()); 1586 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1587 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1588 1589 if (TL.sgt(TU)) { 1590 ++ExactSIVindependence; 1591 ++ExactSIVsuccesses; 1592 return true; 1593 } 1594 1595 // explore directions 1596 unsigned NewDirection = Dependence::DVEntry::NONE; 1597 APInt LowerDistance, UpperDistance; 1598 if (TA.sgt(TB)) { 1599 LowerDistance = (TY - TX) + (TA - TB) * TL; 1600 UpperDistance = (TY - TX) + (TA - TB) * TU; 1601 } else { 1602 LowerDistance = (TY - TX) + (TA - TB) * TU; 1603 UpperDistance = (TY - TX) + (TA - TB) * TL; 1604 } 1605 1606 LLVM_DEBUG(dbgs() << "\t LowerDistance = " << LowerDistance << "\n"); 1607 LLVM_DEBUG(dbgs() << "\t UpperDistance = " << UpperDistance << "\n"); 1608 1609 APInt Zero(Bits, 0, true); 1610 if (LowerDistance.sle(Zero) && UpperDistance.sge(Zero)) { 1611 NewDirection |= Dependence::DVEntry::EQ; 1612 ++ExactSIVsuccesses; 1613 } 1614 if (LowerDistance.slt(0)) { 1615 NewDirection |= Dependence::DVEntry::GT; 1616 ++ExactSIVsuccesses; 1617 } 1618 if (UpperDistance.sgt(0)) { 1619 NewDirection |= Dependence::DVEntry::LT; 1620 ++ExactSIVsuccesses; 1621 } 1622 1623 // finished 1624 Result.DV[Level].Direction &= NewDirection; 1625 if (Result.DV[Level].Direction == Dependence::DVEntry::NONE) 1626 ++ExactSIVindependence; 1627 LLVM_DEBUG(dbgs() << "\t Result = "); 1628 LLVM_DEBUG(Result.dump(dbgs())); 1629 return Result.DV[Level].Direction == Dependence::DVEntry::NONE; 1630 } 1631 1632 1633 // Return true if the divisor evenly divides the dividend. 1634 static 1635 bool isRemainderZero(const SCEVConstant *Dividend, 1636 const SCEVConstant *Divisor) { 1637 const APInt &ConstDividend = Dividend->getAPInt(); 1638 const APInt &ConstDivisor = Divisor->getAPInt(); 1639 return ConstDividend.srem(ConstDivisor) == 0; 1640 } 1641 1642 1643 // weakZeroSrcSIVtest - 1644 // From the paper, Practical Dependence Testing, Section 4.2.2 1645 // 1646 // When we have a pair of subscripts of the form [c1] and [c2 + a*i], 1647 // where i is an induction variable, c1 and c2 are loop invariant, 1648 // and a is a constant, we can solve it exactly using the 1649 // Weak-Zero SIV test. 1650 // 1651 // Given 1652 // 1653 // c1 = c2 + a*i 1654 // 1655 // we get 1656 // 1657 // (c1 - c2)/a = i 1658 // 1659 // If i is not an integer, there's no dependence. 1660 // If i < 0 or > UB, there's no dependence. 1661 // If i = 0, the direction is >= and peeling the 1662 // 1st iteration will break the dependence. 1663 // If i = UB, the direction is <= and peeling the 1664 // last iteration will break the dependence. 1665 // Otherwise, the direction is *. 1666 // 1667 // Can prove independence. Failing that, we can sometimes refine 1668 // the directions. Can sometimes show that first or last 1669 // iteration carries all the dependences (so worth peeling). 1670 // 1671 // (see also weakZeroDstSIVtest) 1672 // 1673 // Return true if dependence disproved. 1674 bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff, 1675 const SCEV *SrcConst, 1676 const SCEV *DstConst, 1677 const Loop *CurLoop, unsigned Level, 1678 FullDependence &Result, 1679 Constraint &NewConstraint) const { 1680 // For the WeakSIV test, it's possible the loop isn't common to 1681 // the Src and Dst loops. If it isn't, then there's no need to 1682 // record a direction. 1683 LLVM_DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n"); 1684 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << "\n"); 1685 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1686 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1687 ++WeakZeroSIVapplications; 1688 assert(0 < Level && Level <= MaxLevels && "Level out of range"); 1689 Level--; 1690 Result.Consistent = false; 1691 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1692 NewConstraint.setLine(SE->getZero(Delta->getType()), DstCoeff, Delta, 1693 CurLoop); 1694 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1695 if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) { 1696 if (Level < CommonLevels) { 1697 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1698 Result.DV[Level].PeelFirst = true; 1699 ++WeakZeroSIVsuccesses; 1700 } 1701 return false; // dependences caused by first iteration 1702 } 1703 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1704 if (!ConstCoeff) 1705 return false; 1706 const SCEV *AbsCoeff = 1707 SE->isKnownNegative(ConstCoeff) ? 1708 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1709 const SCEV *NewDelta = 1710 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1711 1712 // check that Delta/SrcCoeff < iteration count 1713 // really check NewDelta < count*AbsCoeff 1714 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1715 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1716 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1717 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1718 ++WeakZeroSIVindependence; 1719 ++WeakZeroSIVsuccesses; 1720 return true; 1721 } 1722 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1723 // dependences caused by last iteration 1724 if (Level < CommonLevels) { 1725 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1726 Result.DV[Level].PeelLast = true; 1727 ++WeakZeroSIVsuccesses; 1728 } 1729 return false; 1730 } 1731 } 1732 1733 // check that Delta/SrcCoeff >= 0 1734 // really check that NewDelta >= 0 1735 if (SE->isKnownNegative(NewDelta)) { 1736 // No dependence, newDelta < 0 1737 ++WeakZeroSIVindependence; 1738 ++WeakZeroSIVsuccesses; 1739 return true; 1740 } 1741 1742 // if SrcCoeff doesn't divide Delta, then no dependence 1743 if (isa<SCEVConstant>(Delta) && 1744 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1745 ++WeakZeroSIVindependence; 1746 ++WeakZeroSIVsuccesses; 1747 return true; 1748 } 1749 return false; 1750 } 1751 1752 1753 // weakZeroDstSIVtest - 1754 // From the paper, Practical Dependence Testing, Section 4.2.2 1755 // 1756 // When we have a pair of subscripts of the form [c1 + a*i] and [c2], 1757 // where i is an induction variable, c1 and c2 are loop invariant, 1758 // and a is a constant, we can solve it exactly using the 1759 // Weak-Zero SIV test. 1760 // 1761 // Given 1762 // 1763 // c1 + a*i = c2 1764 // 1765 // we get 1766 // 1767 // i = (c2 - c1)/a 1768 // 1769 // If i is not an integer, there's no dependence. 1770 // If i < 0 or > UB, there's no dependence. 1771 // If i = 0, the direction is <= and peeling the 1772 // 1st iteration will break the dependence. 1773 // If i = UB, the direction is >= and peeling the 1774 // last iteration will break the dependence. 1775 // Otherwise, the direction is *. 1776 // 1777 // Can prove independence. Failing that, we can sometimes refine 1778 // the directions. Can sometimes show that first or last 1779 // iteration carries all the dependences (so worth peeling). 1780 // 1781 // (see also weakZeroSrcSIVtest) 1782 // 1783 // Return true if dependence disproved. 1784 bool DependenceInfo::weakZeroDstSIVtest(const SCEV *SrcCoeff, 1785 const SCEV *SrcConst, 1786 const SCEV *DstConst, 1787 const Loop *CurLoop, unsigned Level, 1788 FullDependence &Result, 1789 Constraint &NewConstraint) const { 1790 // For the WeakSIV test, it's possible the loop isn't common to the 1791 // Src and Dst loops. If it isn't, then there's no need to record a direction. 1792 LLVM_DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n"); 1793 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << "\n"); 1794 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1795 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1796 ++WeakZeroSIVapplications; 1797 assert(0 < Level && Level <= SrcLevels && "Level out of range"); 1798 Level--; 1799 Result.Consistent = false; 1800 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1801 NewConstraint.setLine(SrcCoeff, SE->getZero(Delta->getType()), Delta, 1802 CurLoop); 1803 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1804 if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) { 1805 if (Level < CommonLevels) { 1806 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1807 Result.DV[Level].PeelFirst = true; 1808 ++WeakZeroSIVsuccesses; 1809 } 1810 return false; // dependences caused by first iteration 1811 } 1812 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1813 if (!ConstCoeff) 1814 return false; 1815 const SCEV *AbsCoeff = 1816 SE->isKnownNegative(ConstCoeff) ? 1817 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1818 const SCEV *NewDelta = 1819 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1820 1821 // check that Delta/SrcCoeff < iteration count 1822 // really check NewDelta < count*AbsCoeff 1823 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1824 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1825 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1826 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1827 ++WeakZeroSIVindependence; 1828 ++WeakZeroSIVsuccesses; 1829 return true; 1830 } 1831 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1832 // dependences caused by last iteration 1833 if (Level < CommonLevels) { 1834 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1835 Result.DV[Level].PeelLast = true; 1836 ++WeakZeroSIVsuccesses; 1837 } 1838 return false; 1839 } 1840 } 1841 1842 // check that Delta/SrcCoeff >= 0 1843 // really check that NewDelta >= 0 1844 if (SE->isKnownNegative(NewDelta)) { 1845 // No dependence, newDelta < 0 1846 ++WeakZeroSIVindependence; 1847 ++WeakZeroSIVsuccesses; 1848 return true; 1849 } 1850 1851 // if SrcCoeff doesn't divide Delta, then no dependence 1852 if (isa<SCEVConstant>(Delta) && 1853 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1854 ++WeakZeroSIVindependence; 1855 ++WeakZeroSIVsuccesses; 1856 return true; 1857 } 1858 return false; 1859 } 1860 1861 1862 // exactRDIVtest - Tests the RDIV subscript pair for dependence. 1863 // Things of the form [c1 + a*i] and [c2 + b*j], 1864 // where i and j are induction variable, c1 and c2 are loop invariant, 1865 // and a and b are constants. 1866 // Returns true if any possible dependence is disproved. 1867 // Marks the result as inconsistent. 1868 // Works in some cases that symbolicRDIVtest doesn't, and vice versa. 1869 bool DependenceInfo::exactRDIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, 1870 const SCEV *SrcConst, const SCEV *DstConst, 1871 const Loop *SrcLoop, const Loop *DstLoop, 1872 FullDependence &Result) const { 1873 LLVM_DEBUG(dbgs() << "\tExact RDIV test\n"); 1874 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1875 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1876 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1877 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1878 ++ExactRDIVapplications; 1879 Result.Consistent = false; 1880 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1881 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1882 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1883 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1884 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1885 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1886 return false; 1887 1888 // find gcd 1889 APInt G, X, Y; 1890 APInt AM = ConstSrcCoeff->getAPInt(); 1891 APInt BM = ConstDstCoeff->getAPInt(); 1892 APInt CM = ConstDelta->getAPInt(); 1893 unsigned Bits = AM.getBitWidth(); 1894 if (findGCD(Bits, AM, BM, CM, G, X, Y)) { 1895 // gcd doesn't divide Delta, no dependence 1896 ++ExactRDIVindependence; 1897 return true; 1898 } 1899 1900 LLVM_DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 1901 1902 // since SCEV construction seems to normalize, LM = 0 1903 APInt SrcUM(Bits, 1, true); 1904 bool SrcUMvalid = false; 1905 // SrcUM is perhaps unavailable, let's check 1906 if (const SCEVConstant *UpperBound = 1907 collectConstantUpperBound(SrcLoop, Delta->getType())) { 1908 SrcUM = UpperBound->getAPInt(); 1909 LLVM_DEBUG(dbgs() << "\t SrcUM = " << SrcUM << "\n"); 1910 SrcUMvalid = true; 1911 } 1912 1913 APInt DstUM(Bits, 1, true); 1914 bool DstUMvalid = false; 1915 // UM is perhaps unavailable, let's check 1916 if (const SCEVConstant *UpperBound = 1917 collectConstantUpperBound(DstLoop, Delta->getType())) { 1918 DstUM = UpperBound->getAPInt(); 1919 LLVM_DEBUG(dbgs() << "\t DstUM = " << DstUM << "\n"); 1920 DstUMvalid = true; 1921 } 1922 1923 APInt TU(APInt::getSignedMaxValue(Bits)); 1924 APInt TL(APInt::getSignedMinValue(Bits)); 1925 APInt TC = CM.sdiv(G); 1926 APInt TX = X * TC; 1927 APInt TY = Y * TC; 1928 LLVM_DEBUG(dbgs() << "\t TC = " << TC << "\n"); 1929 LLVM_DEBUG(dbgs() << "\t TX = " << TX << "\n"); 1930 LLVM_DEBUG(dbgs() << "\t TY = " << TY << "\n"); 1931 1932 SmallVector<APInt, 2> TLVec, TUVec; 1933 APInt TB = BM.sdiv(G); 1934 if (TB.sgt(0)) { 1935 TLVec.push_back(ceilingOfQuotient(-TX, TB)); 1936 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 1937 if (SrcUMvalid) { 1938 TUVec.push_back(floorOfQuotient(SrcUM - TX, TB)); 1939 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 1940 } 1941 } else { 1942 TUVec.push_back(floorOfQuotient(-TX, TB)); 1943 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 1944 if (SrcUMvalid) { 1945 TLVec.push_back(ceilingOfQuotient(SrcUM - TX, TB)); 1946 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 1947 } 1948 } 1949 1950 APInt TA = AM.sdiv(G); 1951 if (TA.sgt(0)) { 1952 TLVec.push_back(ceilingOfQuotient(-TY, TA)); 1953 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 1954 if (DstUMvalid) { 1955 TUVec.push_back(floorOfQuotient(DstUM - TY, TA)); 1956 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 1957 } 1958 } else { 1959 TUVec.push_back(floorOfQuotient(-TY, TA)); 1960 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 1961 if (DstUMvalid) { 1962 TLVec.push_back(ceilingOfQuotient(DstUM - TY, TA)); 1963 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 1964 } 1965 } 1966 1967 if (TLVec.empty() || TUVec.empty()) 1968 return false; 1969 1970 LLVM_DEBUG(dbgs() << "\t TA = " << TA << "\n"); 1971 LLVM_DEBUG(dbgs() << "\t TB = " << TB << "\n"); 1972 1973 TL = APIntOps::smax(TLVec.front(), TLVec.back()); 1974 TU = APIntOps::smin(TUVec.front(), TUVec.back()); 1975 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1976 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1977 1978 if (TL.sgt(TU)) 1979 ++ExactRDIVindependence; 1980 return TL.sgt(TU); 1981 } 1982 1983 1984 // symbolicRDIVtest - 1985 // In Section 4.5 of the Practical Dependence Testing paper,the authors 1986 // introduce a special case of Banerjee's Inequalities (also called the 1987 // Extreme-Value Test) that can handle some of the SIV and RDIV cases, 1988 // particularly cases with symbolics. Since it's only able to disprove 1989 // dependence (not compute distances or directions), we'll use it as a 1990 // fall back for the other tests. 1991 // 1992 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 1993 // where i and j are induction variables and c1 and c2 are loop invariants, 1994 // we can use the symbolic tests to disprove some dependences, serving as a 1995 // backup for the RDIV test. Note that i and j can be the same variable, 1996 // letting this test serve as a backup for the various SIV tests. 1997 // 1998 // For a dependence to exist, c1 + a1*i must equal c2 + a2*j for some 1999 // 0 <= i <= N1 and some 0 <= j <= N2, where N1 and N2 are the (normalized) 2000 // loop bounds for the i and j loops, respectively. So, ... 2001 // 2002 // c1 + a1*i = c2 + a2*j 2003 // a1*i - a2*j = c2 - c1 2004 // 2005 // To test for a dependence, we compute c2 - c1 and make sure it's in the 2006 // range of the maximum and minimum possible values of a1*i - a2*j. 2007 // Considering the signs of a1 and a2, we have 4 possible cases: 2008 // 2009 // 1) If a1 >= 0 and a2 >= 0, then 2010 // a1*0 - a2*N2 <= c2 - c1 <= a1*N1 - a2*0 2011 // -a2*N2 <= c2 - c1 <= a1*N1 2012 // 2013 // 2) If a1 >= 0 and a2 <= 0, then 2014 // a1*0 - a2*0 <= c2 - c1 <= a1*N1 - a2*N2 2015 // 0 <= c2 - c1 <= a1*N1 - a2*N2 2016 // 2017 // 3) If a1 <= 0 and a2 >= 0, then 2018 // a1*N1 - a2*N2 <= c2 - c1 <= a1*0 - a2*0 2019 // a1*N1 - a2*N2 <= c2 - c1 <= 0 2020 // 2021 // 4) If a1 <= 0 and a2 <= 0, then 2022 // a1*N1 - a2*0 <= c2 - c1 <= a1*0 - a2*N2 2023 // a1*N1 <= c2 - c1 <= -a2*N2 2024 // 2025 // return true if dependence disproved 2026 bool DependenceInfo::symbolicRDIVtest(const SCEV *A1, const SCEV *A2, 2027 const SCEV *C1, const SCEV *C2, 2028 const Loop *Loop1, 2029 const Loop *Loop2) const { 2030 ++SymbolicRDIVapplications; 2031 LLVM_DEBUG(dbgs() << "\ttry symbolic RDIV test\n"); 2032 LLVM_DEBUG(dbgs() << "\t A1 = " << *A1); 2033 LLVM_DEBUG(dbgs() << ", type = " << *A1->getType() << "\n"); 2034 LLVM_DEBUG(dbgs() << "\t A2 = " << *A2 << "\n"); 2035 LLVM_DEBUG(dbgs() << "\t C1 = " << *C1 << "\n"); 2036 LLVM_DEBUG(dbgs() << "\t C2 = " << *C2 << "\n"); 2037 const SCEV *N1 = collectUpperBound(Loop1, A1->getType()); 2038 const SCEV *N2 = collectUpperBound(Loop2, A1->getType()); 2039 LLVM_DEBUG(if (N1) dbgs() << "\t N1 = " << *N1 << "\n"); 2040 LLVM_DEBUG(if (N2) dbgs() << "\t N2 = " << *N2 << "\n"); 2041 const SCEV *C2_C1 = SE->getMinusSCEV(C2, C1); 2042 const SCEV *C1_C2 = SE->getMinusSCEV(C1, C2); 2043 LLVM_DEBUG(dbgs() << "\t C2 - C1 = " << *C2_C1 << "\n"); 2044 LLVM_DEBUG(dbgs() << "\t C1 - C2 = " << *C1_C2 << "\n"); 2045 if (SE->isKnownNonNegative(A1)) { 2046 if (SE->isKnownNonNegative(A2)) { 2047 // A1 >= 0 && A2 >= 0 2048 if (N1) { 2049 // make sure that c2 - c1 <= a1*N1 2050 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2051 LLVM_DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 2052 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1)) { 2053 ++SymbolicRDIVindependence; 2054 return true; 2055 } 2056 } 2057 if (N2) { 2058 // make sure that -a2*N2 <= c2 - c1, or a2*N2 >= c1 - c2 2059 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2060 LLVM_DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 2061 if (isKnownPredicate(CmpInst::ICMP_SLT, A2N2, C1_C2)) { 2062 ++SymbolicRDIVindependence; 2063 return true; 2064 } 2065 } 2066 } 2067 else if (SE->isKnownNonPositive(A2)) { 2068 // a1 >= 0 && a2 <= 0 2069 if (N1 && N2) { 2070 // make sure that c2 - c1 <= a1*N1 - a2*N2 2071 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2072 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2073 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 2074 LLVM_DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 2075 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1_A2N2)) { 2076 ++SymbolicRDIVindependence; 2077 return true; 2078 } 2079 } 2080 // make sure that 0 <= c2 - c1 2081 if (SE->isKnownNegative(C2_C1)) { 2082 ++SymbolicRDIVindependence; 2083 return true; 2084 } 2085 } 2086 } 2087 else if (SE->isKnownNonPositive(A1)) { 2088 if (SE->isKnownNonNegative(A2)) { 2089 // a1 <= 0 && a2 >= 0 2090 if (N1 && N2) { 2091 // make sure that a1*N1 - a2*N2 <= c2 - c1 2092 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2093 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2094 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 2095 LLVM_DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 2096 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1_A2N2, C2_C1)) { 2097 ++SymbolicRDIVindependence; 2098 return true; 2099 } 2100 } 2101 // make sure that c2 - c1 <= 0 2102 if (SE->isKnownPositive(C2_C1)) { 2103 ++SymbolicRDIVindependence; 2104 return true; 2105 } 2106 } 2107 else if (SE->isKnownNonPositive(A2)) { 2108 // a1 <= 0 && a2 <= 0 2109 if (N1) { 2110 // make sure that a1*N1 <= c2 - c1 2111 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2112 LLVM_DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 2113 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1, C2_C1)) { 2114 ++SymbolicRDIVindependence; 2115 return true; 2116 } 2117 } 2118 if (N2) { 2119 // make sure that c2 - c1 <= -a2*N2, or c1 - c2 >= a2*N2 2120 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2121 LLVM_DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 2122 if (isKnownPredicate(CmpInst::ICMP_SLT, C1_C2, A2N2)) { 2123 ++SymbolicRDIVindependence; 2124 return true; 2125 } 2126 } 2127 } 2128 } 2129 return false; 2130 } 2131 2132 2133 // testSIV - 2134 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 - a2*i] 2135 // where i is an induction variable, c1 and c2 are loop invariant, and a1 and 2136 // a2 are constant, we attack it with an SIV test. While they can all be 2137 // solved with the Exact SIV test, it's worthwhile to use simpler tests when 2138 // they apply; they're cheaper and sometimes more precise. 2139 // 2140 // Return true if dependence disproved. 2141 bool DependenceInfo::testSIV(const SCEV *Src, const SCEV *Dst, unsigned &Level, 2142 FullDependence &Result, Constraint &NewConstraint, 2143 const SCEV *&SplitIter) const { 2144 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n"); 2145 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2146 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2147 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2148 if (SrcAddRec && DstAddRec) { 2149 const SCEV *SrcConst = SrcAddRec->getStart(); 2150 const SCEV *DstConst = DstAddRec->getStart(); 2151 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2152 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2153 const Loop *CurLoop = SrcAddRec->getLoop(); 2154 assert(CurLoop == DstAddRec->getLoop() && 2155 "both loops in SIV should be same"); 2156 Level = mapSrcLoop(CurLoop); 2157 bool disproven; 2158 if (SrcCoeff == DstCoeff) 2159 disproven = strongSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2160 Level, Result, NewConstraint); 2161 else if (SrcCoeff == SE->getNegativeSCEV(DstCoeff)) 2162 disproven = weakCrossingSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2163 Level, Result, NewConstraint, SplitIter); 2164 else 2165 disproven = exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, 2166 Level, Result, NewConstraint); 2167 return disproven || 2168 gcdMIVtest(Src, Dst, Result) || 2169 symbolicRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, CurLoop); 2170 } 2171 if (SrcAddRec) { 2172 const SCEV *SrcConst = SrcAddRec->getStart(); 2173 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2174 const SCEV *DstConst = Dst; 2175 const Loop *CurLoop = SrcAddRec->getLoop(); 2176 Level = mapSrcLoop(CurLoop); 2177 return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2178 Level, Result, NewConstraint) || 2179 gcdMIVtest(Src, Dst, Result); 2180 } 2181 if (DstAddRec) { 2182 const SCEV *DstConst = DstAddRec->getStart(); 2183 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2184 const SCEV *SrcConst = Src; 2185 const Loop *CurLoop = DstAddRec->getLoop(); 2186 Level = mapDstLoop(CurLoop); 2187 return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst, 2188 CurLoop, Level, Result, NewConstraint) || 2189 gcdMIVtest(Src, Dst, Result); 2190 } 2191 llvm_unreachable("SIV test expected at least one AddRec"); 2192 return false; 2193 } 2194 2195 2196 // testRDIV - 2197 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 2198 // where i and j are induction variables, c1 and c2 are loop invariant, 2199 // and a1 and a2 are constant, we can solve it exactly with an easy adaptation 2200 // of the Exact SIV test, the Restricted Double Index Variable (RDIV) test. 2201 // It doesn't make sense to talk about distance or direction in this case, 2202 // so there's no point in making special versions of the Strong SIV test or 2203 // the Weak-crossing SIV test. 2204 // 2205 // With minor algebra, this test can also be used for things like 2206 // [c1 + a1*i + a2*j][c2]. 2207 // 2208 // Return true if dependence disproved. 2209 bool DependenceInfo::testRDIV(const SCEV *Src, const SCEV *Dst, 2210 FullDependence &Result) const { 2211 // we have 3 possible situations here: 2212 // 1) [a*i + b] and [c*j + d] 2213 // 2) [a*i + c*j + b] and [d] 2214 // 3) [b] and [a*i + c*j + d] 2215 // We need to find what we've got and get organized 2216 2217 const SCEV *SrcConst, *DstConst; 2218 const SCEV *SrcCoeff, *DstCoeff; 2219 const Loop *SrcLoop, *DstLoop; 2220 2221 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n"); 2222 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2223 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2224 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2225 if (SrcAddRec && DstAddRec) { 2226 SrcConst = SrcAddRec->getStart(); 2227 SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2228 SrcLoop = SrcAddRec->getLoop(); 2229 DstConst = DstAddRec->getStart(); 2230 DstCoeff = DstAddRec->getStepRecurrence(*SE); 2231 DstLoop = DstAddRec->getLoop(); 2232 } 2233 else if (SrcAddRec) { 2234 if (const SCEVAddRecExpr *tmpAddRec = 2235 dyn_cast<SCEVAddRecExpr>(SrcAddRec->getStart())) { 2236 SrcConst = tmpAddRec->getStart(); 2237 SrcCoeff = tmpAddRec->getStepRecurrence(*SE); 2238 SrcLoop = tmpAddRec->getLoop(); 2239 DstConst = Dst; 2240 DstCoeff = SE->getNegativeSCEV(SrcAddRec->getStepRecurrence(*SE)); 2241 DstLoop = SrcAddRec->getLoop(); 2242 } 2243 else 2244 llvm_unreachable("RDIV reached by surprising SCEVs"); 2245 } 2246 else if (DstAddRec) { 2247 if (const SCEVAddRecExpr *tmpAddRec = 2248 dyn_cast<SCEVAddRecExpr>(DstAddRec->getStart())) { 2249 DstConst = tmpAddRec->getStart(); 2250 DstCoeff = tmpAddRec->getStepRecurrence(*SE); 2251 DstLoop = tmpAddRec->getLoop(); 2252 SrcConst = Src; 2253 SrcCoeff = SE->getNegativeSCEV(DstAddRec->getStepRecurrence(*SE)); 2254 SrcLoop = DstAddRec->getLoop(); 2255 } 2256 else 2257 llvm_unreachable("RDIV reached by surprising SCEVs"); 2258 } 2259 else 2260 llvm_unreachable("RDIV expected at least one AddRec"); 2261 return exactRDIVtest(SrcCoeff, DstCoeff, 2262 SrcConst, DstConst, 2263 SrcLoop, DstLoop, 2264 Result) || 2265 gcdMIVtest(Src, Dst, Result) || 2266 symbolicRDIVtest(SrcCoeff, DstCoeff, 2267 SrcConst, DstConst, 2268 SrcLoop, DstLoop); 2269 } 2270 2271 2272 // Tests the single-subscript MIV pair (Src and Dst) for dependence. 2273 // Return true if dependence disproved. 2274 // Can sometimes refine direction vectors. 2275 bool DependenceInfo::testMIV(const SCEV *Src, const SCEV *Dst, 2276 const SmallBitVector &Loops, 2277 FullDependence &Result) const { 2278 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n"); 2279 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2280 Result.Consistent = false; 2281 return gcdMIVtest(Src, Dst, Result) || 2282 banerjeeMIVtest(Src, Dst, Loops, Result); 2283 } 2284 2285 2286 // Given a product, e.g., 10*X*Y, returns the first constant operand, 2287 // in this case 10. If there is no constant part, returns NULL. 2288 static 2289 const SCEVConstant *getConstantPart(const SCEV *Expr) { 2290 if (const auto *Constant = dyn_cast<SCEVConstant>(Expr)) 2291 return Constant; 2292 else if (const auto *Product = dyn_cast<SCEVMulExpr>(Expr)) 2293 if (const auto *Constant = dyn_cast<SCEVConstant>(Product->getOperand(0))) 2294 return Constant; 2295 return nullptr; 2296 } 2297 2298 2299 //===----------------------------------------------------------------------===// 2300 // gcdMIVtest - 2301 // Tests an MIV subscript pair for dependence. 2302 // Returns true if any possible dependence is disproved. 2303 // Marks the result as inconsistent. 2304 // Can sometimes disprove the equal direction for 1 or more loops, 2305 // as discussed in Michael Wolfe's book, 2306 // High Performance Compilers for Parallel Computing, page 235. 2307 // 2308 // We spend some effort (code!) to handle cases like 2309 // [10*i + 5*N*j + 15*M + 6], where i and j are induction variables, 2310 // but M and N are just loop-invariant variables. 2311 // This should help us handle linearized subscripts; 2312 // also makes this test a useful backup to the various SIV tests. 2313 // 2314 // It occurs to me that the presence of loop-invariant variables 2315 // changes the nature of the test from "greatest common divisor" 2316 // to "a common divisor". 2317 bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst, 2318 FullDependence &Result) const { 2319 LLVM_DEBUG(dbgs() << "starting gcd\n"); 2320 ++GCDapplications; 2321 unsigned BitWidth = SE->getTypeSizeInBits(Src->getType()); 2322 APInt RunningGCD = APInt::getNullValue(BitWidth); 2323 2324 // Examine Src coefficients. 2325 // Compute running GCD and record source constant. 2326 // Because we're looking for the constant at the end of the chain, 2327 // we can't quit the loop just because the GCD == 1. 2328 const SCEV *Coefficients = Src; 2329 while (const SCEVAddRecExpr *AddRec = 2330 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2331 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2332 // If the coefficient is the product of a constant and other stuff, 2333 // we can use the constant in the GCD computation. 2334 const auto *Constant = getConstantPart(Coeff); 2335 if (!Constant) 2336 return false; 2337 APInt ConstCoeff = Constant->getAPInt(); 2338 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2339 Coefficients = AddRec->getStart(); 2340 } 2341 const SCEV *SrcConst = Coefficients; 2342 2343 // Examine Dst coefficients. 2344 // Compute running GCD and record destination constant. 2345 // Because we're looking for the constant at the end of the chain, 2346 // we can't quit the loop just because the GCD == 1. 2347 Coefficients = Dst; 2348 while (const SCEVAddRecExpr *AddRec = 2349 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2350 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2351 // If the coefficient is the product of a constant and other stuff, 2352 // we can use the constant in the GCD computation. 2353 const auto *Constant = getConstantPart(Coeff); 2354 if (!Constant) 2355 return false; 2356 APInt ConstCoeff = Constant->getAPInt(); 2357 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2358 Coefficients = AddRec->getStart(); 2359 } 2360 const SCEV *DstConst = Coefficients; 2361 2362 APInt ExtraGCD = APInt::getNullValue(BitWidth); 2363 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 2364 LLVM_DEBUG(dbgs() << " Delta = " << *Delta << "\n"); 2365 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Delta); 2366 if (const SCEVAddExpr *Sum = dyn_cast<SCEVAddExpr>(Delta)) { 2367 // If Delta is a sum of products, we may be able to make further progress. 2368 for (unsigned Op = 0, Ops = Sum->getNumOperands(); Op < Ops; Op++) { 2369 const SCEV *Operand = Sum->getOperand(Op); 2370 if (isa<SCEVConstant>(Operand)) { 2371 assert(!Constant && "Surprised to find multiple constants"); 2372 Constant = cast<SCEVConstant>(Operand); 2373 } 2374 else if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Operand)) { 2375 // Search for constant operand to participate in GCD; 2376 // If none found; return false. 2377 const SCEVConstant *ConstOp = getConstantPart(Product); 2378 if (!ConstOp) 2379 return false; 2380 APInt ConstOpValue = ConstOp->getAPInt(); 2381 ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD, 2382 ConstOpValue.abs()); 2383 } 2384 else 2385 return false; 2386 } 2387 } 2388 if (!Constant) 2389 return false; 2390 APInt ConstDelta = cast<SCEVConstant>(Constant)->getAPInt(); 2391 LLVM_DEBUG(dbgs() << " ConstDelta = " << ConstDelta << "\n"); 2392 if (ConstDelta == 0) 2393 return false; 2394 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ExtraGCD); 2395 LLVM_DEBUG(dbgs() << " RunningGCD = " << RunningGCD << "\n"); 2396 APInt Remainder = ConstDelta.srem(RunningGCD); 2397 if (Remainder != 0) { 2398 ++GCDindependence; 2399 return true; 2400 } 2401 2402 // Try to disprove equal directions. 2403 // For example, given a subscript pair [3*i + 2*j] and [i' + 2*j' - 1], 2404 // the code above can't disprove the dependence because the GCD = 1. 2405 // So we consider what happen if i = i' and what happens if j = j'. 2406 // If i = i', we can simplify the subscript to [2*i + 2*j] and [2*j' - 1], 2407 // which is infeasible, so we can disallow the = direction for the i level. 2408 // Setting j = j' doesn't help matters, so we end up with a direction vector 2409 // of [<>, *] 2410 // 2411 // Given A[5*i + 10*j*M + 9*M*N] and A[15*i + 20*j*M - 21*N*M + 5], 2412 // we need to remember that the constant part is 5 and the RunningGCD should 2413 // be initialized to ExtraGCD = 30. 2414 LLVM_DEBUG(dbgs() << " ExtraGCD = " << ExtraGCD << '\n'); 2415 2416 bool Improved = false; 2417 Coefficients = Src; 2418 while (const SCEVAddRecExpr *AddRec = 2419 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2420 Coefficients = AddRec->getStart(); 2421 const Loop *CurLoop = AddRec->getLoop(); 2422 RunningGCD = ExtraGCD; 2423 const SCEV *SrcCoeff = AddRec->getStepRecurrence(*SE); 2424 const SCEV *DstCoeff = SE->getMinusSCEV(SrcCoeff, SrcCoeff); 2425 const SCEV *Inner = Src; 2426 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2427 AddRec = cast<SCEVAddRecExpr>(Inner); 2428 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2429 if (CurLoop == AddRec->getLoop()) 2430 ; // SrcCoeff == Coeff 2431 else { 2432 // If the coefficient is the product of a constant and other stuff, 2433 // we can use the constant in the GCD computation. 2434 Constant = getConstantPart(Coeff); 2435 if (!Constant) 2436 return false; 2437 APInt ConstCoeff = Constant->getAPInt(); 2438 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2439 } 2440 Inner = AddRec->getStart(); 2441 } 2442 Inner = Dst; 2443 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2444 AddRec = cast<SCEVAddRecExpr>(Inner); 2445 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2446 if (CurLoop == AddRec->getLoop()) 2447 DstCoeff = Coeff; 2448 else { 2449 // If the coefficient is the product of a constant and other stuff, 2450 // we can use the constant in the GCD computation. 2451 Constant = getConstantPart(Coeff); 2452 if (!Constant) 2453 return false; 2454 APInt ConstCoeff = Constant->getAPInt(); 2455 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2456 } 2457 Inner = AddRec->getStart(); 2458 } 2459 Delta = SE->getMinusSCEV(SrcCoeff, DstCoeff); 2460 // If the coefficient is the product of a constant and other stuff, 2461 // we can use the constant in the GCD computation. 2462 Constant = getConstantPart(Delta); 2463 if (!Constant) 2464 // The difference of the two coefficients might not be a product 2465 // or constant, in which case we give up on this direction. 2466 continue; 2467 APInt ConstCoeff = Constant->getAPInt(); 2468 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2469 LLVM_DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n"); 2470 if (RunningGCD != 0) { 2471 Remainder = ConstDelta.srem(RunningGCD); 2472 LLVM_DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n"); 2473 if (Remainder != 0) { 2474 unsigned Level = mapSrcLoop(CurLoop); 2475 Result.DV[Level - 1].Direction &= unsigned(~Dependence::DVEntry::EQ); 2476 Improved = true; 2477 } 2478 } 2479 } 2480 if (Improved) 2481 ++GCDsuccesses; 2482 LLVM_DEBUG(dbgs() << "all done\n"); 2483 return false; 2484 } 2485 2486 2487 //===----------------------------------------------------------------------===// 2488 // banerjeeMIVtest - 2489 // Use Banerjee's Inequalities to test an MIV subscript pair. 2490 // (Wolfe, in the race-car book, calls this the Extreme Value Test.) 2491 // Generally follows the discussion in Section 2.5.2 of 2492 // 2493 // Optimizing Supercompilers for Supercomputers 2494 // Michael Wolfe 2495 // 2496 // The inequalities given on page 25 are simplified in that loops are 2497 // normalized so that the lower bound is always 0 and the stride is always 1. 2498 // For example, Wolfe gives 2499 // 2500 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2501 // 2502 // where A_k is the coefficient of the kth index in the source subscript, 2503 // B_k is the coefficient of the kth index in the destination subscript, 2504 // U_k is the upper bound of the kth index, L_k is the lower bound of the Kth 2505 // index, and N_k is the stride of the kth index. Since all loops are normalized 2506 // by the SCEV package, N_k = 1 and L_k = 0, allowing us to simplify the 2507 // equation to 2508 // 2509 // LB^<_k = (A^-_k - B_k)^- (U_k - 0 - 1) + (A_k - B_k)0 - B_k 1 2510 // = (A^-_k - B_k)^- (U_k - 1) - B_k 2511 // 2512 // Similar simplifications are possible for the other equations. 2513 // 2514 // When we can't determine the number of iterations for a loop, 2515 // we use NULL as an indicator for the worst case, infinity. 2516 // When computing the upper bound, NULL denotes +inf; 2517 // for the lower bound, NULL denotes -inf. 2518 // 2519 // Return true if dependence disproved. 2520 bool DependenceInfo::banerjeeMIVtest(const SCEV *Src, const SCEV *Dst, 2521 const SmallBitVector &Loops, 2522 FullDependence &Result) const { 2523 LLVM_DEBUG(dbgs() << "starting Banerjee\n"); 2524 ++BanerjeeApplications; 2525 LLVM_DEBUG(dbgs() << " Src = " << *Src << '\n'); 2526 const SCEV *A0; 2527 CoefficientInfo *A = collectCoeffInfo(Src, true, A0); 2528 LLVM_DEBUG(dbgs() << " Dst = " << *Dst << '\n'); 2529 const SCEV *B0; 2530 CoefficientInfo *B = collectCoeffInfo(Dst, false, B0); 2531 BoundInfo *Bound = new BoundInfo[MaxLevels + 1]; 2532 const SCEV *Delta = SE->getMinusSCEV(B0, A0); 2533 LLVM_DEBUG(dbgs() << "\tDelta = " << *Delta << '\n'); 2534 2535 // Compute bounds for all the * directions. 2536 LLVM_DEBUG(dbgs() << "\tBounds[*]\n"); 2537 for (unsigned K = 1; K <= MaxLevels; ++K) { 2538 Bound[K].Iterations = A[K].Iterations ? A[K].Iterations : B[K].Iterations; 2539 Bound[K].Direction = Dependence::DVEntry::ALL; 2540 Bound[K].DirSet = Dependence::DVEntry::NONE; 2541 findBoundsALL(A, B, Bound, K); 2542 #ifndef NDEBUG 2543 LLVM_DEBUG(dbgs() << "\t " << K << '\t'); 2544 if (Bound[K].Lower[Dependence::DVEntry::ALL]) 2545 LLVM_DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << '\t'); 2546 else 2547 LLVM_DEBUG(dbgs() << "-inf\t"); 2548 if (Bound[K].Upper[Dependence::DVEntry::ALL]) 2549 LLVM_DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << '\n'); 2550 else 2551 LLVM_DEBUG(dbgs() << "+inf\n"); 2552 #endif 2553 } 2554 2555 // Test the *, *, *, ... case. 2556 bool Disproved = false; 2557 if (testBounds(Dependence::DVEntry::ALL, 0, Bound, Delta)) { 2558 // Explore the direction vector hierarchy. 2559 unsigned DepthExpanded = 0; 2560 unsigned NewDeps = exploreDirections(1, A, B, Bound, 2561 Loops, DepthExpanded, Delta); 2562 if (NewDeps > 0) { 2563 bool Improved = false; 2564 for (unsigned K = 1; K <= CommonLevels; ++K) { 2565 if (Loops[K]) { 2566 unsigned Old = Result.DV[K - 1].Direction; 2567 Result.DV[K - 1].Direction = Old & Bound[K].DirSet; 2568 Improved |= Old != Result.DV[K - 1].Direction; 2569 if (!Result.DV[K - 1].Direction) { 2570 Improved = false; 2571 Disproved = true; 2572 break; 2573 } 2574 } 2575 } 2576 if (Improved) 2577 ++BanerjeeSuccesses; 2578 } 2579 else { 2580 ++BanerjeeIndependence; 2581 Disproved = true; 2582 } 2583 } 2584 else { 2585 ++BanerjeeIndependence; 2586 Disproved = true; 2587 } 2588 delete [] Bound; 2589 delete [] A; 2590 delete [] B; 2591 return Disproved; 2592 } 2593 2594 2595 // Hierarchically expands the direction vector 2596 // search space, combining the directions of discovered dependences 2597 // in the DirSet field of Bound. Returns the number of distinct 2598 // dependences discovered. If the dependence is disproved, 2599 // it will return 0. 2600 unsigned DependenceInfo::exploreDirections(unsigned Level, CoefficientInfo *A, 2601 CoefficientInfo *B, BoundInfo *Bound, 2602 const SmallBitVector &Loops, 2603 unsigned &DepthExpanded, 2604 const SCEV *Delta) const { 2605 if (Level > CommonLevels) { 2606 // record result 2607 LLVM_DEBUG(dbgs() << "\t["); 2608 for (unsigned K = 1; K <= CommonLevels; ++K) { 2609 if (Loops[K]) { 2610 Bound[K].DirSet |= Bound[K].Direction; 2611 #ifndef NDEBUG 2612 switch (Bound[K].Direction) { 2613 case Dependence::DVEntry::LT: 2614 LLVM_DEBUG(dbgs() << " <"); 2615 break; 2616 case Dependence::DVEntry::EQ: 2617 LLVM_DEBUG(dbgs() << " ="); 2618 break; 2619 case Dependence::DVEntry::GT: 2620 LLVM_DEBUG(dbgs() << " >"); 2621 break; 2622 case Dependence::DVEntry::ALL: 2623 LLVM_DEBUG(dbgs() << " *"); 2624 break; 2625 default: 2626 llvm_unreachable("unexpected Bound[K].Direction"); 2627 } 2628 #endif 2629 } 2630 } 2631 LLVM_DEBUG(dbgs() << " ]\n"); 2632 return 1; 2633 } 2634 if (Loops[Level]) { 2635 if (Level > DepthExpanded) { 2636 DepthExpanded = Level; 2637 // compute bounds for <, =, > at current level 2638 findBoundsLT(A, B, Bound, Level); 2639 findBoundsGT(A, B, Bound, Level); 2640 findBoundsEQ(A, B, Bound, Level); 2641 #ifndef NDEBUG 2642 LLVM_DEBUG(dbgs() << "\tBound for level = " << Level << '\n'); 2643 LLVM_DEBUG(dbgs() << "\t <\t"); 2644 if (Bound[Level].Lower[Dependence::DVEntry::LT]) 2645 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT] 2646 << '\t'); 2647 else 2648 LLVM_DEBUG(dbgs() << "-inf\t"); 2649 if (Bound[Level].Upper[Dependence::DVEntry::LT]) 2650 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT] 2651 << '\n'); 2652 else 2653 LLVM_DEBUG(dbgs() << "+inf\n"); 2654 LLVM_DEBUG(dbgs() << "\t =\t"); 2655 if (Bound[Level].Lower[Dependence::DVEntry::EQ]) 2656 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ] 2657 << '\t'); 2658 else 2659 LLVM_DEBUG(dbgs() << "-inf\t"); 2660 if (Bound[Level].Upper[Dependence::DVEntry::EQ]) 2661 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ] 2662 << '\n'); 2663 else 2664 LLVM_DEBUG(dbgs() << "+inf\n"); 2665 LLVM_DEBUG(dbgs() << "\t >\t"); 2666 if (Bound[Level].Lower[Dependence::DVEntry::GT]) 2667 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::GT] 2668 << '\t'); 2669 else 2670 LLVM_DEBUG(dbgs() << "-inf\t"); 2671 if (Bound[Level].Upper[Dependence::DVEntry::GT]) 2672 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT] 2673 << '\n'); 2674 else 2675 LLVM_DEBUG(dbgs() << "+inf\n"); 2676 #endif 2677 } 2678 2679 unsigned NewDeps = 0; 2680 2681 // test bounds for <, *, *, ... 2682 if (testBounds(Dependence::DVEntry::LT, Level, Bound, Delta)) 2683 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2684 Loops, DepthExpanded, Delta); 2685 2686 // Test bounds for =, *, *, ... 2687 if (testBounds(Dependence::DVEntry::EQ, Level, Bound, Delta)) 2688 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2689 Loops, DepthExpanded, Delta); 2690 2691 // test bounds for >, *, *, ... 2692 if (testBounds(Dependence::DVEntry::GT, Level, Bound, Delta)) 2693 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2694 Loops, DepthExpanded, Delta); 2695 2696 Bound[Level].Direction = Dependence::DVEntry::ALL; 2697 return NewDeps; 2698 } 2699 else 2700 return exploreDirections(Level + 1, A, B, Bound, Loops, DepthExpanded, Delta); 2701 } 2702 2703 2704 // Returns true iff the current bounds are plausible. 2705 bool DependenceInfo::testBounds(unsigned char DirKind, unsigned Level, 2706 BoundInfo *Bound, const SCEV *Delta) const { 2707 Bound[Level].Direction = DirKind; 2708 if (const SCEV *LowerBound = getLowerBound(Bound)) 2709 if (isKnownPredicate(CmpInst::ICMP_SGT, LowerBound, Delta)) 2710 return false; 2711 if (const SCEV *UpperBound = getUpperBound(Bound)) 2712 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, UpperBound)) 2713 return false; 2714 return true; 2715 } 2716 2717 2718 // Computes the upper and lower bounds for level K 2719 // using the * direction. Records them in Bound. 2720 // Wolfe gives the equations 2721 // 2722 // LB^*_k = (A^-_k - B^+_k)(U_k - L_k) + (A_k - B_k)L_k 2723 // UB^*_k = (A^+_k - B^-_k)(U_k - L_k) + (A_k - B_k)L_k 2724 // 2725 // Since we normalize loops, we can simplify these equations to 2726 // 2727 // LB^*_k = (A^-_k - B^+_k)U_k 2728 // UB^*_k = (A^+_k - B^-_k)U_k 2729 // 2730 // We must be careful to handle the case where the upper bound is unknown. 2731 // Note that the lower bound is always <= 0 2732 // and the upper bound is always >= 0. 2733 void DependenceInfo::findBoundsALL(CoefficientInfo *A, CoefficientInfo *B, 2734 BoundInfo *Bound, unsigned K) const { 2735 Bound[K].Lower[Dependence::DVEntry::ALL] = nullptr; // Default value = -infinity. 2736 Bound[K].Upper[Dependence::DVEntry::ALL] = nullptr; // Default value = +infinity. 2737 if (Bound[K].Iterations) { 2738 Bound[K].Lower[Dependence::DVEntry::ALL] = 2739 SE->getMulExpr(SE->getMinusSCEV(A[K].NegPart, B[K].PosPart), 2740 Bound[K].Iterations); 2741 Bound[K].Upper[Dependence::DVEntry::ALL] = 2742 SE->getMulExpr(SE->getMinusSCEV(A[K].PosPart, B[K].NegPart), 2743 Bound[K].Iterations); 2744 } 2745 else { 2746 // If the difference is 0, we won't need to know the number of iterations. 2747 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].NegPart, B[K].PosPart)) 2748 Bound[K].Lower[Dependence::DVEntry::ALL] = 2749 SE->getZero(A[K].Coeff->getType()); 2750 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].PosPart, B[K].NegPart)) 2751 Bound[K].Upper[Dependence::DVEntry::ALL] = 2752 SE->getZero(A[K].Coeff->getType()); 2753 } 2754 } 2755 2756 2757 // Computes the upper and lower bounds for level K 2758 // using the = direction. Records them in Bound. 2759 // Wolfe gives the equations 2760 // 2761 // LB^=_k = (A_k - B_k)^- (U_k - L_k) + (A_k - B_k)L_k 2762 // UB^=_k = (A_k - B_k)^+ (U_k - L_k) + (A_k - B_k)L_k 2763 // 2764 // Since we normalize loops, we can simplify these equations to 2765 // 2766 // LB^=_k = (A_k - B_k)^- U_k 2767 // UB^=_k = (A_k - B_k)^+ U_k 2768 // 2769 // We must be careful to handle the case where the upper bound is unknown. 2770 // Note that the lower bound is always <= 0 2771 // and the upper bound is always >= 0. 2772 void DependenceInfo::findBoundsEQ(CoefficientInfo *A, CoefficientInfo *B, 2773 BoundInfo *Bound, unsigned K) const { 2774 Bound[K].Lower[Dependence::DVEntry::EQ] = nullptr; // Default value = -infinity. 2775 Bound[K].Upper[Dependence::DVEntry::EQ] = nullptr; // Default value = +infinity. 2776 if (Bound[K].Iterations) { 2777 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2778 const SCEV *NegativePart = getNegativePart(Delta); 2779 Bound[K].Lower[Dependence::DVEntry::EQ] = 2780 SE->getMulExpr(NegativePart, Bound[K].Iterations); 2781 const SCEV *PositivePart = getPositivePart(Delta); 2782 Bound[K].Upper[Dependence::DVEntry::EQ] = 2783 SE->getMulExpr(PositivePart, Bound[K].Iterations); 2784 } 2785 else { 2786 // If the positive/negative part of the difference is 0, 2787 // we won't need to know the number of iterations. 2788 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2789 const SCEV *NegativePart = getNegativePart(Delta); 2790 if (NegativePart->isZero()) 2791 Bound[K].Lower[Dependence::DVEntry::EQ] = NegativePart; // Zero 2792 const SCEV *PositivePart = getPositivePart(Delta); 2793 if (PositivePart->isZero()) 2794 Bound[K].Upper[Dependence::DVEntry::EQ] = PositivePart; // Zero 2795 } 2796 } 2797 2798 2799 // Computes the upper and lower bounds for level K 2800 // using the < direction. Records them in Bound. 2801 // Wolfe gives the equations 2802 // 2803 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2804 // UB^<_k = (A^+_k - B_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2805 // 2806 // Since we normalize loops, we can simplify these equations to 2807 // 2808 // LB^<_k = (A^-_k - B_k)^- (U_k - 1) - B_k 2809 // UB^<_k = (A^+_k - B_k)^+ (U_k - 1) - B_k 2810 // 2811 // We must be careful to handle the case where the upper bound is unknown. 2812 void DependenceInfo::findBoundsLT(CoefficientInfo *A, CoefficientInfo *B, 2813 BoundInfo *Bound, unsigned K) const { 2814 Bound[K].Lower[Dependence::DVEntry::LT] = nullptr; // Default value = -infinity. 2815 Bound[K].Upper[Dependence::DVEntry::LT] = nullptr; // Default value = +infinity. 2816 if (Bound[K].Iterations) { 2817 const SCEV *Iter_1 = SE->getMinusSCEV( 2818 Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType())); 2819 const SCEV *NegPart = 2820 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2821 Bound[K].Lower[Dependence::DVEntry::LT] = 2822 SE->getMinusSCEV(SE->getMulExpr(NegPart, Iter_1), B[K].Coeff); 2823 const SCEV *PosPart = 2824 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2825 Bound[K].Upper[Dependence::DVEntry::LT] = 2826 SE->getMinusSCEV(SE->getMulExpr(PosPart, Iter_1), B[K].Coeff); 2827 } 2828 else { 2829 // If the positive/negative part of the difference is 0, 2830 // we won't need to know the number of iterations. 2831 const SCEV *NegPart = 2832 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2833 if (NegPart->isZero()) 2834 Bound[K].Lower[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2835 const SCEV *PosPart = 2836 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2837 if (PosPart->isZero()) 2838 Bound[K].Upper[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2839 } 2840 } 2841 2842 2843 // Computes the upper and lower bounds for level K 2844 // using the > direction. Records them in Bound. 2845 // Wolfe gives the equations 2846 // 2847 // LB^>_k = (A_k - B^+_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2848 // UB^>_k = (A_k - B^-_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2849 // 2850 // Since we normalize loops, we can simplify these equations to 2851 // 2852 // LB^>_k = (A_k - B^+_k)^- (U_k - 1) + A_k 2853 // UB^>_k = (A_k - B^-_k)^+ (U_k - 1) + A_k 2854 // 2855 // We must be careful to handle the case where the upper bound is unknown. 2856 void DependenceInfo::findBoundsGT(CoefficientInfo *A, CoefficientInfo *B, 2857 BoundInfo *Bound, unsigned K) const { 2858 Bound[K].Lower[Dependence::DVEntry::GT] = nullptr; // Default value = -infinity. 2859 Bound[K].Upper[Dependence::DVEntry::GT] = nullptr; // Default value = +infinity. 2860 if (Bound[K].Iterations) { 2861 const SCEV *Iter_1 = SE->getMinusSCEV( 2862 Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType())); 2863 const SCEV *NegPart = 2864 getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2865 Bound[K].Lower[Dependence::DVEntry::GT] = 2866 SE->getAddExpr(SE->getMulExpr(NegPart, Iter_1), A[K].Coeff); 2867 const SCEV *PosPart = 2868 getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2869 Bound[K].Upper[Dependence::DVEntry::GT] = 2870 SE->getAddExpr(SE->getMulExpr(PosPart, Iter_1), A[K].Coeff); 2871 } 2872 else { 2873 // If the positive/negative part of the difference is 0, 2874 // we won't need to know the number of iterations. 2875 const SCEV *NegPart = getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2876 if (NegPart->isZero()) 2877 Bound[K].Lower[Dependence::DVEntry::GT] = A[K].Coeff; 2878 const SCEV *PosPart = getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2879 if (PosPart->isZero()) 2880 Bound[K].Upper[Dependence::DVEntry::GT] = A[K].Coeff; 2881 } 2882 } 2883 2884 2885 // X^+ = max(X, 0) 2886 const SCEV *DependenceInfo::getPositivePart(const SCEV *X) const { 2887 return SE->getSMaxExpr(X, SE->getZero(X->getType())); 2888 } 2889 2890 2891 // X^- = min(X, 0) 2892 const SCEV *DependenceInfo::getNegativePart(const SCEV *X) const { 2893 return SE->getSMinExpr(X, SE->getZero(X->getType())); 2894 } 2895 2896 2897 // Walks through the subscript, 2898 // collecting each coefficient, the associated loop bounds, 2899 // and recording its positive and negative parts for later use. 2900 DependenceInfo::CoefficientInfo * 2901 DependenceInfo::collectCoeffInfo(const SCEV *Subscript, bool SrcFlag, 2902 const SCEV *&Constant) const { 2903 const SCEV *Zero = SE->getZero(Subscript->getType()); 2904 CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1]; 2905 for (unsigned K = 1; K <= MaxLevels; ++K) { 2906 CI[K].Coeff = Zero; 2907 CI[K].PosPart = Zero; 2908 CI[K].NegPart = Zero; 2909 CI[K].Iterations = nullptr; 2910 } 2911 while (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Subscript)) { 2912 const Loop *L = AddRec->getLoop(); 2913 unsigned K = SrcFlag ? mapSrcLoop(L) : mapDstLoop(L); 2914 CI[K].Coeff = AddRec->getStepRecurrence(*SE); 2915 CI[K].PosPart = getPositivePart(CI[K].Coeff); 2916 CI[K].NegPart = getNegativePart(CI[K].Coeff); 2917 CI[K].Iterations = collectUpperBound(L, Subscript->getType()); 2918 Subscript = AddRec->getStart(); 2919 } 2920 Constant = Subscript; 2921 #ifndef NDEBUG 2922 LLVM_DEBUG(dbgs() << "\tCoefficient Info\n"); 2923 for (unsigned K = 1; K <= MaxLevels; ++K) { 2924 LLVM_DEBUG(dbgs() << "\t " << K << "\t" << *CI[K].Coeff); 2925 LLVM_DEBUG(dbgs() << "\tPos Part = "); 2926 LLVM_DEBUG(dbgs() << *CI[K].PosPart); 2927 LLVM_DEBUG(dbgs() << "\tNeg Part = "); 2928 LLVM_DEBUG(dbgs() << *CI[K].NegPart); 2929 LLVM_DEBUG(dbgs() << "\tUpper Bound = "); 2930 if (CI[K].Iterations) 2931 LLVM_DEBUG(dbgs() << *CI[K].Iterations); 2932 else 2933 LLVM_DEBUG(dbgs() << "+inf"); 2934 LLVM_DEBUG(dbgs() << '\n'); 2935 } 2936 LLVM_DEBUG(dbgs() << "\t Constant = " << *Subscript << '\n'); 2937 #endif 2938 return CI; 2939 } 2940 2941 2942 // Looks through all the bounds info and 2943 // computes the lower bound given the current direction settings 2944 // at each level. If the lower bound for any level is -inf, 2945 // the result is -inf. 2946 const SCEV *DependenceInfo::getLowerBound(BoundInfo *Bound) const { 2947 const SCEV *Sum = Bound[1].Lower[Bound[1].Direction]; 2948 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 2949 if (Bound[K].Lower[Bound[K].Direction]) 2950 Sum = SE->getAddExpr(Sum, Bound[K].Lower[Bound[K].Direction]); 2951 else 2952 Sum = nullptr; 2953 } 2954 return Sum; 2955 } 2956 2957 2958 // Looks through all the bounds info and 2959 // computes the upper bound given the current direction settings 2960 // at each level. If the upper bound at any level is +inf, 2961 // the result is +inf. 2962 const SCEV *DependenceInfo::getUpperBound(BoundInfo *Bound) const { 2963 const SCEV *Sum = Bound[1].Upper[Bound[1].Direction]; 2964 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 2965 if (Bound[K].Upper[Bound[K].Direction]) 2966 Sum = SE->getAddExpr(Sum, Bound[K].Upper[Bound[K].Direction]); 2967 else 2968 Sum = nullptr; 2969 } 2970 return Sum; 2971 } 2972 2973 2974 //===----------------------------------------------------------------------===// 2975 // Constraint manipulation for Delta test. 2976 2977 // Given a linear SCEV, 2978 // return the coefficient (the step) 2979 // corresponding to the specified loop. 2980 // If there isn't one, return 0. 2981 // For example, given a*i + b*j + c*k, finding the coefficient 2982 // corresponding to the j loop would yield b. 2983 const SCEV *DependenceInfo::findCoefficient(const SCEV *Expr, 2984 const Loop *TargetLoop) const { 2985 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 2986 if (!AddRec) 2987 return SE->getZero(Expr->getType()); 2988 if (AddRec->getLoop() == TargetLoop) 2989 return AddRec->getStepRecurrence(*SE); 2990 return findCoefficient(AddRec->getStart(), TargetLoop); 2991 } 2992 2993 2994 // Given a linear SCEV, 2995 // return the SCEV given by zeroing out the coefficient 2996 // corresponding to the specified loop. 2997 // For example, given a*i + b*j + c*k, zeroing the coefficient 2998 // corresponding to the j loop would yield a*i + c*k. 2999 const SCEV *DependenceInfo::zeroCoefficient(const SCEV *Expr, 3000 const Loop *TargetLoop) const { 3001 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 3002 if (!AddRec) 3003 return Expr; // ignore 3004 if (AddRec->getLoop() == TargetLoop) 3005 return AddRec->getStart(); 3006 return SE->getAddRecExpr(zeroCoefficient(AddRec->getStart(), TargetLoop), 3007 AddRec->getStepRecurrence(*SE), 3008 AddRec->getLoop(), 3009 AddRec->getNoWrapFlags()); 3010 } 3011 3012 3013 // Given a linear SCEV Expr, 3014 // return the SCEV given by adding some Value to the 3015 // coefficient corresponding to the specified TargetLoop. 3016 // For example, given a*i + b*j + c*k, adding 1 to the coefficient 3017 // corresponding to the j loop would yield a*i + (b+1)*j + c*k. 3018 const SCEV *DependenceInfo::addToCoefficient(const SCEV *Expr, 3019 const Loop *TargetLoop, 3020 const SCEV *Value) const { 3021 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 3022 if (!AddRec) // create a new addRec 3023 return SE->getAddRecExpr(Expr, 3024 Value, 3025 TargetLoop, 3026 SCEV::FlagAnyWrap); // Worst case, with no info. 3027 if (AddRec->getLoop() == TargetLoop) { 3028 const SCEV *Sum = SE->getAddExpr(AddRec->getStepRecurrence(*SE), Value); 3029 if (Sum->isZero()) 3030 return AddRec->getStart(); 3031 return SE->getAddRecExpr(AddRec->getStart(), 3032 Sum, 3033 AddRec->getLoop(), 3034 AddRec->getNoWrapFlags()); 3035 } 3036 if (SE->isLoopInvariant(AddRec, TargetLoop)) 3037 return SE->getAddRecExpr(AddRec, Value, TargetLoop, SCEV::FlagAnyWrap); 3038 return SE->getAddRecExpr( 3039 addToCoefficient(AddRec->getStart(), TargetLoop, Value), 3040 AddRec->getStepRecurrence(*SE), AddRec->getLoop(), 3041 AddRec->getNoWrapFlags()); 3042 } 3043 3044 3045 // Review the constraints, looking for opportunities 3046 // to simplify a subscript pair (Src and Dst). 3047 // Return true if some simplification occurs. 3048 // If the simplification isn't exact (that is, if it is conservative 3049 // in terms of dependence), set consistent to false. 3050 // Corresponds to Figure 5 from the paper 3051 // 3052 // Practical Dependence Testing 3053 // Goff, Kennedy, Tseng 3054 // PLDI 1991 3055 bool DependenceInfo::propagate(const SCEV *&Src, const SCEV *&Dst, 3056 SmallBitVector &Loops, 3057 SmallVectorImpl<Constraint> &Constraints, 3058 bool &Consistent) { 3059 bool Result = false; 3060 for (unsigned LI : Loops.set_bits()) { 3061 LLVM_DEBUG(dbgs() << "\t Constraint[" << LI << "] is"); 3062 LLVM_DEBUG(Constraints[LI].dump(dbgs())); 3063 if (Constraints[LI].isDistance()) 3064 Result |= propagateDistance(Src, Dst, Constraints[LI], Consistent); 3065 else if (Constraints[LI].isLine()) 3066 Result |= propagateLine(Src, Dst, Constraints[LI], Consistent); 3067 else if (Constraints[LI].isPoint()) 3068 Result |= propagatePoint(Src, Dst, Constraints[LI]); 3069 } 3070 return Result; 3071 } 3072 3073 3074 // Attempt to propagate a distance 3075 // constraint into a subscript pair (Src and Dst). 3076 // Return true if some simplification occurs. 3077 // If the simplification isn't exact (that is, if it is conservative 3078 // in terms of dependence), set consistent to false. 3079 bool DependenceInfo::propagateDistance(const SCEV *&Src, const SCEV *&Dst, 3080 Constraint &CurConstraint, 3081 bool &Consistent) { 3082 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3083 LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 3084 const SCEV *A_K = findCoefficient(Src, CurLoop); 3085 if (A_K->isZero()) 3086 return false; 3087 const SCEV *DA_K = SE->getMulExpr(A_K, CurConstraint.getD()); 3088 Src = SE->getMinusSCEV(Src, DA_K); 3089 Src = zeroCoefficient(Src, CurLoop); 3090 LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3091 LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3092 Dst = addToCoefficient(Dst, CurLoop, SE->getNegativeSCEV(A_K)); 3093 LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3094 if (!findCoefficient(Dst, CurLoop)->isZero()) 3095 Consistent = false; 3096 return true; 3097 } 3098 3099 3100 // Attempt to propagate a line 3101 // constraint into a subscript pair (Src and Dst). 3102 // Return true if some simplification occurs. 3103 // If the simplification isn't exact (that is, if it is conservative 3104 // in terms of dependence), set consistent to false. 3105 bool DependenceInfo::propagateLine(const SCEV *&Src, const SCEV *&Dst, 3106 Constraint &CurConstraint, 3107 bool &Consistent) { 3108 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3109 const SCEV *A = CurConstraint.getA(); 3110 const SCEV *B = CurConstraint.getB(); 3111 const SCEV *C = CurConstraint.getC(); 3112 LLVM_DEBUG(dbgs() << "\t\tA = " << *A << ", B = " << *B << ", C = " << *C 3113 << "\n"); 3114 LLVM_DEBUG(dbgs() << "\t\tSrc = " << *Src << "\n"); 3115 LLVM_DEBUG(dbgs() << "\t\tDst = " << *Dst << "\n"); 3116 if (A->isZero()) { 3117 const SCEVConstant *Bconst = dyn_cast<SCEVConstant>(B); 3118 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3119 if (!Bconst || !Cconst) return false; 3120 APInt Beta = Bconst->getAPInt(); 3121 APInt Charlie = Cconst->getAPInt(); 3122 APInt CdivB = Charlie.sdiv(Beta); 3123 assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B"); 3124 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3125 // Src = SE->getAddExpr(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3126 Src = SE->getMinusSCEV(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3127 Dst = zeroCoefficient(Dst, CurLoop); 3128 if (!findCoefficient(Src, CurLoop)->isZero()) 3129 Consistent = false; 3130 } 3131 else if (B->isZero()) { 3132 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3133 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3134 if (!Aconst || !Cconst) return false; 3135 APInt Alpha = Aconst->getAPInt(); 3136 APInt Charlie = Cconst->getAPInt(); 3137 APInt CdivA = Charlie.sdiv(Alpha); 3138 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3139 const SCEV *A_K = findCoefficient(Src, CurLoop); 3140 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3141 Src = zeroCoefficient(Src, CurLoop); 3142 if (!findCoefficient(Dst, CurLoop)->isZero()) 3143 Consistent = false; 3144 } 3145 else if (isKnownPredicate(CmpInst::ICMP_EQ, A, B)) { 3146 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3147 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3148 if (!Aconst || !Cconst) return false; 3149 APInt Alpha = Aconst->getAPInt(); 3150 APInt Charlie = Cconst->getAPInt(); 3151 APInt CdivA = Charlie.sdiv(Alpha); 3152 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3153 const SCEV *A_K = findCoefficient(Src, CurLoop); 3154 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3155 Src = zeroCoefficient(Src, CurLoop); 3156 Dst = addToCoefficient(Dst, CurLoop, A_K); 3157 if (!findCoefficient(Dst, CurLoop)->isZero()) 3158 Consistent = false; 3159 } 3160 else { 3161 // paper is incorrect here, or perhaps just misleading 3162 const SCEV *A_K = findCoefficient(Src, CurLoop); 3163 Src = SE->getMulExpr(Src, A); 3164 Dst = SE->getMulExpr(Dst, A); 3165 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, C)); 3166 Src = zeroCoefficient(Src, CurLoop); 3167 Dst = addToCoefficient(Dst, CurLoop, SE->getMulExpr(A_K, B)); 3168 if (!findCoefficient(Dst, CurLoop)->isZero()) 3169 Consistent = false; 3170 } 3171 LLVM_DEBUG(dbgs() << "\t\tnew Src = " << *Src << "\n"); 3172 LLVM_DEBUG(dbgs() << "\t\tnew Dst = " << *Dst << "\n"); 3173 return true; 3174 } 3175 3176 3177 // Attempt to propagate a point 3178 // constraint into a subscript pair (Src and Dst). 3179 // Return true if some simplification occurs. 3180 bool DependenceInfo::propagatePoint(const SCEV *&Src, const SCEV *&Dst, 3181 Constraint &CurConstraint) { 3182 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3183 const SCEV *A_K = findCoefficient(Src, CurLoop); 3184 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3185 const SCEV *XA_K = SE->getMulExpr(A_K, CurConstraint.getX()); 3186 const SCEV *YAP_K = SE->getMulExpr(AP_K, CurConstraint.getY()); 3187 LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 3188 Src = SE->getAddExpr(Src, SE->getMinusSCEV(XA_K, YAP_K)); 3189 Src = zeroCoefficient(Src, CurLoop); 3190 LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3191 LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3192 Dst = zeroCoefficient(Dst, CurLoop); 3193 LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3194 return true; 3195 } 3196 3197 3198 // Update direction vector entry based on the current constraint. 3199 void DependenceInfo::updateDirection(Dependence::DVEntry &Level, 3200 const Constraint &CurConstraint) const { 3201 LLVM_DEBUG(dbgs() << "\tUpdate direction, constraint ="); 3202 LLVM_DEBUG(CurConstraint.dump(dbgs())); 3203 if (CurConstraint.isAny()) 3204 ; // use defaults 3205 else if (CurConstraint.isDistance()) { 3206 // this one is consistent, the others aren't 3207 Level.Scalar = false; 3208 Level.Distance = CurConstraint.getD(); 3209 unsigned NewDirection = Dependence::DVEntry::NONE; 3210 if (!SE->isKnownNonZero(Level.Distance)) // if may be zero 3211 NewDirection = Dependence::DVEntry::EQ; 3212 if (!SE->isKnownNonPositive(Level.Distance)) // if may be positive 3213 NewDirection |= Dependence::DVEntry::LT; 3214 if (!SE->isKnownNonNegative(Level.Distance)) // if may be negative 3215 NewDirection |= Dependence::DVEntry::GT; 3216 Level.Direction &= NewDirection; 3217 } 3218 else if (CurConstraint.isLine()) { 3219 Level.Scalar = false; 3220 Level.Distance = nullptr; 3221 // direction should be accurate 3222 } 3223 else if (CurConstraint.isPoint()) { 3224 Level.Scalar = false; 3225 Level.Distance = nullptr; 3226 unsigned NewDirection = Dependence::DVEntry::NONE; 3227 if (!isKnownPredicate(CmpInst::ICMP_NE, 3228 CurConstraint.getY(), 3229 CurConstraint.getX())) 3230 // if X may be = Y 3231 NewDirection |= Dependence::DVEntry::EQ; 3232 if (!isKnownPredicate(CmpInst::ICMP_SLE, 3233 CurConstraint.getY(), 3234 CurConstraint.getX())) 3235 // if Y may be > X 3236 NewDirection |= Dependence::DVEntry::LT; 3237 if (!isKnownPredicate(CmpInst::ICMP_SGE, 3238 CurConstraint.getY(), 3239 CurConstraint.getX())) 3240 // if Y may be < X 3241 NewDirection |= Dependence::DVEntry::GT; 3242 Level.Direction &= NewDirection; 3243 } 3244 else 3245 llvm_unreachable("constraint has unexpected kind"); 3246 } 3247 3248 /// Check if we can delinearize the subscripts. If the SCEVs representing the 3249 /// source and destination array references are recurrences on a nested loop, 3250 /// this function flattens the nested recurrences into separate recurrences 3251 /// for each loop level. 3252 bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst, 3253 SmallVectorImpl<Subscript> &Pair) { 3254 assert(isLoadOrStore(Src) && "instruction is not load or store"); 3255 assert(isLoadOrStore(Dst) && "instruction is not load or store"); 3256 Value *SrcPtr = getLoadStorePointerOperand(Src); 3257 Value *DstPtr = getLoadStorePointerOperand(Dst); 3258 Loop *SrcLoop = LI->getLoopFor(Src->getParent()); 3259 Loop *DstLoop = LI->getLoopFor(Dst->getParent()); 3260 const SCEV *SrcAccessFn = SE->getSCEVAtScope(SrcPtr, SrcLoop); 3261 const SCEV *DstAccessFn = SE->getSCEVAtScope(DstPtr, DstLoop); 3262 const SCEVUnknown *SrcBase = 3263 dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn)); 3264 const SCEVUnknown *DstBase = 3265 dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn)); 3266 3267 if (!SrcBase || !DstBase || SrcBase != DstBase) 3268 return false; 3269 3270 SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts; 3271 3272 if (!tryDelinearizeFixedSize(Src, Dst, SrcAccessFn, DstAccessFn, 3273 SrcSubscripts, DstSubscripts) && 3274 !tryDelinearizeParametricSize(Src, Dst, SrcAccessFn, DstAccessFn, 3275 SrcSubscripts, DstSubscripts)) 3276 return false; 3277 3278 int Size = SrcSubscripts.size(); 3279 LLVM_DEBUG({ 3280 dbgs() << "\nSrcSubscripts: "; 3281 for (int I = 0; I < Size; I++) 3282 dbgs() << *SrcSubscripts[I]; 3283 dbgs() << "\nDstSubscripts: "; 3284 for (int I = 0; I < Size; I++) 3285 dbgs() << *DstSubscripts[I]; 3286 }); 3287 3288 // The delinearization transforms a single-subscript MIV dependence test into 3289 // a multi-subscript SIV dependence test that is easier to compute. So we 3290 // resize Pair to contain as many pairs of subscripts as the delinearization 3291 // has found, and then initialize the pairs following the delinearization. 3292 Pair.resize(Size); 3293 for (int I = 0; I < Size; ++I) { 3294 Pair[I].Src = SrcSubscripts[I]; 3295 Pair[I].Dst = DstSubscripts[I]; 3296 unifySubscriptType(&Pair[I]); 3297 } 3298 3299 return true; 3300 } 3301 3302 bool DependenceInfo::tryDelinearizeFixedSize( 3303 Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn, 3304 const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts, 3305 SmallVectorImpl<const SCEV *> &DstSubscripts) { 3306 3307 Value *SrcPtr = getLoadStorePointerOperand(Src); 3308 Value *DstPtr = getLoadStorePointerOperand(Dst); 3309 const SCEVUnknown *SrcBase = 3310 dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn)); 3311 const SCEVUnknown *DstBase = 3312 dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn)); 3313 assert(SrcBase && DstBase && SrcBase == DstBase && 3314 "expected src and dst scev unknowns to be equal"); 3315 3316 // Check the simple case where the array dimensions are fixed size. 3317 auto *SrcGEP = dyn_cast<GetElementPtrInst>(SrcPtr); 3318 auto *DstGEP = dyn_cast<GetElementPtrInst>(DstPtr); 3319 if (!SrcGEP || !DstGEP) 3320 return false; 3321 3322 SmallVector<int, 4> SrcSizes, DstSizes; 3323 SE->getIndexExpressionsFromGEP(SrcGEP, SrcSubscripts, SrcSizes); 3324 SE->getIndexExpressionsFromGEP(DstGEP, DstSubscripts, DstSizes); 3325 3326 // Check that the two size arrays are non-empty and equal in length and 3327 // value. 3328 if (SrcSizes.empty() || SrcSubscripts.size() <= 1 || 3329 SrcSizes.size() != DstSizes.size() || 3330 !std::equal(SrcSizes.begin(), SrcSizes.end(), DstSizes.begin())) { 3331 SrcSubscripts.clear(); 3332 DstSubscripts.clear(); 3333 return false; 3334 } 3335 3336 Value *SrcBasePtr = SrcGEP->getOperand(0); 3337 Value *DstBasePtr = DstGEP->getOperand(0); 3338 while (auto *PCast = dyn_cast<BitCastInst>(SrcBasePtr)) 3339 SrcBasePtr = PCast->getOperand(0); 3340 while (auto *PCast = dyn_cast<BitCastInst>(DstBasePtr)) 3341 DstBasePtr = PCast->getOperand(0); 3342 3343 // Check that for identical base pointers we do not miss index offsets 3344 // that have been added before this GEP is applied. 3345 if (SrcBasePtr != SrcBase->getValue() || DstBasePtr != DstBase->getValue()) { 3346 SrcSubscripts.clear(); 3347 DstSubscripts.clear(); 3348 return false; 3349 } 3350 3351 assert(SrcSubscripts.size() == DstSubscripts.size() && 3352 SrcSubscripts.size() == SrcSizes.size() + 1 && 3353 "Expected equal number of entries in the list of sizes and " 3354 "subscripts."); 3355 3356 // In general we cannot safely assume that the subscripts recovered from GEPs 3357 // are in the range of values defined for their corresponding array 3358 // dimensions. For example some C language usage/interpretation make it 3359 // impossible to verify this at compile-time. As such we can only delinearize 3360 // iff the subscripts are positive and are less than the range of the 3361 // dimension. 3362 if (!DisableDelinearizationChecks) { 3363 auto AllIndiciesInRange = [&](SmallVector<int, 4> &DimensionSizes, 3364 SmallVectorImpl<const SCEV *> &Subscripts, 3365 Value *Ptr) { 3366 size_t SSize = Subscripts.size(); 3367 for (size_t I = 1; I < SSize; ++I) { 3368 const SCEV *S = Subscripts[I]; 3369 if (!isKnownNonNegative(S, Ptr)) 3370 return false; 3371 if (auto *SType = dyn_cast<IntegerType>(S->getType())) { 3372 const SCEV *Range = SE->getConstant( 3373 ConstantInt::get(SType, DimensionSizes[I - 1], false)); 3374 if (!isKnownLessThan(S, Range)) 3375 return false; 3376 } 3377 } 3378 return true; 3379 }; 3380 3381 if (!AllIndiciesInRange(SrcSizes, SrcSubscripts, SrcPtr) || 3382 !AllIndiciesInRange(DstSizes, DstSubscripts, DstPtr)) { 3383 SrcSubscripts.clear(); 3384 DstSubscripts.clear(); 3385 return false; 3386 } 3387 } 3388 LLVM_DEBUG({ 3389 dbgs() << "Delinearized subscripts of fixed-size array\n" 3390 << "SrcGEP:" << *SrcGEP << "\n" 3391 << "DstGEP:" << *DstGEP << "\n"; 3392 }); 3393 return true; 3394 } 3395 3396 bool DependenceInfo::tryDelinearizeParametricSize( 3397 Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn, 3398 const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts, 3399 SmallVectorImpl<const SCEV *> &DstSubscripts) { 3400 3401 Value *SrcPtr = getLoadStorePointerOperand(Src); 3402 Value *DstPtr = getLoadStorePointerOperand(Dst); 3403 const SCEVUnknown *SrcBase = 3404 dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn)); 3405 const SCEVUnknown *DstBase = 3406 dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn)); 3407 assert(SrcBase && DstBase && SrcBase == DstBase && 3408 "expected src and dst scev unknowns to be equal"); 3409 3410 const SCEV *ElementSize = SE->getElementSize(Src); 3411 if (ElementSize != SE->getElementSize(Dst)) 3412 return false; 3413 3414 const SCEV *SrcSCEV = SE->getMinusSCEV(SrcAccessFn, SrcBase); 3415 const SCEV *DstSCEV = SE->getMinusSCEV(DstAccessFn, DstBase); 3416 3417 const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(SrcSCEV); 3418 const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(DstSCEV); 3419 if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine()) 3420 return false; 3421 3422 // First step: collect parametric terms in both array references. 3423 SmallVector<const SCEV *, 4> Terms; 3424 SE->collectParametricTerms(SrcAR, Terms); 3425 SE->collectParametricTerms(DstAR, Terms); 3426 3427 // Second step: find subscript sizes. 3428 SmallVector<const SCEV *, 4> Sizes; 3429 SE->findArrayDimensions(Terms, Sizes, ElementSize); 3430 3431 // Third step: compute the access functions for each subscript. 3432 SE->computeAccessFunctions(SrcAR, SrcSubscripts, Sizes); 3433 SE->computeAccessFunctions(DstAR, DstSubscripts, Sizes); 3434 3435 // Fail when there is only a subscript: that's a linearized access function. 3436 if (SrcSubscripts.size() < 2 || DstSubscripts.size() < 2 || 3437 SrcSubscripts.size() != DstSubscripts.size()) 3438 return false; 3439 3440 size_t Size = SrcSubscripts.size(); 3441 3442 // Statically check that the array bounds are in-range. The first subscript we 3443 // don't have a size for and it cannot overflow into another subscript, so is 3444 // always safe. The others need to be 0 <= subscript[i] < bound, for both src 3445 // and dst. 3446 // FIXME: It may be better to record these sizes and add them as constraints 3447 // to the dependency checks. 3448 if (!DisableDelinearizationChecks) 3449 for (size_t I = 1; I < Size; ++I) { 3450 if (!isKnownNonNegative(SrcSubscripts[I], SrcPtr)) 3451 return false; 3452 3453 if (!isKnownLessThan(SrcSubscripts[I], Sizes[I - 1])) 3454 return false; 3455 3456 if (!isKnownNonNegative(DstSubscripts[I], DstPtr)) 3457 return false; 3458 3459 if (!isKnownLessThan(DstSubscripts[I], Sizes[I - 1])) 3460 return false; 3461 } 3462 3463 return true; 3464 } 3465 3466 //===----------------------------------------------------------------------===// 3467 3468 #ifndef NDEBUG 3469 // For debugging purposes, dump a small bit vector to dbgs(). 3470 static void dumpSmallBitVector(SmallBitVector &BV) { 3471 dbgs() << "{"; 3472 for (unsigned VI : BV.set_bits()) { 3473 dbgs() << VI; 3474 if (BV.find_next(VI) >= 0) 3475 dbgs() << ' '; 3476 } 3477 dbgs() << "}\n"; 3478 } 3479 #endif 3480 3481 bool DependenceInfo::invalidate(Function &F, const PreservedAnalyses &PA, 3482 FunctionAnalysisManager::Invalidator &Inv) { 3483 // Check if the analysis itself has been invalidated. 3484 auto PAC = PA.getChecker<DependenceAnalysis>(); 3485 if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>()) 3486 return true; 3487 3488 // Check transitive dependencies. 3489 return Inv.invalidate<AAManager>(F, PA) || 3490 Inv.invalidate<ScalarEvolutionAnalysis>(F, PA) || 3491 Inv.invalidate<LoopAnalysis>(F, PA); 3492 } 3493 3494 // depends - 3495 // Returns NULL if there is no dependence. 3496 // Otherwise, return a Dependence with as many details as possible. 3497 // Corresponds to Section 3.1 in the paper 3498 // 3499 // Practical Dependence Testing 3500 // Goff, Kennedy, Tseng 3501 // PLDI 1991 3502 // 3503 // Care is required to keep the routine below, getSplitIteration(), 3504 // up to date with respect to this routine. 3505 std::unique_ptr<Dependence> 3506 DependenceInfo::depends(Instruction *Src, Instruction *Dst, 3507 bool PossiblyLoopIndependent) { 3508 if (Src == Dst) 3509 PossiblyLoopIndependent = false; 3510 3511 if (!(Src->mayReadOrWriteMemory() && Dst->mayReadOrWriteMemory())) 3512 // if both instructions don't reference memory, there's no dependence 3513 return nullptr; 3514 3515 if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) { 3516 // can only analyze simple loads and stores, i.e., no calls, invokes, etc. 3517 LLVM_DEBUG(dbgs() << "can only handle simple loads and stores\n"); 3518 return std::make_unique<Dependence>(Src, Dst); 3519 } 3520 3521 assert(isLoadOrStore(Src) && "instruction is not load or store"); 3522 assert(isLoadOrStore(Dst) && "instruction is not load or store"); 3523 Value *SrcPtr = getLoadStorePointerOperand(Src); 3524 Value *DstPtr = getLoadStorePointerOperand(Dst); 3525 3526 switch (underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), 3527 MemoryLocation::get(Dst), 3528 MemoryLocation::get(Src))) { 3529 case AliasResult::MayAlias: 3530 case AliasResult::PartialAlias: 3531 // cannot analyse objects if we don't understand their aliasing. 3532 LLVM_DEBUG(dbgs() << "can't analyze may or partial alias\n"); 3533 return std::make_unique<Dependence>(Src, Dst); 3534 case AliasResult::NoAlias: 3535 // If the objects noalias, they are distinct, accesses are independent. 3536 LLVM_DEBUG(dbgs() << "no alias\n"); 3537 return nullptr; 3538 case AliasResult::MustAlias: 3539 break; // The underlying objects alias; test accesses for dependence. 3540 } 3541 3542 // establish loop nesting levels 3543 establishNestingLevels(Src, Dst); 3544 LLVM_DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n"); 3545 LLVM_DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n"); 3546 3547 FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels); 3548 ++TotalArrayPairs; 3549 3550 unsigned Pairs = 1; 3551 SmallVector<Subscript, 2> Pair(Pairs); 3552 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); 3553 const SCEV *DstSCEV = SE->getSCEV(DstPtr); 3554 LLVM_DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n"); 3555 LLVM_DEBUG(dbgs() << " DstSCEV = " << *DstSCEV << "\n"); 3556 if (SE->getPointerBase(SrcSCEV) != SE->getPointerBase(DstSCEV)) { 3557 // If two pointers have different bases, trying to analyze indexes won't 3558 // work; we can't compare them to each other. This can happen, for example, 3559 // if one is produced by an LCSSA PHI node. 3560 // 3561 // We check this upfront so we don't crash in cases where getMinusSCEV() 3562 // returns a SCEVCouldNotCompute. 3563 LLVM_DEBUG(dbgs() << "can't analyze SCEV with different pointer base\n"); 3564 return std::make_unique<Dependence>(Src, Dst); 3565 } 3566 Pair[0].Src = SrcSCEV; 3567 Pair[0].Dst = DstSCEV; 3568 3569 if (Delinearize) { 3570 if (tryDelinearize(Src, Dst, Pair)) { 3571 LLVM_DEBUG(dbgs() << " delinearized\n"); 3572 Pairs = Pair.size(); 3573 } 3574 } 3575 3576 for (unsigned P = 0; P < Pairs; ++P) { 3577 Pair[P].Loops.resize(MaxLevels + 1); 3578 Pair[P].GroupLoops.resize(MaxLevels + 1); 3579 Pair[P].Group.resize(Pairs); 3580 removeMatchingExtensions(&Pair[P]); 3581 Pair[P].Classification = 3582 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), 3583 Pair[P].Dst, LI->getLoopFor(Dst->getParent()), 3584 Pair[P].Loops); 3585 Pair[P].GroupLoops = Pair[P].Loops; 3586 Pair[P].Group.set(P); 3587 LLVM_DEBUG(dbgs() << " subscript " << P << "\n"); 3588 LLVM_DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n"); 3589 LLVM_DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n"); 3590 LLVM_DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n"); 3591 LLVM_DEBUG(dbgs() << "\tloops = "); 3592 LLVM_DEBUG(dumpSmallBitVector(Pair[P].Loops)); 3593 } 3594 3595 SmallBitVector Separable(Pairs); 3596 SmallBitVector Coupled(Pairs); 3597 3598 // Partition subscripts into separable and minimally-coupled groups 3599 // Algorithm in paper is algorithmically better; 3600 // this may be faster in practice. Check someday. 3601 // 3602 // Here's an example of how it works. Consider this code: 3603 // 3604 // for (i = ...) { 3605 // for (j = ...) { 3606 // for (k = ...) { 3607 // for (l = ...) { 3608 // for (m = ...) { 3609 // A[i][j][k][m] = ...; 3610 // ... = A[0][j][l][i + j]; 3611 // } 3612 // } 3613 // } 3614 // } 3615 // } 3616 // 3617 // There are 4 subscripts here: 3618 // 0 [i] and [0] 3619 // 1 [j] and [j] 3620 // 2 [k] and [l] 3621 // 3 [m] and [i + j] 3622 // 3623 // We've already classified each subscript pair as ZIV, SIV, etc., 3624 // and collected all the loops mentioned by pair P in Pair[P].Loops. 3625 // In addition, we've initialized Pair[P].GroupLoops to Pair[P].Loops 3626 // and set Pair[P].Group = {P}. 3627 // 3628 // Src Dst Classification Loops GroupLoops Group 3629 // 0 [i] [0] SIV {1} {1} {0} 3630 // 1 [j] [j] SIV {2} {2} {1} 3631 // 2 [k] [l] RDIV {3,4} {3,4} {2} 3632 // 3 [m] [i + j] MIV {1,2,5} {1,2,5} {3} 3633 // 3634 // For each subscript SI 0 .. 3, we consider each remaining subscript, SJ. 3635 // So, 0 is compared against 1, 2, and 3; 1 is compared against 2 and 3, etc. 3636 // 3637 // We begin by comparing 0 and 1. The intersection of the GroupLoops is empty. 3638 // Next, 0 and 2. Again, the intersection of their GroupLoops is empty. 3639 // Next 0 and 3. The intersection of their GroupLoop = {1}, not empty, 3640 // so Pair[3].Group = {0,3} and Done = false (that is, 0 will not be added 3641 // to either Separable or Coupled). 3642 // 3643 // Next, we consider 1 and 2. The intersection of the GroupLoops is empty. 3644 // Next, 1 and 3. The intersection of their GroupLoops = {2}, not empty, 3645 // so Pair[3].Group = {0, 1, 3} and Done = false. 3646 // 3647 // Next, we compare 2 against 3. The intersection of the GroupLoops is empty. 3648 // Since Done remains true, we add 2 to the set of Separable pairs. 3649 // 3650 // Finally, we consider 3. There's nothing to compare it with, 3651 // so Done remains true and we add it to the Coupled set. 3652 // Pair[3].Group = {0, 1, 3} and GroupLoops = {1, 2, 5}. 3653 // 3654 // In the end, we've got 1 separable subscript and 1 coupled group. 3655 for (unsigned SI = 0; SI < Pairs; ++SI) { 3656 if (Pair[SI].Classification == Subscript::NonLinear) { 3657 // ignore these, but collect loops for later 3658 ++NonlinearSubscriptPairs; 3659 collectCommonLoops(Pair[SI].Src, 3660 LI->getLoopFor(Src->getParent()), 3661 Pair[SI].Loops); 3662 collectCommonLoops(Pair[SI].Dst, 3663 LI->getLoopFor(Dst->getParent()), 3664 Pair[SI].Loops); 3665 Result.Consistent = false; 3666 } else if (Pair[SI].Classification == Subscript::ZIV) { 3667 // always separable 3668 Separable.set(SI); 3669 } 3670 else { 3671 // SIV, RDIV, or MIV, so check for coupled group 3672 bool Done = true; 3673 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 3674 SmallBitVector Intersection = Pair[SI].GroupLoops; 3675 Intersection &= Pair[SJ].GroupLoops; 3676 if (Intersection.any()) { 3677 // accumulate set of all the loops in group 3678 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 3679 // accumulate set of all subscripts in group 3680 Pair[SJ].Group |= Pair[SI].Group; 3681 Done = false; 3682 } 3683 } 3684 if (Done) { 3685 if (Pair[SI].Group.count() == 1) { 3686 Separable.set(SI); 3687 ++SeparableSubscriptPairs; 3688 } 3689 else { 3690 Coupled.set(SI); 3691 ++CoupledSubscriptPairs; 3692 } 3693 } 3694 } 3695 } 3696 3697 LLVM_DEBUG(dbgs() << " Separable = "); 3698 LLVM_DEBUG(dumpSmallBitVector(Separable)); 3699 LLVM_DEBUG(dbgs() << " Coupled = "); 3700 LLVM_DEBUG(dumpSmallBitVector(Coupled)); 3701 3702 Constraint NewConstraint; 3703 NewConstraint.setAny(SE); 3704 3705 // test separable subscripts 3706 for (unsigned SI : Separable.set_bits()) { 3707 LLVM_DEBUG(dbgs() << "testing subscript " << SI); 3708 switch (Pair[SI].Classification) { 3709 case Subscript::ZIV: 3710 LLVM_DEBUG(dbgs() << ", ZIV\n"); 3711 if (testZIV(Pair[SI].Src, Pair[SI].Dst, Result)) 3712 return nullptr; 3713 break; 3714 case Subscript::SIV: { 3715 LLVM_DEBUG(dbgs() << ", SIV\n"); 3716 unsigned Level; 3717 const SCEV *SplitIter = nullptr; 3718 if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level, Result, NewConstraint, 3719 SplitIter)) 3720 return nullptr; 3721 break; 3722 } 3723 case Subscript::RDIV: 3724 LLVM_DEBUG(dbgs() << ", RDIV\n"); 3725 if (testRDIV(Pair[SI].Src, Pair[SI].Dst, Result)) 3726 return nullptr; 3727 break; 3728 case Subscript::MIV: 3729 LLVM_DEBUG(dbgs() << ", MIV\n"); 3730 if (testMIV(Pair[SI].Src, Pair[SI].Dst, Pair[SI].Loops, Result)) 3731 return nullptr; 3732 break; 3733 default: 3734 llvm_unreachable("subscript has unexpected classification"); 3735 } 3736 } 3737 3738 if (Coupled.count()) { 3739 // test coupled subscript groups 3740 LLVM_DEBUG(dbgs() << "starting on coupled subscripts\n"); 3741 LLVM_DEBUG(dbgs() << "MaxLevels + 1 = " << MaxLevels + 1 << "\n"); 3742 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 3743 for (unsigned II = 0; II <= MaxLevels; ++II) 3744 Constraints[II].setAny(SE); 3745 for (unsigned SI : Coupled.set_bits()) { 3746 LLVM_DEBUG(dbgs() << "testing subscript group " << SI << " { "); 3747 SmallBitVector Group(Pair[SI].Group); 3748 SmallBitVector Sivs(Pairs); 3749 SmallBitVector Mivs(Pairs); 3750 SmallBitVector ConstrainedLevels(MaxLevels + 1); 3751 SmallVector<Subscript *, 4> PairsInGroup; 3752 for (unsigned SJ : Group.set_bits()) { 3753 LLVM_DEBUG(dbgs() << SJ << " "); 3754 if (Pair[SJ].Classification == Subscript::SIV) 3755 Sivs.set(SJ); 3756 else 3757 Mivs.set(SJ); 3758 PairsInGroup.push_back(&Pair[SJ]); 3759 } 3760 unifySubscriptType(PairsInGroup); 3761 LLVM_DEBUG(dbgs() << "}\n"); 3762 while (Sivs.any()) { 3763 bool Changed = false; 3764 for (unsigned SJ : Sivs.set_bits()) { 3765 LLVM_DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n"); 3766 // SJ is an SIV subscript that's part of the current coupled group 3767 unsigned Level; 3768 const SCEV *SplitIter = nullptr; 3769 LLVM_DEBUG(dbgs() << "SIV\n"); 3770 if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, Result, NewConstraint, 3771 SplitIter)) 3772 return nullptr; 3773 ConstrainedLevels.set(Level); 3774 if (intersectConstraints(&Constraints[Level], &NewConstraint)) { 3775 if (Constraints[Level].isEmpty()) { 3776 ++DeltaIndependence; 3777 return nullptr; 3778 } 3779 Changed = true; 3780 } 3781 Sivs.reset(SJ); 3782 } 3783 if (Changed) { 3784 // propagate, possibly creating new SIVs and ZIVs 3785 LLVM_DEBUG(dbgs() << " propagating\n"); 3786 LLVM_DEBUG(dbgs() << "\tMivs = "); 3787 LLVM_DEBUG(dumpSmallBitVector(Mivs)); 3788 for (unsigned SJ : Mivs.set_bits()) { 3789 // SJ is an MIV subscript that's part of the current coupled group 3790 LLVM_DEBUG(dbgs() << "\tSJ = " << SJ << "\n"); 3791 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, 3792 Constraints, Result.Consistent)) { 3793 LLVM_DEBUG(dbgs() << "\t Changed\n"); 3794 ++DeltaPropagations; 3795 Pair[SJ].Classification = 3796 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), 3797 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), 3798 Pair[SJ].Loops); 3799 switch (Pair[SJ].Classification) { 3800 case Subscript::ZIV: 3801 LLVM_DEBUG(dbgs() << "ZIV\n"); 3802 if (testZIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) 3803 return nullptr; 3804 Mivs.reset(SJ); 3805 break; 3806 case Subscript::SIV: 3807 Sivs.set(SJ); 3808 Mivs.reset(SJ); 3809 break; 3810 case Subscript::RDIV: 3811 case Subscript::MIV: 3812 break; 3813 default: 3814 llvm_unreachable("bad subscript classification"); 3815 } 3816 } 3817 } 3818 } 3819 } 3820 3821 // test & propagate remaining RDIVs 3822 for (unsigned SJ : Mivs.set_bits()) { 3823 if (Pair[SJ].Classification == Subscript::RDIV) { 3824 LLVM_DEBUG(dbgs() << "RDIV test\n"); 3825 if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) 3826 return nullptr; 3827 // I don't yet understand how to propagate RDIV results 3828 Mivs.reset(SJ); 3829 } 3830 } 3831 3832 // test remaining MIVs 3833 // This code is temporary. 3834 // Better to somehow test all remaining subscripts simultaneously. 3835 for (unsigned SJ : Mivs.set_bits()) { 3836 if (Pair[SJ].Classification == Subscript::MIV) { 3837 LLVM_DEBUG(dbgs() << "MIV test\n"); 3838 if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result)) 3839 return nullptr; 3840 } 3841 else 3842 llvm_unreachable("expected only MIV subscripts at this point"); 3843 } 3844 3845 // update Result.DV from constraint vector 3846 LLVM_DEBUG(dbgs() << " updating\n"); 3847 for (unsigned SJ : ConstrainedLevels.set_bits()) { 3848 if (SJ > CommonLevels) 3849 break; 3850 updateDirection(Result.DV[SJ - 1], Constraints[SJ]); 3851 if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE) 3852 return nullptr; 3853 } 3854 } 3855 } 3856 3857 // Make sure the Scalar flags are set correctly. 3858 SmallBitVector CompleteLoops(MaxLevels + 1); 3859 for (unsigned SI = 0; SI < Pairs; ++SI) 3860 CompleteLoops |= Pair[SI].Loops; 3861 for (unsigned II = 1; II <= CommonLevels; ++II) 3862 if (CompleteLoops[II]) 3863 Result.DV[II - 1].Scalar = false; 3864 3865 if (PossiblyLoopIndependent) { 3866 // Make sure the LoopIndependent flag is set correctly. 3867 // All directions must include equal, otherwise no 3868 // loop-independent dependence is possible. 3869 for (unsigned II = 1; II <= CommonLevels; ++II) { 3870 if (!(Result.getDirection(II) & Dependence::DVEntry::EQ)) { 3871 Result.LoopIndependent = false; 3872 break; 3873 } 3874 } 3875 } 3876 else { 3877 // On the other hand, if all directions are equal and there's no 3878 // loop-independent dependence possible, then no dependence exists. 3879 bool AllEqual = true; 3880 for (unsigned II = 1; II <= CommonLevels; ++II) { 3881 if (Result.getDirection(II) != Dependence::DVEntry::EQ) { 3882 AllEqual = false; 3883 break; 3884 } 3885 } 3886 if (AllEqual) 3887 return nullptr; 3888 } 3889 3890 return std::make_unique<FullDependence>(std::move(Result)); 3891 } 3892 3893 //===----------------------------------------------------------------------===// 3894 // getSplitIteration - 3895 // Rather than spend rarely-used space recording the splitting iteration 3896 // during the Weak-Crossing SIV test, we re-compute it on demand. 3897 // The re-computation is basically a repeat of the entire dependence test, 3898 // though simplified since we know that the dependence exists. 3899 // It's tedious, since we must go through all propagations, etc. 3900 // 3901 // Care is required to keep this code up to date with respect to the routine 3902 // above, depends(). 3903 // 3904 // Generally, the dependence analyzer will be used to build 3905 // a dependence graph for a function (basically a map from instructions 3906 // to dependences). Looking for cycles in the graph shows us loops 3907 // that cannot be trivially vectorized/parallelized. 3908 // 3909 // We can try to improve the situation by examining all the dependences 3910 // that make up the cycle, looking for ones we can break. 3911 // Sometimes, peeling the first or last iteration of a loop will break 3912 // dependences, and we've got flags for those possibilities. 3913 // Sometimes, splitting a loop at some other iteration will do the trick, 3914 // and we've got a flag for that case. Rather than waste the space to 3915 // record the exact iteration (since we rarely know), we provide 3916 // a method that calculates the iteration. It's a drag that it must work 3917 // from scratch, but wonderful in that it's possible. 3918 // 3919 // Here's an example: 3920 // 3921 // for (i = 0; i < 10; i++) 3922 // A[i] = ... 3923 // ... = A[11 - i] 3924 // 3925 // There's a loop-carried flow dependence from the store to the load, 3926 // found by the weak-crossing SIV test. The dependence will have a flag, 3927 // indicating that the dependence can be broken by splitting the loop. 3928 // Calling getSplitIteration will return 5. 3929 // Splitting the loop breaks the dependence, like so: 3930 // 3931 // for (i = 0; i <= 5; i++) 3932 // A[i] = ... 3933 // ... = A[11 - i] 3934 // for (i = 6; i < 10; i++) 3935 // A[i] = ... 3936 // ... = A[11 - i] 3937 // 3938 // breaks the dependence and allows us to vectorize/parallelize 3939 // both loops. 3940 const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep, 3941 unsigned SplitLevel) { 3942 assert(Dep.isSplitable(SplitLevel) && 3943 "Dep should be splitable at SplitLevel"); 3944 Instruction *Src = Dep.getSrc(); 3945 Instruction *Dst = Dep.getDst(); 3946 assert(Src->mayReadFromMemory() || Src->mayWriteToMemory()); 3947 assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory()); 3948 assert(isLoadOrStore(Src)); 3949 assert(isLoadOrStore(Dst)); 3950 Value *SrcPtr = getLoadStorePointerOperand(Src); 3951 Value *DstPtr = getLoadStorePointerOperand(Dst); 3952 assert(underlyingObjectsAlias( 3953 AA, F->getParent()->getDataLayout(), MemoryLocation::get(Dst), 3954 MemoryLocation::get(Src)) == AliasResult::MustAlias); 3955 3956 // establish loop nesting levels 3957 establishNestingLevels(Src, Dst); 3958 3959 FullDependence Result(Src, Dst, false, CommonLevels); 3960 3961 unsigned Pairs = 1; 3962 SmallVector<Subscript, 2> Pair(Pairs); 3963 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); 3964 const SCEV *DstSCEV = SE->getSCEV(DstPtr); 3965 Pair[0].Src = SrcSCEV; 3966 Pair[0].Dst = DstSCEV; 3967 3968 if (Delinearize) { 3969 if (tryDelinearize(Src, Dst, Pair)) { 3970 LLVM_DEBUG(dbgs() << " delinearized\n"); 3971 Pairs = Pair.size(); 3972 } 3973 } 3974 3975 for (unsigned P = 0; P < Pairs; ++P) { 3976 Pair[P].Loops.resize(MaxLevels + 1); 3977 Pair[P].GroupLoops.resize(MaxLevels + 1); 3978 Pair[P].Group.resize(Pairs); 3979 removeMatchingExtensions(&Pair[P]); 3980 Pair[P].Classification = 3981 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), 3982 Pair[P].Dst, LI->getLoopFor(Dst->getParent()), 3983 Pair[P].Loops); 3984 Pair[P].GroupLoops = Pair[P].Loops; 3985 Pair[P].Group.set(P); 3986 } 3987 3988 SmallBitVector Separable(Pairs); 3989 SmallBitVector Coupled(Pairs); 3990 3991 // partition subscripts into separable and minimally-coupled groups 3992 for (unsigned SI = 0; SI < Pairs; ++SI) { 3993 if (Pair[SI].Classification == Subscript::NonLinear) { 3994 // ignore these, but collect loops for later 3995 collectCommonLoops(Pair[SI].Src, 3996 LI->getLoopFor(Src->getParent()), 3997 Pair[SI].Loops); 3998 collectCommonLoops(Pair[SI].Dst, 3999 LI->getLoopFor(Dst->getParent()), 4000 Pair[SI].Loops); 4001 Result.Consistent = false; 4002 } 4003 else if (Pair[SI].Classification == Subscript::ZIV) 4004 Separable.set(SI); 4005 else { 4006 // SIV, RDIV, or MIV, so check for coupled group 4007 bool Done = true; 4008 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 4009 SmallBitVector Intersection = Pair[SI].GroupLoops; 4010 Intersection &= Pair[SJ].GroupLoops; 4011 if (Intersection.any()) { 4012 // accumulate set of all the loops in group 4013 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 4014 // accumulate set of all subscripts in group 4015 Pair[SJ].Group |= Pair[SI].Group; 4016 Done = false; 4017 } 4018 } 4019 if (Done) { 4020 if (Pair[SI].Group.count() == 1) 4021 Separable.set(SI); 4022 else 4023 Coupled.set(SI); 4024 } 4025 } 4026 } 4027 4028 Constraint NewConstraint; 4029 NewConstraint.setAny(SE); 4030 4031 // test separable subscripts 4032 for (unsigned SI : Separable.set_bits()) { 4033 switch (Pair[SI].Classification) { 4034 case Subscript::SIV: { 4035 unsigned Level; 4036 const SCEV *SplitIter = nullptr; 4037 (void) testSIV(Pair[SI].Src, Pair[SI].Dst, Level, 4038 Result, NewConstraint, SplitIter); 4039 if (Level == SplitLevel) { 4040 assert(SplitIter != nullptr); 4041 return SplitIter; 4042 } 4043 break; 4044 } 4045 case Subscript::ZIV: 4046 case Subscript::RDIV: 4047 case Subscript::MIV: 4048 break; 4049 default: 4050 llvm_unreachable("subscript has unexpected classification"); 4051 } 4052 } 4053 4054 if (Coupled.count()) { 4055 // test coupled subscript groups 4056 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 4057 for (unsigned II = 0; II <= MaxLevels; ++II) 4058 Constraints[II].setAny(SE); 4059 for (unsigned SI : Coupled.set_bits()) { 4060 SmallBitVector Group(Pair[SI].Group); 4061 SmallBitVector Sivs(Pairs); 4062 SmallBitVector Mivs(Pairs); 4063 SmallBitVector ConstrainedLevels(MaxLevels + 1); 4064 for (unsigned SJ : Group.set_bits()) { 4065 if (Pair[SJ].Classification == Subscript::SIV) 4066 Sivs.set(SJ); 4067 else 4068 Mivs.set(SJ); 4069 } 4070 while (Sivs.any()) { 4071 bool Changed = false; 4072 for (unsigned SJ : Sivs.set_bits()) { 4073 // SJ is an SIV subscript that's part of the current coupled group 4074 unsigned Level; 4075 const SCEV *SplitIter = nullptr; 4076 (void) testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, 4077 Result, NewConstraint, SplitIter); 4078 if (Level == SplitLevel && SplitIter) 4079 return SplitIter; 4080 ConstrainedLevels.set(Level); 4081 if (intersectConstraints(&Constraints[Level], &NewConstraint)) 4082 Changed = true; 4083 Sivs.reset(SJ); 4084 } 4085 if (Changed) { 4086 // propagate, possibly creating new SIVs and ZIVs 4087 for (unsigned SJ : Mivs.set_bits()) { 4088 // SJ is an MIV subscript that's part of the current coupled group 4089 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, 4090 Pair[SJ].Loops, Constraints, Result.Consistent)) { 4091 Pair[SJ].Classification = 4092 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), 4093 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), 4094 Pair[SJ].Loops); 4095 switch (Pair[SJ].Classification) { 4096 case Subscript::ZIV: 4097 Mivs.reset(SJ); 4098 break; 4099 case Subscript::SIV: 4100 Sivs.set(SJ); 4101 Mivs.reset(SJ); 4102 break; 4103 case Subscript::RDIV: 4104 case Subscript::MIV: 4105 break; 4106 default: 4107 llvm_unreachable("bad subscript classification"); 4108 } 4109 } 4110 } 4111 } 4112 } 4113 } 4114 } 4115 llvm_unreachable("somehow reached end of routine"); 4116 return nullptr; 4117 } 4118