1 //===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // DependenceAnalysis is an LLVM pass that analyses dependences between memory 10 // accesses. Currently, it is an (incomplete) implementation of the approach 11 // described in 12 // 13 // Practical Dependence Testing 14 // Goff, Kennedy, Tseng 15 // PLDI 1991 16 // 17 // There's a single entry point that analyzes the dependence between a pair 18 // of memory references in a function, returning either NULL, for no dependence, 19 // or a more-or-less detailed description of the dependence between them. 20 // 21 // Currently, the implementation cannot propagate constraints between 22 // coupled RDIV subscripts and lacks a multi-subscript MIV test. 23 // Both of these are conservative weaknesses; 24 // that is, not a source of correctness problems. 25 // 26 // Since Clang linearizes some array subscripts, the dependence 27 // analysis is using SCEV->delinearize to recover the representation of multiple 28 // subscripts, and thus avoid the more expensive and less precise MIV tests. The 29 // delinearization is controlled by the flag -da-delinearize. 30 // 31 // We should pay some careful attention to the possibility of integer overflow 32 // in the implementation of the various tests. This could happen with Add, 33 // Subtract, or Multiply, with both APInt's and SCEV's. 34 // 35 // Some non-linear subscript pairs can be handled by the GCD test 36 // (and perhaps other tests). 37 // Should explore how often these things occur. 38 // 39 // Finally, it seems like certain test cases expose weaknesses in the SCEV 40 // simplification, especially in the handling of sign and zero extensions. 41 // It could be useful to spend time exploring these. 42 // 43 // Please note that this is work in progress and the interface is subject to 44 // change. 45 // 46 //===----------------------------------------------------------------------===// 47 // // 48 // In memory of Ken Kennedy, 1945 - 2007 // 49 // // 50 //===----------------------------------------------------------------------===// 51 52 #include "llvm/Analysis/DependenceAnalysis.h" 53 #include "llvm/ADT/STLExtras.h" 54 #include "llvm/ADT/Statistic.h" 55 #include "llvm/Analysis/AliasAnalysis.h" 56 #include "llvm/Analysis/LoopInfo.h" 57 #include "llvm/Analysis/ScalarEvolution.h" 58 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 59 #include "llvm/Analysis/ValueTracking.h" 60 #include "llvm/Config/llvm-config.h" 61 #include "llvm/IR/InstIterator.h" 62 #include "llvm/IR/Module.h" 63 #include "llvm/IR/Operator.h" 64 #include "llvm/Support/CommandLine.h" 65 #include "llvm/Support/Debug.h" 66 #include "llvm/Support/ErrorHandling.h" 67 #include "llvm/Support/raw_ostream.h" 68 69 using namespace llvm; 70 71 #define DEBUG_TYPE "da" 72 73 //===----------------------------------------------------------------------===// 74 // statistics 75 76 STATISTIC(TotalArrayPairs, "Array pairs tested"); 77 STATISTIC(SeparableSubscriptPairs, "Separable subscript pairs"); 78 STATISTIC(CoupledSubscriptPairs, "Coupled subscript pairs"); 79 STATISTIC(NonlinearSubscriptPairs, "Nonlinear subscript pairs"); 80 STATISTIC(ZIVapplications, "ZIV applications"); 81 STATISTIC(ZIVindependence, "ZIV independence"); 82 STATISTIC(StrongSIVapplications, "Strong SIV applications"); 83 STATISTIC(StrongSIVsuccesses, "Strong SIV successes"); 84 STATISTIC(StrongSIVindependence, "Strong SIV independence"); 85 STATISTIC(WeakCrossingSIVapplications, "Weak-Crossing SIV applications"); 86 STATISTIC(WeakCrossingSIVsuccesses, "Weak-Crossing SIV successes"); 87 STATISTIC(WeakCrossingSIVindependence, "Weak-Crossing SIV independence"); 88 STATISTIC(ExactSIVapplications, "Exact SIV applications"); 89 STATISTIC(ExactSIVsuccesses, "Exact SIV successes"); 90 STATISTIC(ExactSIVindependence, "Exact SIV independence"); 91 STATISTIC(WeakZeroSIVapplications, "Weak-Zero SIV applications"); 92 STATISTIC(WeakZeroSIVsuccesses, "Weak-Zero SIV successes"); 93 STATISTIC(WeakZeroSIVindependence, "Weak-Zero SIV independence"); 94 STATISTIC(ExactRDIVapplications, "Exact RDIV applications"); 95 STATISTIC(ExactRDIVindependence, "Exact RDIV independence"); 96 STATISTIC(SymbolicRDIVapplications, "Symbolic RDIV applications"); 97 STATISTIC(SymbolicRDIVindependence, "Symbolic RDIV independence"); 98 STATISTIC(DeltaApplications, "Delta applications"); 99 STATISTIC(DeltaSuccesses, "Delta successes"); 100 STATISTIC(DeltaIndependence, "Delta independence"); 101 STATISTIC(DeltaPropagations, "Delta propagations"); 102 STATISTIC(GCDapplications, "GCD applications"); 103 STATISTIC(GCDsuccesses, "GCD successes"); 104 STATISTIC(GCDindependence, "GCD independence"); 105 STATISTIC(BanerjeeApplications, "Banerjee applications"); 106 STATISTIC(BanerjeeIndependence, "Banerjee independence"); 107 STATISTIC(BanerjeeSuccesses, "Banerjee successes"); 108 109 static cl::opt<bool> 110 Delinearize("da-delinearize", cl::init(true), cl::Hidden, cl::ZeroOrMore, 111 cl::desc("Try to delinearize array references.")); 112 static cl::opt<bool> DisableDelinearizationChecks( 113 "da-disable-delinearization-checks", cl::init(false), cl::Hidden, 114 cl::ZeroOrMore, 115 cl::desc( 116 "Disable checks that try to statically verify validity of " 117 "delinearized subscripts. Enabling this option may result in incorrect " 118 "dependence vectors for languages that allow the subscript of one " 119 "dimension to underflow or overflow into another dimension.")); 120 121 //===----------------------------------------------------------------------===// 122 // basics 123 124 DependenceAnalysis::Result 125 DependenceAnalysis::run(Function &F, FunctionAnalysisManager &FAM) { 126 auto &AA = FAM.getResult<AAManager>(F); 127 auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F); 128 auto &LI = FAM.getResult<LoopAnalysis>(F); 129 return DependenceInfo(&F, &AA, &SE, &LI); 130 } 131 132 AnalysisKey DependenceAnalysis::Key; 133 134 INITIALIZE_PASS_BEGIN(DependenceAnalysisWrapperPass, "da", 135 "Dependence Analysis", true, true) 136 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 137 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) 138 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 139 INITIALIZE_PASS_END(DependenceAnalysisWrapperPass, "da", "Dependence Analysis", 140 true, true) 141 142 char DependenceAnalysisWrapperPass::ID = 0; 143 144 FunctionPass *llvm::createDependenceAnalysisWrapperPass() { 145 return new DependenceAnalysisWrapperPass(); 146 } 147 148 bool DependenceAnalysisWrapperPass::runOnFunction(Function &F) { 149 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); 150 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 151 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 152 info.reset(new DependenceInfo(&F, &AA, &SE, &LI)); 153 return false; 154 } 155 156 DependenceInfo &DependenceAnalysisWrapperPass::getDI() const { return *info; } 157 158 void DependenceAnalysisWrapperPass::releaseMemory() { info.reset(); } 159 160 void DependenceAnalysisWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { 161 AU.setPreservesAll(); 162 AU.addRequiredTransitive<AAResultsWrapperPass>(); 163 AU.addRequiredTransitive<ScalarEvolutionWrapperPass>(); 164 AU.addRequiredTransitive<LoopInfoWrapperPass>(); 165 } 166 167 168 // Used to test the dependence analyzer. 169 // Looks through the function, noting loads and stores. 170 // Calls depends() on every possible pair and prints out the result. 171 // Ignores all other instructions. 172 static void dumpExampleDependence(raw_ostream &OS, DependenceInfo *DA) { 173 auto *F = DA->getFunction(); 174 for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F); SrcI != SrcE; 175 ++SrcI) { 176 if (isa<StoreInst>(*SrcI) || isa<LoadInst>(*SrcI)) { 177 for (inst_iterator DstI = SrcI, DstE = inst_end(F); 178 DstI != DstE; ++DstI) { 179 if (isa<StoreInst>(*DstI) || isa<LoadInst>(*DstI)) { 180 OS << "da analyze - "; 181 if (auto D = DA->depends(&*SrcI, &*DstI, true)) { 182 D->dump(OS); 183 for (unsigned Level = 1; Level <= D->getLevels(); Level++) { 184 if (D->isSplitable(Level)) { 185 OS << "da analyze - split level = " << Level; 186 OS << ", iteration = " << *DA->getSplitIteration(*D, Level); 187 OS << "!\n"; 188 } 189 } 190 } 191 else 192 OS << "none!\n"; 193 } 194 } 195 } 196 } 197 } 198 199 void DependenceAnalysisWrapperPass::print(raw_ostream &OS, 200 const Module *) const { 201 dumpExampleDependence(OS, info.get()); 202 } 203 204 PreservedAnalyses 205 DependenceAnalysisPrinterPass::run(Function &F, FunctionAnalysisManager &FAM) { 206 OS << "'Dependence Analysis' for function '" << F.getName() << "':\n"; 207 dumpExampleDependence(OS, &FAM.getResult<DependenceAnalysis>(F)); 208 return PreservedAnalyses::all(); 209 } 210 211 //===----------------------------------------------------------------------===// 212 // Dependence methods 213 214 // Returns true if this is an input dependence. 215 bool Dependence::isInput() const { 216 return Src->mayReadFromMemory() && Dst->mayReadFromMemory(); 217 } 218 219 220 // Returns true if this is an output dependence. 221 bool Dependence::isOutput() const { 222 return Src->mayWriteToMemory() && Dst->mayWriteToMemory(); 223 } 224 225 226 // Returns true if this is an flow (aka true) dependence. 227 bool Dependence::isFlow() const { 228 return Src->mayWriteToMemory() && Dst->mayReadFromMemory(); 229 } 230 231 232 // Returns true if this is an anti dependence. 233 bool Dependence::isAnti() const { 234 return Src->mayReadFromMemory() && Dst->mayWriteToMemory(); 235 } 236 237 238 // Returns true if a particular level is scalar; that is, 239 // if no subscript in the source or destination mention the induction 240 // variable associated with the loop at this level. 241 // Leave this out of line, so it will serve as a virtual method anchor 242 bool Dependence::isScalar(unsigned level) const { 243 return false; 244 } 245 246 247 //===----------------------------------------------------------------------===// 248 // FullDependence methods 249 250 FullDependence::FullDependence(Instruction *Source, Instruction *Destination, 251 bool PossiblyLoopIndependent, 252 unsigned CommonLevels) 253 : Dependence(Source, Destination), Levels(CommonLevels), 254 LoopIndependent(PossiblyLoopIndependent) { 255 Consistent = true; 256 if (CommonLevels) 257 DV = make_unique<DVEntry[]>(CommonLevels); 258 } 259 260 // The rest are simple getters that hide the implementation. 261 262 // getDirection - Returns the direction associated with a particular level. 263 unsigned FullDependence::getDirection(unsigned Level) const { 264 assert(0 < Level && Level <= Levels && "Level out of range"); 265 return DV[Level - 1].Direction; 266 } 267 268 269 // Returns the distance (or NULL) associated with a particular level. 270 const SCEV *FullDependence::getDistance(unsigned Level) const { 271 assert(0 < Level && Level <= Levels && "Level out of range"); 272 return DV[Level - 1].Distance; 273 } 274 275 276 // Returns true if a particular level is scalar; that is, 277 // if no subscript in the source or destination mention the induction 278 // variable associated with the loop at this level. 279 bool FullDependence::isScalar(unsigned Level) const { 280 assert(0 < Level && Level <= Levels && "Level out of range"); 281 return DV[Level - 1].Scalar; 282 } 283 284 285 // Returns true if peeling the first iteration from this loop 286 // will break this dependence. 287 bool FullDependence::isPeelFirst(unsigned Level) const { 288 assert(0 < Level && Level <= Levels && "Level out of range"); 289 return DV[Level - 1].PeelFirst; 290 } 291 292 293 // Returns true if peeling the last iteration from this loop 294 // will break this dependence. 295 bool FullDependence::isPeelLast(unsigned Level) const { 296 assert(0 < Level && Level <= Levels && "Level out of range"); 297 return DV[Level - 1].PeelLast; 298 } 299 300 301 // Returns true if splitting this loop will break the dependence. 302 bool FullDependence::isSplitable(unsigned Level) const { 303 assert(0 < Level && Level <= Levels && "Level out of range"); 304 return DV[Level - 1].Splitable; 305 } 306 307 308 //===----------------------------------------------------------------------===// 309 // DependenceInfo::Constraint methods 310 311 // If constraint is a point <X, Y>, returns X. 312 // Otherwise assert. 313 const SCEV *DependenceInfo::Constraint::getX() const { 314 assert(Kind == Point && "Kind should be Point"); 315 return A; 316 } 317 318 319 // If constraint is a point <X, Y>, returns Y. 320 // Otherwise assert. 321 const SCEV *DependenceInfo::Constraint::getY() const { 322 assert(Kind == Point && "Kind should be Point"); 323 return B; 324 } 325 326 327 // If constraint is a line AX + BY = C, returns A. 328 // Otherwise assert. 329 const SCEV *DependenceInfo::Constraint::getA() const { 330 assert((Kind == Line || Kind == Distance) && 331 "Kind should be Line (or Distance)"); 332 return A; 333 } 334 335 336 // If constraint is a line AX + BY = C, returns B. 337 // Otherwise assert. 338 const SCEV *DependenceInfo::Constraint::getB() const { 339 assert((Kind == Line || Kind == Distance) && 340 "Kind should be Line (or Distance)"); 341 return B; 342 } 343 344 345 // If constraint is a line AX + BY = C, returns C. 346 // Otherwise assert. 347 const SCEV *DependenceInfo::Constraint::getC() const { 348 assert((Kind == Line || Kind == Distance) && 349 "Kind should be Line (or Distance)"); 350 return C; 351 } 352 353 354 // If constraint is a distance, returns D. 355 // Otherwise assert. 356 const SCEV *DependenceInfo::Constraint::getD() const { 357 assert(Kind == Distance && "Kind should be Distance"); 358 return SE->getNegativeSCEV(C); 359 } 360 361 362 // Returns the loop associated with this constraint. 363 const Loop *DependenceInfo::Constraint::getAssociatedLoop() const { 364 assert((Kind == Distance || Kind == Line || Kind == Point) && 365 "Kind should be Distance, Line, or Point"); 366 return AssociatedLoop; 367 } 368 369 void DependenceInfo::Constraint::setPoint(const SCEV *X, const SCEV *Y, 370 const Loop *CurLoop) { 371 Kind = Point; 372 A = X; 373 B = Y; 374 AssociatedLoop = CurLoop; 375 } 376 377 void DependenceInfo::Constraint::setLine(const SCEV *AA, const SCEV *BB, 378 const SCEV *CC, const Loop *CurLoop) { 379 Kind = Line; 380 A = AA; 381 B = BB; 382 C = CC; 383 AssociatedLoop = CurLoop; 384 } 385 386 void DependenceInfo::Constraint::setDistance(const SCEV *D, 387 const Loop *CurLoop) { 388 Kind = Distance; 389 A = SE->getOne(D->getType()); 390 B = SE->getNegativeSCEV(A); 391 C = SE->getNegativeSCEV(D); 392 AssociatedLoop = CurLoop; 393 } 394 395 void DependenceInfo::Constraint::setEmpty() { Kind = Empty; } 396 397 void DependenceInfo::Constraint::setAny(ScalarEvolution *NewSE) { 398 SE = NewSE; 399 Kind = Any; 400 } 401 402 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 403 // For debugging purposes. Dumps the constraint out to OS. 404 LLVM_DUMP_METHOD void DependenceInfo::Constraint::dump(raw_ostream &OS) const { 405 if (isEmpty()) 406 OS << " Empty\n"; 407 else if (isAny()) 408 OS << " Any\n"; 409 else if (isPoint()) 410 OS << " Point is <" << *getX() << ", " << *getY() << ">\n"; 411 else if (isDistance()) 412 OS << " Distance is " << *getD() << 413 " (" << *getA() << "*X + " << *getB() << "*Y = " << *getC() << ")\n"; 414 else if (isLine()) 415 OS << " Line is " << *getA() << "*X + " << 416 *getB() << "*Y = " << *getC() << "\n"; 417 else 418 llvm_unreachable("unknown constraint type in Constraint::dump"); 419 } 420 #endif 421 422 423 // Updates X with the intersection 424 // of the Constraints X and Y. Returns true if X has changed. 425 // Corresponds to Figure 4 from the paper 426 // 427 // Practical Dependence Testing 428 // Goff, Kennedy, Tseng 429 // PLDI 1991 430 bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) { 431 ++DeltaApplications; 432 LLVM_DEBUG(dbgs() << "\tintersect constraints\n"); 433 LLVM_DEBUG(dbgs() << "\t X ="; X->dump(dbgs())); 434 LLVM_DEBUG(dbgs() << "\t Y ="; Y->dump(dbgs())); 435 assert(!Y->isPoint() && "Y must not be a Point"); 436 if (X->isAny()) { 437 if (Y->isAny()) 438 return false; 439 *X = *Y; 440 return true; 441 } 442 if (X->isEmpty()) 443 return false; 444 if (Y->isEmpty()) { 445 X->setEmpty(); 446 return true; 447 } 448 449 if (X->isDistance() && Y->isDistance()) { 450 LLVM_DEBUG(dbgs() << "\t intersect 2 distances\n"); 451 if (isKnownPredicate(CmpInst::ICMP_EQ, X->getD(), Y->getD())) 452 return false; 453 if (isKnownPredicate(CmpInst::ICMP_NE, X->getD(), Y->getD())) { 454 X->setEmpty(); 455 ++DeltaSuccesses; 456 return true; 457 } 458 // Hmmm, interesting situation. 459 // I guess if either is constant, keep it and ignore the other. 460 if (isa<SCEVConstant>(Y->getD())) { 461 *X = *Y; 462 return true; 463 } 464 return false; 465 } 466 467 // At this point, the pseudo-code in Figure 4 of the paper 468 // checks if (X->isPoint() && Y->isPoint()). 469 // This case can't occur in our implementation, 470 // since a Point can only arise as the result of intersecting 471 // two Line constraints, and the right-hand value, Y, is never 472 // the result of an intersection. 473 assert(!(X->isPoint() && Y->isPoint()) && 474 "We shouldn't ever see X->isPoint() && Y->isPoint()"); 475 476 if (X->isLine() && Y->isLine()) { 477 LLVM_DEBUG(dbgs() << "\t intersect 2 lines\n"); 478 const SCEV *Prod1 = SE->getMulExpr(X->getA(), Y->getB()); 479 const SCEV *Prod2 = SE->getMulExpr(X->getB(), Y->getA()); 480 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) { 481 // slopes are equal, so lines are parallel 482 LLVM_DEBUG(dbgs() << "\t\tsame slope\n"); 483 Prod1 = SE->getMulExpr(X->getC(), Y->getB()); 484 Prod2 = SE->getMulExpr(X->getB(), Y->getC()); 485 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) 486 return false; 487 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 488 X->setEmpty(); 489 ++DeltaSuccesses; 490 return true; 491 } 492 return false; 493 } 494 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 495 // slopes differ, so lines intersect 496 LLVM_DEBUG(dbgs() << "\t\tdifferent slopes\n"); 497 const SCEV *C1B2 = SE->getMulExpr(X->getC(), Y->getB()); 498 const SCEV *C1A2 = SE->getMulExpr(X->getC(), Y->getA()); 499 const SCEV *C2B1 = SE->getMulExpr(Y->getC(), X->getB()); 500 const SCEV *C2A1 = SE->getMulExpr(Y->getC(), X->getA()); 501 const SCEV *A1B2 = SE->getMulExpr(X->getA(), Y->getB()); 502 const SCEV *A2B1 = SE->getMulExpr(Y->getA(), X->getB()); 503 const SCEVConstant *C1A2_C2A1 = 504 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1A2, C2A1)); 505 const SCEVConstant *C1B2_C2B1 = 506 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1B2, C2B1)); 507 const SCEVConstant *A1B2_A2B1 = 508 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A1B2, A2B1)); 509 const SCEVConstant *A2B1_A1B2 = 510 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A2B1, A1B2)); 511 if (!C1B2_C2B1 || !C1A2_C2A1 || 512 !A1B2_A2B1 || !A2B1_A1B2) 513 return false; 514 APInt Xtop = C1B2_C2B1->getAPInt(); 515 APInt Xbot = A1B2_A2B1->getAPInt(); 516 APInt Ytop = C1A2_C2A1->getAPInt(); 517 APInt Ybot = A2B1_A1B2->getAPInt(); 518 LLVM_DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n"); 519 LLVM_DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n"); 520 LLVM_DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n"); 521 LLVM_DEBUG(dbgs() << "\t\tYbot = " << Ybot << "\n"); 522 APInt Xq = Xtop; // these need to be initialized, even 523 APInt Xr = Xtop; // though they're just going to be overwritten 524 APInt::sdivrem(Xtop, Xbot, Xq, Xr); 525 APInt Yq = Ytop; 526 APInt Yr = Ytop; 527 APInt::sdivrem(Ytop, Ybot, Yq, Yr); 528 if (Xr != 0 || Yr != 0) { 529 X->setEmpty(); 530 ++DeltaSuccesses; 531 return true; 532 } 533 LLVM_DEBUG(dbgs() << "\t\tX = " << Xq << ", Y = " << Yq << "\n"); 534 if (Xq.slt(0) || Yq.slt(0)) { 535 X->setEmpty(); 536 ++DeltaSuccesses; 537 return true; 538 } 539 if (const SCEVConstant *CUB = 540 collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) { 541 const APInt &UpperBound = CUB->getAPInt(); 542 LLVM_DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n"); 543 if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) { 544 X->setEmpty(); 545 ++DeltaSuccesses; 546 return true; 547 } 548 } 549 X->setPoint(SE->getConstant(Xq), 550 SE->getConstant(Yq), 551 X->getAssociatedLoop()); 552 ++DeltaSuccesses; 553 return true; 554 } 555 return false; 556 } 557 558 // if (X->isLine() && Y->isPoint()) This case can't occur. 559 assert(!(X->isLine() && Y->isPoint()) && "This case should never occur"); 560 561 if (X->isPoint() && Y->isLine()) { 562 LLVM_DEBUG(dbgs() << "\t intersect Point and Line\n"); 563 const SCEV *A1X1 = SE->getMulExpr(Y->getA(), X->getX()); 564 const SCEV *B1Y1 = SE->getMulExpr(Y->getB(), X->getY()); 565 const SCEV *Sum = SE->getAddExpr(A1X1, B1Y1); 566 if (isKnownPredicate(CmpInst::ICMP_EQ, Sum, Y->getC())) 567 return false; 568 if (isKnownPredicate(CmpInst::ICMP_NE, Sum, Y->getC())) { 569 X->setEmpty(); 570 ++DeltaSuccesses; 571 return true; 572 } 573 return false; 574 } 575 576 llvm_unreachable("shouldn't reach the end of Constraint intersection"); 577 return false; 578 } 579 580 581 //===----------------------------------------------------------------------===// 582 // DependenceInfo methods 583 584 // For debugging purposes. Dumps a dependence to OS. 585 void Dependence::dump(raw_ostream &OS) const { 586 bool Splitable = false; 587 if (isConfused()) 588 OS << "confused"; 589 else { 590 if (isConsistent()) 591 OS << "consistent "; 592 if (isFlow()) 593 OS << "flow"; 594 else if (isOutput()) 595 OS << "output"; 596 else if (isAnti()) 597 OS << "anti"; 598 else if (isInput()) 599 OS << "input"; 600 unsigned Levels = getLevels(); 601 OS << " ["; 602 for (unsigned II = 1; II <= Levels; ++II) { 603 if (isSplitable(II)) 604 Splitable = true; 605 if (isPeelFirst(II)) 606 OS << 'p'; 607 const SCEV *Distance = getDistance(II); 608 if (Distance) 609 OS << *Distance; 610 else if (isScalar(II)) 611 OS << "S"; 612 else { 613 unsigned Direction = getDirection(II); 614 if (Direction == DVEntry::ALL) 615 OS << "*"; 616 else { 617 if (Direction & DVEntry::LT) 618 OS << "<"; 619 if (Direction & DVEntry::EQ) 620 OS << "="; 621 if (Direction & DVEntry::GT) 622 OS << ">"; 623 } 624 } 625 if (isPeelLast(II)) 626 OS << 'p'; 627 if (II < Levels) 628 OS << " "; 629 } 630 if (isLoopIndependent()) 631 OS << "|<"; 632 OS << "]"; 633 if (Splitable) 634 OS << " splitable"; 635 } 636 OS << "!\n"; 637 } 638 639 // Returns NoAlias/MayAliass/MustAlias for two memory locations based upon their 640 // underlaying objects. If LocA and LocB are known to not alias (for any reason: 641 // tbaa, non-overlapping regions etc), then it is known there is no dependecy. 642 // Otherwise the underlying objects are checked to see if they point to 643 // different identifiable objects. 644 static AliasResult underlyingObjectsAlias(AliasAnalysis *AA, 645 const DataLayout &DL, 646 const MemoryLocation &LocA, 647 const MemoryLocation &LocB) { 648 // Check the original locations (minus size) for noalias, which can happen for 649 // tbaa, incompatible underlying object locations, etc. 650 MemoryLocation LocAS(LocA.Ptr, LocationSize::unknown(), LocA.AATags); 651 MemoryLocation LocBS(LocB.Ptr, LocationSize::unknown(), LocB.AATags); 652 if (AA->alias(LocAS, LocBS) == NoAlias) 653 return NoAlias; 654 655 // Check the underlying objects are the same 656 const Value *AObj = GetUnderlyingObject(LocA.Ptr, DL); 657 const Value *BObj = GetUnderlyingObject(LocB.Ptr, DL); 658 659 // If the underlying objects are the same, they must alias 660 if (AObj == BObj) 661 return MustAlias; 662 663 // We may have hit the recursion limit for underlying objects, or have 664 // underlying objects where we don't know they will alias. 665 if (!isIdentifiedObject(AObj) || !isIdentifiedObject(BObj)) 666 return MayAlias; 667 668 // Otherwise we know the objects are different and both identified objects so 669 // must not alias. 670 return NoAlias; 671 } 672 673 674 // Returns true if the load or store can be analyzed. Atomic and volatile 675 // operations have properties which this analysis does not understand. 676 static 677 bool isLoadOrStore(const Instruction *I) { 678 if (const LoadInst *LI = dyn_cast<LoadInst>(I)) 679 return LI->isUnordered(); 680 else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) 681 return SI->isUnordered(); 682 return false; 683 } 684 685 686 // Examines the loop nesting of the Src and Dst 687 // instructions and establishes their shared loops. Sets the variables 688 // CommonLevels, SrcLevels, and MaxLevels. 689 // The source and destination instructions needn't be contained in the same 690 // loop. The routine establishNestingLevels finds the level of most deeply 691 // nested loop that contains them both, CommonLevels. An instruction that's 692 // not contained in a loop is at level = 0. MaxLevels is equal to the level 693 // of the source plus the level of the destination, minus CommonLevels. 694 // This lets us allocate vectors MaxLevels in length, with room for every 695 // distinct loop referenced in both the source and destination subscripts. 696 // The variable SrcLevels is the nesting depth of the source instruction. 697 // It's used to help calculate distinct loops referenced by the destination. 698 // Here's the map from loops to levels: 699 // 0 - unused 700 // 1 - outermost common loop 701 // ... - other common loops 702 // CommonLevels - innermost common loop 703 // ... - loops containing Src but not Dst 704 // SrcLevels - innermost loop containing Src but not Dst 705 // ... - loops containing Dst but not Src 706 // MaxLevels - innermost loops containing Dst but not Src 707 // Consider the follow code fragment: 708 // for (a = ...) { 709 // for (b = ...) { 710 // for (c = ...) { 711 // for (d = ...) { 712 // A[] = ...; 713 // } 714 // } 715 // for (e = ...) { 716 // for (f = ...) { 717 // for (g = ...) { 718 // ... = A[]; 719 // } 720 // } 721 // } 722 // } 723 // } 724 // If we're looking at the possibility of a dependence between the store 725 // to A (the Src) and the load from A (the Dst), we'll note that they 726 // have 2 loops in common, so CommonLevels will equal 2 and the direction 727 // vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7. 728 // A map from loop names to loop numbers would look like 729 // a - 1 730 // b - 2 = CommonLevels 731 // c - 3 732 // d - 4 = SrcLevels 733 // e - 5 734 // f - 6 735 // g - 7 = MaxLevels 736 void DependenceInfo::establishNestingLevels(const Instruction *Src, 737 const Instruction *Dst) { 738 const BasicBlock *SrcBlock = Src->getParent(); 739 const BasicBlock *DstBlock = Dst->getParent(); 740 unsigned SrcLevel = LI->getLoopDepth(SrcBlock); 741 unsigned DstLevel = LI->getLoopDepth(DstBlock); 742 const Loop *SrcLoop = LI->getLoopFor(SrcBlock); 743 const Loop *DstLoop = LI->getLoopFor(DstBlock); 744 SrcLevels = SrcLevel; 745 MaxLevels = SrcLevel + DstLevel; 746 while (SrcLevel > DstLevel) { 747 SrcLoop = SrcLoop->getParentLoop(); 748 SrcLevel--; 749 } 750 while (DstLevel > SrcLevel) { 751 DstLoop = DstLoop->getParentLoop(); 752 DstLevel--; 753 } 754 while (SrcLoop != DstLoop) { 755 SrcLoop = SrcLoop->getParentLoop(); 756 DstLoop = DstLoop->getParentLoop(); 757 SrcLevel--; 758 } 759 CommonLevels = SrcLevel; 760 MaxLevels -= CommonLevels; 761 } 762 763 764 // Given one of the loops containing the source, return 765 // its level index in our numbering scheme. 766 unsigned DependenceInfo::mapSrcLoop(const Loop *SrcLoop) const { 767 return SrcLoop->getLoopDepth(); 768 } 769 770 771 // Given one of the loops containing the destination, 772 // return its level index in our numbering scheme. 773 unsigned DependenceInfo::mapDstLoop(const Loop *DstLoop) const { 774 unsigned D = DstLoop->getLoopDepth(); 775 if (D > CommonLevels) 776 return D - CommonLevels + SrcLevels; 777 else 778 return D; 779 } 780 781 782 // Returns true if Expression is loop invariant in LoopNest. 783 bool DependenceInfo::isLoopInvariant(const SCEV *Expression, 784 const Loop *LoopNest) const { 785 if (!LoopNest) 786 return true; 787 return SE->isLoopInvariant(Expression, LoopNest) && 788 isLoopInvariant(Expression, LoopNest->getParentLoop()); 789 } 790 791 792 793 // Finds the set of loops from the LoopNest that 794 // have a level <= CommonLevels and are referred to by the SCEV Expression. 795 void DependenceInfo::collectCommonLoops(const SCEV *Expression, 796 const Loop *LoopNest, 797 SmallBitVector &Loops) const { 798 while (LoopNest) { 799 unsigned Level = LoopNest->getLoopDepth(); 800 if (Level <= CommonLevels && !SE->isLoopInvariant(Expression, LoopNest)) 801 Loops.set(Level); 802 LoopNest = LoopNest->getParentLoop(); 803 } 804 } 805 806 void DependenceInfo::unifySubscriptType(ArrayRef<Subscript *> Pairs) { 807 808 unsigned widestWidthSeen = 0; 809 Type *widestType; 810 811 // Go through each pair and find the widest bit to which we need 812 // to extend all of them. 813 for (Subscript *Pair : Pairs) { 814 const SCEV *Src = Pair->Src; 815 const SCEV *Dst = Pair->Dst; 816 IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType()); 817 IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType()); 818 if (SrcTy == nullptr || DstTy == nullptr) { 819 assert(SrcTy == DstTy && "This function only unify integer types and " 820 "expect Src and Dst share the same type " 821 "otherwise."); 822 continue; 823 } 824 if (SrcTy->getBitWidth() > widestWidthSeen) { 825 widestWidthSeen = SrcTy->getBitWidth(); 826 widestType = SrcTy; 827 } 828 if (DstTy->getBitWidth() > widestWidthSeen) { 829 widestWidthSeen = DstTy->getBitWidth(); 830 widestType = DstTy; 831 } 832 } 833 834 835 assert(widestWidthSeen > 0); 836 837 // Now extend each pair to the widest seen. 838 for (Subscript *Pair : Pairs) { 839 const SCEV *Src = Pair->Src; 840 const SCEV *Dst = Pair->Dst; 841 IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType()); 842 IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType()); 843 if (SrcTy == nullptr || DstTy == nullptr) { 844 assert(SrcTy == DstTy && "This function only unify integer types and " 845 "expect Src and Dst share the same type " 846 "otherwise."); 847 continue; 848 } 849 if (SrcTy->getBitWidth() < widestWidthSeen) 850 // Sign-extend Src to widestType 851 Pair->Src = SE->getSignExtendExpr(Src, widestType); 852 if (DstTy->getBitWidth() < widestWidthSeen) { 853 // Sign-extend Dst to widestType 854 Pair->Dst = SE->getSignExtendExpr(Dst, widestType); 855 } 856 } 857 } 858 859 // removeMatchingExtensions - Examines a subscript pair. 860 // If the source and destination are identically sign (or zero) 861 // extended, it strips off the extension in an effect to simplify 862 // the actual analysis. 863 void DependenceInfo::removeMatchingExtensions(Subscript *Pair) { 864 const SCEV *Src = Pair->Src; 865 const SCEV *Dst = Pair->Dst; 866 if ((isa<SCEVZeroExtendExpr>(Src) && isa<SCEVZeroExtendExpr>(Dst)) || 867 (isa<SCEVSignExtendExpr>(Src) && isa<SCEVSignExtendExpr>(Dst))) { 868 const SCEVCastExpr *SrcCast = cast<SCEVCastExpr>(Src); 869 const SCEVCastExpr *DstCast = cast<SCEVCastExpr>(Dst); 870 const SCEV *SrcCastOp = SrcCast->getOperand(); 871 const SCEV *DstCastOp = DstCast->getOperand(); 872 if (SrcCastOp->getType() == DstCastOp->getType()) { 873 Pair->Src = SrcCastOp; 874 Pair->Dst = DstCastOp; 875 } 876 } 877 } 878 879 880 // Examine the scev and return true iff it's linear. 881 // Collect any loops mentioned in the set of "Loops". 882 bool DependenceInfo::checkSrcSubscript(const SCEV *Src, const Loop *LoopNest, 883 SmallBitVector &Loops) { 884 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Src); 885 if (!AddRec) 886 return isLoopInvariant(Src, LoopNest); 887 const SCEV *Start = AddRec->getStart(); 888 const SCEV *Step = AddRec->getStepRecurrence(*SE); 889 const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop()); 890 if (!isa<SCEVCouldNotCompute>(UB)) { 891 if (SE->getTypeSizeInBits(Start->getType()) < 892 SE->getTypeSizeInBits(UB->getType())) { 893 if (!AddRec->getNoWrapFlags()) 894 return false; 895 } 896 } 897 if (!isLoopInvariant(Step, LoopNest)) 898 return false; 899 Loops.set(mapSrcLoop(AddRec->getLoop())); 900 return checkSrcSubscript(Start, LoopNest, Loops); 901 } 902 903 904 905 // Examine the scev and return true iff it's linear. 906 // Collect any loops mentioned in the set of "Loops". 907 bool DependenceInfo::checkDstSubscript(const SCEV *Dst, const Loop *LoopNest, 908 SmallBitVector &Loops) { 909 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Dst); 910 if (!AddRec) 911 return isLoopInvariant(Dst, LoopNest); 912 const SCEV *Start = AddRec->getStart(); 913 const SCEV *Step = AddRec->getStepRecurrence(*SE); 914 const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop()); 915 if (!isa<SCEVCouldNotCompute>(UB)) { 916 if (SE->getTypeSizeInBits(Start->getType()) < 917 SE->getTypeSizeInBits(UB->getType())) { 918 if (!AddRec->getNoWrapFlags()) 919 return false; 920 } 921 } 922 if (!isLoopInvariant(Step, LoopNest)) 923 return false; 924 Loops.set(mapDstLoop(AddRec->getLoop())); 925 return checkDstSubscript(Start, LoopNest, Loops); 926 } 927 928 929 // Examines the subscript pair (the Src and Dst SCEVs) 930 // and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear. 931 // Collects the associated loops in a set. 932 DependenceInfo::Subscript::ClassificationKind 933 DependenceInfo::classifyPair(const SCEV *Src, const Loop *SrcLoopNest, 934 const SCEV *Dst, const Loop *DstLoopNest, 935 SmallBitVector &Loops) { 936 SmallBitVector SrcLoops(MaxLevels + 1); 937 SmallBitVector DstLoops(MaxLevels + 1); 938 if (!checkSrcSubscript(Src, SrcLoopNest, SrcLoops)) 939 return Subscript::NonLinear; 940 if (!checkDstSubscript(Dst, DstLoopNest, DstLoops)) 941 return Subscript::NonLinear; 942 Loops = SrcLoops; 943 Loops |= DstLoops; 944 unsigned N = Loops.count(); 945 if (N == 0) 946 return Subscript::ZIV; 947 if (N == 1) 948 return Subscript::SIV; 949 if (N == 2 && (SrcLoops.count() == 0 || 950 DstLoops.count() == 0 || 951 (SrcLoops.count() == 1 && DstLoops.count() == 1))) 952 return Subscript::RDIV; 953 return Subscript::MIV; 954 } 955 956 957 // A wrapper around SCEV::isKnownPredicate. 958 // Looks for cases where we're interested in comparing for equality. 959 // If both X and Y have been identically sign or zero extended, 960 // it strips off the (confusing) extensions before invoking 961 // SCEV::isKnownPredicate. Perhaps, someday, the ScalarEvolution package 962 // will be similarly updated. 963 // 964 // If SCEV::isKnownPredicate can't prove the predicate, 965 // we try simple subtraction, which seems to help in some cases 966 // involving symbolics. 967 bool DependenceInfo::isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *X, 968 const SCEV *Y) const { 969 if (Pred == CmpInst::ICMP_EQ || 970 Pred == CmpInst::ICMP_NE) { 971 if ((isa<SCEVSignExtendExpr>(X) && 972 isa<SCEVSignExtendExpr>(Y)) || 973 (isa<SCEVZeroExtendExpr>(X) && 974 isa<SCEVZeroExtendExpr>(Y))) { 975 const SCEVCastExpr *CX = cast<SCEVCastExpr>(X); 976 const SCEVCastExpr *CY = cast<SCEVCastExpr>(Y); 977 const SCEV *Xop = CX->getOperand(); 978 const SCEV *Yop = CY->getOperand(); 979 if (Xop->getType() == Yop->getType()) { 980 X = Xop; 981 Y = Yop; 982 } 983 } 984 } 985 if (SE->isKnownPredicate(Pred, X, Y)) 986 return true; 987 // If SE->isKnownPredicate can't prove the condition, 988 // we try the brute-force approach of subtracting 989 // and testing the difference. 990 // By testing with SE->isKnownPredicate first, we avoid 991 // the possibility of overflow when the arguments are constants. 992 const SCEV *Delta = SE->getMinusSCEV(X, Y); 993 switch (Pred) { 994 case CmpInst::ICMP_EQ: 995 return Delta->isZero(); 996 case CmpInst::ICMP_NE: 997 return SE->isKnownNonZero(Delta); 998 case CmpInst::ICMP_SGE: 999 return SE->isKnownNonNegative(Delta); 1000 case CmpInst::ICMP_SLE: 1001 return SE->isKnownNonPositive(Delta); 1002 case CmpInst::ICMP_SGT: 1003 return SE->isKnownPositive(Delta); 1004 case CmpInst::ICMP_SLT: 1005 return SE->isKnownNegative(Delta); 1006 default: 1007 llvm_unreachable("unexpected predicate in isKnownPredicate"); 1008 } 1009 } 1010 1011 /// Compare to see if S is less than Size, using isKnownNegative(S - max(Size, 1)) 1012 /// with some extra checking if S is an AddRec and we can prove less-than using 1013 /// the loop bounds. 1014 bool DependenceInfo::isKnownLessThan(const SCEV *S, const SCEV *Size) const { 1015 // First unify to the same type 1016 auto *SType = dyn_cast<IntegerType>(S->getType()); 1017 auto *SizeType = dyn_cast<IntegerType>(Size->getType()); 1018 if (!SType || !SizeType) 1019 return false; 1020 Type *MaxType = 1021 (SType->getBitWidth() >= SizeType->getBitWidth()) ? SType : SizeType; 1022 S = SE->getTruncateOrZeroExtend(S, MaxType); 1023 Size = SE->getTruncateOrZeroExtend(Size, MaxType); 1024 1025 // Special check for addrecs using BE taken count 1026 const SCEV *Bound = SE->getMinusSCEV(S, Size); 1027 if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Bound)) { 1028 if (AddRec->isAffine()) { 1029 const SCEV *BECount = SE->getBackedgeTakenCount(AddRec->getLoop()); 1030 if (!isa<SCEVCouldNotCompute>(BECount)) { 1031 const SCEV *Limit = AddRec->evaluateAtIteration(BECount, *SE); 1032 if (SE->isKnownNegative(Limit)) 1033 return true; 1034 } 1035 } 1036 } 1037 1038 // Check using normal isKnownNegative 1039 const SCEV *LimitedBound = 1040 SE->getMinusSCEV(S, SE->getSMaxExpr(Size, SE->getOne(Size->getType()))); 1041 return SE->isKnownNegative(LimitedBound); 1042 } 1043 1044 bool DependenceInfo::isKnownNonNegative(const SCEV *S, const Value *Ptr) const { 1045 bool Inbounds = false; 1046 if (auto *SrcGEP = dyn_cast<GetElementPtrInst>(Ptr)) 1047 Inbounds = SrcGEP->isInBounds(); 1048 if (Inbounds) { 1049 if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) { 1050 if (AddRec->isAffine()) { 1051 // We know S is for Ptr, the operand on a load/store, so doesn't wrap. 1052 // If both parts are NonNegative, the end result will be NonNegative 1053 if (SE->isKnownNonNegative(AddRec->getStart()) && 1054 SE->isKnownNonNegative(AddRec->getOperand(1))) 1055 return true; 1056 } 1057 } 1058 } 1059 1060 return SE->isKnownNonNegative(S); 1061 } 1062 1063 // All subscripts are all the same type. 1064 // Loop bound may be smaller (e.g., a char). 1065 // Should zero extend loop bound, since it's always >= 0. 1066 // This routine collects upper bound and extends or truncates if needed. 1067 // Truncating is safe when subscripts are known not to wrap. Cases without 1068 // nowrap flags should have been rejected earlier. 1069 // Return null if no bound available. 1070 const SCEV *DependenceInfo::collectUpperBound(const Loop *L, Type *T) const { 1071 if (SE->hasLoopInvariantBackedgeTakenCount(L)) { 1072 const SCEV *UB = SE->getBackedgeTakenCount(L); 1073 return SE->getTruncateOrZeroExtend(UB, T); 1074 } 1075 return nullptr; 1076 } 1077 1078 1079 // Calls collectUpperBound(), then attempts to cast it to SCEVConstant. 1080 // If the cast fails, returns NULL. 1081 const SCEVConstant *DependenceInfo::collectConstantUpperBound(const Loop *L, 1082 Type *T) const { 1083 if (const SCEV *UB = collectUpperBound(L, T)) 1084 return dyn_cast<SCEVConstant>(UB); 1085 return nullptr; 1086 } 1087 1088 1089 // testZIV - 1090 // When we have a pair of subscripts of the form [c1] and [c2], 1091 // where c1 and c2 are both loop invariant, we attack it using 1092 // the ZIV test. Basically, we test by comparing the two values, 1093 // but there are actually three possible results: 1094 // 1) the values are equal, so there's a dependence 1095 // 2) the values are different, so there's no dependence 1096 // 3) the values might be equal, so we have to assume a dependence. 1097 // 1098 // Return true if dependence disproved. 1099 bool DependenceInfo::testZIV(const SCEV *Src, const SCEV *Dst, 1100 FullDependence &Result) const { 1101 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n"); 1102 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n"); 1103 ++ZIVapplications; 1104 if (isKnownPredicate(CmpInst::ICMP_EQ, Src, Dst)) { 1105 LLVM_DEBUG(dbgs() << " provably dependent\n"); 1106 return false; // provably dependent 1107 } 1108 if (isKnownPredicate(CmpInst::ICMP_NE, Src, Dst)) { 1109 LLVM_DEBUG(dbgs() << " provably independent\n"); 1110 ++ZIVindependence; 1111 return true; // provably independent 1112 } 1113 LLVM_DEBUG(dbgs() << " possibly dependent\n"); 1114 Result.Consistent = false; 1115 return false; // possibly dependent 1116 } 1117 1118 1119 // strongSIVtest - 1120 // From the paper, Practical Dependence Testing, Section 4.2.1 1121 // 1122 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 + a*i], 1123 // where i is an induction variable, c1 and c2 are loop invariant, 1124 // and a is a constant, we can solve it exactly using the Strong SIV test. 1125 // 1126 // Can prove independence. Failing that, can compute distance (and direction). 1127 // In the presence of symbolic terms, we can sometimes make progress. 1128 // 1129 // If there's a dependence, 1130 // 1131 // c1 + a*i = c2 + a*i' 1132 // 1133 // The dependence distance is 1134 // 1135 // d = i' - i = (c1 - c2)/a 1136 // 1137 // A dependence only exists if d is an integer and abs(d) <= U, where U is the 1138 // loop's upper bound. If a dependence exists, the dependence direction is 1139 // defined as 1140 // 1141 // { < if d > 0 1142 // direction = { = if d = 0 1143 // { > if d < 0 1144 // 1145 // Return true if dependence disproved. 1146 bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst, 1147 const SCEV *DstConst, const Loop *CurLoop, 1148 unsigned Level, FullDependence &Result, 1149 Constraint &NewConstraint) const { 1150 LLVM_DEBUG(dbgs() << "\tStrong SIV test\n"); 1151 LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff); 1152 LLVM_DEBUG(dbgs() << ", " << *Coeff->getType() << "\n"); 1153 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst); 1154 LLVM_DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n"); 1155 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst); 1156 LLVM_DEBUG(dbgs() << ", " << *DstConst->getType() << "\n"); 1157 ++StrongSIVapplications; 1158 assert(0 < Level && Level <= CommonLevels && "level out of range"); 1159 Level--; 1160 1161 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1162 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta); 1163 LLVM_DEBUG(dbgs() << ", " << *Delta->getType() << "\n"); 1164 1165 // check that |Delta| < iteration count 1166 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1167 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound); 1168 LLVM_DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n"); 1169 const SCEV *AbsDelta = 1170 SE->isKnownNonNegative(Delta) ? Delta : SE->getNegativeSCEV(Delta); 1171 const SCEV *AbsCoeff = 1172 SE->isKnownNonNegative(Coeff) ? Coeff : SE->getNegativeSCEV(Coeff); 1173 const SCEV *Product = SE->getMulExpr(UpperBound, AbsCoeff); 1174 if (isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product)) { 1175 // Distance greater than trip count - no dependence 1176 ++StrongSIVindependence; 1177 ++StrongSIVsuccesses; 1178 return true; 1179 } 1180 } 1181 1182 // Can we compute distance? 1183 if (isa<SCEVConstant>(Delta) && isa<SCEVConstant>(Coeff)) { 1184 APInt ConstDelta = cast<SCEVConstant>(Delta)->getAPInt(); 1185 APInt ConstCoeff = cast<SCEVConstant>(Coeff)->getAPInt(); 1186 APInt Distance = ConstDelta; // these need to be initialized 1187 APInt Remainder = ConstDelta; 1188 APInt::sdivrem(ConstDelta, ConstCoeff, Distance, Remainder); 1189 LLVM_DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1190 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1191 // Make sure Coeff divides Delta exactly 1192 if (Remainder != 0) { 1193 // Coeff doesn't divide Distance, no dependence 1194 ++StrongSIVindependence; 1195 ++StrongSIVsuccesses; 1196 return true; 1197 } 1198 Result.DV[Level].Distance = SE->getConstant(Distance); 1199 NewConstraint.setDistance(SE->getConstant(Distance), CurLoop); 1200 if (Distance.sgt(0)) 1201 Result.DV[Level].Direction &= Dependence::DVEntry::LT; 1202 else if (Distance.slt(0)) 1203 Result.DV[Level].Direction &= Dependence::DVEntry::GT; 1204 else 1205 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1206 ++StrongSIVsuccesses; 1207 } 1208 else if (Delta->isZero()) { 1209 // since 0/X == 0 1210 Result.DV[Level].Distance = Delta; 1211 NewConstraint.setDistance(Delta, CurLoop); 1212 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1213 ++StrongSIVsuccesses; 1214 } 1215 else { 1216 if (Coeff->isOne()) { 1217 LLVM_DEBUG(dbgs() << "\t Distance = " << *Delta << "\n"); 1218 Result.DV[Level].Distance = Delta; // since X/1 == X 1219 NewConstraint.setDistance(Delta, CurLoop); 1220 } 1221 else { 1222 Result.Consistent = false; 1223 NewConstraint.setLine(Coeff, 1224 SE->getNegativeSCEV(Coeff), 1225 SE->getNegativeSCEV(Delta), CurLoop); 1226 } 1227 1228 // maybe we can get a useful direction 1229 bool DeltaMaybeZero = !SE->isKnownNonZero(Delta); 1230 bool DeltaMaybePositive = !SE->isKnownNonPositive(Delta); 1231 bool DeltaMaybeNegative = !SE->isKnownNonNegative(Delta); 1232 bool CoeffMaybePositive = !SE->isKnownNonPositive(Coeff); 1233 bool CoeffMaybeNegative = !SE->isKnownNonNegative(Coeff); 1234 // The double negatives above are confusing. 1235 // It helps to read !SE->isKnownNonZero(Delta) 1236 // as "Delta might be Zero" 1237 unsigned NewDirection = Dependence::DVEntry::NONE; 1238 if ((DeltaMaybePositive && CoeffMaybePositive) || 1239 (DeltaMaybeNegative && CoeffMaybeNegative)) 1240 NewDirection = Dependence::DVEntry::LT; 1241 if (DeltaMaybeZero) 1242 NewDirection |= Dependence::DVEntry::EQ; 1243 if ((DeltaMaybeNegative && CoeffMaybePositive) || 1244 (DeltaMaybePositive && CoeffMaybeNegative)) 1245 NewDirection |= Dependence::DVEntry::GT; 1246 if (NewDirection < Result.DV[Level].Direction) 1247 ++StrongSIVsuccesses; 1248 Result.DV[Level].Direction &= NewDirection; 1249 } 1250 return false; 1251 } 1252 1253 1254 // weakCrossingSIVtest - 1255 // From the paper, Practical Dependence Testing, Section 4.2.2 1256 // 1257 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 - a*i], 1258 // where i is an induction variable, c1 and c2 are loop invariant, 1259 // and a is a constant, we can solve it exactly using the 1260 // Weak-Crossing SIV test. 1261 // 1262 // Given c1 + a*i = c2 - a*i', we can look for the intersection of 1263 // the two lines, where i = i', yielding 1264 // 1265 // c1 + a*i = c2 - a*i 1266 // 2a*i = c2 - c1 1267 // i = (c2 - c1)/2a 1268 // 1269 // If i < 0, there is no dependence. 1270 // If i > upperbound, there is no dependence. 1271 // If i = 0 (i.e., if c1 = c2), there's a dependence with distance = 0. 1272 // If i = upperbound, there's a dependence with distance = 0. 1273 // If i is integral, there's a dependence (all directions). 1274 // If the non-integer part = 1/2, there's a dependence (<> directions). 1275 // Otherwise, there's no dependence. 1276 // 1277 // Can prove independence. Failing that, 1278 // can sometimes refine the directions. 1279 // Can determine iteration for splitting. 1280 // 1281 // Return true if dependence disproved. 1282 bool DependenceInfo::weakCrossingSIVtest( 1283 const SCEV *Coeff, const SCEV *SrcConst, const SCEV *DstConst, 1284 const Loop *CurLoop, unsigned Level, FullDependence &Result, 1285 Constraint &NewConstraint, const SCEV *&SplitIter) const { 1286 LLVM_DEBUG(dbgs() << "\tWeak-Crossing SIV test\n"); 1287 LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff << "\n"); 1288 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1289 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1290 ++WeakCrossingSIVapplications; 1291 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1292 Level--; 1293 Result.Consistent = false; 1294 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1295 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1296 NewConstraint.setLine(Coeff, Coeff, Delta, CurLoop); 1297 if (Delta->isZero()) { 1298 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); 1299 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); 1300 ++WeakCrossingSIVsuccesses; 1301 if (!Result.DV[Level].Direction) { 1302 ++WeakCrossingSIVindependence; 1303 return true; 1304 } 1305 Result.DV[Level].Distance = Delta; // = 0 1306 return false; 1307 } 1308 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Coeff); 1309 if (!ConstCoeff) 1310 return false; 1311 1312 Result.DV[Level].Splitable = true; 1313 if (SE->isKnownNegative(ConstCoeff)) { 1314 ConstCoeff = dyn_cast<SCEVConstant>(SE->getNegativeSCEV(ConstCoeff)); 1315 assert(ConstCoeff && 1316 "dynamic cast of negative of ConstCoeff should yield constant"); 1317 Delta = SE->getNegativeSCEV(Delta); 1318 } 1319 assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive"); 1320 1321 // compute SplitIter for use by DependenceInfo::getSplitIteration() 1322 SplitIter = SE->getUDivExpr( 1323 SE->getSMaxExpr(SE->getZero(Delta->getType()), Delta), 1324 SE->getMulExpr(SE->getConstant(Delta->getType(), 2), ConstCoeff)); 1325 LLVM_DEBUG(dbgs() << "\t Split iter = " << *SplitIter << "\n"); 1326 1327 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1328 if (!ConstDelta) 1329 return false; 1330 1331 // We're certain that ConstCoeff > 0; therefore, 1332 // if Delta < 0, then no dependence. 1333 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1334 LLVM_DEBUG(dbgs() << "\t ConstCoeff = " << *ConstCoeff << "\n"); 1335 if (SE->isKnownNegative(Delta)) { 1336 // No dependence, Delta < 0 1337 ++WeakCrossingSIVindependence; 1338 ++WeakCrossingSIVsuccesses; 1339 return true; 1340 } 1341 1342 // We're certain that Delta > 0 and ConstCoeff > 0. 1343 // Check Delta/(2*ConstCoeff) against upper loop bound 1344 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1345 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1346 const SCEV *ConstantTwo = SE->getConstant(UpperBound->getType(), 2); 1347 const SCEV *ML = SE->getMulExpr(SE->getMulExpr(ConstCoeff, UpperBound), 1348 ConstantTwo); 1349 LLVM_DEBUG(dbgs() << "\t ML = " << *ML << "\n"); 1350 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, ML)) { 1351 // Delta too big, no dependence 1352 ++WeakCrossingSIVindependence; 1353 ++WeakCrossingSIVsuccesses; 1354 return true; 1355 } 1356 if (isKnownPredicate(CmpInst::ICMP_EQ, Delta, ML)) { 1357 // i = i' = UB 1358 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); 1359 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); 1360 ++WeakCrossingSIVsuccesses; 1361 if (!Result.DV[Level].Direction) { 1362 ++WeakCrossingSIVindependence; 1363 return true; 1364 } 1365 Result.DV[Level].Splitable = false; 1366 Result.DV[Level].Distance = SE->getZero(Delta->getType()); 1367 return false; 1368 } 1369 } 1370 1371 // check that Coeff divides Delta 1372 APInt APDelta = ConstDelta->getAPInt(); 1373 APInt APCoeff = ConstCoeff->getAPInt(); 1374 APInt Distance = APDelta; // these need to be initialzed 1375 APInt Remainder = APDelta; 1376 APInt::sdivrem(APDelta, APCoeff, Distance, Remainder); 1377 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1378 if (Remainder != 0) { 1379 // Coeff doesn't divide Delta, no dependence 1380 ++WeakCrossingSIVindependence; 1381 ++WeakCrossingSIVsuccesses; 1382 return true; 1383 } 1384 LLVM_DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1385 1386 // if 2*Coeff doesn't divide Delta, then the equal direction isn't possible 1387 APInt Two = APInt(Distance.getBitWidth(), 2, true); 1388 Remainder = Distance.srem(Two); 1389 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1390 if (Remainder != 0) { 1391 // Equal direction isn't possible 1392 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::EQ); 1393 ++WeakCrossingSIVsuccesses; 1394 } 1395 return false; 1396 } 1397 1398 1399 // Kirch's algorithm, from 1400 // 1401 // Optimizing Supercompilers for Supercomputers 1402 // Michael Wolfe 1403 // MIT Press, 1989 1404 // 1405 // Program 2.1, page 29. 1406 // Computes the GCD of AM and BM. 1407 // Also finds a solution to the equation ax - by = gcd(a, b). 1408 // Returns true if dependence disproved; i.e., gcd does not divide Delta. 1409 static bool findGCD(unsigned Bits, const APInt &AM, const APInt &BM, 1410 const APInt &Delta, APInt &G, APInt &X, APInt &Y) { 1411 APInt A0(Bits, 1, true), A1(Bits, 0, true); 1412 APInt B0(Bits, 0, true), B1(Bits, 1, true); 1413 APInt G0 = AM.abs(); 1414 APInt G1 = BM.abs(); 1415 APInt Q = G0; // these need to be initialized 1416 APInt R = G0; 1417 APInt::sdivrem(G0, G1, Q, R); 1418 while (R != 0) { 1419 APInt A2 = A0 - Q*A1; A0 = A1; A1 = A2; 1420 APInt B2 = B0 - Q*B1; B0 = B1; B1 = B2; 1421 G0 = G1; G1 = R; 1422 APInt::sdivrem(G0, G1, Q, R); 1423 } 1424 G = G1; 1425 LLVM_DEBUG(dbgs() << "\t GCD = " << G << "\n"); 1426 X = AM.slt(0) ? -A1 : A1; 1427 Y = BM.slt(0) ? B1 : -B1; 1428 1429 // make sure gcd divides Delta 1430 R = Delta.srem(G); 1431 if (R != 0) 1432 return true; // gcd doesn't divide Delta, no dependence 1433 Q = Delta.sdiv(G); 1434 X *= Q; 1435 Y *= Q; 1436 return false; 1437 } 1438 1439 static APInt floorOfQuotient(const APInt &A, const APInt &B) { 1440 APInt Q = A; // these need to be initialized 1441 APInt R = A; 1442 APInt::sdivrem(A, B, Q, R); 1443 if (R == 0) 1444 return Q; 1445 if ((A.sgt(0) && B.sgt(0)) || 1446 (A.slt(0) && B.slt(0))) 1447 return Q; 1448 else 1449 return Q - 1; 1450 } 1451 1452 static APInt ceilingOfQuotient(const APInt &A, const APInt &B) { 1453 APInt Q = A; // these need to be initialized 1454 APInt R = A; 1455 APInt::sdivrem(A, B, Q, R); 1456 if (R == 0) 1457 return Q; 1458 if ((A.sgt(0) && B.sgt(0)) || 1459 (A.slt(0) && B.slt(0))) 1460 return Q + 1; 1461 else 1462 return Q; 1463 } 1464 1465 1466 static 1467 APInt maxAPInt(APInt A, APInt B) { 1468 return A.sgt(B) ? A : B; 1469 } 1470 1471 1472 static 1473 APInt minAPInt(APInt A, APInt B) { 1474 return A.slt(B) ? A : B; 1475 } 1476 1477 1478 // exactSIVtest - 1479 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i], 1480 // where i is an induction variable, c1 and c2 are loop invariant, and a1 1481 // and a2 are constant, we can solve it exactly using an algorithm developed 1482 // by Banerjee and Wolfe. See Section 2.5.3 in 1483 // 1484 // Optimizing Supercompilers for Supercomputers 1485 // Michael Wolfe 1486 // MIT Press, 1989 1487 // 1488 // It's slower than the specialized tests (strong SIV, weak-zero SIV, etc), 1489 // so use them if possible. They're also a bit better with symbolics and, 1490 // in the case of the strong SIV test, can compute Distances. 1491 // 1492 // Return true if dependence disproved. 1493 bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, 1494 const SCEV *SrcConst, const SCEV *DstConst, 1495 const Loop *CurLoop, unsigned Level, 1496 FullDependence &Result, 1497 Constraint &NewConstraint) const { 1498 LLVM_DEBUG(dbgs() << "\tExact SIV test\n"); 1499 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1500 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1501 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1502 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1503 ++ExactSIVapplications; 1504 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1505 Level--; 1506 Result.Consistent = false; 1507 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1508 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1509 NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff), 1510 Delta, CurLoop); 1511 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1512 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1513 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1514 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1515 return false; 1516 1517 // find gcd 1518 APInt G, X, Y; 1519 APInt AM = ConstSrcCoeff->getAPInt(); 1520 APInt BM = ConstDstCoeff->getAPInt(); 1521 unsigned Bits = AM.getBitWidth(); 1522 if (findGCD(Bits, AM, BM, ConstDelta->getAPInt(), G, X, Y)) { 1523 // gcd doesn't divide Delta, no dependence 1524 ++ExactSIVindependence; 1525 ++ExactSIVsuccesses; 1526 return true; 1527 } 1528 1529 LLVM_DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 1530 1531 // since SCEV construction normalizes, LM = 0 1532 APInt UM(Bits, 1, true); 1533 bool UMvalid = false; 1534 // UM is perhaps unavailable, let's check 1535 if (const SCEVConstant *CUB = 1536 collectConstantUpperBound(CurLoop, Delta->getType())) { 1537 UM = CUB->getAPInt(); 1538 LLVM_DEBUG(dbgs() << "\t UM = " << UM << "\n"); 1539 UMvalid = true; 1540 } 1541 1542 APInt TU(APInt::getSignedMaxValue(Bits)); 1543 APInt TL(APInt::getSignedMinValue(Bits)); 1544 1545 // test(BM/G, LM-X) and test(-BM/G, X-UM) 1546 APInt TMUL = BM.sdiv(G); 1547 if (TMUL.sgt(0)) { 1548 TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL)); 1549 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1550 if (UMvalid) { 1551 TU = minAPInt(TU, floorOfQuotient(UM - X, TMUL)); 1552 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1553 } 1554 } 1555 else { 1556 TU = minAPInt(TU, floorOfQuotient(-X, TMUL)); 1557 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1558 if (UMvalid) { 1559 TL = maxAPInt(TL, ceilingOfQuotient(UM - X, TMUL)); 1560 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1561 } 1562 } 1563 1564 // test(AM/G, LM-Y) and test(-AM/G, Y-UM) 1565 TMUL = AM.sdiv(G); 1566 if (TMUL.sgt(0)) { 1567 TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL)); 1568 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1569 if (UMvalid) { 1570 TU = minAPInt(TU, floorOfQuotient(UM - Y, TMUL)); 1571 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1572 } 1573 } 1574 else { 1575 TU = minAPInt(TU, floorOfQuotient(-Y, TMUL)); 1576 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1577 if (UMvalid) { 1578 TL = maxAPInt(TL, ceilingOfQuotient(UM - Y, TMUL)); 1579 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1580 } 1581 } 1582 if (TL.sgt(TU)) { 1583 ++ExactSIVindependence; 1584 ++ExactSIVsuccesses; 1585 return true; 1586 } 1587 1588 // explore directions 1589 unsigned NewDirection = Dependence::DVEntry::NONE; 1590 1591 // less than 1592 APInt SaveTU(TU); // save these 1593 APInt SaveTL(TL); 1594 LLVM_DEBUG(dbgs() << "\t exploring LT direction\n"); 1595 TMUL = AM - BM; 1596 if (TMUL.sgt(0)) { 1597 TL = maxAPInt(TL, ceilingOfQuotient(X - Y + 1, TMUL)); 1598 LLVM_DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1599 } 1600 else { 1601 TU = minAPInt(TU, floorOfQuotient(X - Y + 1, TMUL)); 1602 LLVM_DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1603 } 1604 if (TL.sle(TU)) { 1605 NewDirection |= Dependence::DVEntry::LT; 1606 ++ExactSIVsuccesses; 1607 } 1608 1609 // equal 1610 TU = SaveTU; // restore 1611 TL = SaveTL; 1612 LLVM_DEBUG(dbgs() << "\t exploring EQ direction\n"); 1613 if (TMUL.sgt(0)) { 1614 TL = maxAPInt(TL, ceilingOfQuotient(X - Y, TMUL)); 1615 LLVM_DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1616 } 1617 else { 1618 TU = minAPInt(TU, floorOfQuotient(X - Y, TMUL)); 1619 LLVM_DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1620 } 1621 TMUL = BM - AM; 1622 if (TMUL.sgt(0)) { 1623 TL = maxAPInt(TL, ceilingOfQuotient(Y - X, TMUL)); 1624 LLVM_DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1625 } 1626 else { 1627 TU = minAPInt(TU, floorOfQuotient(Y - X, TMUL)); 1628 LLVM_DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1629 } 1630 if (TL.sle(TU)) { 1631 NewDirection |= Dependence::DVEntry::EQ; 1632 ++ExactSIVsuccesses; 1633 } 1634 1635 // greater than 1636 TU = SaveTU; // restore 1637 TL = SaveTL; 1638 LLVM_DEBUG(dbgs() << "\t exploring GT direction\n"); 1639 if (TMUL.sgt(0)) { 1640 TL = maxAPInt(TL, ceilingOfQuotient(Y - X + 1, TMUL)); 1641 LLVM_DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1642 } 1643 else { 1644 TU = minAPInt(TU, floorOfQuotient(Y - X + 1, TMUL)); 1645 LLVM_DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1646 } 1647 if (TL.sle(TU)) { 1648 NewDirection |= Dependence::DVEntry::GT; 1649 ++ExactSIVsuccesses; 1650 } 1651 1652 // finished 1653 Result.DV[Level].Direction &= NewDirection; 1654 if (Result.DV[Level].Direction == Dependence::DVEntry::NONE) 1655 ++ExactSIVindependence; 1656 return Result.DV[Level].Direction == Dependence::DVEntry::NONE; 1657 } 1658 1659 1660 1661 // Return true if the divisor evenly divides the dividend. 1662 static 1663 bool isRemainderZero(const SCEVConstant *Dividend, 1664 const SCEVConstant *Divisor) { 1665 const APInt &ConstDividend = Dividend->getAPInt(); 1666 const APInt &ConstDivisor = Divisor->getAPInt(); 1667 return ConstDividend.srem(ConstDivisor) == 0; 1668 } 1669 1670 1671 // weakZeroSrcSIVtest - 1672 // From the paper, Practical Dependence Testing, Section 4.2.2 1673 // 1674 // When we have a pair of subscripts of the form [c1] and [c2 + a*i], 1675 // where i is an induction variable, c1 and c2 are loop invariant, 1676 // and a is a constant, we can solve it exactly using the 1677 // Weak-Zero SIV test. 1678 // 1679 // Given 1680 // 1681 // c1 = c2 + a*i 1682 // 1683 // we get 1684 // 1685 // (c1 - c2)/a = i 1686 // 1687 // If i is not an integer, there's no dependence. 1688 // If i < 0 or > UB, there's no dependence. 1689 // If i = 0, the direction is >= and peeling the 1690 // 1st iteration will break the dependence. 1691 // If i = UB, the direction is <= and peeling the 1692 // last iteration will break the dependence. 1693 // Otherwise, the direction is *. 1694 // 1695 // Can prove independence. Failing that, we can sometimes refine 1696 // the directions. Can sometimes show that first or last 1697 // iteration carries all the dependences (so worth peeling). 1698 // 1699 // (see also weakZeroDstSIVtest) 1700 // 1701 // Return true if dependence disproved. 1702 bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff, 1703 const SCEV *SrcConst, 1704 const SCEV *DstConst, 1705 const Loop *CurLoop, unsigned Level, 1706 FullDependence &Result, 1707 Constraint &NewConstraint) const { 1708 // For the WeakSIV test, it's possible the loop isn't common to 1709 // the Src and Dst loops. If it isn't, then there's no need to 1710 // record a direction. 1711 LLVM_DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n"); 1712 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << "\n"); 1713 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1714 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1715 ++WeakZeroSIVapplications; 1716 assert(0 < Level && Level <= MaxLevels && "Level out of range"); 1717 Level--; 1718 Result.Consistent = false; 1719 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1720 NewConstraint.setLine(SE->getZero(Delta->getType()), DstCoeff, Delta, 1721 CurLoop); 1722 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1723 if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) { 1724 if (Level < CommonLevels) { 1725 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1726 Result.DV[Level].PeelFirst = true; 1727 ++WeakZeroSIVsuccesses; 1728 } 1729 return false; // dependences caused by first iteration 1730 } 1731 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1732 if (!ConstCoeff) 1733 return false; 1734 const SCEV *AbsCoeff = 1735 SE->isKnownNegative(ConstCoeff) ? 1736 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1737 const SCEV *NewDelta = 1738 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1739 1740 // check that Delta/SrcCoeff < iteration count 1741 // really check NewDelta < count*AbsCoeff 1742 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1743 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1744 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1745 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1746 ++WeakZeroSIVindependence; 1747 ++WeakZeroSIVsuccesses; 1748 return true; 1749 } 1750 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1751 // dependences caused by last iteration 1752 if (Level < CommonLevels) { 1753 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1754 Result.DV[Level].PeelLast = true; 1755 ++WeakZeroSIVsuccesses; 1756 } 1757 return false; 1758 } 1759 } 1760 1761 // check that Delta/SrcCoeff >= 0 1762 // really check that NewDelta >= 0 1763 if (SE->isKnownNegative(NewDelta)) { 1764 // No dependence, newDelta < 0 1765 ++WeakZeroSIVindependence; 1766 ++WeakZeroSIVsuccesses; 1767 return true; 1768 } 1769 1770 // if SrcCoeff doesn't divide Delta, then no dependence 1771 if (isa<SCEVConstant>(Delta) && 1772 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1773 ++WeakZeroSIVindependence; 1774 ++WeakZeroSIVsuccesses; 1775 return true; 1776 } 1777 return false; 1778 } 1779 1780 1781 // weakZeroDstSIVtest - 1782 // From the paper, Practical Dependence Testing, Section 4.2.2 1783 // 1784 // When we have a pair of subscripts of the form [c1 + a*i] and [c2], 1785 // where i is an induction variable, c1 and c2 are loop invariant, 1786 // and a is a constant, we can solve it exactly using the 1787 // Weak-Zero SIV test. 1788 // 1789 // Given 1790 // 1791 // c1 + a*i = c2 1792 // 1793 // we get 1794 // 1795 // i = (c2 - c1)/a 1796 // 1797 // If i is not an integer, there's no dependence. 1798 // If i < 0 or > UB, there's no dependence. 1799 // If i = 0, the direction is <= and peeling the 1800 // 1st iteration will break the dependence. 1801 // If i = UB, the direction is >= and peeling the 1802 // last iteration will break the dependence. 1803 // Otherwise, the direction is *. 1804 // 1805 // Can prove independence. Failing that, we can sometimes refine 1806 // the directions. Can sometimes show that first or last 1807 // iteration carries all the dependences (so worth peeling). 1808 // 1809 // (see also weakZeroSrcSIVtest) 1810 // 1811 // Return true if dependence disproved. 1812 bool DependenceInfo::weakZeroDstSIVtest(const SCEV *SrcCoeff, 1813 const SCEV *SrcConst, 1814 const SCEV *DstConst, 1815 const Loop *CurLoop, unsigned Level, 1816 FullDependence &Result, 1817 Constraint &NewConstraint) const { 1818 // For the WeakSIV test, it's possible the loop isn't common to the 1819 // Src and Dst loops. If it isn't, then there's no need to record a direction. 1820 LLVM_DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n"); 1821 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << "\n"); 1822 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1823 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1824 ++WeakZeroSIVapplications; 1825 assert(0 < Level && Level <= SrcLevels && "Level out of range"); 1826 Level--; 1827 Result.Consistent = false; 1828 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1829 NewConstraint.setLine(SrcCoeff, SE->getZero(Delta->getType()), Delta, 1830 CurLoop); 1831 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1832 if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) { 1833 if (Level < CommonLevels) { 1834 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1835 Result.DV[Level].PeelFirst = true; 1836 ++WeakZeroSIVsuccesses; 1837 } 1838 return false; // dependences caused by first iteration 1839 } 1840 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1841 if (!ConstCoeff) 1842 return false; 1843 const SCEV *AbsCoeff = 1844 SE->isKnownNegative(ConstCoeff) ? 1845 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1846 const SCEV *NewDelta = 1847 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1848 1849 // check that Delta/SrcCoeff < iteration count 1850 // really check NewDelta < count*AbsCoeff 1851 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1852 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1853 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1854 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1855 ++WeakZeroSIVindependence; 1856 ++WeakZeroSIVsuccesses; 1857 return true; 1858 } 1859 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1860 // dependences caused by last iteration 1861 if (Level < CommonLevels) { 1862 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1863 Result.DV[Level].PeelLast = true; 1864 ++WeakZeroSIVsuccesses; 1865 } 1866 return false; 1867 } 1868 } 1869 1870 // check that Delta/SrcCoeff >= 0 1871 // really check that NewDelta >= 0 1872 if (SE->isKnownNegative(NewDelta)) { 1873 // No dependence, newDelta < 0 1874 ++WeakZeroSIVindependence; 1875 ++WeakZeroSIVsuccesses; 1876 return true; 1877 } 1878 1879 // if SrcCoeff doesn't divide Delta, then no dependence 1880 if (isa<SCEVConstant>(Delta) && 1881 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1882 ++WeakZeroSIVindependence; 1883 ++WeakZeroSIVsuccesses; 1884 return true; 1885 } 1886 return false; 1887 } 1888 1889 1890 // exactRDIVtest - Tests the RDIV subscript pair for dependence. 1891 // Things of the form [c1 + a*i] and [c2 + b*j], 1892 // where i and j are induction variable, c1 and c2 are loop invariant, 1893 // and a and b are constants. 1894 // Returns true if any possible dependence is disproved. 1895 // Marks the result as inconsistent. 1896 // Works in some cases that symbolicRDIVtest doesn't, and vice versa. 1897 bool DependenceInfo::exactRDIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, 1898 const SCEV *SrcConst, const SCEV *DstConst, 1899 const Loop *SrcLoop, const Loop *DstLoop, 1900 FullDependence &Result) const { 1901 LLVM_DEBUG(dbgs() << "\tExact RDIV test\n"); 1902 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1903 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1904 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1905 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1906 ++ExactRDIVapplications; 1907 Result.Consistent = false; 1908 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1909 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1910 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1911 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1912 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1913 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1914 return false; 1915 1916 // find gcd 1917 APInt G, X, Y; 1918 APInt AM = ConstSrcCoeff->getAPInt(); 1919 APInt BM = ConstDstCoeff->getAPInt(); 1920 unsigned Bits = AM.getBitWidth(); 1921 if (findGCD(Bits, AM, BM, ConstDelta->getAPInt(), G, X, Y)) { 1922 // gcd doesn't divide Delta, no dependence 1923 ++ExactRDIVindependence; 1924 return true; 1925 } 1926 1927 LLVM_DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 1928 1929 // since SCEV construction seems to normalize, LM = 0 1930 APInt SrcUM(Bits, 1, true); 1931 bool SrcUMvalid = false; 1932 // SrcUM is perhaps unavailable, let's check 1933 if (const SCEVConstant *UpperBound = 1934 collectConstantUpperBound(SrcLoop, Delta->getType())) { 1935 SrcUM = UpperBound->getAPInt(); 1936 LLVM_DEBUG(dbgs() << "\t SrcUM = " << SrcUM << "\n"); 1937 SrcUMvalid = true; 1938 } 1939 1940 APInt DstUM(Bits, 1, true); 1941 bool DstUMvalid = false; 1942 // UM is perhaps unavailable, let's check 1943 if (const SCEVConstant *UpperBound = 1944 collectConstantUpperBound(DstLoop, Delta->getType())) { 1945 DstUM = UpperBound->getAPInt(); 1946 LLVM_DEBUG(dbgs() << "\t DstUM = " << DstUM << "\n"); 1947 DstUMvalid = true; 1948 } 1949 1950 APInt TU(APInt::getSignedMaxValue(Bits)); 1951 APInt TL(APInt::getSignedMinValue(Bits)); 1952 1953 // test(BM/G, LM-X) and test(-BM/G, X-UM) 1954 APInt TMUL = BM.sdiv(G); 1955 if (TMUL.sgt(0)) { 1956 TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL)); 1957 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1958 if (SrcUMvalid) { 1959 TU = minAPInt(TU, floorOfQuotient(SrcUM - X, TMUL)); 1960 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1961 } 1962 } 1963 else { 1964 TU = minAPInt(TU, floorOfQuotient(-X, TMUL)); 1965 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1966 if (SrcUMvalid) { 1967 TL = maxAPInt(TL, ceilingOfQuotient(SrcUM - X, TMUL)); 1968 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1969 } 1970 } 1971 1972 // test(AM/G, LM-Y) and test(-AM/G, Y-UM) 1973 TMUL = AM.sdiv(G); 1974 if (TMUL.sgt(0)) { 1975 TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL)); 1976 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1977 if (DstUMvalid) { 1978 TU = minAPInt(TU, floorOfQuotient(DstUM - Y, TMUL)); 1979 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1980 } 1981 } 1982 else { 1983 TU = minAPInt(TU, floorOfQuotient(-Y, TMUL)); 1984 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1985 if (DstUMvalid) { 1986 TL = maxAPInt(TL, ceilingOfQuotient(DstUM - Y, TMUL)); 1987 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1988 } 1989 } 1990 if (TL.sgt(TU)) 1991 ++ExactRDIVindependence; 1992 return TL.sgt(TU); 1993 } 1994 1995 1996 // symbolicRDIVtest - 1997 // In Section 4.5 of the Practical Dependence Testing paper,the authors 1998 // introduce a special case of Banerjee's Inequalities (also called the 1999 // Extreme-Value Test) that can handle some of the SIV and RDIV cases, 2000 // particularly cases with symbolics. Since it's only able to disprove 2001 // dependence (not compute distances or directions), we'll use it as a 2002 // fall back for the other tests. 2003 // 2004 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 2005 // where i and j are induction variables and c1 and c2 are loop invariants, 2006 // we can use the symbolic tests to disprove some dependences, serving as a 2007 // backup for the RDIV test. Note that i and j can be the same variable, 2008 // letting this test serve as a backup for the various SIV tests. 2009 // 2010 // For a dependence to exist, c1 + a1*i must equal c2 + a2*j for some 2011 // 0 <= i <= N1 and some 0 <= j <= N2, where N1 and N2 are the (normalized) 2012 // loop bounds for the i and j loops, respectively. So, ... 2013 // 2014 // c1 + a1*i = c2 + a2*j 2015 // a1*i - a2*j = c2 - c1 2016 // 2017 // To test for a dependence, we compute c2 - c1 and make sure it's in the 2018 // range of the maximum and minimum possible values of a1*i - a2*j. 2019 // Considering the signs of a1 and a2, we have 4 possible cases: 2020 // 2021 // 1) If a1 >= 0 and a2 >= 0, then 2022 // a1*0 - a2*N2 <= c2 - c1 <= a1*N1 - a2*0 2023 // -a2*N2 <= c2 - c1 <= a1*N1 2024 // 2025 // 2) If a1 >= 0 and a2 <= 0, then 2026 // a1*0 - a2*0 <= c2 - c1 <= a1*N1 - a2*N2 2027 // 0 <= c2 - c1 <= a1*N1 - a2*N2 2028 // 2029 // 3) If a1 <= 0 and a2 >= 0, then 2030 // a1*N1 - a2*N2 <= c2 - c1 <= a1*0 - a2*0 2031 // a1*N1 - a2*N2 <= c2 - c1 <= 0 2032 // 2033 // 4) If a1 <= 0 and a2 <= 0, then 2034 // a1*N1 - a2*0 <= c2 - c1 <= a1*0 - a2*N2 2035 // a1*N1 <= c2 - c1 <= -a2*N2 2036 // 2037 // return true if dependence disproved 2038 bool DependenceInfo::symbolicRDIVtest(const SCEV *A1, const SCEV *A2, 2039 const SCEV *C1, const SCEV *C2, 2040 const Loop *Loop1, 2041 const Loop *Loop2) const { 2042 ++SymbolicRDIVapplications; 2043 LLVM_DEBUG(dbgs() << "\ttry symbolic RDIV test\n"); 2044 LLVM_DEBUG(dbgs() << "\t A1 = " << *A1); 2045 LLVM_DEBUG(dbgs() << ", type = " << *A1->getType() << "\n"); 2046 LLVM_DEBUG(dbgs() << "\t A2 = " << *A2 << "\n"); 2047 LLVM_DEBUG(dbgs() << "\t C1 = " << *C1 << "\n"); 2048 LLVM_DEBUG(dbgs() << "\t C2 = " << *C2 << "\n"); 2049 const SCEV *N1 = collectUpperBound(Loop1, A1->getType()); 2050 const SCEV *N2 = collectUpperBound(Loop2, A1->getType()); 2051 LLVM_DEBUG(if (N1) dbgs() << "\t N1 = " << *N1 << "\n"); 2052 LLVM_DEBUG(if (N2) dbgs() << "\t N2 = " << *N2 << "\n"); 2053 const SCEV *C2_C1 = SE->getMinusSCEV(C2, C1); 2054 const SCEV *C1_C2 = SE->getMinusSCEV(C1, C2); 2055 LLVM_DEBUG(dbgs() << "\t C2 - C1 = " << *C2_C1 << "\n"); 2056 LLVM_DEBUG(dbgs() << "\t C1 - C2 = " << *C1_C2 << "\n"); 2057 if (SE->isKnownNonNegative(A1)) { 2058 if (SE->isKnownNonNegative(A2)) { 2059 // A1 >= 0 && A2 >= 0 2060 if (N1) { 2061 // make sure that c2 - c1 <= a1*N1 2062 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2063 LLVM_DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 2064 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1)) { 2065 ++SymbolicRDIVindependence; 2066 return true; 2067 } 2068 } 2069 if (N2) { 2070 // make sure that -a2*N2 <= c2 - c1, or a2*N2 >= c1 - c2 2071 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2072 LLVM_DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 2073 if (isKnownPredicate(CmpInst::ICMP_SLT, A2N2, C1_C2)) { 2074 ++SymbolicRDIVindependence; 2075 return true; 2076 } 2077 } 2078 } 2079 else if (SE->isKnownNonPositive(A2)) { 2080 // a1 >= 0 && a2 <= 0 2081 if (N1 && N2) { 2082 // make sure that c2 - c1 <= a1*N1 - a2*N2 2083 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2084 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2085 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 2086 LLVM_DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 2087 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1_A2N2)) { 2088 ++SymbolicRDIVindependence; 2089 return true; 2090 } 2091 } 2092 // make sure that 0 <= c2 - c1 2093 if (SE->isKnownNegative(C2_C1)) { 2094 ++SymbolicRDIVindependence; 2095 return true; 2096 } 2097 } 2098 } 2099 else if (SE->isKnownNonPositive(A1)) { 2100 if (SE->isKnownNonNegative(A2)) { 2101 // a1 <= 0 && a2 >= 0 2102 if (N1 && N2) { 2103 // make sure that a1*N1 - a2*N2 <= c2 - c1 2104 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2105 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2106 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 2107 LLVM_DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 2108 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1_A2N2, C2_C1)) { 2109 ++SymbolicRDIVindependence; 2110 return true; 2111 } 2112 } 2113 // make sure that c2 - c1 <= 0 2114 if (SE->isKnownPositive(C2_C1)) { 2115 ++SymbolicRDIVindependence; 2116 return true; 2117 } 2118 } 2119 else if (SE->isKnownNonPositive(A2)) { 2120 // a1 <= 0 && a2 <= 0 2121 if (N1) { 2122 // make sure that a1*N1 <= c2 - c1 2123 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2124 LLVM_DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 2125 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1, C2_C1)) { 2126 ++SymbolicRDIVindependence; 2127 return true; 2128 } 2129 } 2130 if (N2) { 2131 // make sure that c2 - c1 <= -a2*N2, or c1 - c2 >= a2*N2 2132 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2133 LLVM_DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 2134 if (isKnownPredicate(CmpInst::ICMP_SLT, C1_C2, A2N2)) { 2135 ++SymbolicRDIVindependence; 2136 return true; 2137 } 2138 } 2139 } 2140 } 2141 return false; 2142 } 2143 2144 2145 // testSIV - 2146 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 - a2*i] 2147 // where i is an induction variable, c1 and c2 are loop invariant, and a1 and 2148 // a2 are constant, we attack it with an SIV test. While they can all be 2149 // solved with the Exact SIV test, it's worthwhile to use simpler tests when 2150 // they apply; they're cheaper and sometimes more precise. 2151 // 2152 // Return true if dependence disproved. 2153 bool DependenceInfo::testSIV(const SCEV *Src, const SCEV *Dst, unsigned &Level, 2154 FullDependence &Result, Constraint &NewConstraint, 2155 const SCEV *&SplitIter) const { 2156 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n"); 2157 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2158 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2159 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2160 if (SrcAddRec && DstAddRec) { 2161 const SCEV *SrcConst = SrcAddRec->getStart(); 2162 const SCEV *DstConst = DstAddRec->getStart(); 2163 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2164 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2165 const Loop *CurLoop = SrcAddRec->getLoop(); 2166 assert(CurLoop == DstAddRec->getLoop() && 2167 "both loops in SIV should be same"); 2168 Level = mapSrcLoop(CurLoop); 2169 bool disproven; 2170 if (SrcCoeff == DstCoeff) 2171 disproven = strongSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2172 Level, Result, NewConstraint); 2173 else if (SrcCoeff == SE->getNegativeSCEV(DstCoeff)) 2174 disproven = weakCrossingSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2175 Level, Result, NewConstraint, SplitIter); 2176 else 2177 disproven = exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, 2178 Level, Result, NewConstraint); 2179 return disproven || 2180 gcdMIVtest(Src, Dst, Result) || 2181 symbolicRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, CurLoop); 2182 } 2183 if (SrcAddRec) { 2184 const SCEV *SrcConst = SrcAddRec->getStart(); 2185 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2186 const SCEV *DstConst = Dst; 2187 const Loop *CurLoop = SrcAddRec->getLoop(); 2188 Level = mapSrcLoop(CurLoop); 2189 return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2190 Level, Result, NewConstraint) || 2191 gcdMIVtest(Src, Dst, Result); 2192 } 2193 if (DstAddRec) { 2194 const SCEV *DstConst = DstAddRec->getStart(); 2195 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2196 const SCEV *SrcConst = Src; 2197 const Loop *CurLoop = DstAddRec->getLoop(); 2198 Level = mapDstLoop(CurLoop); 2199 return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst, 2200 CurLoop, Level, Result, NewConstraint) || 2201 gcdMIVtest(Src, Dst, Result); 2202 } 2203 llvm_unreachable("SIV test expected at least one AddRec"); 2204 return false; 2205 } 2206 2207 2208 // testRDIV - 2209 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 2210 // where i and j are induction variables, c1 and c2 are loop invariant, 2211 // and a1 and a2 are constant, we can solve it exactly with an easy adaptation 2212 // of the Exact SIV test, the Restricted Double Index Variable (RDIV) test. 2213 // It doesn't make sense to talk about distance or direction in this case, 2214 // so there's no point in making special versions of the Strong SIV test or 2215 // the Weak-crossing SIV test. 2216 // 2217 // With minor algebra, this test can also be used for things like 2218 // [c1 + a1*i + a2*j][c2]. 2219 // 2220 // Return true if dependence disproved. 2221 bool DependenceInfo::testRDIV(const SCEV *Src, const SCEV *Dst, 2222 FullDependence &Result) const { 2223 // we have 3 possible situations here: 2224 // 1) [a*i + b] and [c*j + d] 2225 // 2) [a*i + c*j + b] and [d] 2226 // 3) [b] and [a*i + c*j + d] 2227 // We need to find what we've got and get organized 2228 2229 const SCEV *SrcConst, *DstConst; 2230 const SCEV *SrcCoeff, *DstCoeff; 2231 const Loop *SrcLoop, *DstLoop; 2232 2233 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n"); 2234 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2235 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2236 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2237 if (SrcAddRec && DstAddRec) { 2238 SrcConst = SrcAddRec->getStart(); 2239 SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2240 SrcLoop = SrcAddRec->getLoop(); 2241 DstConst = DstAddRec->getStart(); 2242 DstCoeff = DstAddRec->getStepRecurrence(*SE); 2243 DstLoop = DstAddRec->getLoop(); 2244 } 2245 else if (SrcAddRec) { 2246 if (const SCEVAddRecExpr *tmpAddRec = 2247 dyn_cast<SCEVAddRecExpr>(SrcAddRec->getStart())) { 2248 SrcConst = tmpAddRec->getStart(); 2249 SrcCoeff = tmpAddRec->getStepRecurrence(*SE); 2250 SrcLoop = tmpAddRec->getLoop(); 2251 DstConst = Dst; 2252 DstCoeff = SE->getNegativeSCEV(SrcAddRec->getStepRecurrence(*SE)); 2253 DstLoop = SrcAddRec->getLoop(); 2254 } 2255 else 2256 llvm_unreachable("RDIV reached by surprising SCEVs"); 2257 } 2258 else if (DstAddRec) { 2259 if (const SCEVAddRecExpr *tmpAddRec = 2260 dyn_cast<SCEVAddRecExpr>(DstAddRec->getStart())) { 2261 DstConst = tmpAddRec->getStart(); 2262 DstCoeff = tmpAddRec->getStepRecurrence(*SE); 2263 DstLoop = tmpAddRec->getLoop(); 2264 SrcConst = Src; 2265 SrcCoeff = SE->getNegativeSCEV(DstAddRec->getStepRecurrence(*SE)); 2266 SrcLoop = DstAddRec->getLoop(); 2267 } 2268 else 2269 llvm_unreachable("RDIV reached by surprising SCEVs"); 2270 } 2271 else 2272 llvm_unreachable("RDIV expected at least one AddRec"); 2273 return exactRDIVtest(SrcCoeff, DstCoeff, 2274 SrcConst, DstConst, 2275 SrcLoop, DstLoop, 2276 Result) || 2277 gcdMIVtest(Src, Dst, Result) || 2278 symbolicRDIVtest(SrcCoeff, DstCoeff, 2279 SrcConst, DstConst, 2280 SrcLoop, DstLoop); 2281 } 2282 2283 2284 // Tests the single-subscript MIV pair (Src and Dst) for dependence. 2285 // Return true if dependence disproved. 2286 // Can sometimes refine direction vectors. 2287 bool DependenceInfo::testMIV(const SCEV *Src, const SCEV *Dst, 2288 const SmallBitVector &Loops, 2289 FullDependence &Result) const { 2290 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n"); 2291 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2292 Result.Consistent = false; 2293 return gcdMIVtest(Src, Dst, Result) || 2294 banerjeeMIVtest(Src, Dst, Loops, Result); 2295 } 2296 2297 2298 // Given a product, e.g., 10*X*Y, returns the first constant operand, 2299 // in this case 10. If there is no constant part, returns NULL. 2300 static 2301 const SCEVConstant *getConstantPart(const SCEV *Expr) { 2302 if (const auto *Constant = dyn_cast<SCEVConstant>(Expr)) 2303 return Constant; 2304 else if (const auto *Product = dyn_cast<SCEVMulExpr>(Expr)) 2305 if (const auto *Constant = dyn_cast<SCEVConstant>(Product->getOperand(0))) 2306 return Constant; 2307 return nullptr; 2308 } 2309 2310 2311 //===----------------------------------------------------------------------===// 2312 // gcdMIVtest - 2313 // Tests an MIV subscript pair for dependence. 2314 // Returns true if any possible dependence is disproved. 2315 // Marks the result as inconsistent. 2316 // Can sometimes disprove the equal direction for 1 or more loops, 2317 // as discussed in Michael Wolfe's book, 2318 // High Performance Compilers for Parallel Computing, page 235. 2319 // 2320 // We spend some effort (code!) to handle cases like 2321 // [10*i + 5*N*j + 15*M + 6], where i and j are induction variables, 2322 // but M and N are just loop-invariant variables. 2323 // This should help us handle linearized subscripts; 2324 // also makes this test a useful backup to the various SIV tests. 2325 // 2326 // It occurs to me that the presence of loop-invariant variables 2327 // changes the nature of the test from "greatest common divisor" 2328 // to "a common divisor". 2329 bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst, 2330 FullDependence &Result) const { 2331 LLVM_DEBUG(dbgs() << "starting gcd\n"); 2332 ++GCDapplications; 2333 unsigned BitWidth = SE->getTypeSizeInBits(Src->getType()); 2334 APInt RunningGCD = APInt::getNullValue(BitWidth); 2335 2336 // Examine Src coefficients. 2337 // Compute running GCD and record source constant. 2338 // Because we're looking for the constant at the end of the chain, 2339 // we can't quit the loop just because the GCD == 1. 2340 const SCEV *Coefficients = Src; 2341 while (const SCEVAddRecExpr *AddRec = 2342 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2343 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2344 // If the coefficient is the product of a constant and other stuff, 2345 // we can use the constant in the GCD computation. 2346 const auto *Constant = getConstantPart(Coeff); 2347 if (!Constant) 2348 return false; 2349 APInt ConstCoeff = Constant->getAPInt(); 2350 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2351 Coefficients = AddRec->getStart(); 2352 } 2353 const SCEV *SrcConst = Coefficients; 2354 2355 // Examine Dst coefficients. 2356 // Compute running GCD and record destination constant. 2357 // Because we're looking for the constant at the end of the chain, 2358 // we can't quit the loop just because the GCD == 1. 2359 Coefficients = Dst; 2360 while (const SCEVAddRecExpr *AddRec = 2361 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2362 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2363 // If the coefficient is the product of a constant and other stuff, 2364 // we can use the constant in the GCD computation. 2365 const auto *Constant = getConstantPart(Coeff); 2366 if (!Constant) 2367 return false; 2368 APInt ConstCoeff = Constant->getAPInt(); 2369 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2370 Coefficients = AddRec->getStart(); 2371 } 2372 const SCEV *DstConst = Coefficients; 2373 2374 APInt ExtraGCD = APInt::getNullValue(BitWidth); 2375 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 2376 LLVM_DEBUG(dbgs() << " Delta = " << *Delta << "\n"); 2377 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Delta); 2378 if (const SCEVAddExpr *Sum = dyn_cast<SCEVAddExpr>(Delta)) { 2379 // If Delta is a sum of products, we may be able to make further progress. 2380 for (unsigned Op = 0, Ops = Sum->getNumOperands(); Op < Ops; Op++) { 2381 const SCEV *Operand = Sum->getOperand(Op); 2382 if (isa<SCEVConstant>(Operand)) { 2383 assert(!Constant && "Surprised to find multiple constants"); 2384 Constant = cast<SCEVConstant>(Operand); 2385 } 2386 else if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Operand)) { 2387 // Search for constant operand to participate in GCD; 2388 // If none found; return false. 2389 const SCEVConstant *ConstOp = getConstantPart(Product); 2390 if (!ConstOp) 2391 return false; 2392 APInt ConstOpValue = ConstOp->getAPInt(); 2393 ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD, 2394 ConstOpValue.abs()); 2395 } 2396 else 2397 return false; 2398 } 2399 } 2400 if (!Constant) 2401 return false; 2402 APInt ConstDelta = cast<SCEVConstant>(Constant)->getAPInt(); 2403 LLVM_DEBUG(dbgs() << " ConstDelta = " << ConstDelta << "\n"); 2404 if (ConstDelta == 0) 2405 return false; 2406 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ExtraGCD); 2407 LLVM_DEBUG(dbgs() << " RunningGCD = " << RunningGCD << "\n"); 2408 APInt Remainder = ConstDelta.srem(RunningGCD); 2409 if (Remainder != 0) { 2410 ++GCDindependence; 2411 return true; 2412 } 2413 2414 // Try to disprove equal directions. 2415 // For example, given a subscript pair [3*i + 2*j] and [i' + 2*j' - 1], 2416 // the code above can't disprove the dependence because the GCD = 1. 2417 // So we consider what happen if i = i' and what happens if j = j'. 2418 // If i = i', we can simplify the subscript to [2*i + 2*j] and [2*j' - 1], 2419 // which is infeasible, so we can disallow the = direction for the i level. 2420 // Setting j = j' doesn't help matters, so we end up with a direction vector 2421 // of [<>, *] 2422 // 2423 // Given A[5*i + 10*j*M + 9*M*N] and A[15*i + 20*j*M - 21*N*M + 5], 2424 // we need to remember that the constant part is 5 and the RunningGCD should 2425 // be initialized to ExtraGCD = 30. 2426 LLVM_DEBUG(dbgs() << " ExtraGCD = " << ExtraGCD << '\n'); 2427 2428 bool Improved = false; 2429 Coefficients = Src; 2430 while (const SCEVAddRecExpr *AddRec = 2431 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2432 Coefficients = AddRec->getStart(); 2433 const Loop *CurLoop = AddRec->getLoop(); 2434 RunningGCD = ExtraGCD; 2435 const SCEV *SrcCoeff = AddRec->getStepRecurrence(*SE); 2436 const SCEV *DstCoeff = SE->getMinusSCEV(SrcCoeff, SrcCoeff); 2437 const SCEV *Inner = Src; 2438 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2439 AddRec = cast<SCEVAddRecExpr>(Inner); 2440 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2441 if (CurLoop == AddRec->getLoop()) 2442 ; // SrcCoeff == Coeff 2443 else { 2444 // If the coefficient is the product of a constant and other stuff, 2445 // we can use the constant in the GCD computation. 2446 Constant = getConstantPart(Coeff); 2447 if (!Constant) 2448 return false; 2449 APInt ConstCoeff = Constant->getAPInt(); 2450 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2451 } 2452 Inner = AddRec->getStart(); 2453 } 2454 Inner = Dst; 2455 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2456 AddRec = cast<SCEVAddRecExpr>(Inner); 2457 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2458 if (CurLoop == AddRec->getLoop()) 2459 DstCoeff = Coeff; 2460 else { 2461 // If the coefficient is the product of a constant and other stuff, 2462 // we can use the constant in the GCD computation. 2463 Constant = getConstantPart(Coeff); 2464 if (!Constant) 2465 return false; 2466 APInt ConstCoeff = Constant->getAPInt(); 2467 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2468 } 2469 Inner = AddRec->getStart(); 2470 } 2471 Delta = SE->getMinusSCEV(SrcCoeff, DstCoeff); 2472 // If the coefficient is the product of a constant and other stuff, 2473 // we can use the constant in the GCD computation. 2474 Constant = getConstantPart(Delta); 2475 if (!Constant) 2476 // The difference of the two coefficients might not be a product 2477 // or constant, in which case we give up on this direction. 2478 continue; 2479 APInt ConstCoeff = Constant->getAPInt(); 2480 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2481 LLVM_DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n"); 2482 if (RunningGCD != 0) { 2483 Remainder = ConstDelta.srem(RunningGCD); 2484 LLVM_DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n"); 2485 if (Remainder != 0) { 2486 unsigned Level = mapSrcLoop(CurLoop); 2487 Result.DV[Level - 1].Direction &= unsigned(~Dependence::DVEntry::EQ); 2488 Improved = true; 2489 } 2490 } 2491 } 2492 if (Improved) 2493 ++GCDsuccesses; 2494 LLVM_DEBUG(dbgs() << "all done\n"); 2495 return false; 2496 } 2497 2498 2499 //===----------------------------------------------------------------------===// 2500 // banerjeeMIVtest - 2501 // Use Banerjee's Inequalities to test an MIV subscript pair. 2502 // (Wolfe, in the race-car book, calls this the Extreme Value Test.) 2503 // Generally follows the discussion in Section 2.5.2 of 2504 // 2505 // Optimizing Supercompilers for Supercomputers 2506 // Michael Wolfe 2507 // 2508 // The inequalities given on page 25 are simplified in that loops are 2509 // normalized so that the lower bound is always 0 and the stride is always 1. 2510 // For example, Wolfe gives 2511 // 2512 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2513 // 2514 // where A_k is the coefficient of the kth index in the source subscript, 2515 // B_k is the coefficient of the kth index in the destination subscript, 2516 // U_k is the upper bound of the kth index, L_k is the lower bound of the Kth 2517 // index, and N_k is the stride of the kth index. Since all loops are normalized 2518 // by the SCEV package, N_k = 1 and L_k = 0, allowing us to simplify the 2519 // equation to 2520 // 2521 // LB^<_k = (A^-_k - B_k)^- (U_k - 0 - 1) + (A_k - B_k)0 - B_k 1 2522 // = (A^-_k - B_k)^- (U_k - 1) - B_k 2523 // 2524 // Similar simplifications are possible for the other equations. 2525 // 2526 // When we can't determine the number of iterations for a loop, 2527 // we use NULL as an indicator for the worst case, infinity. 2528 // When computing the upper bound, NULL denotes +inf; 2529 // for the lower bound, NULL denotes -inf. 2530 // 2531 // Return true if dependence disproved. 2532 bool DependenceInfo::banerjeeMIVtest(const SCEV *Src, const SCEV *Dst, 2533 const SmallBitVector &Loops, 2534 FullDependence &Result) const { 2535 LLVM_DEBUG(dbgs() << "starting Banerjee\n"); 2536 ++BanerjeeApplications; 2537 LLVM_DEBUG(dbgs() << " Src = " << *Src << '\n'); 2538 const SCEV *A0; 2539 CoefficientInfo *A = collectCoeffInfo(Src, true, A0); 2540 LLVM_DEBUG(dbgs() << " Dst = " << *Dst << '\n'); 2541 const SCEV *B0; 2542 CoefficientInfo *B = collectCoeffInfo(Dst, false, B0); 2543 BoundInfo *Bound = new BoundInfo[MaxLevels + 1]; 2544 const SCEV *Delta = SE->getMinusSCEV(B0, A0); 2545 LLVM_DEBUG(dbgs() << "\tDelta = " << *Delta << '\n'); 2546 2547 // Compute bounds for all the * directions. 2548 LLVM_DEBUG(dbgs() << "\tBounds[*]\n"); 2549 for (unsigned K = 1; K <= MaxLevels; ++K) { 2550 Bound[K].Iterations = A[K].Iterations ? A[K].Iterations : B[K].Iterations; 2551 Bound[K].Direction = Dependence::DVEntry::ALL; 2552 Bound[K].DirSet = Dependence::DVEntry::NONE; 2553 findBoundsALL(A, B, Bound, K); 2554 #ifndef NDEBUG 2555 LLVM_DEBUG(dbgs() << "\t " << K << '\t'); 2556 if (Bound[K].Lower[Dependence::DVEntry::ALL]) 2557 LLVM_DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << '\t'); 2558 else 2559 LLVM_DEBUG(dbgs() << "-inf\t"); 2560 if (Bound[K].Upper[Dependence::DVEntry::ALL]) 2561 LLVM_DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << '\n'); 2562 else 2563 LLVM_DEBUG(dbgs() << "+inf\n"); 2564 #endif 2565 } 2566 2567 // Test the *, *, *, ... case. 2568 bool Disproved = false; 2569 if (testBounds(Dependence::DVEntry::ALL, 0, Bound, Delta)) { 2570 // Explore the direction vector hierarchy. 2571 unsigned DepthExpanded = 0; 2572 unsigned NewDeps = exploreDirections(1, A, B, Bound, 2573 Loops, DepthExpanded, Delta); 2574 if (NewDeps > 0) { 2575 bool Improved = false; 2576 for (unsigned K = 1; K <= CommonLevels; ++K) { 2577 if (Loops[K]) { 2578 unsigned Old = Result.DV[K - 1].Direction; 2579 Result.DV[K - 1].Direction = Old & Bound[K].DirSet; 2580 Improved |= Old != Result.DV[K - 1].Direction; 2581 if (!Result.DV[K - 1].Direction) { 2582 Improved = false; 2583 Disproved = true; 2584 break; 2585 } 2586 } 2587 } 2588 if (Improved) 2589 ++BanerjeeSuccesses; 2590 } 2591 else { 2592 ++BanerjeeIndependence; 2593 Disproved = true; 2594 } 2595 } 2596 else { 2597 ++BanerjeeIndependence; 2598 Disproved = true; 2599 } 2600 delete [] Bound; 2601 delete [] A; 2602 delete [] B; 2603 return Disproved; 2604 } 2605 2606 2607 // Hierarchically expands the direction vector 2608 // search space, combining the directions of discovered dependences 2609 // in the DirSet field of Bound. Returns the number of distinct 2610 // dependences discovered. If the dependence is disproved, 2611 // it will return 0. 2612 unsigned DependenceInfo::exploreDirections(unsigned Level, CoefficientInfo *A, 2613 CoefficientInfo *B, BoundInfo *Bound, 2614 const SmallBitVector &Loops, 2615 unsigned &DepthExpanded, 2616 const SCEV *Delta) const { 2617 if (Level > CommonLevels) { 2618 // record result 2619 LLVM_DEBUG(dbgs() << "\t["); 2620 for (unsigned K = 1; K <= CommonLevels; ++K) { 2621 if (Loops[K]) { 2622 Bound[K].DirSet |= Bound[K].Direction; 2623 #ifndef NDEBUG 2624 switch (Bound[K].Direction) { 2625 case Dependence::DVEntry::LT: 2626 LLVM_DEBUG(dbgs() << " <"); 2627 break; 2628 case Dependence::DVEntry::EQ: 2629 LLVM_DEBUG(dbgs() << " ="); 2630 break; 2631 case Dependence::DVEntry::GT: 2632 LLVM_DEBUG(dbgs() << " >"); 2633 break; 2634 case Dependence::DVEntry::ALL: 2635 LLVM_DEBUG(dbgs() << " *"); 2636 break; 2637 default: 2638 llvm_unreachable("unexpected Bound[K].Direction"); 2639 } 2640 #endif 2641 } 2642 } 2643 LLVM_DEBUG(dbgs() << " ]\n"); 2644 return 1; 2645 } 2646 if (Loops[Level]) { 2647 if (Level > DepthExpanded) { 2648 DepthExpanded = Level; 2649 // compute bounds for <, =, > at current level 2650 findBoundsLT(A, B, Bound, Level); 2651 findBoundsGT(A, B, Bound, Level); 2652 findBoundsEQ(A, B, Bound, Level); 2653 #ifndef NDEBUG 2654 LLVM_DEBUG(dbgs() << "\tBound for level = " << Level << '\n'); 2655 LLVM_DEBUG(dbgs() << "\t <\t"); 2656 if (Bound[Level].Lower[Dependence::DVEntry::LT]) 2657 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT] 2658 << '\t'); 2659 else 2660 LLVM_DEBUG(dbgs() << "-inf\t"); 2661 if (Bound[Level].Upper[Dependence::DVEntry::LT]) 2662 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT] 2663 << '\n'); 2664 else 2665 LLVM_DEBUG(dbgs() << "+inf\n"); 2666 LLVM_DEBUG(dbgs() << "\t =\t"); 2667 if (Bound[Level].Lower[Dependence::DVEntry::EQ]) 2668 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ] 2669 << '\t'); 2670 else 2671 LLVM_DEBUG(dbgs() << "-inf\t"); 2672 if (Bound[Level].Upper[Dependence::DVEntry::EQ]) 2673 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ] 2674 << '\n'); 2675 else 2676 LLVM_DEBUG(dbgs() << "+inf\n"); 2677 LLVM_DEBUG(dbgs() << "\t >\t"); 2678 if (Bound[Level].Lower[Dependence::DVEntry::GT]) 2679 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::GT] 2680 << '\t'); 2681 else 2682 LLVM_DEBUG(dbgs() << "-inf\t"); 2683 if (Bound[Level].Upper[Dependence::DVEntry::GT]) 2684 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT] 2685 << '\n'); 2686 else 2687 LLVM_DEBUG(dbgs() << "+inf\n"); 2688 #endif 2689 } 2690 2691 unsigned NewDeps = 0; 2692 2693 // test bounds for <, *, *, ... 2694 if (testBounds(Dependence::DVEntry::LT, Level, Bound, Delta)) 2695 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2696 Loops, DepthExpanded, Delta); 2697 2698 // Test bounds for =, *, *, ... 2699 if (testBounds(Dependence::DVEntry::EQ, Level, Bound, Delta)) 2700 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2701 Loops, DepthExpanded, Delta); 2702 2703 // test bounds for >, *, *, ... 2704 if (testBounds(Dependence::DVEntry::GT, Level, Bound, Delta)) 2705 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2706 Loops, DepthExpanded, Delta); 2707 2708 Bound[Level].Direction = Dependence::DVEntry::ALL; 2709 return NewDeps; 2710 } 2711 else 2712 return exploreDirections(Level + 1, A, B, Bound, Loops, DepthExpanded, Delta); 2713 } 2714 2715 2716 // Returns true iff the current bounds are plausible. 2717 bool DependenceInfo::testBounds(unsigned char DirKind, unsigned Level, 2718 BoundInfo *Bound, const SCEV *Delta) const { 2719 Bound[Level].Direction = DirKind; 2720 if (const SCEV *LowerBound = getLowerBound(Bound)) 2721 if (isKnownPredicate(CmpInst::ICMP_SGT, LowerBound, Delta)) 2722 return false; 2723 if (const SCEV *UpperBound = getUpperBound(Bound)) 2724 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, UpperBound)) 2725 return false; 2726 return true; 2727 } 2728 2729 2730 // Computes the upper and lower bounds for level K 2731 // using the * direction. Records them in Bound. 2732 // Wolfe gives the equations 2733 // 2734 // LB^*_k = (A^-_k - B^+_k)(U_k - L_k) + (A_k - B_k)L_k 2735 // UB^*_k = (A^+_k - B^-_k)(U_k - L_k) + (A_k - B_k)L_k 2736 // 2737 // Since we normalize loops, we can simplify these equations to 2738 // 2739 // LB^*_k = (A^-_k - B^+_k)U_k 2740 // UB^*_k = (A^+_k - B^-_k)U_k 2741 // 2742 // We must be careful to handle the case where the upper bound is unknown. 2743 // Note that the lower bound is always <= 0 2744 // and the upper bound is always >= 0. 2745 void DependenceInfo::findBoundsALL(CoefficientInfo *A, CoefficientInfo *B, 2746 BoundInfo *Bound, unsigned K) const { 2747 Bound[K].Lower[Dependence::DVEntry::ALL] = nullptr; // Default value = -infinity. 2748 Bound[K].Upper[Dependence::DVEntry::ALL] = nullptr; // Default value = +infinity. 2749 if (Bound[K].Iterations) { 2750 Bound[K].Lower[Dependence::DVEntry::ALL] = 2751 SE->getMulExpr(SE->getMinusSCEV(A[K].NegPart, B[K].PosPart), 2752 Bound[K].Iterations); 2753 Bound[K].Upper[Dependence::DVEntry::ALL] = 2754 SE->getMulExpr(SE->getMinusSCEV(A[K].PosPart, B[K].NegPart), 2755 Bound[K].Iterations); 2756 } 2757 else { 2758 // If the difference is 0, we won't need to know the number of iterations. 2759 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].NegPart, B[K].PosPart)) 2760 Bound[K].Lower[Dependence::DVEntry::ALL] = 2761 SE->getZero(A[K].Coeff->getType()); 2762 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].PosPart, B[K].NegPart)) 2763 Bound[K].Upper[Dependence::DVEntry::ALL] = 2764 SE->getZero(A[K].Coeff->getType()); 2765 } 2766 } 2767 2768 2769 // Computes the upper and lower bounds for level K 2770 // using the = direction. Records them in Bound. 2771 // Wolfe gives the equations 2772 // 2773 // LB^=_k = (A_k - B_k)^- (U_k - L_k) + (A_k - B_k)L_k 2774 // UB^=_k = (A_k - B_k)^+ (U_k - L_k) + (A_k - B_k)L_k 2775 // 2776 // Since we normalize loops, we can simplify these equations to 2777 // 2778 // LB^=_k = (A_k - B_k)^- U_k 2779 // UB^=_k = (A_k - B_k)^+ U_k 2780 // 2781 // We must be careful to handle the case where the upper bound is unknown. 2782 // Note that the lower bound is always <= 0 2783 // and the upper bound is always >= 0. 2784 void DependenceInfo::findBoundsEQ(CoefficientInfo *A, CoefficientInfo *B, 2785 BoundInfo *Bound, unsigned K) const { 2786 Bound[K].Lower[Dependence::DVEntry::EQ] = nullptr; // Default value = -infinity. 2787 Bound[K].Upper[Dependence::DVEntry::EQ] = nullptr; // Default value = +infinity. 2788 if (Bound[K].Iterations) { 2789 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2790 const SCEV *NegativePart = getNegativePart(Delta); 2791 Bound[K].Lower[Dependence::DVEntry::EQ] = 2792 SE->getMulExpr(NegativePart, Bound[K].Iterations); 2793 const SCEV *PositivePart = getPositivePart(Delta); 2794 Bound[K].Upper[Dependence::DVEntry::EQ] = 2795 SE->getMulExpr(PositivePart, Bound[K].Iterations); 2796 } 2797 else { 2798 // If the positive/negative part of the difference is 0, 2799 // we won't need to know the number of iterations. 2800 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2801 const SCEV *NegativePart = getNegativePart(Delta); 2802 if (NegativePart->isZero()) 2803 Bound[K].Lower[Dependence::DVEntry::EQ] = NegativePart; // Zero 2804 const SCEV *PositivePart = getPositivePart(Delta); 2805 if (PositivePart->isZero()) 2806 Bound[K].Upper[Dependence::DVEntry::EQ] = PositivePart; // Zero 2807 } 2808 } 2809 2810 2811 // Computes the upper and lower bounds for level K 2812 // using the < direction. Records them in Bound. 2813 // Wolfe gives the equations 2814 // 2815 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2816 // UB^<_k = (A^+_k - B_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2817 // 2818 // Since we normalize loops, we can simplify these equations to 2819 // 2820 // LB^<_k = (A^-_k - B_k)^- (U_k - 1) - B_k 2821 // UB^<_k = (A^+_k - B_k)^+ (U_k - 1) - B_k 2822 // 2823 // We must be careful to handle the case where the upper bound is unknown. 2824 void DependenceInfo::findBoundsLT(CoefficientInfo *A, CoefficientInfo *B, 2825 BoundInfo *Bound, unsigned K) const { 2826 Bound[K].Lower[Dependence::DVEntry::LT] = nullptr; // Default value = -infinity. 2827 Bound[K].Upper[Dependence::DVEntry::LT] = nullptr; // Default value = +infinity. 2828 if (Bound[K].Iterations) { 2829 const SCEV *Iter_1 = SE->getMinusSCEV( 2830 Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType())); 2831 const SCEV *NegPart = 2832 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2833 Bound[K].Lower[Dependence::DVEntry::LT] = 2834 SE->getMinusSCEV(SE->getMulExpr(NegPart, Iter_1), B[K].Coeff); 2835 const SCEV *PosPart = 2836 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2837 Bound[K].Upper[Dependence::DVEntry::LT] = 2838 SE->getMinusSCEV(SE->getMulExpr(PosPart, Iter_1), B[K].Coeff); 2839 } 2840 else { 2841 // If the positive/negative part of the difference is 0, 2842 // we won't need to know the number of iterations. 2843 const SCEV *NegPart = 2844 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2845 if (NegPart->isZero()) 2846 Bound[K].Lower[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2847 const SCEV *PosPart = 2848 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2849 if (PosPart->isZero()) 2850 Bound[K].Upper[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2851 } 2852 } 2853 2854 2855 // Computes the upper and lower bounds for level K 2856 // using the > direction. Records them in Bound. 2857 // Wolfe gives the equations 2858 // 2859 // LB^>_k = (A_k - B^+_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2860 // UB^>_k = (A_k - B^-_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2861 // 2862 // Since we normalize loops, we can simplify these equations to 2863 // 2864 // LB^>_k = (A_k - B^+_k)^- (U_k - 1) + A_k 2865 // UB^>_k = (A_k - B^-_k)^+ (U_k - 1) + A_k 2866 // 2867 // We must be careful to handle the case where the upper bound is unknown. 2868 void DependenceInfo::findBoundsGT(CoefficientInfo *A, CoefficientInfo *B, 2869 BoundInfo *Bound, unsigned K) const { 2870 Bound[K].Lower[Dependence::DVEntry::GT] = nullptr; // Default value = -infinity. 2871 Bound[K].Upper[Dependence::DVEntry::GT] = nullptr; // Default value = +infinity. 2872 if (Bound[K].Iterations) { 2873 const SCEV *Iter_1 = SE->getMinusSCEV( 2874 Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType())); 2875 const SCEV *NegPart = 2876 getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2877 Bound[K].Lower[Dependence::DVEntry::GT] = 2878 SE->getAddExpr(SE->getMulExpr(NegPart, Iter_1), A[K].Coeff); 2879 const SCEV *PosPart = 2880 getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2881 Bound[K].Upper[Dependence::DVEntry::GT] = 2882 SE->getAddExpr(SE->getMulExpr(PosPart, Iter_1), A[K].Coeff); 2883 } 2884 else { 2885 // If the positive/negative part of the difference is 0, 2886 // we won't need to know the number of iterations. 2887 const SCEV *NegPart = getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2888 if (NegPart->isZero()) 2889 Bound[K].Lower[Dependence::DVEntry::GT] = A[K].Coeff; 2890 const SCEV *PosPart = getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2891 if (PosPart->isZero()) 2892 Bound[K].Upper[Dependence::DVEntry::GT] = A[K].Coeff; 2893 } 2894 } 2895 2896 2897 // X^+ = max(X, 0) 2898 const SCEV *DependenceInfo::getPositivePart(const SCEV *X) const { 2899 return SE->getSMaxExpr(X, SE->getZero(X->getType())); 2900 } 2901 2902 2903 // X^- = min(X, 0) 2904 const SCEV *DependenceInfo::getNegativePart(const SCEV *X) const { 2905 return SE->getSMinExpr(X, SE->getZero(X->getType())); 2906 } 2907 2908 2909 // Walks through the subscript, 2910 // collecting each coefficient, the associated loop bounds, 2911 // and recording its positive and negative parts for later use. 2912 DependenceInfo::CoefficientInfo * 2913 DependenceInfo::collectCoeffInfo(const SCEV *Subscript, bool SrcFlag, 2914 const SCEV *&Constant) const { 2915 const SCEV *Zero = SE->getZero(Subscript->getType()); 2916 CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1]; 2917 for (unsigned K = 1; K <= MaxLevels; ++K) { 2918 CI[K].Coeff = Zero; 2919 CI[K].PosPart = Zero; 2920 CI[K].NegPart = Zero; 2921 CI[K].Iterations = nullptr; 2922 } 2923 while (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Subscript)) { 2924 const Loop *L = AddRec->getLoop(); 2925 unsigned K = SrcFlag ? mapSrcLoop(L) : mapDstLoop(L); 2926 CI[K].Coeff = AddRec->getStepRecurrence(*SE); 2927 CI[K].PosPart = getPositivePart(CI[K].Coeff); 2928 CI[K].NegPart = getNegativePart(CI[K].Coeff); 2929 CI[K].Iterations = collectUpperBound(L, Subscript->getType()); 2930 Subscript = AddRec->getStart(); 2931 } 2932 Constant = Subscript; 2933 #ifndef NDEBUG 2934 LLVM_DEBUG(dbgs() << "\tCoefficient Info\n"); 2935 for (unsigned K = 1; K <= MaxLevels; ++K) { 2936 LLVM_DEBUG(dbgs() << "\t " << K << "\t" << *CI[K].Coeff); 2937 LLVM_DEBUG(dbgs() << "\tPos Part = "); 2938 LLVM_DEBUG(dbgs() << *CI[K].PosPart); 2939 LLVM_DEBUG(dbgs() << "\tNeg Part = "); 2940 LLVM_DEBUG(dbgs() << *CI[K].NegPart); 2941 LLVM_DEBUG(dbgs() << "\tUpper Bound = "); 2942 if (CI[K].Iterations) 2943 LLVM_DEBUG(dbgs() << *CI[K].Iterations); 2944 else 2945 LLVM_DEBUG(dbgs() << "+inf"); 2946 LLVM_DEBUG(dbgs() << '\n'); 2947 } 2948 LLVM_DEBUG(dbgs() << "\t Constant = " << *Subscript << '\n'); 2949 #endif 2950 return CI; 2951 } 2952 2953 2954 // Looks through all the bounds info and 2955 // computes the lower bound given the current direction settings 2956 // at each level. If the lower bound for any level is -inf, 2957 // the result is -inf. 2958 const SCEV *DependenceInfo::getLowerBound(BoundInfo *Bound) const { 2959 const SCEV *Sum = Bound[1].Lower[Bound[1].Direction]; 2960 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 2961 if (Bound[K].Lower[Bound[K].Direction]) 2962 Sum = SE->getAddExpr(Sum, Bound[K].Lower[Bound[K].Direction]); 2963 else 2964 Sum = nullptr; 2965 } 2966 return Sum; 2967 } 2968 2969 2970 // Looks through all the bounds info and 2971 // computes the upper bound given the current direction settings 2972 // at each level. If the upper bound at any level is +inf, 2973 // the result is +inf. 2974 const SCEV *DependenceInfo::getUpperBound(BoundInfo *Bound) const { 2975 const SCEV *Sum = Bound[1].Upper[Bound[1].Direction]; 2976 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 2977 if (Bound[K].Upper[Bound[K].Direction]) 2978 Sum = SE->getAddExpr(Sum, Bound[K].Upper[Bound[K].Direction]); 2979 else 2980 Sum = nullptr; 2981 } 2982 return Sum; 2983 } 2984 2985 2986 //===----------------------------------------------------------------------===// 2987 // Constraint manipulation for Delta test. 2988 2989 // Given a linear SCEV, 2990 // return the coefficient (the step) 2991 // corresponding to the specified loop. 2992 // If there isn't one, return 0. 2993 // For example, given a*i + b*j + c*k, finding the coefficient 2994 // corresponding to the j loop would yield b. 2995 const SCEV *DependenceInfo::findCoefficient(const SCEV *Expr, 2996 const Loop *TargetLoop) const { 2997 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 2998 if (!AddRec) 2999 return SE->getZero(Expr->getType()); 3000 if (AddRec->getLoop() == TargetLoop) 3001 return AddRec->getStepRecurrence(*SE); 3002 return findCoefficient(AddRec->getStart(), TargetLoop); 3003 } 3004 3005 3006 // Given a linear SCEV, 3007 // return the SCEV given by zeroing out the coefficient 3008 // corresponding to the specified loop. 3009 // For example, given a*i + b*j + c*k, zeroing the coefficient 3010 // corresponding to the j loop would yield a*i + c*k. 3011 const SCEV *DependenceInfo::zeroCoefficient(const SCEV *Expr, 3012 const Loop *TargetLoop) const { 3013 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 3014 if (!AddRec) 3015 return Expr; // ignore 3016 if (AddRec->getLoop() == TargetLoop) 3017 return AddRec->getStart(); 3018 return SE->getAddRecExpr(zeroCoefficient(AddRec->getStart(), TargetLoop), 3019 AddRec->getStepRecurrence(*SE), 3020 AddRec->getLoop(), 3021 AddRec->getNoWrapFlags()); 3022 } 3023 3024 3025 // Given a linear SCEV Expr, 3026 // return the SCEV given by adding some Value to the 3027 // coefficient corresponding to the specified TargetLoop. 3028 // For example, given a*i + b*j + c*k, adding 1 to the coefficient 3029 // corresponding to the j loop would yield a*i + (b+1)*j + c*k. 3030 const SCEV *DependenceInfo::addToCoefficient(const SCEV *Expr, 3031 const Loop *TargetLoop, 3032 const SCEV *Value) const { 3033 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 3034 if (!AddRec) // create a new addRec 3035 return SE->getAddRecExpr(Expr, 3036 Value, 3037 TargetLoop, 3038 SCEV::FlagAnyWrap); // Worst case, with no info. 3039 if (AddRec->getLoop() == TargetLoop) { 3040 const SCEV *Sum = SE->getAddExpr(AddRec->getStepRecurrence(*SE), Value); 3041 if (Sum->isZero()) 3042 return AddRec->getStart(); 3043 return SE->getAddRecExpr(AddRec->getStart(), 3044 Sum, 3045 AddRec->getLoop(), 3046 AddRec->getNoWrapFlags()); 3047 } 3048 if (SE->isLoopInvariant(AddRec, TargetLoop)) 3049 return SE->getAddRecExpr(AddRec, Value, TargetLoop, SCEV::FlagAnyWrap); 3050 return SE->getAddRecExpr( 3051 addToCoefficient(AddRec->getStart(), TargetLoop, Value), 3052 AddRec->getStepRecurrence(*SE), AddRec->getLoop(), 3053 AddRec->getNoWrapFlags()); 3054 } 3055 3056 3057 // Review the constraints, looking for opportunities 3058 // to simplify a subscript pair (Src and Dst). 3059 // Return true if some simplification occurs. 3060 // If the simplification isn't exact (that is, if it is conservative 3061 // in terms of dependence), set consistent to false. 3062 // Corresponds to Figure 5 from the paper 3063 // 3064 // Practical Dependence Testing 3065 // Goff, Kennedy, Tseng 3066 // PLDI 1991 3067 bool DependenceInfo::propagate(const SCEV *&Src, const SCEV *&Dst, 3068 SmallBitVector &Loops, 3069 SmallVectorImpl<Constraint> &Constraints, 3070 bool &Consistent) { 3071 bool Result = false; 3072 for (unsigned LI : Loops.set_bits()) { 3073 LLVM_DEBUG(dbgs() << "\t Constraint[" << LI << "] is"); 3074 LLVM_DEBUG(Constraints[LI].dump(dbgs())); 3075 if (Constraints[LI].isDistance()) 3076 Result |= propagateDistance(Src, Dst, Constraints[LI], Consistent); 3077 else if (Constraints[LI].isLine()) 3078 Result |= propagateLine(Src, Dst, Constraints[LI], Consistent); 3079 else if (Constraints[LI].isPoint()) 3080 Result |= propagatePoint(Src, Dst, Constraints[LI]); 3081 } 3082 return Result; 3083 } 3084 3085 3086 // Attempt to propagate a distance 3087 // constraint into a subscript pair (Src and Dst). 3088 // Return true if some simplification occurs. 3089 // If the simplification isn't exact (that is, if it is conservative 3090 // in terms of dependence), set consistent to false. 3091 bool DependenceInfo::propagateDistance(const SCEV *&Src, const SCEV *&Dst, 3092 Constraint &CurConstraint, 3093 bool &Consistent) { 3094 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3095 LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 3096 const SCEV *A_K = findCoefficient(Src, CurLoop); 3097 if (A_K->isZero()) 3098 return false; 3099 const SCEV *DA_K = SE->getMulExpr(A_K, CurConstraint.getD()); 3100 Src = SE->getMinusSCEV(Src, DA_K); 3101 Src = zeroCoefficient(Src, CurLoop); 3102 LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3103 LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3104 Dst = addToCoefficient(Dst, CurLoop, SE->getNegativeSCEV(A_K)); 3105 LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3106 if (!findCoefficient(Dst, CurLoop)->isZero()) 3107 Consistent = false; 3108 return true; 3109 } 3110 3111 3112 // Attempt to propagate a line 3113 // constraint into a subscript pair (Src and Dst). 3114 // Return true if some simplification occurs. 3115 // If the simplification isn't exact (that is, if it is conservative 3116 // in terms of dependence), set consistent to false. 3117 bool DependenceInfo::propagateLine(const SCEV *&Src, const SCEV *&Dst, 3118 Constraint &CurConstraint, 3119 bool &Consistent) { 3120 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3121 const SCEV *A = CurConstraint.getA(); 3122 const SCEV *B = CurConstraint.getB(); 3123 const SCEV *C = CurConstraint.getC(); 3124 LLVM_DEBUG(dbgs() << "\t\tA = " << *A << ", B = " << *B << ", C = " << *C 3125 << "\n"); 3126 LLVM_DEBUG(dbgs() << "\t\tSrc = " << *Src << "\n"); 3127 LLVM_DEBUG(dbgs() << "\t\tDst = " << *Dst << "\n"); 3128 if (A->isZero()) { 3129 const SCEVConstant *Bconst = dyn_cast<SCEVConstant>(B); 3130 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3131 if (!Bconst || !Cconst) return false; 3132 APInt Beta = Bconst->getAPInt(); 3133 APInt Charlie = Cconst->getAPInt(); 3134 APInt CdivB = Charlie.sdiv(Beta); 3135 assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B"); 3136 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3137 // Src = SE->getAddExpr(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3138 Src = SE->getMinusSCEV(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3139 Dst = zeroCoefficient(Dst, CurLoop); 3140 if (!findCoefficient(Src, CurLoop)->isZero()) 3141 Consistent = false; 3142 } 3143 else if (B->isZero()) { 3144 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3145 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3146 if (!Aconst || !Cconst) return false; 3147 APInt Alpha = Aconst->getAPInt(); 3148 APInt Charlie = Cconst->getAPInt(); 3149 APInt CdivA = Charlie.sdiv(Alpha); 3150 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3151 const SCEV *A_K = findCoefficient(Src, CurLoop); 3152 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3153 Src = zeroCoefficient(Src, CurLoop); 3154 if (!findCoefficient(Dst, CurLoop)->isZero()) 3155 Consistent = false; 3156 } 3157 else if (isKnownPredicate(CmpInst::ICMP_EQ, A, B)) { 3158 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3159 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3160 if (!Aconst || !Cconst) return false; 3161 APInt Alpha = Aconst->getAPInt(); 3162 APInt Charlie = Cconst->getAPInt(); 3163 APInt CdivA = Charlie.sdiv(Alpha); 3164 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3165 const SCEV *A_K = findCoefficient(Src, CurLoop); 3166 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3167 Src = zeroCoefficient(Src, CurLoop); 3168 Dst = addToCoefficient(Dst, CurLoop, A_K); 3169 if (!findCoefficient(Dst, CurLoop)->isZero()) 3170 Consistent = false; 3171 } 3172 else { 3173 // paper is incorrect here, or perhaps just misleading 3174 const SCEV *A_K = findCoefficient(Src, CurLoop); 3175 Src = SE->getMulExpr(Src, A); 3176 Dst = SE->getMulExpr(Dst, A); 3177 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, C)); 3178 Src = zeroCoefficient(Src, CurLoop); 3179 Dst = addToCoefficient(Dst, CurLoop, SE->getMulExpr(A_K, B)); 3180 if (!findCoefficient(Dst, CurLoop)->isZero()) 3181 Consistent = false; 3182 } 3183 LLVM_DEBUG(dbgs() << "\t\tnew Src = " << *Src << "\n"); 3184 LLVM_DEBUG(dbgs() << "\t\tnew Dst = " << *Dst << "\n"); 3185 return true; 3186 } 3187 3188 3189 // Attempt to propagate a point 3190 // constraint into a subscript pair (Src and Dst). 3191 // Return true if some simplification occurs. 3192 bool DependenceInfo::propagatePoint(const SCEV *&Src, const SCEV *&Dst, 3193 Constraint &CurConstraint) { 3194 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3195 const SCEV *A_K = findCoefficient(Src, CurLoop); 3196 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3197 const SCEV *XA_K = SE->getMulExpr(A_K, CurConstraint.getX()); 3198 const SCEV *YAP_K = SE->getMulExpr(AP_K, CurConstraint.getY()); 3199 LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 3200 Src = SE->getAddExpr(Src, SE->getMinusSCEV(XA_K, YAP_K)); 3201 Src = zeroCoefficient(Src, CurLoop); 3202 LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3203 LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3204 Dst = zeroCoefficient(Dst, CurLoop); 3205 LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3206 return true; 3207 } 3208 3209 3210 // Update direction vector entry based on the current constraint. 3211 void DependenceInfo::updateDirection(Dependence::DVEntry &Level, 3212 const Constraint &CurConstraint) const { 3213 LLVM_DEBUG(dbgs() << "\tUpdate direction, constraint ="); 3214 LLVM_DEBUG(CurConstraint.dump(dbgs())); 3215 if (CurConstraint.isAny()) 3216 ; // use defaults 3217 else if (CurConstraint.isDistance()) { 3218 // this one is consistent, the others aren't 3219 Level.Scalar = false; 3220 Level.Distance = CurConstraint.getD(); 3221 unsigned NewDirection = Dependence::DVEntry::NONE; 3222 if (!SE->isKnownNonZero(Level.Distance)) // if may be zero 3223 NewDirection = Dependence::DVEntry::EQ; 3224 if (!SE->isKnownNonPositive(Level.Distance)) // if may be positive 3225 NewDirection |= Dependence::DVEntry::LT; 3226 if (!SE->isKnownNonNegative(Level.Distance)) // if may be negative 3227 NewDirection |= Dependence::DVEntry::GT; 3228 Level.Direction &= NewDirection; 3229 } 3230 else if (CurConstraint.isLine()) { 3231 Level.Scalar = false; 3232 Level.Distance = nullptr; 3233 // direction should be accurate 3234 } 3235 else if (CurConstraint.isPoint()) { 3236 Level.Scalar = false; 3237 Level.Distance = nullptr; 3238 unsigned NewDirection = Dependence::DVEntry::NONE; 3239 if (!isKnownPredicate(CmpInst::ICMP_NE, 3240 CurConstraint.getY(), 3241 CurConstraint.getX())) 3242 // if X may be = Y 3243 NewDirection |= Dependence::DVEntry::EQ; 3244 if (!isKnownPredicate(CmpInst::ICMP_SLE, 3245 CurConstraint.getY(), 3246 CurConstraint.getX())) 3247 // if Y may be > X 3248 NewDirection |= Dependence::DVEntry::LT; 3249 if (!isKnownPredicate(CmpInst::ICMP_SGE, 3250 CurConstraint.getY(), 3251 CurConstraint.getX())) 3252 // if Y may be < X 3253 NewDirection |= Dependence::DVEntry::GT; 3254 Level.Direction &= NewDirection; 3255 } 3256 else 3257 llvm_unreachable("constraint has unexpected kind"); 3258 } 3259 3260 /// Check if we can delinearize the subscripts. If the SCEVs representing the 3261 /// source and destination array references are recurrences on a nested loop, 3262 /// this function flattens the nested recurrences into separate recurrences 3263 /// for each loop level. 3264 bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst, 3265 SmallVectorImpl<Subscript> &Pair) { 3266 assert(isLoadOrStore(Src) && "instruction is not load or store"); 3267 assert(isLoadOrStore(Dst) && "instruction is not load or store"); 3268 Value *SrcPtr = getLoadStorePointerOperand(Src); 3269 Value *DstPtr = getLoadStorePointerOperand(Dst); 3270 3271 Loop *SrcLoop = LI->getLoopFor(Src->getParent()); 3272 Loop *DstLoop = LI->getLoopFor(Dst->getParent()); 3273 3274 // Below code mimics the code in Delinearization.cpp 3275 const SCEV *SrcAccessFn = 3276 SE->getSCEVAtScope(SrcPtr, SrcLoop); 3277 const SCEV *DstAccessFn = 3278 SE->getSCEVAtScope(DstPtr, DstLoop); 3279 3280 const SCEVUnknown *SrcBase = 3281 dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn)); 3282 const SCEVUnknown *DstBase = 3283 dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn)); 3284 3285 if (!SrcBase || !DstBase || SrcBase != DstBase) 3286 return false; 3287 3288 const SCEV *ElementSize = SE->getElementSize(Src); 3289 if (ElementSize != SE->getElementSize(Dst)) 3290 return false; 3291 3292 const SCEV *SrcSCEV = SE->getMinusSCEV(SrcAccessFn, SrcBase); 3293 const SCEV *DstSCEV = SE->getMinusSCEV(DstAccessFn, DstBase); 3294 3295 const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(SrcSCEV); 3296 const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(DstSCEV); 3297 if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine()) 3298 return false; 3299 3300 // First step: collect parametric terms in both array references. 3301 SmallVector<const SCEV *, 4> Terms; 3302 SE->collectParametricTerms(SrcAR, Terms); 3303 SE->collectParametricTerms(DstAR, Terms); 3304 3305 // Second step: find subscript sizes. 3306 SmallVector<const SCEV *, 4> Sizes; 3307 SE->findArrayDimensions(Terms, Sizes, ElementSize); 3308 3309 // Third step: compute the access functions for each subscript. 3310 SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts; 3311 SE->computeAccessFunctions(SrcAR, SrcSubscripts, Sizes); 3312 SE->computeAccessFunctions(DstAR, DstSubscripts, Sizes); 3313 3314 // Fail when there is only a subscript: that's a linearized access function. 3315 if (SrcSubscripts.size() < 2 || DstSubscripts.size() < 2 || 3316 SrcSubscripts.size() != DstSubscripts.size()) 3317 return false; 3318 3319 int size = SrcSubscripts.size(); 3320 3321 // Statically check that the array bounds are in-range. The first subscript we 3322 // don't have a size for and it cannot overflow into another subscript, so is 3323 // always safe. The others need to be 0 <= subscript[i] < bound, for both src 3324 // and dst. 3325 // FIXME: It may be better to record these sizes and add them as constraints 3326 // to the dependency checks. 3327 if (!DisableDelinearizationChecks) 3328 for (int i = 1; i < size; ++i) { 3329 if (!isKnownNonNegative(SrcSubscripts[i], SrcPtr)) 3330 return false; 3331 3332 if (!isKnownLessThan(SrcSubscripts[i], Sizes[i - 1])) 3333 return false; 3334 3335 if (!isKnownNonNegative(DstSubscripts[i], DstPtr)) 3336 return false; 3337 3338 if (!isKnownLessThan(DstSubscripts[i], Sizes[i - 1])) 3339 return false; 3340 } 3341 3342 LLVM_DEBUG({ 3343 dbgs() << "\nSrcSubscripts: "; 3344 for (int i = 0; i < size; i++) 3345 dbgs() << *SrcSubscripts[i]; 3346 dbgs() << "\nDstSubscripts: "; 3347 for (int i = 0; i < size; i++) 3348 dbgs() << *DstSubscripts[i]; 3349 }); 3350 3351 // The delinearization transforms a single-subscript MIV dependence test into 3352 // a multi-subscript SIV dependence test that is easier to compute. So we 3353 // resize Pair to contain as many pairs of subscripts as the delinearization 3354 // has found, and then initialize the pairs following the delinearization. 3355 Pair.resize(size); 3356 for (int i = 0; i < size; ++i) { 3357 Pair[i].Src = SrcSubscripts[i]; 3358 Pair[i].Dst = DstSubscripts[i]; 3359 unifySubscriptType(&Pair[i]); 3360 } 3361 3362 return true; 3363 } 3364 3365 //===----------------------------------------------------------------------===// 3366 3367 #ifndef NDEBUG 3368 // For debugging purposes, dump a small bit vector to dbgs(). 3369 static void dumpSmallBitVector(SmallBitVector &BV) { 3370 dbgs() << "{"; 3371 for (unsigned VI : BV.set_bits()) { 3372 dbgs() << VI; 3373 if (BV.find_next(VI) >= 0) 3374 dbgs() << ' '; 3375 } 3376 dbgs() << "}\n"; 3377 } 3378 #endif 3379 3380 bool DependenceInfo::invalidate(Function &F, const PreservedAnalyses &PA, 3381 FunctionAnalysisManager::Invalidator &Inv) { 3382 // Check if the analysis itself has been invalidated. 3383 auto PAC = PA.getChecker<DependenceAnalysis>(); 3384 if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>()) 3385 return true; 3386 3387 // Check transitive dependencies. 3388 return Inv.invalidate<AAManager>(F, PA) || 3389 Inv.invalidate<ScalarEvolutionAnalysis>(F, PA) || 3390 Inv.invalidate<LoopAnalysis>(F, PA); 3391 } 3392 3393 // depends - 3394 // Returns NULL if there is no dependence. 3395 // Otherwise, return a Dependence with as many details as possible. 3396 // Corresponds to Section 3.1 in the paper 3397 // 3398 // Practical Dependence Testing 3399 // Goff, Kennedy, Tseng 3400 // PLDI 1991 3401 // 3402 // Care is required to keep the routine below, getSplitIteration(), 3403 // up to date with respect to this routine. 3404 std::unique_ptr<Dependence> 3405 DependenceInfo::depends(Instruction *Src, Instruction *Dst, 3406 bool PossiblyLoopIndependent) { 3407 if (Src == Dst) 3408 PossiblyLoopIndependent = false; 3409 3410 if ((!Src->mayReadFromMemory() && !Src->mayWriteToMemory()) || 3411 (!Dst->mayReadFromMemory() && !Dst->mayWriteToMemory())) 3412 // if both instructions don't reference memory, there's no dependence 3413 return nullptr; 3414 3415 if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) { 3416 // can only analyze simple loads and stores, i.e., no calls, invokes, etc. 3417 LLVM_DEBUG(dbgs() << "can only handle simple loads and stores\n"); 3418 return make_unique<Dependence>(Src, Dst); 3419 } 3420 3421 assert(isLoadOrStore(Src) && "instruction is not load or store"); 3422 assert(isLoadOrStore(Dst) && "instruction is not load or store"); 3423 Value *SrcPtr = getLoadStorePointerOperand(Src); 3424 Value *DstPtr = getLoadStorePointerOperand(Dst); 3425 3426 switch (underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), 3427 MemoryLocation::get(Dst), 3428 MemoryLocation::get(Src))) { 3429 case MayAlias: 3430 case PartialAlias: 3431 // cannot analyse objects if we don't understand their aliasing. 3432 LLVM_DEBUG(dbgs() << "can't analyze may or partial alias\n"); 3433 return make_unique<Dependence>(Src, Dst); 3434 case NoAlias: 3435 // If the objects noalias, they are distinct, accesses are independent. 3436 LLVM_DEBUG(dbgs() << "no alias\n"); 3437 return nullptr; 3438 case MustAlias: 3439 break; // The underlying objects alias; test accesses for dependence. 3440 } 3441 3442 // establish loop nesting levels 3443 establishNestingLevels(Src, Dst); 3444 LLVM_DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n"); 3445 LLVM_DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n"); 3446 3447 FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels); 3448 ++TotalArrayPairs; 3449 3450 unsigned Pairs = 1; 3451 SmallVector<Subscript, 2> Pair(Pairs); 3452 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); 3453 const SCEV *DstSCEV = SE->getSCEV(DstPtr); 3454 LLVM_DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n"); 3455 LLVM_DEBUG(dbgs() << " DstSCEV = " << *DstSCEV << "\n"); 3456 Pair[0].Src = SrcSCEV; 3457 Pair[0].Dst = DstSCEV; 3458 3459 if (Delinearize) { 3460 if (tryDelinearize(Src, Dst, Pair)) { 3461 LLVM_DEBUG(dbgs() << " delinearized\n"); 3462 Pairs = Pair.size(); 3463 } 3464 } 3465 3466 for (unsigned P = 0; P < Pairs; ++P) { 3467 Pair[P].Loops.resize(MaxLevels + 1); 3468 Pair[P].GroupLoops.resize(MaxLevels + 1); 3469 Pair[P].Group.resize(Pairs); 3470 removeMatchingExtensions(&Pair[P]); 3471 Pair[P].Classification = 3472 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), 3473 Pair[P].Dst, LI->getLoopFor(Dst->getParent()), 3474 Pair[P].Loops); 3475 Pair[P].GroupLoops = Pair[P].Loops; 3476 Pair[P].Group.set(P); 3477 LLVM_DEBUG(dbgs() << " subscript " << P << "\n"); 3478 LLVM_DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n"); 3479 LLVM_DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n"); 3480 LLVM_DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n"); 3481 LLVM_DEBUG(dbgs() << "\tloops = "); 3482 LLVM_DEBUG(dumpSmallBitVector(Pair[P].Loops)); 3483 } 3484 3485 SmallBitVector Separable(Pairs); 3486 SmallBitVector Coupled(Pairs); 3487 3488 // Partition subscripts into separable and minimally-coupled groups 3489 // Algorithm in paper is algorithmically better; 3490 // this may be faster in practice. Check someday. 3491 // 3492 // Here's an example of how it works. Consider this code: 3493 // 3494 // for (i = ...) { 3495 // for (j = ...) { 3496 // for (k = ...) { 3497 // for (l = ...) { 3498 // for (m = ...) { 3499 // A[i][j][k][m] = ...; 3500 // ... = A[0][j][l][i + j]; 3501 // } 3502 // } 3503 // } 3504 // } 3505 // } 3506 // 3507 // There are 4 subscripts here: 3508 // 0 [i] and [0] 3509 // 1 [j] and [j] 3510 // 2 [k] and [l] 3511 // 3 [m] and [i + j] 3512 // 3513 // We've already classified each subscript pair as ZIV, SIV, etc., 3514 // and collected all the loops mentioned by pair P in Pair[P].Loops. 3515 // In addition, we've initialized Pair[P].GroupLoops to Pair[P].Loops 3516 // and set Pair[P].Group = {P}. 3517 // 3518 // Src Dst Classification Loops GroupLoops Group 3519 // 0 [i] [0] SIV {1} {1} {0} 3520 // 1 [j] [j] SIV {2} {2} {1} 3521 // 2 [k] [l] RDIV {3,4} {3,4} {2} 3522 // 3 [m] [i + j] MIV {1,2,5} {1,2,5} {3} 3523 // 3524 // For each subscript SI 0 .. 3, we consider each remaining subscript, SJ. 3525 // So, 0 is compared against 1, 2, and 3; 1 is compared against 2 and 3, etc. 3526 // 3527 // We begin by comparing 0 and 1. The intersection of the GroupLoops is empty. 3528 // Next, 0 and 2. Again, the intersection of their GroupLoops is empty. 3529 // Next 0 and 3. The intersection of their GroupLoop = {1}, not empty, 3530 // so Pair[3].Group = {0,3} and Done = false (that is, 0 will not be added 3531 // to either Separable or Coupled). 3532 // 3533 // Next, we consider 1 and 2. The intersection of the GroupLoops is empty. 3534 // Next, 1 and 3. The intersection of their GroupLoops = {2}, not empty, 3535 // so Pair[3].Group = {0, 1, 3} and Done = false. 3536 // 3537 // Next, we compare 2 against 3. The intersection of the GroupLoops is empty. 3538 // Since Done remains true, we add 2 to the set of Separable pairs. 3539 // 3540 // Finally, we consider 3. There's nothing to compare it with, 3541 // so Done remains true and we add it to the Coupled set. 3542 // Pair[3].Group = {0, 1, 3} and GroupLoops = {1, 2, 5}. 3543 // 3544 // In the end, we've got 1 separable subscript and 1 coupled group. 3545 for (unsigned SI = 0; SI < Pairs; ++SI) { 3546 if (Pair[SI].Classification == Subscript::NonLinear) { 3547 // ignore these, but collect loops for later 3548 ++NonlinearSubscriptPairs; 3549 collectCommonLoops(Pair[SI].Src, 3550 LI->getLoopFor(Src->getParent()), 3551 Pair[SI].Loops); 3552 collectCommonLoops(Pair[SI].Dst, 3553 LI->getLoopFor(Dst->getParent()), 3554 Pair[SI].Loops); 3555 Result.Consistent = false; 3556 } else if (Pair[SI].Classification == Subscript::ZIV) { 3557 // always separable 3558 Separable.set(SI); 3559 } 3560 else { 3561 // SIV, RDIV, or MIV, so check for coupled group 3562 bool Done = true; 3563 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 3564 SmallBitVector Intersection = Pair[SI].GroupLoops; 3565 Intersection &= Pair[SJ].GroupLoops; 3566 if (Intersection.any()) { 3567 // accumulate set of all the loops in group 3568 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 3569 // accumulate set of all subscripts in group 3570 Pair[SJ].Group |= Pair[SI].Group; 3571 Done = false; 3572 } 3573 } 3574 if (Done) { 3575 if (Pair[SI].Group.count() == 1) { 3576 Separable.set(SI); 3577 ++SeparableSubscriptPairs; 3578 } 3579 else { 3580 Coupled.set(SI); 3581 ++CoupledSubscriptPairs; 3582 } 3583 } 3584 } 3585 } 3586 3587 LLVM_DEBUG(dbgs() << " Separable = "); 3588 LLVM_DEBUG(dumpSmallBitVector(Separable)); 3589 LLVM_DEBUG(dbgs() << " Coupled = "); 3590 LLVM_DEBUG(dumpSmallBitVector(Coupled)); 3591 3592 Constraint NewConstraint; 3593 NewConstraint.setAny(SE); 3594 3595 // test separable subscripts 3596 for (unsigned SI : Separable.set_bits()) { 3597 LLVM_DEBUG(dbgs() << "testing subscript " << SI); 3598 switch (Pair[SI].Classification) { 3599 case Subscript::ZIV: 3600 LLVM_DEBUG(dbgs() << ", ZIV\n"); 3601 if (testZIV(Pair[SI].Src, Pair[SI].Dst, Result)) 3602 return nullptr; 3603 break; 3604 case Subscript::SIV: { 3605 LLVM_DEBUG(dbgs() << ", SIV\n"); 3606 unsigned Level; 3607 const SCEV *SplitIter = nullptr; 3608 if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level, Result, NewConstraint, 3609 SplitIter)) 3610 return nullptr; 3611 break; 3612 } 3613 case Subscript::RDIV: 3614 LLVM_DEBUG(dbgs() << ", RDIV\n"); 3615 if (testRDIV(Pair[SI].Src, Pair[SI].Dst, Result)) 3616 return nullptr; 3617 break; 3618 case Subscript::MIV: 3619 LLVM_DEBUG(dbgs() << ", MIV\n"); 3620 if (testMIV(Pair[SI].Src, Pair[SI].Dst, Pair[SI].Loops, Result)) 3621 return nullptr; 3622 break; 3623 default: 3624 llvm_unreachable("subscript has unexpected classification"); 3625 } 3626 } 3627 3628 if (Coupled.count()) { 3629 // test coupled subscript groups 3630 LLVM_DEBUG(dbgs() << "starting on coupled subscripts\n"); 3631 LLVM_DEBUG(dbgs() << "MaxLevels + 1 = " << MaxLevels + 1 << "\n"); 3632 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 3633 for (unsigned II = 0; II <= MaxLevels; ++II) 3634 Constraints[II].setAny(SE); 3635 for (unsigned SI : Coupled.set_bits()) { 3636 LLVM_DEBUG(dbgs() << "testing subscript group " << SI << " { "); 3637 SmallBitVector Group(Pair[SI].Group); 3638 SmallBitVector Sivs(Pairs); 3639 SmallBitVector Mivs(Pairs); 3640 SmallBitVector ConstrainedLevels(MaxLevels + 1); 3641 SmallVector<Subscript *, 4> PairsInGroup; 3642 for (unsigned SJ : Group.set_bits()) { 3643 LLVM_DEBUG(dbgs() << SJ << " "); 3644 if (Pair[SJ].Classification == Subscript::SIV) 3645 Sivs.set(SJ); 3646 else 3647 Mivs.set(SJ); 3648 PairsInGroup.push_back(&Pair[SJ]); 3649 } 3650 unifySubscriptType(PairsInGroup); 3651 LLVM_DEBUG(dbgs() << "}\n"); 3652 while (Sivs.any()) { 3653 bool Changed = false; 3654 for (unsigned SJ : Sivs.set_bits()) { 3655 LLVM_DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n"); 3656 // SJ is an SIV subscript that's part of the current coupled group 3657 unsigned Level; 3658 const SCEV *SplitIter = nullptr; 3659 LLVM_DEBUG(dbgs() << "SIV\n"); 3660 if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, Result, NewConstraint, 3661 SplitIter)) 3662 return nullptr; 3663 ConstrainedLevels.set(Level); 3664 if (intersectConstraints(&Constraints[Level], &NewConstraint)) { 3665 if (Constraints[Level].isEmpty()) { 3666 ++DeltaIndependence; 3667 return nullptr; 3668 } 3669 Changed = true; 3670 } 3671 Sivs.reset(SJ); 3672 } 3673 if (Changed) { 3674 // propagate, possibly creating new SIVs and ZIVs 3675 LLVM_DEBUG(dbgs() << " propagating\n"); 3676 LLVM_DEBUG(dbgs() << "\tMivs = "); 3677 LLVM_DEBUG(dumpSmallBitVector(Mivs)); 3678 for (unsigned SJ : Mivs.set_bits()) { 3679 // SJ is an MIV subscript that's part of the current coupled group 3680 LLVM_DEBUG(dbgs() << "\tSJ = " << SJ << "\n"); 3681 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, 3682 Constraints, Result.Consistent)) { 3683 LLVM_DEBUG(dbgs() << "\t Changed\n"); 3684 ++DeltaPropagations; 3685 Pair[SJ].Classification = 3686 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), 3687 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), 3688 Pair[SJ].Loops); 3689 switch (Pair[SJ].Classification) { 3690 case Subscript::ZIV: 3691 LLVM_DEBUG(dbgs() << "ZIV\n"); 3692 if (testZIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) 3693 return nullptr; 3694 Mivs.reset(SJ); 3695 break; 3696 case Subscript::SIV: 3697 Sivs.set(SJ); 3698 Mivs.reset(SJ); 3699 break; 3700 case Subscript::RDIV: 3701 case Subscript::MIV: 3702 break; 3703 default: 3704 llvm_unreachable("bad subscript classification"); 3705 } 3706 } 3707 } 3708 } 3709 } 3710 3711 // test & propagate remaining RDIVs 3712 for (unsigned SJ : Mivs.set_bits()) { 3713 if (Pair[SJ].Classification == Subscript::RDIV) { 3714 LLVM_DEBUG(dbgs() << "RDIV test\n"); 3715 if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) 3716 return nullptr; 3717 // I don't yet understand how to propagate RDIV results 3718 Mivs.reset(SJ); 3719 } 3720 } 3721 3722 // test remaining MIVs 3723 // This code is temporary. 3724 // Better to somehow test all remaining subscripts simultaneously. 3725 for (unsigned SJ : Mivs.set_bits()) { 3726 if (Pair[SJ].Classification == Subscript::MIV) { 3727 LLVM_DEBUG(dbgs() << "MIV test\n"); 3728 if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result)) 3729 return nullptr; 3730 } 3731 else 3732 llvm_unreachable("expected only MIV subscripts at this point"); 3733 } 3734 3735 // update Result.DV from constraint vector 3736 LLVM_DEBUG(dbgs() << " updating\n"); 3737 for (unsigned SJ : ConstrainedLevels.set_bits()) { 3738 if (SJ > CommonLevels) 3739 break; 3740 updateDirection(Result.DV[SJ - 1], Constraints[SJ]); 3741 if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE) 3742 return nullptr; 3743 } 3744 } 3745 } 3746 3747 // Make sure the Scalar flags are set correctly. 3748 SmallBitVector CompleteLoops(MaxLevels + 1); 3749 for (unsigned SI = 0; SI < Pairs; ++SI) 3750 CompleteLoops |= Pair[SI].Loops; 3751 for (unsigned II = 1; II <= CommonLevels; ++II) 3752 if (CompleteLoops[II]) 3753 Result.DV[II - 1].Scalar = false; 3754 3755 if (PossiblyLoopIndependent) { 3756 // Make sure the LoopIndependent flag is set correctly. 3757 // All directions must include equal, otherwise no 3758 // loop-independent dependence is possible. 3759 for (unsigned II = 1; II <= CommonLevels; ++II) { 3760 if (!(Result.getDirection(II) & Dependence::DVEntry::EQ)) { 3761 Result.LoopIndependent = false; 3762 break; 3763 } 3764 } 3765 } 3766 else { 3767 // On the other hand, if all directions are equal and there's no 3768 // loop-independent dependence possible, then no dependence exists. 3769 bool AllEqual = true; 3770 for (unsigned II = 1; II <= CommonLevels; ++II) { 3771 if (Result.getDirection(II) != Dependence::DVEntry::EQ) { 3772 AllEqual = false; 3773 break; 3774 } 3775 } 3776 if (AllEqual) 3777 return nullptr; 3778 } 3779 3780 return make_unique<FullDependence>(std::move(Result)); 3781 } 3782 3783 3784 3785 //===----------------------------------------------------------------------===// 3786 // getSplitIteration - 3787 // Rather than spend rarely-used space recording the splitting iteration 3788 // during the Weak-Crossing SIV test, we re-compute it on demand. 3789 // The re-computation is basically a repeat of the entire dependence test, 3790 // though simplified since we know that the dependence exists. 3791 // It's tedious, since we must go through all propagations, etc. 3792 // 3793 // Care is required to keep this code up to date with respect to the routine 3794 // above, depends(). 3795 // 3796 // Generally, the dependence analyzer will be used to build 3797 // a dependence graph for a function (basically a map from instructions 3798 // to dependences). Looking for cycles in the graph shows us loops 3799 // that cannot be trivially vectorized/parallelized. 3800 // 3801 // We can try to improve the situation by examining all the dependences 3802 // that make up the cycle, looking for ones we can break. 3803 // Sometimes, peeling the first or last iteration of a loop will break 3804 // dependences, and we've got flags for those possibilities. 3805 // Sometimes, splitting a loop at some other iteration will do the trick, 3806 // and we've got a flag for that case. Rather than waste the space to 3807 // record the exact iteration (since we rarely know), we provide 3808 // a method that calculates the iteration. It's a drag that it must work 3809 // from scratch, but wonderful in that it's possible. 3810 // 3811 // Here's an example: 3812 // 3813 // for (i = 0; i < 10; i++) 3814 // A[i] = ... 3815 // ... = A[11 - i] 3816 // 3817 // There's a loop-carried flow dependence from the store to the load, 3818 // found by the weak-crossing SIV test. The dependence will have a flag, 3819 // indicating that the dependence can be broken by splitting the loop. 3820 // Calling getSplitIteration will return 5. 3821 // Splitting the loop breaks the dependence, like so: 3822 // 3823 // for (i = 0; i <= 5; i++) 3824 // A[i] = ... 3825 // ... = A[11 - i] 3826 // for (i = 6; i < 10; i++) 3827 // A[i] = ... 3828 // ... = A[11 - i] 3829 // 3830 // breaks the dependence and allows us to vectorize/parallelize 3831 // both loops. 3832 const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep, 3833 unsigned SplitLevel) { 3834 assert(Dep.isSplitable(SplitLevel) && 3835 "Dep should be splitable at SplitLevel"); 3836 Instruction *Src = Dep.getSrc(); 3837 Instruction *Dst = Dep.getDst(); 3838 assert(Src->mayReadFromMemory() || Src->mayWriteToMemory()); 3839 assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory()); 3840 assert(isLoadOrStore(Src)); 3841 assert(isLoadOrStore(Dst)); 3842 Value *SrcPtr = getLoadStorePointerOperand(Src); 3843 Value *DstPtr = getLoadStorePointerOperand(Dst); 3844 assert(underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), 3845 MemoryLocation::get(Dst), 3846 MemoryLocation::get(Src)) == MustAlias); 3847 3848 // establish loop nesting levels 3849 establishNestingLevels(Src, Dst); 3850 3851 FullDependence Result(Src, Dst, false, CommonLevels); 3852 3853 unsigned Pairs = 1; 3854 SmallVector<Subscript, 2> Pair(Pairs); 3855 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); 3856 const SCEV *DstSCEV = SE->getSCEV(DstPtr); 3857 Pair[0].Src = SrcSCEV; 3858 Pair[0].Dst = DstSCEV; 3859 3860 if (Delinearize) { 3861 if (tryDelinearize(Src, Dst, Pair)) { 3862 LLVM_DEBUG(dbgs() << " delinearized\n"); 3863 Pairs = Pair.size(); 3864 } 3865 } 3866 3867 for (unsigned P = 0; P < Pairs; ++P) { 3868 Pair[P].Loops.resize(MaxLevels + 1); 3869 Pair[P].GroupLoops.resize(MaxLevels + 1); 3870 Pair[P].Group.resize(Pairs); 3871 removeMatchingExtensions(&Pair[P]); 3872 Pair[P].Classification = 3873 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), 3874 Pair[P].Dst, LI->getLoopFor(Dst->getParent()), 3875 Pair[P].Loops); 3876 Pair[P].GroupLoops = Pair[P].Loops; 3877 Pair[P].Group.set(P); 3878 } 3879 3880 SmallBitVector Separable(Pairs); 3881 SmallBitVector Coupled(Pairs); 3882 3883 // partition subscripts into separable and minimally-coupled groups 3884 for (unsigned SI = 0; SI < Pairs; ++SI) { 3885 if (Pair[SI].Classification == Subscript::NonLinear) { 3886 // ignore these, but collect loops for later 3887 collectCommonLoops(Pair[SI].Src, 3888 LI->getLoopFor(Src->getParent()), 3889 Pair[SI].Loops); 3890 collectCommonLoops(Pair[SI].Dst, 3891 LI->getLoopFor(Dst->getParent()), 3892 Pair[SI].Loops); 3893 Result.Consistent = false; 3894 } 3895 else if (Pair[SI].Classification == Subscript::ZIV) 3896 Separable.set(SI); 3897 else { 3898 // SIV, RDIV, or MIV, so check for coupled group 3899 bool Done = true; 3900 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 3901 SmallBitVector Intersection = Pair[SI].GroupLoops; 3902 Intersection &= Pair[SJ].GroupLoops; 3903 if (Intersection.any()) { 3904 // accumulate set of all the loops in group 3905 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 3906 // accumulate set of all subscripts in group 3907 Pair[SJ].Group |= Pair[SI].Group; 3908 Done = false; 3909 } 3910 } 3911 if (Done) { 3912 if (Pair[SI].Group.count() == 1) 3913 Separable.set(SI); 3914 else 3915 Coupled.set(SI); 3916 } 3917 } 3918 } 3919 3920 Constraint NewConstraint; 3921 NewConstraint.setAny(SE); 3922 3923 // test separable subscripts 3924 for (unsigned SI : Separable.set_bits()) { 3925 switch (Pair[SI].Classification) { 3926 case Subscript::SIV: { 3927 unsigned Level; 3928 const SCEV *SplitIter = nullptr; 3929 (void) testSIV(Pair[SI].Src, Pair[SI].Dst, Level, 3930 Result, NewConstraint, SplitIter); 3931 if (Level == SplitLevel) { 3932 assert(SplitIter != nullptr); 3933 return SplitIter; 3934 } 3935 break; 3936 } 3937 case Subscript::ZIV: 3938 case Subscript::RDIV: 3939 case Subscript::MIV: 3940 break; 3941 default: 3942 llvm_unreachable("subscript has unexpected classification"); 3943 } 3944 } 3945 3946 if (Coupled.count()) { 3947 // test coupled subscript groups 3948 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 3949 for (unsigned II = 0; II <= MaxLevels; ++II) 3950 Constraints[II].setAny(SE); 3951 for (unsigned SI : Coupled.set_bits()) { 3952 SmallBitVector Group(Pair[SI].Group); 3953 SmallBitVector Sivs(Pairs); 3954 SmallBitVector Mivs(Pairs); 3955 SmallBitVector ConstrainedLevels(MaxLevels + 1); 3956 for (unsigned SJ : Group.set_bits()) { 3957 if (Pair[SJ].Classification == Subscript::SIV) 3958 Sivs.set(SJ); 3959 else 3960 Mivs.set(SJ); 3961 } 3962 while (Sivs.any()) { 3963 bool Changed = false; 3964 for (unsigned SJ : Sivs.set_bits()) { 3965 // SJ is an SIV subscript that's part of the current coupled group 3966 unsigned Level; 3967 const SCEV *SplitIter = nullptr; 3968 (void) testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, 3969 Result, NewConstraint, SplitIter); 3970 if (Level == SplitLevel && SplitIter) 3971 return SplitIter; 3972 ConstrainedLevels.set(Level); 3973 if (intersectConstraints(&Constraints[Level], &NewConstraint)) 3974 Changed = true; 3975 Sivs.reset(SJ); 3976 } 3977 if (Changed) { 3978 // propagate, possibly creating new SIVs and ZIVs 3979 for (unsigned SJ : Mivs.set_bits()) { 3980 // SJ is an MIV subscript that's part of the current coupled group 3981 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, 3982 Pair[SJ].Loops, Constraints, Result.Consistent)) { 3983 Pair[SJ].Classification = 3984 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), 3985 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), 3986 Pair[SJ].Loops); 3987 switch (Pair[SJ].Classification) { 3988 case Subscript::ZIV: 3989 Mivs.reset(SJ); 3990 break; 3991 case Subscript::SIV: 3992 Sivs.set(SJ); 3993 Mivs.reset(SJ); 3994 break; 3995 case Subscript::RDIV: 3996 case Subscript::MIV: 3997 break; 3998 default: 3999 llvm_unreachable("bad subscript classification"); 4000 } 4001 } 4002 } 4003 } 4004 } 4005 } 4006 } 4007 llvm_unreachable("somehow reached end of routine"); 4008 return nullptr; 4009 } 4010