1 //===- HexagonVectorLoopCarriedReuse.cpp ----------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass removes the computation of provably redundant expressions that have 10 // been computed earlier in a previous iteration. It relies on the use of PHIs 11 // to identify loop carried dependences. This is scalar replacement for vector 12 // types. 13 // 14 //----------------------------------------------------------------------------- 15 // Motivation: Consider the case where we have the following loop structure. 16 // 17 // Loop: 18 // t0 = a[i]; 19 // t1 = f(t0); 20 // t2 = g(t1); 21 // ... 22 // t3 = a[i+1]; 23 // t4 = f(t3); 24 // t5 = g(t4); 25 // t6 = op(t2, t5) 26 // cond_branch <Loop> 27 // 28 // This can be converted to 29 // t00 = a[0]; 30 // t10 = f(t00); 31 // t20 = g(t10); 32 // Loop: 33 // t2 = t20; 34 // t3 = a[i+1]; 35 // t4 = f(t3); 36 // t5 = g(t4); 37 // t6 = op(t2, t5) 38 // t20 = t5 39 // cond_branch <Loop> 40 // 41 // SROA does a good job of reusing a[i+1] as a[i] in the next iteration. 42 // Such a loop comes to this pass in the following form. 43 // 44 // LoopPreheader: 45 // X0 = a[0]; 46 // Loop: 47 // X2 = PHI<(X0, LoopPreheader), (X1, Loop)> 48 // t1 = f(X2) <-- I1 49 // t2 = g(t1) 50 // ... 51 // X1 = a[i+1] 52 // t4 = f(X1) <-- I2 53 // t5 = g(t4) 54 // t6 = op(t2, t5) 55 // cond_branch <Loop> 56 // 57 // In this pass, we look for PHIs such as X2 whose incoming values come only 58 // from the Loop Preheader and over the backedge and additionaly, both these 59 // values are the results of the same operation in terms of opcode. We call such 60 // a PHI node a dependence chain or DepChain. In this case, the dependence of X2 61 // over X1 is carried over only one iteration and so the DepChain is only one 62 // PHI node long. 63 // 64 // Then, we traverse the uses of the PHI (X2) and the uses of the value of the 65 // PHI coming over the backedge (X1). We stop at the first pair of such users 66 // I1 (of X2) and I2 (of X1) that meet the following conditions. 67 // 1. I1 and I2 are the same operation, but with different operands. 68 // 2. X2 and X1 are used at the same operand number in the two instructions. 69 // 3. All other operands Op1 of I1 and Op2 of I2 are also such that there is a 70 // a DepChain from Op1 to Op2 of the same length as that between X2 and X1. 71 // 72 // We then make the following transformation 73 // LoopPreheader: 74 // X0 = a[0]; 75 // Y0 = f(X0); 76 // Loop: 77 // X2 = PHI<(X0, LoopPreheader), (X1, Loop)> 78 // Y2 = PHI<(Y0, LoopPreheader), (t4, Loop)> 79 // t1 = f(X2) <-- Will be removed by DCE. 80 // t2 = g(Y2) 81 // ... 82 // X1 = a[i+1] 83 // t4 = f(X1) 84 // t5 = g(t4) 85 // t6 = op(t2, t5) 86 // cond_branch <Loop> 87 // 88 // We proceed until we cannot find any more such instructions I1 and I2. 89 // 90 // --- DepChains & Loop carried dependences --- 91 // Consider a single basic block loop such as 92 // 93 // LoopPreheader: 94 // X0 = ... 95 // Y0 = ... 96 // Loop: 97 // X2 = PHI<(X0, LoopPreheader), (X1, Loop)> 98 // Y2 = PHI<(Y0, LoopPreheader), (X2, Loop)> 99 // ... 100 // X1 = ... 101 // ... 102 // cond_branch <Loop> 103 // 104 // Then there is a dependence between X2 and X1 that goes back one iteration, 105 // i.e. X1 is used as X2 in the very next iteration. We represent this as a 106 // DepChain from X2 to X1 (X2->X1). 107 // Similarly, there is a dependence between Y2 and X1 that goes back two 108 // iterations. X1 is used as Y2 two iterations after it is computed. This is 109 // represented by a DepChain as (Y2->X2->X1). 110 // 111 // A DepChain has the following properties. 112 // 1. Num of edges in DepChain = Number of Instructions in DepChain = Number of 113 // iterations of carried dependence + 1. 114 // 2. All instructions in the DepChain except the last are PHIs. 115 // 116 //===----------------------------------------------------------------------===// 117 118 #include "llvm/ADT/SetVector.h" 119 #include "llvm/ADT/SmallVector.h" 120 #include "llvm/ADT/Statistic.h" 121 #include "llvm/Analysis/LoopInfo.h" 122 #include "llvm/Analysis/LoopPass.h" 123 #include "llvm/IR/BasicBlock.h" 124 #include "llvm/IR/DerivedTypes.h" 125 #include "llvm/IR/IRBuilder.h" 126 #include "llvm/IR/Instruction.h" 127 #include "llvm/IR/Instructions.h" 128 #include "llvm/IR/IntrinsicInst.h" 129 #include "llvm/IR/Intrinsics.h" 130 #include "llvm/IR/IntrinsicsHexagon.h" 131 #include "llvm/IR/Use.h" 132 #include "llvm/IR/User.h" 133 #include "llvm/IR/Value.h" 134 #include "llvm/InitializePasses.h" 135 #include "llvm/Pass.h" 136 #include "llvm/Support/Casting.h" 137 #include "llvm/Support/CommandLine.h" 138 #include "llvm/Support/Compiler.h" 139 #include "llvm/Support/Debug.h" 140 #include "llvm/Support/raw_ostream.h" 141 #include "llvm/Transforms/Scalar.h" 142 #include "llvm/Transforms/Utils.h" 143 #include <algorithm> 144 #include <cassert> 145 #include <cstddef> 146 #include <map> 147 #include <memory> 148 #include <set> 149 150 using namespace llvm; 151 152 #define DEBUG_TYPE "hexagon-vlcr" 153 154 STATISTIC(HexagonNumVectorLoopCarriedReuse, 155 "Number of values that were reused from a previous iteration."); 156 157 static cl::opt<int> HexagonVLCRIterationLim("hexagon-vlcr-iteration-lim", 158 cl::Hidden, 159 cl::desc("Maximum distance of loop carried dependences that are handled"), 160 cl::init(2), cl::ZeroOrMore); 161 162 namespace llvm { 163 164 void initializeHexagonVectorLoopCarriedReusePass(PassRegistry&); 165 Pass *createHexagonVectorLoopCarriedReusePass(); 166 167 } // end namespace llvm 168 169 namespace { 170 171 // See info about DepChain in the comments at the top of this file. 172 using ChainOfDependences = SmallVector<Instruction *, 4>; 173 174 class DepChain { 175 ChainOfDependences Chain; 176 177 public: 178 bool isIdentical(DepChain &Other) const { 179 if (Other.size() != size()) 180 return false; 181 ChainOfDependences &OtherChain = Other.getChain(); 182 for (int i = 0; i < size(); ++i) { 183 if (Chain[i] != OtherChain[i]) 184 return false; 185 } 186 return true; 187 } 188 189 ChainOfDependences &getChain() { 190 return Chain; 191 } 192 193 int size() const { 194 return Chain.size(); 195 } 196 197 void clear() { 198 Chain.clear(); 199 } 200 201 void push_back(Instruction *I) { 202 Chain.push_back(I); 203 } 204 205 int iterations() const { 206 return size() - 1; 207 } 208 209 Instruction *front() const { 210 return Chain.front(); 211 } 212 213 Instruction *back() const { 214 return Chain.back(); 215 } 216 217 Instruction *&operator[](const int index) { 218 return Chain[index]; 219 } 220 221 friend raw_ostream &operator<< (raw_ostream &OS, const DepChain &D); 222 }; 223 224 LLVM_ATTRIBUTE_UNUSED 225 raw_ostream &operator<<(raw_ostream &OS, const DepChain &D) { 226 const ChainOfDependences &CD = D.Chain; 227 int ChainSize = CD.size(); 228 OS << "**DepChain Start::**\n"; 229 for (int i = 0; i < ChainSize -1; ++i) { 230 OS << *(CD[i]) << " -->\n"; 231 } 232 OS << *CD[ChainSize-1] << "\n"; 233 return OS; 234 } 235 236 struct ReuseValue { 237 Instruction *Inst2Replace = nullptr; 238 239 // In the new PHI node that we'll construct this is the value that'll be 240 // used over the backedge. This is the value that gets reused from a 241 // previous iteration. 242 Instruction *BackedgeInst = nullptr; 243 std::map<Instruction *, DepChain *> DepChains; 244 int Iterations = -1; 245 246 ReuseValue() = default; 247 248 void reset() { 249 Inst2Replace = nullptr; 250 BackedgeInst = nullptr; 251 DepChains.clear(); 252 Iterations = -1; 253 } 254 bool isDefined() { return Inst2Replace != nullptr; } 255 }; 256 257 LLVM_ATTRIBUTE_UNUSED 258 raw_ostream &operator<<(raw_ostream &OS, const ReuseValue &RU) { 259 OS << "** ReuseValue ***\n"; 260 OS << "Instruction to Replace: " << *(RU.Inst2Replace) << "\n"; 261 OS << "Backedge Instruction: " << *(RU.BackedgeInst) << "\n"; 262 return OS; 263 } 264 265 class HexagonVectorLoopCarriedReuse : public LoopPass { 266 public: 267 static char ID; 268 269 explicit HexagonVectorLoopCarriedReuse() : LoopPass(ID) { 270 PassRegistry *PR = PassRegistry::getPassRegistry(); 271 initializeHexagonVectorLoopCarriedReusePass(*PR); 272 } 273 274 StringRef getPassName() const override { 275 return "Hexagon-specific loop carried reuse for HVX vectors"; 276 } 277 278 void getAnalysisUsage(AnalysisUsage &AU) const override { 279 AU.addRequired<LoopInfoWrapperPass>(); 280 AU.addRequiredID(LoopSimplifyID); 281 AU.addRequiredID(LCSSAID); 282 AU.addPreservedID(LCSSAID); 283 AU.setPreservesCFG(); 284 } 285 286 bool runOnLoop(Loop *L, LPPassManager &LPM) override; 287 288 private: 289 SetVector<DepChain *> Dependences; 290 std::set<Instruction *> ReplacedInsts; 291 Loop *CurLoop; 292 ReuseValue ReuseCandidate; 293 294 bool doVLCR(); 295 void findLoopCarriedDeps(); 296 void findValueToReuse(); 297 void findDepChainFromPHI(Instruction *I, DepChain &D); 298 void reuseValue(); 299 Value *findValueInBlock(Value *Op, BasicBlock *BB); 300 DepChain *getDepChainBtwn(Instruction *I1, Instruction *I2, int Iters); 301 bool isEquivalentOperation(Instruction *I1, Instruction *I2); 302 bool canReplace(Instruction *I); 303 bool isCallInstCommutative(CallInst *C); 304 }; 305 306 } // end anonymous namespace 307 308 char HexagonVectorLoopCarriedReuse::ID = 0; 309 310 INITIALIZE_PASS_BEGIN(HexagonVectorLoopCarriedReuse, "hexagon-vlcr", 311 "Hexagon-specific predictive commoning for HVX vectors", false, false) 312 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 313 INITIALIZE_PASS_DEPENDENCY(LoopSimplify) 314 INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass) 315 INITIALIZE_PASS_END(HexagonVectorLoopCarriedReuse, "hexagon-vlcr", 316 "Hexagon-specific predictive commoning for HVX vectors", false, false) 317 318 bool HexagonVectorLoopCarriedReuse::runOnLoop(Loop *L, LPPassManager &LPM) { 319 if (skipLoop(L)) 320 return false; 321 322 if (!L->getLoopPreheader()) 323 return false; 324 325 // Work only on innermost loops. 326 if (!L->getSubLoops().empty()) 327 return false; 328 329 // Work only on single basic blocks loops. 330 if (L->getNumBlocks() != 1) 331 return false; 332 333 CurLoop = L; 334 335 return doVLCR(); 336 } 337 338 bool HexagonVectorLoopCarriedReuse::isCallInstCommutative(CallInst *C) { 339 switch (C->getCalledFunction()->getIntrinsicID()) { 340 case Intrinsic::hexagon_V6_vaddb: 341 case Intrinsic::hexagon_V6_vaddb_128B: 342 case Intrinsic::hexagon_V6_vaddh: 343 case Intrinsic::hexagon_V6_vaddh_128B: 344 case Intrinsic::hexagon_V6_vaddw: 345 case Intrinsic::hexagon_V6_vaddw_128B: 346 case Intrinsic::hexagon_V6_vaddubh: 347 case Intrinsic::hexagon_V6_vaddubh_128B: 348 case Intrinsic::hexagon_V6_vadduhw: 349 case Intrinsic::hexagon_V6_vadduhw_128B: 350 case Intrinsic::hexagon_V6_vaddhw: 351 case Intrinsic::hexagon_V6_vaddhw_128B: 352 case Intrinsic::hexagon_V6_vmaxb: 353 case Intrinsic::hexagon_V6_vmaxb_128B: 354 case Intrinsic::hexagon_V6_vmaxh: 355 case Intrinsic::hexagon_V6_vmaxh_128B: 356 case Intrinsic::hexagon_V6_vmaxw: 357 case Intrinsic::hexagon_V6_vmaxw_128B: 358 case Intrinsic::hexagon_V6_vmaxub: 359 case Intrinsic::hexagon_V6_vmaxub_128B: 360 case Intrinsic::hexagon_V6_vmaxuh: 361 case Intrinsic::hexagon_V6_vmaxuh_128B: 362 case Intrinsic::hexagon_V6_vminub: 363 case Intrinsic::hexagon_V6_vminub_128B: 364 case Intrinsic::hexagon_V6_vminuh: 365 case Intrinsic::hexagon_V6_vminuh_128B: 366 case Intrinsic::hexagon_V6_vminb: 367 case Intrinsic::hexagon_V6_vminb_128B: 368 case Intrinsic::hexagon_V6_vminh: 369 case Intrinsic::hexagon_V6_vminh_128B: 370 case Intrinsic::hexagon_V6_vminw: 371 case Intrinsic::hexagon_V6_vminw_128B: 372 case Intrinsic::hexagon_V6_vmpyub: 373 case Intrinsic::hexagon_V6_vmpyub_128B: 374 case Intrinsic::hexagon_V6_vmpyuh: 375 case Intrinsic::hexagon_V6_vmpyuh_128B: 376 case Intrinsic::hexagon_V6_vavgub: 377 case Intrinsic::hexagon_V6_vavgub_128B: 378 case Intrinsic::hexagon_V6_vavgh: 379 case Intrinsic::hexagon_V6_vavgh_128B: 380 case Intrinsic::hexagon_V6_vavguh: 381 case Intrinsic::hexagon_V6_vavguh_128B: 382 case Intrinsic::hexagon_V6_vavgw: 383 case Intrinsic::hexagon_V6_vavgw_128B: 384 case Intrinsic::hexagon_V6_vavgb: 385 case Intrinsic::hexagon_V6_vavgb_128B: 386 case Intrinsic::hexagon_V6_vavguw: 387 case Intrinsic::hexagon_V6_vavguw_128B: 388 case Intrinsic::hexagon_V6_vabsdiffh: 389 case Intrinsic::hexagon_V6_vabsdiffh_128B: 390 case Intrinsic::hexagon_V6_vabsdiffub: 391 case Intrinsic::hexagon_V6_vabsdiffub_128B: 392 case Intrinsic::hexagon_V6_vabsdiffuh: 393 case Intrinsic::hexagon_V6_vabsdiffuh_128B: 394 case Intrinsic::hexagon_V6_vabsdiffw: 395 case Intrinsic::hexagon_V6_vabsdiffw_128B: 396 return true; 397 default: 398 return false; 399 } 400 } 401 402 bool HexagonVectorLoopCarriedReuse::isEquivalentOperation(Instruction *I1, 403 Instruction *I2) { 404 if (!I1->isSameOperationAs(I2)) 405 return false; 406 // This check is in place specifically for intrinsics. isSameOperationAs will 407 // return two for any two hexagon intrinsics because they are essentially the 408 // same instruciton (CallInst). We need to scratch the surface to see if they 409 // are calls to the same function. 410 if (CallInst *C1 = dyn_cast<CallInst>(I1)) { 411 if (CallInst *C2 = dyn_cast<CallInst>(I2)) { 412 if (C1->getCalledFunction() != C2->getCalledFunction()) 413 return false; 414 } 415 } 416 417 // If both the Instructions are of Vector Type and any of the element 418 // is integer constant, check their values too for equivalence. 419 if (I1->getType()->isVectorTy() && I2->getType()->isVectorTy()) { 420 unsigned NumOperands = I1->getNumOperands(); 421 for (unsigned i = 0; i < NumOperands; ++i) { 422 ConstantInt *C1 = dyn_cast<ConstantInt>(I1->getOperand(i)); 423 ConstantInt *C2 = dyn_cast<ConstantInt>(I2->getOperand(i)); 424 if(!C1) continue; 425 assert(C2); 426 if (C1->getSExtValue() != C2->getSExtValue()) 427 return false; 428 } 429 } 430 431 return true; 432 } 433 434 bool HexagonVectorLoopCarriedReuse::canReplace(Instruction *I) { 435 const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I); 436 if (!II) 437 return true; 438 439 switch (II->getIntrinsicID()) { 440 case Intrinsic::hexagon_V6_hi: 441 case Intrinsic::hexagon_V6_lo: 442 case Intrinsic::hexagon_V6_hi_128B: 443 case Intrinsic::hexagon_V6_lo_128B: 444 LLVM_DEBUG(dbgs() << "Not considering for reuse: " << *II << "\n"); 445 return false; 446 default: 447 return true; 448 } 449 } 450 void HexagonVectorLoopCarriedReuse::findValueToReuse() { 451 for (auto *D : Dependences) { 452 LLVM_DEBUG(dbgs() << "Processing dependence " << *(D->front()) << "\n"); 453 if (D->iterations() > HexagonVLCRIterationLim) { 454 LLVM_DEBUG( 455 dbgs() 456 << ".. Skipping because number of iterations > than the limit\n"); 457 continue; 458 } 459 460 PHINode *PN = cast<PHINode>(D->front()); 461 Instruction *BEInst = D->back(); 462 int Iters = D->iterations(); 463 BasicBlock *BB = PN->getParent(); 464 LLVM_DEBUG(dbgs() << "Checking if any uses of " << *PN 465 << " can be reused\n"); 466 467 SmallVector<Instruction *, 4> PNUsers; 468 for (auto UI = PN->use_begin(), E = PN->use_end(); UI != E; ++UI) { 469 Use &U = *UI; 470 Instruction *User = cast<Instruction>(U.getUser()); 471 472 if (User->getParent() != BB) 473 continue; 474 if (ReplacedInsts.count(User)) { 475 LLVM_DEBUG(dbgs() << *User 476 << " has already been replaced. Skipping...\n"); 477 continue; 478 } 479 if (isa<PHINode>(User)) 480 continue; 481 if (User->mayHaveSideEffects()) 482 continue; 483 if (!canReplace(User)) 484 continue; 485 486 PNUsers.push_back(User); 487 } 488 LLVM_DEBUG(dbgs() << PNUsers.size() << " use(s) of the PHI in the block\n"); 489 490 // For each interesting use I of PN, find an Instruction BEUser that 491 // performs the same operation as I on BEInst and whose other operands, 492 // if any, can also be rematerialized in OtherBB. We stop when we find the 493 // first such Instruction BEUser. This is because once BEUser is 494 // rematerialized in OtherBB, we may find more such "fixup" opportunities 495 // in this block. So, we'll start over again. 496 for (Instruction *I : PNUsers) { 497 for (auto UI = BEInst->use_begin(), E = BEInst->use_end(); UI != E; 498 ++UI) { 499 Use &U = *UI; 500 Instruction *BEUser = cast<Instruction>(U.getUser()); 501 502 if (BEUser->getParent() != BB) 503 continue; 504 if (!isEquivalentOperation(I, BEUser)) 505 continue; 506 507 int NumOperands = I->getNumOperands(); 508 509 // Take operands of each PNUser one by one and try to find DepChain 510 // with every operand of the BEUser. If any of the operands of BEUser 511 // has DepChain with current operand of the PNUser, break the matcher 512 // loop. Keep doing this for Every PNUser operand. If PNUser operand 513 // does not have DepChain with any of the BEUser operand, break the 514 // outer matcher loop, mark the BEUser as null and reset the ReuseCandidate. 515 // This ensures that DepChain exist for all the PNUser operand with 516 // BEUser operand. This also ensures that DepChains are independent of 517 // the positions in PNUser and BEUser. 518 std::map<Instruction *, DepChain *> DepChains; 519 CallInst *C1 = dyn_cast<CallInst>(I); 520 if ((I && I->isCommutative()) || (C1 && isCallInstCommutative(C1))) { 521 bool Found = false; 522 for (int OpNo = 0; OpNo < NumOperands; ++OpNo) { 523 Value *Op = I->getOperand(OpNo); 524 Instruction *OpInst = dyn_cast<Instruction>(Op); 525 Found = false; 526 for (int T = 0; T < NumOperands; ++T) { 527 Value *BEOp = BEUser->getOperand(T); 528 Instruction *BEOpInst = dyn_cast<Instruction>(BEOp); 529 if (!OpInst && !BEOpInst) { 530 if (Op == BEOp) { 531 Found = true; 532 break; 533 } 534 } 535 536 if ((OpInst && !BEOpInst) || (!OpInst && BEOpInst)) 537 continue; 538 539 DepChain *D = getDepChainBtwn(OpInst, BEOpInst, Iters); 540 541 if (D) { 542 Found = true; 543 DepChains[OpInst] = D; 544 break; 545 } 546 } 547 if (!Found) { 548 BEUser = nullptr; 549 break; 550 } 551 } 552 } else { 553 554 for (int OpNo = 0; OpNo < NumOperands; ++OpNo) { 555 Value *Op = I->getOperand(OpNo); 556 Value *BEOp = BEUser->getOperand(OpNo); 557 558 Instruction *OpInst = dyn_cast<Instruction>(Op); 559 if (!OpInst) { 560 if (Op == BEOp) 561 continue; 562 // Do not allow reuse to occur when the operands may be different 563 // values. 564 BEUser = nullptr; 565 break; 566 } 567 568 Instruction *BEOpInst = dyn_cast<Instruction>(BEOp); 569 DepChain *D = getDepChainBtwn(OpInst, BEOpInst, Iters); 570 571 if (D) { 572 DepChains[OpInst] = D; 573 } else { 574 BEUser = nullptr; 575 break; 576 } 577 } 578 } 579 if (BEUser) { 580 LLVM_DEBUG(dbgs() << "Found Value for reuse.\n"); 581 ReuseCandidate.Inst2Replace = I; 582 ReuseCandidate.BackedgeInst = BEUser; 583 ReuseCandidate.DepChains = DepChains; 584 ReuseCandidate.Iterations = Iters; 585 return; 586 } 587 ReuseCandidate.reset(); 588 } 589 } 590 } 591 ReuseCandidate.reset(); 592 } 593 594 Value *HexagonVectorLoopCarriedReuse::findValueInBlock(Value *Op, 595 BasicBlock *BB) { 596 PHINode *PN = dyn_cast<PHINode>(Op); 597 assert(PN); 598 Value *ValueInBlock = PN->getIncomingValueForBlock(BB); 599 return ValueInBlock; 600 } 601 602 void HexagonVectorLoopCarriedReuse::reuseValue() { 603 LLVM_DEBUG(dbgs() << ReuseCandidate); 604 Instruction *Inst2Replace = ReuseCandidate.Inst2Replace; 605 Instruction *BEInst = ReuseCandidate.BackedgeInst; 606 int NumOperands = Inst2Replace->getNumOperands(); 607 std::map<Instruction *, DepChain *> &DepChains = ReuseCandidate.DepChains; 608 int Iterations = ReuseCandidate.Iterations; 609 BasicBlock *LoopPH = CurLoop->getLoopPreheader(); 610 assert(!DepChains.empty() && "No DepChains"); 611 LLVM_DEBUG(dbgs() << "reuseValue is making the following changes\n"); 612 613 SmallVector<Instruction *, 4> InstsInPreheader; 614 for (int i = 0; i < Iterations; ++i) { 615 Instruction *InstInPreheader = Inst2Replace->clone(); 616 SmallVector<Value *, 4> Ops; 617 for (int j = 0; j < NumOperands; ++j) { 618 Instruction *I = dyn_cast<Instruction>(Inst2Replace->getOperand(j)); 619 if (!I) 620 continue; 621 // Get the DepChain corresponding to this operand. 622 DepChain &D = *DepChains[I]; 623 // Get the PHI for the iteration number and find 624 // the incoming value from the Loop Preheader for 625 // that PHI. 626 Value *ValInPreheader = findValueInBlock(D[i], LoopPH); 627 InstInPreheader->setOperand(j, ValInPreheader); 628 } 629 InstsInPreheader.push_back(InstInPreheader); 630 InstInPreheader->setName(Inst2Replace->getName() + ".hexagon.vlcr"); 631 InstInPreheader->insertBefore(LoopPH->getTerminator()); 632 LLVM_DEBUG(dbgs() << "Added " << *InstInPreheader << " to " 633 << LoopPH->getName() << "\n"); 634 } 635 BasicBlock *BB = BEInst->getParent(); 636 IRBuilder<> IRB(BB); 637 IRB.SetInsertPoint(BB->getFirstNonPHI()); 638 Value *BEVal = BEInst; 639 PHINode *NewPhi; 640 for (int i = Iterations-1; i >=0 ; --i) { 641 Instruction *InstInPreheader = InstsInPreheader[i]; 642 NewPhi = IRB.CreatePHI(InstInPreheader->getType(), 2); 643 NewPhi->addIncoming(InstInPreheader, LoopPH); 644 NewPhi->addIncoming(BEVal, BB); 645 LLVM_DEBUG(dbgs() << "Adding " << *NewPhi << " to " << BB->getName() 646 << "\n"); 647 BEVal = NewPhi; 648 } 649 // We are in LCSSA form. So, a value defined inside the Loop is used only 650 // inside the loop. So, the following is safe. 651 Inst2Replace->replaceAllUsesWith(NewPhi); 652 ReplacedInsts.insert(Inst2Replace); 653 ++HexagonNumVectorLoopCarriedReuse; 654 } 655 656 bool HexagonVectorLoopCarriedReuse::doVLCR() { 657 assert(CurLoop->getSubLoops().empty() && 658 "Can do VLCR on the innermost loop only"); 659 assert((CurLoop->getNumBlocks() == 1) && 660 "Can do VLCR only on single block loops"); 661 662 bool Changed = false; 663 bool Continue; 664 665 LLVM_DEBUG(dbgs() << "Working on Loop: " << *CurLoop->getHeader() << "\n"); 666 do { 667 // Reset datastructures. 668 Dependences.clear(); 669 Continue = false; 670 671 findLoopCarriedDeps(); 672 findValueToReuse(); 673 if (ReuseCandidate.isDefined()) { 674 reuseValue(); 675 Changed = true; 676 Continue = true; 677 } 678 llvm::for_each(Dependences, std::default_delete<DepChain>()); 679 } while (Continue); 680 return Changed; 681 } 682 683 void HexagonVectorLoopCarriedReuse::findDepChainFromPHI(Instruction *I, 684 DepChain &D) { 685 PHINode *PN = dyn_cast<PHINode>(I); 686 if (!PN) { 687 D.push_back(I); 688 return; 689 } else { 690 auto NumIncomingValues = PN->getNumIncomingValues(); 691 if (NumIncomingValues != 2) { 692 D.clear(); 693 return; 694 } 695 696 BasicBlock *BB = PN->getParent(); 697 if (BB != CurLoop->getHeader()) { 698 D.clear(); 699 return; 700 } 701 702 Value *BEVal = PN->getIncomingValueForBlock(BB); 703 Instruction *BEInst = dyn_cast<Instruction>(BEVal); 704 // This is a single block loop with a preheader, so at least 705 // one value should come over the backedge. 706 assert(BEInst && "There should be a value over the backedge"); 707 708 Value *PreHdrVal = 709 PN->getIncomingValueForBlock(CurLoop->getLoopPreheader()); 710 if(!PreHdrVal || !isa<Instruction>(PreHdrVal)) { 711 D.clear(); 712 return; 713 } 714 D.push_back(PN); 715 findDepChainFromPHI(BEInst, D); 716 } 717 } 718 719 DepChain *HexagonVectorLoopCarriedReuse::getDepChainBtwn(Instruction *I1, 720 Instruction *I2, 721 int Iters) { 722 for (auto *D : Dependences) { 723 if (D->front() == I1 && D->back() == I2 && D->iterations() == Iters) 724 return D; 725 } 726 return nullptr; 727 } 728 729 void HexagonVectorLoopCarriedReuse::findLoopCarriedDeps() { 730 BasicBlock *BB = CurLoop->getHeader(); 731 for (auto I = BB->begin(), E = BB->end(); I != E && isa<PHINode>(I); ++I) { 732 auto *PN = cast<PHINode>(I); 733 if (!isa<VectorType>(PN->getType())) 734 continue; 735 736 DepChain *D = new DepChain(); 737 findDepChainFromPHI(PN, *D); 738 if (D->size() != 0) 739 Dependences.insert(D); 740 else 741 delete D; 742 } 743 LLVM_DEBUG(dbgs() << "Found " << Dependences.size() << " dependences\n"); 744 LLVM_DEBUG(for (size_t i = 0; i < Dependences.size(); 745 ++i) { dbgs() << *Dependences[i] << "\n"; }); 746 } 747 748 Pass *llvm::createHexagonVectorLoopCarriedReusePass() { 749 return new HexagonVectorLoopCarriedReuse(); 750 } 751