1 //===-- HardwareLoops.cpp - Target Independent Hardware Loops --*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// \file 9 /// Insert hardware loop intrinsics into loops which are deemed profitable by 10 /// the target, by querying TargetTransformInfo. A hardware loop comprises of 11 /// two intrinsics: one, outside the loop, to set the loop iteration count and 12 /// another, in the exit block, to decrement the counter. The decremented value 13 /// can either be carried through the loop via a phi or handled in some opaque 14 /// way by the target. 15 /// 16 //===----------------------------------------------------------------------===// 17 18 #include "llvm/CodeGen/HardwareLoops.h" 19 #include "llvm/ADT/Statistic.h" 20 #include "llvm/Analysis/AssumptionCache.h" 21 #include "llvm/Analysis/BranchProbabilityInfo.h" 22 #include "llvm/Analysis/LoopInfo.h" 23 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 24 #include "llvm/Analysis/ScalarEvolution.h" 25 #include "llvm/Analysis/TargetLibraryInfo.h" 26 #include "llvm/Analysis/TargetTransformInfo.h" 27 #include "llvm/CodeGen/Passes.h" 28 #include "llvm/IR/BasicBlock.h" 29 #include "llvm/IR/Constants.h" 30 #include "llvm/IR/Dominators.h" 31 #include "llvm/IR/IRBuilder.h" 32 #include "llvm/IR/Instructions.h" 33 #include "llvm/IR/IntrinsicInst.h" 34 #include "llvm/IR/Value.h" 35 #include "llvm/InitializePasses.h" 36 #include "llvm/Pass.h" 37 #include "llvm/PassRegistry.h" 38 #include "llvm/Support/CommandLine.h" 39 #include "llvm/Support/Debug.h" 40 #include "llvm/Transforms/Utils.h" 41 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 42 #include "llvm/Transforms/Utils/Local.h" 43 #include "llvm/Transforms/Utils/LoopUtils.h" 44 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" 45 46 #define DEBUG_TYPE "hardware-loops" 47 48 #define HW_LOOPS_NAME "Hardware Loop Insertion" 49 50 using namespace llvm; 51 52 static cl::opt<bool> 53 ForceHardwareLoops("force-hardware-loops", cl::Hidden, cl::init(false), 54 cl::desc("Force hardware loops intrinsics to be inserted")); 55 56 static cl::opt<bool> 57 ForceHardwareLoopPHI( 58 "force-hardware-loop-phi", cl::Hidden, cl::init(false), 59 cl::desc("Force hardware loop counter to be updated through a phi")); 60 61 static cl::opt<bool> 62 ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(false), 63 cl::desc("Force allowance of nested hardware loops")); 64 65 static cl::opt<unsigned> 66 LoopDecrement("hardware-loop-decrement", cl::Hidden, cl::init(1), 67 cl::desc("Set the loop decrement value")); 68 69 static cl::opt<unsigned> 70 CounterBitWidth("hardware-loop-counter-bitwidth", cl::Hidden, cl::init(32), 71 cl::desc("Set the loop counter bitwidth")); 72 73 static cl::opt<bool> 74 ForceGuardLoopEntry( 75 "force-hardware-loop-guard", cl::Hidden, cl::init(false), 76 cl::desc("Force generation of loop guard intrinsic")); 77 78 STATISTIC(NumHWLoops, "Number of loops converted to hardware loops"); 79 80 #ifndef NDEBUG 81 static void debugHWLoopFailure(const StringRef DebugMsg, 82 Instruction *I) { 83 dbgs() << "HWLoops: " << DebugMsg; 84 if (I) 85 dbgs() << ' ' << *I; 86 else 87 dbgs() << '.'; 88 dbgs() << '\n'; 89 } 90 #endif 91 92 static OptimizationRemarkAnalysis 93 createHWLoopAnalysis(StringRef RemarkName, Loop *L, Instruction *I) { 94 Value *CodeRegion = L->getHeader(); 95 DebugLoc DL = L->getStartLoc(); 96 97 if (I) { 98 CodeRegion = I->getParent(); 99 // If there is no debug location attached to the instruction, revert back to 100 // using the loop's. 101 if (I->getDebugLoc()) 102 DL = I->getDebugLoc(); 103 } 104 105 OptimizationRemarkAnalysis R(DEBUG_TYPE, RemarkName, DL, CodeRegion); 106 R << "hardware-loop not created: "; 107 return R; 108 } 109 110 namespace { 111 112 void reportHWLoopFailure(const StringRef Msg, const StringRef ORETag, 113 OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I = nullptr) { 114 LLVM_DEBUG(debugHWLoopFailure(Msg, I)); 115 ORE->emit(createHWLoopAnalysis(ORETag, TheLoop, I) << Msg); 116 } 117 118 using TTI = TargetTransformInfo; 119 120 class HardwareLoopsLegacy : public FunctionPass { 121 public: 122 static char ID; 123 124 HardwareLoopsLegacy() : FunctionPass(ID) { 125 initializeHardwareLoopsLegacyPass(*PassRegistry::getPassRegistry()); 126 } 127 128 bool runOnFunction(Function &F) override; 129 130 void getAnalysisUsage(AnalysisUsage &AU) const override { 131 AU.addRequired<LoopInfoWrapperPass>(); 132 AU.addPreserved<LoopInfoWrapperPass>(); 133 AU.addRequired<DominatorTreeWrapperPass>(); 134 AU.addPreserved<DominatorTreeWrapperPass>(); 135 AU.addRequired<ScalarEvolutionWrapperPass>(); 136 AU.addPreserved<ScalarEvolutionWrapperPass>(); 137 AU.addRequired<AssumptionCacheTracker>(); 138 AU.addRequired<TargetTransformInfoWrapperPass>(); 139 AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); 140 AU.addPreserved<BranchProbabilityInfoWrapperPass>(); 141 } 142 }; 143 144 class HardwareLoopsImpl { 145 public: 146 HardwareLoopsImpl(ScalarEvolution &SE, LoopInfo &LI, bool PreserveLCSSA, 147 DominatorTree &DT, const DataLayout &DL, 148 const TargetTransformInfo &TTI, TargetLibraryInfo *TLI, 149 AssumptionCache &AC, OptimizationRemarkEmitter *ORE, 150 HardwareLoopOptions &Opts) 151 : SE(SE), LI(LI), PreserveLCSSA(PreserveLCSSA), DT(DT), DL(DL), TTI(TTI), 152 TLI(TLI), AC(AC), ORE(ORE), Opts(Opts) { } 153 154 bool run(Function &F); 155 156 private: 157 // Try to convert the given Loop into a hardware loop. 158 bool TryConvertLoop(Loop *L, LLVMContext &Ctx); 159 160 // Given that the target believes the loop to be profitable, try to 161 // convert it. 162 bool TryConvertLoop(HardwareLoopInfo &HWLoopInfo); 163 164 ScalarEvolution &SE; 165 LoopInfo &LI; 166 bool PreserveLCSSA; 167 DominatorTree &DT; 168 const DataLayout &DL; 169 const TargetTransformInfo &TTI; 170 TargetLibraryInfo *TLI = nullptr; 171 AssumptionCache &AC; 172 OptimizationRemarkEmitter *ORE; 173 HardwareLoopOptions &Opts; 174 bool MadeChange = false; 175 }; 176 177 class HardwareLoop { 178 // Expand the trip count scev into a value that we can use. 179 Value *InitLoopCount(); 180 181 // Insert the set_loop_iteration intrinsic. 182 Value *InsertIterationSetup(Value *LoopCountInit); 183 184 // Insert the loop_decrement intrinsic. 185 void InsertLoopDec(); 186 187 // Insert the loop_decrement_reg intrinsic. 188 Instruction *InsertLoopRegDec(Value *EltsRem); 189 190 // If the target requires the counter value to be updated in the loop, 191 // insert a phi to hold the value. The intended purpose is for use by 192 // loop_decrement_reg. 193 PHINode *InsertPHICounter(Value *NumElts, Value *EltsRem); 194 195 // Create a new cmp, that checks the returned value of loop_decrement*, 196 // and update the exit branch to use it. 197 void UpdateBranch(Value *EltsRem); 198 199 public: 200 HardwareLoop(HardwareLoopInfo &Info, ScalarEvolution &SE, 201 const DataLayout &DL, 202 OptimizationRemarkEmitter *ORE, 203 HardwareLoopOptions &Opts) : 204 SE(SE), DL(DL), ORE(ORE), Opts(Opts), L(Info.L), M(L->getHeader()->getModule()), 205 ExitCount(Info.ExitCount), 206 CountType(Info.CountType), 207 ExitBranch(Info.ExitBranch), 208 LoopDecrement(Info.LoopDecrement), 209 UsePHICounter(Info.CounterInReg), 210 UseLoopGuard(Info.PerformEntryTest) { } 211 212 void Create(); 213 214 private: 215 ScalarEvolution &SE; 216 const DataLayout &DL; 217 OptimizationRemarkEmitter *ORE = nullptr; 218 HardwareLoopOptions &Opts; 219 Loop *L = nullptr; 220 Module *M = nullptr; 221 const SCEV *ExitCount = nullptr; 222 Type *CountType = nullptr; 223 BranchInst *ExitBranch = nullptr; 224 Value *LoopDecrement = nullptr; 225 bool UsePHICounter = false; 226 bool UseLoopGuard = false; 227 BasicBlock *BeginBB = nullptr; 228 }; 229 } 230 231 char HardwareLoopsLegacy::ID = 0; 232 233 bool HardwareLoopsLegacy::runOnFunction(Function &F) { 234 if (skipFunction(F)) 235 return false; 236 237 LLVM_DEBUG(dbgs() << "HWLoops: Running on " << F.getName() << "\n"); 238 239 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 240 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 241 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 242 auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); 243 auto &DL = F.getDataLayout(); 244 auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); 245 auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); 246 auto *TLI = TLIP ? &TLIP->getTLI(F) : nullptr; 247 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); 248 bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); 249 250 HardwareLoopOptions Opts; 251 if (ForceHardwareLoops.getNumOccurrences()) 252 Opts.setForce(ForceHardwareLoops); 253 if (ForceHardwareLoopPHI.getNumOccurrences()) 254 Opts.setForcePhi(ForceHardwareLoopPHI); 255 if (ForceNestedLoop.getNumOccurrences()) 256 Opts.setForceNested(ForceNestedLoop); 257 if (ForceGuardLoopEntry.getNumOccurrences()) 258 Opts.setForceGuard(ForceGuardLoopEntry); 259 if (LoopDecrement.getNumOccurrences()) 260 Opts.setDecrement(LoopDecrement); 261 if (CounterBitWidth.getNumOccurrences()) 262 Opts.setCounterBitwidth(CounterBitWidth); 263 264 HardwareLoopsImpl Impl(SE, LI, PreserveLCSSA, DT, DL, TTI, TLI, AC, ORE, 265 Opts); 266 return Impl.run(F); 267 } 268 269 PreservedAnalyses HardwareLoopsPass::run(Function &F, 270 FunctionAnalysisManager &AM) { 271 auto &LI = AM.getResult<LoopAnalysis>(F); 272 auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F); 273 auto &DT = AM.getResult<DominatorTreeAnalysis>(F); 274 auto &TTI = AM.getResult<TargetIRAnalysis>(F); 275 auto *TLI = &AM.getResult<TargetLibraryAnalysis>(F); 276 auto &AC = AM.getResult<AssumptionAnalysis>(F); 277 auto *ORE = &AM.getResult<OptimizationRemarkEmitterAnalysis>(F); 278 auto &DL = F.getDataLayout(); 279 280 HardwareLoopsImpl Impl(SE, LI, true, DT, DL, TTI, TLI, AC, ORE, Opts); 281 bool Changed = Impl.run(F); 282 if (!Changed) 283 return PreservedAnalyses::all(); 284 285 PreservedAnalyses PA; 286 PA.preserve<LoopAnalysis>(); 287 PA.preserve<ScalarEvolutionAnalysis>(); 288 PA.preserve<DominatorTreeAnalysis>(); 289 PA.preserve<BranchProbabilityAnalysis>(); 290 return PA; 291 } 292 293 bool HardwareLoopsImpl::run(Function &F) { 294 LLVMContext &Ctx = F.getContext(); 295 for (Loop *L : LI) 296 if (L->isOutermost()) 297 TryConvertLoop(L, Ctx); 298 return MadeChange; 299 } 300 301 // Return true if the search should stop, which will be when an inner loop is 302 // converted and the parent loop doesn't support containing a hardware loop. 303 bool HardwareLoopsImpl::TryConvertLoop(Loop *L, LLVMContext &Ctx) { 304 // Process nested loops first. 305 bool AnyChanged = false; 306 for (Loop *SL : *L) 307 AnyChanged |= TryConvertLoop(SL, Ctx); 308 if (AnyChanged) { 309 reportHWLoopFailure("nested hardware-loops not supported", "HWLoopNested", 310 ORE, L); 311 return true; // Stop search. 312 } 313 314 LLVM_DEBUG(dbgs() << "HWLoops: Loop " << L->getHeader()->getName() << "\n"); 315 316 HardwareLoopInfo HWLoopInfo(L); 317 if (!HWLoopInfo.canAnalyze(LI)) { 318 reportHWLoopFailure("cannot analyze loop, irreducible control flow", 319 "HWLoopCannotAnalyze", ORE, L); 320 return false; 321 } 322 323 if (!Opts.Force && 324 !TTI.isHardwareLoopProfitable(L, SE, AC, TLI, HWLoopInfo)) { 325 reportHWLoopFailure("it's not profitable to create a hardware-loop", 326 "HWLoopNotProfitable", ORE, L); 327 return false; 328 } 329 330 // Allow overriding of the counter width and loop decrement value. 331 if (Opts.Bitwidth.has_value()) { 332 HWLoopInfo.CountType = IntegerType::get(Ctx, Opts.Bitwidth.value()); 333 } 334 335 if (Opts.Decrement.has_value()) 336 HWLoopInfo.LoopDecrement = 337 ConstantInt::get(HWLoopInfo.CountType, Opts.Decrement.value()); 338 339 MadeChange |= TryConvertLoop(HWLoopInfo); 340 return MadeChange && (!HWLoopInfo.IsNestingLegal && !Opts.ForceNested); 341 } 342 343 bool HardwareLoopsImpl::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) { 344 345 Loop *L = HWLoopInfo.L; 346 LLVM_DEBUG(dbgs() << "HWLoops: Try to convert profitable loop: " << *L); 347 348 if (!HWLoopInfo.isHardwareLoopCandidate(SE, LI, DT, Opts.getForceNested(), 349 Opts.getForcePhi())) { 350 // TODO: there can be many reasons a loop is not considered a 351 // candidate, so we should let isHardwareLoopCandidate fill in the 352 // reason and then report a better message here. 353 reportHWLoopFailure("loop is not a candidate", "HWLoopNoCandidate", ORE, L); 354 return false; 355 } 356 357 assert( 358 (HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.ExitCount) && 359 "Hardware Loop must have set exit info."); 360 361 BasicBlock *Preheader = L->getLoopPreheader(); 362 363 // If we don't have a preheader, then insert one. 364 if (!Preheader) 365 Preheader = InsertPreheaderForLoop(L, &DT, &LI, nullptr, PreserveLCSSA); 366 if (!Preheader) 367 return false; 368 369 HardwareLoop HWLoop(HWLoopInfo, SE, DL, ORE, Opts); 370 HWLoop.Create(); 371 ++NumHWLoops; 372 return true; 373 } 374 375 void HardwareLoop::Create() { 376 LLVM_DEBUG(dbgs() << "HWLoops: Converting loop..\n"); 377 378 Value *LoopCountInit = InitLoopCount(); 379 if (!LoopCountInit) { 380 reportHWLoopFailure("could not safely create a loop count expression", 381 "HWLoopNotSafe", ORE, L); 382 return; 383 } 384 385 Value *Setup = InsertIterationSetup(LoopCountInit); 386 387 if (UsePHICounter || Opts.ForcePhi) { 388 Instruction *LoopDec = InsertLoopRegDec(LoopCountInit); 389 Value *EltsRem = InsertPHICounter(Setup, LoopDec); 390 LoopDec->setOperand(0, EltsRem); 391 UpdateBranch(LoopDec); 392 } else 393 InsertLoopDec(); 394 395 // Run through the basic blocks of the loop and see if any of them have dead 396 // PHIs that can be removed. 397 for (auto *I : L->blocks()) 398 DeleteDeadPHIs(I); 399 } 400 401 static bool CanGenerateTest(Loop *L, Value *Count) { 402 BasicBlock *Preheader = L->getLoopPreheader(); 403 if (!Preheader->getSinglePredecessor()) 404 return false; 405 406 BasicBlock *Pred = Preheader->getSinglePredecessor(); 407 if (!isa<BranchInst>(Pred->getTerminator())) 408 return false; 409 410 auto *BI = cast<BranchInst>(Pred->getTerminator()); 411 if (BI->isUnconditional() || !isa<ICmpInst>(BI->getCondition())) 412 return false; 413 414 // Check that the icmp is checking for equality of Count and zero and that 415 // a non-zero value results in entering the loop. 416 auto ICmp = cast<ICmpInst>(BI->getCondition()); 417 LLVM_DEBUG(dbgs() << " - Found condition: " << *ICmp << "\n"); 418 if (!ICmp->isEquality()) 419 return false; 420 421 auto IsCompareZero = [](ICmpInst *ICmp, Value *Count, unsigned OpIdx) { 422 if (auto *Const = dyn_cast<ConstantInt>(ICmp->getOperand(OpIdx))) 423 return Const->isZero() && ICmp->getOperand(OpIdx ^ 1) == Count; 424 return false; 425 }; 426 427 // Check if Count is a zext. 428 Value *CountBefZext = 429 isa<ZExtInst>(Count) ? cast<ZExtInst>(Count)->getOperand(0) : nullptr; 430 431 if (!IsCompareZero(ICmp, Count, 0) && !IsCompareZero(ICmp, Count, 1) && 432 !IsCompareZero(ICmp, CountBefZext, 0) && 433 !IsCompareZero(ICmp, CountBefZext, 1)) 434 return false; 435 436 unsigned SuccIdx = ICmp->getPredicate() == ICmpInst::ICMP_NE ? 0 : 1; 437 if (BI->getSuccessor(SuccIdx) != Preheader) 438 return false; 439 440 return true; 441 } 442 443 Value *HardwareLoop::InitLoopCount() { 444 LLVM_DEBUG(dbgs() << "HWLoops: Initialising loop counter value:\n"); 445 // Can we replace a conditional branch with an intrinsic that sets the 446 // loop counter and tests that is not zero? 447 448 SCEVExpander SCEVE(SE, DL, "loopcnt"); 449 if (!ExitCount->getType()->isPointerTy() && 450 ExitCount->getType() != CountType) 451 ExitCount = SE.getZeroExtendExpr(ExitCount, CountType); 452 453 ExitCount = SE.getAddExpr(ExitCount, SE.getOne(CountType)); 454 455 // If we're trying to use the 'test and set' form of the intrinsic, we need 456 // to replace a conditional branch that is controlling entry to the loop. It 457 // is likely (guaranteed?) that the preheader has an unconditional branch to 458 // the loop header, so also check if it has a single predecessor. 459 if (SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, ExitCount, 460 SE.getZero(ExitCount->getType()))) { 461 LLVM_DEBUG(dbgs() << " - Attempting to use test.set counter.\n"); 462 if (Opts.ForceGuard) 463 UseLoopGuard = true; 464 } else 465 UseLoopGuard = false; 466 467 BasicBlock *BB = L->getLoopPreheader(); 468 if (UseLoopGuard && BB->getSinglePredecessor() && 469 cast<BranchInst>(BB->getTerminator())->isUnconditional()) { 470 BasicBlock *Predecessor = BB->getSinglePredecessor(); 471 // If it's not safe to create a while loop then don't force it and create a 472 // do-while loop instead 473 if (!SCEVE.isSafeToExpandAt(ExitCount, Predecessor->getTerminator())) 474 UseLoopGuard = false; 475 else 476 BB = Predecessor; 477 } 478 479 if (!SCEVE.isSafeToExpandAt(ExitCount, BB->getTerminator())) { 480 LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand ExitCount " 481 << *ExitCount << "\n"); 482 return nullptr; 483 } 484 485 Value *Count = SCEVE.expandCodeFor(ExitCount, CountType, 486 BB->getTerminator()); 487 488 // FIXME: We've expanded Count where we hope to insert the counter setting 489 // intrinsic. But, in the case of the 'test and set' form, we may fallback to 490 // the just 'set' form and in which case the insertion block is most likely 491 // different. It means there will be instruction(s) in a block that possibly 492 // aren't needed. The isLoopEntryGuardedByCond is trying to avoid this issue, 493 // but it's doesn't appear to work in all cases. 494 495 UseLoopGuard = UseLoopGuard && CanGenerateTest(L, Count); 496 BeginBB = UseLoopGuard ? BB : L->getLoopPreheader(); 497 LLVM_DEBUG(dbgs() << " - Loop Count: " << *Count << "\n" 498 << " - Expanded Count in " << BB->getName() << "\n" 499 << " - Will insert set counter intrinsic into: " 500 << BeginBB->getName() << "\n"); 501 return Count; 502 } 503 504 Value* HardwareLoop::InsertIterationSetup(Value *LoopCountInit) { 505 IRBuilder<> Builder(BeginBB->getTerminator()); 506 if (BeginBB->getParent()->getAttributes().hasFnAttr(Attribute::StrictFP)) 507 Builder.setIsFPConstrained(true); 508 Type *Ty = LoopCountInit->getType(); 509 bool UsePhi = UsePHICounter || Opts.ForcePhi; 510 Intrinsic::ID ID = UseLoopGuard 511 ? (UsePhi ? Intrinsic::test_start_loop_iterations 512 : Intrinsic::test_set_loop_iterations) 513 : (UsePhi ? Intrinsic::start_loop_iterations 514 : Intrinsic::set_loop_iterations); 515 Function *LoopIter = Intrinsic::getDeclaration(M, ID, Ty); 516 Value *LoopSetup = Builder.CreateCall(LoopIter, LoopCountInit); 517 518 // Use the return value of the intrinsic to control the entry of the loop. 519 if (UseLoopGuard) { 520 assert((isa<BranchInst>(BeginBB->getTerminator()) && 521 cast<BranchInst>(BeginBB->getTerminator())->isConditional()) && 522 "Expected conditional branch"); 523 524 Value *SetCount = 525 UsePhi ? Builder.CreateExtractValue(LoopSetup, 1) : LoopSetup; 526 auto *LoopGuard = cast<BranchInst>(BeginBB->getTerminator()); 527 LoopGuard->setCondition(SetCount); 528 if (LoopGuard->getSuccessor(0) != L->getLoopPreheader()) 529 LoopGuard->swapSuccessors(); 530 } 531 LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop counter: " << *LoopSetup 532 << "\n"); 533 if (UsePhi && UseLoopGuard) 534 LoopSetup = Builder.CreateExtractValue(LoopSetup, 0); 535 return !UsePhi ? LoopCountInit : LoopSetup; 536 } 537 538 void HardwareLoop::InsertLoopDec() { 539 IRBuilder<> CondBuilder(ExitBranch); 540 if (ExitBranch->getParent()->getParent()->getAttributes().hasFnAttr( 541 Attribute::StrictFP)) 542 CondBuilder.setIsFPConstrained(true); 543 544 Function *DecFunc = 545 Intrinsic::getDeclaration(M, Intrinsic::loop_decrement, 546 LoopDecrement->getType()); 547 Value *Ops[] = { LoopDecrement }; 548 Value *NewCond = CondBuilder.CreateCall(DecFunc, Ops); 549 Value *OldCond = ExitBranch->getCondition(); 550 ExitBranch->setCondition(NewCond); 551 552 // The false branch must exit the loop. 553 if (!L->contains(ExitBranch->getSuccessor(0))) 554 ExitBranch->swapSuccessors(); 555 556 // The old condition may be dead now, and may have even created a dead PHI 557 // (the original induction variable). 558 RecursivelyDeleteTriviallyDeadInstructions(OldCond); 559 560 LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *NewCond << "\n"); 561 } 562 563 Instruction* HardwareLoop::InsertLoopRegDec(Value *EltsRem) { 564 IRBuilder<> CondBuilder(ExitBranch); 565 if (ExitBranch->getParent()->getParent()->getAttributes().hasFnAttr( 566 Attribute::StrictFP)) 567 CondBuilder.setIsFPConstrained(true); 568 569 Function *DecFunc = 570 Intrinsic::getDeclaration(M, Intrinsic::loop_decrement_reg, 571 { EltsRem->getType() }); 572 Value *Ops[] = { EltsRem, LoopDecrement }; 573 Value *Call = CondBuilder.CreateCall(DecFunc, Ops); 574 575 LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *Call << "\n"); 576 return cast<Instruction>(Call); 577 } 578 579 PHINode* HardwareLoop::InsertPHICounter(Value *NumElts, Value *EltsRem) { 580 BasicBlock *Preheader = L->getLoopPreheader(); 581 BasicBlock *Header = L->getHeader(); 582 BasicBlock *Latch = ExitBranch->getParent(); 583 IRBuilder<> Builder(Header, Header->getFirstNonPHIIt()); 584 PHINode *Index = Builder.CreatePHI(NumElts->getType(), 2); 585 Index->addIncoming(NumElts, Preheader); 586 Index->addIncoming(EltsRem, Latch); 587 LLVM_DEBUG(dbgs() << "HWLoops: PHI Counter: " << *Index << "\n"); 588 return Index; 589 } 590 591 void HardwareLoop::UpdateBranch(Value *EltsRem) { 592 IRBuilder<> CondBuilder(ExitBranch); 593 Value *NewCond = 594 CondBuilder.CreateICmpNE(EltsRem, ConstantInt::get(EltsRem->getType(), 0)); 595 Value *OldCond = ExitBranch->getCondition(); 596 ExitBranch->setCondition(NewCond); 597 598 // The false branch must exit the loop. 599 if (!L->contains(ExitBranch->getSuccessor(0))) 600 ExitBranch->swapSuccessors(); 601 602 // The old condition may be dead now, and may have even created a dead PHI 603 // (the original induction variable). 604 RecursivelyDeleteTriviallyDeadInstructions(OldCond); 605 } 606 607 INITIALIZE_PASS_BEGIN(HardwareLoopsLegacy, DEBUG_TYPE, HW_LOOPS_NAME, false, false) 608 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 609 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 610 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) 611 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) 612 INITIALIZE_PASS_END(HardwareLoopsLegacy, DEBUG_TYPE, HW_LOOPS_NAME, false, false) 613 614 FunctionPass *llvm::createHardwareLoopsLegacyPass() { return new HardwareLoopsLegacy(); } 615