1 //===- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass performs loop invariant code motion on machine instructions. We 10 // attempt to remove as much code from the body of a loop as possible. 11 // 12 // This pass is not intended to be a replacement or a complete alternative 13 // for the LLVM-IR-level LICM pass. It is only designed to hoist simple 14 // constructs that are not exposed before lowering and instruction selection. 15 // 16 //===----------------------------------------------------------------------===// 17 18 #include "llvm/ADT/BitVector.h" 19 #include "llvm/ADT/DenseMap.h" 20 #include "llvm/ADT/STLExtras.h" 21 #include "llvm/ADT/SmallSet.h" 22 #include "llvm/ADT/SmallVector.h" 23 #include "llvm/ADT/Statistic.h" 24 #include "llvm/Analysis/AliasAnalysis.h" 25 #include "llvm/CodeGen/MachineBasicBlock.h" 26 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" 27 #include "llvm/CodeGen/MachineDominators.h" 28 #include "llvm/CodeGen/MachineFrameInfo.h" 29 #include "llvm/CodeGen/MachineFunction.h" 30 #include "llvm/CodeGen/MachineFunctionPass.h" 31 #include "llvm/CodeGen/MachineInstr.h" 32 #include "llvm/CodeGen/MachineLoopInfo.h" 33 #include "llvm/CodeGen/MachineMemOperand.h" 34 #include "llvm/CodeGen/MachineOperand.h" 35 #include "llvm/CodeGen/MachineRegisterInfo.h" 36 #include "llvm/CodeGen/PseudoSourceValue.h" 37 #include "llvm/CodeGen/TargetInstrInfo.h" 38 #include "llvm/CodeGen/TargetLowering.h" 39 #include "llvm/CodeGen/TargetRegisterInfo.h" 40 #include "llvm/CodeGen/TargetSchedule.h" 41 #include "llvm/CodeGen/TargetSubtargetInfo.h" 42 #include "llvm/IR/DebugLoc.h" 43 #include "llvm/InitializePasses.h" 44 #include "llvm/MC/MCInstrDesc.h" 45 #include "llvm/MC/MCRegister.h" 46 #include "llvm/MC/MCRegisterInfo.h" 47 #include "llvm/Pass.h" 48 #include "llvm/Support/Casting.h" 49 #include "llvm/Support/CommandLine.h" 50 #include "llvm/Support/Debug.h" 51 #include "llvm/Support/raw_ostream.h" 52 #include <algorithm> 53 #include <cassert> 54 #include <limits> 55 #include <vector> 56 57 using namespace llvm; 58 59 #define DEBUG_TYPE "machinelicm" 60 61 static cl::opt<bool> 62 AvoidSpeculation("avoid-speculation", 63 cl::desc("MachineLICM should avoid speculation"), 64 cl::init(true), cl::Hidden); 65 66 static cl::opt<bool> 67 HoistCheapInsts("hoist-cheap-insts", 68 cl::desc("MachineLICM should hoist even cheap instructions"), 69 cl::init(false), cl::Hidden); 70 71 static cl::opt<bool> 72 HoistConstStores("hoist-const-stores", 73 cl::desc("Hoist invariant stores"), 74 cl::init(true), cl::Hidden); 75 // The default threshold of 100 (i.e. if target block is 100 times hotter) 76 // is based on empirical data on a single target and is subject to tuning. 77 static cl::opt<unsigned> 78 BlockFrequencyRatioThreshold("block-freq-ratio-threshold", 79 cl::desc("Do not hoist instructions if target" 80 "block is N times hotter than the source."), 81 cl::init(100), cl::Hidden); 82 83 enum class UseBFI { None, PGO, All }; 84 85 static cl::opt<UseBFI> 86 DisableHoistingToHotterBlocks("disable-hoisting-to-hotter-blocks", 87 cl::desc("Disable hoisting instructions to" 88 " hotter blocks"), 89 cl::init(UseBFI::PGO), cl::Hidden, 90 cl::values(clEnumValN(UseBFI::None, "none", 91 "disable the feature"), 92 clEnumValN(UseBFI::PGO, "pgo", 93 "enable the feature when using profile data"), 94 clEnumValN(UseBFI::All, "all", 95 "enable the feature with/wo profile data"))); 96 97 STATISTIC(NumHoisted, 98 "Number of machine instructions hoisted out of loops"); 99 STATISTIC(NumLowRP, 100 "Number of instructions hoisted in low reg pressure situation"); 101 STATISTIC(NumHighLatency, 102 "Number of high latency instructions hoisted"); 103 STATISTIC(NumCSEed, 104 "Number of hoisted machine instructions CSEed"); 105 STATISTIC(NumPostRAHoisted, 106 "Number of machine instructions hoisted out of loops post regalloc"); 107 STATISTIC(NumStoreConst, 108 "Number of stores of const phys reg hoisted out of loops"); 109 STATISTIC(NumNotHoistedDueToHotness, 110 "Number of instructions not hoisted due to block frequency"); 111 112 namespace { 113 114 class MachineLICMBase : public MachineFunctionPass { 115 const TargetInstrInfo *TII; 116 const TargetLoweringBase *TLI; 117 const TargetRegisterInfo *TRI; 118 const MachineFrameInfo *MFI; 119 MachineRegisterInfo *MRI; 120 TargetSchedModel SchedModel; 121 bool PreRegAlloc; 122 bool HasProfileData; 123 124 // Various analyses that we use... 125 AliasAnalysis *AA; // Alias analysis info. 126 MachineBlockFrequencyInfo *MBFI; // Machine block frequncy info 127 MachineLoopInfo *MLI; // Current MachineLoopInfo 128 MachineDominatorTree *DT; // Machine dominator tree for the cur loop 129 130 // State that is updated as we process loops 131 bool Changed; // True if a loop is changed. 132 bool FirstInLoop; // True if it's the first LICM in the loop. 133 MachineLoop *CurLoop; // The current loop we are working on. 134 MachineBasicBlock *CurPreheader; // The preheader for CurLoop. 135 136 // Exit blocks for CurLoop. 137 SmallVector<MachineBasicBlock *, 8> ExitBlocks; 138 139 bool isExitBlock(const MachineBasicBlock *MBB) const { 140 return is_contained(ExitBlocks, MBB); 141 } 142 143 // Track 'estimated' register pressure. 144 SmallSet<Register, 32> RegSeen; 145 SmallVector<unsigned, 8> RegPressure; 146 147 // Register pressure "limit" per register pressure set. If the pressure 148 // is higher than the limit, then it's considered high. 149 SmallVector<unsigned, 8> RegLimit; 150 151 // Register pressure on path leading from loop preheader to current BB. 152 SmallVector<SmallVector<unsigned, 8>, 16> BackTrace; 153 154 // For each opcode, keep a list of potential CSE instructions. 155 DenseMap<unsigned, std::vector<MachineInstr *>> CSEMap; 156 157 enum { 158 SpeculateFalse = 0, 159 SpeculateTrue = 1, 160 SpeculateUnknown = 2 161 }; 162 163 // If a MBB does not dominate loop exiting blocks then it may not safe 164 // to hoist loads from this block. 165 // Tri-state: 0 - false, 1 - true, 2 - unknown 166 unsigned SpeculationState; 167 168 public: 169 MachineLICMBase(char &PassID, bool PreRegAlloc) 170 : MachineFunctionPass(PassID), PreRegAlloc(PreRegAlloc) {} 171 172 bool runOnMachineFunction(MachineFunction &MF) override; 173 174 void getAnalysisUsage(AnalysisUsage &AU) const override { 175 AU.addRequired<MachineLoopInfo>(); 176 if (DisableHoistingToHotterBlocks != UseBFI::None) 177 AU.addRequired<MachineBlockFrequencyInfo>(); 178 AU.addRequired<MachineDominatorTree>(); 179 AU.addRequired<AAResultsWrapperPass>(); 180 AU.addPreserved<MachineLoopInfo>(); 181 MachineFunctionPass::getAnalysisUsage(AU); 182 } 183 184 void releaseMemory() override { 185 RegSeen.clear(); 186 RegPressure.clear(); 187 RegLimit.clear(); 188 BackTrace.clear(); 189 CSEMap.clear(); 190 } 191 192 private: 193 /// Keep track of information about hoisting candidates. 194 struct CandidateInfo { 195 MachineInstr *MI; 196 unsigned Def; 197 int FI; 198 199 CandidateInfo(MachineInstr *mi, unsigned def, int fi) 200 : MI(mi), Def(def), FI(fi) {} 201 }; 202 203 void HoistRegionPostRA(); 204 205 void HoistPostRA(MachineInstr *MI, unsigned Def); 206 207 void ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs, 208 BitVector &PhysRegClobbers, SmallSet<int, 32> &StoredFIs, 209 SmallVectorImpl<CandidateInfo> &Candidates); 210 211 void AddToLiveIns(MCRegister Reg); 212 213 bool IsLICMCandidate(MachineInstr &I); 214 215 bool IsLoopInvariantInst(MachineInstr &I); 216 217 bool HasLoopPHIUse(const MachineInstr *MI) const; 218 219 bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx, 220 Register Reg) const; 221 222 bool IsCheapInstruction(MachineInstr &MI) const; 223 224 bool CanCauseHighRegPressure(const DenseMap<unsigned, int> &Cost, 225 bool Cheap); 226 227 void UpdateBackTraceRegPressure(const MachineInstr *MI); 228 229 bool IsProfitableToHoist(MachineInstr &MI); 230 231 bool IsGuaranteedToExecute(MachineBasicBlock *BB); 232 233 void EnterScope(MachineBasicBlock *MBB); 234 235 void ExitScope(MachineBasicBlock *MBB); 236 237 void ExitScopeIfDone( 238 MachineDomTreeNode *Node, 239 DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren, 240 DenseMap<MachineDomTreeNode *, MachineDomTreeNode *> &ParentMap); 241 242 void HoistOutOfLoop(MachineDomTreeNode *HeaderN); 243 244 void InitRegPressure(MachineBasicBlock *BB); 245 246 DenseMap<unsigned, int> calcRegisterCost(const MachineInstr *MI, 247 bool ConsiderSeen, 248 bool ConsiderUnseenAsDef); 249 250 void UpdateRegPressure(const MachineInstr *MI, 251 bool ConsiderUnseenAsDef = false); 252 253 MachineInstr *ExtractHoistableLoad(MachineInstr *MI); 254 255 MachineInstr *LookForDuplicate(const MachineInstr *MI, 256 std::vector<MachineInstr *> &PrevMIs); 257 258 bool 259 EliminateCSE(MachineInstr *MI, 260 DenseMap<unsigned, std::vector<MachineInstr *>>::iterator &CI); 261 262 bool MayCSE(MachineInstr *MI); 263 264 bool Hoist(MachineInstr *MI, MachineBasicBlock *Preheader); 265 266 void InitCSEMap(MachineBasicBlock *BB); 267 268 bool isTgtHotterThanSrc(MachineBasicBlock *SrcBlock, 269 MachineBasicBlock *TgtBlock); 270 MachineBasicBlock *getCurPreheader(); 271 }; 272 273 class MachineLICM : public MachineLICMBase { 274 public: 275 static char ID; 276 MachineLICM() : MachineLICMBase(ID, false) { 277 initializeMachineLICMPass(*PassRegistry::getPassRegistry()); 278 } 279 }; 280 281 class EarlyMachineLICM : public MachineLICMBase { 282 public: 283 static char ID; 284 EarlyMachineLICM() : MachineLICMBase(ID, true) { 285 initializeEarlyMachineLICMPass(*PassRegistry::getPassRegistry()); 286 } 287 }; 288 289 } // end anonymous namespace 290 291 char MachineLICM::ID; 292 char EarlyMachineLICM::ID; 293 294 char &llvm::MachineLICMID = MachineLICM::ID; 295 char &llvm::EarlyMachineLICMID = EarlyMachineLICM::ID; 296 297 INITIALIZE_PASS_BEGIN(MachineLICM, DEBUG_TYPE, 298 "Machine Loop Invariant Code Motion", false, false) 299 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) 300 INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) 301 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 302 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 303 INITIALIZE_PASS_END(MachineLICM, DEBUG_TYPE, 304 "Machine Loop Invariant Code Motion", false, false) 305 306 INITIALIZE_PASS_BEGIN(EarlyMachineLICM, "early-machinelicm", 307 "Early Machine Loop Invariant Code Motion", false, false) 308 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) 309 INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) 310 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 311 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 312 INITIALIZE_PASS_END(EarlyMachineLICM, "early-machinelicm", 313 "Early Machine Loop Invariant Code Motion", false, false) 314 315 /// Test if the given loop is the outer-most loop that has a unique predecessor. 316 static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) { 317 // Check whether this loop even has a unique predecessor. 318 if (!CurLoop->getLoopPredecessor()) 319 return false; 320 // Ok, now check to see if any of its outer loops do. 321 for (MachineLoop *L = CurLoop->getParentLoop(); L; L = L->getParentLoop()) 322 if (L->getLoopPredecessor()) 323 return false; 324 // None of them did, so this is the outermost with a unique predecessor. 325 return true; 326 } 327 328 bool MachineLICMBase::runOnMachineFunction(MachineFunction &MF) { 329 if (skipFunction(MF.getFunction())) 330 return false; 331 332 Changed = FirstInLoop = false; 333 const TargetSubtargetInfo &ST = MF.getSubtarget(); 334 TII = ST.getInstrInfo(); 335 TLI = ST.getTargetLowering(); 336 TRI = ST.getRegisterInfo(); 337 MFI = &MF.getFrameInfo(); 338 MRI = &MF.getRegInfo(); 339 SchedModel.init(&ST); 340 341 PreRegAlloc = MRI->isSSA(); 342 HasProfileData = MF.getFunction().hasProfileData(); 343 344 if (PreRegAlloc) 345 LLVM_DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: "); 346 else 347 LLVM_DEBUG(dbgs() << "******** Post-regalloc Machine LICM: "); 348 LLVM_DEBUG(dbgs() << MF.getName() << " ********\n"); 349 350 if (PreRegAlloc) { 351 // Estimate register pressure during pre-regalloc pass. 352 unsigned NumRPS = TRI->getNumRegPressureSets(); 353 RegPressure.resize(NumRPS); 354 std::fill(RegPressure.begin(), RegPressure.end(), 0); 355 RegLimit.resize(NumRPS); 356 for (unsigned i = 0, e = NumRPS; i != e; ++i) 357 RegLimit[i] = TRI->getRegPressureSetLimit(MF, i); 358 } 359 360 // Get our Loop information... 361 if (DisableHoistingToHotterBlocks != UseBFI::None) 362 MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); 363 MLI = &getAnalysis<MachineLoopInfo>(); 364 DT = &getAnalysis<MachineDominatorTree>(); 365 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); 366 367 SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end()); 368 while (!Worklist.empty()) { 369 CurLoop = Worklist.pop_back_val(); 370 CurPreheader = nullptr; 371 ExitBlocks.clear(); 372 373 // If this is done before regalloc, only visit outer-most preheader-sporting 374 // loops. 375 if (PreRegAlloc && !LoopIsOuterMostWithPredecessor(CurLoop)) { 376 Worklist.append(CurLoop->begin(), CurLoop->end()); 377 continue; 378 } 379 380 CurLoop->getExitBlocks(ExitBlocks); 381 382 if (!PreRegAlloc) 383 HoistRegionPostRA(); 384 else { 385 // CSEMap is initialized for loop header when the first instruction is 386 // being hoisted. 387 MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader()); 388 FirstInLoop = true; 389 HoistOutOfLoop(N); 390 CSEMap.clear(); 391 } 392 } 393 394 return Changed; 395 } 396 397 /// Return true if instruction stores to the specified frame. 398 static bool InstructionStoresToFI(const MachineInstr *MI, int FI) { 399 // Check mayStore before memory operands so that e.g. DBG_VALUEs will return 400 // true since they have no memory operands. 401 if (!MI->mayStore()) 402 return false; 403 // If we lost memory operands, conservatively assume that the instruction 404 // writes to all slots. 405 if (MI->memoperands_empty()) 406 return true; 407 for (const MachineMemOperand *MemOp : MI->memoperands()) { 408 if (!MemOp->isStore() || !MemOp->getPseudoValue()) 409 continue; 410 if (const FixedStackPseudoSourceValue *Value = 411 dyn_cast<FixedStackPseudoSourceValue>(MemOp->getPseudoValue())) { 412 if (Value->getFrameIndex() == FI) 413 return true; 414 } 415 } 416 return false; 417 } 418 419 /// Examine the instruction for potentai LICM candidate. Also 420 /// gather register def and frame object update information. 421 void MachineLICMBase::ProcessMI(MachineInstr *MI, 422 BitVector &PhysRegDefs, 423 BitVector &PhysRegClobbers, 424 SmallSet<int, 32> &StoredFIs, 425 SmallVectorImpl<CandidateInfo> &Candidates) { 426 bool RuledOut = false; 427 bool HasNonInvariantUse = false; 428 unsigned Def = 0; 429 for (const MachineOperand &MO : MI->operands()) { 430 if (MO.isFI()) { 431 // Remember if the instruction stores to the frame index. 432 int FI = MO.getIndex(); 433 if (!StoredFIs.count(FI) && 434 MFI->isSpillSlotObjectIndex(FI) && 435 InstructionStoresToFI(MI, FI)) 436 StoredFIs.insert(FI); 437 HasNonInvariantUse = true; 438 continue; 439 } 440 441 // We can't hoist an instruction defining a physreg that is clobbered in 442 // the loop. 443 if (MO.isRegMask()) { 444 PhysRegClobbers.setBitsNotInMask(MO.getRegMask()); 445 continue; 446 } 447 448 if (!MO.isReg()) 449 continue; 450 Register Reg = MO.getReg(); 451 if (!Reg) 452 continue; 453 assert(Register::isPhysicalRegister(Reg) && 454 "Not expecting virtual register!"); 455 456 if (!MO.isDef()) { 457 if (Reg && (PhysRegDefs.test(Reg) || PhysRegClobbers.test(Reg))) 458 // If it's using a non-loop-invariant register, then it's obviously not 459 // safe to hoist. 460 HasNonInvariantUse = true; 461 continue; 462 } 463 464 if (MO.isImplicit()) { 465 for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) 466 PhysRegClobbers.set(*AI); 467 if (!MO.isDead()) 468 // Non-dead implicit def? This cannot be hoisted. 469 RuledOut = true; 470 // No need to check if a dead implicit def is also defined by 471 // another instruction. 472 continue; 473 } 474 475 // FIXME: For now, avoid instructions with multiple defs, unless 476 // it's a dead implicit def. 477 if (Def) 478 RuledOut = true; 479 else 480 Def = Reg; 481 482 // If we have already seen another instruction that defines the same 483 // register, then this is not safe. Two defs is indicated by setting a 484 // PhysRegClobbers bit. 485 for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS) { 486 if (PhysRegDefs.test(*AS)) 487 PhysRegClobbers.set(*AS); 488 } 489 // Need a second loop because MCRegAliasIterator can visit the same 490 // register twice. 491 for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS) 492 PhysRegDefs.set(*AS); 493 494 if (PhysRegClobbers.test(Reg)) 495 // MI defined register is seen defined by another instruction in 496 // the loop, it cannot be a LICM candidate. 497 RuledOut = true; 498 } 499 500 // Only consider reloads for now and remats which do not have register 501 // operands. FIXME: Consider unfold load folding instructions. 502 if (Def && !RuledOut) { 503 int FI = std::numeric_limits<int>::min(); 504 if ((!HasNonInvariantUse && IsLICMCandidate(*MI)) || 505 (TII->isLoadFromStackSlot(*MI, FI) && MFI->isSpillSlotObjectIndex(FI))) 506 Candidates.push_back(CandidateInfo(MI, Def, FI)); 507 } 508 } 509 510 /// Walk the specified region of the CFG and hoist loop invariants out to the 511 /// preheader. 512 void MachineLICMBase::HoistRegionPostRA() { 513 MachineBasicBlock *Preheader = getCurPreheader(); 514 if (!Preheader) 515 return; 516 517 unsigned NumRegs = TRI->getNumRegs(); 518 BitVector PhysRegDefs(NumRegs); // Regs defined once in the loop. 519 BitVector PhysRegClobbers(NumRegs); // Regs defined more than once. 520 521 SmallVector<CandidateInfo, 32> Candidates; 522 SmallSet<int, 32> StoredFIs; 523 524 // Walk the entire region, count number of defs for each register, and 525 // collect potential LICM candidates. 526 for (MachineBasicBlock *BB : CurLoop->getBlocks()) { 527 // If the header of the loop containing this basic block is a landing pad, 528 // then don't try to hoist instructions out of this loop. 529 const MachineLoop *ML = MLI->getLoopFor(BB); 530 if (ML && ML->getHeader()->isEHPad()) continue; 531 532 // Conservatively treat live-in's as an external def. 533 // FIXME: That means a reload that're reused in successor block(s) will not 534 // be LICM'ed. 535 for (const auto &LI : BB->liveins()) { 536 for (MCRegAliasIterator AI(LI.PhysReg, TRI, true); AI.isValid(); ++AI) 537 PhysRegDefs.set(*AI); 538 } 539 540 SpeculationState = SpeculateUnknown; 541 for (MachineInstr &MI : *BB) 542 ProcessMI(&MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates); 543 } 544 545 // Gather the registers read / clobbered by the terminator. 546 BitVector TermRegs(NumRegs); 547 MachineBasicBlock::iterator TI = Preheader->getFirstTerminator(); 548 if (TI != Preheader->end()) { 549 for (const MachineOperand &MO : TI->operands()) { 550 if (!MO.isReg()) 551 continue; 552 Register Reg = MO.getReg(); 553 if (!Reg) 554 continue; 555 for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) 556 TermRegs.set(*AI); 557 } 558 } 559 560 // Now evaluate whether the potential candidates qualify. 561 // 1. Check if the candidate defined register is defined by another 562 // instruction in the loop. 563 // 2. If the candidate is a load from stack slot (always true for now), 564 // check if the slot is stored anywhere in the loop. 565 // 3. Make sure candidate def should not clobber 566 // registers read by the terminator. Similarly its def should not be 567 // clobbered by the terminator. 568 for (CandidateInfo &Candidate : Candidates) { 569 if (Candidate.FI != std::numeric_limits<int>::min() && 570 StoredFIs.count(Candidate.FI)) 571 continue; 572 573 unsigned Def = Candidate.Def; 574 if (!PhysRegClobbers.test(Def) && !TermRegs.test(Def)) { 575 bool Safe = true; 576 MachineInstr *MI = Candidate.MI; 577 for (const MachineOperand &MO : MI->operands()) { 578 if (!MO.isReg() || MO.isDef() || !MO.getReg()) 579 continue; 580 Register Reg = MO.getReg(); 581 if (PhysRegDefs.test(Reg) || 582 PhysRegClobbers.test(Reg)) { 583 // If it's using a non-loop-invariant register, then it's obviously 584 // not safe to hoist. 585 Safe = false; 586 break; 587 } 588 } 589 if (Safe) 590 HoistPostRA(MI, Candidate.Def); 591 } 592 } 593 } 594 595 /// Add register 'Reg' to the livein sets of BBs in the current loop, and make 596 /// sure it is not killed by any instructions in the loop. 597 void MachineLICMBase::AddToLiveIns(MCRegister Reg) { 598 for (MachineBasicBlock *BB : CurLoop->getBlocks()) { 599 if (!BB->isLiveIn(Reg)) 600 BB->addLiveIn(Reg); 601 for (MachineInstr &MI : *BB) { 602 for (MachineOperand &MO : MI.operands()) { 603 if (!MO.isReg() || !MO.getReg() || MO.isDef()) continue; 604 if (MO.getReg() == Reg || TRI->isSuperRegister(Reg, MO.getReg())) 605 MO.setIsKill(false); 606 } 607 } 608 } 609 } 610 611 /// When an instruction is found to only use loop invariant operands that is 612 /// safe to hoist, this instruction is called to do the dirty work. 613 void MachineLICMBase::HoistPostRA(MachineInstr *MI, unsigned Def) { 614 MachineBasicBlock *Preheader = getCurPreheader(); 615 616 // Now move the instructions to the predecessor, inserting it before any 617 // terminator instructions. 618 LLVM_DEBUG(dbgs() << "Hoisting to " << printMBBReference(*Preheader) 619 << " from " << printMBBReference(*MI->getParent()) << ": " 620 << *MI); 621 622 // Splice the instruction to the preheader. 623 MachineBasicBlock *MBB = MI->getParent(); 624 Preheader->splice(Preheader->getFirstTerminator(), MBB, MI); 625 626 // Since we are moving the instruction out of its basic block, we do not 627 // retain its debug location. Doing so would degrade the debugging 628 // experience and adversely affect the accuracy of profiling information. 629 assert(!MI->isDebugInstr() && "Should not hoist debug inst"); 630 MI->setDebugLoc(DebugLoc()); 631 632 // Add register to livein list to all the BBs in the current loop since a 633 // loop invariant must be kept live throughout the whole loop. This is 634 // important to ensure later passes do not scavenge the def register. 635 AddToLiveIns(Def); 636 637 ++NumPostRAHoisted; 638 Changed = true; 639 } 640 641 /// Check if this mbb is guaranteed to execute. If not then a load from this mbb 642 /// may not be safe to hoist. 643 bool MachineLICMBase::IsGuaranteedToExecute(MachineBasicBlock *BB) { 644 if (SpeculationState != SpeculateUnknown) 645 return SpeculationState == SpeculateFalse; 646 647 if (BB != CurLoop->getHeader()) { 648 // Check loop exiting blocks. 649 SmallVector<MachineBasicBlock*, 8> CurrentLoopExitingBlocks; 650 CurLoop->getExitingBlocks(CurrentLoopExitingBlocks); 651 for (MachineBasicBlock *CurrentLoopExitingBlock : CurrentLoopExitingBlocks) 652 if (!DT->dominates(BB, CurrentLoopExitingBlock)) { 653 SpeculationState = SpeculateTrue; 654 return false; 655 } 656 } 657 658 SpeculationState = SpeculateFalse; 659 return true; 660 } 661 662 void MachineLICMBase::EnterScope(MachineBasicBlock *MBB) { 663 LLVM_DEBUG(dbgs() << "Entering " << printMBBReference(*MBB) << '\n'); 664 665 // Remember livein register pressure. 666 BackTrace.push_back(RegPressure); 667 } 668 669 void MachineLICMBase::ExitScope(MachineBasicBlock *MBB) { 670 LLVM_DEBUG(dbgs() << "Exiting " << printMBBReference(*MBB) << '\n'); 671 BackTrace.pop_back(); 672 } 673 674 /// Destroy scope for the MBB that corresponds to the given dominator tree node 675 /// if its a leaf or all of its children are done. Walk up the dominator tree to 676 /// destroy ancestors which are now done. 677 void MachineLICMBase::ExitScopeIfDone(MachineDomTreeNode *Node, 678 DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren, 679 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) { 680 if (OpenChildren[Node]) 681 return; 682 683 // Pop scope. 684 ExitScope(Node->getBlock()); 685 686 // Now traverse upwards to pop ancestors whose offsprings are all done. 687 while (MachineDomTreeNode *Parent = ParentMap[Node]) { 688 unsigned Left = --OpenChildren[Parent]; 689 if (Left != 0) 690 break; 691 ExitScope(Parent->getBlock()); 692 Node = Parent; 693 } 694 } 695 696 /// Walk the specified loop in the CFG (defined by all blocks dominated by the 697 /// specified header block, and that are in the current loop) in depth first 698 /// order w.r.t the DominatorTree. This allows us to visit definitions before 699 /// uses, allowing us to hoist a loop body in one pass without iteration. 700 void MachineLICMBase::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { 701 MachineBasicBlock *Preheader = getCurPreheader(); 702 if (!Preheader) 703 return; 704 705 SmallVector<MachineDomTreeNode*, 32> Scopes; 706 SmallVector<MachineDomTreeNode*, 8> WorkList; 707 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap; 708 DenseMap<MachineDomTreeNode*, unsigned> OpenChildren; 709 710 // Perform a DFS walk to determine the order of visit. 711 WorkList.push_back(HeaderN); 712 while (!WorkList.empty()) { 713 MachineDomTreeNode *Node = WorkList.pop_back_val(); 714 assert(Node && "Null dominator tree node?"); 715 MachineBasicBlock *BB = Node->getBlock(); 716 717 // If the header of the loop containing this basic block is a landing pad, 718 // then don't try to hoist instructions out of this loop. 719 const MachineLoop *ML = MLI->getLoopFor(BB); 720 if (ML && ML->getHeader()->isEHPad()) 721 continue; 722 723 // If this subregion is not in the top level loop at all, exit. 724 if (!CurLoop->contains(BB)) 725 continue; 726 727 Scopes.push_back(Node); 728 unsigned NumChildren = Node->getNumChildren(); 729 730 // Don't hoist things out of a large switch statement. This often causes 731 // code to be hoisted that wasn't going to be executed, and increases 732 // register pressure in a situation where it's likely to matter. 733 if (BB->succ_size() >= 25) 734 NumChildren = 0; 735 736 OpenChildren[Node] = NumChildren; 737 if (NumChildren) { 738 // Add children in reverse order as then the next popped worklist node is 739 // the first child of this node. This means we ultimately traverse the 740 // DOM tree in exactly the same order as if we'd recursed. 741 for (MachineDomTreeNode *Child : reverse(Node->children())) { 742 ParentMap[Child] = Node; 743 WorkList.push_back(Child); 744 } 745 } 746 } 747 748 if (Scopes.size() == 0) 749 return; 750 751 // Compute registers which are livein into the loop headers. 752 RegSeen.clear(); 753 BackTrace.clear(); 754 InitRegPressure(Preheader); 755 756 // Now perform LICM. 757 for (MachineDomTreeNode *Node : Scopes) { 758 MachineBasicBlock *MBB = Node->getBlock(); 759 760 EnterScope(MBB); 761 762 // Process the block 763 SpeculationState = SpeculateUnknown; 764 for (MachineBasicBlock::iterator 765 MII = MBB->begin(), E = MBB->end(); MII != E; ) { 766 MachineBasicBlock::iterator NextMII = MII; ++NextMII; 767 MachineInstr *MI = &*MII; 768 if (!Hoist(MI, Preheader)) 769 UpdateRegPressure(MI); 770 // If we have hoisted an instruction that may store, it can only be a 771 // constant store. 772 MII = NextMII; 773 } 774 775 // If it's a leaf node, it's done. Traverse upwards to pop ancestors. 776 ExitScopeIfDone(Node, OpenChildren, ParentMap); 777 } 778 } 779 780 static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) { 781 return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg()); 782 } 783 784 /// Find all virtual register references that are liveout of the preheader to 785 /// initialize the starting "register pressure". Note this does not count live 786 /// through (livein but not used) registers. 787 void MachineLICMBase::InitRegPressure(MachineBasicBlock *BB) { 788 std::fill(RegPressure.begin(), RegPressure.end(), 0); 789 790 // If the preheader has only a single predecessor and it ends with a 791 // fallthrough or an unconditional branch, then scan its predecessor for live 792 // defs as well. This happens whenever the preheader is created by splitting 793 // the critical edge from the loop predecessor to the loop header. 794 if (BB->pred_size() == 1) { 795 MachineBasicBlock *TBB = nullptr, *FBB = nullptr; 796 SmallVector<MachineOperand, 4> Cond; 797 if (!TII->analyzeBranch(*BB, TBB, FBB, Cond, false) && Cond.empty()) 798 InitRegPressure(*BB->pred_begin()); 799 } 800 801 for (const MachineInstr &MI : *BB) 802 UpdateRegPressure(&MI, /*ConsiderUnseenAsDef=*/true); 803 } 804 805 /// Update estimate of register pressure after the specified instruction. 806 void MachineLICMBase::UpdateRegPressure(const MachineInstr *MI, 807 bool ConsiderUnseenAsDef) { 808 auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/true, ConsiderUnseenAsDef); 809 for (const auto &RPIdAndCost : Cost) { 810 unsigned Class = RPIdAndCost.first; 811 if (static_cast<int>(RegPressure[Class]) < -RPIdAndCost.second) 812 RegPressure[Class] = 0; 813 else 814 RegPressure[Class] += RPIdAndCost.second; 815 } 816 } 817 818 /// Calculate the additional register pressure that the registers used in MI 819 /// cause. 820 /// 821 /// If 'ConsiderSeen' is true, updates 'RegSeen' and uses the information to 822 /// figure out which usages are live-ins. 823 /// FIXME: Figure out a way to consider 'RegSeen' from all code paths. 824 DenseMap<unsigned, int> 825 MachineLICMBase::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen, 826 bool ConsiderUnseenAsDef) { 827 DenseMap<unsigned, int> Cost; 828 if (MI->isImplicitDef()) 829 return Cost; 830 for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { 831 const MachineOperand &MO = MI->getOperand(i); 832 if (!MO.isReg() || MO.isImplicit()) 833 continue; 834 Register Reg = MO.getReg(); 835 if (!Register::isVirtualRegister(Reg)) 836 continue; 837 838 // FIXME: It seems bad to use RegSeen only for some of these calculations. 839 bool isNew = ConsiderSeen ? RegSeen.insert(Reg).second : false; 840 const TargetRegisterClass *RC = MRI->getRegClass(Reg); 841 842 RegClassWeight W = TRI->getRegClassWeight(RC); 843 int RCCost = 0; 844 if (MO.isDef()) 845 RCCost = W.RegWeight; 846 else { 847 bool isKill = isOperandKill(MO, MRI); 848 if (isNew && !isKill && ConsiderUnseenAsDef) 849 // Haven't seen this, it must be a livein. 850 RCCost = W.RegWeight; 851 else if (!isNew && isKill) 852 RCCost = -W.RegWeight; 853 } 854 if (RCCost == 0) 855 continue; 856 const int *PS = TRI->getRegClassPressureSets(RC); 857 for (; *PS != -1; ++PS) { 858 if (Cost.find(*PS) == Cost.end()) 859 Cost[*PS] = RCCost; 860 else 861 Cost[*PS] += RCCost; 862 } 863 } 864 return Cost; 865 } 866 867 /// Return true if this machine instruction loads from global offset table or 868 /// constant pool. 869 static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) { 870 assert(MI.mayLoad() && "Expected MI that loads!"); 871 872 // If we lost memory operands, conservatively assume that the instruction 873 // reads from everything.. 874 if (MI.memoperands_empty()) 875 return true; 876 877 for (MachineMemOperand *MemOp : MI.memoperands()) 878 if (const PseudoSourceValue *PSV = MemOp->getPseudoValue()) 879 if (PSV->isGOT() || PSV->isConstantPool()) 880 return true; 881 882 return false; 883 } 884 885 // This function iterates through all the operands of the input store MI and 886 // checks that each register operand statisfies isCallerPreservedPhysReg. 887 // This means, the value being stored and the address where it is being stored 888 // is constant throughout the body of the function (not including prologue and 889 // epilogue). When called with an MI that isn't a store, it returns false. 890 // A future improvement can be to check if the store registers are constant 891 // throughout the loop rather than throughout the funtion. 892 static bool isInvariantStore(const MachineInstr &MI, 893 const TargetRegisterInfo *TRI, 894 const MachineRegisterInfo *MRI) { 895 896 bool FoundCallerPresReg = false; 897 if (!MI.mayStore() || MI.hasUnmodeledSideEffects() || 898 (MI.getNumOperands() == 0)) 899 return false; 900 901 // Check that all register operands are caller-preserved physical registers. 902 for (const MachineOperand &MO : MI.operands()) { 903 if (MO.isReg()) { 904 Register Reg = MO.getReg(); 905 // If operand is a virtual register, check if it comes from a copy of a 906 // physical register. 907 if (Register::isVirtualRegister(Reg)) 908 Reg = TRI->lookThruCopyLike(MO.getReg(), MRI); 909 if (Register::isVirtualRegister(Reg)) 910 return false; 911 if (!TRI->isCallerPreservedPhysReg(Reg.asMCReg(), *MI.getMF())) 912 return false; 913 else 914 FoundCallerPresReg = true; 915 } else if (!MO.isImm()) { 916 return false; 917 } 918 } 919 return FoundCallerPresReg; 920 } 921 922 // Return true if the input MI is a copy instruction that feeds an invariant 923 // store instruction. This means that the src of the copy has to satisfy 924 // isCallerPreservedPhysReg and atleast one of it's users should satisfy 925 // isInvariantStore. 926 static bool isCopyFeedingInvariantStore(const MachineInstr &MI, 927 const MachineRegisterInfo *MRI, 928 const TargetRegisterInfo *TRI) { 929 930 // FIXME: If targets would like to look through instructions that aren't 931 // pure copies, this can be updated to a query. 932 if (!MI.isCopy()) 933 return false; 934 935 const MachineFunction *MF = MI.getMF(); 936 // Check that we are copying a constant physical register. 937 Register CopySrcReg = MI.getOperand(1).getReg(); 938 if (Register::isVirtualRegister(CopySrcReg)) 939 return false; 940 941 if (!TRI->isCallerPreservedPhysReg(CopySrcReg.asMCReg(), *MF)) 942 return false; 943 944 Register CopyDstReg = MI.getOperand(0).getReg(); 945 // Check if any of the uses of the copy are invariant stores. 946 assert(Register::isVirtualRegister(CopyDstReg) && 947 "copy dst is not a virtual reg"); 948 949 for (MachineInstr &UseMI : MRI->use_instructions(CopyDstReg)) { 950 if (UseMI.mayStore() && isInvariantStore(UseMI, TRI, MRI)) 951 return true; 952 } 953 return false; 954 } 955 956 /// Returns true if the instruction may be a suitable candidate for LICM. 957 /// e.g. If the instruction is a call, then it's obviously not safe to hoist it. 958 bool MachineLICMBase::IsLICMCandidate(MachineInstr &I) { 959 // Check if it's safe to move the instruction. 960 bool DontMoveAcrossStore = true; 961 if ((!I.isSafeToMove(AA, DontMoveAcrossStore)) && 962 !(HoistConstStores && isInvariantStore(I, TRI, MRI))) { 963 LLVM_DEBUG(dbgs() << "LICM: Instruction not safe to move.\n"); 964 return false; 965 } 966 967 // If it is a load then check if it is guaranteed to execute by making sure 968 // that it dominates all exiting blocks. If it doesn't, then there is a path 969 // out of the loop which does not execute this load, so we can't hoist it. 970 // Loads from constant memory are safe to speculate, for example indexed load 971 // from a jump table. 972 // Stores and side effects are already checked by isSafeToMove. 973 if (I.mayLoad() && !mayLoadFromGOTOrConstantPool(I) && 974 !IsGuaranteedToExecute(I.getParent())) { 975 LLVM_DEBUG(dbgs() << "LICM: Load not guaranteed to execute.\n"); 976 return false; 977 } 978 979 // Convergent attribute has been used on operations that involve inter-thread 980 // communication which results are implicitly affected by the enclosing 981 // control flows. It is not safe to hoist or sink such operations across 982 // control flow. 983 if (I.isConvergent()) 984 return false; 985 986 return true; 987 } 988 989 /// Returns true if the instruction is loop invariant. 990 bool MachineLICMBase::IsLoopInvariantInst(MachineInstr &I) { 991 if (!IsLICMCandidate(I)) { 992 LLVM_DEBUG(dbgs() << "LICM: Instruction not a LICM candidate\n"); 993 return false; 994 } 995 return CurLoop->isLoopInvariant(I); 996 } 997 998 /// Return true if the specified instruction is used by a phi node and hoisting 999 /// it could cause a copy to be inserted. 1000 bool MachineLICMBase::HasLoopPHIUse(const MachineInstr *MI) const { 1001 SmallVector<const MachineInstr*, 8> Work(1, MI); 1002 do { 1003 MI = Work.pop_back_val(); 1004 for (const MachineOperand &MO : MI->operands()) { 1005 if (!MO.isReg() || !MO.isDef()) 1006 continue; 1007 Register Reg = MO.getReg(); 1008 if (!Register::isVirtualRegister(Reg)) 1009 continue; 1010 for (MachineInstr &UseMI : MRI->use_instructions(Reg)) { 1011 // A PHI may cause a copy to be inserted. 1012 if (UseMI.isPHI()) { 1013 // A PHI inside the loop causes a copy because the live range of Reg is 1014 // extended across the PHI. 1015 if (CurLoop->contains(&UseMI)) 1016 return true; 1017 // A PHI in an exit block can cause a copy to be inserted if the PHI 1018 // has multiple predecessors in the loop with different values. 1019 // For now, approximate by rejecting all exit blocks. 1020 if (isExitBlock(UseMI.getParent())) 1021 return true; 1022 continue; 1023 } 1024 // Look past copies as well. 1025 if (UseMI.isCopy() && CurLoop->contains(&UseMI)) 1026 Work.push_back(&UseMI); 1027 } 1028 } 1029 } while (!Work.empty()); 1030 return false; 1031 } 1032 1033 /// Compute operand latency between a def of 'Reg' and an use in the current 1034 /// loop, return true if the target considered it high. 1035 bool MachineLICMBase::HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx, 1036 Register Reg) const { 1037 if (MRI->use_nodbg_empty(Reg)) 1038 return false; 1039 1040 for (MachineInstr &UseMI : MRI->use_nodbg_instructions(Reg)) { 1041 if (UseMI.isCopyLike()) 1042 continue; 1043 if (!CurLoop->contains(UseMI.getParent())) 1044 continue; 1045 for (unsigned i = 0, e = UseMI.getNumOperands(); i != e; ++i) { 1046 const MachineOperand &MO = UseMI.getOperand(i); 1047 if (!MO.isReg() || !MO.isUse()) 1048 continue; 1049 Register MOReg = MO.getReg(); 1050 if (MOReg != Reg) 1051 continue; 1052 1053 if (TII->hasHighOperandLatency(SchedModel, MRI, MI, DefIdx, UseMI, i)) 1054 return true; 1055 } 1056 1057 // Only look at the first in loop use. 1058 break; 1059 } 1060 1061 return false; 1062 } 1063 1064 /// Return true if the instruction is marked "cheap" or the operand latency 1065 /// between its def and a use is one or less. 1066 bool MachineLICMBase::IsCheapInstruction(MachineInstr &MI) const { 1067 if (TII->isAsCheapAsAMove(MI) || MI.isCopyLike()) 1068 return true; 1069 1070 bool isCheap = false; 1071 unsigned NumDefs = MI.getDesc().getNumDefs(); 1072 for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) { 1073 MachineOperand &DefMO = MI.getOperand(i); 1074 if (!DefMO.isReg() || !DefMO.isDef()) 1075 continue; 1076 --NumDefs; 1077 Register Reg = DefMO.getReg(); 1078 if (Register::isPhysicalRegister(Reg)) 1079 continue; 1080 1081 if (!TII->hasLowDefLatency(SchedModel, MI, i)) 1082 return false; 1083 isCheap = true; 1084 } 1085 1086 return isCheap; 1087 } 1088 1089 /// Visit BBs from header to current BB, check if hoisting an instruction of the 1090 /// given cost matrix can cause high register pressure. 1091 bool 1092 MachineLICMBase::CanCauseHighRegPressure(const DenseMap<unsigned, int>& Cost, 1093 bool CheapInstr) { 1094 for (const auto &RPIdAndCost : Cost) { 1095 if (RPIdAndCost.second <= 0) 1096 continue; 1097 1098 unsigned Class = RPIdAndCost.first; 1099 int Limit = RegLimit[Class]; 1100 1101 // Don't hoist cheap instructions if they would increase register pressure, 1102 // even if we're under the limit. 1103 if (CheapInstr && !HoistCheapInsts) 1104 return true; 1105 1106 for (const auto &RP : BackTrace) 1107 if (static_cast<int>(RP[Class]) + RPIdAndCost.second >= Limit) 1108 return true; 1109 } 1110 1111 return false; 1112 } 1113 1114 /// Traverse the back trace from header to the current block and update their 1115 /// register pressures to reflect the effect of hoisting MI from the current 1116 /// block to the preheader. 1117 void MachineLICMBase::UpdateBackTraceRegPressure(const MachineInstr *MI) { 1118 // First compute the 'cost' of the instruction, i.e. its contribution 1119 // to register pressure. 1120 auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/false, 1121 /*ConsiderUnseenAsDef=*/false); 1122 1123 // Update register pressure of blocks from loop header to current block. 1124 for (auto &RP : BackTrace) 1125 for (const auto &RPIdAndCost : Cost) 1126 RP[RPIdAndCost.first] += RPIdAndCost.second; 1127 } 1128 1129 /// Return true if it is potentially profitable to hoist the given loop 1130 /// invariant. 1131 bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) { 1132 if (MI.isImplicitDef()) 1133 return true; 1134 1135 // Besides removing computation from the loop, hoisting an instruction has 1136 // these effects: 1137 // 1138 // - The value defined by the instruction becomes live across the entire 1139 // loop. This increases register pressure in the loop. 1140 // 1141 // - If the value is used by a PHI in the loop, a copy will be required for 1142 // lowering the PHI after extending the live range. 1143 // 1144 // - When hoisting the last use of a value in the loop, that value no longer 1145 // needs to be live in the loop. This lowers register pressure in the loop. 1146 1147 if (HoistConstStores && isCopyFeedingInvariantStore(MI, MRI, TRI)) 1148 return true; 1149 1150 bool CheapInstr = IsCheapInstruction(MI); 1151 bool CreatesCopy = HasLoopPHIUse(&MI); 1152 1153 // Don't hoist a cheap instruction if it would create a copy in the loop. 1154 if (CheapInstr && CreatesCopy) { 1155 LLVM_DEBUG(dbgs() << "Won't hoist cheap instr with loop PHI use: " << MI); 1156 return false; 1157 } 1158 1159 // Rematerializable instructions should always be hoisted since the register 1160 // allocator can just pull them down again when needed. 1161 if (TII->isTriviallyReMaterializable(MI, AA)) 1162 return true; 1163 1164 // FIXME: If there are long latency loop-invariant instructions inside the 1165 // loop at this point, why didn't the optimizer's LICM hoist them? 1166 for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) { 1167 const MachineOperand &MO = MI.getOperand(i); 1168 if (!MO.isReg() || MO.isImplicit()) 1169 continue; 1170 Register Reg = MO.getReg(); 1171 if (!Register::isVirtualRegister(Reg)) 1172 continue; 1173 if (MO.isDef() && HasHighOperandLatency(MI, i, Reg)) { 1174 LLVM_DEBUG(dbgs() << "Hoist High Latency: " << MI); 1175 ++NumHighLatency; 1176 return true; 1177 } 1178 } 1179 1180 // Estimate register pressure to determine whether to LICM the instruction. 1181 // In low register pressure situation, we can be more aggressive about 1182 // hoisting. Also, favors hoisting long latency instructions even in 1183 // moderately high pressure situation. 1184 // Cheap instructions will only be hoisted if they don't increase register 1185 // pressure at all. 1186 auto Cost = calcRegisterCost(&MI, /*ConsiderSeen=*/false, 1187 /*ConsiderUnseenAsDef=*/false); 1188 1189 // Visit BBs from header to current BB, if hoisting this doesn't cause 1190 // high register pressure, then it's safe to proceed. 1191 if (!CanCauseHighRegPressure(Cost, CheapInstr)) { 1192 LLVM_DEBUG(dbgs() << "Hoist non-reg-pressure: " << MI); 1193 ++NumLowRP; 1194 return true; 1195 } 1196 1197 // Don't risk increasing register pressure if it would create copies. 1198 if (CreatesCopy) { 1199 LLVM_DEBUG(dbgs() << "Won't hoist instr with loop PHI use: " << MI); 1200 return false; 1201 } 1202 1203 // Do not "speculate" in high register pressure situation. If an 1204 // instruction is not guaranteed to be executed in the loop, it's best to be 1205 // conservative. 1206 if (AvoidSpeculation && 1207 (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI))) { 1208 LLVM_DEBUG(dbgs() << "Won't speculate: " << MI); 1209 return false; 1210 } 1211 1212 // High register pressure situation, only hoist if the instruction is going 1213 // to be remat'ed. 1214 if (!TII->isTriviallyReMaterializable(MI, AA) && 1215 !MI.isDereferenceableInvariantLoad(AA)) { 1216 LLVM_DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI); 1217 return false; 1218 } 1219 1220 return true; 1221 } 1222 1223 /// Unfold a load from the given machineinstr if the load itself could be 1224 /// hoisted. Return the unfolded and hoistable load, or null if the load 1225 /// couldn't be unfolded or if it wouldn't be hoistable. 1226 MachineInstr *MachineLICMBase::ExtractHoistableLoad(MachineInstr *MI) { 1227 // Don't unfold simple loads. 1228 if (MI->canFoldAsLoad()) 1229 return nullptr; 1230 1231 // If not, we may be able to unfold a load and hoist that. 1232 // First test whether the instruction is loading from an amenable 1233 // memory location. 1234 if (!MI->isDereferenceableInvariantLoad(AA)) 1235 return nullptr; 1236 1237 // Next determine the register class for a temporary register. 1238 unsigned LoadRegIndex; 1239 unsigned NewOpc = 1240 TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(), 1241 /*UnfoldLoad=*/true, 1242 /*UnfoldStore=*/false, 1243 &LoadRegIndex); 1244 if (NewOpc == 0) return nullptr; 1245 const MCInstrDesc &MID = TII->get(NewOpc); 1246 MachineFunction &MF = *MI->getMF(); 1247 const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI, MF); 1248 // Ok, we're unfolding. Create a temporary register and do the unfold. 1249 Register Reg = MRI->createVirtualRegister(RC); 1250 1251 SmallVector<MachineInstr *, 2> NewMIs; 1252 bool Success = TII->unfoldMemoryOperand(MF, *MI, Reg, 1253 /*UnfoldLoad=*/true, 1254 /*UnfoldStore=*/false, NewMIs); 1255 (void)Success; 1256 assert(Success && 1257 "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold " 1258 "succeeded!"); 1259 assert(NewMIs.size() == 2 && 1260 "Unfolded a load into multiple instructions!"); 1261 MachineBasicBlock *MBB = MI->getParent(); 1262 MachineBasicBlock::iterator Pos = MI; 1263 MBB->insert(Pos, NewMIs[0]); 1264 MBB->insert(Pos, NewMIs[1]); 1265 // If unfolding produced a load that wasn't loop-invariant or profitable to 1266 // hoist, discard the new instructions and bail. 1267 if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) { 1268 NewMIs[0]->eraseFromParent(); 1269 NewMIs[1]->eraseFromParent(); 1270 return nullptr; 1271 } 1272 1273 // Update register pressure for the unfolded instruction. 1274 UpdateRegPressure(NewMIs[1]); 1275 1276 // Otherwise we successfully unfolded a load that we can hoist. 1277 1278 // Update the call site info. 1279 if (MI->shouldUpdateCallSiteInfo()) 1280 MF.eraseCallSiteInfo(MI); 1281 1282 MI->eraseFromParent(); 1283 return NewMIs[0]; 1284 } 1285 1286 /// Initialize the CSE map with instructions that are in the current loop 1287 /// preheader that may become duplicates of instructions that are hoisted 1288 /// out of the loop. 1289 void MachineLICMBase::InitCSEMap(MachineBasicBlock *BB) { 1290 for (MachineInstr &MI : *BB) 1291 CSEMap[MI.getOpcode()].push_back(&MI); 1292 } 1293 1294 /// Find an instruction amount PrevMIs that is a duplicate of MI. 1295 /// Return this instruction if it's found. 1296 MachineInstr * 1297 MachineLICMBase::LookForDuplicate(const MachineInstr *MI, 1298 std::vector<MachineInstr *> &PrevMIs) { 1299 for (MachineInstr *PrevMI : PrevMIs) 1300 if (TII->produceSameValue(*MI, *PrevMI, (PreRegAlloc ? MRI : nullptr))) 1301 return PrevMI; 1302 1303 return nullptr; 1304 } 1305 1306 /// Given a LICM'ed instruction, look for an instruction on the preheader that 1307 /// computes the same value. If it's found, do a RAU on with the definition of 1308 /// the existing instruction rather than hoisting the instruction to the 1309 /// preheader. 1310 bool MachineLICMBase::EliminateCSE( 1311 MachineInstr *MI, 1312 DenseMap<unsigned, std::vector<MachineInstr *>>::iterator &CI) { 1313 // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate 1314 // the undef property onto uses. 1315 if (CI == CSEMap.end() || MI->isImplicitDef()) 1316 return false; 1317 1318 if (MachineInstr *Dup = LookForDuplicate(MI, CI->second)) { 1319 LLVM_DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup); 1320 1321 // Replace virtual registers defined by MI by their counterparts defined 1322 // by Dup. 1323 SmallVector<unsigned, 2> Defs; 1324 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 1325 const MachineOperand &MO = MI->getOperand(i); 1326 1327 // Physical registers may not differ here. 1328 assert((!MO.isReg() || MO.getReg() == 0 || 1329 !Register::isPhysicalRegister(MO.getReg()) || 1330 MO.getReg() == Dup->getOperand(i).getReg()) && 1331 "Instructions with different phys regs are not identical!"); 1332 1333 if (MO.isReg() && MO.isDef() && 1334 !Register::isPhysicalRegister(MO.getReg())) 1335 Defs.push_back(i); 1336 } 1337 1338 SmallVector<const TargetRegisterClass*, 2> OrigRCs; 1339 for (unsigned i = 0, e = Defs.size(); i != e; ++i) { 1340 unsigned Idx = Defs[i]; 1341 Register Reg = MI->getOperand(Idx).getReg(); 1342 Register DupReg = Dup->getOperand(Idx).getReg(); 1343 OrigRCs.push_back(MRI->getRegClass(DupReg)); 1344 1345 if (!MRI->constrainRegClass(DupReg, MRI->getRegClass(Reg))) { 1346 // Restore old RCs if more than one defs. 1347 for (unsigned j = 0; j != i; ++j) 1348 MRI->setRegClass(Dup->getOperand(Defs[j]).getReg(), OrigRCs[j]); 1349 return false; 1350 } 1351 } 1352 1353 for (unsigned Idx : Defs) { 1354 Register Reg = MI->getOperand(Idx).getReg(); 1355 Register DupReg = Dup->getOperand(Idx).getReg(); 1356 MRI->replaceRegWith(Reg, DupReg); 1357 MRI->clearKillFlags(DupReg); 1358 // Clear Dup dead flag if any, we reuse it for Reg. 1359 if (!MRI->use_nodbg_empty(DupReg)) 1360 Dup->getOperand(Idx).setIsDead(false); 1361 } 1362 1363 MI->eraseFromParent(); 1364 ++NumCSEed; 1365 return true; 1366 } 1367 return false; 1368 } 1369 1370 /// Return true if the given instruction will be CSE'd if it's hoisted out of 1371 /// the loop. 1372 bool MachineLICMBase::MayCSE(MachineInstr *MI) { 1373 unsigned Opcode = MI->getOpcode(); 1374 DenseMap<unsigned, std::vector<MachineInstr *>>::iterator CI = 1375 CSEMap.find(Opcode); 1376 // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate 1377 // the undef property onto uses. 1378 if (CI == CSEMap.end() || MI->isImplicitDef()) 1379 return false; 1380 1381 return LookForDuplicate(MI, CI->second) != nullptr; 1382 } 1383 1384 /// When an instruction is found to use only loop invariant operands 1385 /// that are safe to hoist, this instruction is called to do the dirty work. 1386 /// It returns true if the instruction is hoisted. 1387 bool MachineLICMBase::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) { 1388 MachineBasicBlock *SrcBlock = MI->getParent(); 1389 1390 // Disable the instruction hoisting due to block hotness 1391 if ((DisableHoistingToHotterBlocks == UseBFI::All || 1392 (DisableHoistingToHotterBlocks == UseBFI::PGO && HasProfileData)) && 1393 isTgtHotterThanSrc(SrcBlock, Preheader)) { 1394 ++NumNotHoistedDueToHotness; 1395 return false; 1396 } 1397 // First check whether we should hoist this instruction. 1398 if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) { 1399 // If not, try unfolding a hoistable load. 1400 MI = ExtractHoistableLoad(MI); 1401 if (!MI) return false; 1402 } 1403 1404 // If we have hoisted an instruction that may store, it can only be a constant 1405 // store. 1406 if (MI->mayStore()) 1407 NumStoreConst++; 1408 1409 // Now move the instructions to the predecessor, inserting it before any 1410 // terminator instructions. 1411 LLVM_DEBUG({ 1412 dbgs() << "Hoisting " << *MI; 1413 if (MI->getParent()->getBasicBlock()) 1414 dbgs() << " from " << printMBBReference(*MI->getParent()); 1415 if (Preheader->getBasicBlock()) 1416 dbgs() << " to " << printMBBReference(*Preheader); 1417 dbgs() << "\n"; 1418 }); 1419 1420 // If this is the first instruction being hoisted to the preheader, 1421 // initialize the CSE map with potential common expressions. 1422 if (FirstInLoop) { 1423 InitCSEMap(Preheader); 1424 FirstInLoop = false; 1425 } 1426 1427 // Look for opportunity to CSE the hoisted instruction. 1428 unsigned Opcode = MI->getOpcode(); 1429 DenseMap<unsigned, std::vector<MachineInstr *>>::iterator CI = 1430 CSEMap.find(Opcode); 1431 if (!EliminateCSE(MI, CI)) { 1432 // Otherwise, splice the instruction to the preheader. 1433 Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI); 1434 1435 // Since we are moving the instruction out of its basic block, we do not 1436 // retain its debug location. Doing so would degrade the debugging 1437 // experience and adversely affect the accuracy of profiling information. 1438 assert(!MI->isDebugInstr() && "Should not hoist debug inst"); 1439 MI->setDebugLoc(DebugLoc()); 1440 1441 // Update register pressure for BBs from header to this block. 1442 UpdateBackTraceRegPressure(MI); 1443 1444 // Clear the kill flags of any register this instruction defines, 1445 // since they may need to be live throughout the entire loop 1446 // rather than just live for part of it. 1447 for (MachineOperand &MO : MI->operands()) 1448 if (MO.isReg() && MO.isDef() && !MO.isDead()) 1449 MRI->clearKillFlags(MO.getReg()); 1450 1451 // Add to the CSE map. 1452 if (CI != CSEMap.end()) 1453 CI->second.push_back(MI); 1454 else 1455 CSEMap[Opcode].push_back(MI); 1456 } 1457 1458 ++NumHoisted; 1459 Changed = true; 1460 1461 return true; 1462 } 1463 1464 /// Get the preheader for the current loop, splitting a critical edge if needed. 1465 MachineBasicBlock *MachineLICMBase::getCurPreheader() { 1466 // Determine the block to which to hoist instructions. If we can't find a 1467 // suitable loop predecessor, we can't do any hoisting. 1468 1469 // If we've tried to get a preheader and failed, don't try again. 1470 if (CurPreheader == reinterpret_cast<MachineBasicBlock *>(-1)) 1471 return nullptr; 1472 1473 if (!CurPreheader) { 1474 CurPreheader = CurLoop->getLoopPreheader(); 1475 if (!CurPreheader) { 1476 MachineBasicBlock *Pred = CurLoop->getLoopPredecessor(); 1477 if (!Pred) { 1478 CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1); 1479 return nullptr; 1480 } 1481 1482 CurPreheader = Pred->SplitCriticalEdge(CurLoop->getHeader(), *this); 1483 if (!CurPreheader) { 1484 CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1); 1485 return nullptr; 1486 } 1487 } 1488 } 1489 return CurPreheader; 1490 } 1491 1492 /// Is the target basic block at least "BlockFrequencyRatioThreshold" 1493 /// times hotter than the source basic block. 1494 bool MachineLICMBase::isTgtHotterThanSrc(MachineBasicBlock *SrcBlock, 1495 MachineBasicBlock *TgtBlock) { 1496 // Parse source and target basic block frequency from MBFI 1497 uint64_t SrcBF = MBFI->getBlockFreq(SrcBlock).getFrequency(); 1498 uint64_t DstBF = MBFI->getBlockFreq(TgtBlock).getFrequency(); 1499 1500 // Disable the hoisting if source block frequency is zero 1501 if (!SrcBF) 1502 return true; 1503 1504 double Ratio = (double)DstBF / SrcBF; 1505 1506 // Compare the block frequency ratio with the threshold 1507 return Ratio > BlockFrequencyRatioThreshold; 1508 } 1509