1 //===- MachineCSE.cpp - Machine Common Subexpression Elimination Pass -----===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass performs global common subexpression elimination on machine 10 // instructions using a scoped hash table based value numbering scheme. It 11 // must be run while the machine function is still in SSA form. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/CodeGen/MachineCSE.h" 16 #include "llvm/ADT/DenseMap.h" 17 #include "llvm/ADT/ScopedHashTable.h" 18 #include "llvm/ADT/SmallPtrSet.h" 19 #include "llvm/ADT/SmallSet.h" 20 #include "llvm/ADT/SmallVector.h" 21 #include "llvm/ADT/Statistic.h" 22 #include "llvm/Analysis/CFG.h" 23 #include "llvm/CodeGen/MachineBasicBlock.h" 24 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" 25 #include "llvm/CodeGen/MachineDominators.h" 26 #include "llvm/CodeGen/MachineFunction.h" 27 #include "llvm/CodeGen/MachineFunctionPass.h" 28 #include "llvm/CodeGen/MachineInstr.h" 29 #include "llvm/CodeGen/MachineLoopInfo.h" 30 #include "llvm/CodeGen/MachineOperand.h" 31 #include "llvm/CodeGen/MachineRegisterInfo.h" 32 #include "llvm/CodeGen/Passes.h" 33 #include "llvm/CodeGen/TargetInstrInfo.h" 34 #include "llvm/CodeGen/TargetOpcodes.h" 35 #include "llvm/CodeGen/TargetRegisterInfo.h" 36 #include "llvm/CodeGen/TargetSubtargetInfo.h" 37 #include "llvm/InitializePasses.h" 38 #include "llvm/MC/MCRegister.h" 39 #include "llvm/MC/MCRegisterInfo.h" 40 #include "llvm/Pass.h" 41 #include "llvm/Support/Allocator.h" 42 #include "llvm/Support/Debug.h" 43 #include "llvm/Support/RecyclingAllocator.h" 44 #include "llvm/Support/raw_ostream.h" 45 #include <cassert> 46 #include <iterator> 47 #include <utility> 48 49 using namespace llvm; 50 51 #define DEBUG_TYPE "machine-cse" 52 53 STATISTIC(NumCoalesces, "Number of copies coalesced"); 54 STATISTIC(NumCSEs, "Number of common subexpression eliminated"); 55 STATISTIC(NumPREs, "Number of partial redundant expression" 56 " transformed to fully redundant"); 57 STATISTIC(NumPhysCSEs, 58 "Number of physreg referencing common subexpr eliminated"); 59 STATISTIC(NumCrossBBCSEs, 60 "Number of cross-MBB physreg referencing CS eliminated"); 61 STATISTIC(NumCommutes, "Number of copies coalesced after commuting"); 62 63 // Threshold to avoid excessive cost to compute isProfitableToCSE. 64 static cl::opt<int> 65 CSUsesThreshold("csuses-threshold", cl::Hidden, cl::init(1024), 66 cl::desc("Threshold for the size of CSUses")); 67 68 static cl::opt<bool> AggressiveMachineCSE( 69 "aggressive-machine-cse", cl::Hidden, cl::init(false), 70 cl::desc("Override the profitability heuristics for Machine CSE")); 71 72 namespace { 73 74 class MachineCSEImpl { 75 const TargetInstrInfo *TII = nullptr; 76 const TargetRegisterInfo *TRI = nullptr; 77 MachineDominatorTree *DT = nullptr; 78 MachineRegisterInfo *MRI = nullptr; 79 MachineBlockFrequencyInfo *MBFI = nullptr; 80 81 public: 82 MachineCSEImpl(MachineDominatorTree *DT, MachineBlockFrequencyInfo *MBFI) 83 : DT(DT), MBFI(MBFI) {} 84 bool run(MachineFunction &MF); 85 86 private: 87 using AllocatorTy = 88 RecyclingAllocator<BumpPtrAllocator, 89 ScopedHashTableVal<MachineInstr *, unsigned>>; 90 using ScopedHTType = 91 ScopedHashTable<MachineInstr *, unsigned, MachineInstrExpressionTrait, 92 AllocatorTy>; 93 using ScopeType = ScopedHTType::ScopeTy; 94 using PhysDefVector = SmallVector<std::pair<unsigned, Register>, 2>; 95 96 unsigned LookAheadLimit = 0; 97 DenseMap<MachineBasicBlock *, ScopeType *> ScopeMap; 98 DenseMap<MachineInstr *, MachineBasicBlock *, MachineInstrExpressionTrait> 99 PREMap; 100 ScopedHTType VNT; 101 SmallVector<MachineInstr *, 64> Exps; 102 unsigned CurrVN = 0; 103 104 bool PerformTrivialCopyPropagation(MachineInstr *MI, MachineBasicBlock *MBB); 105 bool isPhysDefTriviallyDead(MCRegister Reg, 106 MachineBasicBlock::const_iterator I, 107 MachineBasicBlock::const_iterator E) const; 108 bool hasLivePhysRegDefUses(const MachineInstr *MI, 109 const MachineBasicBlock *MBB, 110 SmallSet<MCRegister, 8> &PhysRefs, 111 PhysDefVector &PhysDefs, bool &PhysUseDef) const; 112 bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, 113 const SmallSet<MCRegister, 8> &PhysRefs, 114 const PhysDefVector &PhysDefs, bool &NonLocal) const; 115 bool isCSECandidate(MachineInstr *MI); 116 bool isProfitableToCSE(Register CSReg, Register Reg, MachineBasicBlock *CSBB, 117 MachineInstr *MI); 118 void EnterScope(MachineBasicBlock *MBB); 119 void ExitScope(MachineBasicBlock *MBB); 120 bool ProcessBlockCSE(MachineBasicBlock *MBB); 121 void ExitScopeIfDone(MachineDomTreeNode *Node, 122 DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren); 123 bool PerformCSE(MachineDomTreeNode *Node); 124 125 bool isPRECandidate(MachineInstr *MI, SmallSet<MCRegister, 8> &PhysRefs); 126 bool ProcessBlockPRE(MachineDominatorTree *MDT, MachineBasicBlock *MBB); 127 bool PerformSimplePRE(MachineDominatorTree *DT); 128 /// Heuristics to see if it's profitable to move common computations of MBB 129 /// and MBB1 to CandidateBB. 130 bool isProfitableToHoistInto(MachineBasicBlock *CandidateBB, 131 MachineBasicBlock *MBB, MachineBasicBlock *MBB1); 132 void releaseMemory(); 133 }; 134 135 class MachineCSELegacy : public MachineFunctionPass { 136 public: 137 static char ID; // Pass identification 138 139 MachineCSELegacy() : MachineFunctionPass(ID) { 140 initializeMachineCSELegacyPass(*PassRegistry::getPassRegistry()); 141 } 142 143 bool runOnMachineFunction(MachineFunction &MF) override; 144 145 void getAnalysisUsage(AnalysisUsage &AU) const override { 146 AU.setPreservesCFG(); 147 MachineFunctionPass::getAnalysisUsage(AU); 148 AU.addPreservedID(MachineLoopInfoID); 149 AU.addRequired<MachineDominatorTreeWrapperPass>(); 150 AU.addPreserved<MachineDominatorTreeWrapperPass>(); 151 AU.addRequired<MachineBlockFrequencyInfoWrapperPass>(); 152 AU.addPreserved<MachineBlockFrequencyInfoWrapperPass>(); 153 } 154 155 MachineFunctionProperties getRequiredProperties() const override { 156 return MachineFunctionProperties().setIsSSA(); 157 } 158 }; 159 } // end anonymous namespace 160 161 char MachineCSELegacy::ID = 0; 162 163 char &llvm::MachineCSELegacyID = MachineCSELegacy::ID; 164 165 INITIALIZE_PASS_BEGIN(MachineCSELegacy, DEBUG_TYPE, 166 "Machine Common Subexpression Elimination", false, false) 167 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) 168 INITIALIZE_PASS_END(MachineCSELegacy, DEBUG_TYPE, 169 "Machine Common Subexpression Elimination", false, false) 170 171 /// The source register of a COPY machine instruction can be propagated to all 172 /// its users, and this propagation could increase the probability of finding 173 /// common subexpressions. If the COPY has only one user, the COPY itself can 174 /// be removed. 175 bool MachineCSEImpl::PerformTrivialCopyPropagation(MachineInstr *MI, 176 MachineBasicBlock *MBB) { 177 bool Changed = false; 178 for (MachineOperand &MO : MI->all_uses()) { 179 Register Reg = MO.getReg(); 180 if (!Reg.isVirtual()) 181 continue; 182 bool OnlyOneUse = MRI->hasOneNonDBGUse(Reg); 183 MachineInstr *DefMI = MRI->getVRegDef(Reg); 184 if (!DefMI || !DefMI->isCopy()) 185 continue; 186 Register SrcReg = DefMI->getOperand(1).getReg(); 187 if (!SrcReg.isVirtual()) 188 continue; 189 // FIXME: We should trivially coalesce subregister copies to expose CSE 190 // opportunities on instructions with truncated operands (see 191 // cse-add-with-overflow.ll). This can be done here as follows: 192 // if (SrcSubReg) 193 // RC = TRI->getMatchingSuperRegClass(MRI->getRegClass(SrcReg), RC, 194 // SrcSubReg); 195 // MO.substVirtReg(SrcReg, SrcSubReg, *TRI); 196 // 197 // The 2-addr pass has been updated to handle coalesced subregs. However, 198 // some machine-specific code still can't handle it. 199 // To handle it properly we also need a way find a constrained subregister 200 // class given a super-reg class and subreg index. 201 if (DefMI->getOperand(1).getSubReg()) 202 continue; 203 if (!MRI->constrainRegAttrs(SrcReg, Reg)) 204 continue; 205 LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI); 206 LLVM_DEBUG(dbgs() << "*** to: " << *MI); 207 208 // Propagate SrcReg of copies to MI. 209 MO.setReg(SrcReg); 210 MRI->clearKillFlags(SrcReg); 211 // Coalesce single use copies. 212 if (OnlyOneUse) { 213 // If (and only if) we've eliminated all uses of the copy, also 214 // copy-propagate to any debug-users of MI, or they'll be left using 215 // an undefined value. 216 DefMI->changeDebugValuesDefReg(SrcReg); 217 218 DefMI->eraseFromParent(); 219 ++NumCoalesces; 220 } 221 Changed = true; 222 } 223 224 return Changed; 225 } 226 227 bool MachineCSEImpl::isPhysDefTriviallyDead( 228 MCRegister Reg, MachineBasicBlock::const_iterator I, 229 MachineBasicBlock::const_iterator E) const { 230 unsigned LookAheadLeft = LookAheadLimit; 231 while (LookAheadLeft) { 232 // Skip over dbg_value's. 233 I = skipDebugInstructionsForward(I, E); 234 235 if (I == E) 236 // Reached end of block, we don't know if register is dead or not. 237 return false; 238 239 bool SeenDef = false; 240 for (const MachineOperand &MO : I->operands()) { 241 if (MO.isRegMask() && MO.clobbersPhysReg(Reg)) 242 SeenDef = true; 243 if (!MO.isReg() || !MO.getReg()) 244 continue; 245 if (!TRI->regsOverlap(MO.getReg(), Reg)) 246 continue; 247 if (MO.isUse()) 248 // Found a use! 249 return false; 250 SeenDef = true; 251 } 252 if (SeenDef) 253 // See a def of Reg (or an alias) before encountering any use, it's 254 // trivially dead. 255 return true; 256 257 --LookAheadLeft; 258 ++I; 259 } 260 return false; 261 } 262 263 static bool isCallerPreservedOrConstPhysReg(MCRegister Reg, 264 const MachineOperand &MO, 265 const MachineFunction &MF, 266 const TargetRegisterInfo &TRI, 267 const TargetInstrInfo &TII) { 268 // MachineRegisterInfo::isConstantPhysReg directly called by 269 // MachineRegisterInfo::isCallerPreservedOrConstPhysReg expects the 270 // reserved registers to be frozen. That doesn't cause a problem post-ISel as 271 // most (if not all) targets freeze reserved registers right after ISel. 272 // 273 // It does cause issues mid-GlobalISel, however, hence the additional 274 // reservedRegsFrozen check. 275 const MachineRegisterInfo &MRI = MF.getRegInfo(); 276 return TRI.isCallerPreservedPhysReg(Reg, MF) || TII.isIgnorableUse(MO) || 277 (MRI.reservedRegsFrozen() && MRI.isConstantPhysReg(Reg)); 278 } 279 280 /// hasLivePhysRegDefUses - Return true if the specified instruction read/write 281 /// physical registers (except for dead defs of physical registers). It also 282 /// returns the physical register def by reference if it's the only one and the 283 /// instruction does not uses a physical register. 284 bool MachineCSEImpl::hasLivePhysRegDefUses(const MachineInstr *MI, 285 const MachineBasicBlock *MBB, 286 SmallSet<MCRegister, 8> &PhysRefs, 287 PhysDefVector &PhysDefs, 288 bool &PhysUseDef) const { 289 // First, add all uses to PhysRefs. 290 for (const MachineOperand &MO : MI->all_uses()) { 291 Register Reg = MO.getReg(); 292 if (!Reg) 293 continue; 294 if (Reg.isVirtual()) 295 continue; 296 // Reading either caller preserved or constant physregs is ok. 297 if (!isCallerPreservedOrConstPhysReg(Reg.asMCReg(), MO, *MI->getMF(), *TRI, 298 *TII)) 299 for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) 300 PhysRefs.insert(*AI); 301 } 302 303 // Next, collect all defs into PhysDefs. If any is already in PhysRefs 304 // (which currently contains only uses), set the PhysUseDef flag. 305 PhysUseDef = false; 306 MachineBasicBlock::const_iterator I = MI; I = std::next(I); 307 for (const auto &MOP : llvm::enumerate(MI->operands())) { 308 const MachineOperand &MO = MOP.value(); 309 if (!MO.isReg() || !MO.isDef()) 310 continue; 311 Register Reg = MO.getReg(); 312 if (!Reg) 313 continue; 314 if (Reg.isVirtual()) 315 continue; 316 // Check against PhysRefs even if the def is "dead". 317 if (PhysRefs.count(Reg.asMCReg())) 318 PhysUseDef = true; 319 // If the def is dead, it's ok. But the def may not marked "dead". That's 320 // common since this pass is run before livevariables. We can scan 321 // forward a few instructions and check if it is obviously dead. 322 if (!MO.isDead() && !isPhysDefTriviallyDead(Reg.asMCReg(), I, MBB->end())) 323 PhysDefs.emplace_back(MOP.index(), Reg); 324 } 325 326 // Finally, add all defs to PhysRefs as well. 327 for (const auto &Def : PhysDefs) 328 for (MCRegAliasIterator AI(Def.second, TRI, true); AI.isValid(); ++AI) 329 PhysRefs.insert(*AI); 330 331 return !PhysRefs.empty(); 332 } 333 334 bool MachineCSEImpl::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, 335 const SmallSet<MCRegister, 8> &PhysRefs, 336 const PhysDefVector &PhysDefs, 337 bool &NonLocal) const { 338 // For now conservatively returns false if the common subexpression is 339 // not in the same basic block as the given instruction. The only exception 340 // is if the common subexpression is in the sole predecessor block. 341 const MachineBasicBlock *MBB = MI->getParent(); 342 const MachineBasicBlock *CSMBB = CSMI->getParent(); 343 344 bool CrossMBB = false; 345 if (CSMBB != MBB) { 346 if (MBB->pred_size() != 1 || *MBB->pred_begin() != CSMBB) 347 return false; 348 349 for (const auto &PhysDef : PhysDefs) { 350 if (MRI->isAllocatable(PhysDef.second) || MRI->isReserved(PhysDef.second)) 351 // Avoid extending live range of physical registers if they are 352 //allocatable or reserved. 353 return false; 354 } 355 CrossMBB = true; 356 } 357 MachineBasicBlock::const_iterator I = CSMI; I = std::next(I); 358 MachineBasicBlock::const_iterator E = MI; 359 MachineBasicBlock::const_iterator EE = CSMBB->end(); 360 unsigned LookAheadLeft = LookAheadLimit; 361 while (LookAheadLeft) { 362 // Skip over dbg_value's. 363 while (I != E && I != EE && I->isDebugInstr()) 364 ++I; 365 366 if (I == EE) { 367 assert(CrossMBB && "Reaching end-of-MBB without finding MI?"); 368 (void)CrossMBB; 369 CrossMBB = false; 370 NonLocal = true; 371 I = MBB->begin(); 372 EE = MBB->end(); 373 continue; 374 } 375 376 if (I == E) 377 return true; 378 379 for (const MachineOperand &MO : I->operands()) { 380 // RegMasks go on instructions like calls that clobber lots of physregs. 381 // Don't attempt to CSE across such an instruction. 382 if (MO.isRegMask()) 383 return false; 384 if (!MO.isReg() || !MO.isDef()) 385 continue; 386 Register MOReg = MO.getReg(); 387 if (MOReg.isVirtual()) 388 continue; 389 if (PhysRefs.count(MOReg.asMCReg())) 390 return false; 391 } 392 393 --LookAheadLeft; 394 ++I; 395 } 396 397 return false; 398 } 399 400 bool MachineCSEImpl::isCSECandidate(MachineInstr *MI) { 401 if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() || 402 MI->isInlineAsm() || MI->isDebugInstr() || MI->isJumpTableDebugInfo() || 403 MI->isFakeUse()) 404 return false; 405 406 // Ignore copies. 407 if (MI->isCopyLike()) 408 return false; 409 410 // Ignore stuff that we obviously can't move. 411 if (MI->mayStore() || MI->isCall() || MI->isTerminator() || 412 MI->mayRaiseFPException() || MI->hasUnmodeledSideEffects()) 413 return false; 414 415 if (MI->mayLoad()) { 416 // Okay, this instruction does a load. As a refinement, we allow the target 417 // to decide whether the loaded value is actually a constant. If so, we can 418 // actually use it as a load. 419 if (!MI->isDereferenceableInvariantLoad()) 420 // FIXME: we should be able to hoist loads with no other side effects if 421 // there are no other instructions which can change memory in this loop. 422 // This is a trivial form of alias analysis. 423 return false; 424 } 425 426 // Ignore stack guard loads, otherwise the register that holds CSEed value may 427 // be spilled and get loaded back with corrupted data. 428 if (MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD) 429 return false; 430 431 return true; 432 } 433 434 /// isProfitableToCSE - Return true if it's profitable to eliminate MI with a 435 /// common expression that defines Reg. CSBB is basic block where CSReg is 436 /// defined. 437 bool MachineCSEImpl::isProfitableToCSE(Register CSReg, Register Reg, 438 MachineBasicBlock *CSBB, 439 MachineInstr *MI) { 440 if (AggressiveMachineCSE) 441 return true; 442 443 // FIXME: Heuristics that works around the lack the live range splitting. 444 445 // If CSReg is used at all uses of Reg, CSE should not increase register 446 // pressure of CSReg. 447 bool MayIncreasePressure = true; 448 if (CSReg.isVirtual() && Reg.isVirtual()) { 449 MayIncreasePressure = false; 450 SmallPtrSet<MachineInstr*, 8> CSUses; 451 int NumOfUses = 0; 452 for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) { 453 CSUses.insert(&MI); 454 // Too costly to compute if NumOfUses is very large. Conservatively assume 455 // MayIncreasePressure to avoid spending too much time here. 456 if (++NumOfUses > CSUsesThreshold) { 457 MayIncreasePressure = true; 458 break; 459 } 460 } 461 if (!MayIncreasePressure) 462 for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) { 463 if (!CSUses.count(&MI)) { 464 MayIncreasePressure = true; 465 break; 466 } 467 } 468 } 469 if (!MayIncreasePressure) return true; 470 471 // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in 472 // an immediate predecessor. We don't want to increase register pressure and 473 // end up causing other computation to be spilled. 474 if (TII->isAsCheapAsAMove(*MI)) { 475 MachineBasicBlock *BB = MI->getParent(); 476 if (CSBB != BB && !CSBB->isSuccessor(BB)) 477 return false; 478 } 479 480 // Heuristics #2: If the expression doesn't not use a vr and the only use 481 // of the redundant computation are copies, do not cse. 482 bool HasVRegUse = false; 483 for (const MachineOperand &MO : MI->all_uses()) { 484 if (MO.getReg().isVirtual()) { 485 HasVRegUse = true; 486 break; 487 } 488 } 489 if (!HasVRegUse) { 490 bool HasNonCopyUse = false; 491 for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) { 492 // Ignore copies. 493 if (!MI.isCopyLike()) { 494 HasNonCopyUse = true; 495 break; 496 } 497 } 498 if (!HasNonCopyUse) 499 return false; 500 } 501 502 // Heuristics #3: If the common subexpression is used by PHIs, do not reuse 503 // it unless the defined value is already used in the BB of the new use. 504 bool HasPHI = false; 505 for (MachineInstr &UseMI : MRI->use_nodbg_instructions(CSReg)) { 506 HasPHI |= UseMI.isPHI(); 507 if (UseMI.getParent() == MI->getParent()) 508 return true; 509 } 510 511 return !HasPHI; 512 } 513 514 void MachineCSEImpl::EnterScope(MachineBasicBlock *MBB) { 515 LLVM_DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n'); 516 ScopeType *Scope = new ScopeType(VNT); 517 ScopeMap[MBB] = Scope; 518 } 519 520 void MachineCSEImpl::ExitScope(MachineBasicBlock *MBB) { 521 LLVM_DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n'); 522 DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB); 523 assert(SI != ScopeMap.end()); 524 delete SI->second; 525 ScopeMap.erase(SI); 526 } 527 528 bool MachineCSEImpl::ProcessBlockCSE(MachineBasicBlock *MBB) { 529 bool Changed = false; 530 531 SmallVector<std::pair<Register, Register>, 8> CSEPairs; 532 SmallVector<unsigned, 2> ImplicitDefsToUpdate; 533 SmallVector<Register, 2> ImplicitDefs; 534 for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) { 535 if (!isCSECandidate(&MI)) 536 continue; 537 538 bool FoundCSE = VNT.count(&MI); 539 if (!FoundCSE) { 540 // Using trivial copy propagation to find more CSE opportunities. 541 if (PerformTrivialCopyPropagation(&MI, MBB)) { 542 Changed = true; 543 544 // After coalescing MI itself may become a copy. 545 if (MI.isCopyLike()) 546 continue; 547 548 // Try again to see if CSE is possible. 549 FoundCSE = VNT.count(&MI); 550 } 551 } 552 553 // Commute commutable instructions. 554 bool Commuted = false; 555 if (!FoundCSE && MI.isCommutable()) { 556 if (MachineInstr *NewMI = TII->commuteInstruction(MI)) { 557 Commuted = true; 558 FoundCSE = VNT.count(NewMI); 559 if (NewMI != &MI) { 560 // New instruction. It doesn't need to be kept. 561 NewMI->eraseFromParent(); 562 Changed = true; 563 } else if (!FoundCSE) 564 // MI was changed but it didn't help, commute it back! 565 (void)TII->commuteInstruction(MI); 566 } 567 } 568 569 // If the instruction defines physical registers and the values *may* be 570 // used, then it's not safe to replace it with a common subexpression. 571 // It's also not safe if the instruction uses physical registers. 572 bool CrossMBBPhysDef = false; 573 SmallSet<MCRegister, 8> PhysRefs; 574 PhysDefVector PhysDefs; 575 bool PhysUseDef = false; 576 if (FoundCSE && 577 hasLivePhysRegDefUses(&MI, MBB, PhysRefs, PhysDefs, PhysUseDef)) { 578 FoundCSE = false; 579 580 // ... Unless the CS is local or is in the sole predecessor block 581 // and it also defines the physical register which is not clobbered 582 // in between and the physical register uses were not clobbered. 583 // This can never be the case if the instruction both uses and 584 // defines the same physical register, which was detected above. 585 if (!PhysUseDef) { 586 unsigned CSVN = VNT.lookup(&MI); 587 MachineInstr *CSMI = Exps[CSVN]; 588 if (PhysRegDefsReach(CSMI, &MI, PhysRefs, PhysDefs, CrossMBBPhysDef)) 589 FoundCSE = true; 590 } 591 } 592 593 if (!FoundCSE) { 594 VNT.insert(&MI, CurrVN++); 595 Exps.push_back(&MI); 596 continue; 597 } 598 599 // Found a common subexpression, eliminate it. 600 unsigned CSVN = VNT.lookup(&MI); 601 MachineInstr *CSMI = Exps[CSVN]; 602 LLVM_DEBUG(dbgs() << "Examining: " << MI); 603 LLVM_DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI); 604 605 // Prevent CSE-ing non-local convergent instructions. 606 // LLVM's current definition of `isConvergent` does not necessarily prove 607 // that non-local CSE is illegal. The following check extends the definition 608 // of `isConvergent` to assume a convergent instruction is dependent not 609 // only on additional conditions, but also on fewer conditions. LLVM does 610 // not have a MachineInstr attribute which expresses this extended 611 // definition, so it's necessary to use `isConvergent` to prevent illegally 612 // CSE-ing the subset of `isConvergent` instructions which do fall into this 613 // extended definition. 614 if (MI.isConvergent() && MI.getParent() != CSMI->getParent()) { 615 LLVM_DEBUG(dbgs() << "*** Convergent MI and subexpression exist in " 616 "different BBs, avoid CSE!\n"); 617 VNT.insert(&MI, CurrVN++); 618 Exps.push_back(&MI); 619 continue; 620 } 621 622 // Check if it's profitable to perform this CSE. 623 bool DoCSE = true; 624 unsigned NumDefs = MI.getNumDefs(); 625 626 for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) { 627 MachineOperand &MO = MI.getOperand(i); 628 if (!MO.isReg() || !MO.isDef()) 629 continue; 630 Register OldReg = MO.getReg(); 631 Register NewReg = CSMI->getOperand(i).getReg(); 632 633 // Go through implicit defs of CSMI and MI, if a def is not dead at MI, 634 // we should make sure it is not dead at CSMI. 635 if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead()) 636 ImplicitDefsToUpdate.push_back(i); 637 638 // Keep track of implicit defs of CSMI and MI, to clear possibly 639 // made-redundant kill flags. 640 if (MO.isImplicit() && !MO.isDead() && OldReg == NewReg) 641 ImplicitDefs.push_back(OldReg); 642 643 if (OldReg == NewReg) { 644 --NumDefs; 645 continue; 646 } 647 648 assert(OldReg.isVirtual() && NewReg.isVirtual() && 649 "Do not CSE physical register defs!"); 650 651 if (!isProfitableToCSE(NewReg, OldReg, CSMI->getParent(), &MI)) { 652 LLVM_DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n"); 653 DoCSE = false; 654 break; 655 } 656 657 // Don't perform CSE if the result of the new instruction cannot exist 658 // within the constraints (register class, bank, or low-level type) of 659 // the old instruction. 660 if (!MRI->constrainRegAttrs(NewReg, OldReg)) { 661 LLVM_DEBUG( 662 dbgs() << "*** Not the same register constraints, avoid CSE!\n"); 663 DoCSE = false; 664 break; 665 } 666 667 CSEPairs.emplace_back(OldReg, NewReg); 668 --NumDefs; 669 } 670 671 // Actually perform the elimination. 672 if (DoCSE) { 673 for (const std::pair<Register, Register> &CSEPair : CSEPairs) { 674 Register OldReg = CSEPair.first; 675 Register NewReg = CSEPair.second; 676 // OldReg may have been unused but is used now, clear the Dead flag 677 MachineInstr *Def = MRI->getUniqueVRegDef(NewReg); 678 assert(Def != nullptr && "CSEd register has no unique definition?"); 679 Def->clearRegisterDeads(NewReg); 680 // Replace with NewReg and clear kill flags which may be wrong now. 681 MRI->replaceRegWith(OldReg, NewReg); 682 MRI->clearKillFlags(NewReg); 683 } 684 685 // Go through implicit defs of CSMI and MI, if a def is not dead at MI, 686 // we should make sure it is not dead at CSMI. 687 for (unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate) 688 CSMI->getOperand(ImplicitDefToUpdate).setIsDead(false); 689 for (const auto &PhysDef : PhysDefs) 690 if (!MI.getOperand(PhysDef.first).isDead()) 691 CSMI->getOperand(PhysDef.first).setIsDead(false); 692 693 // Go through implicit defs of CSMI and MI, and clear the kill flags on 694 // their uses in all the instructions between CSMI and MI. 695 // We might have made some of the kill flags redundant, consider: 696 // subs ... implicit-def %nzcv <- CSMI 697 // csinc ... implicit killed %nzcv <- this kill flag isn't valid anymore 698 // subs ... implicit-def %nzcv <- MI, to be eliminated 699 // csinc ... implicit killed %nzcv 700 // Since we eliminated MI, and reused a register imp-def'd by CSMI 701 // (here %nzcv), that register, if it was killed before MI, should have 702 // that kill flag removed, because it's lifetime was extended. 703 if (CSMI->getParent() == MI.getParent()) { 704 for (MachineBasicBlock::iterator II = CSMI, IE = &MI; II != IE; ++II) 705 for (auto ImplicitDef : ImplicitDefs) 706 if (MachineOperand *MO = II->findRegisterUseOperand( 707 ImplicitDef, TRI, /*isKill=*/true)) 708 MO->setIsKill(false); 709 } else { 710 // If the instructions aren't in the same BB, bail out and clear the 711 // kill flag on all uses of the imp-def'd register. 712 for (auto ImplicitDef : ImplicitDefs) 713 MRI->clearKillFlags(ImplicitDef); 714 } 715 716 if (CrossMBBPhysDef) { 717 // Add physical register defs now coming in from a predecessor to MBB 718 // livein list. 719 while (!PhysDefs.empty()) { 720 auto LiveIn = PhysDefs.pop_back_val(); 721 if (!MBB->isLiveIn(LiveIn.second)) 722 MBB->addLiveIn(LiveIn.second); 723 } 724 ++NumCrossBBCSEs; 725 } 726 727 MI.eraseFromParent(); 728 ++NumCSEs; 729 if (!PhysRefs.empty()) 730 ++NumPhysCSEs; 731 if (Commuted) 732 ++NumCommutes; 733 Changed = true; 734 } else { 735 VNT.insert(&MI, CurrVN++); 736 Exps.push_back(&MI); 737 } 738 CSEPairs.clear(); 739 ImplicitDefsToUpdate.clear(); 740 ImplicitDefs.clear(); 741 } 742 743 return Changed; 744 } 745 746 /// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given 747 /// dominator tree node if its a leaf or all of its children are done. Walk 748 /// up the dominator tree to destroy ancestors which are now done. 749 void MachineCSEImpl::ExitScopeIfDone( 750 MachineDomTreeNode *Node, 751 DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren) { 752 if (OpenChildren[Node]) 753 return; 754 755 // Pop scope. 756 ExitScope(Node->getBlock()); 757 758 // Now traverse upwards to pop ancestors whose offsprings are all done. 759 while (MachineDomTreeNode *Parent = Node->getIDom()) { 760 unsigned Left = --OpenChildren[Parent]; 761 if (Left != 0) 762 break; 763 ExitScope(Parent->getBlock()); 764 Node = Parent; 765 } 766 } 767 768 bool MachineCSEImpl::PerformCSE(MachineDomTreeNode *Node) { 769 SmallVector<MachineDomTreeNode*, 32> Scopes; 770 SmallVector<MachineDomTreeNode*, 8> WorkList; 771 DenseMap<MachineDomTreeNode*, unsigned> OpenChildren; 772 773 CurrVN = 0; 774 775 // Perform a DFS walk to determine the order of visit. 776 WorkList.push_back(Node); 777 do { 778 Node = WorkList.pop_back_val(); 779 Scopes.push_back(Node); 780 OpenChildren[Node] = Node->getNumChildren(); 781 append_range(WorkList, Node->children()); 782 } while (!WorkList.empty()); 783 784 // Now perform CSE. 785 bool Changed = false; 786 for (MachineDomTreeNode *Node : Scopes) { 787 MachineBasicBlock *MBB = Node->getBlock(); 788 EnterScope(MBB); 789 Changed |= ProcessBlockCSE(MBB); 790 // If it's a leaf node, it's done. Traverse upwards to pop ancestors. 791 ExitScopeIfDone(Node, OpenChildren); 792 } 793 794 return Changed; 795 } 796 797 // We use stronger checks for PRE candidate rather than for CSE ones to embrace 798 // checks inside ProcessBlockCSE(), not only inside isCSECandidate(). This helps 799 // to exclude instrs created by PRE that won't be CSEed later. 800 bool MachineCSEImpl::isPRECandidate(MachineInstr *MI, 801 SmallSet<MCRegister, 8> &PhysRefs) { 802 if (!isCSECandidate(MI) || 803 MI->isNotDuplicable() || 804 MI->mayLoad() || 805 TII->isAsCheapAsAMove(*MI) || 806 MI->getNumDefs() != 1 || 807 MI->getNumExplicitDefs() != 1) 808 return false; 809 810 for (const MachineOperand &MO : MI->operands()) { 811 if (MO.isReg() && !MO.getReg().isVirtual()) { 812 if (MO.isDef()) 813 return false; 814 else 815 PhysRefs.insert(MO.getReg()); 816 } 817 } 818 819 return true; 820 } 821 822 bool MachineCSEImpl::ProcessBlockPRE(MachineDominatorTree *DT, 823 MachineBasicBlock *MBB) { 824 bool Changed = false; 825 for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) { 826 SmallSet<MCRegister, 8> PhysRefs; 827 if (!isPRECandidate(&MI, PhysRefs)) 828 continue; 829 830 auto [It, Inserted] = PREMap.try_emplace(&MI, MBB); 831 if (Inserted) 832 continue; 833 834 auto *MBB1 = It->second; 835 assert( 836 !DT->properlyDominates(MBB, MBB1) && 837 "MBB cannot properly dominate MBB1 while DFS through dominators tree!"); 838 auto CMBB = DT->findNearestCommonDominator(MBB, MBB1); 839 if (!CMBB->isLegalToHoistInto()) 840 continue; 841 842 if (!isProfitableToHoistInto(CMBB, MBB, MBB1)) 843 continue; 844 845 // Two instrs are partial redundant if their basic blocks are reachable 846 // from one to another but one doesn't dominate another. 847 if (CMBB != MBB1) { 848 auto BB = MBB->getBasicBlock(), BB1 = MBB1->getBasicBlock(); 849 if (BB != nullptr && BB1 != nullptr && 850 (isPotentiallyReachable(BB1, BB) || 851 isPotentiallyReachable(BB, BB1))) { 852 // The following check extends the definition of `isConvergent` to 853 // assume a convergent instruction is dependent not only on additional 854 // conditions, but also on fewer conditions. LLVM does not have a 855 // MachineInstr attribute which expresses this extended definition, so 856 // it's necessary to use `isConvergent` to prevent illegally PRE-ing the 857 // subset of `isConvergent` instructions which do fall into this 858 // extended definition. 859 if (MI.isConvergent() && CMBB != MBB) 860 continue; 861 862 // If this instruction uses physical registers then we can only do PRE 863 // if it's using the value that is live at the place we're hoisting to. 864 bool NonLocal; 865 PhysDefVector PhysDefs; 866 if (!PhysRefs.empty() && 867 !PhysRegDefsReach(&*(CMBB->getFirstTerminator()), &MI, PhysRefs, 868 PhysDefs, NonLocal)) 869 continue; 870 871 assert(MI.getOperand(0).isDef() && 872 "First operand of instr with one explicit def must be this def"); 873 Register VReg = MI.getOperand(0).getReg(); 874 Register NewReg = MRI->cloneVirtualRegister(VReg); 875 if (!isProfitableToCSE(NewReg, VReg, CMBB, &MI)) 876 continue; 877 MachineInstr &NewMI = 878 TII->duplicate(*CMBB, CMBB->getFirstTerminator(), MI); 879 880 // When hoisting, make sure we don't carry the debug location of 881 // the original instruction, as that's not correct and can cause 882 // unexpected jumps when debugging optimized code. 883 auto EmptyDL = DebugLoc(); 884 NewMI.setDebugLoc(EmptyDL); 885 886 NewMI.getOperand(0).setReg(NewReg); 887 888 PREMap[&MI] = CMBB; 889 ++NumPREs; 890 Changed = true; 891 } 892 } 893 } 894 return Changed; 895 } 896 897 // This simple PRE (partial redundancy elimination) pass doesn't actually 898 // eliminate partial redundancy but transforms it to full redundancy, 899 // anticipating that the next CSE step will eliminate this created redundancy. 900 // If CSE doesn't eliminate this, than created instruction will remain dead 901 // and eliminated later by Remove Dead Machine Instructions pass. 902 bool MachineCSEImpl::PerformSimplePRE(MachineDominatorTree *DT) { 903 SmallVector<MachineDomTreeNode *, 32> BBs; 904 905 PREMap.clear(); 906 bool Changed = false; 907 BBs.push_back(DT->getRootNode()); 908 do { 909 auto Node = BBs.pop_back_val(); 910 append_range(BBs, Node->children()); 911 912 MachineBasicBlock *MBB = Node->getBlock(); 913 Changed |= ProcessBlockPRE(DT, MBB); 914 915 } while (!BBs.empty()); 916 917 return Changed; 918 } 919 920 bool MachineCSEImpl::isProfitableToHoistInto(MachineBasicBlock *CandidateBB, 921 MachineBasicBlock *MBB, 922 MachineBasicBlock *MBB1) { 923 if (CandidateBB->getParent()->getFunction().hasMinSize()) 924 return true; 925 assert(DT->dominates(CandidateBB, MBB) && "CandidateBB should dominate MBB"); 926 assert(DT->dominates(CandidateBB, MBB1) && 927 "CandidateBB should dominate MBB1"); 928 return MBFI->getBlockFreq(CandidateBB) <= 929 MBFI->getBlockFreq(MBB) + MBFI->getBlockFreq(MBB1); 930 } 931 932 void MachineCSEImpl::releaseMemory() { 933 ScopeMap.clear(); 934 PREMap.clear(); 935 Exps.clear(); 936 } 937 938 bool MachineCSEImpl::run(MachineFunction &MF) { 939 TII = MF.getSubtarget().getInstrInfo(); 940 TRI = MF.getSubtarget().getRegisterInfo(); 941 MRI = &MF.getRegInfo(); 942 LookAheadLimit = TII->getMachineCSELookAheadLimit(); 943 bool ChangedPRE, ChangedCSE; 944 ChangedPRE = PerformSimplePRE(DT); 945 ChangedCSE = PerformCSE(DT->getRootNode()); 946 releaseMemory(); 947 return ChangedPRE || ChangedCSE; 948 } 949 950 PreservedAnalyses MachineCSEPass::run(MachineFunction &MF, 951 MachineFunctionAnalysisManager &MFAM) { 952 MFPropsModifier _(*this, MF); 953 954 MachineDominatorTree &MDT = MFAM.getResult<MachineDominatorTreeAnalysis>(MF); 955 MachineBlockFrequencyInfo &MBFI = 956 MFAM.getResult<MachineBlockFrequencyAnalysis>(MF); 957 MachineCSEImpl Impl(&MDT, &MBFI); 958 bool Changed = Impl.run(MF); 959 if (!Changed) 960 return PreservedAnalyses::all(); 961 962 auto PA = getMachineFunctionPassPreservedAnalyses(); 963 PA.preserve<MachineLoopAnalysis>(); 964 PA.preserve<MachineDominatorTreeAnalysis>(); 965 PA.preserve<MachineBlockFrequencyAnalysis>(); 966 PA.preserveSet<CFGAnalyses>(); 967 return PA; 968 } 969 970 bool MachineCSELegacy::runOnMachineFunction(MachineFunction &MF) { 971 if (skipFunction(MF.getFunction())) 972 return false; 973 974 MachineDominatorTree &MDT = 975 getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree(); 976 MachineBlockFrequencyInfo &MBFI = 977 getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI(); 978 MachineCSEImpl Impl(&MDT, &MBFI); 979 return Impl.run(MF); 980 } 981